feat(lsp): semantic tokens for highlighting internal commands with spaces (#15173)

# Description

We decided to move this specific highlighting task from tree-sitter-nu
to the LSP server for various reasons.
https://github.com/nushell/tree-sitter-nu/pull/184

# User-Facing Changes

Before:
<img width="404" alt="image"
src="https://github.com/user-attachments/assets/79fad167-e424-4411-8aa2-334f08ecc4ab"
/>

After:
<img width="404" alt="image"
src="https://github.com/user-attachments/assets/8eec7c6c-2f63-4a7d-9e98-9e0c397be6bf"
/>


# Tests + Formatting
+1
# After Submitting
This commit is contained in:
zc he 2025-02-25 21:14:48 +08:00 committed by GitHub
parent 938fa6ee55
commit f51a79181a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 210 additions and 3 deletions

View File

@ -6,7 +6,8 @@ use lsp_types::{
CompletionItem, CompletionItemKind, CompletionItemLabelDetails, CompletionParams, CompletionItem, CompletionItemKind, CompletionItemLabelDetails, CompletionParams,
CompletionResponse, CompletionTextEdit, Documentation, Hover, HoverContents, HoverParams, CompletionResponse, CompletionTextEdit, Documentation, Hover, HoverContents, HoverParams,
InlayHint, Location, MarkupContent, MarkupKind, OneOf, Position, Range, ReferencesOptions, InlayHint, Location, MarkupContent, MarkupKind, OneOf, Position, Range, ReferencesOptions,
RenameOptions, ServerCapabilities, TextDocumentSyncKind, TextEdit, Uri, RenameOptions, SemanticToken, SemanticTokenType, SemanticTokensLegend, SemanticTokensOptions,
SemanticTokensServerCapabilities, ServerCapabilities, TextDocumentSyncKind, TextEdit, Uri,
WorkDoneProgressOptions, WorkspaceFolder, WorkspaceFoldersServerCapabilities, WorkDoneProgressOptions, WorkspaceFolder, WorkspaceFoldersServerCapabilities,
WorkspaceServerCapabilities, WorkspaceServerCapabilities,
}; };
@ -33,6 +34,7 @@ mod diagnostics;
mod goto; mod goto;
mod hints; mod hints;
mod notification; mod notification;
mod semantic_tokens;
mod symbols; mod symbols;
mod workspace; mod workspace;
@ -53,6 +55,7 @@ pub struct LanguageServer {
initial_engine_state: EngineState, initial_engine_state: EngineState,
symbol_cache: SymbolCache, symbol_cache: SymbolCache,
inlay_hints: BTreeMap<Uri, Vec<InlayHint>>, inlay_hints: BTreeMap<Uri, Vec<InlayHint>>,
semantic_tokens: BTreeMap<Uri, Vec<SemanticToken>>,
workspace_folders: BTreeMap<String, WorkspaceFolder>, workspace_folders: BTreeMap<String, WorkspaceFolder>,
/// for workspace wide requests /// for workspace wide requests
occurrences: BTreeMap<Uri, Vec<Range>>, occurrences: BTreeMap<Uri, Vec<Range>>,
@ -106,6 +109,7 @@ impl LanguageServer {
initial_engine_state: engine_state, initial_engine_state: engine_state,
symbol_cache: SymbolCache::new(), symbol_cache: SymbolCache::new(),
inlay_hints: BTreeMap::new(), inlay_hints: BTreeMap::new(),
semantic_tokens: BTreeMap::new(),
workspace_folders: BTreeMap::new(), workspace_folders: BTreeMap::new(),
occurrences: BTreeMap::new(), occurrences: BTreeMap::new(),
channels: None, channels: None,
@ -143,6 +147,17 @@ impl LanguageServer {
..Default::default() ..Default::default()
}), }),
workspace_symbol_provider: Some(OneOf::Left(true)), workspace_symbol_provider: Some(OneOf::Left(true)),
semantic_tokens_provider: Some(
SemanticTokensServerCapabilities::SemanticTokensOptions(SemanticTokensOptions {
// NOTE: only internal command names with space supported for now
legend: SemanticTokensLegend {
token_types: vec![SemanticTokenType::FUNCTION],
token_modifiers: vec![],
},
full: Some(lsp_types::SemanticTokensFullOptions::Bool(true)),
..Default::default()
}),
),
..Default::default() ..Default::default()
}) })
.expect("Must be serializable"); .expect("Must be serializable");
@ -202,6 +217,11 @@ impl LanguageServer {
request::InlayHintRequest::METHOD => { request::InlayHintRequest::METHOD => {
Self::handle_lsp_request(request, |params| self.get_inlay_hints(params)) Self::handle_lsp_request(request, |params| self.get_inlay_hints(params))
} }
request::SemanticTokensFullRequest::METHOD => {
Self::handle_lsp_request(request, |params| {
self.get_semantic_tokens(params)
})
}
request::PrepareRenameRequest::METHOD => { request::PrepareRenameRequest::METHOD => {
let id = request.id.clone(); let id = request.id.clone();
if let Err(e) = self.prepare_rename(request) { if let Err(e) = self.prepare_rename(request) {
@ -336,7 +356,7 @@ impl LanguageServer {
&mut self, &mut self,
engine_state: &'a mut EngineState, engine_state: &'a mut EngineState,
uri: &Uri, uri: &Uri,
need_hints: bool, need_extra_info: bool,
) -> Option<(Arc<Block>, Span, StateWorkingSet<'a>)> { ) -> Option<(Arc<Block>, Span, StateWorkingSet<'a>)> {
let mut working_set = StateWorkingSet::new(engine_state); let mut working_set = StateWorkingSet::new(engine_state);
let docs = self.docs.lock().ok()?; let docs = self.docs.lock().ok()?;
@ -347,10 +367,14 @@ impl LanguageServer {
let _ = working_set.files.push(file_path.clone(), Span::unknown()); let _ = working_set.files.push(file_path.clone(), Span::unknown());
let block = nu_parser::parse(&mut working_set, Some(file_path_str), contents, false); let block = nu_parser::parse(&mut working_set, Some(file_path_str), contents, false);
let span = working_set.get_span_for_filename(file_path_str)?; let span = working_set.get_span_for_filename(file_path_str)?;
if need_hints { if need_extra_info {
let file_inlay_hints = let file_inlay_hints =
Self::extract_inlay_hints(&working_set, &block, span.start, file); Self::extract_inlay_hints(&working_set, &block, span.start, file);
self.inlay_hints.insert(uri.clone(), file_inlay_hints); self.inlay_hints.insert(uri.clone(), file_inlay_hints);
let file_semantic_tokens =
Self::extract_semantic_tokens(&working_set, &block, span.start, file);
self.semantic_tokens
.insert(uri.clone(), file_semantic_tokens);
} }
if self.need_parse { if self.need_parse {
// TODO: incremental parsing // TODO: incremental parsing

View File

@ -0,0 +1,168 @@
use std::sync::Arc;
use lsp_textdocument::FullTextDocument;
use lsp_types::{SemanticToken, SemanticTokens, SemanticTokensParams};
use nu_protocol::{
ast::{Block, Expr, Expression, Traverse},
engine::StateWorkingSet,
Span,
};
use crate::{span_to_range, LanguageServer};
/// Collect the spans that should be emitted as semantic tokens for `expr`.
///
/// It is important to keep the returned spans in increasing order, since
/// `SemanticToken`s are encoded as positions relative to the previous token.
///
/// Currently supported types:
/// 1. internal command names with space
fn extract_semantic_tokens_from_expression(
    expr: &Expression,
    working_set: &StateWorkingSet,
) -> Option<Vec<Span>> {
    let recurse = |e: &Expression| extract_semantic_tokens_from_expression(e, working_set);
    let Expr::Call(call) = &expr.expr else {
        return None;
    };
    let command_name_bytes = working_set.get_span_contents(call.head);
    // Only spaced command names need extra highlighting; `export …` and
    // `overlay …` keywords are already highlighted properly, e.g. by
    // tree-sitter-nu.
    let head_span = (command_name_bytes.contains(&b' ')
        && !command_name_bytes.starts_with(b"export")
        && !command_name_bytes.starts_with(b"overlay"))
    .then_some(call.head);
    // Head span (if any) comes first, then spans gathered recursively from
    // the call's arguments — this preserves source order.
    let mut spans: Vec<Span> = head_span.into_iter().collect();
    spans.extend(
        call.arguments
            .iter()
            .filter_map(|arg| arg.expr())
            .flat_map(|e| e.flat_map(working_set, &recurse)),
    );
    Some(spans)
}
impl LanguageServer {
    /// Serve a `textDocument/semanticTokens/full` request from the cache
    /// that `extract_semantic_tokens` fills in when a document is parsed.
    ///
    /// Returns `None` when no tokens are cached for the requested document.
    pub(crate) fn get_semantic_tokens(
        &mut self,
        params: &SemanticTokensParams,
    ) -> Option<SemanticTokens> {
        self.semantic_tokens
            .get(&params.text_document.uri)
            .map(|vec| SemanticTokens {
                result_id: None,
                data: vec.clone(),
            })
    }

    /// Walk `block` and encode the spans gathered by
    /// `extract_semantic_tokens_from_expression` (currently: internal command
    /// names containing a space) as LSP `SemanticToken`s.
    ///
    /// Per the LSP spec, each token's position is expressed relative to the
    /// previous token (`delta_line` / `delta_start`), so the spans must be
    /// processed in increasing source order.
    pub(crate) fn extract_semantic_tokens(
        working_set: &StateWorkingSet,
        block: &Arc<Block>,
        offset: usize,
        file: &FullTextDocument,
    ) -> Vec<SemanticToken> {
        let spans = block.flat_map(working_set, &|e| {
            extract_semantic_tokens_from_expression(e, working_set)
        });
        // State for the relative encoding: position and span of the
        // previously emitted token.
        let mut last_token_line = 0;
        let mut last_token_char = 0;
        let mut last_span = Span::unknown();
        let mut tokens = vec![];
        for sp in spans {
            let range = span_to_range(&sp, file, offset);
            // Out-of-order span — shouldn't happen (extraction preserves
            // source order); skip rather than emit a broken delta.
            if sp < last_span {
                continue;
            }
            // in case the start position is at the end of lastline:
            // if the span crosses a line boundary, treat the token as
            // starting at column 0 of its final line.
            let real_start_char = if range.end.line != range.start.line {
                0
            } else {
                range.start.character
            };
            let mut delta_start = real_start_char;
            // Same line as the previous token → delta is column distance.
            // NOTE(review): deltas are computed from `range.end.line`; this
            // works because `real_start_char` is reset to 0 for multi-line
            // spans — confirm against the LSP relative-encoding rules.
            if range.end.line == last_token_line {
                delta_start -= last_token_char;
            }
            tokens.push(SemanticToken {
                delta_start,
                delta_line: range.end.line.saturating_sub(last_token_line),
                length: range.end.character.saturating_sub(real_start_char),
                // 0 means function in semantic_token_legend
                token_type: 0,
                token_modifiers_bitset: 0,
            });
            last_span = sp;
            last_token_line = range.end.line;
            last_token_char = real_start_char;
        }
        tokens
    }
}
#[cfg(test)]
mod tests {
    use crate::path_to_uri;
    use crate::tests::{initialize_language_server, open_unchecked, result_from_message};
    use assert_json_diff::assert_json_eq;
    use lsp_server::{Connection, Message};
    use lsp_types::{
        request::{Request, SemanticTokensFullRequest},
        TextDocumentIdentifier, Uri, WorkDoneProgressParams,
    };
    use lsp_types::{PartialResultParams, SemanticTokensParams};
    use nu_test_support::fs::fixtures;

    /// Issue a `textDocument/semanticTokens/full` request for `uri` and block
    /// (up to 2s) for the server's reply.
    fn send_semantic_token_request(client_connection: &Connection, uri: Uri) -> Message {
        let params = SemanticTokensParams {
            text_document: TextDocumentIdentifier { uri },
            work_done_progress_params: WorkDoneProgressParams::default(),
            partial_result_params: PartialResultParams::default(),
        };
        let request = lsp_server::Request {
            id: 1.into(),
            method: SemanticTokensFullRequest::METHOD.to_string(),
            params: serde_json::to_value(params).unwrap(),
        };
        client_connection
            .sender
            .send(Message::Request(request))
            .unwrap();
        client_connection
            .receiver
            .recv_timeout(std::time::Duration::from_secs(2))
            .unwrap()
    }

    #[test]
    fn semantic_token_internals() {
        let (client_connection, _recv) = initialize_language_server(None);

        // Fixture: tests/fixtures/lsp/semantic_tokens/internals.nu
        let mut script_path = fixtures();
        for segment in ["lsp", "semantic_tokens", "internals.nu"] {
            script_path.push(segment);
        }
        let script = path_to_uri(&script_path);

        open_unchecked(&client_connection, script.clone());
        let response = send_semantic_token_request(&client_connection, script.clone());

        assert_json_eq!(
            result_from_message(response),
            serde_json::json!(
            { "data": [
                // delta_line, delta_start, length, token_type, token_modifiers_bitset
                0, 0, 13, 0, 0,
                1, 2, 10, 0, 0,
                7, 15, 13, 0, 0,
                0, 20, 10, 0, 0,
                4, 0, 7, 0, 0
            ]})
        );
    }
}

View File

@ -0,0 +1,15 @@
str substring 1..
| ansi strip
# User defined one
export def "foo bar" [] {
# inside a block
(
# same line
"🤔🤖🐘" | str substring 1.. | ansi strip
)
}
foo bar
overlay use foo