From 6cdc9e3b77c15ee34064823f7e6cf163cc84affe Mon Sep 17 00:00:00 2001 From: zc he Date: Tue, 29 Oct 2024 19:35:37 +0800 Subject: [PATCH] Fix LSP non-ascii characters offset issues. (#14002) This PR is supposed to fix #13582, #11522, as well as related goto definition/reference issues (wrong position if non ascii characters ahead). # Description image Changes: 1. span/completion should use byte offset instead of character index 2. lsp Postions related ops in Ropey remain to use character index # User-Facing Changes Should be none, tested in neovim with config: ```lua require("lspconfig").nushell.setup({ cmd = { "nu", "-I", vim.fn.getcwd(), "--no-config-file", "--lsp", }, filetypes = { "nu" }, }) ``` # Tests + Formatting tests::complete_command_with_utf_line parameters fixed to align with true lsp requests (in character index, not byte). As for the issue_11522.nu, manually tested: image # After Submitting --- Cargo.toml | 2 +- crates/nu-lsp/src/diagnostics.rs | 11 +- crates/nu-lsp/src/lib.rs | 297 +++++++++++++++++---- crates/nu-lsp/src/notification.rs | 27 +- tests/fixtures/lsp/goto/command_unicode.nu | 5 + 5 files changed, 273 insertions(+), 69 deletions(-) create mode 100644 tests/fixtures/lsp/goto/command_unicode.nu diff --git a/Cargo.toml b/Cargo.toml index 9d9078d423..0390bc956b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,7 +103,7 @@ log = "0.4" lru = "0.12" lscolors = { version = "0.17", default-features = false } lsp-server = "0.7.5" -lsp-types = "0.95.0" +lsp-types = { version = "0.95.0", features = ["proposed"] } mach2 = "0.4" md5 = { version = "0.10", package = "md-5" } miette = "7.2" diff --git a/crates/nu-lsp/src/diagnostics.rs b/crates/nu-lsp/src/diagnostics.rs index 423fffbf6c..e1ae63dc59 100644 --- a/crates/nu-lsp/src/diagnostics.rs +++ b/crates/nu-lsp/src/diagnostics.rs @@ -45,7 +45,12 @@ impl LanguageServer { let message = err.to_string(); diagnostics.diagnostics.push(Diagnostic { - range: Self::span_to_range(&err.span(), rope_of_file, offset), + range: Self::span_to_range( + &err.span(), + rope_of_file, + offset, + &self.position_encoding, + ), severity: Some(DiagnosticSeverity::ERROR), message, ..Default::default() @@ -71,7 +76,7 @@ mod tests { #[test] fn publish_diagnostics_variable_does_not_exists() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -102,7 +107,7 @@ mod tests { #[test] fn publish_diagnostics_fixed_unknown_variable() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); diff --git a/crates/nu-lsp/src/lib.rs b/crates/nu-lsp/src/lib.rs index d698b126e0..0fe100c6b4 100644 --- a/crates/nu-lsp/src/lib.rs +++ b/crates/nu-lsp/src/lib.rs @@ -4,17 +4,18 @@ use lsp_types::{ request::{Completion, GotoDefinition, HoverRequest, Request}, CompletionItem, CompletionItemKind, CompletionParams, CompletionResponse, CompletionTextEdit, GotoDefinitionParams, GotoDefinitionResponse, Hover, HoverContents, HoverParams, Location, - MarkupContent, MarkupKind, OneOf, Range, ServerCapabilities, TextDocumentSyncKind, TextEdit, - Url, + MarkupContent, MarkupKind, OneOf, Position, PositionEncodingKind, Range, ServerCapabilities, + TextDocumentSyncKind, TextEdit, Url, }; use miette::{IntoDiagnostic, Result}; use nu_cli::{NuCompleter, SuggestionKind}; use nu_parser::{flatten_block, parse, FlatShape}; use nu_protocol::{ - engine::{EngineState, Stack, StateWorkingSet}, + engine::{CachedFile, EngineState, Stack, StateWorkingSet}, DeclId, Span, Value, VarId, }; use ropey::Rope; +use serde_json::json; use std::{ collections::BTreeMap, path::{Path, PathBuf}, @@ -36,6 +37,7 @@ pub struct LanguageServer { connection: Connection, io_threads: Option, ropes: BTreeMap, + position_encoding: PositionEncodingKind, } impl LanguageServer { @@ -52,9 +54,21 @@ impl LanguageServer { connection, io_threads, ropes: BTreeMap::new(), + position_encoding: PositionEncodingKind::UTF16, }) } + fn get_offset_encoding(&self, initialization_params: serde_json::Value) -> String { + initialization_params + .pointer("/capabilities/offsetEncoding/0") + .unwrap_or( + initialization_params + .pointer("/capabilities/offset_encoding/0") + .unwrap_or(&json!("utf-16")), + ) + .to_string() + } + pub fn serve_requests(mut self, engine_state: EngineState) -> Result<()> { let server_capabilities = serde_json::to_value(ServerCapabilities { text_document_sync: Some(lsp_types::TextDocumentSyncCapability::Kind( @@ -67,12 +81,14 @@ impl LanguageServer { }) .expect("Must be serializable"); - let _initialization_params = self + let initialization_params = self .connection .initialize_while(server_capabilities, || { !engine_state.signals().interrupted() }) .into_diagnostic()?; + self.position_encoding = + PositionEncodingKind::from(self.get_offset_encoding(initialization_params)); while !engine_state.signals().interrupted() { let msg = match self @@ -174,29 +190,78 @@ impl LanguageServer { } } - fn span_to_range(span: &Span, rope_of_file: &Rope, offset: usize) -> lsp_types::Range { - let line = rope_of_file.byte_to_line(span.start - offset); - let character = span.start - offset - rope_of_file.line_to_char(line); - - let start = lsp_types::Position { - line: line as u32, - character: character as u32, - }; - - let line = rope_of_file.byte_to_line(span.end - offset); - let character = span.end - offset - rope_of_file.line_to_char(line); - - let end = lsp_types::Position { - line: line as u32, - character: character as u32, - }; - - lsp_types::Range { start, end } + fn span_to_range( + span: &Span, + rope_of_file: &Rope, + offset: usize, + position_encoding: &PositionEncodingKind, + ) -> Range { + let start = Self::lsp_byte_offset_to_utf_cu_position( + span.start.saturating_sub(offset), + rope_of_file, + position_encoding, + ); + let end = Self::lsp_byte_offset_to_utf_cu_position( + span.end.saturating_sub(offset), + rope_of_file, + position_encoding, + ); + Range { start, end } } - pub fn lsp_position_to_location(position: &lsp_types::Position, rope_of_file: &Rope) -> usize { - let line_idx = rope_of_file.line_to_char(position.line as usize); - line_idx + position.character as usize + fn lsp_byte_offset_to_utf_cu_position( + offset: usize, + rope_of_file: &Rope, + position_encoding: &PositionEncodingKind, + ) -> Position { + let line = rope_of_file.try_byte_to_line(offset).unwrap_or(0); + match position_encoding.as_str() { + "\"utf-8\"" => { + let character = offset - rope_of_file.line_to_byte(line); + Position { + line: line as u32, + character: character as u32, + } + } + _ => { + let character = rope_of_file.char_to_utf16_cu(rope_of_file.byte_to_char(offset)) + - rope_of_file.char_to_utf16_cu(rope_of_file.line_to_char(line)); + Position { + line: line as u32, + character: character as u32, + } + } + } + } + + fn utf16_cu_position_to_char(rope_of_file: &Rope, position: &Position) -> usize { + let line_utf_idx = + rope_of_file.char_to_utf16_cu(rope_of_file.line_to_char(position.line as usize)); + rope_of_file.utf16_cu_to_char(line_utf_idx + position.character as usize) + } + + pub fn lsp_position_to_location( + position: &Position, + rope_of_file: &Rope, + position_encoding: &PositionEncodingKind, + ) -> usize { + match position_encoding.as_str() { + "\"utf-8\"" => rope_of_file.byte_to_char( + rope_of_file.line_to_byte(position.line as usize) + position.character as usize, + ), + _ => Self::utf16_cu_position_to_char(rope_of_file, position), + } + } + + fn lsp_position_to_byte_offset(&self, position: &Position, rope_of_file: &Rope) -> usize { + match self.position_encoding.as_str() { + "\"utf-8\"" => { + rope_of_file.line_to_byte(position.line as usize) + position.character as usize + } + _ => rope_of_file + .try_char_to_byte(Self::utf16_cu_position_to_char(rope_of_file, position)) + .expect("Character index out of range!"), + } } fn find_id( @@ -240,7 +305,7 @@ impl LanguageServer { } fn read_in_file<'a>( - &mut self, + &self, engine_state: &'a mut EngineState, file_url: &Url, ) -> Option<(&Rope, &PathBuf, StateWorkingSet<'a>)> { @@ -253,6 +318,15 @@ impl LanguageServer { Some((file, path, working_set)) } + fn rope_file_from_cached_file(&mut self, cached_file: &CachedFile) -> Result<(Url, &Rope), ()> { + let uri = Url::from_file_path(&*cached_file.name)?; + let rope_of_file = self.ropes.entry(uri.to_file_path()?).or_insert_with(|| { + let raw_string = String::from_utf8_lossy(&cached_file.content); + Rope::from_str(&raw_string) + }); + Ok((uri, rope_of_file)) + } + fn goto_definition( &mut self, engine_state: &mut EngineState, @@ -270,7 +344,7 @@ impl LanguageServer { &mut working_set, path, file, - Self::lsp_position_to_location(¶ms.text_document_position_params.position, file), + self.lsp_position_to_byte_offset(¶ms.text_document_position_params.position, file), )?; match id { @@ -280,12 +354,16 @@ impl LanguageServer { if let Some(span) = &block.span { for cached_file in working_set.files() { if cached_file.covered_span.contains(span.start) { + let position_encoding = self.position_encoding.clone(); + let (uri, rope_of_file) = + self.rope_file_from_cached_file(cached_file).ok()?; return Some(GotoDefinitionResponse::Scalar(Location { - uri: Url::from_file_path(&*cached_file.name).ok()?, + uri, range: Self::span_to_range( span, - file, + rope_of_file, cached_file.covered_span.start, + &position_encoding, ), })); } @@ -300,16 +378,16 @@ impl LanguageServer { .covered_span .contains(var.declaration_span.start) { + let position_encoding = self.position_encoding.clone(); + let (uri, rope_of_file) = + self.rope_file_from_cached_file(cached_file).ok()?; return Some(GotoDefinitionResponse::Scalar(Location { - uri: params - .text_document_position_params - .text_document - .uri - .clone(), + uri, range: Self::span_to_range( &var.declaration_span, - file, + rope_of_file, cached_file.covered_span.start, + &position_encoding, ), })); } @@ -333,7 +411,7 @@ impl LanguageServer { &mut working_set, path, file, - Self::lsp_position_to_location(¶ms.text_document_position_params.position, file), + self.lsp_position_to_byte_offset(¶ms.text_document_position_params.position, file), )?; match id { @@ -552,7 +630,7 @@ impl LanguageServer { NuCompleter::new(Arc::new(engine_state.clone()), Arc::new(Stack::new())); let location = - Self::lsp_position_to_location(¶ms.text_document_position.position, rope_of_file); + self.lsp_position_to_byte_offset(¶ms.text_document_position.position, rope_of_file); let results = completer.fetch_completions_at(&rope_of_file.to_string()[..location], location); if results.is_empty() { @@ -618,7 +696,9 @@ mod tests { use nu_test_support::fs::{fixtures, root}; use std::sync::mpsc::Receiver; - pub fn initialize_language_server() -> (Connection, Receiver>) { + pub fn initialize_language_server( + client_offset_encoding: Option>, + ) -> (Connection, Receiver>) { use std::sync::mpsc; let (client_connection, server_connection) = Connection::memory(); let lsp_server = LanguageServer::initialize_connection(server_connection, None).unwrap(); @@ -636,6 +716,10 @@ mod tests { id: 1.into(), method: Initialize::METHOD.to_string(), params: serde_json::to_value(InitializeParams { + capabilities: lsp_types::ClientCapabilities { + offset_encoding: client_offset_encoding, + ..Default::default() + }, ..Default::default() }) .unwrap(), @@ -659,7 +743,7 @@ mod tests { #[test] fn shutdown_on_request() { - let (client_connection, recv) = initialize_language_server(); + let (client_connection, recv) = initialize_language_server(None); client_connection .sender @@ -685,7 +769,7 @@ mod tests { #[test] fn goto_definition_for_none_existing_file() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut none_existent_path = root(); none_existent_path.push("none-existent.nu"); @@ -700,7 +784,7 @@ mod tests { text_document: TextDocumentIdentifier { uri: Url::from_file_path(none_existent_path).unwrap(), }, - position: lsp_types::Position { + position: Position { line: 0, character: 0, }, @@ -817,7 +901,7 @@ mod tests { params: serde_json::to_value(GotoDefinitionParams { text_document_position_params: TextDocumentPositionParams { text_document: TextDocumentIdentifier { uri }, - position: lsp_types::Position { line, character }, + position: Position { line, character }, }, work_done_progress_params: WorkDoneProgressParams::default(), partial_result_params: PartialResultParams::default(), @@ -834,7 +918,7 @@ mod tests { #[test] fn goto_definition_of_variable() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -865,7 +949,7 @@ mod tests { #[test] fn goto_definition_of_command() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -894,9 +978,72 @@ mod tests { ); } + #[test] + fn goto_definition_of_command_utf8() { + let (client_connection, _recv) = + initialize_language_server(Some(vec!["utf-8".to_string(), "utf-16".to_string()])); + + let mut script = fixtures(); + script.push("lsp"); + script.push("goto"); + script.push("command_unicode.nu"); + let script = Url::from_file_path(script).unwrap(); + + open_unchecked(&client_connection, script.clone()); + + let resp = goto_definition(&client_connection, script.clone(), 4, 1); + let result = if let Message::Response(response) = resp { + response.result + } else { + panic!() + }; + + assert_json_eq!( + result, + serde_json::json!({ + "uri": script, + "range": { + "start": { "line": 0, "character": 28 }, + "end": { "line": 2, "character": 1 } + } + }) + ); + } + + #[test] + fn goto_definition_of_command_utf16() { + let (client_connection, _recv) = initialize_language_server(None); + + let mut script = fixtures(); + script.push("lsp"); + script.push("goto"); + script.push("command_unicode.nu"); + let script = Url::from_file_path(script).unwrap(); + + open_unchecked(&client_connection, script.clone()); + + let resp = goto_definition(&client_connection, script.clone(), 4, 1); + let result = if let Message::Response(response) = resp { + response.result + } else { + panic!() + }; + + assert_json_eq!( + result, + serde_json::json!({ + "uri": script, + "range": { + "start": { "line": 0, "character": 19 }, + "end": { "line": 2, "character": 1 } + } + }) + ); + } + #[test] fn goto_definition_of_command_parameter() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -934,7 +1081,7 @@ mod tests { params: serde_json::to_value(HoverParams { text_document_position_params: TextDocumentPositionParams { text_document: TextDocumentIdentifier { uri }, - position: lsp_types::Position { line, character }, + position: Position { line, character }, }, work_done_progress_params: WorkDoneProgressParams::default(), }) @@ -950,7 +1097,7 @@ mod tests { #[test] fn hover_on_variable() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -977,7 +1124,7 @@ mod tests { #[test] fn hover_on_custom_command() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -1007,7 +1154,7 @@ mod tests { #[test] fn hover_on_str_join() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -1044,7 +1191,7 @@ mod tests { params: serde_json::to_value(CompletionParams { text_document_position: TextDocumentPositionParams { text_document: TextDocumentIdentifier { uri }, - position: lsp_types::Position { line, character }, + position: Position { line, character }, }, work_done_progress_params: WorkDoneProgressParams::default(), partial_result_params: PartialResultParams::default(), @@ -1062,7 +1209,7 @@ mod tests { #[test] fn complete_on_variable() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -1099,7 +1246,7 @@ mod tests { #[test] fn complete_command_with_space() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -1136,8 +1283,9 @@ mod tests { } #[test] - fn complete_command_with_utf_line() { - let (client_connection, _recv) = initialize_language_server(); + fn complete_command_with_utf8_line() { + let (client_connection, _recv) = + initialize_language_server(Some(vec!["utf-8".to_string()])); let mut script = fixtures(); script.push("lsp"); @@ -1173,9 +1321,48 @@ mod tests { ); } + #[test] + fn complete_command_with_utf16_line() { + let (client_connection, _recv) = + initialize_language_server(Some(vec!["utf-16".to_string()])); + + let mut script = fixtures(); + script.push("lsp"); + script.push("completion"); + script.push("utf_pipeline.nu"); + let script = Url::from_file_path(script).unwrap(); + + open_unchecked(&client_connection, script.clone()); + + let resp = complete(&client_connection, script, 0, 13); + let result = if let Message::Response(response) = resp { + response.result + } else { + panic!() + }; + + assert_json_eq!( + result, + serde_json::json!([ + { + "label": "str trim", + "detail": "Trim whitespace or specific character.", + "textEdit": { + "range": { + "start": { "line": 0, "character": 8 }, + "end": { "line": 0, "character": 13 }, + }, + "newText": "str trim" + }, + "kind": 3 + } + ]) + ); + } + #[test] fn complete_keyword() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); diff --git a/crates/nu-lsp/src/notification.rs b/crates/nu-lsp/src/notification.rs index a715a67d4f..a3e563b468 100644 --- a/crates/nu-lsp/src/notification.rs +++ b/crates/nu-lsp/src/notification.rs @@ -64,20 +64,27 @@ impl LanguageServer { if let Ok(file_path) = params.text_document.uri.to_file_path() { for content_change in params.content_changes.into_iter() { let entry = self.ropes.entry(file_path.clone()); - match (content_change.range, content_change.range) { - (Some(range), _) => { + match content_change.range { + Some(range) => { entry.and_modify(|rope| { - let start = Self::lsp_position_to_location(&range.start, rope); - let end = Self::lsp_position_to_location(&range.end, rope); + let start = Self::lsp_position_to_location( + &range.start, + rope, + &self.position_encoding, + ); + let end = Self::lsp_position_to_location( + &range.end, + rope, + &self.position_encoding, + ); rope.remove(start..end); rope.insert(start, &content_change.text); }); } - (None, None) => { + None => { entry.and_modify(|r| *r = Rope::from_str(&content_change.text)); } - _ => {} } } @@ -99,7 +106,7 @@ mod tests { #[test] fn hover_correct_documentation_on_let() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -129,7 +136,7 @@ mod tests { #[test] fn hover_on_command_after_full_content_change() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -170,7 +177,7 @@ hello"#, #[test] fn hover_on_command_after_partial_content_change() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); @@ -215,7 +222,7 @@ hello"#, #[test] fn open_document_with_utf_char() { - let (client_connection, _recv) = initialize_language_server(); + let (client_connection, _recv) = initialize_language_server(None); let mut script = fixtures(); script.push("lsp"); diff --git a/tests/fixtures/lsp/goto/command_unicode.nu b/tests/fixtures/lsp/goto/command_unicode.nu new file mode 100644 index 0000000000..90fd9d329b --- /dev/null +++ b/tests/fixtures/lsp/goto/command_unicode.nu @@ -0,0 +1,5 @@ +def 🐚🤖啊あà [name] { + $"hello ($name)" +} + +🐚🤖啊あà nushell