Refactor source cache into CachedFile struct (#12240)

# Description
Get rid of two parallel `Vec`s in `StateDelta` and `EngineState`, that
also duplicated span information. Use a struct with documenting fields.

Also use `Arc<str>` and `Arc<[u8]>` for the allocations as they are
never modified and cloned often (see #12229 for the first improvement).
This also makes the representation more compact as no capacity is
necessary.

# User-Facing Changes
API breakage on `EngineState`/`StateWorkingSet`/`StateDelta` that should
not really affect plugin authors.
This commit is contained in:
Stefan Holderbach 2024-03-20 19:43:50 +01:00 committed by GitHub
parent 63335e99ae
commit ec528c0626
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 133 additions and 104 deletions

View File

@ -1,7 +1,7 @@
use crate::completions::{Completer, CompletionOptions, MatchAlgorithm, SortBy}; use crate::completions::{Completer, CompletionOptions, MatchAlgorithm, SortBy};
use nu_parser::FlatShape; use nu_parser::FlatShape;
use nu_protocol::{ use nu_protocol::{
engine::{EngineState, StateWorkingSet}, engine::{CachedFile, EngineState, StateWorkingSet},
Span, Span,
}; };
use reedline::Suggestion; use reedline::Suggestion;
@ -229,8 +229,9 @@ pub fn find_non_whitespace_index(contents: &[u8], start: usize) -> usize {
} }
} }
pub fn is_passthrough_command(working_set_file_contents: &[(Arc<Vec<u8>>, usize, usize)]) -> bool { pub fn is_passthrough_command(working_set_file_contents: &[CachedFile]) -> bool {
for (contents, _, _) in working_set_file_contents { for cached_file in working_set_file_contents {
let contents = &cached_file.content;
let last_pipe_pos_rev = contents.iter().rev().position(|x| x == &b'|'); let last_pipe_pos_rev = contents.iter().rev().position(|x| x == &b'|');
let last_pipe_pos = last_pipe_pos_rev.map(|x| contents.len() - x).unwrap_or(0); let last_pipe_pos = last_pipe_pos_rev.map(|x| contents.len() - x).unwrap_or(0);
@ -295,7 +296,7 @@ mod command_completions_tests {
let input = ele.0.as_bytes(); let input = ele.0.as_bytes();
let mut engine_state = EngineState::new(); let mut engine_state = EngineState::new();
engine_state.add_file("test.nu".into(), vec![]); engine_state.add_file("test.nu".into(), Arc::new([]));
let delta = { let delta = {
let mut working_set = StateWorkingSet::new(&engine_state); let mut working_set = StateWorkingSet::new(&engine_state);

View File

@ -94,8 +94,8 @@ fn gather_env_vars(
let span_offset = engine_state.next_span_start(); let span_offset = engine_state.next_span_start();
engine_state.add_file( engine_state.add_file(
"Host Environment Variables".to_string(), "Host Environment Variables".into(),
fake_env_file.as_bytes().to_vec(), fake_env_file.as_bytes().into(),
); );
let (tokens, _) = lex(fake_env_file.as_bytes(), span_offset, &[], &[], true); let (tokens, _) = lex(fake_env_file.as_bytes(), span_offset, &[], &[], true);

View File

@ -43,13 +43,15 @@ impl Command for ViewFiles {
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let mut records = vec![]; let mut records = vec![];
for (file, start, end) in engine_state.files() { for file in engine_state.files() {
let start = file.covered_span.start;
let end = file.covered_span.end;
records.push(Value::record( records.push(Value::record(
record! { record! {
"filename" => Value::string(&**file, call.head), "filename" => Value::string(&*file.name, call.head),
"start" => Value::int(*start as i64, call.head), "start" => Value::int(start as i64, call.head),
"end" => Value::int(*end as i64, call.head), "end" => Value::int(end as i64, call.head),
"size" => Value::int(*end as i64 - *start as i64, call.head), "size" => Value::int(end as i64 - start as i64, call.head),
}, },
call.head, call.head,
)); ));

View File

@ -284,11 +284,15 @@ impl LanguageServer {
if let Some(block_id) = working_set.get_decl(decl_id).get_block_id() { if let Some(block_id) = working_set.get_decl(decl_id).get_block_id() {
let block = working_set.get_block(block_id); let block = working_set.get_block(block_id);
if let Some(span) = &block.span { if let Some(span) = &block.span {
for (file_path, file_start, file_end) in working_set.files() { for cached_file in working_set.files() {
if span.start >= *file_start && span.start < *file_end { if cached_file.covered_span.contains(span.start) {
return Some(GotoDefinitionResponse::Scalar(Location { return Some(GotoDefinitionResponse::Scalar(Location {
uri: Url::from_file_path(&**file_path).ok()?, uri: Url::from_file_path(&*cached_file.name).ok()?,
range: Self::span_to_range(span, file, *file_start), range: Self::span_to_range(
span,
file,
cached_file.covered_span.start,
),
})); }));
} }
} }
@ -297,9 +301,10 @@ impl LanguageServer {
} }
Id::Variable(var_id) => { Id::Variable(var_id) => {
let var = working_set.get_variable(var_id); let var = working_set.get_variable(var_id);
for (_, file_start, file_end) in working_set.files() { for cached_file in working_set.files() {
if var.declaration_span.start >= *file_start if cached_file
&& var.declaration_span.start < *file_end .covered_span
.contains(var.declaration_span.start)
{ {
return Some(GotoDefinitionResponse::Scalar(Location { return Some(GotoDefinitionResponse::Scalar(Location {
uri: params uri: params
@ -307,7 +312,11 @@ impl LanguageServer {
.text_document .text_document
.uri .uri
.clone(), .clone(),
range: Self::span_to_range(&var.declaration_span, file, *file_start), range: Self::span_to_range(
&var.declaration_span,
file,
cached_file.covered_span.start,
),
})); }));
} }
} }

View File

@ -0,0 +1,15 @@
use crate::Span;
use std::sync::Arc;
/// Unit of cached source code
#[derive(Clone)]
pub struct CachedFile {
// Use Arcs of slice types for more compact representation (capacity less)
// Could possibly become an `Arc<PathBuf>`
/// The file name with which the code is associated (also includes REPL input)
pub name: Arc<str>,
/// Source code as raw bytes
pub content: Arc<[u8]>,
/// global span coordinates that are covered by this [`CachedFile`]
pub covered_span: Span,
}

View File

@ -1,6 +1,7 @@
use fancy_regex::Regex; use fancy_regex::Regex;
use lru::LruCache; use lru::LruCache;
use super::cached_file::CachedFile;
use super::{usage::build_usage, usage::Usage, StateDelta}; use super::{usage::build_usage, usage::Usage, StateDelta};
use super::{ use super::{
Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Variable, Visibility, DEFAULT_OVERLAY_NAME, Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Variable, Visibility, DEFAULT_OVERLAY_NAME,
@ -74,8 +75,7 @@ impl Clone for IsDebugging {
/// but they also rely on using IDs rather than full definitions. /// but they also rely on using IDs rather than full definitions.
#[derive(Clone)] #[derive(Clone)]
pub struct EngineState { pub struct EngineState {
files: Vec<(Arc<String>, usize, usize)>, files: Vec<CachedFile>,
file_contents: Vec<(Arc<Vec<u8>>, usize, usize)>,
pub(super) virtual_paths: Vec<(String, VirtualPath)>, pub(super) virtual_paths: Vec<(String, VirtualPath)>,
vars: Vec<Variable>, vars: Vec<Variable>,
decls: Arc<Vec<Box<dyn Command + 'static>>>, decls: Arc<Vec<Box<dyn Command + 'static>>>,
@ -119,7 +119,6 @@ impl EngineState {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
files: vec![], files: vec![],
file_contents: vec![],
virtual_paths: vec![], virtual_paths: vec![],
vars: vec![ vars: vec![
Variable::new(Span::new(0, 0), Type::Any, false), Variable::new(Span::new(0, 0), Type::Any, false),
@ -183,7 +182,6 @@ impl EngineState {
pub fn merge_delta(&mut self, mut delta: StateDelta) -> Result<(), ShellError> { pub fn merge_delta(&mut self, mut delta: StateDelta) -> Result<(), ShellError> {
// Take the mutable reference and extend the permanent state from the working set // Take the mutable reference and extend the permanent state from the working set
self.files.extend(delta.files); self.files.extend(delta.files);
self.file_contents.extend(delta.file_contents);
self.virtual_paths.extend(delta.virtual_paths); self.virtual_paths.extend(delta.virtual_paths);
self.vars.extend(delta.vars); self.vars.extend(delta.vars);
self.blocks.extend(delta.blocks); self.blocks.extend(delta.blocks);
@ -628,8 +626,8 @@ impl EngineState {
} }
pub fn print_contents(&self) { pub fn print_contents(&self) {
for (contents, _, _) in self.file_contents.iter() { for cached_file in self.files.iter() {
let string = String::from_utf8_lossy(contents); let string = String::from_utf8_lossy(&cached_file.content);
println!("{string}"); println!("{string}");
} }
} }
@ -747,9 +745,10 @@ impl EngineState {
} }
pub fn get_span_contents(&self, span: Span) -> &[u8] { pub fn get_span_contents(&self, span: Span) -> &[u8] {
for (contents, start, finish) in &self.file_contents { for file in &self.files {
if span.start >= *start && span.end <= *finish { if file.covered_span.contains_span(span) {
return &contents[(span.start - start)..(span.end - start)]; return &file.content
[(span.start - file.covered_span.start)..(span.end - file.covered_span.start)];
} }
} }
&[0u8; 0] &[0u8; 0]
@ -909,26 +908,28 @@ impl EngineState {
} }
pub fn next_span_start(&self) -> usize { pub fn next_span_start(&self) -> usize {
if let Some((_, _, last)) = self.file_contents.last() { if let Some(cached_file) = self.files.last() {
*last cached_file.covered_span.end
} else { } else {
0 0
} }
} }
pub fn files(&self) -> impl Iterator<Item = &(Arc<String>, usize, usize)> { pub fn files(&self) -> impl Iterator<Item = &CachedFile> {
self.files.iter() self.files.iter()
} }
pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize { pub fn add_file(&mut self, filename: Arc<str>, content: Arc<[u8]>) -> FileId {
let next_span_start = self.next_span_start(); let next_span_start = self.next_span_start();
let next_span_end = next_span_start + contents.len(); let next_span_end = next_span_start + content.len();
self.file_contents let covered_span = Span::new(next_span_start, next_span_end);
.push((Arc::new(contents), next_span_start, next_span_end));
self.files self.files.push(CachedFile {
.push((Arc::new(filename), next_span_start, next_span_end)); name: filename,
content,
covered_span,
});
self.num_files() - 1 self.num_files() - 1
} }
@ -968,8 +969,9 @@ impl EngineState {
.unwrap_or_default() .unwrap_or_default()
} }
pub fn get_file_contents(&self) -> &[(Arc<Vec<u8>>, usize, usize)] { // TODO: see if we can completely get rid of this
&self.file_contents pub fn get_file_contents(&self) -> &[CachedFile] {
&self.files
} }
pub fn get_startup_time(&self) -> i64 { pub fn get_startup_time(&self) -> i64 {
@ -1043,7 +1045,7 @@ mod engine_state_tests {
#[test] #[test]
fn add_file_gives_id_including_parent() { fn add_file_gives_id_including_parent() {
let mut engine_state = EngineState::new(); let mut engine_state = EngineState::new();
let parent_id = engine_state.add_file("test.nu".into(), vec![]); let parent_id = engine_state.add_file("test.nu".into(), Arc::new([]));
let mut working_set = StateWorkingSet::new(&engine_state); let mut working_set = StateWorkingSet::new(&engine_state);
let working_set_id = working_set.add_file("child.nu".into(), &[]); let working_set_id = working_set.add_file("child.nu".into(), &[]);
@ -1055,7 +1057,7 @@ mod engine_state_tests {
#[test] #[test]
fn merge_states() -> Result<(), ShellError> { fn merge_states() -> Result<(), ShellError> {
let mut engine_state = EngineState::new(); let mut engine_state = EngineState::new();
engine_state.add_file("test.nu".into(), vec![]); engine_state.add_file("test.nu".into(), Arc::new([]));
let delta = { let delta = {
let mut working_set = StateWorkingSet::new(&engine_state); let mut working_set = StateWorkingSet::new(&engine_state);
@ -1066,8 +1068,8 @@ mod engine_state_tests {
engine_state.merge_delta(delta)?; engine_state.merge_delta(delta)?;
assert_eq!(engine_state.num_files(), 2); assert_eq!(engine_state.num_files(), 2);
assert_eq!(&*engine_state.files[0].0, "test.nu"); assert_eq!(&*engine_state.files[0].name, "test.nu");
assert_eq!(&*engine_state.files[1].0, "child.nu"); assert_eq!(&*engine_state.files[1].name, "child.nu");
Ok(()) Ok(())
} }

View File

@ -1,3 +1,4 @@
mod cached_file;
mod call_info; mod call_info;
mod capture_block; mod capture_block;
mod command; mod command;
@ -11,6 +12,8 @@ mod stdio;
mod usage; mod usage;
mod variable; mod variable;
pub use cached_file::CachedFile;
pub use call_info::*; pub use call_info::*;
pub use capture_block::*; pub use capture_block::*;
pub use command::*; pub use command::*;

View File

@ -1,3 +1,4 @@
use super::cached_file::CachedFile;
use super::{usage::Usage, Command, EngineState, OverlayFrame, ScopeFrame, Variable, VirtualPath}; use super::{usage::Usage, Command, EngineState, OverlayFrame, ScopeFrame, Variable, VirtualPath};
use crate::ast::Block; use crate::ast::Block;
use crate::Module; use crate::Module;
@ -12,8 +13,7 @@ use crate::RegisteredPlugin;
/// can be applied to the global state to update it to contain both previous state and the state held /// can be applied to the global state to update it to contain both previous state and the state held
/// within the delta. /// within the delta.
pub struct StateDelta { pub struct StateDelta {
pub(super) files: Vec<(Arc<String>, usize, usize)>, pub(super) files: Vec<CachedFile>,
pub(crate) file_contents: Vec<(Arc<Vec<u8>>, usize, usize)>,
pub(super) virtual_paths: Vec<(String, VirtualPath)>, pub(super) virtual_paths: Vec<(String, VirtualPath)>,
pub(super) vars: Vec<Variable>, // indexed by VarId pub(super) vars: Vec<Variable>, // indexed by VarId
pub(super) decls: Vec<Box<dyn Command>>, // indexed by DeclId pub(super) decls: Vec<Box<dyn Command>>, // indexed by DeclId
@ -38,7 +38,6 @@ impl StateDelta {
StateDelta { StateDelta {
files: vec![], files: vec![],
file_contents: vec![],
virtual_paths: vec![], virtual_paths: vec![],
vars: vec![], vars: vec![],
decls: vec![], decls: vec![],
@ -131,7 +130,7 @@ impl StateDelta {
self.scope.pop(); self.scope.pop();
} }
pub fn get_file_contents(&self) -> &[(Arc<Vec<u8>>, usize, usize)] { pub fn get_file_contents(&self) -> &[CachedFile] {
&self.file_contents &self.files
} }
} }

View File

@ -1,3 +1,4 @@
use super::cached_file::CachedFile;
use super::{ use super::{
usage::build_usage, Command, EngineState, OverlayFrame, StateDelta, Variable, VirtualPath, usage::build_usage, Command, EngineState, OverlayFrame, StateDelta, Variable, VirtualPath,
Visibility, PWD_ENV, Visibility, PWD_ENV,
@ -285,8 +286,8 @@ impl<'a> StateWorkingSet<'a> {
pub fn next_span_start(&self) -> usize { pub fn next_span_start(&self) -> usize {
let permanent_span_start = self.permanent_state.next_span_start(); let permanent_span_start = self.permanent_state.next_span_start();
if let Some((_, _, last)) = self.delta.file_contents.last() { if let Some(cached_file) = self.delta.files.last() {
*last cached_file.covered_span.end
} else { } else {
permanent_span_start permanent_span_start
} }
@ -296,21 +297,22 @@ impl<'a> StateWorkingSet<'a> {
self.permanent_state.next_span_start() self.permanent_state.next_span_start()
} }
pub fn files(&'a self) -> impl Iterator<Item = &(Arc<String>, usize, usize)> { pub fn files(&self) -> impl Iterator<Item = &CachedFile> {
self.permanent_state.files().chain(self.delta.files.iter()) self.permanent_state.files().chain(self.delta.files.iter())
} }
pub fn get_contents_of_file(&self, file_id: usize) -> Option<&[u8]> { pub fn get_contents_of_file(&self, file_id: FileId) -> Option<&[u8]> {
for (id, (contents, _, _)) in self.delta.file_contents.iter().enumerate() { if let Some(cached_file) = self.permanent_state.get_file_contents().get(file_id) {
if self.permanent_state.num_files() + id == file_id { return Some(&cached_file.content);
return Some(contents);
}
} }
// The index subtraction will not underflow, if we hit the permanent state first.
for (id, (contents, _, _)) in self.permanent_state.get_file_contents().iter().enumerate() { // Check if you try reordering for locality
if id == file_id { if let Some(cached_file) = self
return Some(contents); .delta
} .get_file_contents()
.get(file_id - self.permanent_state.num_files())
{
return Some(&cached_file.content);
} }
None None
@ -319,27 +321,22 @@ impl<'a> StateWorkingSet<'a> {
#[must_use] #[must_use]
pub fn add_file(&mut self, filename: String, contents: &[u8]) -> FileId { pub fn add_file(&mut self, filename: String, contents: &[u8]) -> FileId {
// First, look for the file to see if we already have it // First, look for the file to see if we already have it
for (idx, (fname, file_start, file_end)) in self.files().enumerate() { for (idx, cached_file) in self.files().enumerate() {
if **fname == filename { if *cached_file.name == filename && &*cached_file.content == contents {
let prev_contents = self.get_span_contents(Span::new(*file_start, *file_end)); return idx;
if prev_contents == contents {
return idx;
}
} }
} }
let next_span_start = self.next_span_start(); let next_span_start = self.next_span_start();
let next_span_end = next_span_start + contents.len(); let next_span_end = next_span_start + contents.len();
self.delta.file_contents.push(( let covered_span = Span::new(next_span_start, next_span_end);
Arc::new(contents.to_vec()),
next_span_start,
next_span_end,
));
self.delta self.delta.files.push(CachedFile {
.files name: filename.into(),
.push((Arc::new(filename), next_span_start, next_span_end)); content: contents.into(),
covered_span,
});
self.num_files() - 1 self.num_files() - 1
} }
@ -352,35 +349,31 @@ impl<'a> StateWorkingSet<'a> {
} }
pub fn get_span_for_filename(&self, filename: &str) -> Option<Span> { pub fn get_span_for_filename(&self, filename: &str) -> Option<Span> {
let (file_id, ..) = self let file_id = self.files().position(|file| &*file.name == filename)?;
.files()
.enumerate()
.find(|(_, (fname, _, _))| **fname == filename)?;
Some(self.get_span_for_file(file_id)) Some(self.get_span_for_file(file_id))
} }
pub fn get_span_for_file(&self, file_id: usize) -> Span { /// Panics:
/// On invalid `FileId`
///
/// Use with care
pub fn get_span_for_file(&self, file_id: FileId) -> Span {
let result = self let result = self
.files() .files()
.nth(file_id) .nth(file_id)
.expect("internal error: could not find source for previously parsed file"); .expect("internal error: could not find source for previously parsed file");
Span::new(result.1, result.2) result.covered_span
} }
pub fn get_span_contents(&self, span: Span) -> &[u8] { pub fn get_span_contents(&self, span: Span) -> &[u8] {
let permanent_end = self.permanent_state.next_span_start(); let permanent_end = self.permanent_state.next_span_start();
if permanent_end <= span.start { if permanent_end <= span.start {
for (contents, start, finish) in &self.delta.file_contents { for cached_file in &self.delta.files {
if (span.start >= *start) && (span.end <= *finish) { if cached_file.covered_span.contains_span(span) {
let begin = span.start - start; return &cached_file.content[span.start - cached_file.covered_span.start
let mut end = span.end - start; ..span.end - cached_file.covered_span.start];
if begin > end {
end = *finish - permanent_end;
}
return &contents[begin..end];
} }
} }
} }
@ -1033,19 +1026,24 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> {
let finding_span = "Finding span in StateWorkingSet"; let finding_span = "Finding span in StateWorkingSet";
dbg!(finding_span, span); dbg!(finding_span, span);
} }
for (filename, start, end) in self.files() { for cached_file in self.files() {
let (filename, start, end) = (
&cached_file.name,
cached_file.covered_span.start,
cached_file.covered_span.end,
);
if debugging { if debugging {
dbg!(&filename, start, end); dbg!(&filename, start, end);
} }
if span.offset() >= *start && span.offset() + span.len() <= *end { if span.offset() >= start && span.offset() + span.len() <= end {
if debugging { if debugging {
let found_file = "Found matching file"; let found_file = "Found matching file";
dbg!(found_file); dbg!(found_file);
} }
let our_span = Span::new(*start, *end); let our_span = cached_file.covered_span;
// We need to move to a local span because we're only reading // We need to move to a local span because we're only reading
// the specific file contents via self.get_span_contents. // the specific file contents via self.get_span_contents.
let local_span = (span.offset() - *start, span.len()).into(); let local_span = (span.offset() - start, span.len()).into();
if debugging { if debugging {
dbg!(&local_span); dbg!(&local_span);
} }
@ -1066,7 +1064,7 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> {
} }
let data = span_contents.data(); let data = span_contents.data();
if **filename == "<cli>" { if &**filename == "<cli>" {
if debugging { if debugging {
let success_cli = "Successfully read CLI span"; let success_cli = "Successfully read CLI span";
dbg!(success_cli, String::from_utf8_lossy(data)); dbg!(success_cli, String::from_utf8_lossy(data));
@ -1084,7 +1082,7 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> {
dbg!(success_file); dbg!(success_file);
} }
return Ok(Box::new(miette::MietteSpanContents::new_named( return Ok(Box::new(miette::MietteSpanContents::new_named(
(**filename).clone(), (**filename).to_owned(),
data, data,
retranslated, retranslated,
span_contents.line(), span_contents.line(),

View File

@ -160,14 +160,14 @@ pub fn goto_def(engine_state: &mut EngineState, file_path: &str, location: &Valu
let block = working_set.get_block(block_id); let block = working_set.get_block(block_id);
if let Some(span) = &block.span { if let Some(span) = &block.span {
for file in working_set.files() { for file in working_set.files() {
if span.start >= file.1 && span.start < file.2 { if file.covered_span.contains(span.start) {
println!( println!(
"{}", "{}",
json!( json!(
{ {
"file": &**file.0, "file": &*file.name,
"start": span.start - file.1, "start": span.start - file.covered_span.start,
"end": span.end - file.1 "end": span.end - file.covered_span.start,
} }
) )
); );
@ -180,14 +180,14 @@ pub fn goto_def(engine_state: &mut EngineState, file_path: &str, location: &Valu
Some((Id::Variable(var_id), ..)) => { Some((Id::Variable(var_id), ..)) => {
let var = working_set.get_variable(var_id); let var = working_set.get_variable(var_id);
for file in working_set.files() { for file in working_set.files() {
if var.declaration_span.start >= file.1 && var.declaration_span.start < file.2 { if file.covered_span.contains(var.declaration_span.start) {
println!( println!(
"{}", "{}",
json!( json!(
{ {
"file": &**file.0, "file": &*file.name,
"start": var.declaration_span.start - file.1, "start": var.declaration_span.start - file.covered_span.start,
"end": var.declaration_span.end - file.1 "end": var.declaration_span.end - file.covered_span.start,
} }
) )
); );