Reuse the cached parse results of parsed files (#8949)

# Description

This does a lookup in the cache of parsed files to see if a span can be
found for a file that was previously loaded with the same contents, then
uses that span to find the parsed block for that file. The end result
should, in theory, be identical, but it avoids reparsing the file and
avoids creating new blocks or new definitions that aren't needed.

This drops the sg.nu benchmark from:
```
╭───┬───────────────────╮
│ 0 │ 280ms 606µs 208ns │
│ 1 │ 282ms 654µs 416ns │
│ 2 │ 252ms 640µs 541ns │
│ 3 │  250ms 940µs 41ns │
│ 4 │ 241ms 216µs 375ns │
│ 5 │ 257ms 310µs 583ns │
│ 6 │ 196ms 739µs 416ns │
╰───┴───────────────────╯
```

to:
```
╭───┬───────────────────╮
│ 0 │ 118ms 698µs 125ns │
│ 1 │       121ms 327µs │
│ 2 │ 121ms 873µs 500ns │
│ 3 │  124ms 94µs 708ns │
│ 4 │ 113ms 733µs 291ns │
│ 5 │ 108ms 663µs 125ns │
│ 6 │  63ms 482µs 625ns │
╰───┴───────────────────╯
```

I was hoping to also see some startup time improvements, but I didn't
notice much there.

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect` to check that you're using the standard code
style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- crates/nu-std/tests/run.nu` to run the tests for the
standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
JT 2023-04-22 07:00:33 +12:00 committed by GitHub
parent 47af701380
commit d00038eb4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 39 additions and 11 deletions

View File

@ -1,5 +1,5 @@
use nu_command::create_default_context;
use nu_protocol::{engine::StateWorkingSet, Category};
use nu_protocol::{engine::StateWorkingSet, Category, Span};
use quickcheck_macros::quickcheck;
mod commands;
@ -15,7 +15,8 @@ fn quickcheck_parse(data: String) -> bool {
let mut working_set = StateWorkingSet::new(&context);
let _ = working_set.add_file("quickcheck".into(), data.as_bytes());
let _ = nu_parser::parse_block(&mut working_set, &tokens, false, false);
let _ =
nu_parser::parse_block(&mut working_set, &tokens, Span::new(0, 0), false, false);
}
}
true

View File

@ -2671,7 +2671,7 @@ pub fn parse_source(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeli
// working set, if it was a successful parse.
let block = parse(
working_set,
path.file_name().and_then(|x| x.to_str()),
Some(&path.to_string_lossy()),
&contents,
scoped,
);

View File

@ -1998,7 +1998,7 @@ pub fn parse_full_cell_path(
// Creating a Type scope to parse the new block. This will keep track of
// the previous input type found in that block
let output = parse_block(working_set, &output, true, true);
let output = parse_block(working_set, &output, span, true, true);
working_set
.type_scope
.add_type(working_set.type_scope.get_last_output());
@ -4015,7 +4015,7 @@ pub fn parse_block_expression(working_set: &mut StateWorkingSet, span: Span) ->
_ => (None, 0),
};
let mut output = parse_block(working_set, &output[amt_to_skip..], false, false);
let mut output = parse_block(working_set, &output[amt_to_skip..], span, false, false);
if let Some(signature) = signature {
output.signature = signature.0;
@ -4309,7 +4309,7 @@ pub fn parse_closure_expression(
}
}
let mut output = parse_block(working_set, &output[amt_to_skip..], false, false);
let mut output = parse_block(working_set, &output[amt_to_skip..], span, false, false);
if let Some(signature) = signature {
output.signature = signature.0;
@ -5196,6 +5196,7 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
pub fn parse_block(
working_set: &mut StateWorkingSet,
tokens: &[Token],
span: Span,
scoped: bool,
is_subexpression: bool,
) -> Block {
@ -5352,6 +5353,8 @@ pub fn parse_block(
}
working_set.type_scope.exit_scope();
block.span = Some(span);
block
}
@ -5816,12 +5819,20 @@ pub fn parse(
let file_id = working_set.add_file(name, contents);
let new_span = working_set.get_span_for_file(file_id);
let previously_parsed_block = working_set.find_block_by_span(new_span);
let mut output = {
if let Some(block) = previously_parsed_block {
return block;
} else {
let (output, err) = lex(contents, new_span.start, &[], &[], false);
if let Some(err) = err {
working_set.error(err)
}
let mut output = parse_block(working_set, &output, scoped, false);
parse_block(working_set, &output, new_span, scoped, false)
}
};
let mut seen = vec![];
let mut seen_blocks = HashMap::new();

View File

@ -1941,6 +1941,22 @@ impl<'a> StateWorkingSet<'a> {
.collect();
build_usage(&comment_lines)
}
/// Looks up an already-registered block whose recorded span equals `span`.
///
/// The blocks in `self.delta` are examined first, followed by the blocks in
/// `self.permanent_state`. The first block whose `span` field is exactly
/// `Some(span)` is returned as a clone; blocks with no recorded span never
/// match. Returns `None` when no block carries this span.
pub fn find_block_by_span(&self, span: Span) -> Option<Block> {
    let wanted = Some(span);

    self.delta
        .blocks
        .iter()
        .chain(self.permanent_state.blocks.iter())
        .find(|candidate| candidate.span == wanted)
        .cloned()
}
}
impl Default for EngineState {