From 8d8b44342b300a59f0d0941412dd367683a4bef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= Date: Wed, 20 Sep 2023 17:53:48 +0200 Subject: [PATCH] Fix exponential parser time on sequence of [[[[ (#10439) # Description Before this change, parsing `[[[[[[[[[[[[[[[[[[[[[[` would cause nushell to consume several gigabytes of memory; now parsing should be linear in time. The old code first tried parsing the head of the table as a list and then checked whether it got more arguments. If it didn't, it threw away the previous result and tried to parse the whole thing as a list, which meant we called `parse_list_expression` twice for each call to `parse_table_expression`, resulting in exponential growth. The fix is to simply check that we have all the arguments we need before parsing the head of the table, so we know that we will either call `parse_list_expression` only on sub-expressions or on the whole thing, never both. Fixes #10438 # User-Facing Changes Should give a noticeable speedup when typing a sequence of `[[[[[[` open brackets. # Tests + Formatting I would like to add tests, but I'm not sure how to do that without crashing CI with OOM on regression. - [x] Don't forget to add tests that cover your changes. - [x] `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - [x] `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - [x] `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - [x] `cargo run -- -c "use std testing; testing run-tests --path crates/nu-std"` to run the tests for the standard library # After Submitting If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. 
This will help us keep the docs up to date. --- crates/nu-parser/src/parser.rs | 25 +++++++------------------ src/tests/test_parser.rs | 7 +++++++ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index b56c60022..89598a209 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -4004,29 +4004,18 @@ fn parse_table_expression(working_set: &mut StateWorkingSet, span: Span) -> Expr working_set.error(err); } - let head = if let Some(first) = tokens.first() { - if working_set.get_span_contents(first.span).starts_with(b"[") { - parse_list_expression(working_set, first.span, &SyntaxShape::Any) - } else { - return parse_list_expression(working_set, span, &SyntaxShape::Any); - } - } else { + // Check that we have all arguments first, before trying to parse the first + // in order to avoid exponential parsing time + let [first, second, rest @ ..] = &tokens[..] else { return parse_list_expression(working_set, span, &SyntaxShape::Any); }; - - if tokens - .get(1) - .filter(|second| second.contents == TokenContents::Semicolon) - .is_none() + if !working_set.get_span_contents(first.span).starts_with(b"[") + || second.contents != TokenContents::Semicolon + || rest.is_empty() { return parse_list_expression(working_set, span, &SyntaxShape::Any); }; - - let rest = &tokens[2..]; - if rest.is_empty() { - return parse_list_expression(working_set, span, &SyntaxShape::Any); - } - + let head = parse_list_expression(working_set, first.span, &SyntaxShape::Any); let head = { let Expression { expr: Expr::List(vals), diff --git a/src/tests/test_parser.rs b/src/tests/test_parser.rs index 49289a7cf..a384a9812 100644 --- a/src/tests/test_parser.rs +++ b/src/tests/test_parser.rs @@ -455,6 +455,13 @@ fn single_value_row_condition() -> TestResult { ) } +#[test] +fn performance_nested_lists() -> TestResult { + // Parser used to be exponential on deeply nested lists + // TODO: Add a timeout + 
fail_test(r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[["#, "Unexpected end of code") +} + #[test] fn unary_not_1() -> TestResult { run_test(r#"not false"#, "true")