Add backtrack named flag to parse (issue #15997) (#16000)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->

Addresses #15997

Adds a `--backtrack` (short form `-b`) named flag to the `parse` command.
The flag lets a user specify a maximum backtrack limit for fancy-regex
other than the default limit of 1,000,000.

The regex is now compiled with `RegexBuilder` instead of `Regex::new` so that the backtrack limit can be applied.

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

Adds a new named flag `backtrack` to the `parse` command. The flag is
optional and defaults to 1,000,000.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

Added an example test to the parse command using `--backtrack 1500000`.
This commit is contained in:
nuggetcrab 2025-06-19 06:42:30 -05:00 committed by GitHub
parent 8a0f2ca9f9
commit 2bed202b82
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,4 +1,4 @@
use fancy_regex::{Captures, Regex};
use fancy_regex::{Captures, Regex, RegexBuilder};
use nu_engine::command_prelude::*;
use nu_protocol::{ListStream, Signals, engine::StateWorkingSet};
use std::collections::VecDeque;
@ -31,6 +31,12 @@ impl Command for Parse {
(Type::List(Box::new(Type::Any)), Type::table()),
])
.switch("regex", "use full regex syntax for patterns", Some('r'))
.named(
"backtrack",
SyntaxShape::Int,
"set the max backtrack limit for regex",
Some('b'),
)
.allow_variants_without_examples(true)
.category(Category::Strings)
}
@ -96,6 +102,14 @@ impl Command for Parse {
"capture0" => Value::test_string("b"),
})])),
},
Example {
description: "Parse a string with a manually set fancy-regex backtrack limit",
example: "\"hi there\" | parse --backtrack 1500000 \"{foo} {bar}\"",
result: Some(Value::test_list(vec![Value::test_record(record! {
"foo" => Value::test_string("hi"),
"bar" => Value::test_string("there"),
})])),
},
]
}
@ -112,7 +126,10 @@ impl Command for Parse {
) -> Result<PipelineData, ShellError> {
let pattern: Spanned<String> = call.req(engine_state, stack, 0)?;
let regex: bool = call.has_flag(engine_state, stack, "regex")?;
operate(engine_state, pattern, regex, call, input)
let backtrack_limit: usize = call
.get_flag(engine_state, stack, "backtrack")?
.unwrap_or(1_000_000); // 1_000_000 is fancy_regex default
operate(engine_state, pattern, regex, backtrack_limit, call, input)
}
fn run_const(
@ -123,7 +140,17 @@ impl Command for Parse {
) -> Result<PipelineData, ShellError> {
let pattern: Spanned<String> = call.req_const(working_set, 0)?;
let regex: bool = call.has_flag_const(working_set, "regex")?;
operate(working_set.permanent(), pattern, regex, call, input)
let backtrack_limit: usize = call
.get_flag_const(working_set, "backtrack")?
.unwrap_or(1_000_000);
operate(
working_set.permanent(),
pattern,
regex,
backtrack_limit,
call,
input,
)
}
}
@ -131,6 +158,7 @@ fn operate(
engine_state: &EngineState,
pattern: Spanned<String>,
regex: bool,
backtrack_limit: usize,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
@ -145,13 +173,16 @@ fn operate(
build_regex(&pattern_item, pattern_span)?
};
let regex = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError {
error: "Error with regular expression".into(),
msg: e.to_string(),
span: Some(pattern_span),
help: None,
inner: vec![],
})?;
let regex = RegexBuilder::new(&item_to_parse)
.backtrack_limit(backtrack_limit)
.build()
.map_err(|e| ShellError::GenericError {
error: "Error with regular expression".into(),
msg: e.to_string(),
span: Some(pattern_span),
help: None,
inner: vec![],
})?;
let columns = regex
.capture_names()