replace regex crate with fancy_regex (#14646)

# Description

We removed most uses of the `regex` crate long ago, but a few instances
had to stay because `fancy-regex` did not provide `split`/`splitn` (and
possibly other functions). Those functions now exist in the latest
`fancy-regex` release, so the remaining uses of `regex` can be removed.
 
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Darren Schroeder
2025-01-01 17:37:50 -06:00
committed by GitHub
parent c6523eb8d9
commit f69b22f00b
9 changed files with 181 additions and 124 deletions

View File

@ -1,7 +1,7 @@
use crate::parse_date_from_string;
use fancy_regex::{Regex, RegexBuilder};
use nu_engine::command_prelude::*;
use nu_protocol::PipelineIterator;
use regex::{Regex, RegexBuilder};
use std::collections::HashSet;
use std::sync::LazyLock;
@ -143,7 +143,7 @@ fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Va
let val_str = val.coerce_str().unwrap_or_default();
// step 2: bounce string up against regexes
if BOOLEAN_RE.is_match(&val_str) {
if BOOLEAN_RE.is_match(&val_str).unwrap_or(false) {
let bval = val_str
.parse::<bool>()
.map_err(|_| ShellError::CantConvert {
@ -156,12 +156,12 @@ fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Va
})?;
Ok(Value::bool(bval, span))
} else if FLOAT_RE.is_match(&val_str) {
} else if FLOAT_RE.is_match(&val_str).unwrap_or(false) {
let fval = val_str
.parse::<f64>()
.map_err(|_| ShellError::CantConvert {
to_type: "string".to_string(),
from_type: "float".to_string(),
to_type: "float".to_string(),
from_type: "string".to_string(),
span,
help: Some(format!(
r#""{val_str}" does not represent a valid floating point value"#
@ -169,12 +169,12 @@ fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Va
})?;
Ok(Value::float(fval, span))
} else if INTEGER_RE.is_match(&val_str) {
} else if INTEGER_RE.is_match(&val_str).unwrap_or(false) {
let ival = val_str
.parse::<i64>()
.map_err(|_| ShellError::CantConvert {
to_type: "string".to_string(),
from_type: "int".to_string(),
to_type: "int".to_string(),
from_type: "string".to_string(),
span,
help: Some(format!(
r#""{val_str}" does not represent a valid integer value"#
@ -186,15 +186,15 @@ fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Va
} else {
Ok(Value::int(ival, span))
}
} else if INTEGER_WITH_DELIMS_RE.is_match(&val_str) {
} else if INTEGER_WITH_DELIMS_RE.is_match(&val_str).unwrap_or(false) {
let mut val_str = val_str.into_owned();
val_str.retain(|x| !['_', ','].contains(&x));
let ival = val_str
.parse::<i64>()
.map_err(|_| ShellError::CantConvert {
to_type: "string".to_string(),
from_type: "int".to_string(),
to_type: "int".to_string(),
from_type: "string".to_string(),
span,
help: Some(format!(
r#""{val_str}" does not represent a valid integer value"#
@ -206,7 +206,7 @@ fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Va
} else {
Ok(Value::int(ival, span))
}
} else if DATETIME_DMY_RE.is_match(&val_str) {
} else if DATETIME_DMY_RE.is_match(&val_str).unwrap_or(false) {
let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
to_type: "date".to_string(),
from_type: "string".to_string(),
@ -217,7 +217,7 @@ fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Va
})?;
Ok(Value::date(dt, span))
} else if DATETIME_YMD_RE.is_match(&val_str) {
} else if DATETIME_YMD_RE.is_match(&val_str).unwrap_or(false) {
let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
to_type: "date".to_string(),
from_type: "string".to_string(),
@ -228,7 +228,7 @@ fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Va
})?;
Ok(Value::date(dt, span))
} else if DATETIME_YMDZ_RE.is_match(&val_str) {
} else if DATETIME_YMDZ_RE.is_match(&val_str).unwrap_or(false) {
let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
to_type: "date".to_string(),
from_type: "string".to_string(),
@ -372,118 +372,154 @@ mod test {
// Asserts that FLOAT_RE matches a range of float spellings: plain decimals,
// a leading-dot value, exponent forms with `e`/`E` and signed exponents,
// explicitly signed values, and the NaN / inf literals.
// NOTE(review): this listing contains each assertion twice, once as a bare
// `is_match(..)` and once with `.unwrap()`; presumably the bare forms are the
// pre-change lines of the diff -- confirm against the actual file.
#[test]
fn test_float_parse() {
// The regex should work on all these but nushell's float parser is more strict
assert!(FLOAT_RE.is_match("0.1"));
assert!(FLOAT_RE.is_match("3.0"));
assert!(FLOAT_RE.is_match("3.00001"));
assert!(FLOAT_RE.is_match("-9.9990e-003"));
assert!(FLOAT_RE.is_match("9.9990e+003"));
assert!(FLOAT_RE.is_match("9.9990E+003"));
assert!(FLOAT_RE.is_match("9.9990E+003"));
assert!(FLOAT_RE.is_match(".5"));
assert!(FLOAT_RE.is_match("2.5E-10"));
assert!(FLOAT_RE.is_match("2.5e10"));
assert!(FLOAT_RE.is_match("NaN"));
assert!(FLOAT_RE.is_match("-NaN"));
assert!(FLOAT_RE.is_match("-inf"));
assert!(FLOAT_RE.is_match("inf"));
assert!(FLOAT_RE.is_match("-7e-05"));
assert!(FLOAT_RE.is_match("7e-05"));
assert!(FLOAT_RE.is_match("+7e+05"));
assert!(FLOAT_RE.is_match("0.1").unwrap());
assert!(FLOAT_RE.is_match("3.0").unwrap());
assert!(FLOAT_RE.is_match("3.00001").unwrap());
assert!(FLOAT_RE.is_match("-9.9990e-003").unwrap());
assert!(FLOAT_RE.is_match("9.9990e+003").unwrap());
assert!(FLOAT_RE.is_match("9.9990E+003").unwrap());
assert!(FLOAT_RE.is_match("9.9990E+003").unwrap());
assert!(FLOAT_RE.is_match(".5").unwrap());
assert!(FLOAT_RE.is_match("2.5E-10").unwrap());
assert!(FLOAT_RE.is_match("2.5e10").unwrap());
assert!(FLOAT_RE.is_match("NaN").unwrap());
assert!(FLOAT_RE.is_match("-NaN").unwrap());
assert!(FLOAT_RE.is_match("-inf").unwrap());
assert!(FLOAT_RE.is_match("inf").unwrap());
assert!(FLOAT_RE.is_match("-7e-05").unwrap());
assert!(FLOAT_RE.is_match("7e-05").unwrap());
assert!(FLOAT_RE.is_match("+7e+05").unwrap());
}
// Asserts that INTEGER_RE matches plain digit strings from "0" up to
// "100000000000", and that INTEGER_WITH_DELIMS_RE matches the same magnitudes
// written with `_` or `,` as a thousands separator.
// NOTE(review): this listing contains each assertion twice, once as a bare
// `is_match(..)` and once with `.unwrap()`; presumably the bare forms are the
// pre-change lines of the diff -- confirm against the actual file.
#[test]
fn test_int_parse() {
assert!(INTEGER_RE.is_match("0"));
assert!(INTEGER_RE.is_match("1"));
assert!(INTEGER_RE.is_match("10"));
assert!(INTEGER_RE.is_match("100"));
assert!(INTEGER_RE.is_match("1000"));
assert!(INTEGER_RE.is_match("10000"));
assert!(INTEGER_RE.is_match("100000"));
assert!(INTEGER_RE.is_match("1000000"));
assert!(INTEGER_RE.is_match("10000000"));
assert!(INTEGER_RE.is_match("100000000"));
assert!(INTEGER_RE.is_match("1000000000"));
assert!(INTEGER_RE.is_match("10000000000"));
assert!(INTEGER_RE.is_match("100000000000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000_000_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000_000_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000_000_000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("100,000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000,000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000,000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("100,000,000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000,000,000"));
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000,000,000"));
assert!(INTEGER_RE.is_match("0").unwrap());
assert!(INTEGER_RE.is_match("1").unwrap());
assert!(INTEGER_RE.is_match("10").unwrap());
assert!(INTEGER_RE.is_match("100").unwrap());
assert!(INTEGER_RE.is_match("1000").unwrap());
assert!(INTEGER_RE.is_match("10000").unwrap());
assert!(INTEGER_RE.is_match("100000").unwrap());
assert!(INTEGER_RE.is_match("1000000").unwrap());
assert!(INTEGER_RE.is_match("10000000").unwrap());
assert!(INTEGER_RE.is_match("100000000").unwrap());
assert!(INTEGER_RE.is_match("1000000000").unwrap());
assert!(INTEGER_RE.is_match("10000000000").unwrap());
assert!(INTEGER_RE.is_match("100000000000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100,000,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000,000,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000,000,000").unwrap());
}
// Asserts that BOOLEAN_RE matches the words "true" and "false" and does not
// match the digits "1" and "0".
// NOTE(review): this listing contains each assertion twice, once as a bare
// `is_match(..)` and once with `.unwrap()`; presumably the bare forms are the
// pre-change lines of the diff -- confirm against the actual file.
#[test]
fn test_bool_parse() {
assert!(BOOLEAN_RE.is_match("true"));
assert!(BOOLEAN_RE.is_match("false"));
assert!(!BOOLEAN_RE.is_match("1"));
assert!(!BOOLEAN_RE.is_match("0"));
assert!(BOOLEAN_RE.is_match("true").unwrap());
assert!(BOOLEAN_RE.is_match("false").unwrap());
assert!(!BOOLEAN_RE.is_match("1").unwrap());
assert!(!BOOLEAN_RE.is_match("0").unwrap());
}
// Asserts that DATETIME_YMDZ_RE matches year-first timestamps that carry a
// zone designator (`Z`, `+hh:mm` or `-hh:mm`), with or without fractional
// seconds, including a single-quoted value. It then asserts that timestamps
// with no zone, or with a truncated offset such as `+01`, `+01:0` or `+1:00`,
// do not match.
// NOTE(review): this listing contains each assertion twice, once as a bare
// `is_match(..)` and once with `.unwrap()`; presumably the bare forms are the
// pre-change lines of the diff -- confirm against the actual file.
#[test]
fn test_datetime_ymdz_pattern() {
assert!(DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00Z"));
assert!(DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789Z"));
assert!(DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00+01:00"));
assert!(DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789+01:00"));
assert!(DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00-01:00"));
assert!(DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789-01:00"));
assert!(DATETIME_YMDZ_RE.is_match("'2022-01-01T00:00:00Z'"));
assert!(DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00Z").unwrap());
assert!(DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789Z")
.unwrap());
assert!(DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00+01:00")
.unwrap());
assert!(DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789+01:00")
.unwrap());
assert!(DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00-01:00")
.unwrap());
assert!(DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789-01:00")
.unwrap());
assert!(DATETIME_YMDZ_RE.is_match("'2022-01-01T00:00:00Z'").unwrap());
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00."));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00+01"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00+01:0"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00+1:00"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789+01"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789+01:0"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789+1:00"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00-01"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00-01:0"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00-1:00"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789-01"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789-01:0"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.123456789-1:00"));
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00").unwrap());
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.").unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789")
.unwrap());
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00+01").unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00+01:0")
.unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00+1:00")
.unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789+01")
.unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789+01:0")
.unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789+1:00")
.unwrap());
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00-01").unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00-01:0")
.unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00-1:00")
.unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789-01")
.unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789-01:0")
.unwrap());
assert!(!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789-1:00")
.unwrap());
}
// Asserts that DATETIME_YMD_RE matches year-first dates written with `-` or
// `/` separators, optionally followed by a `T` time with up to nine
// fractional-second digits, including a single-quoted value. The only active
// negative case is a timestamp with ten fractional-second digits; the
// range-validation cases are left commented out because the regex does not
// check field ranges.
// NOTE(review): this listing contains each assertion twice, once as a bare
// `is_match(..)` and once with `.unwrap()`; presumably the bare forms are the
// pre-change lines of the diff -- confirm against the actual file.
#[test]
fn test_datetime_ymd_pattern() {
assert!(DATETIME_YMD_RE.is_match("2022-01-01"));
assert!(DATETIME_YMD_RE.is_match("2022/01/01"));
assert!(DATETIME_YMD_RE.is_match("2022-01-01T00:00:00"));
assert!(DATETIME_YMD_RE.is_match("2022-01-01T00:00:00.000000000"));
assert!(DATETIME_YMD_RE.is_match("'2022-01-01'"));
assert!(DATETIME_YMD_RE.is_match("2022-01-01").unwrap());
assert!(DATETIME_YMD_RE.is_match("2022/01/01").unwrap());
assert!(DATETIME_YMD_RE.is_match("2022-01-01T00:00:00").unwrap());
assert!(DATETIME_YMD_RE
.is_match("2022-01-01T00:00:00.000000000")
.unwrap());
assert!(DATETIME_YMD_RE.is_match("'2022-01-01'").unwrap());
// The regex isn't this specific, but it would be nice if it were
// assert!(!DATETIME_YMD_RE.is_match("2022-13-01"));
// assert!(!DATETIME_YMD_RE.is_match("2022-01-32"));
// assert!(!DATETIME_YMD_RE.is_match("2022-01-01T24:00:00"));
// assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:60:00"));
// assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:00:60"));
assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:00:00.0000000000"));
// assert!(!DATETIME_YMD_RE.is_match("2022-13-01").unwrap());
// assert!(!DATETIME_YMD_RE.is_match("2022-01-32").unwrap());
// assert!(!DATETIME_YMD_RE.is_match("2022-01-01T24:00:00").unwrap());
// assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:60:00").unwrap());
// assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:00:60").unwrap());
assert!(!DATETIME_YMD_RE
.is_match("2022-01-01T00:00:00.0000000000")
.unwrap());
}
// Asserts that DATETIME_DMY_RE matches day-first dates written with `-` or
// `/` separators, optionally followed by an `hh:mm` time, and does not match
// the year-first strings listed below.
// NOTE(review): this listing contains each assertion twice, once as a bare
// `is_match(..)` and once with `.unwrap()`; presumably the bare forms are the
// pre-change lines of the diff -- confirm against the actual file.
#[test]
fn test_datetime_dmy_pattern() {
assert!(DATETIME_DMY_RE.is_match("31-12-2021"));
assert!(DATETIME_DMY_RE.is_match("01/01/2022"));
assert!(DATETIME_DMY_RE.is_match("15-06-2023 12:30"));
assert!(!DATETIME_DMY_RE.is_match("2022-13-01"));
assert!(!DATETIME_DMY_RE.is_match("2022-01-32"));
assert!(!DATETIME_DMY_RE.is_match("2022-01-01 24:00"));
assert!(DATETIME_DMY_RE.is_match("31-12-2021").unwrap());
assert!(DATETIME_DMY_RE.is_match("01/01/2022").unwrap());
assert!(DATETIME_DMY_RE.is_match("15-06-2023 12:30").unwrap());
assert!(!DATETIME_DMY_RE.is_match("2022-13-01").unwrap());
assert!(!DATETIME_DMY_RE.is_match("2022-01-32").unwrap());
assert!(!DATETIME_DMY_RE.is_match("2022-01-01 24:00").unwrap());
}
}