allow raw string to be used inside subexpression, list, and closure (#12776)

# Description
Fixes: #12744

This PR moves the raw string lexing logic into the `lex_item` function, so
that raw strings can be used inside subexpressions, lists, and closures.
```nushell
> [r#'abc'#]
╭───┬─────╮
│ 0 │ abc │
╰───┴─────╯
> (r#'abc'#)
abc
> do {r#'aa'#}
aa
```

# Tests + Formatting
Done

# After Submitting
N/A
This commit is contained in:
Wind 2024-05-07 04:53:58 +08:00 committed by GitHub
parent f9d4fa2c40
commit 97fc190cc5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 138 additions and 73 deletions

View File

@ -225,6 +225,19 @@ pub fn lex_item(
)),
);
}
} else if c == b'r' && input.get(*curr_offset + 1) == Some(b'#').as_ref() {
// already checked `r#` pattern, so it's a raw string.
let lex_result = lex_raw_string(input, curr_offset, span_offset);
let span = Span::new(span_offset + token_start, span_offset + *curr_offset);
if let Err(e) = lex_result {
return (
Token {
contents: TokenContents::Item,
span,
},
Some(e),
);
}
} else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
@ -331,6 +344,65 @@ pub fn lex_item(
(output, err)
}
/// Scans a raw string literal such as `r#'Look, I can use 'single quotes'!'#`.
///
/// A raw string starts with `r`, one or more `#`, and a single quote. It ends at
/// the first single quote that is followed by the same number of `#` characters.
/// Everything in between is taken verbatim, including newlines and double quotes.
/// Using more `#` characters lets the body contain `'#` or `#'` itself,
/// e.g. `r##'I can use '#' in a raw string'##`.
///
/// On entry `*curr_offset` must be the index of the leading `r` in `input`.
///
/// # Returns
/// `Ok(())` when a complete literal was found; `*curr_offset` is then left on the
/// last `#` of the closing delimiter (not one past it).
///
/// # Errors
/// * `ParseError::Expected("'", ..)` when the `#` prefix is not followed by a single
///   quote; `*curr_offset` is left on the offending position.
/// * `ParseError::UnexpectedEof("#", ..)` when the input ends before a closing
///   delimiter is found; `*curr_offset` is left at `input.len()`.
///
/// Error spans are shifted by `span_offset`.
fn lex_raw_string(
    input: &[u8],
    curr_offset: &mut usize,
    span_offset: usize,
) -> Result<(), ParseError> {
    // Count the `#` characters that directly follow the `r`.
    let start = *curr_offset;
    let mut prefix_sharp_cnt = 0;
    while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
        prefix_sharp_cnt += 1;
    }

    // curr_offset is the character `r`, we need to move forward and skip all `#`
    // characters.
    //
    // e.g: r###'<body>
    //      ^   ^
    //      |   curr_offset after the skip
    //      curr_offset on entry
    *curr_offset += prefix_sharp_cnt + 1;

    // The next one should be a single quote.
    if input.get(*curr_offset) != Some(&b'\'') {
        return Err(ParseError::Expected(
            "'",
            Span::new(span_offset + *curr_offset, span_offset + *curr_offset + 1),
        ));
    }
    *curr_offset += 1;

    // First index that belongs to the string body. The closing quote must sit at
    // or after this position; otherwise input like `r#'#` would let the opening
    // quote double as the closing one and be accepted as a complete literal.
    let body_start = *curr_offset;

    while let Some(ch) = input.get(*curr_offset) {
        // Check for the postfix `'###`, looking backwards from the current `#`.
        if *ch == b'#' {
            // Cannot underflow: `*curr_offset >= body_start > prefix_sharp_cnt`.
            let quote_idx = *curr_offset - prefix_sharp_cnt;
            if quote_idx >= body_start
                && input[quote_idx] == b'\''
                && input[quote_idx + 1..=*curr_offset]
                    .iter()
                    .all(|x| *x == b'#')
            {
                return Ok(());
            }
        }
        *curr_offset += 1;
    }

    // Ran off the end of the input without seeing the closing delimiter.
    Err(ParseError::UnexpectedEof(
        "#".to_string(),
        Span::new(span_offset + *curr_offset, span_offset + *curr_offset),
    ))
}
pub fn lex_signature(
input: &[u8],
span_offset: usize,
@ -503,79 +575,6 @@ fn lex_internal(
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
// If the next character is non-newline whitespace, skip it.
curr_offset += 1;
} else if c == b'r' {
// A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#`
// If the next character is `#` we're probably looking at a raw string literal
// so we need to read all the text until we find a closing `#`. This raw string
// can contain any character, including newlines and double quotes without needing
// to escape them.
//
// A raw string can contain many `#` as prefix,
// incase if there is a `'#` or `#'` in the string itself.
// E.g: r##'I can use '#' in a raw string'##
let mut prefix_sharp_cnt = 0;
let start = curr_offset;
while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
prefix_sharp_cnt += 1;
}
if prefix_sharp_cnt != 0 {
// curr_offset is the character `r`, we need to move forward and skip all `#`
// characters.
//
// e.g: r###'<body>
// ^
// ^
// curr_offset
curr_offset += prefix_sharp_cnt + 1;
// the next one should be a single quote.
if input.get(curr_offset) != Some(&b'\'') {
error = Some(ParseError::Expected(
"'",
Span::new(span_offset + curr_offset, span_offset + curr_offset + 1),
));
}
curr_offset += 1;
let mut matches = false;
while let Some(ch) = input.get(curr_offset) {
// check for postfix '###
if *ch == b'#' {
let start_ch = input[curr_offset - prefix_sharp_cnt];
let postfix = &input[curr_offset - prefix_sharp_cnt + 1..=curr_offset];
if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') {
matches = true;
curr_offset += 1;
break;
}
}
curr_offset += 1
}
if matches {
output.push(Token::new(
TokenContents::Item,
Span::new(span_offset + start, span_offset + curr_offset),
));
} else if error.is_none() {
error = Some(ParseError::UnexpectedEof(
"#".to_string(),
Span::new(span_offset + curr_offset, span_offset + curr_offset),
))
}
} else {
let (token, err) = lex_item(
input,
&mut curr_offset,
span_offset,
additional_whitespace,
special_tokens,
in_signature,
);
if error.is_none() {
error = err;
}
output.push(token);
}
} else {
let token = try_lex_special_piped_item(input, &mut curr_offset, span_offset);
if let Some(token) = token {

View File

@ -87,6 +87,72 @@ fn raw_string() -> TestResult {
)
}
#[test]
fn raw_string_inside_parentheses() -> TestResult {
    // Raw string literal paired with the text it is expected to produce.
    let cases = [
        (r#"r#'abcde""fghi"''''jkl'#"#, r#"abcde""fghi"''''jkl"#),
        (r#"r##'abcde""fghi"''''#jkl'##"#, r#"abcde""fghi"''''#jkl"#),
        (
            r#"r###'abcde""fghi"'''##'#jkl'###"#,
            r#"abcde""fghi"'''##'#jkl"#,
        ),
        ("r#''#", ""),
        (
            "r#'a string with sharp inside # and ends with #'#",
            "a string with sharp inside # and ends with #",
        ),
    ];

    // Wrap every literal in parentheses and stop at the first failing case.
    for (literal, expected) in cases {
        let source = format!("({literal})");
        run_test(&source, expected)?;
    }
    Ok(())
}
#[test]
fn raw_string_inside_list() -> TestResult {
    // Raw string literal paired with the text it is expected to produce.
    let cases = [
        (r#"r#'abcde""fghi"''''jkl'#"#, r#"abcde""fghi"''''jkl"#),
        (r#"r##'abcde""fghi"''''#jkl'##"#, r#"abcde""fghi"''''#jkl"#),
        (
            r#"r###'abcde""fghi"'''##'#jkl'###"#,
            r#"abcde""fghi"'''##'#jkl"#,
        ),
        ("r#''#", ""),
        (
            "r#'a string with sharp inside # and ends with #'#",
            "a string with sharp inside # and ends with #",
        ),
    ];

    // Put every literal in a one-element list, read element 0 back, and stop at
    // the first failing case.
    for (literal, expected) in cases {
        let source = format!("[{literal}] | get 0");
        run_test(&source, expected)?;
    }
    Ok(())
}
#[test]
fn raw_string_inside_closure() -> TestResult {
    // Raw string literal paired with the text it is expected to produce.
    let cases = [
        (r#"r#'abcde""fghi"''''jkl'#"#, r#"abcde""fghi"''''jkl"#),
        (r#"r##'abcde""fghi"''''#jkl'##"#, r#"abcde""fghi"''''#jkl"#),
        (
            r#"r###'abcde""fghi"'''##'#jkl'###"#,
            r#"abcde""fghi"'''##'#jkl"#,
        ),
        ("r#''#", ""),
        (
            "r#'a string with sharp inside # and ends with #'#",
            "a string with sharp inside # and ends with #",
        ),
    ];

    // Braces are kept in variables so the format string needs no `{{`/`}}` escapes.
    let (open, close) = ('{', '}');

    // Run every literal as the body of a `do` closure and stop at the first
    // failing case.
    for (literal, expected) in cases {
        let source = format!("do {open}{literal}{close}");
        run_test(&source, expected)?;
    }
    Ok(())
}
#[test]
fn incomplete_raw_string() -> TestResult {
fail_test("r#abc", "expected '")