mirror of
https://github.com/nushell/nushell.git
synced 2025-02-16 10:32:29 +01:00
allow raw string to be used inside subexpression, list, and closure (#12776)
# Description Fixes: #12744 This PR moves the raw-string lexing logic into the `lex_item` function, so raw strings can be used inside subexpressions, lists, and closures. ```nushell > [r#'abc'#] ╭───┬─────╮ │ 0 │ abc │ ╰───┴─────╯ > (r#'abc'#) abc > do {r#'aa'#} aa ``` # Tests + Formatting Done # After Submitting NaN
This commit is contained in:
parent
f9d4fa2c40
commit
97fc190cc5
@ -225,6 +225,19 @@ pub fn lex_item(
|
||||
)),
|
||||
);
|
||||
}
|
||||
} else if c == b'r' && input.get(*curr_offset + 1) == Some(b'#').as_ref() {
|
||||
// already checked `r#` pattern, so it's a raw string.
|
||||
let lex_result = lex_raw_string(input, curr_offset, span_offset);
|
||||
let span = Span::new(span_offset + token_start, span_offset + *curr_offset);
|
||||
if let Err(e) = lex_result {
|
||||
return (
|
||||
Token {
|
||||
contents: TokenContents::Item,
|
||||
span,
|
||||
},
|
||||
Some(e),
|
||||
);
|
||||
}
|
||||
} else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||
break;
|
||||
}
|
||||
@ -331,6 +344,65 @@ pub fn lex_item(
|
||||
(output, err)
|
||||
}
|
||||
|
||||
fn lex_raw_string(
|
||||
input: &[u8],
|
||||
curr_offset: &mut usize,
|
||||
span_offset: usize,
|
||||
) -> Result<(), ParseError> {
|
||||
// A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#`
|
||||
// If the next character is `#` we're probably looking at a raw string literal
|
||||
// so we need to read all the text until we find a closing `#`. This raw string
|
||||
// can contain any character, including newlines and double quotes without needing
|
||||
// to escape them.
|
||||
//
|
||||
// A raw string can contain many `#` as prefix,
|
||||
// incase if there is a `'#` or `#'` in the string itself.
|
||||
// E.g: r##'I can use '#' in a raw string'##
|
||||
let mut prefix_sharp_cnt = 0;
|
||||
let start = *curr_offset;
|
||||
while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
|
||||
prefix_sharp_cnt += 1;
|
||||
}
|
||||
|
||||
// curr_offset is the character `r`, we need to move forward and skip all `#`
|
||||
// characters.
|
||||
//
|
||||
// e.g: r###'<body>
|
||||
// ^
|
||||
// ^
|
||||
// curr_offset
|
||||
*curr_offset += prefix_sharp_cnt + 1;
|
||||
// the next one should be a single quote.
|
||||
if input.get(*curr_offset) != Some(&b'\'') {
|
||||
return Err(ParseError::Expected(
|
||||
"'",
|
||||
Span::new(span_offset + *curr_offset, span_offset + *curr_offset + 1),
|
||||
));
|
||||
}
|
||||
|
||||
*curr_offset += 1;
|
||||
let mut matches = false;
|
||||
while let Some(ch) = input.get(*curr_offset) {
|
||||
// check for postfix '###
|
||||
if *ch == b'#' {
|
||||
let start_ch = input[*curr_offset - prefix_sharp_cnt];
|
||||
let postfix = &input[*curr_offset - prefix_sharp_cnt + 1..=*curr_offset];
|
||||
if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') {
|
||||
matches = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*curr_offset += 1
|
||||
}
|
||||
if !matches {
|
||||
return Err(ParseError::UnexpectedEof(
|
||||
"#".to_string(),
|
||||
Span::new(span_offset + *curr_offset, span_offset + *curr_offset),
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn lex_signature(
|
||||
input: &[u8],
|
||||
span_offset: usize,
|
||||
@ -503,79 +575,6 @@ fn lex_internal(
|
||||
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
|
||||
// If the next character is non-newline whitespace, skip it.
|
||||
curr_offset += 1;
|
||||
} else if c == b'r' {
|
||||
// A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#`
|
||||
// If the next character is `#` we're probably looking at a raw string literal
|
||||
// so we need to read all the text until we find a closing `#`. This raw string
|
||||
// can contain any character, including newlines and double quotes without needing
|
||||
// to escape them.
|
||||
//
|
||||
// A raw string can contain many `#` as prefix,
|
||||
// incase if there is a `'#` or `#'` in the string itself.
|
||||
// E.g: r##'I can use '#' in a raw string'##
|
||||
let mut prefix_sharp_cnt = 0;
|
||||
let start = curr_offset;
|
||||
while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
|
||||
prefix_sharp_cnt += 1;
|
||||
}
|
||||
|
||||
if prefix_sharp_cnt != 0 {
|
||||
// curr_offset is the character `r`, we need to move forward and skip all `#`
|
||||
// characters.
|
||||
//
|
||||
// e.g: r###'<body>
|
||||
// ^
|
||||
// ^
|
||||
// curr_offset
|
||||
curr_offset += prefix_sharp_cnt + 1;
|
||||
// the next one should be a single quote.
|
||||
if input.get(curr_offset) != Some(&b'\'') {
|
||||
error = Some(ParseError::Expected(
|
||||
"'",
|
||||
Span::new(span_offset + curr_offset, span_offset + curr_offset + 1),
|
||||
));
|
||||
}
|
||||
|
||||
curr_offset += 1;
|
||||
let mut matches = false;
|
||||
while let Some(ch) = input.get(curr_offset) {
|
||||
// check for postfix '###
|
||||
if *ch == b'#' {
|
||||
let start_ch = input[curr_offset - prefix_sharp_cnt];
|
||||
let postfix = &input[curr_offset - prefix_sharp_cnt + 1..=curr_offset];
|
||||
if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') {
|
||||
matches = true;
|
||||
curr_offset += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
curr_offset += 1
|
||||
}
|
||||
if matches {
|
||||
output.push(Token::new(
|
||||
TokenContents::Item,
|
||||
Span::new(span_offset + start, span_offset + curr_offset),
|
||||
));
|
||||
} else if error.is_none() {
|
||||
error = Some(ParseError::UnexpectedEof(
|
||||
"#".to_string(),
|
||||
Span::new(span_offset + curr_offset, span_offset + curr_offset),
|
||||
))
|
||||
}
|
||||
} else {
|
||||
let (token, err) = lex_item(
|
||||
input,
|
||||
&mut curr_offset,
|
||||
span_offset,
|
||||
additional_whitespace,
|
||||
special_tokens,
|
||||
in_signature,
|
||||
);
|
||||
if error.is_none() {
|
||||
error = err;
|
||||
}
|
||||
output.push(token);
|
||||
}
|
||||
} else {
|
||||
let token = try_lex_special_piped_item(input, &mut curr_offset, span_offset);
|
||||
if let Some(token) = token {
|
||||
|
@ -87,6 +87,72 @@ fn raw_string() -> TestResult {
|
||||
)
|
||||
}
|
||||
|
||||
/// Raw strings must evaluate to their body when wrapped in a `(...)`
/// subexpression, for one, two and three `#` delimiters, an empty body, and a
/// body that itself ends with `#`.
#[test]
fn raw_string_inside_parentheses() -> TestResult {
    // (raw string literal, expected output)
    let cases = [
        (r#"r#'abcde""fghi"''''jkl'#"#, r#"abcde""fghi"''''jkl"#),
        (r#"r##'abcde""fghi"''''#jkl'##"#, r#"abcde""fghi"''''#jkl"#),
        (
            r#"r###'abcde""fghi"'''##'#jkl'###"#,
            r#"abcde""fghi"'''##'#jkl"#,
        ),
        ("r#''#", ""),
        (
            "r#'a string with sharp inside # and ends with #'#",
            "a string with sharp inside # and ends with #",
        ),
    ];

    for &(literal, expected) in &cases {
        run_test(&format!("({literal})"), expected)?;
    }
    Ok(())
}
|
||||
|
||||
/// Raw strings must be usable as list items: each case builds a one-element
/// list `[<raw string>]` and reads the element back with `get 0`.
#[test]
fn raw_string_inside_list() -> TestResult {
    // (raw string literal, expected output)
    let cases = [
        (r#"r#'abcde""fghi"''''jkl'#"#, r#"abcde""fghi"''''jkl"#),
        (r#"r##'abcde""fghi"''''#jkl'##"#, r#"abcde""fghi"''''#jkl"#),
        (
            r#"r###'abcde""fghi"'''##'#jkl'###"#,
            r#"abcde""fghi"'''##'#jkl"#,
        ),
        ("r#''#", ""),
        (
            "r#'a string with sharp inside # and ends with #'#",
            "a string with sharp inside # and ends with #",
        ),
    ];

    for &(literal, expected) in &cases {
        run_test(&format!("[{literal}] | get 0"), expected)?;
    }
    Ok(())
}
|
||||
|
||||
/// Raw strings must be usable as the body of a closure: each case runs
/// `do {<raw string>}` and checks the produced value.
#[test]
fn raw_string_inside_closure() -> TestResult {
    // (raw string literal, expected output)
    let cases = [
        (r#"r#'abcde""fghi"''''jkl'#"#, r#"abcde""fghi"''''jkl"#),
        (r#"r##'abcde""fghi"''''#jkl'##"#, r#"abcde""fghi"''''#jkl"#),
        (
            r#"r###'abcde""fghi"'''##'#jkl'###"#,
            r#"abcde""fghi"'''##'#jkl"#,
        ),
        ("r#''#", ""),
        (
            "r#'a string with sharp inside # and ends with #'#",
            "a string with sharp inside # and ends with #",
        ),
    ];

    for &(literal, expected) in &cases {
        // `{{` / `}}` are literal braces, giving `do {<literal>}`.
        run_test(&format!("do {{{literal}}}"), expected)?;
    }
    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn incomplete_raw_string() -> TestResult {
|
||||
fail_test("r#abc", "expected '")
|
||||
|
Loading…
Reference in New Issue
Block a user