diff --git a/crates/nu-cli/src/syntax_highlight.rs b/crates/nu-cli/src/syntax_highlight.rs index 8d0c582bd1..da293f03ac 100644 --- a/crates/nu-cli/src/syntax_highlight.rs +++ b/crates/nu-cli/src/syntax_highlight.rs @@ -128,6 +128,7 @@ impl Highlighter for NuHighlighter { FlatShape::Operator => add_colored_token(&shape.1, next_token), FlatShape::Signature => add_colored_token(&shape.1, next_token), FlatShape::String => add_colored_token(&shape.1, next_token), + FlatShape::RawString => add_colored_token(&shape.1, next_token), FlatShape::StringInterpolation => add_colored_token(&shape.1, next_token), FlatShape::DateTime => add_colored_token(&shape.1, next_token), FlatShape::List => { @@ -353,6 +354,7 @@ fn find_matching_block_end_in_expr( Expr::Directory(_, _) => None, Expr::GlobPattern(_, _) => None, Expr::String(_) => None, + Expr::RawString(_) => None, Expr::CellPath(_) => None, Expr::ImportPattern(_) => None, Expr::Overlay(_) => None, diff --git a/crates/nu-color-config/src/shape_color.rs b/crates/nu-color-config/src/shape_color.rs index 188abda992..cb896f2bfc 100644 --- a/crates/nu-color-config/src/shape_color.rs +++ b/crates/nu-color-config/src/shape_color.rs @@ -32,6 +32,7 @@ pub fn default_shape_color(shape: String) -> Style { "shape_or" => Style::new().fg(Color::Purple).bold(), "shape_pipe" => Style::new().fg(Color::Purple).bold(), "shape_range" => Style::new().fg(Color::Yellow).bold(), + "shape_raw_string" => Style::new().fg(Color::LightMagenta).bold(), "shape_record" => Style::new().fg(Color::Cyan).bold(), "shape_redirection" => Style::new().fg(Color::Purple).bold(), "shape_signature" => Style::new().fg(Color::Green).bold(), diff --git a/crates/nu-command/tests/commands/let_.rs b/crates/nu-command/tests/commands/let_.rs index a9a6c4b3b1..4bedf31104 100644 --- a/crates/nu-command/tests/commands/let_.rs +++ b/crates/nu-command/tests/commands/let_.rs @@ -91,3 +91,18 @@ fn let_glob_type() { let actual = nu!("let x: glob = 'aa'; $x | describe"); 
assert_eq!(actual.out, "glob"); } + +#[test] +fn let_raw_string() { + let actual = nu!(r#"let x = r#'abcde""fghi"''''jkl'#; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''jkl"#); + + let actual = nu!(r#"let x = r##'abcde""fghi"''''#jkl'##; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''#jkl"#); + + let actual = nu!(r#"let x = r###'abcde""fghi"'''##'#jkl'###; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"'''##'#jkl"#); + + let actual = nu!(r#"let x = r#'abc'#; $x"#); + assert_eq!(actual.out, "abc"); +} diff --git a/crates/nu-command/tests/commands/mut_.rs b/crates/nu-command/tests/commands/mut_.rs index be2d588ab0..7078cd1df1 100644 --- a/crates/nu-command/tests/commands/mut_.rs +++ b/crates/nu-command/tests/commands/mut_.rs @@ -125,3 +125,18 @@ fn mut_glob_type() { let actual = nu!("mut x: glob = 'aa'; $x | describe"); assert_eq!(actual.out, "glob"); } + +#[test] +fn mut_raw_string() { + let actual = nu!(r#"mut x = r#'abcde""fghi"''''jkl'#; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''jkl"#); + + let actual = nu!(r#"mut x = r##'abcde""fghi"''''#jkl'##; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''#jkl"#); + + let actual = nu!(r#"mut x = r###'abcde""fghi"'''##'#jkl'###; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"'''##'#jkl"#); + + let actual = nu!(r#"mut x = r#'abc'#; $x"#); + assert_eq!(actual.out, "abc"); +} diff --git a/crates/nu-parser/src/flatten.rs b/crates/nu-parser/src/flatten.rs index 0f99efb6fb..cb1d1de110 100644 --- a/crates/nu-parser/src/flatten.rs +++ b/crates/nu-parser/src/flatten.rs @@ -38,6 +38,7 @@ pub enum FlatShape { Or, Pipe, Range, + RawString, Record, Redirection, Signature, @@ -78,6 +79,7 @@ impl Display for FlatShape { FlatShape::Or => write!(f, "shape_or"), FlatShape::Pipe => write!(f, "shape_pipe"), FlatShape::Range => write!(f, "shape_range"), + FlatShape::RawString => write!(f, "shape_raw_string"), FlatShape::Record => write!(f, "shape_record"), FlatShape::Redirection => write!(f, "shape_redirection"), 
FlatShape::Signature => write!(f, "shape_signature"), @@ -509,6 +511,9 @@ pub fn flatten_expression( Expr::String(_) => { vec![(expr.span, FlatShape::String)] } + Expr::RawString(_) => { + vec![(expr.span, FlatShape::RawString)] + } Expr::Table(table) => { let outer_span = expr.span; let mut last_end = outer_span.start; diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index 399afb428e..11afea861d 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -503,6 +503,79 @@ fn lex_internal( } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) { // If the next character is non-newline whitespace, skip it. curr_offset += 1; + } else if c == b'r' { + // A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#` + // If the next character is `#` we're probably looking at a raw string literal + // so we need to read all the text until we find the closing `'` followed by as many `#` as the prefix. This raw string + // can contain any character, including newlines and double quotes without needing + // to escape them. + // + // A raw string can contain many `#` as prefix, + // in case there is a `'#` or `#'` in the string itself. + // E.g: r##'I can use '#' in a raw string'## + let mut prefix_sharp_cnt = 0; + let start = curr_offset; + while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) { + prefix_sharp_cnt += 1; + } + + if prefix_sharp_cnt != 0 { + // curr_offset is the character `r`, we need to move forward and skip all `#` + // characters. + // + // e.g: r###'
+ // ^ + // ^ + // curr_offset + curr_offset += prefix_sharp_cnt + 1; + // the next one should be a single quote. + if input.get(curr_offset) != Some(&b'\'') { + error = Some(ParseError::Expected( + "'", + Span::new(span_offset + curr_offset, span_offset + curr_offset + 1), + )); + } + + curr_offset += 1; + let mut matches = false; + while let Some(ch) = input.get(curr_offset) { + // check for postfix '### (NOTE(review): the opening quote also satisfies this test, so content that begins with as many `#` as the prefix, e.g. r#'#abc'#, ends the scan early and yields the token r#'#) + if *ch == b'#' { + let start_ch = input[curr_offset - prefix_sharp_cnt]; + let postfix = &input[curr_offset - prefix_sharp_cnt + 1..=curr_offset]; + if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') { + matches = true; + curr_offset += 1; + break; + } + } + curr_offset += 1 + } + if matches { + output.push(Token::new( + TokenContents::Item, + Span::new(span_offset + start, span_offset + curr_offset), + )); + } else if error.is_none() { + error = Some(ParseError::UnexpectedEof( + "#".to_string(), + Span::new(span_offset + curr_offset, span_offset + curr_offset), + )) + } + } else { + let (token, err) = lex_item( + input, + &mut curr_offset, + span_offset, + additional_whitespace, + special_tokens, + in_signature, + ); + if error.is_none() { + error = err; + } + output.push(token); + } } else { let token = try_lex_special_piped_item(input, &mut curr_offset, span_offset); if let Some(token) = token { diff --git a/crates/nu-parser/src/parse_keywords.rs b/crates/nu-parser/src/parse_keywords.rs index 9b0b552259..4c2b2f7bd8 100644 --- a/crates/nu-parser/src/parse_keywords.rs +++ b/crates/nu-parser/src/parse_keywords.rs @@ -3341,6 +3341,7 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline } pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) -> Pipeline { + trace!("parsing source"); let spans = &lite_command.parts; let name = working_set.get_span_contents(spans[0]); diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index cca0eaaa2c..19b84c1e75 100644 --- 
a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -66,6 +66,11 @@ pub fn is_math_expression_like(working_set: &mut StateWorkingSet, span: Span) -> let b = bytes[0]; + // check for raw string + if bytes.starts_with(b"r#") { + return true; + } + if b == b'(' || b == b'{' || b == b'[' || b == b'$' || b == b'"' || b == b'\'' || b == b'-' { return true; } @@ -578,6 +583,7 @@ pub fn parse_multispan_value( spans_idx: &mut usize, shape: &SyntaxShape, ) -> Expression { + trace!("parse multispan value"); match shape { SyntaxShape::VarWithOptType => { trace!("parsing: var with opt type"); @@ -1565,6 +1571,66 @@ pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) - } } +pub fn parse_raw_string(working_set: &mut StateWorkingSet, span: Span) -> Expression { + trace!("parsing: raw-string, with required delimiters"); + + let bytes = working_set.get_span_contents(span); + + let prefix_sharp_cnt = if bytes.starts_with(b"r#") { + // actually `sharp_cnt` is always `index - 1` + // but create a variable here to make it clearer. + let mut sharp_cnt = 1; + let mut index = 2; + while index < bytes.len() && bytes[index] == b'#' { + index += 1; + sharp_cnt += 1; + } + sharp_cnt + } else { + working_set.error(ParseError::Expected("r#", span)); + return garbage(span); + }; + let expect_postfix_sharp_cnt = prefix_sharp_cnt; + // check the length of whole raw string. + // the whole raw string should contain at least + // 1(r) + prefix_sharp_cnt + 1(') + 1(') + postfix_sharp characters + if bytes.len() < prefix_sharp_cnt + expect_postfix_sharp_cnt + 3 { + working_set.error(ParseError::Unclosed('\''.into(), span)); + return garbage(span); + } + + // check for unbalanced # and single quotes. 
+ let postfix_bytes = &bytes[bytes.len() - expect_postfix_sharp_cnt..bytes.len()]; + if postfix_bytes.iter().any(|b| *b != b'#') { + working_set.error(ParseError::Unbalanced( + "prefix #".to_string(), + "postfix #".to_string(), + span, + )); + return garbage(span); + } + // check for unbalanced single quotes. + if bytes[1 + prefix_sharp_cnt] != b'\'' + || bytes[bytes.len() - expect_postfix_sharp_cnt - 1] != b'\'' + { + working_set.error(ParseError::Unclosed('\''.into(), span)); + return garbage(span); + } + + let bytes = &bytes[prefix_sharp_cnt + 1 + 1..bytes.len() - 1 - prefix_sharp_cnt]; + if let Ok(token) = String::from_utf8(bytes.into()) { + Expression { + expr: Expr::RawString(token), + span, + ty: Type::String, + custom_completion: None, + } + } else { + working_set.error(ParseError::Expected("utf8 raw-string", span)); + garbage(span) + } +} + pub fn parse_paren_expr( working_set: &mut StateWorkingSet, span: Span, @@ -4553,6 +4619,9 @@ pub fn parse_value( return Expression::garbage(span); } }, + b'r' if bytes.len() > 1 && bytes[1] == b'#' => { + return parse_raw_string(working_set, span); + } _ => {} } @@ -6075,6 +6144,7 @@ pub fn discover_captures_in_expr( } } Expr::String(_) => {} + Expr::RawString(_) => {} Expr::StringInterpolation(exprs) => { for expr in exprs { discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?; @@ -6236,6 +6306,7 @@ pub fn parse( contents: &[u8], scoped: bool, ) -> Arc