diff --git a/crates/nu-cli/src/syntax_highlight.rs b/crates/nu-cli/src/syntax_highlight.rs index 8d0c582bd1..da293f03ac 100644 --- a/crates/nu-cli/src/syntax_highlight.rs +++ b/crates/nu-cli/src/syntax_highlight.rs @@ -128,6 +128,7 @@ impl Highlighter for NuHighlighter { FlatShape::Operator => add_colored_token(&shape.1, next_token), FlatShape::Signature => add_colored_token(&shape.1, next_token), FlatShape::String => add_colored_token(&shape.1, next_token), + FlatShape::RawString => add_colored_token(&shape.1, next_token), FlatShape::StringInterpolation => add_colored_token(&shape.1, next_token), FlatShape::DateTime => add_colored_token(&shape.1, next_token), FlatShape::List => { @@ -353,6 +354,7 @@ fn find_matching_block_end_in_expr( Expr::Directory(_, _) => None, Expr::GlobPattern(_, _) => None, Expr::String(_) => None, + Expr::RawString(_) => None, Expr::CellPath(_) => None, Expr::ImportPattern(_) => None, Expr::Overlay(_) => None, diff --git a/crates/nu-color-config/src/shape_color.rs b/crates/nu-color-config/src/shape_color.rs index 188abda992..cb896f2bfc 100644 --- a/crates/nu-color-config/src/shape_color.rs +++ b/crates/nu-color-config/src/shape_color.rs @@ -32,6 +32,7 @@ pub fn default_shape_color(shape: String) -> Style { "shape_or" => Style::new().fg(Color::Purple).bold(), "shape_pipe" => Style::new().fg(Color::Purple).bold(), "shape_range" => Style::new().fg(Color::Yellow).bold(), + "shape_raw_string" => Style::new().fg(Color::LightMagenta).bold(), "shape_record" => Style::new().fg(Color::Cyan).bold(), "shape_redirection" => Style::new().fg(Color::Purple).bold(), "shape_signature" => Style::new().fg(Color::Green).bold(), diff --git a/crates/nu-command/tests/commands/let_.rs b/crates/nu-command/tests/commands/let_.rs index a9a6c4b3b1..4bedf31104 100644 --- a/crates/nu-command/tests/commands/let_.rs +++ b/crates/nu-command/tests/commands/let_.rs @@ -91,3 +91,18 @@ fn let_glob_type() { let actual = nu!("let x: glob = 'aa'; $x | describe"); 
assert_eq!(actual.out, "glob"); } + +#[test] +fn let_raw_string() { + let actual = nu!(r#"let x = r#'abcde""fghi"''''jkl'#; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''jkl"#); + + let actual = nu!(r#"let x = r##'abcde""fghi"''''#jkl'##; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''#jkl"#); + + let actual = nu!(r#"let x = r###'abcde""fghi"'''##'#jkl'###; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"'''##'#jkl"#); + + let actual = nu!(r#"let x = r#'abc'#; $x"#); + assert_eq!(actual.out, "abc"); +} diff --git a/crates/nu-command/tests/commands/mut_.rs b/crates/nu-command/tests/commands/mut_.rs index be2d588ab0..7078cd1df1 100644 --- a/crates/nu-command/tests/commands/mut_.rs +++ b/crates/nu-command/tests/commands/mut_.rs @@ -125,3 +125,18 @@ fn mut_glob_type() { let actual = nu!("mut x: glob = 'aa'; $x | describe"); assert_eq!(actual.out, "glob"); } + +#[test] +fn mut_raw_string() { + let actual = nu!(r#"mut x = r#'abcde""fghi"''''jkl'#; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''jkl"#); + + let actual = nu!(r#"mut x = r##'abcde""fghi"''''#jkl'##; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''#jkl"#); + + let actual = nu!(r#"mut x = r###'abcde""fghi"'''##'#jkl'###; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"'''##'#jkl"#); + + let actual = nu!(r#"mut x = r#'abc'#; $x"#); + assert_eq!(actual.out, "abc"); +} diff --git a/crates/nu-parser/src/flatten.rs b/crates/nu-parser/src/flatten.rs index 0f99efb6fb..cb1d1de110 100644 --- a/crates/nu-parser/src/flatten.rs +++ b/crates/nu-parser/src/flatten.rs @@ -38,6 +38,7 @@ pub enum FlatShape { Or, Pipe, Range, + RawString, Record, Redirection, Signature, @@ -78,6 +79,7 @@ impl Display for FlatShape { FlatShape::Or => write!(f, "shape_or"), FlatShape::Pipe => write!(f, "shape_pipe"), FlatShape::Range => write!(f, "shape_range"), + FlatShape::RawString => write!(f, "shape_raw_string"), FlatShape::Record => write!(f, "shape_record"), FlatShape::Redirection => write!(f, "shape_redirection"), 
FlatShape::Signature => write!(f, "shape_signature"), @@ -509,6 +511,9 @@ pub fn flatten_expression( Expr::String(_) => { vec![(expr.span, FlatShape::String)] } + Expr::RawString(_) => { + vec![(expr.span, FlatShape::RawString)] + } Expr::Table(table) => { let outer_span = expr.span; let mut last_end = outer_span.start; diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index 399afb428e..11afea861d 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -503,6 +503,79 @@ fn lex_internal( } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) { // If the next character is non-newline whitespace, skip it. curr_offset += 1; + } else if c == b'r' { + // A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#` + // If the next character is `#` we're probably looking at a raw string literal + // so we need to read all the text until we find a closing `#`. This raw string + // can contain any character, including newlines and double quotes without needing + // to escape them. + // + // A raw string can contain many `#` as prefix, + // in case there is a `'#` or `#'` in the string itself. + // E.g: r##'I can use '#' in a raw string'## + let mut prefix_sharp_cnt = 0; + let start = curr_offset; + while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) { + prefix_sharp_cnt += 1; + } + + if prefix_sharp_cnt != 0 { + // curr_offset is the character `r`, we need to move forward and skip all `#` + // characters. + // + // e.g: r###' + // ^ + // ^ + // curr_offset + curr_offset += prefix_sharp_cnt + 1; + // the next one should be a single quote. 
+ if input.get(curr_offset) != Some(&b'\'') { + error = Some(ParseError::Expected( + "'", + Span::new(span_offset + curr_offset, span_offset + curr_offset + 1), + )); + } + + curr_offset += 1; + let mut matches = false; + while let Some(ch) = input.get(curr_offset) { + // check for postfix '### + if *ch == b'#' { + let start_ch = input[curr_offset - prefix_sharp_cnt]; + let postfix = &input[curr_offset - prefix_sharp_cnt + 1..=curr_offset]; + if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') { + matches = true; + curr_offset += 1; + break; + } + } + curr_offset += 1 + } + if matches { + output.push(Token::new( + TokenContents::Item, + Span::new(span_offset + start, span_offset + curr_offset), + )); + } else if error.is_none() { + error = Some(ParseError::UnexpectedEof( + "#".to_string(), + Span::new(span_offset + curr_offset, span_offset + curr_offset), + )) + } + } else { + let (token, err) = lex_item( + input, + &mut curr_offset, + span_offset, + additional_whitespace, + special_tokens, + in_signature, + ); + if error.is_none() { + error = err; + } + output.push(token); + } } else { let token = try_lex_special_piped_item(input, &mut curr_offset, span_offset); if let Some(token) = token { diff --git a/crates/nu-parser/src/parse_keywords.rs b/crates/nu-parser/src/parse_keywords.rs index 9b0b552259..4c2b2f7bd8 100644 --- a/crates/nu-parser/src/parse_keywords.rs +++ b/crates/nu-parser/src/parse_keywords.rs @@ -3341,6 +3341,7 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline } pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) -> Pipeline { + trace!("parsing source"); let spans = &lite_command.parts; let name = working_set.get_span_contents(spans[0]); diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index cca0eaaa2c..19b84c1e75 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -66,6 +66,11 @@ pub fn 
is_math_expression_like(working_set: &mut StateWorkingSet, span: Span) -> let b = bytes[0]; + // check for raw string + if bytes.starts_with(b"r#") { + return true; + } + if b == b'(' || b == b'{' || b == b'[' || b == b'$' || b == b'"' || b == b'\'' || b == b'-' { return true; } @@ -578,6 +583,7 @@ pub fn parse_multispan_value( spans_idx: &mut usize, shape: &SyntaxShape, ) -> Expression { + trace!("parse multispan value"); match shape { SyntaxShape::VarWithOptType => { trace!("parsing: var with opt type"); @@ -1565,6 +1571,66 @@ pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) - } } +pub fn parse_raw_string(working_set: &mut StateWorkingSet, span: Span) -> Expression { + trace!("parsing: raw-string, with required delimiters"); + + let bytes = working_set.get_span_contents(span); + + let prefix_sharp_cnt = if bytes.starts_with(b"r#") { + // actually `sharp_cnt` is always `index - 1` + // but create a variable here to make it clearer. + let mut sharp_cnt = 1; + let mut index = 2; + while index < bytes.len() && bytes[index] == b'#' { + index += 1; + sharp_cnt += 1; + } + sharp_cnt + } else { + working_set.error(ParseError::Expected("r#", span)); + return garbage(span); + }; + let expect_postfix_sharp_cnt = prefix_sharp_cnt; + // check the length of the whole raw string. + // the whole raw string should contain at least + // 1(r) + prefix_sharp_cnt + 1(') + 1(') + postfix_sharp characters + if bytes.len() < prefix_sharp_cnt + expect_postfix_sharp_cnt + 3 { + working_set.error(ParseError::Unclosed('\''.into(), span)); + return garbage(span); + } + + // check for unbalanced # and single quotes. + let postfix_bytes = &bytes[bytes.len() - expect_postfix_sharp_cnt..bytes.len()]; + if postfix_bytes.iter().any(|b| *b != b'#') { + working_set.error(ParseError::Unbalanced( + "prefix #".to_string(), + "postfix #".to_string(), + span, + )); + return garbage(span); + } + // check for unbalanced single quotes. 
+ if bytes[1 + prefix_sharp_cnt] != b'\'' + || bytes[bytes.len() - expect_postfix_sharp_cnt - 1] != b'\'' + { + working_set.error(ParseError::Unclosed('\''.into(), span)); + return garbage(span); + } + + let bytes = &bytes[prefix_sharp_cnt + 1 + 1..bytes.len() - 1 - prefix_sharp_cnt]; + if let Ok(token) = String::from_utf8(bytes.into()) { + Expression { + expr: Expr::RawString(token), + span, + ty: Type::String, + custom_completion: None, + } + } else { + working_set.error(ParseError::Expected("utf8 raw-string", span)); + garbage(span) + } +} + pub fn parse_paren_expr( working_set: &mut StateWorkingSet, span: Span, @@ -4553,6 +4619,9 @@ pub fn parse_value( return Expression::garbage(span); } }, + b'r' if bytes.len() > 1 && bytes[1] == b'#' => { + return parse_raw_string(working_set, span); + } _ => {} } @@ -6075,6 +6144,7 @@ pub fn discover_captures_in_expr( } } Expr::String(_) => {} + Expr::RawString(_) => {} Expr::StringInterpolation(exprs) => { for expr in exprs { discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?; @@ -6236,6 +6306,7 @@ pub fn parse( contents: &[u8], scoped: bool, ) -> Arc { + trace!("parse"); let name = match fname { Some(fname) => { // use the canonical name for this filename @@ -6253,9 +6324,13 @@ pub fn parse( let mut output = { if let Some(block) = previously_parsed_block { + // dbg!("previous block"); return block; } else { + // dbg!("starting lex"); let (output, err) = lex(contents, new_span.start, &[], &[], false); + // dbg!("finished lex"); + // dbg!(&output); if let Some(err) = err { working_set.error(err) } diff --git a/crates/nu-protocol/src/ast/expr.rs b/crates/nu-protocol/src/ast/expr.rs index 53a0717f34..13d9e42985 100644 --- a/crates/nu-protocol/src/ast/expr.rs +++ b/crates/nu-protocol/src/ast/expr.rs @@ -36,6 +36,7 @@ pub enum Expr { Directory(String, bool), GlobPattern(String, bool), String(String), + RawString(String), CellPath(CellPath), FullCellPath(Box), ImportPattern(Box), @@ -80,6 +81,7 @@ impl Expr 
{ | Expr::ValueWithUnit(_) | Expr::DateTime(_) | Expr::String(_) + | Expr::RawString(_) | Expr::CellPath(_) | Expr::StringInterpolation(_) | Expr::Nothing => { diff --git a/crates/nu-protocol/src/ast/expression.rs b/crates/nu-protocol/src/ast/expression.rs index 2f31196871..8fdbc8567c 100644 --- a/crates/nu-protocol/src/ast/expression.rs +++ b/crates/nu-protocol/src/ast/expression.rs @@ -279,6 +279,7 @@ impl Expression { } Expr::Signature(_) => false, Expr::String(_) => false, + Expr::RawString(_) => false, Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => { let block = working_set.get_block(*block_id); @@ -436,6 +437,7 @@ impl Expression { } Expr::Signature(_) => {} Expr::String(_) => {} + Expr::RawString(_) => {} Expr::StringInterpolation(items) => { for i in items { i.replace_span(working_set, replaced, new_span) diff --git a/crates/nu-protocol/src/debugger/profiler.rs b/crates/nu-protocol/src/debugger/profiler.rs index d1efe90cb0..9d5bece0ab 100644 --- a/crates/nu-protocol/src/debugger/profiler.rs +++ b/crates/nu-protocol/src/debugger/profiler.rs @@ -253,7 +253,7 @@ fn expr_to_string(engine_state: &EngineState, expr: &Expr) -> String { Expr::Record(_) => "record".to_string(), Expr::RowCondition(_) => "row condition".to_string(), Expr::Signature(_) => "signature".to_string(), - Expr::String(_) => "string".to_string(), + Expr::String(_) | Expr::RawString(_) => "string".to_string(), Expr::StringInterpolation(_) => "string interpolation".to_string(), Expr::Subexpression(_) => "subexpression".to_string(), Expr::Table(_) => "table".to_string(), diff --git a/crates/nu-protocol/src/eval_base.rs b/crates/nu-protocol/src/eval_base.rs index 06d13a5f99..f211f48560 100644 --- a/crates/nu-protocol/src/eval_base.rs +++ b/crates/nu-protocol/src/eval_base.rs @@ -139,7 +139,7 @@ pub trait Eval { Ok(Value::list(output_rows, expr.span)) } Expr::Keyword(kw) => Self::eval::(state, mut_state, &kw.expr), - Expr::String(s) => Ok(Value::string(s.clone(), expr.span)), + 
Expr::String(s) | Expr::RawString(s) => Ok(Value::string(s.clone(), expr.span)), Expr::Nothing => Ok(Value::nothing(expr.span)), Expr::ValueWithUnit(value) => match Self::eval::(state, mut_state, &value.expr)? { Value::Int { val, .. } => value.unit.item.build_value(val, value.unit.span), diff --git a/crates/nu-utils/src/sample_config/default_config.nu b/crates/nu-utils/src/sample_config/default_config.nu index ca82cb2e8c..70e8a2ca7d 100644 --- a/crates/nu-utils/src/sample_config/default_config.nu +++ b/crates/nu-utils/src/sample_config/default_config.nu @@ -69,6 +69,7 @@ let dark_theme = { shape_table: blue_bold shape_variable: purple shape_vardecl: purple + shape_raw_string: light_purple } let light_theme = { @@ -134,6 +135,7 @@ let light_theme = { shape_table: blue_bold shape_variable: purple shape_vardecl: purple + shape_raw_string: light_purple } # External completer example diff --git a/crates/nuon/src/from.rs b/crates/nuon/src/from.rs index a06a75e3f5..e1ba37b966 100644 --- a/crates/nuon/src/from.rs +++ b/crates/nuon/src/from.rs @@ -319,7 +319,7 @@ fn convert_to_value( msg: "signatures not supported in nuon".into(), span: expr.span, }), - Expr::String(s) => Ok(Value::string(s, span)), + Expr::String(s) | Expr::RawString(s) => Ok(Value::string(s, span)), Expr::StringInterpolation(..) 
=> Err(ShellError::OutsideSpannedLabeledError { src: original_text.to_string(), error: "Error when loading".into(), diff --git a/src/ide.rs b/src/ide.rs index 8e0a60421b..2b39dda946 100644 --- a/src/ide.rs +++ b/src/ide.rs @@ -569,6 +569,16 @@ pub fn hover(engine_state: &mut EngineState, file_path: &str, location: &Value) } }) ), + FlatShape::RawString => println!( + "{}", + json!({ + "hover": "raw-string", + "span": { + "start": span.start - offset, + "end": span.end - offset + } + }) + ), FlatShape::StringInterpolation => println!( "{}", json!({ diff --git a/src/tests/test_strings.rs b/src/tests/test_strings.rs index 762ac7579f..dcb03801b9 100644 --- a/src/tests/test_strings.rs +++ b/src/tests/test_strings.rs @@ -71,3 +71,23 @@ fn case_insensitive_sort_columns() -> TestResult { r#"[{"version":"four","package":"abc"},{"version":"three","package":"abc"},{"version":"two","package":"Abc"}]"#, ) } + +#[test] +fn raw_string() -> TestResult { + run_test(r#"r#'abcde""fghi"''''jkl'#"#, r#"abcde""fghi"''''jkl"#)?; + run_test(r#"r##'abcde""fghi"''''#jkl'##"#, r#"abcde""fghi"''''#jkl"#)?; + run_test( + r#"r###'abcde""fghi"'''##'#jkl'###"#, + r#"abcde""fghi"'''##'#jkl"#, + )?; + run_test("r#''#", "")?; + run_test( + r#"r#'a string with sharp inside # and ends with #'#"#, + "a string with sharp inside # and ends with #", + ) +} + +#[test] +fn incomplete_raw_string() -> TestResult { + fail_test("r#abc", "expected '") +} diff --git a/tests/const_/mod.rs b/tests/const_/mod.rs index 603eaba66c..04db29d3dd 100644 --- a/tests/const_/mod.rs +++ b/tests/const_/mod.rs @@ -400,3 +400,18 @@ fn const_glob_type() { let actual = nu!("const x: glob = 'aa'; $x | describe"); assert_eq!(actual.out, "glob"); } + +#[test] +fn const_raw_string() { + let actual = nu!(r#"const x = r#'abcde""fghi"''''jkl'#; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''jkl"#); + + let actual = nu!(r#"const x = r##'abcde""fghi"''''#jkl'##; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"''''#jkl"#); + + let actual 
= nu!(r#"const x = r###'abcde""fghi"'''##'#jkl'###; $x"#); + assert_eq!(actual.out, r#"abcde""fghi"'''##'#jkl"#); + + let actual = nu!(r#"const x = r#'abc'#; $x"#); + assert_eq!(actual.out, "abc"); +}