add raw-string literal support (#9956)

# Description

This PR adds raw string support by using `r#` at the beginning of single
quoted strings and `#` at the end.

Notice that escapes are not processed, even within single quotes;
parentheses don't mean anything, and $variables don't mean anything. It's
just a string.
```nushell
❯ echo r#'one\ntwo (blah) ($var)'#
one\ntwo (blah) ($var)
```
Notice how they work without `echo` or `print` and how they work without
carriage returns.
```nushell
❯ r#'adsfa'#
adsfa
❯ r##'asdfa'@qpejq'##
asdfa'@qpejq
❯ r#'asdfasdfasf
∙ foqwejfqo@'23rfjqf'#
```
They also have a special configurable color in the repl. (use single
quotes though)

![image](https://github.com/nushell/nushell/assets/343840/8780e21d-de4c-45b3-9880-2425f5fe10ef)

They should work like rust raw literals and allow `r##`, `r###`,
`r####`, etc, to help with having one or many `#`'s in the middle of
your raw-string.

They should work with `let` as well.

```nushell
r#'some\nraw\nstring'# | str upcase
```

closes https://github.com/nushell/nushell/issues/5091
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect -A clippy::result_large_err` to check that
you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->

---------

Co-authored-by: WindSoilder <WindSoilder@outlook.com>
Co-authored-by: Ian Manske <ian.manske@pm.me>
This commit is contained in:
Darren Schroeder
2024-05-02 09:36:37 -04:00
committed by GitHub
parent b5741ef14b
commit 8ed0d84d6a
17 changed files with 241 additions and 3 deletions

View File

@ -66,6 +66,11 @@ pub fn is_math_expression_like(working_set: &mut StateWorkingSet, span: Span) ->
let b = bytes[0];
// check for raw string
if bytes.starts_with(b"r#") {
return true;
}
if b == b'(' || b == b'{' || b == b'[' || b == b'$' || b == b'"' || b == b'\'' || b == b'-' {
return true;
}
@ -578,6 +583,7 @@ pub fn parse_multispan_value(
spans_idx: &mut usize,
shape: &SyntaxShape,
) -> Expression {
trace!("parse multispan value");
match shape {
SyntaxShape::VarWithOptType => {
trace!("parsing: var with opt type");
@ -1565,6 +1571,66 @@ pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) -
}
}
/// Parses a raw-string literal of the form `r#'...'#`, `r##'...'##`, and so on.
///
/// The contents of `span` must start with `r` followed by one or more `#`,
/// then a single quote, the raw text, a closing single quote, and the same
/// number of `#` as in the prefix. The text between the quotes is taken
/// verbatim and returned as an `Expr::RawString` expression of type
/// `Type::String`.
///
/// On any malformed input a `ParseError` is recorded on `working_set` and a
/// garbage expression covering `span` is returned:
/// - `Expected("r#")` when the contents do not begin with `r#`,
/// - `Unclosed` when the literal is too short or a delimiting quote is missing,
/// - `Unbalanced` when the trailing bytes are not all `#`,
/// - `Expected("utf8 raw-string")` when the inner bytes are not valid UTF-8.
pub fn parse_raw_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    trace!("parsing: raw-string, with required delimiters");

    let contents = working_set.get_span_contents(span);

    if !contents.starts_with(b"r#") {
        working_set.error(ParseError::Expected("r#", span));
        return garbage(span);
    }

    // Count the run of `#` that directly follows the leading `r`.
    // The `starts_with` guard above guarantees this is at least 1.
    let sharp_cnt = contents[1..].iter().take_while(|b| **b == b'#').count();
    let total_len = contents.len();

    // The shortest well-formed literal is
    // `r` + sharps + `'` + `'` + sharps, i.e. 2 * sharp_cnt + 3 bytes.
    if total_len < sharp_cnt + sharp_cnt + 3 {
        working_set.error(ParseError::Unclosed('\''.into(), span));
        return garbage(span);
    }

    let opening_quote_idx = sharp_cnt + 1;
    let closing_quote_idx = total_len - sharp_cnt - 1;

    // Everything after the closing quote must be `#`, matching the prefix count.
    let postfix_is_all_sharps = contents[closing_quote_idx + 1..]
        .iter()
        .all(|b| *b == b'#');
    if !postfix_is_all_sharps {
        working_set.error(ParseError::Unbalanced(
            "prefix #".to_string(),
            "postfix #".to_string(),
            span,
        ));
        return garbage(span);
    }

    // Both delimiters surrounding the raw text must be single quotes.
    let quotes_balanced =
        contents[opening_quote_idx] == b'\'' && contents[closing_quote_idx] == b'\'';
    if !quotes_balanced {
        working_set.error(ParseError::Unclosed('\''.into(), span));
        return garbage(span);
    }

    // The raw text sits strictly between the two quotes and is not unescaped.
    let inner = contents[opening_quote_idx + 1..closing_quote_idx].to_vec();
    match String::from_utf8(inner) {
        Ok(token) => Expression {
            expr: Expr::RawString(token),
            span,
            ty: Type::String,
            custom_completion: None,
        },
        Err(_) => {
            working_set.error(ParseError::Expected("utf8 raw-string", span));
            garbage(span)
        }
    }
}
pub fn parse_paren_expr(
working_set: &mut StateWorkingSet,
span: Span,
@ -4553,6 +4619,9 @@ pub fn parse_value(
return Expression::garbage(span);
}
},
b'r' if bytes.len() > 1 && bytes[1] == b'#' => {
return parse_raw_string(working_set, span);
}
_ => {}
}
@ -6075,6 +6144,7 @@ pub fn discover_captures_in_expr(
}
}
Expr::String(_) => {}
Expr::RawString(_) => {}
Expr::StringInterpolation(exprs) => {
for expr in exprs {
discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
@ -6236,6 +6306,7 @@ pub fn parse(
contents: &[u8],
scoped: bool,
) -> Arc<Block> {
trace!("parse");
let name = match fname {
Some(fname) => {
// use the canonical name for this filename
@ -6253,9 +6324,13 @@ pub fn parse(
let mut output = {
if let Some(block) = previously_parsed_block {
// dbg!("previous block");
return block;
} else {
// dbg!("starting lex");
let (output, err) = lex(contents, new_span.start, &[], &[], false);
// dbg!("finished lex");
// dbg!(&output);
if let Some(err) = err {
working_set.error(err)
}