use nu_protocol::{ParseError, Span};

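/// The kind of a lexed token. `Item` is the catch-all for anything that is not
/// a separator, comment, end-of-line, or redirection token.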
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TokenContents {
    Item,
    Comment,
    Pipe,
    PipePipe,
    Semicolon,
    /// `out>` / `o>`
    OutGreaterThan,
    /// `out>>` / `o>>`
    OutGreaterGreaterThan,
    /// `err>` / `e>`
    ErrGreaterThan,
    /// `err>>` / `e>>`
    ErrGreaterGreaterThan,
    /// `out+err>`, `err+out>`, `o+e>`, `e+o>`
    OutErrGreaterThan,
    /// `out+err>>`, `err+out>>`, `o+e>>`, `e+o>>`
    OutErrGreaterGreaterThan,
    Eol,
}

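/// A single lexed token: its classification plus the span of source it covers.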
#[derive(Debug, PartialEq, Eq)]
pub struct Token {
    pub contents: TokenContents,
    pub span: Span,
}

impl Token {
    pub fn new(contents: TokenContents, span: Span) -> Token {
        Token { contents, span }
    }
}

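/// The kinds of paired delimiters tracked while lexing an item. `AngleBracket`
/// is only tracked when lexing a signature.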
#[derive(Clone, Copy, Debug)]
pub enum BlockKind {
    Paren,
    CurlyBracket,
    SquareBracket,
    AngleBracket,
}

impl BlockKind {
    /// The closing delimiter byte that matches this kind.
    fn closing(self) -> u8 {
        match self {
            BlockKind::Paren => b')',
            BlockKind::SquareBracket => b']',
            BlockKind::CurlyBracket => b'}',
            BlockKind::AngleBracket => b'>',
        }
    }
}

// A baseline token is terminated if it's not nested inside of a paired
// delimiter and the next character is one of: `|`, `;`, `#` or any
// whitespace.
fn is_item_terminator(
    block_level: &[BlockKind],
    c: u8,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
) -> bool {
    block_level.is_empty()
        && (c == b' '
            || c == b'\t'
            || c == b'\n'
            || c == b'\r'
            || c == b'|'
            || c == b';'
            || additional_whitespace.contains(&c)
            || special_tokens.contains(&c))
}

// A special token is a single byte that stands alone as its own token. For
// example, when parsing a signature you may want `:` to separate tokens and
// also to be emitted as its own token, signaling that a type is about to be
// parsed, as in `foo:bar`.
fn is_special_item(block_level: &[BlockKind], c: u8, special_tokens: &[u8]) -> bool {
    block_level.is_empty() && special_tokens.contains(&c)
}

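/// Lex a single item (a "baseline token") starting at `*curr_offset`, advancing
/// the offset past it. Even on malformed input (unbalanced delimiters, an
/// unterminated string) an `Item` token is returned alongside the error, so
/// callers can keep going. A minimal sketch of a call (marked `ignore` since
/// the public path to this function depends on the crate's re-exports):
///
/// ```ignore
/// let input = b"[1 2] rest";
/// let mut offset = 0;
/// let (token, err) = lex_item(input, &mut offset, 0, &[], &[], false);
/// assert!(err.is_none());
/// // The whole bracketed block `[1 2]` is one item.
/// assert_eq!((token.span.start, token.span.end), (0, 5));
/// ```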
pub fn lex_item(
    input: &[u8],
    curr_offset: &mut usize,
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    in_signature: bool,
) -> (Token, Option<ParseError>) {
    // This variable tracks the starting character of a string literal, so that
    // we remain inside the string-literal lexer mode until we encounter the
    // closing quote.
    let mut quote_start: Option<u8> = None;

    let mut in_comment = false;

    let token_start = *curr_offset;

    // This Vec tracks paired delimiters
    let mut block_level: Vec<BlockKind> = vec![];

    // The process of slurping up a baseline token repeats:
    //
    // - String literal, which begins with `'`, `"`, or a backtick, and
    //   continues until the same character is encountered again.
    // - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
    //   the matching closing delimiter is found, skipping comments and string
    //   literals.
    // - When not nested inside of a delimiter pair, when a terminating
    //   character (whitespace, `|`, `;` or `#`) is encountered, the baseline
    //   token is done.
    // - Otherwise, accumulate the character into the current baseline token.
    while let Some(c) = input.get(*curr_offset) {
        let c = *c;

        if let Some(start) = quote_start {
            // Check if we're in an escape sequence
            if c == b'\\' && start == b'"' {
                // Go ahead and consume the escape character if possible
                if input.get(*curr_offset + 1).is_some() {
                    // Successfully escaped the character
                    *curr_offset += 2;
                    continue;
                } else {
                    let span = Span::new(span_offset + token_start, span_offset + *curr_offset);

                    return (
                        Token {
                            contents: TokenContents::Item,
                            span,
                        },
                        Some(ParseError::UnexpectedEof(
                            (start as char).to_string(),
                            Span::new(span.end, span.end),
                        )),
                    );
                }
            }
            // If we encountered the closing quote character for the current
            // string, we're done with the current string.
            if c == start {
                // Also need to check to make sure we aren't escaped
                quote_start = None;
            }
        } else if c == b'#' {
            if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
                break;
            }
            in_comment = true;
        } else if c == b'\n' || c == b'\r' {
            in_comment = false;
            if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
                break;
            }
        } else if in_comment {
            if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
                break;
            }
        } else if is_special_item(&block_level, c, special_tokens) && token_start == *curr_offset {
            *curr_offset += 1;
            break;
        } else if c == b'\'' || c == b'"' || c == b'`' {
            // We encountered the opening quote of a string literal.
            quote_start = Some(c);
        } else if c == b'[' {
            // We encountered an opening `[` delimiter.
            block_level.push(BlockKind::SquareBracket);
        } else if c == b'<' && in_signature {
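            // Signature mode only: track `<` and `>` as a delimiter pair so the
            // contents between them stay within the current item.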
            block_level.push(BlockKind::AngleBracket);
        } else if c == b'>' && in_signature {
            if let Some(BlockKind::AngleBracket) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if c == b']' {
            // We encountered a closing `]` delimiter. Pop off the opening `[`
            // delimiter.
            if let Some(BlockKind::SquareBracket) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if c == b'{' {
            // We encountered an opening `{` delimiter.
            block_level.push(BlockKind::CurlyBracket);
        } else if c == b'}' {
            // We encountered a closing `}` delimiter. Pop off the opening `{`.
            if let Some(BlockKind::CurlyBracket) = block_level.last() {
                let _ = block_level.pop();
            } else {
                // We encountered a closing `}` delimiter, but the last opening
                // delimiter was not a `{`. This is an error.
                let span = Span::new(span_offset + token_start, span_offset + *curr_offset);

                *curr_offset += 1;
                return (
                    Token {
                        contents: TokenContents::Item,
                        span,
                    },
                    Some(ParseError::Unbalanced(
                        "{".to_string(),
                        "}".to_string(),
                        Span::new(span.end, span.end + 1),
                    )),
                );
            }
        } else if c == b'(' {
            // We encountered an opening `(` delimiter.
            block_level.push(BlockKind::Paren);
        } else if c == b')' {
            // We encountered a closing `)` delimiter. Pop off the opening `(`.
            if let Some(BlockKind::Paren) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
            break;
        }

        *curr_offset += 1;
    }

    let span = Span::new(span_offset + token_start, span_offset + *curr_offset);

    // If there are still unclosed opening delimiters, report them as missing.
    if let Some(block) = block_level.last() {
        let delim = block.closing();
        let cause =
            ParseError::UnexpectedEof((delim as char).to_string(), Span::new(span.end, span.end));

        return (
            Token {
                contents: TokenContents::Item,
                span,
            },
            Some(cause),
        );
    }

    if let Some(delim) = quote_start {
        // The non-lite parse trims quotes on both sides, so we add the expected quote so that
        // anyone wanting to consume this partial parse (e.g., completions) will be able to get
        // correct information from the non-lite parse.
        return (
            Token {
                contents: TokenContents::Item,
                span,
            },
            Some(ParseError::UnexpectedEof(
                (delim as char).to_string(),
                Span::new(span.end, span.end),
            )),
        );
    }

    // If we didn't accumulate any characters, it's an unexpected EOF error.
    if *curr_offset - token_start == 0 {
        return (
            Token {
                contents: TokenContents::Item,
                span,
            },
            Some(ParseError::UnexpectedEof("command".to_string(), span)),
        );
    }
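
    // Classify the accumulated item. Redirection words (`out>`, `e>>`, ...) get
    // dedicated token kinds, while the Bash-isms `&&`, `2>`, and `2>&1` stay
    // `Item` tokens but are flagged with dedicated parse errors.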
    let mut err = None;
    let output = match &input[(span.start - span_offset)..(span.end - span_offset)] {
        b"out>" | b"o>" => Token {
            contents: TokenContents::OutGreaterThan,
            span,
        },
        b"out>>" | b"o>>" => Token {
            contents: TokenContents::OutGreaterGreaterThan,
            span,
        },
        b"err>" | b"e>" => Token {
            contents: TokenContents::ErrGreaterThan,
            span,
        },
        b"err>>" | b"e>>" => Token {
            contents: TokenContents::ErrGreaterGreaterThan,
            span,
        },
        b"out+err>" | b"err+out>" | b"o+e>" | b"e+o>" => Token {
            contents: TokenContents::OutErrGreaterThan,
            span,
        },
        b"out+err>>" | b"err+out>>" | b"o+e>>" | b"e+o>>" => Token {
            contents: TokenContents::OutErrGreaterGreaterThan,
            span,
        },
        b"&&" => {
            err = Some(ParseError::ShellAndAnd(span));
            Token {
                contents: TokenContents::Item,
                span,
            }
        }
        b"2>" => {
            err = Some(ParseError::ShellErrRedirect(span));
            Token {
                contents: TokenContents::Item,
                span,
            }
        }
        b"2>&1" => {
            err = Some(ParseError::ShellOutErrRedirect(span));
            Token {
                contents: TokenContents::Item,
                span,
            }
        }
        _ => Token {
            contents: TokenContents::Item,
            span,
        },
    };
    (output, err)
}

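/// Lex `input` as a signature body. Identical to [`lex`] except that `<` and
/// `>` are additionally tracked as a delimiter pair, so that a signature's
/// type annotations lex as part of a single item.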
pub fn lex_signature(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
    lex_internal(
        input,
        span_offset,
        additional_whitespace,
        special_tokens,
        skip_comment,
        true,
    )
}

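/// Lex `input` into a flat stream of tokens, starting spans at `span_offset`.
///
/// A minimal sketch of a call (marked `ignore` since the public path to this
/// function depends on the crate's re-exports):
///
/// ```ignore
/// let (tokens, err) = lex(b"ls | length", 0, &[], &[], false);
/// assert!(err.is_none());
/// assert_eq!(tokens.len(), 3); // Item, Pipe, Item
/// ```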
pub fn lex(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
    lex_internal(
        input,
        span_offset,
        additional_whitespace,
        special_tokens,
        skip_comment,
        false,
    )
}

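/// Shared implementation behind [`lex`] and [`lex_signature`]; the two public
/// entry points differ only in the `in_signature` flag they pass.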
fn lex_internal(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
    // within signatures we want to treat `<` and `>` specially
    in_signature: bool,
) -> (Vec<Token>, Option<ParseError>) {
    let mut error = None;

    let mut curr_offset = 0;

    let mut output = vec![];
    let mut is_complete = true;

    while let Some(c) = input.get(curr_offset) {
        let c = *c;
        if c == b'|' {
            // We're looking at a `|`: either a pipe or the start of a `||`.
            let idx = curr_offset;
            let prev_idx = idx;
            curr_offset += 1;

            // If the next character is also `|`, we're looking at a `||`.
            if let Some(c) = input.get(curr_offset) {
                if *c == b'|' {
                    let idx = curr_offset;
                    curr_offset += 1;
                    output.push(Token::new(
                        TokenContents::PipePipe,
                        Span::new(span_offset + prev_idx, span_offset + idx + 1),
                    ));
                    continue;
                }
            }

            // Otherwise, it's just a regular `|` token.

            // Before we push, check to see if the previous token was an end of
            // line. If so, then this is a continuation of the previous line.
            if let Some(prev) = output.last_mut() {
                match prev.contents {
                    TokenContents::Eol => {
                        *prev = Token::new(
                            TokenContents::Pipe,
                            Span::new(span_offset + idx, span_offset + idx + 1),
                        );
                        // This is also a continuation of the previous line if
                        // the previous line is a comment line (i.e., an Eol
                        // followed by a Comment).
                        //
                        // At this point the last token is the Pipe we just
                        // wrote, which we don't need to check, so the starting
                        // offset is 2.
                        let mut offset = 2;
                        while output.len() > offset {
                            let index = output.len() - offset;
                            if output[index].contents == TokenContents::Comment
                                && output[index - 1].contents == TokenContents::Eol
                            {
                                output.remove(index - 1);
                                offset += 1;
                            } else {
                                break;
                            }
                        }
                    }
                    _ => {
                        output.push(Token::new(
                            TokenContents::Pipe,
                            Span::new(span_offset + idx, span_offset + idx + 1),
                        ));
                    }
                }
            } else {
                output.push(Token::new(
                    TokenContents::Pipe,
                    Span::new(span_offset + idx, span_offset + idx + 1),
                ));
            }

            is_complete = false;
        } else if c == b';' {
            // If the next character is a `;`, we're looking at a semicolon token.

            if !is_complete && error.is_none() {
                error = Some(ParseError::ExtraTokens(Span::new(
                    curr_offset,
                    curr_offset + 1,
                )));
            }
            let idx = curr_offset;
            curr_offset += 1;
            output.push(Token::new(
                TokenContents::Semicolon,
                Span::new(span_offset + idx, span_offset + idx + 1),
            ));
        } else if c == b'\r' {
            // Ignore a stand-alone carriage return
            curr_offset += 1;
        } else if c == b'\n' {
            // If the next character is a newline, we're looking at an EOL (end of line) token.
            let idx = curr_offset;
            curr_offset += 1;
            if !additional_whitespace.contains(&c) {
                output.push(Token::new(
                    TokenContents::Eol,
                    Span::new(span_offset + idx, span_offset + idx + 1),
                ));
            }
        } else if c == b'#' {
            // If the next character is `#`, we're at the beginning of a line
            // comment. The comment continues until the next newline.
            let mut start = curr_offset;

            while let Some(input) = input.get(curr_offset) {
                if *input == b'\n' {
                    if !skip_comment {
                        output.push(Token::new(
                            TokenContents::Comment,
                            Span::new(span_offset + start, span_offset + curr_offset),
                        ));
                    }
                    start = curr_offset;

                    break;
                } else {
                    curr_offset += 1;
                }
            }
            if start != curr_offset && !skip_comment {
                output.push(Token::new(
                    TokenContents::Comment,
                    Span::new(span_offset + start, span_offset + curr_offset),
                ));
            }
        } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
            // If the next character is non-newline whitespace, skip it.
            curr_offset += 1;
        } else {
            // Otherwise, try to consume an unclassified token.

            let (token, err) = lex_item(
                input,
                &mut curr_offset,
                span_offset,
                additional_whitespace,
                special_tokens,
                in_signature,
            );
            if error.is_none() {
                error = err;
            }
            is_complete = true;
            output.push(token);
        }
    }
    (output, error)
}
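
// A couple of smoke tests sketching the tokenization rules above; the expected
// token sequences follow directly from the branches in `lex_internal` and
// `lex_item`.
#[cfg(test)]
mod smoke_tests {
    use super::*;

    #[test]
    fn pipe_splits_items() {
        let (tokens, err) = lex(b"ls | length", 0, &[], &[], false);
        assert!(err.is_none());
        let kinds: Vec<TokenContents> = tokens.iter().map(|t| t.contents).collect();
        assert_eq!(
            kinds,
            vec![TokenContents::Item, TokenContents::Pipe, TokenContents::Item]
        );
    }

    #[test]
    fn unbalanced_close_brace_is_reported() {
        // A stray `}` with no matching `{` produces an `Unbalanced` error.
        let (_tokens, err) = lex(b"}", 0, &[], &[], false);
        assert!(matches!(err, Some(ParseError::Unbalanced(..))));
    }
}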