forked from extern/nushell
d0e0701a88
# Description Previously `nix run nixpkgs#hello` was lexed as `Item, Item, Item, Comment`; however, `#hello` is *not* supposed to be a comment here and should be parsed as part of the third `Item`. This change introduces this behavior by not interrupting the parse of the current token upon seeing a `#`. Thank you so much for considering this, I think many `nix` users will be grateful for this change and I think this will lead to more adoption in the ecosystem. - closes #8137 and #6335 # User-Facing Changes - code like `somecode# bla` and `somecode#bla` will not be parsed as `somecode, comment` but as `somecode#bla`, hence this is a breaking change for all users who didn't put a space before a comment-introducing token (`#`) # Tests + Formatting I've added tests that cover this behavior in `test_lex.rs` - [x] `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - [x] `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect` to check that you're using the standard code style - [x] `cargo test --workspace` to check that all tests pass # After Submitting > If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. I think this is the expected behavior in most other shells, so the documentation was previously incomplete in not mentioning the unexpected behavior, and is now automatically more complete :D
190 lines
4.1 KiB
Rust
190 lines
4.1 KiB
Rust
use nu_parser::{lex, ParseError, Token, TokenContents};
|
|
use nu_protocol::Span;
|
|
|
|
/// Lexing a plain `let` statement must not report an error.
#[test]
fn lex_basic() {
    let source = b"let x = 4";

    // `lex` returns a pair: the token list and an optional error.
    let (_tokens, err) = lex(source, 0, &[], &[], true);

    assert!(err.is_none());
}
|
|
|
|
/// A `\n` between two statements must show up as an `Eol` token.
#[test]
fn lex_newline() {
    let source = b"let x = 300\nlet y = 500;";

    let (tokens, _err) = lex(source, 0, &[], &[], true);

    // The newline is the byte at offset 11, so its token covers 11..12.
    let expected_eol = Token {
        contents: TokenContents::Eol,
        span: Span::new(11, 12),
    };
    assert!(tokens.contains(&expected_eol));
}
|
|
|
|
/// Empty input must yield no tokens and no error.
#[test]
fn lex_empty() {
    let source = b"";

    let (tokens, err) = lex(source, 0, &[], &[], true);

    assert!(tokens.is_empty());
    assert!(err.is_none());
}
|
|
|
|
/// A parenthesised expression, including nested parentheses, must come out
/// of the lexer as one single `Item` token.
#[test]
fn lex_parenthesis() {
    let source = b"let x = (300 + (322 * 444));";

    let (tokens, _err) = lex(source, 0, &[], &[], true);

    // Tokens 0..=2 are `let`, `x` and `=`; token 3 is the whole
    // `(300 + (322 * 444))` group, which occupies bytes 8..27.
    let expected_group = Token {
        contents: TokenContents::Item,
        span: Span::new(8, 27),
    };
    assert_eq!(tokens[3], expected_group);
}
|
|
|
|
/// A `#` preceded by whitespace starts a `Comment` token that runs up to
/// (but does not include) the following newline.
#[test]
fn lex_comment() {
    let source = b"let x = 300 # a comment \n $x + 444";

    // NOTE(review): the final `false` argument presumably asks the lexer to
    // keep comments in its output; confirm against `lex`'s signature.
    let (tokens, _err) = lex(source, 0, &[], &[], false);

    // `# a comment ` (trailing space included) occupies bytes 12..24.
    let expected_comment = Token {
        contents: TokenContents::Comment,
        span: Span::new(12, 24),
    };
    assert_eq!(tokens[4], expected_comment);
}
|
|
|
|
/// A pipe that is followed directly by `;` must be reported as
/// `ParseError::ExtraTokens`.
#[test]
fn lex_is_incomplete() {
    let source = b"let x = 300 | ;";

    let (_tokens, err) = lex(source, 0, &[], &[], true);

    // The test fails both when no error is returned and when the error is
    // of a different variant.
    assert!(matches!(err, Some(ParseError::ExtraTokens(_))));
}
|
|
|
|
/// Input with an unclosed `(` must be reported as `ParseError::UnexpectedEof`
/// whose first field is `)`.
#[test]
fn lex_incomplete_paren() {
    // Two opening parentheses, only one closing.
    let source = b"let x = (300 + ( 4 + 1)";

    let (_tokens, err) = lex(source, 0, &[], &[], true);

    assert!(matches!(
        err,
        Some(ParseError::UnexpectedEof(missing, _)) if missing == ")"
    ));
}
|
|
|
|
/// Input with an unterminated single-quoted string must be reported as
/// `ParseError::UnexpectedEof` whose first field is `'`.
#[test]
fn lex_incomplete_quote() {
    let source = b"let x = '300 + 4 + 1";

    let (_tokens, err) = lex(source, 0, &[], &[], true);

    assert!(matches!(
        err,
        Some(ParseError::UnexpectedEof(missing, _)) if missing == "'"
    ));
}
|
|
|
|
/// A `#` only starts a comment when it begins a new token; a `#` in the
/// middle of a token stays part of that `Item`.
///
/// Lexed input, one statement per line:
///
/// ```text
/// let z = 42 #the comment
///  let x#y = 69 #hello
///  let flk = nixpkgs#hello #hello
/// ```
#[test]
fn lex_comments_no_space() {
    let source = b"let z = 42 #the comment \n let x#y = 69 #hello \n let flk = nixpkgs#hello #hello";

    // NOTE(review): the final `false` argument presumably asks the lexer to
    // keep comments in its output; confirm against `lex`'s signature.
    let (tokens, _err) = lex(source, 0, &[], &[], false);

    // (token index, expected kind, span start, span end)
    let expectations = [
        // `#the comment ` after `42`
        (4, TokenContents::Comment, 11, 24),
        // `x#y` is a single item, not `x` followed by a comment
        (7, TokenContents::Item, 30, 33),
        // `#hello ` after `69`
        (10, TokenContents::Comment, 39, 46),
        // `nixpkgs#hello` is a single item
        (15, TokenContents::Item, 58, 71),
        // the trailing `#hello`
        (16, TokenContents::Comment, 72, 78),
    ];

    for (index, contents, start, end) in expectations {
        let expected = Token {
            contents,
            span: Span::new(start, end),
        };
        assert_eq!(tokens[index], expected);
    }
}
|
|
|
|
/// A comment must not swallow the end-of-line token that follows it.
///
/// Lexed input, one statement per line:
///
/// ```text
/// let z = 4 #comment
///  let x = 4 # comment
///  let y = 1 # comment
/// ```
#[test]
fn lex_comments() {
    let source = b"let z = 4 #comment \n let x = 4 # comment\n let y = 1 # comment";

    // NOTE(review): the final `false` argument presumably asks the lexer to
    // keep comments in its output; confirm against `lex`'s signature.
    let (tokens, _err) = lex(source, 0, &[], &[], false);

    // (token index, expected kind, span start, span end)
    let expectations = [
        // First line: there is a space between the comment text and the
        // newline; the comment span includes it and the EOL follows at 19.
        (4, TokenContents::Comment, 10, 19),
        (5, TokenContents::Eol, 19, 20),
        // Second line: no space before the newline, so the comment span ends
        // exactly where the EOL span begins (the two spans are adjacent).
        (10, TokenContents::Comment, 31, 40),
        (11, TokenContents::Eol, 40, 41),
    ];

    for (index, contents, start, end) in expectations {
        let expected = Token {
            contents,
            span: Span::new(start, end),
        };
        assert_eq!(tokens[index], expected);
    }
}
|