nushell/crates/nu-parser/src/lite_parser.rs
Bahex 442df9e39c
Custom command attributes (#14906)
# Description
Add custom command attributes.

- Attributes are placed before a command definition and start with a `@`
character.
- An attribute invocation consists of a const command call. The command's
name must start with "attr ", but this prefix is not used in the
invocation.
- A command named `attr example` is invoked as an attribute as
`@example`
-   Several built-in attribute commands are provided as part of this PR
    -   `attr example`: Attaches an example to the command's help text
        ```nushell
        # Double numbers
        @example "double an int"  { 5 | double }   --result 10
        @example "double a float" { 0.5 | double } --result 1.0
        def double []: [number -> number] {
            $in * 2
        }
        ```
    -   `attr search-terms`: Adds search terms to a command
    -   ~`attr env`: Equivalent to using `def --env`~
- ~`attr wrapped`: Equivalent to using `def --wrapped`~ shelved for
later discussion
    -   Several testing-related attributes in `std/testing`
- If an attribute has no internal/special purpose, it's stored as
command metadata that can be obtained with `scope commands`.
- This allows having attributes like `@test` which can be used by test
runners.
-   Used the `@example` attribute for `std` examples.
-   Updated the std tests and test runner to use `@test` attributes
-   Added completions for attributes

# User-Facing Changes
Users can add examples to their own command definitions, and add other
arbitrary attributes.

# Tests + Formatting

- 🟢 toolkit fmt
- 🟢 toolkit clippy
- 🟢 toolkit test
- 🟢 toolkit test stdlib

# After Submitting
- Add documentation about the attribute syntax and built-in attributes
- `help attributes`

---------

Co-authored-by: 132ikl <132@ikl.sh>
2025-02-11 06:34:51 -06:00

524 lines
21 KiB
Rust

//! Lite parsing converts a flat stream of tokens from the lexer to a syntax element structure that
//! can be parsed.
use crate::{Token, TokenContents};
use itertools::{Either, Itertools};
use nu_protocol::{ast::RedirectionSource, engine::StateWorkingSet, ParseError, Span};
use std::mem;
#[derive(Debug, Clone, Copy)]
pub enum LiteRedirectionTarget {
File {
connector: Span,
file: Span,
append: bool,
},
Pipe {
connector: Span,
},
}
impl LiteRedirectionTarget {
pub fn connector(&self) -> Span {
match self {
LiteRedirectionTarget::File { connector, .. }
| LiteRedirectionTarget::Pipe { connector } => *connector,
}
}
pub fn spans(&self) -> impl Iterator<Item = Span> {
match *self {
LiteRedirectionTarget::File {
connector, file, ..
} => Either::Left([connector, file].into_iter()),
LiteRedirectionTarget::Pipe { connector } => Either::Right(std::iter::once(connector)),
}
}
}
/// The redirection(s) attached to a single [`LiteCommand`].
#[derive(Debug, Clone)]
pub enum LiteRedirection {
    /// Exactly one redirection operator was given.
    Single {
        /// Which stream(s) are redirected.
        source: RedirectionSource,
        /// Where they are redirected to.
        target: LiteRedirectionTarget,
    },
    /// Two redirection operators were given, one per stream.
    Separate {
        /// Target of the stdout redirection.
        out: LiteRedirectionTarget,
        /// Target of the stderr redirection.
        err: LiteRedirectionTarget,
    },
}

impl LiteRedirection {
    /// Yields every span that belongs to the redirection.
    ///
    /// For [`LiteRedirection::Separate`] the spans of both targets are merged
    /// and returned in sorted order.
    pub fn spans(&self) -> impl Iterator<Item = Span> {
        match self {
            Self::Separate { out, err } => {
                // Gather both targets, then order them (stable sort).
                let mut merged: Vec<Span> = out.spans().chain(err.spans()).collect();
                merged.sort();
                Either::Right(merged.into_iter())
            }
            Self::Single { target, .. } => Either::Left(target.spans()),
        }
    }
}
/// One command of a pipeline: a flat list of token spans plus the metadata
/// gathered around them.
#[derive(Debug, Clone, Default)]
pub struct LiteCommand {
    /// Span of the pipe token that precedes this command, if any.
    pub pipe: Option<Span>,
    /// Spans of the comments associated with this command.
    pub comments: Vec<Span>,
    /// Spans of the tokens making up the command, attributes included.
    pub parts: Vec<Span>,
    /// Redirection attached to the command, if any.
    pub redirection: Option<LiteRedirection>,
    /// For each attribute, the index into `parts` one past its last part.
    pub attribute_idx: Vec<usize>,
}

impl LiteCommand {
    /// Appends a token span to the command's parts.
    fn push(&mut self, span: Span) {
        self.parts.push(span);
    }

    /// Returns an `UnexpectedRedirection` error at `span` when the command has
    /// no parts yet, and `None` otherwise.
    fn check_accepts_redirection(&self, span: Span) -> Option<ParseError> {
        if self.parts.is_empty() {
            Some(ParseError::UnexpectedRedirection { span })
        } else {
            None
        }
    }

    /// Attaches a redirection of `source` to `target`, combining it with an
    /// already recorded one where that is allowed.
    ///
    /// A stdout and a stderr redirection (in either order) combine into
    /// [`LiteRedirection::Separate`]. Any other combination is rejected with
    /// `MultipleRedirections`, and a redirection on a command without parts is
    /// rejected with `UnexpectedRedirection`. Whenever an error is returned,
    /// `self.redirection` is left as `None`.
    fn try_add_redirection(
        &mut self,
        source: RedirectionSource,
        target: LiteRedirectionTarget,
    ) -> Result<(), ParseError> {
        // Step 1: nothing recorded so far — either reject or store as-is.
        let existing = match self.redirection.take() {
            Some(existing) => existing,
            None if self.parts.is_empty() => {
                return Err(ParseError::UnexpectedRedirection {
                    span: target.connector(),
                });
            }
            None => {
                self.redirection = Some(LiteRedirection::Single { source, target });
                return Ok(());
            }
        };

        // Step 2: merge with what was recorded before.
        let merged = match (existing, source) {
            (
                LiteRedirection::Single {
                    source: RedirectionSource::Stdout,
                    target: out,
                },
                RedirectionSource::Stderr,
            ) => LiteRedirection::Separate { out, err: target },
            (
                LiteRedirection::Single {
                    source: RedirectionSource::Stderr,
                    target: err,
                },
                RedirectionSource::Stdout,
            ) => LiteRedirection::Separate { out: target, err },
            (
                LiteRedirection::Single {
                    source,
                    target: first,
                },
                _,
            ) => {
                return Err(ParseError::MultipleRedirections(
                    source,
                    first.connector(),
                    target.connector(),
                ));
            }
            (
                LiteRedirection::Separate { out, .. },
                RedirectionSource::Stdout | RedirectionSource::StdoutAndStderr,
            ) => {
                return Err(ParseError::MultipleRedirections(
                    RedirectionSource::Stdout,
                    out.connector(),
                    target.connector(),
                ));
            }
            (LiteRedirection::Separate { err, .. }, RedirectionSource::Stderr) => {
                return Err(ParseError::MultipleRedirections(
                    RedirectionSource::Stderr,
                    err.connector(),
                    target.connector(),
                ));
            }
        };

        self.redirection = Some(merged);
        Ok(())
    }

    /// Yields the command's parts together with the spans of its redirection,
    /// ordered by `(start, end)`.
    pub fn parts_including_redirection(&self) -> impl Iterator<Item = Span> + '_ {
        let redirection_spans = self.redirection.iter().flat_map(|redir| redir.spans());
        self.parts
            .iter()
            .copied()
            .chain(redirection_spans)
            .sorted_unstable_by_key(|span| (span.start, span.end))
    }

    /// Returns the parts that follow the last attribute, or all parts when the
    /// command has no attributes.
    pub fn command_parts(&self) -> &[Span] {
        match self.attribute_idx.last() {
            Some(&first_command_part) => &self.parts[first_command_part..],
            None => &self.parts[..],
        }
    }

    /// Reports whether at least one attribute was recorded for this command.
    pub fn has_attributes(&self) -> bool {
        !self.attribute_idx.is_empty()
    }

    /// Yields one fresh [`LiteCommand`] per attribute, each holding only that
    /// attribute's parts; every other field is left at its default.
    pub fn attribute_commands(&'_ self) -> impl Iterator<Item = LiteCommand> + '_ {
        // Attribute `i` spans `parts[start_i..end_i]`, where `start_0` is 0 and
        // every later start is the previous attribute's end.
        let ends = self.attribute_idx.iter().copied();
        let starts = std::iter::once(0).chain(ends.clone());
        starts.zip(ends).map(move |(start, end)| LiteCommand {
            parts: self.parts[start..end].to_vec(),
            ..LiteCommand::default()
        })
    }
}
/// A sequence of commands collected into one pipeline.
#[derive(Debug, Clone, Default)]
pub struct LitePipeline {
    /// The commands of the pipeline, in source order.
    pub commands: Vec<LiteCommand>,
}

impl LitePipeline {
    /// Moves `element` into the pipeline and leaves a default command behind.
    ///
    /// A command that has neither parts nor a redirection is ignored and left
    /// untouched.
    fn push(&mut self, element: &mut LiteCommand) {
        if element.parts.is_empty() && element.redirection.is_none() {
            return;
        }
        let finished = mem::take(element);
        self.commands.push(finished);
    }
}
/// A sequence of pipelines; the result type of lite parsing.
#[derive(Debug, Clone, Default)]
pub struct LiteBlock {
    /// The pipelines of the block, in source order.
    pub block: Vec<LitePipeline>,
}

impl LiteBlock {
    /// Moves `pipeline` into the block and leaves a default pipeline behind.
    ///
    /// A pipeline without commands is ignored and left untouched.
    fn push(&mut self, pipeline: &mut LitePipeline) {
        if pipeline.commands.is_empty() {
            return;
        }
        let finished = mem::take(pipeline);
        self.block.push(finished);
    }
}
/// Walks backwards from just before `cur_idx` and returns the contents of the
/// first token that is not part of a trailing `([Comment]+ [Eol])` group.
///
/// Comments are always skipped. An `Eol` is skipped only when a comment was
/// seen (in backwards order) since the last skipped `Eol`; any other `Eol` is
/// returned as-is. Returns `None` when every inspected token was skipped.
fn last_non_comment_token(tokens: &[Token], cur_idx: usize) -> Option<TokenContents> {
    // Only the tokens strictly before `cur_idx` are inspected.
    let preceding = &tokens[..cur_idx.min(tokens.len())];

    // `true` once a comment has been skipped and its line break may follow.
    let mut eol_may_follow = false;
    for token in preceding.iter().rev() {
        match token.contents {
            TokenContents::Comment => eol_may_follow = true,
            TokenContents::Eol if eol_may_follow => eol_may_follow = false,
            other => return Some(other),
        }
    }
    None
}
/// State of the token loop in `lite_parse`; selects how each token is handled.
#[derive(PartialEq, Eq)]
enum Mode {
/// Entered on an assignment operator. Every token other than `Eol`,
/// `Semicolon` and `Comment` (pipes and redirections included) is pushed
/// onto the current command as a plain part.
Assignment,
/// Entered when an item starting with `@` directly follows an `Eol` or
/// `Semicolon`. Tokens are pushed onto the current command until the next
/// `Eol` or `Semicolon`, which records the end of the attribute.
Attribute,
/// Default state: pipes, redirections, comments and terminators are
/// interpreted structurally.
Normal,
}
/// Groups a flat token stream into a [`LiteBlock`] of pipelines and commands.
///
/// `working_set` is only used to read the source text of item tokens, in order
/// to detect a leading `@` (attribute syntax).
///
/// Returns the block together with an optional error. Only the first error
/// encountered while scanning is kept. If the last token that is not part of a
/// trailing comment/line-break group is a pipe, an `UnexpectedEof` error
/// ("pipeline missing end") is returned instead of any earlier error.
/// An empty token slice yields an empty block and no error.
pub fn lite_parse(
tokens: &[Token],
working_set: &StateWorkingSet,
) -> (LiteBlock, Option<ParseError>) {
if tokens.is_empty() {
return (LiteBlock::default(), None);
}
// Containers under construction, from outermost to innermost.
let mut block = LiteBlock::default();
let mut pipeline = LitePipeline::default();
let mut command = LiteCommand::default();
// Initialised to `Eol` so the first token is handled as if it followed a line break.
let mut last_token = TokenContents::Eol;
// `(source, append, connector span)` of a file redirection operator whose target
// token has not been seen yet.
let mut file_redirection = None;
// Comments that precede a command, held until the command's first token arrives.
let mut curr_comment: Option<Vec<Span>> = None;
let mut mode = Mode::Normal;
// First error encountered; later errors never replace it (`error.or(..)`).
let mut error = None;
for (idx, token) in tokens.iter().enumerate() {
match mode {
Mode::Attribute => {
match &token.contents {
// Consume until semicolon or terminating EOL. Attributes can't contain pipelines or redirections.
TokenContents::Eol | TokenContents::Semicolon => {
// Record where this attribute ends; the command keeps accumulating
// parts (further attributes or the definition itself).
command.attribute_idx.push(command.parts.len());
mode = Mode::Normal;
// NOTE(review): `mode` only becomes `Attribute` right after an `Item`
// token, and this arm leaves the mode again, so `last_token` does not
// appear to ever be `Eol`/`Semicolon` here — this branch looks
// unreachable; confirm.
if matches!(last_token, TokenContents::Eol | TokenContents::Semicolon) {
// Clear out the comment as we're entering a new comment
curr_comment = None;
pipeline.push(&mut command);
block.push(&mut pipeline);
}
}
TokenContents::Comment => {
command.comments.push(token.span);
curr_comment = None;
}
// Every other token, whatever its kind, is a plain part of the attribute.
_ => command.push(token.span),
}
}
Mode::Assignment => {
match &token.contents {
// Consume until semicolon or terminating EOL. Assignments absorb pipelines and
// redirections.
TokenContents::Eol => {
// Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]`
//
// `[Eol]` branch checks if previous token is `[Pipe]` to construct pipeline
// and so `[Comment] | [Eol]` should be ignored to make it work
let actual_token = last_non_comment_token(tokens, idx);
// A line break after a pipe continues the assignment on the next line.
if actual_token != Some(TokenContents::Pipe) {
mode = Mode::Normal;
pipeline.push(&mut command);
block.push(&mut pipeline);
}
if last_token == TokenContents::Eol {
// Clear out the comment as we're entering a new comment
curr_comment = None;
}
}
TokenContents::Semicolon => {
mode = Mode::Normal;
pipeline.push(&mut command);
block.push(&mut pipeline);
}
TokenContents::Comment => {
command.comments.push(token.span);
curr_comment = None;
}
_ => command.push(token.span),
}
}
Mode::Normal => {
// A file redirection operator was seen on the previous token; this token
// must be its target. `take()` clears the pending state either way.
if let Some((source, append, span)) = file_redirection.take() {
match &token.contents {
TokenContents::PipePipe => {
error = error.or(Some(ParseError::ShellOrOr(token.span)));
command.push(span);
command.push(token.span);
}
TokenContents::Item => {
let target = LiteRedirectionTarget::File {
connector: span,
file: token.span,
append,
};
// If the redirection is rejected, keep both tokens as ordinary parts.
if let Err(err) = command.try_add_redirection(source, target) {
error = error.or(Some(err));
command.push(span);
command.push(token.span)
}
}
// In all remaining arms the target is missing: record the error and keep
// the operator (and, where applicable, the offending token) as parts.
TokenContents::AssignmentOperator => {
error = error
.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
command.push(token.span);
}
TokenContents::OutGreaterThan
| TokenContents::OutGreaterGreaterThan
| TokenContents::ErrGreaterThan
| TokenContents::ErrGreaterGreaterThan
| TokenContents::OutErrGreaterThan
| TokenContents::OutErrGreaterGreaterThan => {
error = error
.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
command.push(token.span);
}
TokenContents::Pipe
| TokenContents::ErrGreaterPipe
| TokenContents::OutErrGreaterPipe => {
error = error
.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
// Finish the current command; the pipe belongs to the next one.
pipeline.push(&mut command);
command.pipe = Some(token.span);
}
TokenContents::Eol => {
error = error
.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
// Note: only the command is finished here, not the pipeline.
pipeline.push(&mut command);
}
TokenContents::Semicolon => {
error = error
.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
pipeline.push(&mut command);
block.push(&mut pipeline);
}
TokenContents::Comment => {
// Unlike the other arms, the error points at the operator itself.
error =
error.or(Some(ParseError::Expected("redirection target", span)));
command.push(span);
command.comments.push(token.span);
curr_comment = None;
}
}
} else {
match &token.contents {
TokenContents::PipePipe => {
error = error.or(Some(ParseError::ShellOrOr(token.span)));
command.push(token.span);
}
TokenContents::Item => {
// FIXME: This is commented out to preserve old parser behavior,
// but we should probably error here.
//
// if element.redirection.is_some() {
// error = error.or(Some(ParseError::LabeledError(
// "Unexpected positional".into(),
// "cannot add positional arguments after output redirection".into(),
// token.span,
// )));
// }
//
// For example, this is currently allowed: ^echo thing o> out.txt extra_arg
if working_set.get_span_contents(token.span).starts_with(b"@") {
// An `@`-prefixed item starts an attribute only directly after a line
// break or semicolon; elsewhere it is an ordinary part. Pending
// comments are not attached here and stay in `curr_comment`.
if matches!(
last_token,
TokenContents::Eol | TokenContents::Semicolon
) {
mode = Mode::Attribute;
}
command.push(token.span);
} else {
// If we have a comment, go ahead and attach it
if let Some(curr_comment) = curr_comment.take() {
command.comments = curr_comment;
}
command.push(token.span);
}
}
TokenContents::AssignmentOperator => {
// When in assignment mode, we'll just consume pipes or redirections as part of
// the command.
mode = Mode::Assignment;
if let Some(curr_comment) = curr_comment.take() {
command.comments = curr_comment;
}
command.push(token.span);
}
// File redirection operators: remember source/append/connector and wait
// for the target token on the next iteration.
TokenContents::OutGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stdout, false, token.span));
}
TokenContents::OutGreaterGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stdout, true, token.span));
}
TokenContents::ErrGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stderr, false, token.span));
}
TokenContents::ErrGreaterGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stderr, true, token.span));
}
TokenContents::OutErrGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection =
Some((RedirectionSource::StdoutAndStderr, false, token.span));
}
TokenContents::OutErrGreaterGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection =
Some((RedirectionSource::StdoutAndStderr, true, token.span));
}
// Pipe redirections: attach the redirection to the current command, finish
// it, and mark the next command as piped.
TokenContents::ErrGreaterPipe => {
let target = LiteRedirectionTarget::Pipe {
connector: token.span,
};
if let Err(err) =
command.try_add_redirection(RedirectionSource::Stderr, target)
{
error = error.or(Some(err));
}
pipeline.push(&mut command);
command.pipe = Some(token.span);
}
TokenContents::OutErrGreaterPipe => {
let target = LiteRedirectionTarget::Pipe {
connector: token.span,
};
if let Err(err) = command
.try_add_redirection(RedirectionSource::StdoutAndStderr, target)
{
error = error.or(Some(err));
}
pipeline.push(&mut command);
command.pipe = Some(token.span);
}
TokenContents::Pipe => {
pipeline.push(&mut command);
command.pipe = Some(token.span);
}
TokenContents::Eol => {
// Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]`
//
// `[Eol]` branch checks if previous token is `[Pipe]` to construct pipeline
// and so `[Comment] | [Eol]` should be ignored to make it work
let actual_token = last_non_comment_token(tokens, idx);
if actual_token != Some(TokenContents::Pipe) {
pipeline.push(&mut command);
block.push(&mut pipeline);
}
if last_token == TokenContents::Eol {
// Clear out the comment as we're entering a new comment
curr_comment = None;
}
}
TokenContents::Semicolon => {
pipeline.push(&mut command);
block.push(&mut pipeline);
}
TokenContents::Comment => {
// Comment is beside something
if last_token != TokenContents::Eol {
command.comments.push(token.span);
curr_comment = None;
} else {
// Comment precedes something
if let Some(curr_comment) = &mut curr_comment {
curr_comment.push(token.span);
} else {
curr_comment = Some(vec![token.span]);
}
}
}
}
}
}
}
last_token = token.contents;
}
// Input ended right after a file redirection operator: keep the operator as a
// part and report the missing target.
if let Some((_, _, span)) = file_redirection {
command.push(span);
error = error.or(Some(ParseError::Expected("redirection target", span)));
}
// Input ended inside an attribute: close it as a terminator would have.
if let Mode::Attribute = mode {
command.attribute_idx.push(command.parts.len());
}
// Flush whatever is still being built.
pipeline.push(&mut command);
block.push(&mut pipeline);
// A dangling pipe takes precedence over any error recorded earlier. Indexing is
// safe because `tokens` was checked to be non-empty on entry.
if last_non_comment_token(tokens, tokens.len()) == Some(TokenContents::Pipe) {
(
block,
Some(ParseError::UnexpectedEof(
"pipeline missing end".into(),
tokens[tokens.len() - 1].span,
)),
)
} else {
(block, error)
}
}