forked from extern/nushell
Add pattern matching (#8590)
# Description This adds `match` and basic pattern matching. An example: ``` match $x { 1..10 => { print "Value is between 1 and 10" } { foo: $bar } => { print $"Value has a 'foo' field with value ($bar)" } [$a, $b] => { print $"Value is a list with two items: ($a) and ($b)" } _ => { print "Value is none of the above" } } ``` Like the recent changes to `if` to allow it to be used as an expression, `match` can also be used as an expression. This allows you to assign the result to a variable, e.g. `let xyz = match ...`. I've also included a short-hand pattern for matching records, as I think it might help when doing a lot of record patterns: `{$foo}` which is equivalent to `{foo: $foo}`. There are still missing components, so consider this the first step in full pattern matching support. Currently missing: * Patterns for strings * Or-patterns (like the `|` in Rust) * Patterns for tables (unclear how we want to match a table, so it'll need some design) * Patterns for binary values * And much more # User-Facing Changes [see above] # Tests + Formatting Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` # After Submitting If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date.
This commit is contained in:
@ -1,5 +1,6 @@
|
||||
use nu_protocol::ast::{
|
||||
Block, Expr, Expression, ImportPatternMember, PathMember, Pipeline, PipelineElement,
|
||||
Block, Expr, Expression, ImportPatternMember, MatchPattern, PathMember, Pattern, Pipeline,
|
||||
PipelineElement,
|
||||
};
|
||||
use nu_protocol::DeclId;
|
||||
use nu_protocol::{engine::StateWorkingSet, Span};
|
||||
@ -25,6 +26,7 @@ pub enum FlatShape {
|
||||
InternalCall,
|
||||
List,
|
||||
Literal,
|
||||
MatchPattern,
|
||||
Nothing,
|
||||
Operator,
|
||||
Or,
|
||||
@ -60,6 +62,7 @@ impl Display for FlatShape {
|
||||
FlatShape::InternalCall => write!(f, "shape_internalcall"),
|
||||
FlatShape::List => write!(f, "shape_list"),
|
||||
FlatShape::Literal => write!(f, "shape_literal"),
|
||||
FlatShape::MatchPattern => write!(f, "shape_match_pattern"),
|
||||
FlatShape::Nothing => write!(f, "shape_nothing"),
|
||||
FlatShape::Operator => write!(f, "shape_operator"),
|
||||
FlatShape::Or => write!(f, "shape_or"),
|
||||
@ -212,6 +215,20 @@ pub fn flatten_expression(
|
||||
Expr::Float(_) => {
|
||||
vec![(expr.span, FlatShape::Float)]
|
||||
}
|
||||
Expr::MatchPattern(pattern) => {
|
||||
// FIXME: do nicer flattening later
|
||||
flatten_pattern(pattern)
|
||||
}
|
||||
Expr::MatchBlock(matches) => {
|
||||
let mut output = vec![];
|
||||
|
||||
for match_ in matches {
|
||||
output.extend(flatten_pattern(&match_.0));
|
||||
output.extend(flatten_expression(working_set, &match_.1));
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
Expr::ValueWithUnit(x, unit) => {
|
||||
let mut output = flatten_expression(working_set, x);
|
||||
output.push((unit.span, FlatShape::String));
|
||||
@ -488,3 +505,60 @@ pub fn flatten_pipeline(
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
pub fn flatten_pattern(match_pattern: &MatchPattern) -> Vec<(Span, FlatShape)> {
|
||||
let mut output = vec![];
|
||||
match &match_pattern.pattern {
|
||||
Pattern::Garbage => {
|
||||
output.push((match_pattern.span, FlatShape::Garbage));
|
||||
}
|
||||
Pattern::IgnoreValue => {
|
||||
output.push((match_pattern.span, FlatShape::Nothing));
|
||||
}
|
||||
Pattern::List(items) => {
|
||||
if let Some(first) = items.first() {
|
||||
if let Some(last) = items.last() {
|
||||
output.push((
|
||||
Span::new(match_pattern.span.start, first.span.start),
|
||||
FlatShape::MatchPattern,
|
||||
));
|
||||
for item in items {
|
||||
output.extend(flatten_pattern(item));
|
||||
}
|
||||
output.push((
|
||||
Span::new(last.span.end, match_pattern.span.end),
|
||||
FlatShape::MatchPattern,
|
||||
))
|
||||
}
|
||||
} else {
|
||||
output.push((match_pattern.span, FlatShape::MatchPattern));
|
||||
}
|
||||
}
|
||||
Pattern::Record(items) => {
|
||||
if let Some(first) = items.first() {
|
||||
if let Some(last) = items.last() {
|
||||
output.push((
|
||||
Span::new(match_pattern.span.start, first.1.span.start),
|
||||
FlatShape::MatchPattern,
|
||||
));
|
||||
for item in items {
|
||||
output.extend(flatten_pattern(&item.1));
|
||||
}
|
||||
output.push((
|
||||
Span::new(last.1.span.end, match_pattern.span.end),
|
||||
FlatShape::MatchPattern,
|
||||
))
|
||||
}
|
||||
} else {
|
||||
output.push((match_pattern.span, FlatShape::MatchPattern));
|
||||
}
|
||||
}
|
||||
Pattern::Value(_) => {
|
||||
output.push((match_pattern.span, FlatShape::MatchPattern));
|
||||
}
|
||||
Pattern::Variable(_) => {
|
||||
output.push((match_pattern.span, FlatShape::Variable));
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ mod known_external;
|
||||
mod lex;
|
||||
mod lite_parser;
|
||||
mod parse_keywords;
|
||||
mod parse_patterns;
|
||||
mod parser;
|
||||
mod type_check;
|
||||
|
||||
|
248
crates/nu-parser/src/parse_patterns.rs
Normal file
248
crates/nu-parser/src/parse_patterns.rs
Normal file
@ -0,0 +1,248 @@
|
||||
use nu_protocol::{
|
||||
ast::{Expr, Expression, MatchPattern, Pattern},
|
||||
engine::StateWorkingSet,
|
||||
Span, SyntaxShape, Type,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
lex, lite_parse,
|
||||
parser::{is_variable, parse_value},
|
||||
LiteElement, ParseError,
|
||||
};
|
||||
|
||||
pub fn garbage(span: Span) -> MatchPattern {
|
||||
MatchPattern {
|
||||
pattern: Pattern::Garbage,
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_match_pattern(
|
||||
working_set: &mut StateWorkingSet,
|
||||
span: Span,
|
||||
) -> (Expression, Option<ParseError>) {
|
||||
working_set.enter_scope();
|
||||
let (output, err) = parse_pattern(working_set, span);
|
||||
working_set.exit_scope();
|
||||
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::MatchPattern(Box::new(output)),
|
||||
span,
|
||||
ty: Type::Any,
|
||||
custom_completion: None,
|
||||
},
|
||||
err,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_pattern(
|
||||
working_set: &mut StateWorkingSet,
|
||||
span: Span,
|
||||
) -> (MatchPattern, Option<ParseError>) {
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
|
||||
if bytes.starts_with(b"$") {
|
||||
// Variable pattern
|
||||
parse_variable_pattern(working_set, span)
|
||||
} else if bytes.starts_with(b"{") {
|
||||
// Record pattern
|
||||
parse_record_pattern(working_set, span)
|
||||
} else if bytes.starts_with(b"[") {
|
||||
// List pattern
|
||||
parse_list_pattern(working_set, span)
|
||||
} else if bytes == b"_" {
|
||||
(
|
||||
MatchPattern {
|
||||
pattern: Pattern::IgnoreValue,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
)
|
||||
} else {
|
||||
// Literal value
|
||||
let (value, error) = parse_value(working_set, span, &SyntaxShape::Any, &[]);
|
||||
(
|
||||
MatchPattern {
|
||||
pattern: Pattern::Value(value),
|
||||
span,
|
||||
},
|
||||
error,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_variable_pattern(
|
||||
working_set: &mut StateWorkingSet,
|
||||
span: Span,
|
||||
) -> (MatchPattern, Option<ParseError>) {
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
|
||||
if is_variable(bytes) {
|
||||
if let Some(var_id) = working_set.find_variable(bytes) {
|
||||
(
|
||||
MatchPattern {
|
||||
pattern: Pattern::Variable(var_id),
|
||||
span,
|
||||
},
|
||||
None,
|
||||
)
|
||||
} else {
|
||||
let var_id = working_set.add_variable(bytes.to_vec(), span, Type::Any, true);
|
||||
|
||||
(
|
||||
MatchPattern {
|
||||
pattern: Pattern::Variable(var_id),
|
||||
span,
|
||||
},
|
||||
None,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
(
|
||||
garbage(span),
|
||||
Some(ParseError::Expected("valid variable name".into(), span)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_list_pattern(
|
||||
working_set: &mut StateWorkingSet,
|
||||
span: Span,
|
||||
) -> (MatchPattern, Option<ParseError>) {
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
|
||||
let mut error = None;
|
||||
|
||||
let mut start = span.start;
|
||||
let mut end = span.end;
|
||||
|
||||
if bytes.starts_with(b"[") {
|
||||
start += 1;
|
||||
}
|
||||
if bytes.ends_with(b"]") {
|
||||
end -= 1;
|
||||
} else {
|
||||
error = error.or_else(|| Some(ParseError::Unclosed("]".into(), Span::new(end, end))));
|
||||
}
|
||||
|
||||
let inner_span = Span::new(start, end);
|
||||
let source = working_set.get_span_contents(inner_span);
|
||||
|
||||
let (output, err) = lex(source, inner_span.start, &[b'\n', b'\r', b','], &[], true);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
error = error.or(err);
|
||||
|
||||
let mut args = vec![];
|
||||
|
||||
if !output.block.is_empty() {
|
||||
for arg in &output.block[0].commands {
|
||||
let mut spans_idx = 0;
|
||||
|
||||
if let LiteElement::Command(_, command) = arg {
|
||||
while spans_idx < command.parts.len() {
|
||||
let (arg, err) = parse_pattern(working_set, command.parts[spans_idx]);
|
||||
error = error.or(err);
|
||||
|
||||
args.push(arg);
|
||||
|
||||
spans_idx += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(
|
||||
MatchPattern {
|
||||
pattern: Pattern::List(args),
|
||||
span,
|
||||
},
|
||||
error,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_record_pattern(
|
||||
working_set: &mut StateWorkingSet,
|
||||
span: Span,
|
||||
) -> (MatchPattern, Option<ParseError>) {
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
|
||||
let mut error = None;
|
||||
let mut start = span.start;
|
||||
let mut end = span.end;
|
||||
|
||||
if bytes.starts_with(b"{") {
|
||||
start += 1;
|
||||
} else {
|
||||
error = error.or_else(|| {
|
||||
Some(ParseError::Expected(
|
||||
"{".into(),
|
||||
Span::new(start, start + 1),
|
||||
))
|
||||
});
|
||||
}
|
||||
|
||||
if bytes.ends_with(b"}") {
|
||||
end -= 1;
|
||||
} else {
|
||||
error = error.or_else(|| Some(ParseError::Unclosed("}".into(), Span::new(end, end))));
|
||||
}
|
||||
|
||||
let inner_span = Span::new(start, end);
|
||||
let source = working_set.get_span_contents(inner_span);
|
||||
|
||||
let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':'], true);
|
||||
error = error.or(err);
|
||||
|
||||
let mut output = vec![];
|
||||
let mut idx = 0;
|
||||
|
||||
while idx < tokens.len() {
|
||||
let bytes = working_set.get_span_contents(tokens[idx].span);
|
||||
let (field, pattern) = if !bytes.is_empty() && bytes[0] == b'$' {
|
||||
// If this is a variable, treat it as both the name of the field and the pattern
|
||||
let field = String::from_utf8_lossy(&bytes[1..]).to_string();
|
||||
|
||||
let (pattern, err) = parse_variable_pattern(working_set, tokens[idx].span);
|
||||
error = error.or(err);
|
||||
|
||||
(field, pattern)
|
||||
} else {
|
||||
let field = String::from_utf8_lossy(bytes).to_string();
|
||||
|
||||
idx += 1;
|
||||
if idx == tokens.len() {
|
||||
return (
|
||||
garbage(span),
|
||||
Some(ParseError::Expected("record".into(), span)),
|
||||
);
|
||||
}
|
||||
let colon = working_set.get_span_contents(tokens[idx].span);
|
||||
idx += 1;
|
||||
if idx == tokens.len() || colon != b":" {
|
||||
//FIXME: need better error
|
||||
return (
|
||||
garbage(span),
|
||||
Some(ParseError::Expected("record".into(), span)),
|
||||
);
|
||||
}
|
||||
let (pattern, err) = parse_pattern(working_set, tokens[idx].span);
|
||||
error = error.or(err);
|
||||
|
||||
(field, pattern)
|
||||
};
|
||||
idx += 1;
|
||||
|
||||
output.push((field, pattern));
|
||||
}
|
||||
|
||||
(
|
||||
MatchPattern {
|
||||
pattern: Pattern::Record(output),
|
||||
span,
|
||||
},
|
||||
error,
|
||||
)
|
||||
}
|
@ -3,6 +3,7 @@ use crate::{
|
||||
lex,
|
||||
lite_parser::{lite_parse, LiteCommand, LiteElement},
|
||||
parse_mut,
|
||||
parse_patterns::{parse_match_pattern, parse_pattern},
|
||||
type_check::{math_result_type, type_compatible},
|
||||
ParseError, Token, TokenContents,
|
||||
};
|
||||
@ -76,6 +77,7 @@ pub fn is_math_expression_like(
|
||||
|| bytes == b"null"
|
||||
|| bytes == b"not"
|
||||
|| bytes == b"if"
|
||||
|| bytes == b"match"
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@ -120,7 +122,7 @@ fn is_identifier(bytes: &[u8]) -> bool {
|
||||
bytes.iter().all(|x| is_identifier_byte(*x))
|
||||
}
|
||||
|
||||
fn is_variable(bytes: &[u8]) -> bool {
|
||||
pub fn is_variable(bytes: &[u8]) -> bool {
|
||||
if bytes.len() > 1 && bytes[0] == b'$' {
|
||||
is_identifier(&bytes[1..])
|
||||
} else {
|
||||
@ -1787,6 +1789,8 @@ pub fn parse_brace_expr(
|
||||
parse_closure_expression(working_set, shape, span, expand_aliases_denylist, true)
|
||||
} else if matches!(shape, SyntaxShape::Block) {
|
||||
parse_block_expression(working_set, span, expand_aliases_denylist)
|
||||
} else if matches!(shape, SyntaxShape::MatchBlock) {
|
||||
parse_match_block_expression(working_set, span, expand_aliases_denylist)
|
||||
} else {
|
||||
parse_record(working_set, span, expand_aliases_denylist)
|
||||
}
|
||||
@ -1802,6 +1806,8 @@ pub fn parse_brace_expr(
|
||||
parse_closure_expression(working_set, shape, span, expand_aliases_denylist, true)
|
||||
} else if matches!(shape, SyntaxShape::Block) {
|
||||
parse_block_expression(working_set, span, expand_aliases_denylist)
|
||||
} else if matches!(shape, SyntaxShape::MatchBlock) {
|
||||
parse_match_block_expression(working_set, span, expand_aliases_denylist)
|
||||
} else {
|
||||
(
|
||||
Expression::garbage(span),
|
||||
@ -4415,6 +4421,113 @@ pub fn parse_block_expression(
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_match_block_expression(
|
||||
working_set: &mut StateWorkingSet,
|
||||
span: Span,
|
||||
expand_aliases_denylist: &[usize],
|
||||
) -> (Expression, Option<ParseError>) {
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
let mut error = None;
|
||||
|
||||
let mut start = span.start;
|
||||
let mut end = span.end;
|
||||
|
||||
if bytes.starts_with(b"{") {
|
||||
start += 1;
|
||||
} else {
|
||||
return (
|
||||
garbage(span),
|
||||
Some(ParseError::Expected("closure".into(), span)),
|
||||
);
|
||||
}
|
||||
if bytes.ends_with(b"}") {
|
||||
end -= 1;
|
||||
} else {
|
||||
error = error.or_else(|| Some(ParseError::Unclosed("}".into(), Span::new(end, end))));
|
||||
}
|
||||
|
||||
let inner_span = Span::new(start, end);
|
||||
|
||||
let source = working_set.get_span_contents(inner_span);
|
||||
|
||||
let (output, err) = lex(source, start, &[b' ', b'\r', b'\n', b','], &[], false);
|
||||
error = error.or(err);
|
||||
|
||||
let mut position = 0;
|
||||
|
||||
let mut output_matches = vec![];
|
||||
|
||||
while position < output.len() {
|
||||
// Each match gets its own scope
|
||||
|
||||
working_set.enter_scope();
|
||||
|
||||
// First parse the pattern
|
||||
let (pattern, err) = parse_pattern(working_set, output[position].span);
|
||||
error = error.or(err);
|
||||
|
||||
position += 1;
|
||||
|
||||
if position >= output.len() {
|
||||
error = error.or(Some(ParseError::Mismatch(
|
||||
"=>".into(),
|
||||
"end of input".into(),
|
||||
Span::new(output[position - 1].span.end, output[position - 1].span.end),
|
||||
)));
|
||||
|
||||
working_set.exit_scope();
|
||||
break;
|
||||
}
|
||||
|
||||
// Then the =>
|
||||
let thick_arrow = working_set.get_span_contents(output[position].span);
|
||||
if thick_arrow != b"=>" {
|
||||
error = error.or(Some(ParseError::Mismatch(
|
||||
"=>".into(),
|
||||
"end of input".into(),
|
||||
Span::new(output[position - 1].span.end, output[position - 1].span.end),
|
||||
)));
|
||||
}
|
||||
|
||||
// Finally, the value/expression/block that we will run to produce the result
|
||||
position += 1;
|
||||
|
||||
if position >= output.len() {
|
||||
error = error.or(Some(ParseError::Mismatch(
|
||||
"match result".into(),
|
||||
"end of input".into(),
|
||||
Span::new(output[position - 1].span.end, output[position - 1].span.end),
|
||||
)));
|
||||
|
||||
working_set.exit_scope();
|
||||
break;
|
||||
}
|
||||
|
||||
let (result, err) = parse_multispan_value(
|
||||
working_set,
|
||||
&[output[position].span],
|
||||
&mut 0,
|
||||
&SyntaxShape::OneOf(vec![SyntaxShape::Block, SyntaxShape::Expression]),
|
||||
expand_aliases_denylist,
|
||||
);
|
||||
error = error.or(err);
|
||||
position += 1;
|
||||
working_set.exit_scope();
|
||||
|
||||
output_matches.push((pattern, result));
|
||||
}
|
||||
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::MatchBlock(output_matches),
|
||||
span,
|
||||
ty: Type::Any,
|
||||
custom_completion: None,
|
||||
},
|
||||
error,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_closure_expression(
|
||||
working_set: &mut StateWorkingSet,
|
||||
shape: &SyntaxShape,
|
||||
@ -4650,6 +4763,10 @@ pub fn parse_value(
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if matches!(shape, SyntaxShape::MatchPattern) {
|
||||
return parse_match_pattern(working_set, span);
|
||||
}
|
||||
|
||||
match bytes[0] {
|
||||
b'$' => return parse_dollar_expr(working_set, span, expand_aliases_denylist),
|
||||
b'(' => return parse_paren_expr(working_set, span, shape, expand_aliases_denylist),
|
||||
@ -4688,6 +4805,7 @@ pub fn parse_value(
|
||||
SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span),
|
||||
SyntaxShape::String => parse_string(working_set, span, expand_aliases_denylist),
|
||||
SyntaxShape::Binary => parse_binary(working_set, span),
|
||||
SyntaxShape::MatchPattern => parse_match_pattern(working_set, span),
|
||||
SyntaxShape::Signature => {
|
||||
if bytes.starts_with(b"[") {
|
||||
parse_signature(working_set, span, expand_aliases_denylist)
|
||||
@ -4998,7 +5116,7 @@ pub fn parse_math_expression(
|
||||
|
||||
let first_span = working_set.get_span_contents(spans[0]);
|
||||
|
||||
if first_span == b"if" {
|
||||
if first_span == b"if" || first_span == b"match" {
|
||||
// If expression
|
||||
if spans.len() > 1 {
|
||||
return parse_call(working_set, spans, spans[0], expand_aliases_denylist, false);
|
||||
@ -6085,6 +6203,13 @@ pub fn discover_captures_in_expr(
|
||||
output.extend(&result);
|
||||
}
|
||||
}
|
||||
Expr::MatchPattern(_) => {}
|
||||
Expr::MatchBlock(match_block) => {
|
||||
for match_ in match_block {
|
||||
let result = discover_captures_in_expr(working_set, &match_.1, seen, seen_blocks)?;
|
||||
output.extend(&result);
|
||||
}
|
||||
}
|
||||
Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => {
|
||||
let block = working_set.get_block(*block_id);
|
||||
let results = {
|
||||
|
Reference in New Issue
Block a user