Add pattern matching (#8590)

# Description

This adds `match` and basic pattern matching.

An example:

```
match $x {
  1..10 => { print "Value is between 1 and 10" }
  { foo: $bar } => { print $"Value has a 'foo' field with value ($bar)" }
  [$a, $b] => { print $"Value is a list with two items: ($a) and ($b)" }
  _ => { print "Value is none of the above" }
}
```

Like the recent changes to `if` to allow it to be used as an expression,
`match` can also be used as an expression. This allows you to assign the
result to a variable, e.g. `let xyz = match ...`

I've also included a short-hand pattern for matching records, as I think
it might help when doing a lot of record patterns: `{$foo}` which is
equivalent to `{foo: $foo}`.

There are still missing components, so consider this the first step in
full pattern matching support. Currently missing:
* Patterns for strings
* Or-patterns (like the `|` in Rust)
* Patterns for tables (unclear how we want to match a table, so it'll
need some design)
* Patterns for binary values
* And much more

# User-Facing Changes

[see above]

# Tests + Formatting

Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect` to check that you're using the standard code
style
- `cargo test --workspace` to check that all tests pass

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```

# After Submitting

If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
This commit is contained in:
JT
2023-03-24 14:52:01 +13:00
committed by GitHub
parent be52f7fb07
commit 2c3aade057
30 changed files with 955 additions and 7 deletions

View File

@ -1,5 +1,6 @@
use nu_protocol::ast::{
Block, Expr, Expression, ImportPatternMember, PathMember, Pipeline, PipelineElement,
Block, Expr, Expression, ImportPatternMember, MatchPattern, PathMember, Pattern, Pipeline,
PipelineElement,
};
use nu_protocol::DeclId;
use nu_protocol::{engine::StateWorkingSet, Span};
@ -25,6 +26,7 @@ pub enum FlatShape {
InternalCall,
List,
Literal,
MatchPattern,
Nothing,
Operator,
Or,
@ -60,6 +62,7 @@ impl Display for FlatShape {
FlatShape::InternalCall => write!(f, "shape_internalcall"),
FlatShape::List => write!(f, "shape_list"),
FlatShape::Literal => write!(f, "shape_literal"),
FlatShape::MatchPattern => write!(f, "shape_match_pattern"),
FlatShape::Nothing => write!(f, "shape_nothing"),
FlatShape::Operator => write!(f, "shape_operator"),
FlatShape::Or => write!(f, "shape_or"),
@ -212,6 +215,20 @@ pub fn flatten_expression(
Expr::Float(_) => {
vec![(expr.span, FlatShape::Float)]
}
Expr::MatchPattern(pattern) => {
// FIXME: do nicer flattening later
flatten_pattern(pattern)
}
Expr::MatchBlock(matches) => {
let mut output = vec![];
for match_ in matches {
output.extend(flatten_pattern(&match_.0));
output.extend(flatten_expression(working_set, &match_.1));
}
output
}
Expr::ValueWithUnit(x, unit) => {
let mut output = flatten_expression(working_set, x);
output.push((unit.span, FlatShape::String));
@ -488,3 +505,60 @@ pub fn flatten_pipeline(
}
output
}
/// Flattens a match pattern into `(Span, FlatShape)` pairs.
///
/// Garbage, ignore (`_`), value and variable patterns produce a single entry
/// covering the whole pattern span. A non-empty list or record pattern
/// produces an entry for the text before its first sub-pattern, then the
/// flattened sub-patterns in order, then an entry for the text after its last
/// sub-pattern. An empty list or record is a single `MatchPattern` entry.
pub fn flatten_pattern(match_pattern: &MatchPattern) -> Vec<(Span, FlatShape)> {
    let whole = match_pattern.span;

    match &match_pattern.pattern {
        Pattern::Garbage => vec![(whole, FlatShape::Garbage)],
        Pattern::IgnoreValue => vec![(whole, FlatShape::Nothing)],
        Pattern::Value(_) => vec![(whole, FlatShape::MatchPattern)],
        Pattern::Variable(_) => vec![(whole, FlatShape::Variable)],
        Pattern::List(items) => match (items.first(), items.last()) {
            (Some(first), Some(last)) => {
                // Opening delimiter (and anything else before the first item).
                let mut shapes = vec![(
                    Span::new(whole.start, first.span.start),
                    FlatShape::MatchPattern,
                )];
                shapes.extend(items.iter().flat_map(flatten_pattern));
                // Closing delimiter (and anything else after the last item).
                shapes.push((
                    Span::new(last.span.end, whole.end),
                    FlatShape::MatchPattern,
                ));
                shapes
            }
            _ => vec![(whole, FlatShape::MatchPattern)],
        },
        Pattern::Record(fields) => match (fields.first(), fields.last()) {
            (Some(first), Some(last)) => {
                // Everything up to the first field's pattern.
                let mut shapes = vec![(
                    Span::new(whole.start, first.1.span.start),
                    FlatShape::MatchPattern,
                )];
                shapes.extend(fields.iter().flat_map(|(_, sub)| flatten_pattern(sub)));
                // Everything after the last field's pattern.
                shapes.push((
                    Span::new(last.1.span.end, whole.end),
                    FlatShape::MatchPattern,
                ));
                shapes
            }
            _ => vec![(whole, FlatShape::MatchPattern)],
        },
    }
}

View File

@ -6,6 +6,7 @@ mod known_external;
mod lex;
mod lite_parser;
mod parse_keywords;
mod parse_patterns;
mod parser;
mod type_check;

View File

@ -0,0 +1,248 @@
use nu_protocol::{
ast::{Expr, Expression, MatchPattern, Pattern},
engine::StateWorkingSet,
Span, SyntaxShape, Type,
};
use crate::{
lex, lite_parse,
parser::{is_variable, parse_value},
LiteElement, ParseError,
};
/// Builds a placeholder `Pattern::Garbage` pattern covering `span`, used when
/// a pattern could not be parsed.
pub fn garbage(span: Span) -> MatchPattern {
    let pattern = Pattern::Garbage;
    MatchPattern { span, pattern }
}
pub fn parse_match_pattern(
working_set: &mut StateWorkingSet,
span: Span,
) -> (Expression, Option<ParseError>) {
working_set.enter_scope();
let (output, err) = parse_pattern(working_set, span);
working_set.exit_scope();
(
Expression {
expr: Expr::MatchPattern(Box::new(output)),
span,
ty: Type::Any,
custom_completion: None,
},
err,
)
}
/// Parses the text at `span` as a single pattern, dispatching on how it starts:
///
/// * `$...` -> variable pattern
/// * `{...` -> record pattern
/// * `[...` -> list pattern
/// * exactly `_` -> ignore-value pattern
/// * anything else -> a value parsed with `SyntaxShape::Any`
pub fn parse_pattern(
    working_set: &mut StateWorkingSet,
    span: Span,
) -> (MatchPattern, Option<ParseError>) {
    let contents = working_set.get_span_contents(span);
    // Copy out what we need so the borrow of the working set ends here.
    let leading = contents.first().copied();
    let is_wildcard = contents == b"_";

    match leading {
        Some(b'$') => parse_variable_pattern(working_set, span),
        Some(b'{') => parse_record_pattern(working_set, span),
        Some(b'[') => parse_list_pattern(working_set, span),
        _ if is_wildcard => {
            let ignore = MatchPattern {
                span,
                pattern: Pattern::IgnoreValue,
            };
            (ignore, None)
        }
        _ => {
            // Literal value
            let (value, error) = parse_value(working_set, span, &SyntaxShape::Any, &[]);
            let literal = MatchPattern {
                span,
                pattern: Pattern::Value(value),
            };
            (literal, error)
        }
    }
}
/// Parses `span` as a variable pattern such as `$foo`.
///
/// If the working set already knows a variable with this name, its id is
/// reused; otherwise a new variable of type `Type::Any` is added. Text that is
/// not a valid variable name yields a garbage pattern and an
/// `Expected("valid variable name")` error.
pub fn parse_variable_pattern(
    working_set: &mut StateWorkingSet,
    span: Span,
) -> (MatchPattern, Option<ParseError>) {
    let name = working_set.get_span_contents(span);

    if !is_variable(name) {
        return (
            garbage(span),
            Some(ParseError::Expected("valid variable name".into(), span)),
        );
    }

    let var_id = match working_set.find_variable(name) {
        Some(existing) => existing,
        None => working_set.add_variable(name.to_vec(), span, Type::Any, true),
    };

    (
        MatchPattern {
            span,
            pattern: Pattern::Variable(var_id),
        },
        None,
    )
}
/// Parses `span` as a list pattern such as `[$a, $b]`.
///
/// The surrounding brackets are stripped (a missing `]` is recorded as an
/// `Unclosed` error), the interior is lexed with newline, carriage return and
/// comma as additional whitespace, and every part of every command in the
/// first lite-parsed pipeline is parsed as a nested pattern. Only the first
/// error encountered is returned.
pub fn parse_list_pattern(
    working_set: &mut StateWorkingSet,
    span: Span,
) -> (MatchPattern, Option<ParseError>) {
    let contents = working_set.get_span_contents(span);

    let inner_start = if contents.starts_with(b"[") {
        span.start + 1
    } else {
        span.start
    };
    let (inner_end, mut error) = if contents.ends_with(b"]") {
        (span.end - 1, None)
    } else {
        (
            span.end,
            Some(ParseError::Unclosed(
                "]".into(),
                Span::new(span.end, span.end),
            )),
        )
    };

    let inner_span = Span::new(inner_start, inner_end);
    let source = working_set.get_span_contents(inner_span);

    let (tokens, lex_err) = lex(source, inner_span.start, &[b'\n', b'\r', b','], &[], true);
    error = error.or(lex_err);

    let (lite_block, lite_err) = lite_parse(&tokens);
    error = error.or(lite_err);

    let mut items = vec![];
    if let Some(first_pipeline) = lite_block.block.first() {
        for element in &first_pipeline.commands {
            if let LiteElement::Command(_, command) = element {
                for part in &command.parts {
                    let (item, err) = parse_pattern(working_set, *part);
                    error = error.or(err);
                    items.push(item);
                }
            }
        }
    }

    (
        MatchPattern {
            span,
            pattern: Pattern::List(items),
        },
        error,
    )
}
/// Parses `span` as a record pattern such as `{ foo: $bar }` or `{ $foo }`.
///
/// The braces are stripped (a missing `{` or `}` is recorded as an error) and
/// the interior is lexed with newline, carriage return and comma as additional
/// whitespace and `:` as a special token. Each entry is either
///
/// * a `$name` token, which is used both as the field name (without the `$`)
///   and as a variable pattern, or
/// * `field`, `:`, `pattern` as three consecutive tokens.
///
/// A field that is not followed by `:` and a pattern aborts parsing with a
/// garbage pattern and an `Expected("record")` error. Otherwise only the first
/// error encountered is returned.
pub fn parse_record_pattern(
    working_set: &mut StateWorkingSet,
    span: Span,
) -> (MatchPattern, Option<ParseError>) {
    let contents = working_set.get_span_contents(span);

    let (inner_start, mut error) = if contents.starts_with(b"{") {
        (span.start + 1, None)
    } else {
        (
            span.start,
            Some(ParseError::Expected(
                "{".into(),
                Span::new(span.start, span.start + 1),
            )),
        )
    };

    let inner_end = if contents.ends_with(b"}") {
        span.end - 1
    } else {
        error = error.or_else(|| {
            Some(ParseError::Unclosed(
                "}".into(),
                Span::new(span.end, span.end),
            ))
        });
        span.end
    };

    let inner_span = Span::new(inner_start, inner_end);
    let source = working_set.get_span_contents(inner_span);

    let (tokens, lex_err) = lex(source, inner_start, &[b'\n', b'\r', b','], &[b':'], true);
    error = error.or(lex_err);

    let mut fields = vec![];
    let mut cursor = 0;

    while cursor < tokens.len() {
        let key_span = tokens[cursor].span;
        let key_bytes = working_set.get_span_contents(key_span);

        if key_bytes.starts_with(b"$") {
            // If this is a variable, treat it as both the name of the field and the pattern
            let field = String::from_utf8_lossy(&key_bytes[1..]).to_string();
            let (pattern, err) = parse_variable_pattern(working_set, key_span);
            error = error.or(err);

            fields.push((field, pattern));
            cursor += 1;
        } else {
            let field = String::from_utf8_lossy(key_bytes).to_string();

            // The field name must be followed by a `:` token...
            let colon_token = match tokens.get(cursor + 1) {
                Some(token) => token,
                None => {
                    return (
                        garbage(span),
                        Some(ParseError::Expected("record".into(), span)),
                    )
                }
            };
            let colon = working_set.get_span_contents(colon_token.span);

            // ...and then by the token holding the field's pattern.
            let value_token = match tokens.get(cursor + 2) {
                Some(token) if colon == b":" => token,
                _ => {
                    //FIXME: need better error
                    return (
                        garbage(span),
                        Some(ParseError::Expected("record".into(), span)),
                    );
                }
            };

            let (pattern, err) = parse_pattern(working_set, value_token.span);
            error = error.or(err);

            fields.push((field, pattern));
            cursor += 3;
        }
    }

    (
        MatchPattern {
            span,
            pattern: Pattern::Record(fields),
        },
        error,
    )
}

View File

@ -3,6 +3,7 @@ use crate::{
lex,
lite_parser::{lite_parse, LiteCommand, LiteElement},
parse_mut,
parse_patterns::{parse_match_pattern, parse_pattern},
type_check::{math_result_type, type_compatible},
ParseError, Token, TokenContents,
};
@ -76,6 +77,7 @@ pub fn is_math_expression_like(
|| bytes == b"null"
|| bytes == b"not"
|| bytes == b"if"
|| bytes == b"match"
{
return true;
}
@ -120,7 +122,7 @@ fn is_identifier(bytes: &[u8]) -> bool {
bytes.iter().all(|x| is_identifier_byte(*x))
}
fn is_variable(bytes: &[u8]) -> bool {
pub fn is_variable(bytes: &[u8]) -> bool {
if bytes.len() > 1 && bytes[0] == b'$' {
is_identifier(&bytes[1..])
} else {
@ -1787,6 +1789,8 @@ pub fn parse_brace_expr(
parse_closure_expression(working_set, shape, span, expand_aliases_denylist, true)
} else if matches!(shape, SyntaxShape::Block) {
parse_block_expression(working_set, span, expand_aliases_denylist)
} else if matches!(shape, SyntaxShape::MatchBlock) {
parse_match_block_expression(working_set, span, expand_aliases_denylist)
} else {
parse_record(working_set, span, expand_aliases_denylist)
}
@ -1802,6 +1806,8 @@ pub fn parse_brace_expr(
parse_closure_expression(working_set, shape, span, expand_aliases_denylist, true)
} else if matches!(shape, SyntaxShape::Block) {
parse_block_expression(working_set, span, expand_aliases_denylist)
} else if matches!(shape, SyntaxShape::MatchBlock) {
parse_match_block_expression(working_set, span, expand_aliases_denylist)
} else {
(
Expression::garbage(span),
@ -4415,6 +4421,113 @@ pub fn parse_block_expression(
)
}
pub fn parse_match_block_expression(
working_set: &mut StateWorkingSet,
span: Span,
expand_aliases_denylist: &[usize],
) -> (Expression, Option<ParseError>) {
let bytes = working_set.get_span_contents(span);
let mut error = None;
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"{") {
start += 1;
} else {
return (
garbage(span),
Some(ParseError::Expected("closure".into(), span)),
);
}
if bytes.ends_with(b"}") {
end -= 1;
} else {
error = error.or_else(|| Some(ParseError::Unclosed("}".into(), Span::new(end, end))));
}
let inner_span = Span::new(start, end);
let source = working_set.get_span_contents(inner_span);
let (output, err) = lex(source, start, &[b' ', b'\r', b'\n', b','], &[], false);
error = error.or(err);
let mut position = 0;
let mut output_matches = vec![];
while position < output.len() {
// Each match gets its own scope
working_set.enter_scope();
// First parse the pattern
let (pattern, err) = parse_pattern(working_set, output[position].span);
error = error.or(err);
position += 1;
if position >= output.len() {
error = error.or(Some(ParseError::Mismatch(
"=>".into(),
"end of input".into(),
Span::new(output[position - 1].span.end, output[position - 1].span.end),
)));
working_set.exit_scope();
break;
}
// Then the =>
let thick_arrow = working_set.get_span_contents(output[position].span);
if thick_arrow != b"=>" {
error = error.or(Some(ParseError::Mismatch(
"=>".into(),
"end of input".into(),
Span::new(output[position - 1].span.end, output[position - 1].span.end),
)));
}
// Finally, the value/expression/block that we will run to produce the result
position += 1;
if position >= output.len() {
error = error.or(Some(ParseError::Mismatch(
"match result".into(),
"end of input".into(),
Span::new(output[position - 1].span.end, output[position - 1].span.end),
)));
working_set.exit_scope();
break;
}
let (result, err) = parse_multispan_value(
working_set,
&[output[position].span],
&mut 0,
&SyntaxShape::OneOf(vec![SyntaxShape::Block, SyntaxShape::Expression]),
expand_aliases_denylist,
);
error = error.or(err);
position += 1;
working_set.exit_scope();
output_matches.push((pattern, result));
}
(
Expression {
expr: Expr::MatchBlock(output_matches),
span,
ty: Type::Any,
custom_completion: None,
},
error,
)
}
pub fn parse_closure_expression(
working_set: &mut StateWorkingSet,
shape: &SyntaxShape,
@ -4650,6 +4763,10 @@ pub fn parse_value(
_ => {}
}
if matches!(shape, SyntaxShape::MatchPattern) {
return parse_match_pattern(working_set, span);
}
match bytes[0] {
b'$' => return parse_dollar_expr(working_set, span, expand_aliases_denylist),
b'(' => return parse_paren_expr(working_set, span, shape, expand_aliases_denylist),
@ -4688,6 +4805,7 @@ pub fn parse_value(
SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span),
SyntaxShape::String => parse_string(working_set, span, expand_aliases_denylist),
SyntaxShape::Binary => parse_binary(working_set, span),
SyntaxShape::MatchPattern => parse_match_pattern(working_set, span),
SyntaxShape::Signature => {
if bytes.starts_with(b"[") {
parse_signature(working_set, span, expand_aliases_denylist)
@ -4998,7 +5116,7 @@ pub fn parse_math_expression(
let first_span = working_set.get_span_contents(spans[0]);
if first_span == b"if" {
if first_span == b"if" || first_span == b"match" {
// If expression
if spans.len() > 1 {
return parse_call(working_set, spans, spans[0], expand_aliases_denylist, false);
@ -6085,6 +6203,13 @@ pub fn discover_captures_in_expr(
output.extend(&result);
}
}
Expr::MatchPattern(_) => {}
Expr::MatchBlock(match_block) => {
for match_ in match_block {
let result = discover_captures_in_expr(working_set, &match_.1, seen, seen_blocks)?;
output.extend(&result);
}
}
Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => {
let block = working_set.get_block(*block_id);
let results = {