From 3b7d7861e3acbf13f75957b990fb529b3390a802 Mon Sep 17 00:00:00 2001 From: JT Date: Tue, 7 Sep 2021 10:02:24 +1200 Subject: [PATCH] Add cell paths --- crates/nu-cli/src/errors.rs | 34 ++++ crates/nu-engine/src/eval.rs | 5 + crates/nu-parser/src/flatten.rs | 13 +- crates/nu-parser/src/parser.rs | 213 +++++++++++++++--------- crates/nu-protocol/src/ast/cell_path.rs | 19 +++ crates/nu-protocol/src/ast/expr.rs | 3 +- crates/nu-protocol/src/ast/mod.rs | 2 + crates/nu-protocol/src/shell_error.rs | 4 + crates/nu-protocol/src/syntax_shape.rs | 8 +- crates/nu-protocol/src/ty.rs | 4 +- crates/nu-protocol/src/value.rs | 169 +++++++++++++++++-- src/tests.rs | 20 +++ 12 files changed, 395 insertions(+), 99 deletions(-) create mode 100644 crates/nu-protocol/src/ast/cell_path.rs diff --git a/crates/nu-cli/src/errors.rs b/crates/nu-cli/src/errors.rs index a75d06ae9..81cce51b5 100644 --- a/crates/nu-cli/src/errors.rs +++ b/crates/nu-cli/src/errors.rs @@ -328,6 +328,40 @@ pub fn report_shell_error( Label::primary(diag_file_id, diag_range).with_message("division by zero") ]) } + ShellError::AccessBeyondEnd(len, span) => { + let (diag_file_id, diag_range) = convert_span_to_diag(working_set, span)?; + + Diagnostic::error() + .with_message("Row number too large") + .with_labels(vec![Label::primary(diag_file_id, diag_range) + .with_message(format!("row number too large (max: {})", *len))]) + } + ShellError::AccessBeyondEndOfStream(span) => { + let (diag_file_id, diag_range) = convert_span_to_diag(working_set, span)?; + + Diagnostic::error() + .with_message("Row number too large") + .with_labels(vec![Label::primary(diag_file_id, diag_range) + .with_message("row number too large")]) + } + ShellError::IncompatiblePathAccess(name, span) => { + let (diag_file_id, diag_range) = convert_span_to_diag(working_set, span)?; + + Diagnostic::error() + .with_message("Data cannot be accessed with a column path") + .with_labels(vec![Label::primary(diag_file_id, diag_range) + .with_message(format!("{} doesn't support column paths", name))]) + } + ShellError::CantFindColumn(span) => { + let (diag_file_id, diag_range) = convert_span_to_diag(working_set, span)?; + + //FIXME: add "did you mean" + Diagnostic::error() + .with_message("Cannot find column") + .with_labels(vec![ + Label::primary(diag_file_id, diag_range).with_message("cannot find column") + ]) + } }; // println!("DIAG"); diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index f4042816b..c4f7b7344 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -102,6 +102,11 @@ pub fn eval_expression( Expr::Var(var_id) => context .get_var(*var_id) .map_err(move |_| ShellError::VariableNotFoundAtRuntime(expr.span)), + Expr::FullCellPath(column_path) => { + let value = eval_expression(context, &column_path.head)?; + + value.follow_column_path(&column_path.tail) + } Expr::Call(call) => eval_call(context, call, Value::nothing()), Expr::ExternalCall(_, _) => Err(ShellError::ExternalNotSupported(expr.span)), Expr::Operator(_) => Ok(Value::Nothing { span: expr.span }), diff --git a/crates/nu-parser/src/flatten.rs b/crates/nu-parser/src/flatten.rs index dec4c89f5..7439da618 100644 --- a/crates/nu-parser/src/flatten.rs +++ b/crates/nu-parser/src/flatten.rs @@ -1,4 +1,4 @@ -use nu_protocol::ast::{Block, Expr, Expression, Pipeline, Statement}; +use nu_protocol::ast::{Block, Expr, Expression, PathMember, Pipeline, Statement}; use nu_protocol::{engine::StateWorkingSet, Span}; #[derive(Debug)] @@ -67,6 +67,17 @@ pub fn flatten_expression( Expr::Float(_) => { vec![(expr.span, FlatShape::Float)] } + Expr::FullCellPath(column_path) => { + let mut output = vec![]; + output.extend(flatten_expression(working_set, &column_path.head)); + for path_element in &column_path.tail { + match path_element { + PathMember::String { span, .. } => output.push((*span, FlatShape::String)), + PathMember::Int { span, .. } => output.push((*span, FlatShape::Int)), + } + } + output + } Expr::Range(from, to, op) => { let mut output = vec![]; if let Some(f) = from { diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 347629f7a..24b898bd5 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -6,7 +6,8 @@ use crate::{ use nu_protocol::{ ast::{ - Block, Call, Expr, Expression, Operator, Pipeline, RangeInclusion, RangeOperator, Statement, + Block, Call, Expr, Expression, FullCellPath, Operator, PathMember, Pipeline, + RangeInclusion, RangeOperator, Statement, }, engine::StateWorkingSet, span, Flag, PositionalArg, Signature, Span, SyntaxShape, Type, VarId, @@ -595,9 +596,9 @@ pub fn parse_call( } } -pub fn parse_int(token: &str, span: Span) -> (Expression, Option) { - if let Some(token) = token.strip_prefix("0x") { - if let Ok(v) = i64::from_str_radix(token, 16) { +pub fn parse_int(token: &[u8], span: Span) -> (Expression, Option) { + if let Some(token) = token.strip_prefix(b"0x") { + if let Ok(v) = i64::from_str_radix(&String::from_utf8_lossy(token), 16) { ( Expression { expr: Expr::Int(v), @@ -616,8 +617,8 @@ pub fn parse_int(token: &str, span: Span) -> (Expression, Option) { )), ) } - } else if let Some(token) = token.strip_prefix("0b") { - if let Ok(v) = i64::from_str_radix(token, 2) { + } else if let Some(token) = token.strip_prefix(b"0b") { + if let Ok(v) = i64::from_str_radix(&String::from_utf8_lossy(token), 2) { ( Expression { expr: Expr::Int(v), @@ -636,8 +637,8 @@ pub fn parse_int(token: &str, span: Span) -> (Expression, Option) { )), ) } - } else if let Some(token) = token.strip_prefix("0o") { - if let Ok(v) = i64::from_str_radix(token, 8) { + } else if let Some(token) = token.strip_prefix(b"0o") { + if let Ok(v) = i64::from_str_radix(&String::from_utf8_lossy(token), 8) { ( Expression { expr: Expr::Int(v), @@ -656,7 +657,7 @@ pub fn parse_int(token: &str, span: Span) -> (Expression, Option) { )), ) } - } else if let Ok(x) = token.parse::() { + } else if let Ok(x) = String::from_utf8_lossy(token).parse::() { ( Expression { expr: Expr::Int(x), @@ -673,8 +674,8 @@ pub fn parse_int(token: &str, span: Span) -> (Expression, Option) { } } -pub fn parse_float(token: &str, span: Span) -> (Expression, Option) { - if let Ok(x) = token.parse::() { +pub fn parse_float(token: &[u8], span: Span) -> (Expression, Option) { + if let Ok(x) = String::from_utf8_lossy(token).parse::() { ( Expression { expr: Expr::Float(x), @@ -691,7 +692,7 @@ pub fn parse_float(token: &str, span: Span) -> (Expression, Option) } } -pub fn parse_number(token: &str, span: Span) -> (Expression, Option) { +pub fn parse_number(token: &[u8], span: Span) -> (Expression, Option) { if let (x, None) = parse_int(token, span) { (x, None) } else if let (x, None) = parse_float(token, span) { @@ -850,7 +851,7 @@ pub(crate) fn parse_dollar_expr( } else if let (expr, None) = parse_range(working_set, span) { (expr, None) } else { - parse_variable_expr(working_set, span) + parse_full_column_path(working_set, span) } } @@ -1049,52 +1050,132 @@ pub fn parse_full_column_path( span: Span, ) -> (Expression, Option) { // FIXME: assume for now a paren expr, but needs more - let bytes = working_set.get_span_contents(span); + let full_column_span = span; + let source = working_set.get_span_contents(span); let mut error = None; - let mut start = span.start; - let mut end = span.end; + let (tokens, err) = lex(source, span.start, &[b'\n'], &[b'.']); + error = error.or(err); - if bytes.starts_with(b"(") { - start += 1; - } - if bytes.ends_with(b")") { - end -= 1; + let mut tokens = tokens.into_iter(); + if let Some(head) = tokens.next() { + let bytes = working_set.get_span_contents(head.span); + let head = if bytes.starts_with(b"(") { + let mut start = head.span.start; + let mut end = head.span.end; + + if bytes.starts_with(b"(") { + start += 1; + } + if bytes.ends_with(b")") { + end -= 1; + } else { + error = error.or_else(|| { + Some(ParseError::Unclosed( + ")".into(), + Span { + start: end, + end: end + 1, + }, + )) + }); + } + + let span = Span { start, end }; + + let source = working_set.get_span_contents(span); + + let (tokens, err) = lex(source, span.start, &[b'\n'], &[]); + error = error.or(err); + + let (output, err) = lite_parse(&tokens); + error = error.or(err); + + let (output, err) = parse_block(working_set, &output, true); + error = error.or(err); + + let block_id = working_set.add_block(output); + + Expression { + expr: Expr::Subexpression(block_id), + span, + ty: Type::Unknown, // FIXME + } + } else if bytes.starts_with(b"$") { + let (out, err) = parse_variable_expr(working_set, head.span); + error = error.or(err); + + out + } else { + return ( + garbage(span), + Some(ParseError::Mismatch( + "variable or subexpression".into(), + String::from_utf8_lossy(bytes).to_string(), + span, + )), + ); + }; + + let mut tail = vec![]; + + let mut expect_dot = true; + for path_element in tokens { + let bytes = working_set.get_span_contents(path_element.span); + + if expect_dot { + expect_dot = false; + if bytes.len() != 1 || bytes[0] != b'.' { + error = + error.or_else(|| Some(ParseError::Expected('.'.into(), path_element.span))); + } + } else { + expect_dot = true; + + match parse_int(bytes, path_element.span) { + ( + Expression { + expr: Expr::Int(val), + span, + .. + }, + None, + ) => tail.push(PathMember::Int { + val: val as usize, + span, + }), + _ => { + let (result, err) = parse_string(working_set, path_element.span); + error = error.or(err); + match result { + Expression { + expr: Expr::String(string), + span, + .. + } => { + tail.push(PathMember::String { val: string, span }); + } + _ => { + error = error + .or_else(|| Some(ParseError::Expected("string".into(), span))); + } + } + } + } + } + } + + ( + Expression { + expr: Expr::FullCellPath(Box::new(FullCellPath { head, tail })), + ty: Type::Unknown, + span: full_column_span, + }, + error, + ) } else { - error = error.or_else(|| { - Some(ParseError::Unclosed( - ")".into(), - Span { - start: end, - end: end + 1, - }, - )) - }); + (garbage(span), error) } - - let span = Span { start, end }; - - let source = working_set.get_span_contents(span); - - let (output, err) = lex(source, start, &[b'\n'], &[]); - error = error.or(err); - - let (output, err) = lite_parse(&output); - error = error.or(err); - - let (output, err) = parse_block(working_set, &output, true); - error = error.or(err); - - let block_id = working_set.add_block(output); - - ( - Expression { - expr: Expr::Subexpression(block_id), - span, - ty: Type::Unknown, // FIXME - }, - error, - ) } pub fn parse_string( @@ -1136,7 +1217,7 @@ pub fn parse_shape_name( let result = match bytes { b"any" => SyntaxShape::Any, b"string" => SyntaxShape::String, - b"column-path" => SyntaxShape::ColumnPath, + b"cell-path" => SyntaxShape::CellPath, b"number" => SyntaxShape::Number, b"range" => SyntaxShape::Range, b"int" => SyntaxShape::Int, @@ -1899,26 +1980,8 @@ pub fn parse_value( } match shape { - SyntaxShape::Number => { - if let Ok(token) = String::from_utf8(bytes.into()) { - parse_number(&token, span) - } else { - ( - garbage(span), - Some(ParseError::Expected("number".into(), span)), - ) - } - } - SyntaxShape::Int => { - if let Ok(token) = String::from_utf8(bytes.into()) { - parse_int(&token, span) - } else { - ( - garbage(span), - Some(ParseError::Expected("int".into(), span)), - ) - } - } + SyntaxShape::Number => parse_number(bytes, span), + SyntaxShape::Int => parse_int(bytes, span), SyntaxShape::Range => parse_range(working_set, span), SyntaxShape::String | SyntaxShape::GlobPattern | SyntaxShape::FilePath => { parse_string(working_set, span) diff --git a/crates/nu-protocol/src/ast/cell_path.rs b/crates/nu-protocol/src/ast/cell_path.rs new file mode 100644 index 000000000..26cefd855 --- /dev/null +++ b/crates/nu-protocol/src/ast/cell_path.rs @@ -0,0 +1,19 @@ +use super::Expression; +use crate::Span; + +#[derive(Debug, Clone)] +pub enum PathMember { + String { val: String, span: Span }, + Int { val: usize, span: Span }, +} + +#[derive(Debug, Clone)] +pub struct CellPath { + pub members: Vec, +} + +#[derive(Debug, Clone)] +pub struct FullCellPath { + pub head: Expression, + pub tail: Vec, +} diff --git a/crates/nu-protocol/src/ast/expr.rs b/crates/nu-protocol/src/ast/expr.rs index 021b57b9e..ac29cd24b 100644 --- a/crates/nu-protocol/src/ast/expr.rs +++ b/crates/nu-protocol/src/ast/expr.rs @@ -1,4 +1,4 @@ -use super::{Call, Expression, Operator, RangeOperator}; +use super::{Call, Expression, FullCellPath, Operator, RangeOperator}; use crate::{BlockId, Signature, Span, VarId}; #[derive(Debug, Clone)] @@ -22,6 +22,7 @@ pub enum Expr { Table(Vec, Vec>), Keyword(Vec, Span, Box), String(String), // FIXME: improve this in the future? + FullCellPath(Box), Signature(Box), Garbage, } diff --git a/crates/nu-protocol/src/ast/mod.rs b/crates/nu-protocol/src/ast/mod.rs index 90c3901bd..4bd33ec76 100644 --- a/crates/nu-protocol/src/ast/mod.rs +++ b/crates/nu-protocol/src/ast/mod.rs @@ -1,5 +1,6 @@ mod block; mod call; +mod cell_path; mod expr; mod expression; mod operator; @@ -8,6 +9,7 @@ mod statement; pub use block::*; pub use call::*; +pub use cell_path::*; pub use expr::*; pub use expression::*; pub use operator::*; diff --git a/crates/nu-protocol/src/shell_error.rs b/crates/nu-protocol/src/shell_error.rs index 92eb30643..678816b35 100644 --- a/crates/nu-protocol/src/shell_error.rs +++ b/crates/nu-protocol/src/shell_error.rs @@ -17,4 +17,8 @@ pub enum ShellError { CantConvert(String, Span), DivisionByZero(Span), CannotCreateRange(Span), + AccessBeyondEnd(usize, Span), + AccessBeyondEndOfStream(Span), + IncompatiblePathAccess(String, Span), + CantFindColumn(Span), } diff --git a/crates/nu-protocol/src/syntax_shape.rs b/crates/nu-protocol/src/syntax_shape.rs index c62048d17..8e73ef22f 100644 --- a/crates/nu-protocol/src/syntax_shape.rs +++ b/crates/nu-protocol/src/syntax_shape.rs @@ -13,10 +13,10 @@ pub enum SyntaxShape { String, /// A dotted path to navigate the table - ColumnPath, + CellPath, /// A dotted path to navigate the table (including variable) - FullColumnPath, + FullCellPath, /// Only a numeric (integer or decimal) value is allowed Number, @@ -76,12 +76,12 @@ impl SyntaxShape { match self { SyntaxShape::Any => Type::Unknown, SyntaxShape::Block => Type::Block, - SyntaxShape::ColumnPath => Type::Unknown, + SyntaxShape::CellPath => Type::Unknown, SyntaxShape::Duration => Type::Duration, SyntaxShape::Expression => Type::Unknown, SyntaxShape::FilePath => Type::FilePath, SyntaxShape::Filesize => Type::Filesize, - SyntaxShape::FullColumnPath => Type::Unknown, + SyntaxShape::FullCellPath => Type::Unknown, SyntaxShape::GlobPattern => Type::String, SyntaxShape::Int => Type::Int, SyntaxShape::List(x) => { diff --git a/crates/nu-protocol/src/ty.rs b/crates/nu-protocol/src/ty.rs index e94468215..6056b641d 100644 --- a/crates/nu-protocol/src/ty.rs +++ b/crates/nu-protocol/src/ty.rs @@ -8,7 +8,7 @@ pub enum Type { Bool, String, Block, - ColumnPath, + CellPath, Duration, FilePath, Filesize, @@ -27,7 +27,7 @@ impl Display for Type { match self { Type::Block => write!(f, "block"), Type::Bool => write!(f, "bool"), - Type::ColumnPath => write!(f, "column path"), + Type::CellPath => write!(f, "cell path"), Type::Duration => write!(f, "duration"), Type::FilePath => write!(f, "filepath"), Type::Filesize => write!(f, "filesize"), diff --git a/crates/nu-protocol/src/value.rs b/crates/nu-protocol/src/value.rs index 18a538e68..53ca35c28 100644 --- a/crates/nu-protocol/src/value.rs +++ b/crates/nu-protocol/src/value.rs @@ -1,6 +1,6 @@ use std::{cell::RefCell, fmt::Debug, rc::Rc}; -use crate::ast::RangeInclusion; +use crate::ast::{PathMember, RangeInclusion}; use crate::{span, BlockId, Span, Type}; use crate::ShellError; @@ -364,21 +364,26 @@ impl Value { } Value::String { val, .. } => val, Value::ValueStream { stream, .. } => stream.into_string(), - Value::List { val, .. } => val - .into_iter() - .map(|x| x.into_string()) - .collect::>() - .join(", "), - Value::Table { val, .. } => val - .into_iter() - .map(|x| { - x.into_iter() - .map(|x| x.into_string()) - .collect::>() - .join(", ") - }) - .collect::>() - .join("\n"), + Value::List { val, .. } => format!( + "[{}]", + val.into_iter() + .map(|x| x.into_string()) + .collect::>() + .join(", ") + ), + Value::Table { val, headers, .. } => format!( + "[= {} =\n {}]", + headers.join(", "), + val.into_iter() + .map(|x| { + x.into_iter() + .map(|x| x.into_string()) + .collect::>() + .join(", ") + }) + .collect::>() + .join("\n") + ), Value::RowStream { headers, stream, .. } => stream.into_string(headers), @@ -393,6 +398,138 @@ impl Value { span: Span::unknown(), } } + + pub fn follow_column_path(self, column_path: &[PathMember]) -> Result { + let mut current = self; + for member in column_path { + // FIXME: this uses a few extra clones for simplicity, but there may be a way + // to traverse the path without them + match member { + PathMember::Int { + val: count, + span: origin_span, + } => { + // Treat a numeric path member as `nth ` + match &mut current { + Value::List { val, .. } => { + if let Some(item) = val.get(*count) { + current = item.clone(); + } else { + return Err(ShellError::AccessBeyondEnd(val.len(), *origin_span)); + } + } + Value::ValueStream { stream, .. } => { + if let Some(item) = stream.nth(*count) { + current = item; + } else { + return Err(ShellError::AccessBeyondEndOfStream(*origin_span)); + } + } + Value::Table { headers, val, span } => { + if let Some(row) = val.get(*count) { + current = Value::Table { + headers: headers.clone(), + val: vec![row.clone()], + span: *span, + } + } else { + return Err(ShellError::AccessBeyondEnd(val.len(), *origin_span)); + } + } + Value::RowStream { + headers, + stream, + span, + } => { + if let Some(row) = stream.nth(*count) { + current = Value::Table { + headers: headers.clone(), + val: vec![row.clone()], + span: *span, + } + } else { + return Err(ShellError::AccessBeyondEndOfStream(*origin_span)); + } + } + x => { + return Err(ShellError::IncompatiblePathAccess( + format!("{}", x.get_type()), + *origin_span, + )) + } + } + } + PathMember::String { + val, + span: origin_span, + } => match &mut current { + Value::Table { + headers, + val: cells, + span, + } => { + let mut found = false; + for header in headers.iter().enumerate() { + if header.1 == val { + found = true; + + let mut column = vec![]; + for row in cells { + column.push(row[header.0].clone()) + } + + current = Value::List { + val: column, + span: *span, + }; + break; + } + } + + if !found { + return Err(ShellError::CantFindColumn(*origin_span)); + } + } + Value::RowStream { + headers, + stream, + span, + } => { + let mut found = false; + for header in headers.iter().enumerate() { + if header.1 == val { + found = true; + + let mut column = vec![]; + for row in stream { + column.push(row[header.0].clone()) + } + + current = Value::List { + val: column, + span: *span, + }; + break; + } + } + + if !found { + //FIXME: add "did you mean" + return Err(ShellError::CantFindColumn(*origin_span)); + } + } + x => { + return Err(ShellError::IncompatiblePathAccess( + format!("{}", x.get_type()), + *origin_span, + )) + } + }, + } + } + + Ok(current) + } } impl PartialEq for Value { diff --git a/src/tests.rs b/src/tests.rs index ffb2bcf54..50e33440d 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -254,3 +254,23 @@ fn build_string3() -> TestResult { "nushell rocks", ) } + +#[test] +fn cell_path_subexpr1() -> TestResult { + run_test("([[lang, gems]; [nu, 100]]).lang", "[nu]") +} + +#[test] +fn cell_path_subexpr2() -> TestResult { + run_test("([[lang, gems]; [nu, 100]]).lang.0", "nu") +} + +#[test] +fn cell_path_var1() -> TestResult { + run_test("let x = [[lang, gems]; [nu, 100]]; $x.lang", "[nu]") +} + +#[test] +fn cell_path_var2() -> TestResult { + run_test("let x = [[lang, gems]; [nu, 100]]; $x.lang.0", "nu") +}