allow tables to have annotations (#9613)

# Description

Follow-up to #8529 and #8914.

This works very similarly to record annotations; the only difference is
what each part of an entry represents:

```sh
table<name: string>
      ^^^^  ^^^^^^
      |     | 
      |     represents the type of the items in that column
      |
      represents the column name
```
More information on the syntax can be found
[here](https://github.com/nushell/nushell/pull/8914#issue-1672113520).

# User-Facing Changes

**[BREAKING CHANGE]**
This change adds a field to `SyntaxShape::Table`, so any plugins that
use it will have to be updated to include the field. If you are
unsure of the type the table expects, `SyntaxShape::Table(vec![])` will
suffice.
This commit is contained in:
mike 2023-07-07 12:06:09 +03:00 committed by GitHub
parent 440a0e960a
commit 8e38596bc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 343 additions and 153 deletions

View File

@ -25,7 +25,7 @@ impl Command for DropDuplicates {
Signature::build(self.name())
.optional(
"subset",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"subset of columns to drop duplicates",
)
.switch("maintain", "maintain order", Some('m'))

View File

@ -24,7 +24,7 @@ impl Command for DropNulls {
Signature::build(self.name())
.optional(
"subset",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"subset of columns to drop nulls",
)
.input_output_type(

View File

@ -26,13 +26,13 @@ impl Command for MeltDF {
Signature::build(self.name())
.required_named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"column names for melting",
Some('c'),
)
.required_named(
"values",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"column names used as value columns",
Some('v'),
)

View File

@ -35,7 +35,7 @@ impl Command for Summary {
)
.named(
"quantiles",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"provide optional quantiles",
Some('q'),
)

View File

@ -32,6 +32,9 @@ impl Command for BytesIndexOf {
.input_output_types(vec![
(Type::Binary, Type::Int),
(Type::Binary, Type::List(Box::new(Type::Int))),
// FIXME: this shouldn't be needed, cell paths should work with the two
// above
(Type::Table(vec![]), Type::Table(vec![])),
])
.required(
"pattern",

View File

@ -26,7 +26,7 @@ impl Command for UpdateCells {
)
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"list of columns to update",
Some('c'),
)

View File

@ -19,6 +19,7 @@ impl Command for SubCommand {
.input_output_types(vec![
(Type::String, Type::Number),
(Type::Bool, Type::Number),
(Type::Table(vec![]), Type::Table(vec![])),
])
.rest(
"rest",

View File

@ -23,6 +23,7 @@ impl Command for SubCommand {
// TODO: --convert option should be implemented as `format duration`
(Type::String, Type::String),
(Type::Duration, Type::String),
(Type::Table(vec![]), Type::Table(vec![])),
])
.named(
"convert",

View File

@ -38,7 +38,7 @@ impl Command for SubCommand {
])
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, convert strings in the given columns to their basename",
Some('c'),
)

View File

@ -35,7 +35,7 @@ impl Command for SubCommand {
.input_output_types(vec![(Type::String, Type::String)])
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, convert strings at the given columns to their dirname",
Some('c'),
)

View File

@ -34,7 +34,7 @@ impl Command for SubCommand {
.input_output_types(vec![(Type::String, Type::Bool)])
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, check strings at the given columns, and replace with result",
Some('c'),
)

View File

@ -43,7 +43,7 @@ impl Command for SubCommand {
.switch("no-symlink", "Do not resolve symbolic links", Some('n'))
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, expand strings at the given columns",
Some('c'),
)

View File

@ -39,7 +39,7 @@ impl Command for SubCommand {
])
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, join strings at the given columns",
Some('c'),
)

View File

@ -35,7 +35,7 @@ impl Command for SubCommand {
.input_output_types(vec![(Type::String, Type::Record(vec![]))])
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, convert strings at the given columns",
Some('c'),
)

View File

@ -40,7 +40,7 @@ impl Command for SubCommand {
)
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, convert strings at the given columns",
Some('c'),
)

View File

@ -32,7 +32,7 @@ impl Command for SubCommand {
.input_output_types(vec![(Type::String, Type::List(Box::new(Type::String)))])
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, split strings at the given columns",
Some('c'),
)

View File

@ -32,7 +32,7 @@ impl Command for SubCommand {
.input_output_types(vec![(Type::String, Type::String)])
.named(
"columns",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"For a record or table input, check strings at the given columns, and replace with result",
Some('c'),
)

View File

@ -29,7 +29,10 @@ impl Command for SubCommand {
fn signature(&self) -> Signature {
Signature::build("str contains")
.input_output_types(vec![(Type::String, Type::Bool)])
.input_output_types(vec![
(Type::String, Type::Bool),
(Type::Table(vec![]), Type::Table(vec![])),
])
.vectorizes_over_list(true)
.required("string", SyntaxShape::String, "the substring to find")
.rest(

View File

@ -28,7 +28,10 @@ impl Command for SubCommand {
fn signature(&self) -> Signature {
Signature::build("str distance")
.input_output_types(vec![(Type::String, Type::Int)])
.input_output_types(vec![
(Type::String, Type::Int),
(Type::Table(vec![]), Type::Table(vec![])),
])
.required(
"compare-string",
SyntaxShape::String,

View File

@ -2724,7 +2724,7 @@ pub fn parse_shape_name(
_ if bytes.starts_with(b"record") => parse_collection_shape(working_set, bytes, span),
b"signature" => SyntaxShape::Signature,
b"string" => SyntaxShape::String,
b"table" => SyntaxShape::Table,
_ if bytes.starts_with(b"table") => parse_collection_shape(working_set, bytes, span),
b"variable" => SyntaxShape::Variable,
b"var-with-opt-type" => SyntaxShape::VarWithOptType,
_ => {
@ -2765,14 +2765,24 @@ fn parse_collection_shape(
bytes: &[u8],
span: Span,
) -> SyntaxShape {
assert!(bytes.starts_with(b"record"));
let name = "record";
let mk_shape = SyntaxShape::Record;
assert!(bytes.starts_with(b"record") || bytes.starts_with(b"table"));
let is_table = bytes.starts_with(b"table");
let name = if is_table { "table" } else { "record" };
let prefix = (if is_table { "table<" } else { "record<" }).as_bytes();
let prefix_len = prefix.len();
let mk_shape = |ty| -> SyntaxShape {
if is_table {
SyntaxShape::Table(ty)
} else {
SyntaxShape::Record(ty)
}
};
if bytes == name.as_bytes() {
mk_shape(vec![])
} else if bytes.starts_with(b"record<") {
let Some(inner_span) = prepare_inner_span(working_set, bytes, span, 7) else {
} else if bytes.starts_with(prefix) {
let Some(inner_span) = prepare_inner_span(working_set, bytes, span, prefix_len) else {
return SyntaxShape::Any;
};
@ -3902,122 +3912,173 @@ pub fn parse_list_expression(
}
}
pub fn parse_table_expression(
working_set: &mut StateWorkingSet,
original_span: Span,
) -> Expression {
let bytes = working_set.get_span_contents(original_span);
fn parse_table_expression(working_set: &mut StateWorkingSet, span: Span) -> Expression {
let bytes = working_set.get_span_contents(span);
let inner_span = {
let start = if bytes.starts_with(b"[") {
span.start + 1
} else {
span.start
};
let mut start = original_span.start;
let mut end = original_span.end;
let end = if bytes.ends_with(b"]") {
span.end - 1
} else {
let end = span.end;
working_set.error(ParseError::Unclosed("]".into(), Span::new(end, end)));
span.end
};
if bytes.starts_with(b"[") {
start += 1;
}
if bytes.ends_with(b"]") {
end -= 1;
} else {
working_set.error(ParseError::Unclosed("]".into(), Span::new(end, end)));
}
let inner_span = Span::new(start, end);
Span::new(start, end)
};
let source = working_set.get_span_contents(inner_span);
let (output, err) = lex(source, start, &[b'\n', b'\r', b','], &[], true);
let (tokens, err) = lex(source, inner_span.start, &[b'\n', b'\r', b','], &[], true);
if let Some(err) = err {
working_set.error(err);
}
let (output, err) = lite_parse(&output);
if let Some(err) = err {
working_set.error(err);
}
match output.block.len() {
0 => Expression {
expr: Expr::List(vec![]),
span: original_span,
ty: Type::List(Box::new(Type::Any)),
custom_completion: None,
},
1 => {
// List
parse_list_expression(working_set, original_span, &SyntaxShape::Any)
let head = if let Some(first) = tokens.first() {
if working_set.get_span_contents(first.span).starts_with(b"[") {
parse_list_expression(working_set, first.span, &SyntaxShape::Any)
} else {
return parse_list_expression(working_set, span, &SyntaxShape::Any);
}
_ => {
match &output.block[0].commands[0] {
LiteElement::Command(_, command)
| LiteElement::Redirection(_, _, command)
| LiteElement::SeparateRedirection {
out: (_, command), ..
} else {
return parse_list_expression(working_set, span, &SyntaxShape::Any);
};
if tokens
.get(1)
.filter(|second| second.contents == TokenContents::Semicolon)
.is_none()
{
return parse_list_expression(working_set, span, &SyntaxShape::Any);
};
let rest = &tokens[2..];
if rest.is_empty() {
return parse_list_expression(working_set, span, &SyntaxShape::Any);
}
let head = {
let Expression { expr: Expr::List(vals), .. } = head else {
unreachable!("head must be a list by now")
};
vals
};
let errors = working_set.parse_errors.len();
let rows = rest
.iter()
.fold(Vec::with_capacity(rest.len()), |mut acc, it| {
use std::cmp::Ordering;
let text = working_set.get_span_contents(it.span).to_vec();
match text.as_slice() {
b"," => acc,
_ if !&text.starts_with(b"[") => {
let err = ParseError::LabeledErrorWithHelp {
error: String::from("Table item not list"),
label: String::from("not a list"),
span: it.span,
help: String::from("All table items must be lists"),
};
working_set.error(err);
acc
}
| LiteElement::SameTargetRedirection {
cmd: (_, command), ..
} => {
let mut table_headers = vec![];
let headers =
parse_list_expression(working_set, command.parts[0], &SyntaxShape::Any);
if let Expression {
expr: Expr::List(headers),
_ => {
let ls = parse_list_expression(working_set, it.span, &SyntaxShape::Any);
let Expression {
expr: Expr::List(item),
span,
..
} = headers
{
table_headers = headers;
} = ls else {
unreachable!("the item must be a list")
};
match item.len().cmp(&head.len()) {
Ordering::Less => {
let err = ParseError::MissingColumns(head.len(), span);
working_set.error(err);
}
Ordering::Greater => {
let span = {
let start = item[head.len()].span.start;
let end = span.end;
Span::new(start, end)
};
let err = ParseError::ExtraColumns(head.len(), span);
working_set.error(err);
}
Ordering::Equal => {}
}
match &output.block[1].commands[0] {
LiteElement::Command(_, command)
| LiteElement::Redirection(_, _, command)
| LiteElement::SeparateRedirection {
out: (_, command), ..
}
| LiteElement::SameTargetRedirection {
cmd: (_, command), ..
} => {
let mut rows = vec![];
for part in &command.parts {
let values =
parse_list_expression(working_set, *part, &SyntaxShape::Any);
if let Expression {
expr: Expr::List(values),
span,
..
} = values
{
match values.len().cmp(&table_headers.len()) {
std::cmp::Ordering::Less => working_set.error(
ParseError::MissingColumns(table_headers.len(), span),
),
std::cmp::Ordering::Equal => {}
std::cmp::Ordering::Greater => {
working_set.error(ParseError::ExtraColumns(
table_headers.len(),
values[table_headers.len()].span,
))
}
}
rows.push(values);
}
}
Expression {
expr: Expr::Table(table_headers, rows),
span: original_span,
ty: Type::Table(vec![]), //FIXME
custom_completion: None,
}
}
}
acc.push(item);
acc
}
}
}
});
let ty = if working_set.parse_errors.len() == errors {
let (ty, errs) = table_type(&head, &rows);
working_set.parse_errors.extend(errs.into_iter());
ty
} else {
Type::Table(vec![])
};
Expression {
expr: Expr::Table(head, rows),
span,
ty,
custom_completion: None,
}
}
/// Infers the type of a table literal from its header row and its data rows.
///
/// Every expression in `head` names one column; the cells found at the same
/// position in each row determine that column's type.
///
/// * A header that `Expression::as_string` cannot convert is reported with a
///   "Table column name not string" error and gets the placeholder name
///   `{ column }`.
/// * While neighbouring cells stay `type_compatible`, the column takes the
///   type of the later cell. Once two neighbours are incompatible, or a cell
///   is itself `Type::Any`, the column is `Type::Any` and stays that way for
///   the remaining rows.
/// * A column without any cell (no rows, or a row too short to reach it)
///   falls back to `Type::default()`.
///
/// Rows are expected to be as long as `head`; cells are matched to headers by
/// index, counted from the left.
///
/// Returns the resulting `Type::Table` together with the header errors, listed
/// in header order.
fn table_type(head: &[Expression], rows: &[Vec<Expression>]) -> (Type, Vec<ParseError>) {
    let mut errors = vec![];

    let mk_error = |span| ParseError::LabeledErrorWithHelp {
        error: "Table column name not string".into(),
        label: "must be a string".into(),
        help: "Table column names should be able to be converted into strings".into(),
        span,
    };

    // Folds the types of one column over all rows. Only the cell types are
    // cloned; the row expressions themselves are left untouched.
    let column_type = |index: usize| -> Type {
        rows.iter()
            .map(|row| row.get(index).map(|cell| cell.ty.clone()).unwrap_or_default())
            .reduce(|acc, ty| {
                // `type_compatible` accepts `Any` on either side, so `Any` has
                // to be checked first: otherwise a later cell would silently
                // replace an earlier mismatch with its own type.
                if !matches!(acc, Type::Any) && type_compatible(&acc, &ty) {
                    ty
                } else {
                    Type::Any
                }
            })
            .unwrap_or_default()
    };

    let columns: Vec<(String, Type)> = head
        .iter()
        .enumerate()
        .map(|(index, expr)| {
            let title = expr.as_string().unwrap_or_else(|| {
                errors.push(mk_error(expr.span));
                String::from("{ column }")
            });
            (title, column_type(index))
        })
        .collect();

    (Type::Table(columns), errors)
}
pub fn parse_block_expression(working_set: &mut StateWorkingSet, span: Span) -> Expression {
trace!("parsing: block expression");
@ -4456,7 +4517,7 @@ pub fn parse_value(
b'[' => match shape {
SyntaxShape::Any
| SyntaxShape::List(_)
| SyntaxShape::Table
| SyntaxShape::Table(_)
| SyntaxShape::Signature => {}
_ => {
working_set.error(ParseError::Expected("non-[] value", span));
@ -4503,7 +4564,7 @@ pub fn parse_value(
Expression::garbage(span)
}
}
SyntaxShape::Table => {
SyntaxShape::Table(_) => {
if bytes.starts_with(b"[") {
parse_table_expression(working_set, span)
} else {

View File

@ -32,8 +32,8 @@ pub fn type_compatible(lhs: &Type, rhs: &Type) -> bool {
(Type::Closure, Type::Block) => true,
(Type::Any, _) => true,
(_, Type::Any) => true,
(Type::Record(fields_lhs), Type::Record(fields_rhs)) => {
is_compatible(fields_lhs, fields_rhs)
(Type::Record(lhs), Type::Record(rhs)) | (Type::Table(lhs), Type::Table(rhs)) => {
is_compatible(lhs, rhs)
}
(lhs, rhs) => lhs == rhs,
}

View File

@ -108,7 +108,7 @@ pub enum SyntaxShape {
String,
/// A table is allowed, eg `[[first, second]; [1, 2]]`
Table,
Table(Vec<(String, SyntaxShape)>),
/// A variable name, eg `$foo`
Variable,
@ -119,6 +119,12 @@ pub enum SyntaxShape {
impl SyntaxShape {
pub fn to_type(&self) -> Type {
let mk_ty = |tys: &[(String, SyntaxShape)]| {
tys.iter()
.map(|(key, val)| (key.clone(), val.to_type()))
.collect()
};
match self {
SyntaxShape::Any => Type::Any,
SyntaxShape::Block => Type::Block,
@ -151,18 +157,12 @@ impl SyntaxShape {
SyntaxShape::OneOf(_) => Type::Any,
SyntaxShape::Operator => Type::Any,
SyntaxShape::Range => Type::Any,
SyntaxShape::Record(entries) => {
let ty = entries
.iter()
.map(|(key, val)| (key.clone(), val.to_type()))
.collect();
Type::Record(ty)
}
SyntaxShape::Record(entries) => Type::Record(mk_ty(entries)),
SyntaxShape::RowCondition => Type::Bool,
SyntaxShape::Boolean => Type::Bool,
SyntaxShape::Signature => Type::Signature,
SyntaxShape::String => Type::String,
SyntaxShape::Table => Type::Table(vec![]), // FIXME: What role should columns play in the Table type?
SyntaxShape::Table(columns) => Type::Table(mk_ty(columns)),
SyntaxShape::VarWithOptType => Type::Any,
SyntaxShape::Variable => Type::Any,
}
@ -171,6 +171,13 @@ impl SyntaxShape {
impl Display for SyntaxShape {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mk_fmt = |tys: &[(String, SyntaxShape)]| -> String {
tys.iter()
.map(|(x, y)| format!("{x}: {y}"))
.collect::<Vec<String>>()
.join(", ")
};
match self {
SyntaxShape::Keyword(kw, shape) => {
write!(f, "\"{}\" {}", String::from_utf8_lossy(kw), shape)
@ -198,21 +205,19 @@ impl Display for SyntaxShape {
}
}
SyntaxShape::Binary => write!(f, "binary"),
SyntaxShape::Table => write!(f, "table"),
SyntaxShape::List(x) => write!(f, "list<{x}>"),
SyntaxShape::Table(columns) => {
if columns.is_empty() {
write!(f, "table")
} else {
write!(f, "table<{}>", mk_fmt(columns))
}
}
SyntaxShape::Record(entries) => {
if entries.is_empty() {
write!(f, "record")
} else {
write!(
f,
"record<{}>",
entries
.iter()
.map(|(x, y)| format!("{x}: {y}"))
.collect::<Vec<String>>()
.join(", "),
)
write!(f, "record<{}>", mk_fmt(entries))
}
}
SyntaxShape::Filesize => write!(f, "filesize"),

View File

@ -81,6 +81,12 @@ impl Type {
}
pub fn to_shape(&self) -> SyntaxShape {
let mk_shape = |tys: &[(String, Type)]| {
tys.iter()
.map(|(key, val)| (key.clone(), val.to_shape()))
.collect()
};
match self {
Type::Int => SyntaxShape::Int,
Type::Float => SyntaxShape::Number,
@ -96,14 +102,8 @@ impl Type {
Type::List(x) => SyntaxShape::List(Box::new(x.to_shape())),
Type::Number => SyntaxShape::Number,
Type::Nothing => SyntaxShape::Nothing,
Type::Record(entries) => {
let entries = entries
.iter()
.map(|(key, val)| (key.clone(), val.to_shape()))
.collect();
SyntaxShape::Record(entries)
}
Type::Table(_) => SyntaxShape::Table,
Type::Record(entries) => SyntaxShape::Record(mk_shape(entries)),
Type::Table(columns) => SyntaxShape::Table(mk_shape(columns)),
Type::ListStream => SyntaxShape::List(Box::new(SyntaxShape::Any)),
Type::Any => SyntaxShape::Any,
Type::Error => SyntaxShape::Any,

View File

@ -32,7 +32,7 @@ impl Plugin for Query {
)
.named(
"as-table",
SyntaxShape::Table,
SyntaxShape::Table(vec![]),
"find table based on column header list",
Some('t'),
)

View File

@ -262,3 +262,116 @@ fn record_annotations_with_extra_characters() -> TestResult {
let expected = "Extra characters in the parameter name";
fail_test(input, expected)
}
/// A bare `table` annotation accepts `[[]; []]`, which is described as `table`.
#[test]
fn table_annotations_none() -> TestResult {
    run_test(
        "def run [t: table] { $t }; run [[]; []] | describe",
        "table",
    )
}
/// A single annotated column (`age: int`) shows up in the described type.
#[test]
fn table_annotations() -> TestResult {
    run_test(
        "def run [t: table<age: int>] { $t }; run [[age]; [3]] | describe",
        "table<age: int>",
    )
}
/// Two column annotations separated only by whitespace are both accepted.
#[test]
fn table_annotations_two_types() -> TestResult {
    run_test(
        "def run [t: table<name: string age: int>] { $t };\nrun [[name, age]; [nushell, 3]] | describe",
        "table<name: string, age: int>",
    )
}
/// Two column annotations separated by a comma are both accepted.
#[test]
fn table_annotations_two_types_comma_sep() -> TestResult {
    run_test(
        "def run [t: table<name: string, age: int>] { $t };\nrun [[name, age]; [nushell, 3]] | describe",
        "table<name: string, age: int>",
    )
}
/// A column listed without a type is still described with the type of the
/// value passed in (`string` here).
#[test]
fn table_annotations_key_with_no_type() -> TestResult {
    run_test(
        "def run [t: table<name>] { $t }; run [[name]; [nushell]] | describe",
        "table<name: string>",
    )
}
/// Mixing a typed column (`name: string`) with an untyped one (`age`) works;
/// the untyped column is described as `int` for the value `3`.
#[test]
fn table_annotations_two_types_one_with_no_type() -> TestResult {
    run_test(
        "def run [t: table<name: string, age>] { $t };\nrun [[name, age]; [nushell, 3]] | describe",
        "table<name: string, age: int>",
    )
}
/// Two untyped columns are described with the types of the values passed in.
#[test]
fn table_annotations_two_types_both_with_no_types() -> TestResult {
    run_test(
        "def run [t: table<name, age>] { $t };\nrun [[name, age]; [nushell, 3]] | describe",
        "table<name: string, age: int>",
    )
}
/// A column annotated `any` holding `2wk` is described as `duration`.
#[test]
fn table_annotations_type_inference_1() -> TestResult {
    run_test(
        "def run [t: table<age: any>] { $t }; run [[age]; [2wk]] | describe",
        "table<age: duration>",
    )
}
/// An untyped column holding `2mb` is described as `filesize`.
#[test]
fn table_annotations_type_inference_2() -> TestResult {
    run_test(
        "def run [t: table<size>] { $t }; run [[size]; [2mb]] | describe",
        "table<size: filesize>",
    )
}
/// An annotation missing its closing `>` must be rejected; the expected error
/// text is `expected closing >`.
#[test]
fn table_annotations_not_terminated() -> TestResult {
    fail_test("def run [t: table<age: int] { $t }", "expected closing >")
}
/// Closing only the nested `list<string>` leaves the outer `table<` open and
/// must be rejected; the expected error text is `expected closing >`.
#[test]
fn table_annotations_not_terminated_inner() -> TestResult {
    fail_test(
        "def run [t: table<name: string, repos: list<string>] { $t }",
        "expected closing >",
    )
}
/// A column name followed by a colon but no type must be rejected; the
/// expected error text is `type after colon`.
#[test]
fn table_annotations_no_type_after_colon() -> TestResult {
    fail_test("def run [t: table<name: >] { $t }", "type after colon")
}
/// Passing a table whose column is named `nme` where `name` is annotated must
/// be rejected with a type-mismatch message naming both table types.
#[test]
fn table_annotations_type_mismatch_column() -> TestResult {
    fail_test(
        "def run [t: table<name: string>] { $t }; run [[nme]; [nushell]]",
        "expected table<name: string>, found table<nme: string>",
    )
}
/// Passing a `duration` cell where the column is annotated `int` must be
/// rejected with a type-mismatch message naming both table types.
#[test]
fn table_annotations_type_mismatch_shape() -> TestResult {
    fail_test(
        "def run [t: table<age: int>] { $t }; run [[age]; [2wk]]",
        "expected table<age: int>, found table<age: duration>",
    )
}
/// Text trailing the closing `>` of the annotation (`table<int>extra`) must be
/// rejected; the expected error text is
/// `Extra characters in the parameter name`.
#[test]
fn table_annotations_with_extra_characters() -> TestResult {
    fail_test(
        "def run [t: table<int>extra] {$t | length}; run [[int]; [8]]",
        "Extra characters in the parameter name",
    )
}