Flatten command. (#2670)

This commit is contained in:
Andrés N. Robalino 2020-10-14 04:36:11 -05:00 committed by GitHub
parent 2df8775b48
commit 2fb48bd6ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 400 additions and 15 deletions

View File

@ -198,6 +198,7 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(EachWindow),
whole_stream_command(Empty),
// Table manipulation
whole_stream_command(Flatten),
whole_stream_command(Move),
whole_stream_command(Merge),
whole_stream_command(Shuffle),

View File

@ -42,6 +42,7 @@ pub(crate) mod every;
pub(crate) mod exec;
pub(crate) mod exit;
pub(crate) mod first;
pub(crate) mod flatten;
pub(crate) mod format;
pub(crate) mod from;
pub(crate) mod from_csv;
@ -175,6 +176,7 @@ pub(crate) use every::Every;
pub(crate) use exec::Exec;
pub(crate) use exit::Exit;
pub(crate) use first::First;
pub(crate) use flatten::Command as Flatten;
pub(crate) use format::Format;
pub(crate) use from::From;
pub(crate) use from_csv::FromCSV;
@ -278,7 +280,7 @@ mod tests {
use crate::examples::{test_anchors, test_examples};
use nu_errors::ShellError;
fn commands() -> Vec<Command> {
fn full_tests() -> Vec<Command> {
vec![
whole_stream_command(Append),
whole_stream_command(GroupBy),
@ -288,9 +290,15 @@ mod tests {
]
}
/// Builds the command list whose examples get executed: everything returned by
/// `full_tests` with `Flatten` appended at the end.
fn only_examples() -> Vec<Command> {
    let mut with_examples_only = full_tests();
    with_examples_only.push(whole_stream_command(Flatten));
    with_examples_only
}
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
for cmd in commands() {
for cmd in only_examples() {
test_examples(cmd)?;
}
@ -299,7 +307,7 @@ mod tests {
#[test]
fn tracks_metadata() -> Result<(), ShellError> {
for cmd in commands() {
for cmd in full_tests() {
test_anchors(cmd)?;
}

View File

@ -0,0 +1,184 @@
use crate::command_registry::CommandRegistry;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use nu_errors::ShellError;
use nu_protocol::{
Dictionary, ReturnSuccess, Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value,
};
use nu_source::Tagged;
/// The `flatten` command. A stateless unit struct; its behavior is provided by the
/// `WholeStreamCommand` implementation in this file.
pub struct Command;
/// Arguments accepted by `flatten`, deserialized from the command call.
#[derive(Deserialize)]
pub struct Arguments {
// Column names passed as rest arguments. When empty, flattening is not
// restricted to specific columns.
rest: Vec<Tagged<String>>,
}
#[async_trait]
impl WholeStreamCommand for Command {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "flatten"
    }

    /// `flatten` takes any number of string arguments naming the columns to flatten.
    fn signature(&self) -> Signature {
        let rest_description = "optionally flatten data by column";
        Signature::build(self.name()).rest(SyntaxShape::String, rest_description)
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Flatten the table."
    }

    /// Runs the command by delegating to the free function `flatten`.
    async fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let output = flatten(args, registry).await?;
        Ok(output)
    }

    /// Usage examples together with the values each one is expected to produce.
    fn examples(&self) -> Vec<Example> {
        let first_cell = Some(vec![Value::from("N")]);
        let nested_meal = Some(vec![Value::from("arepa")]);
        let last_version = Some(vec![Value::from("0.22")]);

        vec![
            Example {
                description: "flatten a table",
                example: "echo [[N, u, s, h, e, l, l]] | flatten | first",
                result: first_cell,
            },
            Example {
                description: "flatten a column having a nested table",
                example: "echo [[origin, people]; [Ecuador, $(echo [[name, meal]; ['Andres', 'arepa']])]] | flatten | get meal",
                result: nested_meal,
            },
            Example {
                description: "restrict the flattening by passing column names",
                example: "echo [[origin, crate, versions]; [World, $(echo [[name]; ['nu-cli']]), ['0.21', '0.22']]] | flatten versions | last | = $it.versions",
                result: last_version,
            },
        ]
    }
}
/// Implements `flatten`: every value of the input stream is expanded through
/// `flat_value` and the resulting values are emitted in order.
///
/// `args` supplies the optional column names (rest arguments) and the input stream;
/// `registry` is needed to evaluate the arguments.
///
/// # Errors
///
/// Returns an error when the arguments cannot be processed. An `Err` coming out of
/// `flat_value` for an individual item is forwarded into the output stream rather
/// than being discarded.
async fn flatten(
    args: CommandArgs,
    registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let registry = registry.clone();
    let (Arguments { rest: columns }, input) = args.process(&registry).await?;

    Ok(input
        .map(move |item| {
            // `Result::into_iter()` yields nothing for `Err`, which would silently
            // swallow a failure; surface it as an element of the stream instead.
            let produced =
                flat_value(&columns, &item, &tag).unwrap_or_else(|err| vec![Err(err)]);
            futures::stream::iter(produced)
        })
        .flatten()
        .to_output_stream())
}
/// A nested table picked for expansion while flattening one row.
///
/// `Entries(column, tag, entries)` holds the column name the expanded entries are
/// stored under, a tag whose span is used when reporting that a table was already
/// flattened, and borrowed references to the table's entries.
enum TableInside<'a> {
Entries(&'a str, &'a Tag, Vec<&'a Value>),
}
fn flat_value(
columns: &[Tagged<String>],
item: &Value,
name_tag: impl Into<Tag>,
) -> Result<Vec<Result<ReturnSuccess, ShellError>>, ShellError> {
let tag = item.tag.clone();
let name_tag = name_tag.into();
let res = {
if item.is_row() {
let mut out = TaggedDictBuilder::new(tag);
let mut a_table = None;
let mut tables_explicitly_flattened = 0;
for (column, value) in item.row_entries() {
let column_requested = columns.iter().find(|c| c.item == *column);
if let Value {
value: UntaggedValue::Row(Dictionary { entries: mapa }),
..
} = value
{
if column_requested.is_none() && !columns.is_empty() {
out.insert_value(column, value.clone());
continue;
}
for (k, v) in mapa.into_iter() {
out.insert_value(k, v.clone());
}
} else if value.is_table() {
if tables_explicitly_flattened >= 1 && column_requested.is_some() {
let attempted = if let Some(name) = column_requested {
name.span()
} else {
name_tag.span
};
let already_flattened =
if let Some(TableInside::Entries(_, column_tag, _)) = a_table {
column_tag.span
} else {
name_tag.span
};
return Ok(vec![ReturnSuccess::value(
UntaggedValue::Error(ShellError::labeled_error_with_secondary(
"can only flatten one inner table at the same time",
"tried flattening more than one column with inner tables",
attempted,
"...but is flattened already",
already_flattened,
))
.into_value(name_tag),
)]);
}
if !columns.is_empty() {
if let Some(requested) = column_requested {
a_table = Some(TableInside::Entries(
&requested.item,
&requested.tag,
value.table_entries().collect(),
));
tables_explicitly_flattened += 1;
} else {
out.insert_value(column, value.clone());
}
} else if a_table.is_none() {
a_table = Some(TableInside::Entries(
&column,
&value.tag,
value.table_entries().collect(),
))
} else {
out.insert_value(column, value.clone());
}
} else {
out.insert_value(column, value.clone());
}
}
let mut expanded = vec![];
if let Some(TableInside::Entries(column, _, entries)) = a_table {
for entry in entries.into_iter() {
let mut base = out.clone();
base.insert_value(column, entry.clone());
expanded.push(base.into_value());
}
} else {
expanded.push(out.into_value());
}
expanded
} else if item.is_table() {
item.table_entries().map(Clone::clone).collect()
} else {
vec![item.clone()]
}
};
Ok(res.into_iter().map(ReturnSuccess::value).collect())
}

View File

@ -16,8 +16,8 @@ use crate::command_registry::CommandRegistry;
use crate::commands::classified::block::run_block;
use crate::commands::command::CommandArgs;
use crate::commands::{
whole_stream_command, BuildString, Command, Each, Echo, Get, Keep, StrCollect,
WholeStreamCommand, Wrap,
whole_stream_command, BuildString, Command, Each, Echo, First, Get, Keep, Last, Nth,
StrCollect, WholeStreamCommand, Wrap,
};
use crate::evaluation_context::EvaluationContext;
use crate::stream::{InputStream, OutputStream};
@ -37,9 +37,12 @@ pub fn test_examples(cmd: Command) -> Result<(), ShellError> {
// Minimal restricted commands to aid in testing
whole_stream_command(Echo {}),
whole_stream_command(BuildString {}),
whole_stream_command(First {}),
whole_stream_command(Get {}),
whole_stream_command(Keep {}),
whole_stream_command(Each {}),
whole_stream_command(Last {}),
whole_stream_command(Nth {}),
whole_stream_command(StrCollect),
whole_stream_command(Wrap),
cmd,
@ -150,9 +153,12 @@ pub fn test_anchors(cmd: Command) -> Result<(), ShellError> {
whole_stream_command(MockEcho {}),
whole_stream_command(MockLs {}),
whole_stream_command(BuildString {}),
whole_stream_command(First {}),
whole_stream_command(Get {}),
whole_stream_command(Keep {}),
whole_stream_command(Each {}),
whole_stream_command(Last {}),
whole_stream_command(Nth {}),
whole_stream_command(StrCollect),
whole_stream_command(Wrap),
cmd,
@ -351,16 +357,33 @@ impl WholeStreamCommand for MockEcho {
Value {
value: UntaggedValue::Table(table),
..
} => futures::stream::iter(
table
.into_iter()
.map(move |mut v| {
} => {
if table.len() == 1 && table[0].is_table() {
let mut values: Vec<Value> =
table[0].table_entries().map(Clone::clone).collect();
for v in values.iter_mut() {
v.tag = base_value.tag();
v
})
.map(ReturnSuccess::value),
)
.to_output_stream(),
}
let subtable =
vec![UntaggedValue::Table(values).into_value(base_value.tag())];
futures::stream::iter(subtable.into_iter().map(ReturnSuccess::value))
.to_output_stream()
} else {
futures::stream::iter(
table
.into_iter()
.map(move |mut v| {
v.tag = base_value.tag();
v
})
.map(ReturnSuccess::value),
)
.to_output_stream()
}
}
_ => OutputStream::one(Ok(ReturnSuccess::Value(Value {
value: i.value.clone(),
tag: base_value.tag,

View File

@ -0,0 +1,168 @@
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
use nu_test_support::playground::Playground;
use nu_test_support::{nu, pipeline};
/// Flattening rows whose `people` column holds a nested value exposes the inner
/// `name` column at the top level.
#[test]
fn flatten_nested_tables_with_columns() {
    let commands = pipeline(
        r#"
echo [[origin, people]; [Ecuador, $(= 'Andres' | wrap name)]]
[[origin, people]; [Nu, $(= 'nuno' | wrap name)]]
| flatten
| get name
| str collect ','
"#,
    );

    let outcome = nu!(cwd: ".", commands);

    assert_eq!(outcome.out, "Andres,nuno");
}
/// A nested table with several columns is flattened so that each of its columns
/// (here `meal`) can be read directly from the outer rows.
#[test]
fn flatten_nested_tables_that_have_many_columns() {
    let commands = pipeline(
        r#"
echo [[origin, people]; [Ecuador, $(echo [[name, meal]; ['Andres', 'arepa']])]]
[[origin, people]; [USA, $(echo [[name, meal]; ['Katz', 'nurepa']])]]
| flatten
| get meal
| str collect ','
"#,
    );

    let outcome = nu!(cwd: ".", commands);

    assert_eq!(outcome.out, "arepa,nurepa");
}
/// Flattening a table nested inside a table yields its individual values, so `nth 1`
/// picks the second one.
#[test]
fn flatten_nested_tables() {
    let commands = pipeline(
        r#"
echo [[Andrés, Nicolás, Robalino]] | flatten | nth 1
"#,
    );

    let outcome = nu!(cwd: ".", commands);

    assert_eq!(outcome.out, "Nicolás");
}
/// JSON fixture shared by the explicit-column tests: two records, each containing
/// nested rows (`people`, `code`) and nested tables (`tags`, `city`).
const KATZ_JSON: &str = r#"
[
{
"origin": "Ecuador",
"people": {
"name": "Andres",
"meal": "arepa"
},
"code": { "id": 1, "references": 2},
"tags": ["carbohydrate", "corn", "maiz"],
"city": ["Guayaquil", "Samborondón"]
},
{
"origin": "USA",
"people": {
"name": "Katz",
"meal": "nurepa"
},
"code": { "id": 2, "references": 1},
"tags": ["carbohydrate", "shell food", "amigos flavor"],
"city": ["Oregon", "Brooklin"]
}
]
"#;

/// Naming a row column (`people`) lifts its entries to the top level, so `name` can
/// be filtered on directly.
#[test]
fn flatten_row_column_explictly() {
    Playground::setup("flatten_test_1", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed("katz.json", KATZ_JSON)]);

        let actual = nu!(
            cwd: dirs.test(),
            "open katz.json | flatten people | where name == Andres | count"
        );

        assert_eq!(actual.out, "1");
    })
}

/// Naming a table column (`city`) produces one row per table entry, while columns
/// that were not requested (`people`) stay nested.
#[test]
fn flatten_table_columns_explictly() {
    Playground::setup("flatten_test_2", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed("katz.json", KATZ_JSON)]);

        let actual = nu!(
            cwd: dirs.test(),
            "open katz.json | flatten city | where people.name == Katz | count"
        );

        assert_eq!(actual.out, "2");
    })
}

/// Requesting two columns that both hold tables is rejected with an error that
/// mentions both the attempted and the already flattened column.
#[test]
fn flatten_more_than_one_column_that_are_subtables_not_supported() {
    Playground::setup("flatten_test_3", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed("katz.json", KATZ_JSON)]);

        let actual = nu!(
            cwd: dirs.test(),
            "open katz.json | flatten tags city"
        );

        assert!(actual.err.contains("tried flattening"));
        assert!(actual.err.contains("but is flattened already"));
    })
}

View File

@ -16,6 +16,7 @@ mod empty;
mod enter;
mod every;
mod first;
mod flatten;
mod format;
mod get;
mod group_by;

View File

@ -196,7 +196,7 @@ impl Dictionary {
}
/// A helper to help create dictionaries for you. It has the ability to insert values into the dictionary while maintaining the tags that need to be applied to the individual members
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct TaggedDictBuilder {
tag: Tag,
dict: IndexMap<String, Value>,