histogram: support regular values. (#2300)

This commit is contained in:
Andrés N. Robalino 2020-08-04 04:57:25 -05:00 committed by GitHub
parent c48c092125
commit 7f35bfc005
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 44 deletions

View File

@ -8,7 +8,6 @@ pub struct Histogram;
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct HistogramArgs { pub struct HistogramArgs {
column_name: Tagged<String>,
rest: Vec<Tagged<String>>, rest: Vec<Tagged<String>>,
} }
@ -19,16 +18,10 @@ impl WholeStreamCommand for Histogram {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("histogram") Signature::build("histogram").rest(
.required( SyntaxShape::String,
"column_name", "column name to give the histogram's frequency column",
SyntaxShape::String, )
"the name of the column to graph by",
)
.rest(
SyntaxShape::String,
"column name to give the histogram's frequency column",
)
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
@ -52,13 +45,13 @@ impl WholeStreamCommand for Histogram {
}, },
Example { Example {
description: description:
"Get a histogram for the types of files, with frequency column named count", "Get a histogram for the types of files, with frequency column named percentage",
example: "ls | histogram type count", example: "ls | histogram type percentage",
result: None, result: None,
}, },
Example { Example {
description: "Get a histogram for a list of numbers", description: "Get a histogram for a list of numbers",
example: "echo [1 2 3 1 1 1 2 2 1 1] | wrap values | histogram values", example: "echo [1 2 3 1 1 1 2 2 1 1] | histogram",
result: None, result: None,
}, },
] ]
@ -72,35 +65,16 @@ pub async fn histogram(
let registry = registry.clone(); let registry = registry.clone();
let name = args.call_info.name_tag.clone(); let name = args.call_info.name_tag.clone();
let (HistogramArgs { column_name, rest }, input) = args.process(&registry).await?; let (HistogramArgs { rest: mut columns }, input) = args.process(&registry).await?;
let values: Vec<Value> = input.collect().await; let values: Vec<Value> = input.collect().await;
let column_grouper = column_name.clone(); let column_grouper = if !columns.is_empty() {
Some(columns.remove(0))
} else {
None
};
let results = crate::utils::data::report( let column_names_supplied: Vec<_> = columns.iter().map(|f| f.item.clone()).collect();
&UntaggedValue::table(&values).into_value(&name),
crate::utils::data::Operation {
grouper: Some(Box::new(move |_, _| Ok(String::from("frequencies")))),
splitter: Some(Box::new(move |_, row: &Value| {
let key = &column_grouper;
match row.get_data_by_key(key.borrow_spanned()) {
Some(key) => nu_value_ext::as_string(&key),
None => Err(ShellError::labeled_error(
"unknown column",
"unknown column",
key.tag(),
)),
}
})),
format: None,
eval: &None,
},
&name,
)?;
let labels = results.labels.y.clone();
let column_names_supplied: Vec<_> = rest.iter().map(|f| f.item.clone()).collect();
let frequency_column_name = if column_names_supplied.is_empty() { let frequency_column_name = if column_names_supplied.is_empty() {
"frequency".to_string() "frequency".to_string()
@ -108,7 +82,24 @@ pub async fn histogram(
column_names_supplied[0].clone() column_names_supplied[0].clone()
}; };
let column = (*column_name).clone(); let column = if let Some(ref column) = column_grouper {
column.clone()
} else {
"value".to_string().tagged(&name)
};
let results = crate::utils::data::report(
&UntaggedValue::table(&values).into_value(&name),
crate::utils::data::Operation {
grouper: Some(Box::new(move |_, _| Ok(String::from("frequencies")))),
splitter: Some(splitter(column_grouper)),
format: None,
eval: &None,
},
&name,
)?;
let labels = results.labels.y.clone();
let mut idx = 0; let mut idx = 0;
Ok(futures::stream::iter( Ok(futures::stream::iter(
@ -136,7 +127,7 @@ pub async fn histogram(
})? })?
.clone(); .clone();
fact.insert_value(&column, column_value); fact.insert_value(&column.item, column_value);
fact.insert_untagged("count", UntaggedValue::int(count)); fact.insert_untagged("count", UntaggedValue::int(count));
let string = std::iter::repeat("*") let string = std::iter::repeat("*")
@ -155,6 +146,26 @@ pub async fn histogram(
.to_output_stream()) .to_output_stream())
} }
/// Builds the closure that maps a row to the key it is counted under.
///
/// * `Some(column)` — looks `column` up in the row and converts the value
///   found to a string. A row without that column produces an
///   "unknown column" error tagged at the column name.
/// * `None` — converts the row itself to a string.
fn splitter(
    by: Option<Tagged<String>>,
) -> Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send> {
    match by {
        // No column requested: the incoming value is its own key.
        None => Box::new(|_, row: &Value| nu_value_ext::as_string(row)),
        Some(column) => Box::new(move |_, row: &Value| {
            row.get_data_by_key(column.borrow_spanned())
                .ok_or_else(|| {
                    ShellError::labeled_error("unknown column", "unknown column", column.tag())
                })
                .and_then(|found| nu_value_ext::as_string(&found))
        }),
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::Histogram; use super::Histogram;

View File

@ -3,7 +3,7 @@ use nu_test_support::playground::Playground;
use nu_test_support::{nu, pipeline}; use nu_test_support::{nu, pipeline};
#[test] #[test]
fn summarizes() { fn summarizes_by_column_given() {
Playground::setup("histogram_test_1", |dirs, sandbox| { Playground::setup("histogram_test_1", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed( sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv", "los_tres_caballeros.csv",
@ -34,9 +34,38 @@ fn summarizes() {
}) })
} }
// Checks that `histogram` with no column argument counts the incoming values
// themselves: the `rusty_at` column of the fixture holds "Estados Unidos"
// twice, so the row whose `value` is "Estados Unidos" must report a count of 2.
#[test]
fn summarizes_by_values() {
Playground::setup("histogram_test_2", |dirs, sandbox| {
// Fixture: three people, two of whom share the same `rusty_at` value.
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv",
r#"
first_name,last_name,rusty_at
Andrés,Robalino,Ecuador
Jonathan,Turner,Estados Unidos
Yehuda,Katz,Estados Unidos
"#,
)]);
// `get rusty_at` yields the column's values, which are piped to `histogram`
// without naming a column; the result is then filtered on its `value` column.
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.csv
| get rusty_at
| histogram
| where value == "Estados Unidos"
| get count
| echo $it
"#
));
// Two fixture rows carry "Estados Unidos".
assert_eq!(actual.out, "2");
})
}
#[test] #[test]
fn help() { fn help() {
Playground::setup("histogram_test_help", |dirs, _sandbox| { Playground::setup("histogram_test_3", |dirs, _sandbox| {
let help_command = nu!( let help_command = nu!(
cwd: dirs.test(), pipeline( cwd: dirs.test(), pipeline(
r#" r#"