2019-11-12 08:07:43 +01:00
use crate ::commands ::group_by ::group ;
2019-11-12 09:38:55 +01:00
use crate ::commands ::WholeStreamCommand ;
2019-11-12 08:07:43 +01:00
use crate ::prelude ::* ;
2020-01-04 05:00:39 +01:00
use crate ::utils ::data_processing ::{ columns_sorted , evaluate , map_max , reduce , t_sort } ;
Extract core stuff into own crates
This commit extracts five new crates:
- nu-source, which contains the core source-code handling logic in Nu,
including Text, Span, and also the pretty.rs-based debug logic
- nu-parser, which is the parser and expander logic
- nu-protocol, which is the bulk of the types and basic conveniences
used by plugins
- nu-errors, which contains ShellError, ParseError and error handling
conveniences
- nu-textview, which is the textview plugin extracted into a crate
One of the major consequences of this refactor is that it's no longer
possible to `impl X for Spanned<Y>` outside of the `nu-source` crate, so
a lot of types became more concrete (Value became a concrete type
instead of Spanned<Value>, for example).
This also turned a number of inherent methods in the main nu crate into
plain functions (impl Value {} became a bunch of functions in the
`value` namespace in `crate::data::value`).
2019-11-26 03:30:48 +01:00
use nu_errors ::ShellError ;
2019-12-04 20:52:31 +01:00
use nu_protocol ::{
Primitive , ReturnSuccess , Signature , SyntaxShape , TaggedDictBuilder , UntaggedValue , Value ,
} ;
2019-11-21 15:33:14 +01:00
use nu_source ::Tagged ;
2020-01-04 05:00:39 +01:00
use num_traits ::{ ToPrimitive , Zero } ;
2019-11-12 08:07:43 +01:00
/// Unit type that carries the `histogram` command; all behavior lives in its
/// `WholeStreamCommand` implementation.
pub struct Histogram ;
/// Arguments of the `histogram` command, deserialized from the call.
#[derive(Deserialize)]
pub struct HistogramArgs {
    /// Name of the column whose values are grouped and graphed.
    column_name: Tagged<String>,
    /// Remaining positional arguments; only the first one is read, as the
    /// name of the frequency column.
    rest: Vec<Tagged<String>>,
}
impl WholeStreamCommand for Histogram {
fn name ( & self ) -> & str {
" histogram "
}
fn signature ( & self ) -> Signature {
2019-11-12 09:38:55 +01:00
Signature ::build ( " histogram " )
. required (
" column_name " ,
SyntaxShape ::String ,
" the name of the column to graph by " ,
)
. rest (
2020-04-06 09:16:14 +02:00
SyntaxShape ::String ,
2019-11-12 09:38:55 +01:00
" column name to give the histogram's frequency column " ,
)
2019-11-12 08:07:43 +01:00
}
fn usage ( & self ) -> & str {
" Creates a new table with a histogram based on the column name passed in. "
}
fn run (
& self ,
args : CommandArgs ,
registry : & CommandRegistry ,
) -> Result < OutputStream , ShellError > {
2020-05-16 05:18:24 +02:00
histogram ( args , registry )
2019-11-12 08:07:43 +01:00
}
2020-05-12 17:54:29 +02:00
2020-05-18 14:56:01 +02:00
fn examples ( & self ) -> Vec < Example > {
vec! [
2020-05-12 17:54:29 +02:00
Example {
description : " Get a histogram for the types of files " ,
example : " ls | histogram type " ,
2020-05-18 14:56:01 +02:00
result : None ,
2020-05-12 17:54:29 +02:00
} ,
Example {
description :
" Get a histogram for the types of files, with frequency column named count " ,
example : " ls | histogram type count " ,
2020-05-18 14:56:01 +02:00
result : None ,
2020-05-12 17:54:29 +02:00
} ,
Example {
description : " Get a histogram for a list of numbers " ,
2020-05-18 14:56:01 +02:00
example : " echo [1 2 3 1 1 1 2 2 1 1] | histogram " ,
result : None ,
2020-05-12 17:54:29 +02:00
} ,
]
}
2019-11-12 08:07:43 +01:00
}
pub fn histogram (
2020-05-16 05:18:24 +02:00
args : CommandArgs ,
registry : & CommandRegistry ,
2019-11-12 08:07:43 +01:00
) -> Result < OutputStream , ShellError > {
2020-05-16 05:18:24 +02:00
let registry = registry . clone ( ) ;
let name = args . call_info . name_tag . clone ( ) ;
2019-11-12 08:07:43 +01:00
let stream = async_stream! {
2020-05-16 05:18:24 +02:00
let ( HistogramArgs { column_name , rest } , mut input ) = args . process ( & registry ) . await ? ;
Move external closer to internal (#1611)
* Refactor InputStream and affected commands.
First, making `values` private and leaning on the `Stream` implementation makes
consumes of `InputStream` less likely to have to change in the future, if we
change what an `InputStream` is internally.
Second, we're dropping `Option<InputStream>` as the input to pipelines,
internals, and externals. Instead, `InputStream.is_empty` can be used to check
for "emptiness". Empty streams are typically only ever used as the first input
to a pipeline.
* Add run_external internal command.
We want to push external commands closer to internal commands, eventually
eliminating the concept of "external" completely. This means we can consolidate
a couple of things:
- Variable evaluation (for example, `$it`, `$nu`, alias vars)
- Behaviour of whole stream vs per-item external execution
It should also make it easier for us to start introducing argument signatures
for external commands,
* Update run_external.rs
* Update run_external.rs
* Update run_external.rs
* Update run_external.rs
Co-authored-by: Jonathan Turner <jonathandturner@users.noreply.github.com>
2020-04-20 05:30:44 +02:00
let values : Vec < Value > = input . collect ( ) . await ;
2019-11-12 08:07:43 +01:00
let Tagged { item : group_by , .. } = column_name . clone ( ) ;
let groups = group ( & column_name , values , & name ) ? ;
let group_labels = columns_sorted ( Some ( group_by . clone ( ) ) , & groups , & name ) ;
let sorted = t_sort ( Some ( group_by . clone ( ) ) , None , & groups , & name ) ? ;
let evaled = evaluate ( & sorted , None , & name ) ? ;
let reduced = reduce ( & evaled , None , & name ) ? ;
let maxima = map_max ( & reduced , None , & name ) ? ;
let percents = percentages ( & reduced , maxima , & name ) ? ;
match percents {
2019-11-21 15:33:14 +01:00
Value {
value : UntaggedValue ::Table ( datasets ) ,
2019-11-12 08:07:43 +01:00
..
} = > {
let mut idx = 0 ;
2019-11-12 09:38:55 +01:00
let column_names_supplied : Vec < _ > = rest . iter ( ) . map ( | f | f . item . clone ( ) ) . collect ( ) ;
let frequency_column_name = if column_names_supplied . is_empty ( ) {
2019-11-27 03:32:05 +01:00
" frequency " . to_string ( )
2019-11-12 09:38:55 +01:00
} else {
column_names_supplied [ 0 ] . clone ( )
} ;
let column = ( * column_name ) . clone ( ) ;
2020-05-20 08:02:36 +02:00
let count_column_name = " count " . to_string ( ) ;
let count_shell_error = ShellError ::labeled_error ( " Unable to load group count " , " unabled to load group count " , & name ) ;
let mut count_values : Vec < u64 > = Vec ::new ( ) ;
for table_entry in reduced . table_entries ( ) {
match table_entry {
Value {
value : UntaggedValue ::Table ( list ) ,
..
} = > {
for i in list {
if let Ok ( count ) = i . value . clone ( ) . into_value ( & name ) . as_u64 ( ) {
count_values . push ( count ) ;
} else {
yield Err ( count_shell_error ) ;
return ;
}
}
}
_ = > {
yield Err ( count_shell_error ) ;
return ;
}
}
}
2020-01-02 05:02:46 +01:00
if let Value { value : UntaggedValue ::Table ( start ) , .. } = datasets . get ( 0 ) . ok_or_else ( | | ShellError ::labeled_error ( " Unable to load dataset " , " unabled to load dataset " , & name ) ) ? {
2019-12-06 16:28:26 +01:00
for percentage in start . iter ( ) {
2019-11-12 09:38:55 +01:00
2019-11-12 08:07:43 +01:00
let mut fact = TaggedDictBuilder ::new ( & name ) ;
2020-01-02 05:02:46 +01:00
let value : Tagged < String > = group_labels . get ( idx ) . ok_or_else ( | | ShellError ::labeled_error ( " Unable to load group labels " , " unabled to load group labels " , & name ) ) ? . clone ( ) ;
2019-12-04 20:52:31 +01:00
fact . insert_value ( & column , UntaggedValue ::string ( value . item ) . into_value ( value . tag ) ) ;
2019-11-12 08:07:43 +01:00
2020-05-20 08:02:36 +02:00
fact . insert_untagged ( & count_column_name , UntaggedValue ::int ( count_values [ idx ] ) ) ;
2020-01-02 05:02:46 +01:00
if let Value { value : UntaggedValue ::Primitive ( Primitive ::Int ( ref num ) ) , ref tag } = percentage . clone ( ) {
let string = std ::iter ::repeat ( " * " ) . take ( num . to_i32 ( ) . ok_or_else ( | | ShellError ::labeled_error ( " Expected a number " , " expected a number " , tag ) ) ? as usize ) . collect ::< String > ( ) ;
2019-12-04 20:52:31 +01:00
fact . insert_untagged ( & frequency_column_name , UntaggedValue ::string ( string ) ) ;
2019-11-12 08:07:43 +01:00
}
2019-12-06 16:28:26 +01:00
idx + = 1 ;
2019-11-12 08:07:43 +01:00
2019-11-21 15:33:14 +01:00
yield ReturnSuccess ::value ( fact . into_value ( ) ) ;
2019-11-12 08:07:43 +01:00
}
}
}
_ = > { }
}
} ;
Ok ( stream . to_output_stream ( ) )
}
2019-11-21 15:33:14 +01:00
fn percentages ( values : & Value , max : Value , tag : impl Into < Tag > ) -> Result < Value , ShellError > {
2019-11-12 08:07:43 +01:00
let tag = tag . into ( ) ;
2019-11-21 15:33:14 +01:00
let results : Value = match values {
Value {
value : UntaggedValue ::Table ( datasets ) ,
2019-11-12 08:07:43 +01:00
..
} = > {
let datasets : Vec < _ > = datasets
2019-12-06 16:28:26 +01:00
. iter ( )
2019-11-12 09:38:55 +01:00
. map ( | subsets | match subsets {
2019-11-21 15:33:14 +01:00
Value {
value : UntaggedValue ::Table ( data ) ,
2019-11-12 09:38:55 +01:00
..
} = > {
2020-01-04 05:00:39 +01:00
let data = data
. iter ( )
. map ( | d | match d {
Value {
value : UntaggedValue ::Primitive ( Primitive ::Int ( n ) ) ,
..
} = > {
let max = match & max {
2019-11-21 15:33:14 +01:00
Value {
2020-01-04 05:00:39 +01:00
value : UntaggedValue ::Primitive ( Primitive ::Int ( maxima ) ) ,
2019-11-12 09:38:55 +01:00
..
2020-01-04 05:00:39 +01:00
} = > maxima . clone ( ) ,
_ = > Zero ::zero ( ) ,
} ;
let n = ( n * 100 ) / max ;
UntaggedValue ::int ( n ) . into_value ( & tag )
}
_ = > UntaggedValue ::int ( 0 ) . into_value ( & tag ) ,
} )
. collect ::< Vec < _ > > ( ) ;
2019-11-21 15:33:14 +01:00
UntaggedValue ::Table ( data ) . into_value ( & tag )
2019-11-12 08:07:43 +01:00
}
2019-11-21 15:33:14 +01:00
_ = > UntaggedValue ::Table ( vec! [ ] ) . into_value ( & tag ) ,
2019-11-12 08:07:43 +01:00
} )
. collect ( ) ;
2019-11-21 15:33:14 +01:00
UntaggedValue ::Table ( datasets ) . into_value ( & tag )
2019-11-12 08:07:43 +01:00
}
other = > other . clone ( ) ,
} ;
Ok ( results )
}
2020-05-18 14:56:01 +02:00
#[cfg(test)]
mod tests {
    use super::Histogram;
    use crate::examples::test as test_examples;

    /// Runs the command's declared examples through the shared examples test helper.
    #[test]
    fn examples_work_as_expected() {
        test_examples(Histogram)
    }
}