From d620f76a21161f7661861a0cb936dacaa1bc510b Mon Sep 17 00:00:00 2001 From: JT <547158+jntrnr@users.noreply.github.com> Date: Wed, 16 Feb 2022 14:30:37 -0500 Subject: [PATCH] Make comparisons/sort-by more 'global' (#4500) * Make comparisons/sort-by more 'global' * Let custom values do their own comparisons --- .../src/dataframe/eager/drop_duplicates.rs | 4 +- crates/nu-command/src/dataframe/eager/mod.rs | 1 + crates/nu-command/tests/commands/sort_by.rs | 13 + crates/nu-protocol/src/ast/cell_path.rs | 4 +- crates/nu-protocol/src/ast/operator.rs | 2 +- crates/nu-protocol/src/value/mod.rs | 328 +++++++++++++++--- crates/nu-protocol/src/value/range.rs | 18 + 7 files changed, 325 insertions(+), 45 deletions(-) diff --git a/crates/nu-command/src/dataframe/eager/drop_duplicates.rs b/crates/nu-command/src/dataframe/eager/drop_duplicates.rs index 4d61a95e1..ccf51661e 100644 --- a/crates/nu-command/src/dataframe/eager/drop_duplicates.rs +++ b/crates/nu-command/src/dataframe/eager/drop_duplicates.rs @@ -39,11 +39,11 @@ impl Command for DropDuplicates { NuDataFrame::try_from_columns(vec![ Column::new( "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], + vec![Value::test_int(3), Value::test_int(1)], ), Column::new( "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], + vec![Value::test_int(4), Value::test_int(2)], ), ]) .expect("simple df for test should not fail") diff --git a/crates/nu-command/src/dataframe/eager/mod.rs b/crates/nu-command/src/dataframe/eager/mod.rs index 1177bd205..211aabe83 100644 --- a/crates/nu-command/src/dataframe/eager/mod.rs +++ b/crates/nu-command/src/dataframe/eager/mod.rs @@ -82,6 +82,7 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) { DataTypes, DescribeDF, DropDF, + DropDuplicates, DropNulls, Dummies, FilterWith, diff --git a/crates/nu-command/tests/commands/sort_by.rs b/crates/nu-command/tests/commands/sort_by.rs index 08dcd923e..1bae1e773 100644 --- a/crates/nu-command/tests/commands/sort_by.rs +++ b/crates/nu-command/tests/commands/sort_by.rs @@ -146,3 +146,16 @@ fn ls_sort_by_type_name_insensitive() { let json_output = r#"[{"name": "C","type": "Dir"},{"name": "a.txt","type": "File"},{"name": "B.txt","type": "File"}]"#; assert_eq!(actual.out, json_output); } + +#[test] +fn sort_different_types() { + let actual = nu!( + cwd: "tests/fixtures/formats", pipeline( + r#" + [a, 1, b, 2, c, 3, [4, 5, 6], d, 4, [1, 2, 3]] | sort-by | to json --raw + "# + )); + + let json_output = r#"[1,2,3,4,"a","b","c","d",[1,2,3],[4,5,6]]"#; + assert_eq!(actual.out, json_output); +} diff --git a/crates/nu-protocol/src/ast/cell_path.rs b/crates/nu-protocol/src/ast/cell_path.rs index e21c8216b..02f4310f5 100644 --- a/crates/nu-protocol/src/ast/cell_path.rs +++ b/crates/nu-protocol/src/ast/cell_path.rs @@ -2,7 +2,7 @@ use super::Expression; use crate::Span; use serde::{Deserialize, Serialize}; -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialOrd, Serialize, Deserialize)] pub enum PathMember { String { val: String, span: Span }, Int { val: usize, span: Span }, @@ -18,7 +18,7 @@ impl PartialEq for PathMember { } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct CellPath { pub members: Vec, } diff --git a/crates/nu-protocol/src/ast/operator.rs b/crates/nu-protocol/src/ast/operator.rs index 690390888..8291c6134 100644 --- a/crates/nu-protocol/src/ast/operator.rs +++ b/crates/nu-protocol/src/ast/operator.rs @@ -50,7 +50,7 @@ impl Display for Operator { } } -#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Serialize, Deserialize)] pub enum RangeInclusion { Inclusive, RightExclusive, diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index c5dcb31c0..54cdc9c64 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -30,6 +30,8 @@ pub use custom_value::CustomValue; use crate::ShellError; /// Core structured values that pass through the pipeline in engine-q +// NOTE: Please do not reorder these enum cases without thinking through the +// impact on the PartialOrd implementation and the global sort order #[derive(Debug, Serialize, Deserialize)] pub enum Value { Bool { @@ -40,6 +42,10 @@ pub enum Value { val: i64, span: Span, }, + Float { + val: f64, + span: Span, + }, Filesize { val: i64, span: Span, @@ -56,10 +62,6 @@ pub enum Value { val: Box, span: Span, }, - Float { - val: f64, - span: Span, - }, String { val: String, span: Span, @@ -901,49 +903,295 @@ impl PartialOrd for Value { } match (self, other) { - (Value::Bool { val: lhs, .. }, Value::Bool { val: rhs, .. }) => lhs.partial_cmp(rhs), - (Value::Int { val: lhs, .. }, Value::Int { val: rhs, .. }) => lhs.partial_cmp(rhs), - (Value::Float { val: lhs, .. }, Value::Float { val: rhs, .. }) => { - compare_floats(*lhs, *rhs) - } - (Value::Date { val: lhs, .. }, Value::Date { val: rhs, .. }) => lhs.partial_cmp(rhs), - (Value::String { val: lhs, .. }, Value::String { val: rhs, .. }) => { - lhs.partial_cmp(rhs) - } - (Value::Int { val: lhs, .. }, Value::Float { val: rhs, .. }) => { - compare_floats(*lhs as f64, *rhs) - } - (Value::Float { val: lhs, .. }, Value::Int { val: rhs, .. }) => { - compare_floats(*lhs, *rhs as f64) - } - (Value::Duration { val: lhs, .. }, Value::Duration { val: rhs, .. }) => { - lhs.partial_cmp(rhs) - } - (Value::Filesize { val: lhs, .. }, Value::Filesize { val: rhs, .. }) => { - lhs.partial_cmp(rhs) - } - (Value::Block { val: b1, .. }, Value::Block { val: b2, .. }) if b1 == b2 => { - Some(Ordering::Equal) - } - (Value::List { vals: lhs, .. }, Value::List { vals: rhs, .. }) => lhs.partial_cmp(rhs), + (Value::Bool { val: lhs, .. }, rhs) => match rhs { + Value::Bool { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::Int { .. } => Some(Ordering::Less), + Value::Float { .. } => Some(Ordering::Less), + Value::Filesize { .. } => Some(Ordering::Less), + Value::Duration { .. } => Some(Ordering::Less), + Value::Date { .. } => Some(Ordering::Less), + Value::Range { .. } => Some(Ordering::Less), + Value::String { .. } => Some(Ordering::Less), + Value::Record { .. } => Some(Ordering::Less), + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Int { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::Float { val: rhs, .. } => compare_floats(*lhs as f64, *rhs), + Value::Filesize { .. } => Some(Ordering::Less), + Value::Duration { .. } => Some(Ordering::Less), + Value::Date { .. } => Some(Ordering::Less), + Value::Range { .. } => Some(Ordering::Less), + Value::String { .. } => Some(Ordering::Less), + Value::Record { .. } => Some(Ordering::Less), + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Float { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { val: rhs, .. } => compare_floats(*lhs, *rhs as f64), + Value::Float { val: rhs, .. } => compare_floats(*lhs, *rhs), + Value::Filesize { .. } => Some(Ordering::Less), + Value::Duration { .. } => Some(Ordering::Less), + Value::Date { .. } => Some(Ordering::Less), + Value::Range { .. } => Some(Ordering::Less), + Value::String { .. } => Some(Ordering::Less), + Value::Record { .. } => Some(Ordering::Less), + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Filesize { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::Duration { .. } => Some(Ordering::Less), + Value::Date { .. } => Some(Ordering::Less), + Value::Range { .. } => Some(Ordering::Less), + Value::String { .. } => Some(Ordering::Less), + Value::Record { .. } => Some(Ordering::Less), + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Duration { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::Date { .. } => Some(Ordering::Less), + Value::Range { .. } => Some(Ordering::Less), + Value::String { .. } => Some(Ordering::Less), + Value::Record { .. } => Some(Ordering::Less), + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Date { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::Range { .. } => Some(Ordering::Less), + Value::String { .. } => Some(Ordering::Less), + Value::Record { .. } => Some(Ordering::Less), + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Range { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::String { .. } => Some(Ordering::Less), + Value::Record { .. } => Some(Ordering::Less), + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::String { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { .. } => Some(Ordering::Greater), + Value::String { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::Record { .. } => Some(Ordering::Less), + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, ( Value::Record { - vals: lhs, - cols: lhs_headers, + cols: lhs_cols, + vals: lhs_vals, .. }, + rhs, + ) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { .. } => Some(Ordering::Greater), + Value::String { .. } => Some(Ordering::Greater), Value::Record { - vals: rhs, - cols: rhs_headers, + cols: rhs_cols, + vals: rhs_vals, .. - }, - ) if lhs_headers == rhs_headers && lhs == rhs => Some(Ordering::Equal), - (Value::Binary { val: lhs, .. }, Value::Binary { val: rhs, .. }) => { - lhs.partial_cmp(rhs) - } + } => { + let result = lhs_cols.partial_cmp(rhs_cols); + if result == Some(Ordering::Equal) { + lhs_vals.partial_cmp(rhs_vals) + } else { + result + } + } + Value::List { .. } => Some(Ordering::Less), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::List { vals: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { .. } => Some(Ordering::Greater), + Value::String { .. } => Some(Ordering::Greater), + Value::Record { .. } => Some(Ordering::Greater), + Value::List { vals: rhs, .. } => lhs.partial_cmp(rhs), + Value::Block { .. } => Some(Ordering::Less), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Block { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { .. } => Some(Ordering::Greater), + Value::String { .. } => Some(Ordering::Greater), + Value::Record { .. } => Some(Ordering::Greater), + Value::List { .. } => Some(Ordering::Greater), + Value::Block { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::Nothing { .. } => Some(Ordering::Less), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Nothing { .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { .. } => Some(Ordering::Greater), + Value::String { .. } => Some(Ordering::Greater), + Value::Record { .. } => Some(Ordering::Greater), + Value::List { .. } => Some(Ordering::Greater), + Value::Block { .. } => Some(Ordering::Greater), + Value::Nothing { .. } => Some(Ordering::Equal), + Value::Error { .. } => Some(Ordering::Less), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Error { .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { .. } => Some(Ordering::Greater), + Value::String { .. } => Some(Ordering::Greater), + Value::Record { .. } => Some(Ordering::Greater), + Value::List { .. } => Some(Ordering::Greater), + Value::Block { .. } => Some(Ordering::Greater), + Value::Nothing { .. } => Some(Ordering::Greater), + Value::Error { .. } => Some(Ordering::Equal), + Value::Binary { .. } => Some(Ordering::Less), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::Binary { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { .. } => Some(Ordering::Greater), + Value::String { .. } => Some(Ordering::Greater), + Value::Record { .. } => Some(Ordering::Greater), + Value::List { .. } => Some(Ordering::Greater), + Value::Block { .. } => Some(Ordering::Greater), + Value::Nothing { .. } => Some(Ordering::Greater), + Value::Error { .. } => Some(Ordering::Greater), + Value::Binary { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::CellPath { .. } => Some(Ordering::Less), + Value::CustomValue { .. } => Some(Ordering::Less), + }, + (Value::CellPath { val: lhs, .. }, rhs) => match rhs { + Value::Bool { .. } => Some(Ordering::Greater), + Value::Int { .. } => Some(Ordering::Greater), + Value::Float { .. } => Some(Ordering::Greater), + Value::Filesize { .. } => Some(Ordering::Greater), + Value::Duration { .. } => Some(Ordering::Greater), + Value::Date { .. } => Some(Ordering::Greater), + Value::Range { .. } => Some(Ordering::Greater), + Value::String { .. } => Some(Ordering::Greater), + Value::Record { .. } => Some(Ordering::Greater), + Value::List { .. } => Some(Ordering::Greater), + Value::Block { .. } => Some(Ordering::Greater), + Value::Nothing { .. } => Some(Ordering::Greater), + Value::Error { .. } => Some(Ordering::Greater), + Value::Binary { .. } => Some(Ordering::Greater), + Value::CellPath { val: rhs, .. } => lhs.partial_cmp(rhs), + Value::CustomValue { .. } => Some(Ordering::Less), + }, (Value::CustomValue { val: lhs, .. }, rhs) => lhs.partial_cmp(rhs), - (Value::Nothing { .. }, Value::Nothing { .. }) => Some(Ordering::Equal), - (_, _) => None, } } } diff --git a/crates/nu-protocol/src/value/range.rs b/crates/nu-protocol/src/value/range.rs index 65c88c57f..c1b66456d 100644 --- a/crates/nu-protocol/src/value/range.rs +++ b/crates/nu-protocol/src/value/range.rs @@ -129,6 +129,24 @@ impl Range { } } +impl PartialOrd for Range { + fn partial_cmp(&self, other: &Self) -> Option { + match self.from.partial_cmp(&other.from) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + match self.incr.partial_cmp(&other.incr) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + match self.to.partial_cmp(&other.to) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + self.inclusion.partial_cmp(&other.inclusion) + } +} + pub struct RangeIterator { curr: Value, end: Value,