From 87a99bbabfdb4cc151c07857c999929df1fd57b5 Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Mon, 26 Aug 2019 10:16:34 -0400 Subject: [PATCH 1/2] Implement to-bson --- src/cli.rs | 1 + src/commands.rs | 2 + src/commands/from_bson.rs | 10 +- src/commands/to_bson.rs | 231 ++++++++++++++++++++++++++++++++++++++ src/object/base.rs | 42 +++++++ tests/filters_test.rs | 11 ++ 6 files changed, 296 insertions(+), 1 deletion(-) create mode 100644 src/commands/to_bson.rs diff --git a/src/cli.rs b/src/cli.rs index 614ad28f96..b59f5634ad 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -162,6 +162,7 @@ pub async fn cli() -> Result<(), Box> { whole_stream_command(Reverse), whole_stream_command(Trim), whole_stream_command(ToArray), + whole_stream_command(ToBSON), whole_stream_command(ToCSV), whole_stream_command(ToJSON), whole_stream_command(ToTOML), diff --git a/src/commands.rs b/src/commands.rs index 9b17e9db95..d1d9297fd4 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -48,6 +48,7 @@ crate mod split_row; crate mod table; crate mod tags; crate mod to_array; +crate mod to_bson; crate mod to_csv; crate mod to_json; crate mod to_toml; @@ -104,6 +105,7 @@ crate use split_row::SplitRow; crate use table::Table; crate use tags::Tags; crate use to_array::ToArray; +crate use to_bson::ToBSON; crate use to_csv::ToCSV; crate use to_json::ToJSON; crate use to_toml::ToTOML; diff --git a/src/commands/from_bson.rs b/src/commands/from_bson.rs index 2d98e1907c..e244614ccf 100644 --- a/src/commands/from_bson.rs +++ b/src/commands/from_bson.rs @@ -58,6 +58,7 @@ fn convert_bson_value_to_nu_value(v: &Bson, tag: impl Into) -> Tagged Value::Primitive(Primitive::Int(*n as i64)).tagged(tag), Bson::I64(n) => Value::Primitive(Primitive::Int(*n as i64)).tagged(tag), Bson::JavaScriptCode(js) => { @@ -104,7 +105,14 @@ fn convert_bson_value_to_nu_value(v: &Bson, tag: impl Into) -> Tagged Value::Primitive(Primitive::String(obj_id.to_hex())).tagged(tag), + Bson::ObjectId(obj_id) => { + let mut collected = TaggedDictBuilder::new(tag); + collected.insert_tagged( + "$object_id".to_string(), + Value::Primitive(Primitive::String(obj_id.to_hex())).tagged(tag), + ); + collected.into_tagged_value() + } Bson::UtcDatetime(dt) => Value::Primitive(Primitive::Date(*dt)).tagged(tag), Bson::Symbol(s) => { let mut collected = TaggedDictBuilder::new(tag); diff --git a/src/commands/to_bson.rs b/src/commands/to_bson.rs new file mode 100644 index 0000000000..60dc1cf2c8 --- /dev/null +++ b/src/commands/to_bson.rs @@ -0,0 +1,231 @@ +use crate::commands::WholeStreamCommand; +use crate::object::{Dictionary, Primitive, Value}; +use crate::prelude::*; +use bson::{encode_document, oid::ObjectId, spec::BinarySubtype, Bson, Document}; +use std::convert::TryInto; + +pub struct ToBSON; + +impl WholeStreamCommand for ToBSON { + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + to_bson(args, registry) + } + + fn name(&self) -> &str { + "to-bson" + } + + fn signature(&self) -> Signature { + Signature::build("to-bson") + } +} + +pub fn value_to_bson_value(v: &Value) -> Bson { + match v { + Value::Primitive(Primitive::Boolean(b)) => Bson::Boolean(*b), + Value::Primitive(Primitive::Bytes(b)) => Bson::I64(*b as i64), + Value::Primitive(Primitive::Date(d)) => Bson::UtcDatetime(*d), + Value::Primitive(Primitive::EndOfStream) => Bson::Null, + Value::Primitive(Primitive::BeginningOfStream) => Bson::Null, + Value::Primitive(Primitive::Float(f)) => Bson::FloatingPoint(f.into_inner()), + Value::Primitive(Primitive::Int(i)) => Bson::I64(*i), + Value::Primitive(Primitive::Nothing) => Bson::Null, + Value::Primitive(Primitive::String(s)) => Bson::String(s.clone()), + Value::Primitive(Primitive::Path(s)) => Bson::String(s.display().to_string()), + Value::List(l) => Bson::Array(l.iter().map(|x| value_to_bson_value(x)).collect()), + Value::Block(_) => Bson::Null, + Value::Binary(b) => Bson::Binary(BinarySubtype::Generic, b.clone()), + Value::Object(o) => object_value_to_bson(o), + } +} + +// object_value_to_bson handles all Objects, even those that correspond to special +// types (things like regex or javascript code). +fn object_value_to_bson(o: &Dictionary) -> Bson { + let mut it = o.entries.iter(); + if it.len() > 2 { + return generic_object_value_to_bson(o); + } + match it.next() { + Some((regex, tagged_regex_value)) if regex == "$regex" => match it.next() { + Some((options, tagged_opts_value)) if options == "$options" => { + let r: Result = tagged_regex_value.try_into(); + let opts: Result = tagged_opts_value.try_into(); + if r.is_err() || opts.is_err() { + generic_object_value_to_bson(o) + } else { + Bson::RegExp(r.unwrap(), opts.unwrap()) + } + } + _ => generic_object_value_to_bson(o), + }, + Some((javascript, tagged_javascript_value)) if javascript == "$javascript" => { + match it.next() { + Some((scope, tagged_scope_value)) if scope == "$scope" => { + let js: Result = tagged_javascript_value.try_into(); + let s: Result<&Dictionary, _> = tagged_scope_value.try_into(); + if js.is_err() || s.is_err() { + generic_object_value_to_bson(o) + } else { + if let Bson::Document(doc) = object_value_to_bson(s.unwrap()) { + Bson::JavaScriptCodeWithScope(js.unwrap(), doc) + } else { + generic_object_value_to_bson(o) + } + } + } + None => { + let js: Result = tagged_javascript_value.try_into(); + if js.is_err() { + generic_object_value_to_bson(o) + } else { + Bson::JavaScriptCode(js.unwrap()) + } + } + _ => generic_object_value_to_bson(o), + } + } + Some((timestamp, tagged_timestamp_value)) if timestamp == "$timestamp" => { + let ts: Result = tagged_timestamp_value.try_into(); + if ts.is_err() { + generic_object_value_to_bson(o) + } else { + Bson::TimeStamp(ts.unwrap()) + } + } + Some((binary_subtype, tagged_binary_subtype_value)) + if binary_subtype == "$binary_subtype" => + { + match it.next() { + Some((binary, tagged_bin_value)) if binary == "$binary" => { + let bst = get_binary_subtype(tagged_binary_subtype_value); + let bin: Result, _> = tagged_bin_value.try_into(); + if bst.is_none() || bin.is_err() { + generic_object_value_to_bson(o) + } else { + Bson::Binary(bst.unwrap(), bin.unwrap()) + } + } + _ => generic_object_value_to_bson(o), + } + } + Some((object_id, tagged_object_id_value)) if object_id == "$object_id" => { + let obj_id: Result = tagged_object_id_value.try_into(); + if obj_id.is_err() { + generic_object_value_to_bson(o) + } else { + let obj_id = ObjectId::with_string(&obj_id.unwrap()); + if obj_id.is_err() { + generic_object_value_to_bson(o) + } else { + Bson::ObjectId(obj_id.unwrap()) + } + } + } + Some((symbol, tagged_symbol_value)) if symbol == "$symbol" => { + let sym: Result = tagged_symbol_value.try_into(); + if sym.is_err() { + generic_object_value_to_bson(o) + } else { + Bson::Symbol(sym.unwrap()) + } + } + _ => generic_object_value_to_bson(o), + } +} + +fn get_binary_subtype<'a>(tagged_value: &'a Tagged) -> Option { + match tagged_value.item() { + Value::Primitive(Primitive::String(s)) => Some(match s.as_ref() { + "generic" => BinarySubtype::Generic, + "function" => BinarySubtype::Function, + "binary_old" => BinarySubtype::BinaryOld, + "uuid_old" => BinarySubtype::UuidOld, + "uuid" => BinarySubtype::Uuid, + "md5" => BinarySubtype::Md5, + _ => unreachable!(), + }), + Value::Primitive(Primitive::Int(i)) => Some(BinarySubtype::UserDefined(*i as u8)), + _ => None, + } +} + +// generic_object_value_bson handles any Object that does not +// correspond to a special bson type (things like regex or javascript code). +fn generic_object_value_to_bson(o: &Dictionary) -> Bson { + let mut doc = Document::new(); + for (k, v) in o.entries.iter() { + doc.insert(k.clone(), value_to_bson_value(v)); + } + Bson::Document(doc) +} + +fn shell_encode_document( + writer: &mut Vec, + doc: Document, + span: Span, +) -> Result<(), ShellError> { + match encode_document(writer, &doc) { + Err(e) => Err(ShellError::labeled_error( + format!("Failed to encode document due to: {:?}", e), + "requires BSON-compatible document", + span, + )), + _ => Ok(()), + } +} + +fn bson_value_to_bytes(bson: Bson, span: Span) -> Result, ShellError> { + let mut out = Vec::new(); + match bson { + Bson::Array(a) => { + for v in a.into_iter() { + match v { + Bson::Document(d) => shell_encode_document(&mut out, d, span)?, + _ => { + return Err(ShellError::labeled_error( + format!("All top level values must be Documents, got {:?}", v), + "requires BSON-compatible document", + span, + )) + } + } + } + } + Bson::Document(d) => shell_encode_document(&mut out, d, span)?, + _ => { + return Err(ShellError::labeled_error( + format!("All top level values must be Documents, got {:?}", bson), + "requires BSON-compatible document", + span, + )) + } + } + Ok(out) +} + +fn to_bson(args: CommandArgs, registry: &CommandRegistry) -> Result { + let args = args.evaluate_once(registry)?; + let name_span = args.name_span(); + let out = args.input; + + Ok(out + .values + .map( + move |a| match bson_value_to_bytes(value_to_bson_value(&a), name_span) { + Ok(x) => ReturnSuccess::value(Value::Binary(x).simple_spanned(name_span)), + _ => Err(ShellError::labeled_error_with_secondary( + "Expected an object with BSON-compatible structure from pipeline", + "requires BSON-compatible input: Must be Array or Object", + name_span, + format!("{} originates from here", a.item.type_name()), + a.span(), + )), + }, + ) + .to_output_stream()) +} diff --git a/src/object/base.rs b/src/object/base.rs index 612b8f5293..5b136b3d4c 100644 --- a/src/object/base.rs +++ b/src/object/base.rs @@ -243,6 +243,48 @@ impl std::convert::TryFrom<&'a Tagged> for i64 { } } +impl std::convert::TryFrom<&'a Tagged> for String { + type Error = ShellError; + + fn try_from(value: &'a Tagged) -> Result { + match value.item() { + Value::Primitive(Primitive::String(s)) => Ok(s.clone()), + v => Err(ShellError::type_error( + "String", + value.copy_span(v.type_name()), + )), + } + } +} + +impl std::convert::TryFrom<&'a Tagged> for Vec { + type Error = ShellError; + + fn try_from(value: &'a Tagged) -> Result, ShellError> { + match value.item() { + Value::Binary(b) => Ok(b.clone()), + v => Err(ShellError::type_error( + "Binary", + value.copy_span(v.type_name()), + )), + } + } +} + +impl std::convert::TryFrom<&'a Tagged> for &'a crate::object::Dictionary { + type Error = ShellError; + + fn try_from(value: &'a Tagged) -> Result<&'a crate::object::Dictionary, ShellError> { + match value.item() { + Value::Object(d) => Ok(d), + v => Err(ShellError::type_error( + "Dictionary", + value.copy_span(v.type_name()), + )), + } + } +} + #[derive(Serialize, Deserialize)] pub enum Switch { Present, diff --git a/tests/filters_test.rs b/tests/filters_test.rs index 7baca493a3..ac68ea5fff 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -106,6 +106,17 @@ fn can_convert_table_to_json_text_and_from_json_text_back_into_table() { assert_eq!(output, "markup"); } +#[test] +fn can_convert_json_text_to_bson_and_back_into_table() { + nu!( + output, + cwd("tests/fixtures/formats"), + "echo '{\"root\":[{\"x\": 2, \"y\": 4}, {\"z\": \"42\"}]}' | from-json | to-bson | from-bson | get root | nth 1 | get z | echo $it" + ); + + assert_eq!(output, "42"); +} + #[test] fn can_convert_table_to_toml_text_and_from_toml_text_back_into_table() { nu!( From 738675259e21f30d869bb033b88fae364b14db4d Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Mon, 26 Aug 2019 21:26:49 -0400 Subject: [PATCH 2/2] Improve test so that it should work on Windows --- tests/command_open_tests.rs | 4 ++-- tests/filters_test.rs | 4 ++-- tests/fixtures/formats/sample.bson | Bin 439 -> 521 bytes 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/command_open_tests.rs b/tests/command_open_tests.rs index bf33ec63f1..3f6da5adf5 100644 --- a/tests/command_open_tests.rs +++ b/tests/command_open_tests.rs @@ -28,7 +28,7 @@ fn open_can_parse_bson_1() { nu!( output, cwd("tests/fixtures/formats"), - "open sample.bson | nth 0 | get b | echo $it" + "open sample.bson | get root | nth 0 | get b | echo $it" ); assert_eq!(output, "hello"); @@ -39,7 +39,7 @@ fn open_can_parse_bson_2() { nu!( output, cwd("tests/fixtures/formats"), - "open sample.bson | nth 6 | get b | get '$binary_subtype' | echo $it " + "open sample.bson | get root | nth 6 | get b | get '$binary_subtype' | echo $it " ); assert_eq!(output, "function"); diff --git a/tests/filters_test.rs b/tests/filters_test.rs index ac68ea5fff..710f0b89a8 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -111,10 +111,10 @@ fn can_convert_json_text_to_bson_and_back_into_table() { nu!( output, cwd("tests/fixtures/formats"), - "echo '{\"root\":[{\"x\": 2, \"y\": 4}, {\"z\": \"42\"}]}' | from-json | to-bson | from-bson | get root | nth 1 | get z | echo $it" + "open sample.bson | to-bson | from-bson | get root | nth 1 | get b | echo $it" ); - assert_eq!(output, "42"); + assert_eq!(output, "whel"); } #[test] diff --git a/tests/fixtures/formats/sample.bson b/tests/fixtures/formats/sample.bson index 95c98eb4e1f1f8ec1517944c23045ba72b098940..951c805065e7d736a2238be45ad4f23ff9734dd5 100644 GIT binary patch delta 227 zcmdna+{vQE$;80G9-o=Q5S!96@gT?9O|5o0Cs~T}^Gg`sGBPkQ8!$}BWMVd)m|@Mt zY&3C^hCj10gBJr#PvW`z#fBBN0&f^?nUfd{f#OWb`6;Ok+(1@NYKcOHf~`WOH3M^T za(+Q7g8)#Ju@XptfPe##U^bb|%BaM|Y&uzuQOTRxjKK{k2ysr5nfJ`N%aejRwFQ|I o8Ki*Xm_{30Gcd3wfo)^VNX^N~S18Xf%1L2hHlN(YXbSWh0B{#FoB#j- delta 144 zcmeBV+0HysDRW|<)x^{4);0_b4D9ilDGafR=k6C9R@4f-VYKB>Vo(N(a|3ZsYKcOH zf~`WOHG=?<%UB5{KtRBOVX`Bm(&RKoMJsC{A7Vz5nfJ`N%aejRwFP+-QS=*IGq5Is WMHn+ub8_+(%JYkIQYK$wGz9