From 3e232a5db830f6fe8fc31140c967ea45e72850d5 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 1 Oct 2021 18:11:49 +1300 Subject: [PATCH 1/2] Add 'from json' --- Cargo.lock | 105 ++ Cargo.toml | 2 + crates/nu-command/Cargo.toml | 6 +- crates/nu-command/src/default_context.rs | 51 +- crates/nu-command/src/formats/from/command.rs | 28 + crates/nu-command/src/formats/from/json.rs | 111 ++ crates/nu-command/src/formats/from/mod.rs | 5 + crates/nu-command/src/formats/mod.rs | 3 + crates/nu-command/src/lib.rs | 2 + crates/nu-json/Cargo.toml | 24 + crates/nu-json/LICENSE | 29 + crates/nu-json/src/builder.rs | 115 ++ crates/nu-json/src/de.rs | 833 ++++++++++++ crates/nu-json/src/error.rs | 166 +++ crates/nu-json/src/lib.rs | 13 + crates/nu-json/src/ser.rs | 1020 +++++++++++++++ crates/nu-json/src/util.rs | 333 +++++ crates/nu-json/src/value.rs | 1158 +++++++++++++++++ crates/nu-json/tests/main.rs | 212 +++ crates/nu-path/Cargo.toml | 12 + crates/nu-path/README.md | 3 + crates/nu-path/src/dots.rs | 259 ++++ crates/nu-path/src/expansions.rs | 75 ++ crates/nu-path/src/lib.rs | 8 + crates/nu-path/src/tilde.rs | 85 ++ crates/nu-path/src/util.rs | 4 + crates/nu-path/tests/mod.rs | 1 + crates/nu-path/tests/util.rs | 45 + crates/nu-protocol/Cargo.toml | 1 + crates/nu-protocol/src/ast/operator.rs | 5 +- crates/nu-protocol/src/shell_error.rs | 3 +- crates/nu-protocol/src/span.rs | 3 +- crates/nu-protocol/src/ty.rs | 4 +- crates/nu-protocol/src/value/mod.rs | 3 +- crates/nu-protocol/src/value/range.rs | 3 +- crates/nu-protocol/src/value/stream.rs | 20 + src/tests.rs | 14 + 37 files changed, 4722 insertions(+), 42 deletions(-) create mode 100644 crates/nu-command/src/formats/from/command.rs create mode 100644 crates/nu-command/src/formats/from/json.rs create mode 100644 crates/nu-command/src/formats/from/mod.rs create mode 100644 crates/nu-command/src/formats/mod.rs create mode 100644 crates/nu-json/Cargo.toml create mode 100644 crates/nu-json/LICENSE create mode 100644 
crates/nu-json/src/builder.rs create mode 100644 crates/nu-json/src/de.rs create mode 100644 crates/nu-json/src/error.rs create mode 100644 crates/nu-json/src/lib.rs create mode 100644 crates/nu-json/src/ser.rs create mode 100644 crates/nu-json/src/util.rs create mode 100644 crates/nu-json/src/value.rs create mode 100644 crates/nu-json/tests/main.rs create mode 100644 crates/nu-path/Cargo.toml create mode 100644 crates/nu-path/README.md create mode 100644 crates/nu-path/src/dots.rs create mode 100644 crates/nu-path/src/expansions.rs create mode 100644 crates/nu-path/src/lib.rs create mode 100644 crates/nu-path/src/tilde.rs create mode 100644 crates/nu-path/src/util.rs create mode 100644 crates/nu-path/tests/mod.rs create mode 100644 crates/nu-path/tests/util.rs diff --git a/Cargo.lock b/Cargo.lock index 37aa2c504..d18a075ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -171,12 +171,39 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "doc-comment" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dunce" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541" + [[package]] name = "either" version 
= "1.6.1" @@ -192,7 +219,9 @@ dependencies = [ "nu-cli", "nu-command", "nu-engine", + "nu-json", "nu-parser", + "nu-path", "nu-protocol", "nu-table", "pretty_assertions", @@ -256,6 +285,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + [[package]] name = "lazy_static" version = "1.4.0" @@ -268,6 +303,16 @@ version = "0.2.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2a5ac8f984bfcf3a823267e5fde638acc3325f6496633a5da6bb6eb2171e103" +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" +dependencies = [ + "serde", + "serde_test", +] + [[package]] name = "lock_api" version = "0.4.5" @@ -393,8 +438,10 @@ version = "0.1.0" dependencies = [ "glob", "nu-engine", + "nu-json", "nu-protocol", "nu-table", + "thiserror", ] [[package]] @@ -405,6 +452,19 @@ dependencies = [ "nu-protocol", ] +[[package]] +name = "nu-json" +version = "0.37.1" +dependencies = [ + "lazy_static", + "linked-hash-map", + "nu-path", + "num-traits", + "regex", + "serde", + "serde_json", +] + [[package]] name = "nu-parser" version = "0.1.0" @@ -414,11 +474,20 @@ dependencies = [ "thiserror", ] +[[package]] +name = "nu-path" +version = "0.37.1" +dependencies = [ + "dirs-next", + "dunce", +] + [[package]] name = "nu-protocol" version = "0.1.0" dependencies = [ "miette", + "serde", "thiserror", ] @@ -623,6 +692,16 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" +dependencies = [ + "getrandom", + "redox_syscall", +] + [[package]] name = "reedline" version = "0.2.0" @@ 
-674,6 +753,12 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + [[package]] name = "scopeguard" version = "1.1.0" @@ -700,6 +785,26 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_test" +version = "1.0.130" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82178225dbdeae2d5d190e8649287db6a3a32c6d24da22ae3146325aa353e4c" +dependencies = [ + "serde", +] + [[package]] name = "signal-hook" version = "0.3.10" diff --git a/Cargo.toml b/Cargo.toml index dd6206c3d..5ab8b8831 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,9 @@ reedline = { git = "https://github.com/jntrnr/reedline", branch = "main" } nu-cli = { path="./crates/nu-cli" } nu-command = { path="./crates/nu-command" } nu-engine = { path="./crates/nu-engine" } +nu-json = { path="./crates/nu-json" } nu-parser = { path="./crates/nu-parser" } +nu-path = { path="./crates/nu-path" } nu-protocol = { path = "./crates/nu-protocol" } nu-table = { path = "./crates/nu-table" } miette = "3.0.0" diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 574848b3a..892f1aef8 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -6,9 +6,11 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -nu-protocol = { path = "../nu-protocol" } nu-engine = { path = "../nu-engine" } +nu-json = { path = "../nu-json" } 
+nu-protocol = { path = "../nu-protocol" } nu-table = { path = "../nu-table" } # Potential dependencies for extras -glob = "0.3.0" \ No newline at end of file +glob = "0.3.0" +thiserror = "1.0.29" \ No newline at end of file diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index 618ca1c58..fa2e17bdc 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -6,8 +6,8 @@ use nu_protocol::{ }; use crate::{ - Alias, Benchmark, BuildString, Def, Do, Each, External, For, Git, GitCheckout, If, Length, Let, - LetEnv, Lines, ListGitBranches, Ls, Module, Table, Use, Where, + Alias, Benchmark, BuildString, Def, Do, Each, External, For, From, FromJson, Git, GitCheckout, + If, Length, Let, LetEnv, Lines, ListGitBranches, Ls, Module, Table, Use, Where, }; pub fn create_default_context() -> Rc> { @@ -20,41 +20,26 @@ pub fn create_default_context() -> Rc> { Signature::build("where").required("cond", SyntaxShape::RowCondition, "condition"); working_set.add_decl(sig.predeclare()); - working_set.add_decl(Box::new(If)); - - working_set.add_decl(Box::new(Let)); - - working_set.add_decl(Box::new(LetEnv)); - working_set.add_decl(Box::new(Alias)); - - working_set.add_decl(Box::new(BuildString)); - - working_set.add_decl(Box::new(Def)); - - working_set.add_decl(Box::new(For)); - - working_set.add_decl(Box::new(Each)); - - working_set.add_decl(Box::new(Where)); - - working_set.add_decl(Box::new(Do)); - working_set.add_decl(Box::new(Benchmark)); - - working_set.add_decl(Box::new(Length)); - - working_set.add_decl(Box::new(Ls)); - - working_set.add_decl(Box::new(Module)); - - working_set.add_decl(Box::new(Use)); - - working_set.add_decl(Box::new(Table)); - + working_set.add_decl(Box::new(BuildString)); + working_set.add_decl(Box::new(Def)); + working_set.add_decl(Box::new(Do)); + working_set.add_decl(Box::new(Each)); working_set.add_decl(Box::new(External)); - + working_set.add_decl(Box::new(For)); + 
working_set.add_decl(Box::new(From)); + working_set.add_decl(Box::new(FromJson)); + working_set.add_decl(Box::new(If)); + working_set.add_decl(Box::new(Length)); + working_set.add_decl(Box::new(Let)); + working_set.add_decl(Box::new(LetEnv)); working_set.add_decl(Box::new(Lines)); + working_set.add_decl(Box::new(Ls)); + working_set.add_decl(Box::new(Module)); + working_set.add_decl(Box::new(Table)); + working_set.add_decl(Box::new(Use)); + working_set.add_decl(Box::new(Where)); // This is a WIP proof of concept working_set.add_decl(Box::new(ListGitBranches)); diff --git a/crates/nu-command/src/formats/from/command.rs b/crates/nu-command/src/formats/from/command.rs new file mode 100644 index 000000000..cc2b6b2da --- /dev/null +++ b/crates/nu-command/src/formats/from/command.rs @@ -0,0 +1,28 @@ +use nu_protocol::ast::Call; +use nu_protocol::engine::{Command, EvaluationContext}; +use nu_protocol::{ShellError, Signature, Value}; + +pub struct From; + +impl Command for From { + fn name(&self) -> &str { + "from" + } + + fn usage(&self) -> &str { + "Parse a string or binary data into structured data" + } + + fn signature(&self) -> nu_protocol::Signature { + Signature::build("from") + } + + fn run( + &self, + _context: &EvaluationContext, + _call: &Call, + _input: Value, + ) -> Result { + Ok(Value::nothing()) + } +} diff --git a/crates/nu-command/src/formats/from/json.rs b/crates/nu-command/src/formats/from/json.rs new file mode 100644 index 000000000..a7e01036a --- /dev/null +++ b/crates/nu-command/src/formats/from/json.rs @@ -0,0 +1,111 @@ +use nu_protocol::ast::Call; +use nu_protocol::engine::{Command, EvaluationContext}; +use nu_protocol::{IntoValueStream, ShellError, Signature, Span, Value}; + +pub struct FromJson; + +impl Command for FromJson { + fn name(&self) -> &str { + "from json" + } + + fn usage(&self) -> &str { + "Convert from json to structured data" + } + + fn signature(&self) -> nu_protocol::Signature { + Signature::build("from json").switch( + "objects", + 
"treat each line as a separate value", + Some('o'), + ) + } + + fn run( + &self, + _context: &EvaluationContext, + call: &Call, + input: Value, + ) -> Result { + let span = input.span(); + let mut string_input = input.into_string(); + string_input.push('\n'); + + // TODO: turn this into a structured underline of the nu_json error + if call.has_flag("objects") { + #[allow(clippy::needless_collect)] + let lines: Vec = string_input.lines().map(|x| x.to_string()).collect(); + Ok(Value::Stream { + stream: lines + .into_iter() + .map(move |mut x| { + x.push('\n'); + match convert_string_to_value(x, span) { + Ok(v) => v, + Err(error) => Value::Error { error }, + } + }) + .into_value_stream(), + span, + }) + } else { + convert_string_to_value(string_input, span) + } + } +} + +fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value { + match value { + nu_json::Value::Array(array) => { + let v: Vec = array + .iter() + .map(|x| convert_nujson_to_value(x, span)) + .collect(); + + Value::List { vals: v, span } + } + nu_json::Value::Bool(b) => Value::Bool { val: *b, span }, + nu_json::Value::F64(f) => Value::Float { val: *f, span }, + nu_json::Value::I64(i) => Value::Int { val: *i, span }, + nu_json::Value::Null => Value::Nothing { span }, + nu_json::Value::Object(k) => { + let mut cols = vec![]; + let mut vals = vec![]; + + for item in k { + cols.push(item.0.clone()); + vals.push(convert_nujson_to_value(item.1, span)); + } + + Value::Record { cols, vals, span } + } + nu_json::Value::U64(u) => { + if *u > i64::MAX as u64 { + Value::Error { + error: ShellError::CantConvert("i64 sized integer".into(), span), + } + } else { + Value::Int { + val: *u as i64, + span, + } + } + } + nu_json::Value::String(s) => Value::String { + val: s.clone(), + span, + }, + } +} + +fn convert_string_to_value(string_input: String, span: Span) -> Result { + let result: Result = nu_json::from_str(&string_input); + match result { + Ok(value) => Ok(convert_nujson_to_value(&value, span)), + 
+ Err(_x) => Err(ShellError::CantConvert( + "structured data from json".into(), + span, + )), + } +} diff --git a/crates/nu-command/src/formats/from/mod.rs b/crates/nu-command/src/formats/from/mod.rs new file mode 100644 index 000000000..78251e562 --- /dev/null +++ b/crates/nu-command/src/formats/from/mod.rs @@ -0,0 +1,5 @@ +mod command; +mod json; + +pub use command::From; +pub use json::FromJson; diff --git a/crates/nu-command/src/formats/mod.rs b/crates/nu-command/src/formats/mod.rs new file mode 100644 index 000000000..ad1e00563 --- /dev/null +++ b/crates/nu-command/src/formats/mod.rs @@ -0,0 +1,3 @@ +mod from; + +pub use from::*; diff --git a/crates/nu-command/src/lib.rs b/crates/nu-command/src/lib.rs index 0bfddbcaa..da1fd1a68 100644 --- a/crates/nu-command/src/lib.rs +++ b/crates/nu-command/src/lib.rs @@ -4,6 +4,7 @@ mod env; mod experimental; mod filesystem; mod filters; +mod formats; mod strings; mod system; mod viewers; @@ -14,6 +15,7 @@ pub use env::*; pub use experimental::*; pub use filesystem::*; pub use filters::*; +pub use formats::*; pub use strings::*; pub use system::*; pub use viewers::*; diff --git a/crates/nu-json/Cargo.toml b/crates/nu-json/Cargo.toml new file mode 100644 index 000000000..6f2876452 --- /dev/null +++ b/crates/nu-json/Cargo.toml @@ -0,0 +1,24 @@ +[package] +authors = ["The Nu Project Contributors", "Christian Zangl "] +description = "Fork of serde-hjson" +edition = "2018" +license = "MIT" +name = "nu-json" +version = "0.37.1" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +preserve_order = ["linked-hash-map", "linked-hash-map/serde_impl"] +default = ["preserve_order"] + +[dependencies] +serde = "1.0" +num-traits = "0.2.14" +regex = "^1.0" +lazy_static = "1" +linked-hash-map = { version="0.5", optional=true } + +[dev-dependencies] +nu-path = { version = "0.37.1", path="../nu-path" } +serde_json = "1.0.39" diff --git a/crates/nu-json/LICENSE 
b/crates/nu-json/LICENSE new file mode 100644 index 000000000..e6fee54fa --- /dev/null +++ b/crates/nu-json/LICENSE @@ -0,0 +1,29 @@ +The MIT License (MIT) + +Copyright (c) 2014 The Rust Project Developers +Copyright (c) 2016 Christian Zangl +Copyright (c) 2020 The Nu Project Contributors + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/crates/nu-json/src/builder.rs b/crates/nu-json/src/builder.rs new file mode 100644 index 000000000..a69eee28c --- /dev/null +++ b/crates/nu-json/src/builder.rs @@ -0,0 +1,115 @@ +use serde::ser; + +use crate::value::{self, Map, Value}; + +/// This structure provides a simple interface for constructing a JSON array. +pub struct ArrayBuilder { + array: Vec, +} + +impl Default for ArrayBuilder { + fn default() -> Self { + Self::new() + } +} + +impl ArrayBuilder { + /// Construct an `ArrayBuilder`. + pub fn new() -> ArrayBuilder { + ArrayBuilder { array: Vec::new() } + } + + /// Return the constructed `Value`. 
+ pub fn unwrap(self) -> Value { + Value::Array(self.array) + } + + /// Insert a value into the array. + pub fn push(mut self, v: T) -> ArrayBuilder { + self.array + .push(value::to_value(&v).expect("failed to serialize")); + self + } + + /// Creates and passes an `ArrayBuilder` into a closure, then inserts the resulting array into + /// this array. + pub fn push_array(mut self, f: F) -> ArrayBuilder + where + F: FnOnce(ArrayBuilder) -> ArrayBuilder, + { + let builder = ArrayBuilder::new(); + self.array.push(f(builder).unwrap()); + self + } + + /// Creates and passes an `ObjectBuilder` into a closure, then inserts the resulting object into + /// this array. + pub fn push_object(mut self, f: F) -> ArrayBuilder + where + F: FnOnce(ObjectBuilder) -> ObjectBuilder, + { + let builder = ObjectBuilder::new(); + self.array.push(f(builder).unwrap()); + self + } +} + +/// This structure provides a simple interface for constructing a JSON object. +pub struct ObjectBuilder { + object: Map, +} + +impl Default for ObjectBuilder { + fn default() -> Self { + Self::new() + } +} + +impl ObjectBuilder { + /// Construct an `ObjectBuilder`. + pub fn new() -> ObjectBuilder { + ObjectBuilder { object: Map::new() } + } + + /// Return the constructed `Value`. + pub fn unwrap(self) -> Value { + Value::Object(self.object) + } + + /// Insert a key-value pair into the object. + pub fn insert(mut self, key: S, value: V) -> ObjectBuilder + where + S: Into, + V: ser::Serialize, + { + self.object.insert( + key.into(), + value::to_value(&value).expect("failed to serialize"), + ); + self + } + + /// Creates and passes an `ArrayBuilder` into a closure, then inserts the resulting array into + /// this object. 
+ pub fn insert_array(mut self, key: S, f: F) -> ObjectBuilder + where + S: Into, + F: FnOnce(ArrayBuilder) -> ArrayBuilder, + { + let builder = ArrayBuilder::new(); + self.object.insert(key.into(), f(builder).unwrap()); + self + } + + /// Creates and passes an `ObjectBuilder` into a closure, then inserts the resulting object into + /// this object. + pub fn insert_object(mut self, key: S, f: F) -> ObjectBuilder + where + S: Into, + F: FnOnce(ObjectBuilder) -> ObjectBuilder, + { + let builder = ObjectBuilder::new(); + self.object.insert(key.into(), f(builder).unwrap()); + self + } +} diff --git a/crates/nu-json/src/de.rs b/crates/nu-json/src/de.rs new file mode 100644 index 000000000..430aa68b0 --- /dev/null +++ b/crates/nu-json/src/de.rs @@ -0,0 +1,833 @@ +//! Hjson Deserialization +//! +//! This module provides for Hjson deserialization with the type `Deserializer`. + +use std::char; +use std::io; +use std::marker::PhantomData; +use std::str; + +use serde::de; + +use super::error::{Error, ErrorCode, Result}; +use super::util::StringReader; +use super::util::{Number, ParseNumber}; + +enum State { + Normal, + Root, + Keyname, +} + +/// A structure that deserializes Hjson into Rust values. +pub struct Deserializer> { + rdr: StringReader, + str_buf: Vec, + state: State, +} + +// macro_rules! try_or_invalid { +// ($self_:expr, $e:expr) => { +// match $e { +// Some(v) => v, +// None => { return Err($self_.error(ErrorCode::InvalidNumber)); } +// } +// } +// } + +impl Deserializer +where + Iter: Iterator, +{ + /// Creates the Hjson parser from an `std::iter::Iterator`. + #[inline] + pub fn new(rdr: Iter) -> Deserializer { + Deserializer { + rdr: StringReader::new(rdr), + str_buf: Vec::with_capacity(128), + state: State::Normal, + } + } + + /// Creates the Hjson parser from an `std::iter::Iterator`, with its state set to `State::Root`. 
+ #[inline] + pub fn new_for_root(rdr: Iter) -> Deserializer { + let mut res = Deserializer::new(rdr); + res.state = State::Root; + res + } + + /// The `Deserializer::end` method should be called after a value has been fully deserialized. + /// This allows the `Deserializer` to validate that the input stream is at the end or that it + /// only has trailing whitespace. + #[inline] + pub fn end(&mut self) -> Result<()> { + self.rdr.parse_whitespace()?; + if self.rdr.eof()? { + Ok(()) + } else { + Err(self.rdr.error(ErrorCode::TrailingCharacters)) + } + } + + fn is_punctuator_char(&mut self, ch: u8) -> bool { + matches!(ch, b'{' | b'}' | b'[' | b']' | b',' | b':') + } + + fn parse_keyname<'de, V>(&mut self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + // quotes for keys are optional in Hjson + // unless they include {}[],: or whitespace. + // assume whitespace was already eaten + + self.str_buf.clear(); + + let mut space: Option = None; + loop { + let ch = self.rdr.next_char_or_null()?; + + if ch == b':' { + if self.str_buf.is_empty() { + return Err(self.rdr.error(ErrorCode::Custom( + "Found ':' but no key name (for an empty key name use quotes)".to_string(), + ))); + } else if space.is_some() + && space.expect("Internal error: json parsing") != self.str_buf.len() + { + return Err(self.rdr.error(ErrorCode::Custom( + "Found whitespace in your key name (use quotes to include)".to_string(), + ))); + } + self.rdr.uneat_char(ch); + let s = str::from_utf8(&self.str_buf).expect("Internal error: json parsing"); + return visitor.visit_str(s); + } else if ch <= b' ' { + if ch == 0 { + return Err(self.rdr.error(ErrorCode::EofWhileParsingObject)); + } else if space.is_none() { + space = Some(self.str_buf.len()); + } + } else if self.is_punctuator_char(ch) { + return Err(self.rdr.error(ErrorCode::Custom("Found a punctuator where a key name was expected (check your syntax or use quotes if the key name includes {}[],: or whitespace)".to_string()))); + } else { + 
self.str_buf.push(ch); + } + } + } + + fn parse_value<'de, V>(&mut self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + self.rdr.parse_whitespace()?; + + if self.rdr.eof()? { + return Err(self.rdr.error(ErrorCode::EofWhileParsingValue)); + } + + match self.state { + State::Keyname => { + self.state = State::Normal; + return self.parse_keyname(visitor); + } + State::Root => { + self.state = State::Normal; + return self.visit_map(true, visitor); + } + _ => {} + } + + match self.rdr.peek_or_null()? { + /* + b'-' => { + self.rdr.eat_char(); + self.parse_integer(false, visitor) + } + b'0' ... b'9' => { + self.parse_integer(true, visitor) + } + */ + b'"' => { + self.rdr.eat_char(); + self.parse_string()?; + let s = str::from_utf8(&self.str_buf).expect("Internal error: json parsing"); + visitor.visit_str(s) + } + b'[' => { + self.rdr.eat_char(); + let ret = visitor.visit_seq(SeqVisitor::new(self))?; + self.rdr.parse_whitespace()?; + match self.rdr.next_char()? { + Some(b']') => Ok(ret), + Some(_) => Err(self.rdr.error(ErrorCode::TrailingCharacters)), + None => Err(self.rdr.error(ErrorCode::EofWhileParsingList)), + } + } + b'{' => { + self.rdr.eat_char(); + self.visit_map(false, visitor) + } + b'\x00' => Err(self.rdr.error(ErrorCode::ExpectedSomeValue)), + _ => self.parse_tfnns(visitor), + } + } + + fn visit_map<'de, V>(&mut self, root: bool, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let ret = visitor.visit_map(MapVisitor::new(self, root))?; + self.rdr.parse_whitespace()?; + match self.rdr.next_char()? { + Some(b'}') => { + if !root { + Ok(ret) + } else { + Err(self.rdr.error(ErrorCode::TrailingCharacters)) + } // todo + } + Some(_) => Err(self.rdr.error(ErrorCode::TrailingCharacters)), + None => { + if root { + Ok(ret) + } else { + Err(self.rdr.error(ErrorCode::EofWhileParsingObject)) + } + } + } + } + + fn parse_ident(&mut self, ident: &[u8]) -> Result<()> { + for c in ident { + if Some(*c) != self.rdr.next_char()? 
{ + return Err(self.rdr.error(ErrorCode::ExpectedSomeIdent)); + } + } + + Ok(()) + } + + fn parse_tfnns<'de, V>(&mut self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + // Hjson strings can be quoteless + // returns string, true, false, or null. + self.str_buf.clear(); + + let first = self.rdr.peek()?.expect("Internal error: json parsing"); + + if self.is_punctuator_char(first) { + return Err(self.rdr.error(ErrorCode::PunctuatorInQlString)); + } + + loop { + let ch = self.rdr.next_char_or_null()?; + + let is_eol = ch == b'\r' || ch == b'\n' || ch == b'\x00'; + let is_comment = ch == b'#' + || if ch == b'/' { + let next = self.rdr.peek_or_null()?; + next == b'/' || next == b'*' + } else { + false + }; + if is_eol || is_comment || ch == b',' || ch == b'}' || ch == b']' { + let chf = self.str_buf[0]; + match chf { + b'f' => { + if str::from_utf8(&self.str_buf) + .expect("Internal error: json parsing") + .trim() + == "false" + { + self.rdr.uneat_char(ch); + return visitor.visit_bool(false); + } + } + b'n' => { + if str::from_utf8(&self.str_buf) + .expect("Internal error: json parsing") + .trim() + == "null" + { + self.rdr.uneat_char(ch); + return visitor.visit_unit(); + } + } + b't' => { + if str::from_utf8(&self.str_buf) + .expect("Internal error: json parsing") + .trim() + == "true" + { + self.rdr.uneat_char(ch); + return visitor.visit_bool(true); + } + } + _ => { + if chf == b'-' || (b'0'..=b'9').contains(&chf) { + let mut pn = ParseNumber::new(self.str_buf.iter().copied()); + match pn.parse(false) { + Ok(Number::F64(v)) => { + self.rdr.uneat_char(ch); + return visitor.visit_f64(v); + } + Ok(Number::U64(v)) => { + self.rdr.uneat_char(ch); + return visitor.visit_u64(v); + } + Ok(Number::I64(v)) => { + self.rdr.uneat_char(ch); + return visitor.visit_i64(v); + } + Err(_) => {} // not a number, continue + } + } + } + } + if is_eol { + // remove any whitespace at the end (ignored in quoteless strings) + return visitor.visit_str( + 
str::from_utf8(&self.str_buf) + .expect("Internal error: json parsing") + .trim(), + ); + } + } + self.str_buf.push(ch); + + if self.str_buf == b"'''" { + return self.parse_ml_string(visitor); + } + } + } + + fn decode_hex_escape(&mut self) -> Result { + let mut i = 0; + let mut n = 0u16; + while i < 4 && !self.rdr.eof()? { + n = match self.rdr.next_char_or_null()? { + c @ b'0'..=b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)), + b'a' | b'A' => n * 16_u16 + 10_u16, + b'b' | b'B' => n * 16_u16 + 11_u16, + b'c' | b'C' => n * 16_u16 + 12_u16, + b'd' | b'D' => n * 16_u16 + 13_u16, + b'e' | b'E' => n * 16_u16 + 14_u16, + b'f' | b'F' => n * 16_u16 + 15_u16, + _ => { + return Err(self.rdr.error(ErrorCode::InvalidEscape)); + } + }; + + i += 1; + } + + // Error out if we didn't parse 4 digits. + if i != 4 { + return Err(self.rdr.error(ErrorCode::InvalidEscape)); + } + + Ok(n) + } + + fn ml_skip_white(&mut self) -> Result { + match self.rdr.peek_or_null()? { + b' ' | b'\t' | b'\r' => { + self.rdr.eat_char(); + Ok(true) + } + _ => Ok(false), + } + } + + fn ml_skip_indent(&mut self, indent: usize) -> Result<()> { + let mut skip = indent; + while self.ml_skip_white()? && skip > 0 { + skip -= 1; + } + Ok(()) + } + + fn parse_ml_string<'de, V>(&mut self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + self.str_buf.clear(); + + // Parse a multiline string value. + let mut triple = 0; + + // we are at ''' +1 - get indent + let (_, col) = self.rdr.pos(); + let indent = col - 4; + + // skip white/to (newline) + while self.ml_skip_white()? {} + if self.rdr.peek_or_null()? == b'\n' { + self.rdr.eat_char(); + self.ml_skip_indent(indent)?; + } + + // When parsing multiline string values, we must look for ' characters. + loop { + if self.rdr.eof()? 
{ + return Err(self.rdr.error(ErrorCode::EofWhileParsingString)); + } // todo error("Bad multiline string"); + let ch = self.rdr.next_char_or_null()?; + + if ch == b'\'' { + triple += 1; + if triple == 3 { + if self.str_buf.last() == Some(&b'\n') { + self.str_buf.pop(); + } + let res = str::from_utf8(&self.str_buf).expect("Internal error: json parsing"); + //todo if (self.str_buf.slice(-1) === '\n') self.str_buf=self.str_buf.slice(0, -1); // remove last EOL + return visitor.visit_str(res); + } else { + continue; + } + } + + while triple > 0 { + self.str_buf.push(b'\''); + triple -= 1; + } + + if ch != b'\r' { + self.str_buf.push(ch); + } + if ch == b'\n' { + self.ml_skip_indent(indent)?; + } + } + } + + fn parse_string(&mut self) -> Result<()> { + self.str_buf.clear(); + + loop { + let ch = match self.rdr.next_char()? { + Some(ch) => ch, + None => { + return Err(self.rdr.error(ErrorCode::EofWhileParsingString)); + } + }; + + match ch { + b'"' => { + return Ok(()); + } + b'\\' => { + let ch = match self.rdr.next_char()? { + Some(ch) => ch, + None => { + return Err(self.rdr.error(ErrorCode::EofWhileParsingString)); + } + }; + + match ch { + b'"' => self.str_buf.push(b'"'), + b'\\' => self.str_buf.push(b'\\'), + b'/' => self.str_buf.push(b'/'), + b'b' => self.str_buf.push(b'\x08'), + b'f' => self.str_buf.push(b'\x0c'), + b'n' => self.str_buf.push(b'\n'), + b'r' => self.str_buf.push(b'\r'), + b't' => self.str_buf.push(b'\t'), + b'u' => { + let c = match self.decode_hex_escape()? { + 0xDC00..=0xDFFF => { + return Err(self + .rdr + .error(ErrorCode::LoneLeadingSurrogateInHexEscape)); + } + + // Non-BMP characters are encoded as a sequence of + // two hex escapes, representing UTF-16 surrogates. + n1 @ 0xD800..=0xDBFF => { + match (self.rdr.next_char()?, self.rdr.next_char()?) 
{ + (Some(b'\\'), Some(b'u')) => (), + _ => { + return Err(self + .rdr + .error(ErrorCode::UnexpectedEndOfHexEscape)); + } + } + + let n2 = self.decode_hex_escape()?; + + if !(0xDC00..=0xDFFF).contains(&n2) { + return Err(self + .rdr + .error(ErrorCode::LoneLeadingSurrogateInHexEscape)); + } + + let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + + 0x1_0000; + + match char::from_u32(n as u32) { + Some(c) => c, + None => { + return Err(self + .rdr + .error(ErrorCode::InvalidUnicodeCodePoint)); + } + } + } + + n => match char::from_u32(n as u32) { + Some(c) => c, + None => { + return Err(self + .rdr + .error(ErrorCode::InvalidUnicodeCodePoint)); + } + }, + }; + + self.str_buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()); + } + _ => { + return Err(self.rdr.error(ErrorCode::InvalidEscape)); + } + } + } + ch => { + self.str_buf.push(ch); + } + } + } + } + + fn parse_object_colon(&mut self) -> Result<()> { + self.rdr.parse_whitespace()?; + + match self.rdr.next_char()? { + Some(b':') => Ok(()), + Some(_) => Err(self.rdr.error(ErrorCode::ExpectedColon)), + None => Err(self.rdr.error(ErrorCode::EofWhileParsingObject)), + } + } +} + +impl<'de, 'a, Iter> de::Deserializer<'de> for &'a mut Deserializer +where + Iter: Iterator, +{ + type Error = Error; + + #[inline] + fn deserialize_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + if let State::Root = self.state {} + + self.parse_value(visitor) + } + + /// Parses a `null` as a None, and any other values as a `Some(...)`. + #[inline] + fn deserialize_option(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + self.rdr.parse_whitespace()?; + + match self.rdr.peek_or_null()? { + b'n' => { + self.rdr.eat_char(); + self.parse_ident(b"ull")?; + visitor.visit_none() + } + _ => visitor.visit_some(self), + } + } + + /// Parses a newtype struct as the underlying value. 
#[inline]
    fn deserialize_newtype_struct<V>(self, _name: &str, visitor: V) -> Result<V::Value>
    where
        V: de::Visitor<'de>,
    {
        visitor.visit_newtype_struct(self)
    }

    serde::forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf unit unit_struct seq tuple map
        tuple_struct struct enum identifier ignored_any
    }
}

/// Sequence accessor handed to serde visitors while inside a `[...]` list.
struct SeqVisitor<'a, Iter: 'a + Iterator<Item = u8>> {
    de: &'a mut Deserializer<Iter>,
}

impl<'a, Iter: Iterator<Item = u8>> SeqVisitor<'a, Iter> {
    fn new(de: &'a mut Deserializer<Iter>) -> Self {
        SeqVisitor { de }
    }
}

impl<'de, 'a, Iter> de::SeqAccess<'de> for SeqVisitor<'a, Iter>
where
    Iter: Iterator<Item = u8>,
{
    type Error = Error;

    /// Returns the next list element, or `None` when the next byte is `]`.
    ///
    /// The closing `]` is only peeked here, not consumed.
    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
    where
        T: de::DeserializeSeed<'de>,
    {
        self.de.rdr.parse_whitespace()?;

        match self.de.rdr.peek()? {
            Some(b']') => return Ok(None),
            Some(_) => {}
            None => return Err(self.de.rdr.error(ErrorCode::EofWhileParsingList)),
        }

        let value = seed.deserialize(&mut *self.de)?;

        // in Hjson the comma is optional and trailing commas are allowed
        self.de.rdr.parse_whitespace()?;
        if self.de.rdr.peek()? == Some(b',') {
            self.de.rdr.eat_char();
            self.de.rdr.parse_whitespace()?;
        }

        Ok(Some(value))
    }
}

/// Map accessor handed to serde visitors while inside an object.
///
/// `root` marks an object for which end of input is accepted in place of a
/// closing `}`.
struct MapVisitor<'a, Iter: 'a + Iterator<Item = u8>> {
    de: &'a mut Deserializer<Iter>,
    first: bool,
    root: bool,
}

impl<'a, Iter: Iterator<Item = u8>> MapVisitor<'a, Iter> {
    fn new(de: &'a mut Deserializer<Iter>, root: bool) -> Self {
        MapVisitor {
            de,
            first: true,
            root,
        }
    }
}

impl<'de, 'a, Iter> de::MapAccess<'de> for MapVisitor<'a, Iter>
where
    Iter: Iterator<Item = u8>,
{
    type Error = Error;

    /// Returns the next key, or `None` at the end of the object.
    ///
    /// The deserializer state is set to `State::Normal` for a key that starts
    /// with `"` and to `State::Keyname` otherwise, before the key is parsed.
    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
    where
        K: de::DeserializeSeed<'de>,
    {
        self.de.rdr.parse_whitespace()?;

        if self.first {
            self.first = false;
        } else if self.de.rdr.peek()? == Some(b',') {
            // in Hjson the comma is optional and trailing commas are allowed
            self.de.rdr.eat_char();
            self.de.rdr.parse_whitespace()?;
        }

        let ch = match self.de.rdr.peek()? {
            Some(b'}') => return Ok(None), // handled later for root
            Some(ch) => ch,
            // Only a root object may end at end of input.
            None if self.root => return Ok(None),
            None => return Err(self.de.rdr.error(ErrorCode::EofWhileParsingObject)),
        };

        self.de.state = if ch == b'"' {
            State::Normal
        } else {
            State::Keyname
        };
        Ok(Some(seed.deserialize(&mut *self.de)?))
    }

    /// Consumes the `:` separator and parses the value that follows it.
    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
    where
        V: de::DeserializeSeed<'de>,
    {
        self.de.parse_object_colon()?;

        seed.deserialize(&mut *self.de)
    }
}

impl<'de, 'a, Iter> de::VariantAccess<'de> for &'a mut Deserializer<Iter>
where
    Iter: Iterator<Item = u8>,
{
    type Error = Error;

    fn unit_variant(self) -> Result<()> {
        de::Deserialize::deserialize(self)
    }

    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
    where
        T: de::DeserializeSeed<'de>,
    {
        seed.deserialize(self)
    }

    fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
    where
        V: de::Visitor<'de>,
    {
        de::Deserializer::deserialize_any(self, visitor)
    }

    fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
    where
        V: de::Visitor<'de>,
    {
        de::Deserializer::deserialize_any(self, visitor)
    }
}

//////////////////////////////////////////////////////////////////////////////

/// Iterator that deserializes a stream into multiple Hjson values.
pub struct StreamDeserializer<T, Iter>
where
    Iter: Iterator<Item = u8>,
    T: de::DeserializeOwned,
{
    deser: Deserializer<Iter>,
    _marker: PhantomData<T>,
}

impl<T, Iter> StreamDeserializer<T, Iter>
where
    Iter: Iterator<Item = u8>,
    T: de::DeserializeOwned,
{
    /// Returns an `Iterator` of decoded Hjson values from an iterator over
    /// `Iterator<Item=u8>`.
pub fn new(iter: Iter) -> StreamDeserializer<T, Iter> {
        StreamDeserializer {
            deser: Deserializer::new(iter),
            _marker: PhantomData,
        }
    }
}

impl<T, Iter> Iterator for StreamDeserializer<T, Iter>
where
    Iter: Iterator<Item = u8>,
    T: de::DeserializeOwned,
{
    type Item = Result<T>;

    /// Decodes the next value, or returns `None` once only whitespace remains.
    fn next(&mut self) -> Option<Result<T>> {
        // skip whitespaces, if any
        // this helps with trailing whitespaces, since whitespaces between
        // values are handled for us.
        if let Err(e) = self.deser.rdr.parse_whitespace() {
            return Some(Err(e));
        };

        match self.deser.rdr.eof() {
            Ok(true) => None,
            Ok(false) => Some(de::Deserialize::deserialize(&mut self.deser)),
            Err(e) => Some(Err(e)),
        }
    }
}

//////////////////////////////////////////////////////////////////////////////

/// Decodes a Hjson value from an iterator over bytes,
/// `Iterator<Item = io::Result<u8>>`.
///
/// The whole input is buffered first. Parsing is attempted with the legacy
/// root-object deserializer (`Deserializer::new_for_root`) and, if that fails,
/// retried with the standard one. In both cases the entire input must be
/// consumed.
///
/// # Errors
///
/// Returns `Error::Io` if the iterator yields an I/O error, otherwise the
/// error of the second (standard) parse attempt.
pub fn from_iter<I, T>(iter: I) -> Result<T>
where
    I: Iterator<Item = io::Result<u8>>,
    T: de::DeserializeOwned,
{
    // `?` converts the `io::Error` through `From<io::Error> for Error`.
    let bytes = iter.collect::<io::Result<Vec<u8>>>()?;

    // deserialize tries first to decode with legacy support (new_for_root)
    // and then with the standard method if this fails.
    // todo: add compile switch

    // deserialize and make sure the whole stream has been consumed
    let mut de = Deserializer::new_for_root(bytes.iter().copied());
    de::Deserialize::deserialize(&mut de)
        .and_then(|x| de.end().map(|()| x))
        .or_else(|_| {
            let mut de2 = Deserializer::new(bytes.iter().copied());
            de::Deserialize::deserialize(&mut de2).and_then(|x| de2.end().map(|()| x))
        })
}

/// Decodes a Hjson value from a `std::io::Read`.
pub fn from_reader<R, T>(rdr: R) -> Result<T>
where
    R: io::Read,
    T: de::DeserializeOwned,
{
    from_iter(rdr.bytes())
}

/// Decodes a Hjson value from a byte slice `&[u8]`.
pub fn from_slice<T>(v: &[u8]) -> Result<T>
where
    T: de::DeserializeOwned,
{
    from_iter(v.iter().map(|&byte| Ok(byte)))
}

/// Decodes a Hjson value from a `&str`.
pub fn from_str<T>(s: &str) -> Result<T>
where
    T: de::DeserializeOwned,
{
    from_slice(s.as_bytes())
}
diff --git a/crates/nu-json/src/error.rs b/crates/nu-json/src/error.rs
new file mode 100644
index 000000000..33d417c62
--- /dev/null
+++ b/crates/nu-json/src/error.rs
@@ -0,0 +1,166 @@
//! JSON Errors
//!
//! This module is centered around the `Error` and `ErrorCode` types, which represent all possible
//! `serde_hjson` errors.

use std::error;
use std::fmt;
use std::io;
use std::result;
use std::string::FromUtf8Error;

use serde::de;
use serde::ser;

/// The errors that can arise while parsing a JSON stream.
#[derive(Clone, PartialEq)]
pub enum ErrorCode {
    /// Catchall for syntax error messages
    Custom(String),

    /// EOF while parsing a list.
    EofWhileParsingList,

    /// EOF while parsing an object.
    EofWhileParsingObject,

    /// EOF while parsing a string.
    EofWhileParsingString,

    /// EOF while parsing a JSON value.
    EofWhileParsingValue,

    /// Expected this character to be a `':'`.
    ExpectedColon,

    /// Expected this character to be either a `','` or a `]`.
    ExpectedListCommaOrEnd,

    /// Expected this character to be either a `','` or a `}`.
    ExpectedObjectCommaOrEnd,

    /// Expected to parse either a `true`, `false`, or a `null`.
    ExpectedSomeIdent,

    /// Expected this character to start a JSON value.
    ExpectedSomeValue,

    /// Invalid hex escape code.
    InvalidEscape,

    /// Invalid number.
    InvalidNumber,

    /// Invalid Unicode code point.
    InvalidUnicodeCodePoint,

    /// Object key is not a string.
    KeyMustBeAString,

    /// Lone leading surrogate in hex escape.
    LoneLeadingSurrogateInHexEscape,

    /// JSON has non-whitespace trailing characters after the value.
    TrailingCharacters,

    /// Unexpected end of hex escape.
    UnexpectedEndOfHexEscape,

    /// Found a punctuator character when expecting a quoteless string.
    PunctuatorInQlString,
}

impl fmt::Debug for ErrorCode {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        //use std::fmt::Debug;

        match *self {
            ErrorCode::Custom(ref msg) => write!(f, "{}", msg),
            ErrorCode::EofWhileParsingList => "EOF while parsing a list".fmt(f),
            ErrorCode::EofWhileParsingObject => "EOF while parsing an object".fmt(f),
            ErrorCode::EofWhileParsingString => "EOF while parsing a string".fmt(f),
            ErrorCode::EofWhileParsingValue => "EOF while parsing a value".fmt(f),
            ErrorCode::ExpectedColon => "expected `:`".fmt(f),
            ErrorCode::ExpectedListCommaOrEnd => "expected `,` or `]`".fmt(f),
            ErrorCode::ExpectedObjectCommaOrEnd => "expected `,` or `}`".fmt(f),
            ErrorCode::ExpectedSomeIdent => "expected ident".fmt(f),
            ErrorCode::ExpectedSomeValue => "expected value".fmt(f),
            ErrorCode::InvalidEscape => "invalid escape".fmt(f),
            ErrorCode::InvalidNumber => "invalid number".fmt(f),
            ErrorCode::InvalidUnicodeCodePoint => "invalid Unicode code point".fmt(f),
            ErrorCode::KeyMustBeAString => "key must be a string".fmt(f),
            ErrorCode::LoneLeadingSurrogateInHexEscape => {
                "lone leading surrogate in hex escape".fmt(f)
            }
            ErrorCode::TrailingCharacters => "trailing characters".fmt(f),
            ErrorCode::UnexpectedEndOfHexEscape => "unexpected end of hex escape".fmt(f),
            ErrorCode::PunctuatorInQlString => {
                "found a punctuator character when expecting a quoteless string".fmt(f)
            }
        }
    }
}

/// This type represents all possible errors that can occur when serializing or deserializing a
/// value into JSON.
#[derive(Debug)]
pub enum Error {
    /// The JSON value had some syntactic error.
    ///
    /// The two numbers are the line and column, as printed by `Display`.
    Syntax(ErrorCode, usize, usize),

    /// Some IO error occurred when serializing or deserializing a value.
    Io(io::Error),

    /// Some UTF8 error occurred while serializing or deserializing a value.
    FromUtf8(FromUtf8Error),
}

impl error::Error for Error {
    /// Returns the wrapped I/O or UTF-8 error, if any.
    ///
    /// Implemented as `source` rather than the deprecated `cause`; the
    /// provided `cause` method delegates to `source`, so existing callers of
    /// `cause` keep working.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match *self {
            Error::Io(ref error) => Some(error),
            Error::FromUtf8(ref error) => Some(error),
            Error::Syntax(..) => None,
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Error::Syntax(ref code, line, col) => {
                write!(fmt, "{:?} at line {} column {}", code, line, col)
            }
            Error::Io(ref error) => fmt::Display::fmt(error, fmt),
            Error::FromUtf8(ref error) => fmt::Display::fmt(error, fmt),
        }
    }
}

impl From<io::Error> for Error {
    fn from(error: io::Error) -> Error {
        Error::Io(error)
    }
}

impl From<FromUtf8Error> for Error {
    fn from(error: FromUtf8Error) -> Error {
        Error::FromUtf8(error)
    }
}

impl de::Error for Error {
    /// Raised when there is a general error when deserializing a type.
    fn custom<T: fmt::Display>(msg: T) -> Error {
        Error::Syntax(ErrorCode::Custom(msg.to_string()), 0, 0)
    }
}

impl ser::Error for Error {
    /// Raised when there is a general error when serializing a type.
    fn custom<T: fmt::Display>(msg: T) -> Error {
        Error::Syntax(ErrorCode::Custom(msg.to_string()), 0, 0)
    }
}

/// Helper alias for `Result` objects that return a JSON `Error`.
pub type Result<T> = result::Result<T, Error>;
diff --git a/crates/nu-json/src/lib.rs b/crates/nu-json/src/lib.rs
new file mode 100644
index 000000000..6a196c527
--- /dev/null
+++ b/crates/nu-json/src/lib.rs
@@ -0,0 +1,13 @@
pub use self::de::{
    from_iter, from_reader, from_slice, from_str, Deserializer, StreamDeserializer,
};
pub use self::error::{Error, ErrorCode, Result};
pub use self::ser::{to_string, to_vec, to_writer, Serializer};
pub use self::value::{from_value, to_value, Map, Value};

pub mod builder;
pub mod de;
pub mod error;
pub mod ser;
mod util;
pub mod value;
diff --git a/crates/nu-json/src/ser.rs b/crates/nu-json/src/ser.rs
new file mode 100644
index 000000000..01ea63e86
--- /dev/null
+++ b/crates/nu-json/src/ser.rs
@@ -0,0 +1,1020 @@
//! Hjson Serialization
//!
//!
This module provides for Hjson serialization with the type `Serializer`. + +use std::fmt::{Display, LowerExp}; +use std::io; +use std::num::FpCategory; + +use super::error::{Error, ErrorCode, Result}; +use serde::ser; + +use super::util::ParseNumber; + +use regex::Regex; + +use lazy_static::lazy_static; + +/// A structure for serializing Rust values into Hjson. +pub struct Serializer { + writer: W, + formatter: F, +} + +impl<'a, W> Serializer> +where + W: io::Write, +{ + /// Creates a new Hjson serializer. + #[inline] + pub fn new(writer: W) -> Self { + Serializer::with_formatter(writer, HjsonFormatter::new()) + } +} + +impl Serializer +where + W: io::Write, + F: Formatter, +{ + /// Creates a new Hjson visitor whose output will be written to the writer + /// specified. + #[inline] + pub fn with_formatter(writer: W, formatter: F) -> Self { + Serializer { writer, formatter } + } + + /// Unwrap the `Writer` from the `Serializer`. + #[inline] + pub fn into_inner(self) -> W { + self.writer + } +} + +#[doc(hidden)] +#[derive(Eq, PartialEq)] +pub enum State { + Empty, + First, + Rest, +} + +#[doc(hidden)] +pub struct Compound<'a, W, F> { + ser: &'a mut Serializer, + state: State, +} + +impl<'a, W, F> ser::Serializer for &'a mut Serializer +where + W: io::Write, + F: Formatter, +{ + type Ok = (); + type Error = Error; + + type SerializeSeq = Compound<'a, W, F>; + type SerializeTuple = Compound<'a, W, F>; + type SerializeTupleStruct = Compound<'a, W, F>; + type SerializeTupleVariant = Compound<'a, W, F>; + type SerializeMap = Compound<'a, W, F>; + type SerializeStruct = Compound<'a, W, F>; + type SerializeStructVariant = Compound<'a, W, F>; + + #[inline] + fn serialize_bool(self, value: bool) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + if value { + self.writer.write_all(b"true").map_err(From::from) + } else { + self.writer.write_all(b"false").map_err(From::from) + } + } + + #[inline] + fn serialize_i8(self, value: i8) -> Result<()> { + 
self.formatter.start_value(&mut self.writer)?; + write!(&mut self.writer, "{}", value).map_err(From::from) + } + + #[inline] + fn serialize_i16(self, value: i16) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + write!(&mut self.writer, "{}", value).map_err(From::from) + } + + #[inline] + fn serialize_i32(self, value: i32) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + write!(&mut self.writer, "{}", value).map_err(From::from) + } + + #[inline] + fn serialize_i64(self, value: i64) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + write!(&mut self.writer, "{}", value).map_err(From::from) + } + + #[inline] + fn serialize_u8(self, value: u8) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + write!(&mut self.writer, "{}", value).map_err(From::from) + } + + #[inline] + fn serialize_u16(self, value: u16) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + write!(&mut self.writer, "{}", value).map_err(From::from) + } + + #[inline] + fn serialize_u32(self, value: u32) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + write!(&mut self.writer, "{}", value).map_err(From::from) + } + + #[inline] + fn serialize_u64(self, value: u64) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + write!(&mut self.writer, "{}", value).map_err(From::from) + } + + #[inline] + fn serialize_f32(self, value: f32) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + fmt_f32_or_null(&mut self.writer, if value == -0f32 { 0f32 } else { value }) + .map_err(From::from) + } + + #[inline] + fn serialize_f64(self, value: f64) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + fmt_f64_or_null(&mut self.writer, if value == -0f64 { 0f64 } else { value }) + .map_err(From::from) + } + + #[inline] + fn serialize_char(self, value: char) -> Result<()> { + self.formatter.start_value(&mut self.writer)?; + escape_char(&mut self.writer, value).map_err(From::from) + } + + 
#[inline]
    fn serialize_str(self, value: &str) -> Result<()> {
        quote_str(&mut self.writer, &mut self.formatter, value).map_err(From::from)
    }

    /// Writes bytes as a sequence of numbers.
    #[inline]
    fn serialize_bytes(self, value: &[u8]) -> Result<()> {
        let mut seq = self.serialize_seq(Some(value.len()))?;
        for byte in value {
            ser::SerializeSeq::serialize_element(&mut seq, byte)?
        }
        ser::SerializeSeq::end(seq)
    }

    #[inline]
    fn serialize_unit(self) -> Result<()> {
        self.formatter.start_value(&mut self.writer)?;
        self.writer.write_all(b"null").map_err(From::from)
    }

    #[inline]
    fn serialize_unit_struct(self, _name: &'static str) -> Result<()> {
        self.serialize_unit()
    }

    /// Writes a unit variant as its name.
    #[inline]
    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
    ) -> Result<()> {
        self.serialize_str(variant)
    }

    /// Serialize newtypes without an object wrapper.
    #[inline]
    fn serialize_newtype_struct<T>(self, _name: &'static str, value: &T) -> Result<()>
    where
        T: ?Sized + ser::Serialize,
    {
        value.serialize(self)
    }

    /// Writes a newtype variant as a single-entry object keyed by the variant name.
    #[inline]
    fn serialize_newtype_variant<T>(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        value: &T,
    ) -> Result<()>
    where
        T: ?Sized + ser::Serialize,
    {
        self.formatter.open(&mut self.writer, b'{')?;
        self.formatter.comma(&mut self.writer, true)?;
        escape_key(&mut self.writer, variant)?;
        self.formatter.colon(&mut self.writer)?;
        value.serialize(&mut *self)?;
        self.formatter.close(&mut self.writer, b'}')
    }

    #[inline]
    fn serialize_none(self) -> Result<()> {
        self.serialize_unit()
    }

    #[inline]
    fn serialize_some<V>(self, value: &V) -> Result<()>
    where
        V: ?Sized + ser::Serialize,
    {
        value.serialize(self)
    }

    /// Starts a sequence; a sequence of known length 0 is written as `[]` at once.
    #[inline]
    fn serialize_seq(self, len: Option<usize>) -> Result<Self::SerializeSeq> {
        let state = if len == Some(0) {
            self.formatter.start_value(&mut self.writer)?;
            self.writer.write_all(b"[]")?;
            State::Empty
        } else {
            self.formatter.open(&mut self.writer, b'[')?;
            State::First
        };
        Ok(Compound { ser: self, state })
    }

    #[inline]
    fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> {
        self.serialize_seq(Some(len))
    }

    #[inline]
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        len: usize,
    ) -> Result<Self::SerializeTupleStruct> {
        self.serialize_seq(Some(len))
    }

    /// Starts `{ variant: [` ; both delimiters are closed by `SerializeTupleVariant::end`.
    #[inline]
    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        len: usize,
    ) -> Result<Self::SerializeTupleVariant> {
        self.formatter.open(&mut self.writer, b'{')?;
        self.formatter.comma(&mut self.writer, true)?;
        escape_key(&mut self.writer, variant)?;
        self.formatter.colon(&mut self.writer)?;
        self.serialize_seq(Some(len))
    }

    /// Starts a map; a map of known length 0 is written as `{}` at once.
    #[inline]
    fn serialize_map(self, len: Option<usize>) -> Result<Self::SerializeMap> {
        let state = if len == Some(0) {
            self.formatter.start_value(&mut self.writer)?;
            self.writer.write_all(b"{}")?;
            State::Empty
        } else {
            self.formatter.open(&mut self.writer, b'{')?;
            State::First
        };
        Ok(Compound { ser: self, state })
    }

    #[inline]
    fn serialize_struct(self, _name: &'static str, len: usize) -> Result<Self::SerializeStruct> {
        self.serialize_map(Some(len))
    }

    /// Starts `{ variant: {` ; both braces are closed by `SerializeStructVariant::end`.
    #[inline]
    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        len: usize,
    ) -> Result<Self::SerializeStructVariant> {
        self.formatter.open(&mut self.writer, b'{')?;
        self.formatter.comma(&mut self.writer, true)?;
        escape_key(&mut self.writer, variant)?;
        self.formatter.colon(&mut self.writer)?;
        self.serialize_map(Some(len))
    }
}

impl<'a, W, F> ser::SerializeSeq for Compound<'a, W, F>
where
    W: io::Write,
    F: Formatter,
{
    type Ok = ();
    type Error = Error;

    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + serde::Serialize,
    {
        self.ser
            .formatter
            .comma(&mut self.ser.writer, self.state == State::First)?;
        self.state = State::Rest;
        value.serialize(&mut *self.ser)
    }

    fn end(self) -> Result<Self::Ok> {
        match self.state {
            // `[]` was already written when the sequence was started.
            State::Empty => Ok(()),
            _ => self.ser.formatter.close(&mut self.ser.writer, b']'),
        }
    }
}

impl<'a, W, F> ser::SerializeTuple for Compound<'a, W, F>
where
    W: io::Write,
    F: Formatter,
{
    type Ok = ();
    type Error = Error;

    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + serde::Serialize,
    {
        ser::SerializeSeq::serialize_element(self, value)
    }

    fn end(self) -> Result<Self::Ok> {
        ser::SerializeSeq::end(self)
    }
}

impl<'a, W, F> ser::SerializeTupleStruct for Compound<'a, W, F>
where
    W: io::Write,
    F: Formatter,
{
    type Ok = ();
    type Error = Error;

    fn serialize_field<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + serde::Serialize,
    {
        ser::SerializeSeq::serialize_element(self, value)
    }

    fn end(self) -> Result<Self::Ok> {
        ser::SerializeSeq::end(self)
    }
}

impl<'a, W, F> ser::SerializeTupleVariant for Compound<'a, W, F>
where
    W: io::Write,
    F: Formatter,
{
    type Ok = ();
    type Error = Error;

    fn serialize_field<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + serde::Serialize,
    {
        ser::SerializeSeq::serialize_element(self, value)
    }

    fn end(self) -> Result<Self::Ok> {
        // Close the inner list (unless it was written as `[]`), then the
        // wrapping object opened by `serialize_tuple_variant`.
        match self.state {
            State::Empty => {}
            _ => self.ser.formatter.close(&mut self.ser.writer, b']')?,
        }
        self.ser.formatter.close(&mut self.ser.writer, b'}')
    }
}

impl<'a, W, F> ser::SerializeMap for Compound<'a, W, F>
where
    W: io::Write,
    F: Formatter,
{
    type Ok = ();
    type Error = Error;

    fn serialize_key<T>(&mut self, key: &T) -> Result<()>
    where
        T: ?Sized + serde::Serialize,
    {
        self.ser
            .formatter
            .comma(&mut self.ser.writer, self.state == State::First)?;
        self.state = State::Rest;

        // Keys go through a dedicated serializer that only accepts strings.
        key.serialize(MapKeySerializer { ser: self.ser })?;

        self.ser.formatter.colon(&mut self.ser.writer)
    }

    fn serialize_value<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + serde::Serialize,
    {
        value.serialize(&mut *self.ser)
    }

    fn end(self) -> Result<Self::Ok> {
        match self.state {
            // `{}` was already written when the map was started.
            State::Empty => Ok(()),
            _ => self.ser.formatter.close(&mut self.ser.writer, b'}'),
        }
    }
}

impl<'a, W, F> ser::SerializeStruct for Compound<'a, W, F>
where
    W: io::Write,
    F: Formatter,
{
    type Ok = ();
    type Error = Error;

    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()>
    where
        T: ?Sized + serde::Serialize,
    {
        ser::SerializeMap::serialize_entry(self, key, value)
    }

    fn end(self) -> Result<Self::Ok> {
        ser::SerializeMap::end(self)
    }
}

impl<'a, W, F> ser::SerializeStructVariant for Compound<'a, W, F>
where
    W: io::Write,
    F: Formatter,
{
    type Ok = ();
    type Error = Error;

    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()>
    where
        T: ?Sized + serde::Serialize,
    {
        ser::SerializeStruct::serialize_field(self, key, value)
    }

    fn end(self) -> Result<Self::Ok> {
        // Close the inner object (unless it was written as `{}`), then the
        // wrapping object opened by `serialize_struct_variant`.
        match self.state {
            State::Empty => {}
            _ => self.ser.formatter.close(&mut self.ser.writer, b'}')?,
        }
        self.ser.formatter.close(&mut self.ser.writer, b'}')
    }
}

/// Serializer used for map keys: only string keys are accepted, every other
/// kind of value fails with `ErrorCode::KeyMustBeAString`.
struct MapKeySerializer<'a, W: 'a, F: 'a> {
    ser: &'a mut Serializer<W, F>,
}

/// Builds the error returned for every non-string map key.
fn key_must_be_a_string<T>() -> Result<T> {
    Err(Error::Syntax(ErrorCode::KeyMustBeAString, 0, 0))
}

impl<'a, W, F> ser::Serializer for MapKeySerializer<'a, W, F>
where
    W: io::Write,
    F: Formatter,
{
    type Ok = ();
    type Error = Error;

    #[inline]
    fn serialize_str(self, value: &str) -> Result<()> {
        escape_key(&mut self.ser.writer, value).map_err(From::from)
    }

    type SerializeSeq = ser::Impossible<(), Error>;
    type SerializeTuple = ser::Impossible<(), Error>;
    type SerializeTupleStruct = ser::Impossible<(), Error>;
    type SerializeTupleVariant = ser::Impossible<(), Error>;
    type SerializeMap = ser::Impossible<(), Error>;
    type SerializeStruct = ser::Impossible<(), Error>;
    type SerializeStructVariant = ser::Impossible<(), Error>;

    fn serialize_bool(self, _value: bool) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_i8(self, _value: i8) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_i16(self, _value: i16) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_i32(self, _value: i32) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_i64(self, _value: i64) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_u8(self, _value: u8) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_u16(self, _value: u16) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_u32(self, _value: u32) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_u64(self, _value: u64) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_f32(self, _value: f32) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_f64(self, _value: f64) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_char(self, _value: char) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_bytes(self, _value: &[u8]) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_unit(self) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
    ) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_newtype_struct<T>(self, _name: &'static str, _value: &T) -> Result<()>
    where
        T: ?Sized + ser::Serialize,
    {
        key_must_be_a_string()
    }

    fn serialize_newtype_variant<T>(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T,
    ) -> Result<()>
    where
        T: ?Sized + ser::Serialize,
    {
        key_must_be_a_string()
    }

    fn serialize_none(self) -> Result<()> {
        key_must_be_a_string()
    }

    fn serialize_some<T>(self, _value: &T) -> Result<()>
    where
        T: ?Sized + ser::Serialize,
    {
        key_must_be_a_string()
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> {
        key_must_be_a_string()
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple> {
        key_must_be_a_string()
    }

    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct> {
        key_must_be_a_string()
    }

    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant> {
        key_must_be_a_string()
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
        key_must_be_a_string()
    }

    fn serialize_struct(self, _name: &'static str, _len: usize) -> Result<Self::SerializeStruct> {
        key_must_be_a_string()
    }

    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant> {
        key_must_be_a_string()
    }
}

/// This trait abstracts away serializing the JSON control characters
pub trait Formatter {
    /// Called when serializing a '{' or '['.
    fn open<W>(&mut self, writer: &mut W, ch: u8) -> Result<()>
    where
        W: io::Write;

    /// Called when serializing a ','.
    fn comma<W>(&mut self, writer: &mut W, first: bool) -> Result<()>
    where
        W: io::Write;

    /// Called when serializing a ':'.
    fn colon<W>(&mut self, writer: &mut W) -> Result<()>
    where
        W: io::Write;

    /// Called when serializing a '}' or ']'.
    fn close<W>(&mut self, writer: &mut W, ch: u8) -> Result<()>
    where
        W: io::Write;

    /// Newline with indent.
+ fn newline(&mut self, writer: &mut W, add_indent: i32) -> Result<()> + where + W: io::Write; + + /// Start a value. + fn start_value(&mut self, writer: &mut W) -> Result<()> + where + W: io::Write; +} + +struct HjsonFormatter<'a> { + current_indent: usize, + current_is_array: bool, + stack: Vec, + at_colon: bool, + indent: &'a [u8], + braces_same_line: bool, +} + +impl<'a> HjsonFormatter<'a> { + /// Construct a formatter that defaults to using two spaces for indentation. + pub fn new() -> Self { + HjsonFormatter::with_indent(b" ") + } + + /// Construct a formatter that uses the `indent` string for indentation. + pub fn with_indent(indent: &'a [u8]) -> Self { + HjsonFormatter { + current_indent: 0, + current_is_array: false, + stack: Vec::new(), + at_colon: false, + indent, + braces_same_line: false, + } + } +} + +impl<'a> Formatter for HjsonFormatter<'a> { + fn open(&mut self, writer: &mut W, ch: u8) -> Result<()> + where + W: io::Write, + { + if self.current_indent > 0 && !self.current_is_array && !self.braces_same_line { + self.newline(writer, 0)?; + } else { + self.start_value(writer)?; + } + self.current_indent += 1; + self.stack.push(self.current_is_array); + self.current_is_array = ch == b'['; + writer.write_all(&[ch]).map_err(From::from) + } + + fn comma(&mut self, writer: &mut W, _: bool) -> Result<()> + where + W: io::Write, + { + writer.write_all(b"\n")?; + indent(writer, self.current_indent, self.indent) + } + + fn colon(&mut self, writer: &mut W) -> Result<()> + where + W: io::Write, + { + self.at_colon = !self.braces_same_line; + writer + .write_all(if self.braces_same_line { b": " } else { b":" }) + .map_err(From::from) + } + + fn close(&mut self, writer: &mut W, ch: u8) -> Result<()> + where + W: io::Write, + { + self.current_indent -= 1; + self.current_is_array = self.stack.pop().expect("Internal error: json parsing"); + writer.write_all(b"\n")?; + indent(writer, self.current_indent, self.indent)?; + writer.write_all(&[ch]).map_err(From::from) + } 
+ + fn newline(&mut self, writer: &mut W, add_indent: i32) -> Result<()> + where + W: io::Write, + { + self.at_colon = false; + writer.write_all(b"\n")?; + let ii = self.current_indent as i32 + add_indent; + indent(writer, if ii < 0 { 0 } else { ii as usize }, self.indent) + } + + fn start_value(&mut self, writer: &mut W) -> Result<()> + where + W: io::Write, + { + if self.at_colon { + self.at_colon = false; + writer.write_all(b" ")? + } + Ok(()) + } +} + +/// Serializes and escapes a `&[u8]` into a Hjson string. +#[inline] +pub fn escape_bytes(wr: &mut W, bytes: &[u8]) -> Result<()> +where + W: io::Write, +{ + wr.write_all(b"\"")?; + + let mut start = 0; + + for (i, byte) in bytes.iter().enumerate() { + let escaped = match *byte { + b'"' => b"\\\"", + b'\\' => b"\\\\", + b'\x08' => b"\\b", + b'\x0c' => b"\\f", + b'\n' => b"\\n", + b'\r' => b"\\r", + b'\t' => b"\\t", + _ => { + continue; + } + }; + + if start < i { + wr.write_all(&bytes[start..i])?; + } + + wr.write_all(escaped)?; + + start = i + 1; + } + + if start != bytes.len() { + wr.write_all(&bytes[start..])?; + } + + wr.write_all(b"\"")?; + Ok(()) +} + +/// Serializes and escapes a `&str` into a Hjson string. +#[inline] +pub fn quote_str(wr: &mut W, formatter: &mut F, value: &str) -> Result<()> +where + W: io::Write, + F: Formatter, +{ + lazy_static! 
{ + // NEEDS_ESCAPE tests if the string can be written without escapes + static ref NEEDS_ESCAPE: Regex = Regex::new("[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]").expect("Internal error: json parsing"); + // NEEDS_QUOTES tests if the string can be written as a quoteless string (includes needsEscape but without \\ and \") + static ref NEEDS_QUOTES: Regex = Regex::new("^\\s|^\"|^'''|^#|^/\\*|^//|^\\{|^\\}|^\\[|^\\]|^:|^,|\\s$|[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]").expect("Internal error: json parsing"); + // NEEDS_ESCAPEML tests if the string can be written as a multiline string (includes needsEscape but without \n, \r, \\ and \") + static ref NEEDS_ESCAPEML: Regex = Regex::new("'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]").expect("Internal error: json parsing"); + // starts with a keyword and optionally is followed by a comment + static ref STARTS_WITH_KEYWORD: Regex = Regex::new(r#"^(true|false|null)\s*((,|\]|\}|#|//|/\*).*)?$"#).expect("Internal error: json parsing"); + } + + if value.is_empty() { + formatter.start_value(wr)?; + return escape_bytes(wr, value.as_bytes()); + } + + // Check if we can insert this string without quotes + // see hjson syntax (must not parse as true, false, null or number) + + let mut pn = ParseNumber::new(value.bytes()); + let is_number = pn.parse(true).is_ok(); + + if is_number || NEEDS_QUOTES.is_match(value) || STARTS_WITH_KEYWORD.is_match(value) { + // First check if the string can be expressed in multiline format or + // we must replace the offending characters with safe escape sequences. 
+ + if NEEDS_ESCAPE.is_match(value) && !NEEDS_ESCAPEML.is_match(value) + /* && !isRootObject */ + { + ml_str(wr, formatter, value) + } else { + formatter.start_value(wr)?; + escape_bytes(wr, value.as_bytes()) + } + } else { + // without quotes + formatter.start_value(wr)?; + wr.write_all(value.as_bytes()).map_err(From::from) + } +} + +/// Serializes and escapes a `&str` into a multiline Hjson string. +pub fn ml_str(wr: &mut W, formatter: &mut F, value: &str) -> Result<()> +where + W: io::Write, + F: Formatter, +{ + // wrap the string into the ''' (multiline) format + + let a: Vec<&str> = value.split('\n').collect(); + + if a.len() == 1 { + // The string contains only a single line. We still use the multiline + // format as it avoids escaping the \ character (e.g. when used in a + // regex). + formatter.start_value(wr)?; + wr.write_all(b"'''")?; + wr.write_all(a[0].as_bytes())?; + wr.write_all(b"'''")? + } else { + formatter.newline(wr, 1)?; + wr.write_all(b"'''")?; + for line in a { + formatter.newline(wr, if !line.is_empty() { 1 } else { -999 })?; + wr.write_all(line.as_bytes())?; + } + formatter.newline(wr, 1)?; + wr.write_all(b"'''")?; + } + Ok(()) +} + +/// Serializes and escapes a `&str` into a Hjson key. +#[inline] +pub fn escape_key(wr: &mut W, value: &str) -> Result<()> +where + W: io::Write, +{ + lazy_static! { + static ref NEEDS_ESCAPE_NAME: Regex = + Regex::new(r#"[,\{\[\}\]\s:#"]|//|/\*|'''|^$"#).expect("Internal error: json parsing"); + } + + // Check if we can insert this name without quotes + if NEEDS_ESCAPE_NAME.is_match(value) { + escape_bytes(wr, value.as_bytes()).map_err(From::from) + } else { + wr.write_all(value.as_bytes()).map_err(From::from) + } +} + +#[inline] +fn escape_char(wr: &mut W, value: char) -> Result<()> +where + W: io::Write, +{ + // FIXME: this allocation is required in order to be compatible with stable + // rust, which doesn't support encoding a `char` into a stack buffer. 
/// Formats a number in whichever notation is shorter, similar to ES6.
///
/// The plain `Display` form is kept unless the `{:e}` form is more than one
/// character shorter. When the exponent form is chosen, a positive exponent
/// is written with an explicit `+` (`1e+21`); a negative one is left as is.
fn fmt_small<N>(value: N) -> String
where
    N: Display + LowerExp,
{
    let plain = value.to_string();
    let scientific = format!("{:e}", value);

    if plain.len() <= scientific.len() + 1 {
        return plain;
    }

    if scientific.contains("e-") {
        scientific
    } else {
        scientific.replace("e", "e+")
    }
}
+#[inline] +pub fn to_string(value: &T) -> Result +where + T: ser::Serialize, +{ + let vec = to_vec(value)?; + let string = String::from_utf8(vec)?; + Ok(string) +} diff --git a/crates/nu-json/src/util.rs b/crates/nu-json/src/util.rs new file mode 100644 index 000000000..5f650b109 --- /dev/null +++ b/crates/nu-json/src/util.rs @@ -0,0 +1,333 @@ +use std::io; +use std::str; + +use super::error::{Error, ErrorCode, Result}; + +pub struct StringReader> { + iter: Iter, + line: usize, + col: usize, + ch: Vec, +} + +impl StringReader +where + Iter: Iterator, +{ + #[inline] + pub fn new(iter: Iter) -> Self { + StringReader { + iter, + line: 1, + col: 0, + ch: Vec::new(), + } + } + + fn next(&mut self) -> Option> { + match self.iter.next() { + None => None, + Some(b'\n') => { + self.line += 1; + self.col = 0; + Some(Ok(b'\n')) + } + Some(c) => { + self.col += 1; + Some(Ok(c)) + } + } + } + + pub fn pos(&mut self) -> (usize, usize) { + (self.line, self.col) + } + + pub fn eof(&mut self) -> Result { + Ok(self.peek()?.is_none()) + } + + pub fn peek_next(&mut self, idx: usize) -> Result> { + while self.ch.len() <= idx { + match self.next() { + Some(Err(err)) => return Err(Error::Io(err)), + Some(Ok(ch)) => self.ch.push(ch), + None => return Ok(None), + } + } + Ok(Some(self.ch[idx])) + } + + // pub fn peek_next_or_null(&mut self, idx: usize) -> Result { + // Ok(try!(self.peek_next(idx)).unwrap_or(b'\x00')) + // } + + pub fn peek(&mut self) -> Result> { + self.peek_next(0) + } + + pub fn peek_or_null(&mut self) -> Result { + Ok(self.peek()?.unwrap_or(b'\x00')) + } + + pub fn eat_char(&mut self) -> u8 { + self.ch.remove(0) + } + + pub fn uneat_char(&mut self, ch: u8) { + self.ch.insert(0, ch); + } + + pub fn next_char(&mut self) -> Result> { + match self.ch.first() { + Some(&ch) => { + self.eat_char(); + Ok(Some(ch)) + } + None => match self.next() { + Some(Err(err)) => Err(Error::Io(err)), + Some(Ok(ch)) => Ok(Some(ch)), + None => Ok(None), + }, + } + } + + pub fn 
next_char_or_null(&mut self) -> Result { + Ok(self.next_char()?.unwrap_or(b'\x00')) + } + + fn eat_line(&mut self) -> Result<()> { + loop { + match self.peek()? { + Some(b'\n') | None => return Ok(()), + _ => {} + } + self.eat_char(); + } + } + + pub fn parse_whitespace(&mut self) -> Result<()> { + loop { + match self.peek_or_null()? { + b' ' | b'\n' | b'\t' | b'\r' => { + self.eat_char(); + } + b'#' => self.eat_line()?, + b'/' => { + match self.peek_next(1)? { + Some(b'/') => self.eat_line()?, + Some(b'*') => { + self.eat_char(); + self.eat_char(); + while !(self.peek()?.unwrap_or(b'*') == b'*' + && self.peek_next(1)?.unwrap_or(b'/') == b'/') + { + self.eat_char(); + } + self.eat_char(); + self.eat_char(); + } + Some(_) => { + self.eat_char(); + } + None => return Err(self.error(ErrorCode::TrailingCharacters)), //todo + } + } + _ => { + return Ok(()); + } + } + } + } + + pub fn error(&mut self, reason: ErrorCode) -> Error { + Error::Syntax(reason, self.line, self.col) + } +} + +pub enum Number { + I64(i64), + U64(u64), + F64(f64), +} + +pub struct ParseNumber> { + rdr: StringReader, + result: Vec, +} + +// macro_rules! 
try_or_invalid { +// ($e:expr) => { +// match $e { +// Some(v) => v, +// None => { return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)); } +// } +// } +// } + +impl> ParseNumber { + #[inline] + pub fn new(iter: Iter) -> Self { + ParseNumber { + rdr: StringReader::new(iter), + result: Vec::new(), + } + } + + pub fn parse(&mut self, stop_at_next: bool) -> Result { + match self.try_parse() { + Ok(()) => { + self.rdr.parse_whitespace()?; + + let mut ch = self.rdr.next_char_or_null()?; + + if stop_at_next { + let ch2 = self.rdr.peek_or_null()?; + // end scan if we find a punctuator character like ,}] or a comment + if ch == b',' + || ch == b'}' + || ch == b']' + || ch == b'#' + || ch == b'/' && (ch2 == b'/' || ch2 == b'*') + { + ch = b'\x00'; + } + } + + match ch { + b'\x00' => { + let res = + str::from_utf8(&self.result).expect("Internal error: json parsing"); + + let mut is_float = false; + for ch in res.chars() { + if ch == '.' || ch == 'e' || ch == 'E' { + is_float = true; + break; + } + } + + if is_float { + Ok(Number::F64( + res.parse::().expect("Internal error: json parsing"), + )) + } else if res.starts_with('-') { + Ok(Number::I64( + res.parse::().expect("Internal error: json parsing"), + )) + } else { + Ok(Number::U64( + res.parse::().expect("Internal error: json parsing"), + )) + } + } + _ => Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)), + } + } + Err(e) => Err(e), + } + } + + fn try_parse(&mut self) -> Result<()> { + if self.rdr.peek_or_null()? == b'-' { + self.result.push(self.rdr.eat_char()); + } + + let mut has_value = false; + + if self.rdr.peek_or_null()? == b'0' { + self.result.push(self.rdr.eat_char()); + has_value = true; + + // There can be only one leading '0'. + if let b'0'..=b'9' = self.rdr.peek_or_null()? { + return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)); + } + } + + loop { + match self.rdr.peek_or_null()? { + b'0'..=b'9' => { + self.result.push(self.rdr.eat_char()); + has_value = true; + } + b'.' 
=> { + if !has_value { + return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)); + } + self.rdr.eat_char(); + return self.try_decimal(); + } + b'e' | b'E' => { + if !has_value { + return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)); + } + self.rdr.eat_char(); + return self.try_exponent(); + } + _ => { + if !has_value { + return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)); + } + return Ok(()); + } + } + } + } + + fn try_decimal(&mut self) -> Result<()> { + self.result.push(b'.'); + + // Make sure a digit follows the decimal place. + match self.rdr.next_char_or_null()? { + c @ b'0'..=b'9' => { + self.result.push(c); + } + _ => { + return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)); + } + }; + + while let b'0'..=b'9' = self.rdr.peek_or_null()? { + self.result.push(self.rdr.eat_char()); + } + + match self.rdr.peek_or_null()? { + b'e' | b'E' => { + self.rdr.eat_char(); + self.try_exponent() + } + _ => Ok(()), + } + } + + fn try_exponent(&mut self) -> Result<()> { + self.result.push(b'e'); + + match self.rdr.peek_or_null()? { + b'+' => { + self.result.push(self.rdr.eat_char()); + } + b'-' => { + self.result.push(self.rdr.eat_char()); + } + _ => {} + }; + + // Make sure a digit follows the exponent place. + match self.rdr.next_char_or_null()? { + c @ b'0'..=b'9' => { + self.result.push(c); + } + _ => { + return Err(Error::Syntax(ErrorCode::InvalidNumber, 0, 0)); + } + }; + + while let b'0'..=b'9' = self.rdr.peek_or_null()? 
{ + self.result.push(self.rdr.eat_char()); + } + + Ok(()) + } +} diff --git a/crates/nu-json/src/value.rs b/crates/nu-json/src/value.rs new file mode 100644 index 000000000..0d3533f9c --- /dev/null +++ b/crates/nu-json/src/value.rs @@ -0,0 +1,1158 @@ +#[cfg(not(feature = "preserve_order"))] +use std::collections::{btree_map, BTreeMap}; + +#[cfg(feature = "preserve_order")] +use linked_hash_map::{self, LinkedHashMap}; + +use std::fmt; +use std::io; +use std::str; +use std::vec; + +use num_traits::NumCast; + +use serde::de; +use serde::ser; + +use crate::error::{Error, ErrorCode}; + +type Result = std::result::Result; + +/// Represents a key/value type. +#[cfg(not(feature = "preserve_order"))] +pub type Map = BTreeMap; +/// Represents a key/value type. +#[cfg(feature = "preserve_order")] +pub type Map = LinkedHashMap; + +/// Represents the `IntoIter` type. +#[cfg(not(feature = "preserve_order"))] +pub type MapIntoIter = btree_map::IntoIter; +/// Represents the IntoIter type. +#[cfg(feature = "preserve_order")] +pub type MapIntoIter = linked_hash_map::IntoIter; + +fn map_with_capacity(size: Option) -> Map { + #[cfg(not(feature = "preserve_order"))] + { + let _ = size; + BTreeMap::new() + } + + #[cfg(feature = "preserve_order")] + { + LinkedHashMap::with_capacity(size.unwrap_or(0)) + } +} + +/// Represents a Hjson/JSON value +#[derive(Clone, PartialEq)] +pub enum Value { + /// Represents a JSON null value + Null, + + /// Represents a JSON Boolean + Bool(bool), + + /// Represents a JSON signed integer + I64(i64), + + /// Represents a JSON unsigned integer + U64(u64), + + /// Represents a JSON floating point number + F64(f64), + + /// Represents a JSON string + String(String), + + /// Represents a JSON array + Array(Vec), + + /// Represents a JSON object + Object(Map), +} + +impl Value { + /// If the `Value` is an Object, returns the value associated with the provided key. + /// Otherwise, returns None. 
+ pub fn find<'a>(&'a self, key: &str) -> Option<&'a Value> { + match *self { + Value::Object(ref map) => map.get(key), + _ => None, + } + } + + /// Attempts to get a nested Value Object for each key in `keys`. + /// If any key is found not to exist, find_path will return None. + /// Otherwise, it will return the `Value` associated with the final key. + pub fn find_path<'a>(&'a self, keys: &[&str]) -> Option<&'a Value> { + let mut target = self; + for key in keys { + match target.find(key) { + Some(t) => { + target = t; + } + None => return None, + } + } + Some(target) + } + + /// Looks up a value by a JSON Pointer. + /// + /// JSON Pointer defines a string syntax for identifying a specific value + /// within a JavaScript Object Notation (JSON) document. + /// + /// A Pointer is a Unicode string with the reference tokens separated by `/`. + /// Inside tokens `/` is replaced by `~1` and `~` is replaced by `~0`. The + /// addressed value is returned and if there is no such value `None` is + /// returned. + /// + /// For more information read [RFC6901](https://tools.ietf.org/html/rfc6901). + pub fn pointer<'a>(&'a self, pointer: &str) -> Option<&'a Value> { + fn parse_index(s: &str) -> Option { + if s.starts_with('+') || (s.starts_with('0') && s.len() != 1) { + return None; + } + s.parse().ok() + } + if pointer.is_empty() { + return Some(self); + } + if !pointer.starts_with('/') { + return None; + } + let mut target = self; + for escaped_token in pointer.split('/').skip(1) { + let token = escaped_token.replace("~1", "/").replace("~0", "~"); + let target_opt = match *target { + Value::Object(ref map) => map.get(&token[..]), + Value::Array(ref list) => parse_index(&token[..]).and_then(|x| list.get(x)), + _ => return None, + }; + if let Some(t) = target_opt { + target = t; + } else { + return None; + } + } + Some(target) + } + + /// If the `Value` is an Object, performs a depth-first search until + /// a value associated with the provided key is found. 
If no value is found + /// or the `Value` is not an Object, returns None. + pub fn search<'a>(&'a self, key: &str) -> Option<&'a Value> { + match self { + Value::Object(map) => map + .get(key) + .or_else(|| map.values().find_map(|v| v.search(key))), + _ => None, + } + } + + /// Returns true if the `Value` is an Object. Returns false otherwise. + pub fn is_object(&self) -> bool { + self.as_object().is_some() + } + + /// If the `Value` is an Object, returns the associated Map. + /// Returns None otherwise. + pub fn as_object(&self) -> Option<&Map> { + match *self { + Value::Object(ref map) => Some(map), + _ => None, + } + } + + /// If the `Value` is an Object, returns the associated mutable Map. + /// Returns None otherwise. + pub fn as_object_mut(&mut self) -> Option<&mut Map> { + match *self { + Value::Object(ref mut map) => Some(map), + _ => None, + } + } + + /// Returns true if the `Value` is an Array. Returns false otherwise. + pub fn is_array(&self) -> bool { + self.as_array().is_some() + } + + /// If the `Value` is an Array, returns the associated vector. + /// Returns None otherwise. + pub fn as_array(&self) -> Option<&Vec> { + match self { + Value::Array(array) => Some(array), + _ => None, + } + } + + /// If the `Value` is an Array, returns the associated mutable vector. + /// Returns None otherwise. + pub fn as_array_mut(&mut self) -> Option<&mut Vec> { + match self { + Value::Array(list) => Some(list), + _ => None, + } + } + + /// Returns true if the `Value` is a String. Returns false otherwise. + pub fn is_string(&self) -> bool { + self.as_str().is_some() + } + + /// If the `Value` is a String, returns the associated str. + /// Returns None otherwise. + pub fn as_str(&self) -> Option<&str> { + match self { + Value::String(s) => Some(s), + _ => None, + } + } + + /// Returns true if the `Value` is a Number. Returns false otherwise. 
+ pub fn is_number(&self) -> bool { + matches!(self, Value::I64(_) | Value::U64(_) | Value::F64(_)) + } + + /// Returns true if the `Value` is a i64. Returns false otherwise. + pub fn is_i64(&self) -> bool { + matches!(self, Value::I64(_)) + } + + /// Returns true if the `Value` is a u64. Returns false otherwise. + pub fn is_u64(&self) -> bool { + matches!(self, Value::U64(_)) + } + + /// Returns true if the `Value` is a f64. Returns false otherwise. + pub fn is_f64(&self) -> bool { + matches!(self, Value::F64(_)) + } + + /// If the `Value` is a number, return or cast it to a i64. + /// Returns None otherwise. + pub fn as_i64(&self) -> Option { + match *self { + Value::I64(n) => Some(n), + Value::U64(n) => NumCast::from(n), + _ => None, + } + } + + /// If the `Value` is a number, return or cast it to a u64. + /// Returns None otherwise. + pub fn as_u64(&self) -> Option { + match *self { + Value::I64(n) => NumCast::from(n), + Value::U64(n) => Some(n), + _ => None, + } + } + + /// If the `Value` is a number, return or cast it to a f64. + /// Returns None otherwise. + pub fn as_f64(&self) -> Option { + match *self { + Value::I64(n) => NumCast::from(n), + Value::U64(n) => NumCast::from(n), + Value::F64(n) => Some(n), + _ => None, + } + } + + /// Returns true if the `Value` is a Boolean. Returns false otherwise. + pub fn is_boolean(&self) -> bool { + self.as_bool().is_some() + } + + /// If the `Value` is a Boolean, returns the associated bool. + /// Returns None otherwise. + pub fn as_bool(&self) -> Option { + match *self { + Value::Bool(b) => Some(b), + _ => None, + } + } + + /// Returns true if the `Value` is a Null. Returns false otherwise. + pub fn is_null(&self) -> bool { + self.as_null().is_some() + } + + /// If the `Value` is a Null, returns (). + /// Returns None otherwise. 
+ pub fn as_null(&self) -> Option<()> { + match self { + Value::Null => Some(()), + _ => None, + } + } + + fn as_unexpected(&self) -> de::Unexpected<'_> { + match *self { + Value::Null => de::Unexpected::Unit, + Value::Bool(v) => de::Unexpected::Bool(v), + Value::I64(v) => de::Unexpected::Signed(v), + Value::U64(v) => de::Unexpected::Unsigned(v), + Value::F64(v) => de::Unexpected::Float(v), + Value::String(ref v) => de::Unexpected::Str(v), + Value::Array(_) => de::Unexpected::Seq, + Value::Object(_) => de::Unexpected::Map, + } + } +} + +impl ser::Serialize for Value { + #[inline] + fn serialize(&self, serializer: S) -> Result + where + S: ser::Serializer, + { + match *self { + Value::Null => serializer.serialize_unit(), + Value::Bool(v) => serializer.serialize_bool(v), + Value::I64(v) => serializer.serialize_i64(v), + Value::U64(v) => serializer.serialize_u64(v), + Value::F64(v) => serializer.serialize_f64(v), + Value::String(ref v) => serializer.serialize_str(v), + Value::Array(ref v) => v.serialize(serializer), + Value::Object(ref v) => v.serialize(serializer), + } + } +} + +impl<'de> de::Deserialize<'de> for Value { + #[inline] + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + struct ValueVisitor; + + impl<'de> de::Visitor<'de> for ValueVisitor { + type Value = Value; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("a json value") + } + + #[inline] + fn visit_bool(self, value: bool) -> Result { + Ok(Value::Bool(value)) + } + + #[inline] + fn visit_i64(self, value: i64) -> Result { + if value < 0 { + Ok(Value::I64(value)) + } else { + Ok(Value::U64(value as u64)) + } + } + + #[inline] + fn visit_u64(self, value: u64) -> Result { + Ok(Value::U64(value)) + } + + #[inline] + fn visit_f64(self, value: f64) -> Result { + Ok(Value::F64(value)) + } + + #[inline] + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + self.visit_string(String::from(value)) + } + + #[inline] + fn 
/// Adapter that lets byte-oriented `io::Write` code write into a
/// `fmt::Formatter`.
struct WriterFormatter<'a, 'b: 'a> {
    inner: &'a mut fmt::Formatter<'b>,
}

impl<'a, 'b> io::Write for WriterFormatter<'a, 'b> {
    /// Forwards `buf` to the formatter as text.
    ///
    /// Fails when `buf` is not valid UTF-8 on its own or when the formatter
    /// reports an error; otherwise the whole buffer is reported as written.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        // The concrete error does not matter: the fmt::Debug and
        // fmt::Display impls that use this adapter map it to fmt::Error.
        let failure = || io::Error::new(io::ErrorKind::Other, "fmt error");

        let text = str::from_utf8(buf).map_err(|_| failure())?;
        self.inner.write_str(text).map_err(|_| failure())?;
        Ok(buf.len())
    }

    /// Nothing is buffered here, so flushing always succeeds.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
self).map_err(|_| fmt::Error) + } +} + +impl str::FromStr for Value { + type Err = Error; + fn from_str(s: &str) -> Result { + super::de::from_str(s) + } +} + +/// Create a `serde::Serializer` that serializes a `Serialize`e into a `Value`. +#[derive(Default)] +pub struct Serializer; + +impl ser::Serializer for Serializer { + type Ok = Value; + type Error = Error; + + type SerializeSeq = SerializeVec; + type SerializeTuple = SerializeVec; + type SerializeTupleStruct = SerializeVec; + type SerializeTupleVariant = SerializeTupleVariant; + type SerializeMap = SerializeMap; + type SerializeStruct = SerializeMap; + type SerializeStructVariant = SerializeStructVariant; + + #[inline] + fn serialize_bool(self, value: bool) -> Result { + Ok(Value::Bool(value)) + } + + #[inline] + fn serialize_i8(self, value: i8) -> Result { + self.serialize_i64(value as i64) + } + + #[inline] + fn serialize_i16(self, value: i16) -> Result { + self.serialize_i64(value as i64) + } + + #[inline] + fn serialize_i32(self, value: i32) -> Result { + self.serialize_i64(value as i64) + } + + fn serialize_i64(self, value: i64) -> Result { + let v = if value < 0 { + Value::I64(value) + } else { + Value::U64(value as u64) + }; + Ok(v) + } + + #[inline] + fn serialize_u8(self, value: u8) -> Result { + self.serialize_u64(value as u64) + } + + #[inline] + fn serialize_u16(self, value: u16) -> Result { + self.serialize_u64(value as u64) + } + + #[inline] + fn serialize_u32(self, value: u32) -> Result { + self.serialize_u64(value as u64) + } + + #[inline] + fn serialize_u64(self, value: u64) -> Result { + Ok(Value::U64(value)) + } + + #[inline] + fn serialize_f32(self, value: f32) -> Result { + self.serialize_f64(value as f64) + } + + #[inline] + fn serialize_f64(self, value: f64) -> Result { + Ok(Value::F64(value)) + } + + #[inline] + fn serialize_char(self, value: char) -> Result { + let mut s = String::new(); + s.push(value); + self.serialize_str(&s) + } + + #[inline] + fn serialize_str(self, value: &str) 
-> Result { + Ok(Value::String(String::from(value))) + } + + fn serialize_bytes(self, value: &[u8]) -> Result { + let mut state = self.serialize_seq(Some(value.len()))?; + for byte in value { + ser::SerializeSeq::serialize_element(&mut state, byte)?; + } + ser::SerializeSeq::end(state) + } + + #[inline] + fn serialize_unit(self) -> Result { + Ok(Value::Null) + } + + #[inline] + fn serialize_unit_struct(self, _name: &'static str) -> Result { + self.serialize_unit() + } + + #[inline] + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result { + self.serialize_str(variant) + } + + #[inline] + fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result + where + T: ?Sized + ser::Serialize, + { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result + where + T: ?Sized + ser::Serialize, + { + let mut values = Map::new(); + values.insert(String::from(variant), to_value(&value)?); + Ok(Value::Object(values)) + } + + #[inline] + fn serialize_none(self) -> Result { + self.serialize_unit() + } + + #[inline] + fn serialize_some(self, value: &V) -> Result + where + V: ?Sized + ser::Serialize, + { + value.serialize(self) + } + + #[inline] + fn serialize_seq(self, len: Option) -> Result { + Ok(SerializeVec { + vec: Vec::with_capacity(len.unwrap_or(0)), + }) + } + + #[inline] + fn serialize_tuple(self, len: usize) -> Result { + self.serialize_seq(Some(len)) + } + + #[inline] + fn serialize_tuple_struct( + self, + _name: &'static str, + len: usize, + ) -> Result { + self.serialize_seq(Some(len)) + } + + #[inline] + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + Ok(SerializeTupleVariant { + name: variant, + vec: Vec::with_capacity(len), + }) + } + + #[inline] + fn serialize_map(self, len: Option) -> 
Result { + Ok(SerializeMap { + map: map_with_capacity(len), + next_key: None, + }) + } + + #[inline] + fn serialize_struct( + self, + _name: &'static str, + len: usize, + ) -> Result { + self.serialize_map(Some(len)) + } + + #[inline] + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + Ok(SerializeStructVariant { + name: variant, + map: map_with_capacity(Some(len)), + }) + } +} + +#[doc(hidden)] +pub struct SerializeVec { + vec: Vec, +} + +#[doc(hidden)] +pub struct SerializeTupleVariant { + name: &'static str, + vec: Vec, +} + +#[doc(hidden)] +pub struct SerializeMap { + map: Map, + next_key: Option, +} + +#[doc(hidden)] +pub struct SerializeStructVariant { + name: &'static str, + map: Map, +} + +impl ser::SerializeSeq for SerializeVec { + type Ok = Value; + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: ?Sized + ser::Serialize, + { + self.vec.push(to_value(&value)?); + Ok(()) + } + + fn end(self) -> Result { + Ok(Value::Array(self.vec)) + } +} + +impl ser::SerializeTuple for SerializeVec { + type Ok = Value; + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: ?Sized + ser::Serialize, + { + ser::SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result { + ser::SerializeSeq::end(self) + } +} + +impl ser::SerializeTupleStruct for SerializeVec { + type Ok = Value; + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: ?Sized + ser::Serialize, + { + ser::SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result { + ser::SerializeSeq::end(self) + } +} + +impl ser::SerializeTupleVariant for SerializeTupleVariant { + type Ok = Value; + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: ?Sized + ser::Serialize, + { + self.vec.push(to_value(&value)?); + Ok(()) + } + + fn end(self) 
-> Result { + let mut object = Map::new(); + + object.insert(self.name.to_owned(), Value::Array(self.vec)); + + Ok(Value::Object(object)) + } +} + +impl ser::SerializeMap for SerializeMap { + type Ok = Value; + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<()> + where + T: ?Sized + ser::Serialize, + { + match to_value(key)? { + Value::String(s) => self.next_key = Some(s), + _ => return Err(Error::Syntax(ErrorCode::KeyMustBeAString, 0, 0)), + }; + Ok(()) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: ?Sized + ser::Serialize, + { + let key = self.next_key.take(); + // Panic because this indicates a bug in the program rather than an + // expected failure. + let key = key.expect("serialize_value called before serialize_key"); + self.map.insert(key, to_value(value)?); + Ok(()) + } + + fn end(self) -> Result { + Ok(Value::Object(self.map)) + } +} + +impl ser::SerializeStruct for SerializeMap { + type Ok = Value; + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + ser::Serialize, + { + ser::SerializeMap::serialize_entry(self, key, value) + } + + fn end(self) -> Result { + ser::SerializeMap::end(self) + } +} + +impl ser::SerializeStructVariant for SerializeStructVariant { + type Ok = Value; + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + ser::Serialize, + { + self.map.insert(key.to_owned(), to_value(&value)?); + Ok(()) + } + + fn end(self) -> Result { + let mut object = map_with_capacity(Some(1)); + + object.insert(self.name.to_owned(), Value::Object(self.map)); + + Ok(Value::Object(object)) + } +} + +impl<'de> de::Deserializer<'de> for Value { + type Error = Error; + + #[inline] + fn deserialize_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match self { + Value::Null => visitor.visit_unit(), + Value::Bool(v) => visitor.visit_bool(v), + Value::I64(v) => 
visitor.visit_i64(v), + Value::U64(v) => visitor.visit_u64(v), + Value::F64(v) => visitor.visit_f64(v), + Value::String(v) => visitor.visit_string(v), + Value::Array(v) => visitor.visit_seq(SeqDeserializer { + iter: v.into_iter(), + }), + Value::Object(v) => visitor.visit_map(MapDeserializer { + iter: v.into_iter(), + value: None, + }), + } + } + + #[inline] + fn deserialize_option(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match self { + Value::Null => visitor.visit_none(), + _ => visitor.visit_some(self), + } + } + + #[inline] + fn deserialize_enum( + self, + _name: &str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: de::Visitor<'de>, + { + let (variant, value) = match self { + Value::Object(value) => { + let mut iter = value.into_iter(); + let (variant, value) = match iter.next() { + Some(v) => v, + None => { + return Err(de::Error::invalid_type( + de::Unexpected::Map, + &"map with a single key", + )); + } + }; + // enums are encoded in json as maps with a single key:value pair + if iter.next().is_some() { + return Err(de::Error::invalid_type( + de::Unexpected::Map, + &"map with a single key", + )); + } + (variant, Some(value)) + } + Value::String(variant) => (variant, None), + val => { + return Err(de::Error::invalid_type( + val.as_unexpected(), + &"string or map", + )) + } + }; + + visitor.visit_enum(EnumDeserializer { variant, value }) + } + + #[inline] + fn deserialize_newtype_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + serde::forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf unit unit_struct seq tuple + tuple_struct map struct identifier ignored_any + } +} + +struct EnumDeserializer { + variant: String, + value: Option, +} + +impl<'de> de::EnumAccess<'de> for EnumDeserializer { + type Error = Error; + + type Variant = VariantDeserializer; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> + where + V: de::DeserializeSeed<'de>, + { + let variant = de::IntoDeserializer::into_deserializer(self.variant); + let visitor = VariantDeserializer { val: self.value }; + seed.deserialize(variant).map(|v| (v, visitor)) + } +} + +struct VariantDeserializer { + val: Option, +} + +impl<'de, 'a> de::VariantAccess<'de> for VariantDeserializer { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + match self.val { + Some(val) => de::Deserialize::deserialize(val), + None => Ok(()), + } + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + T: de::DeserializeSeed<'de>, + { + match self.val { + Some(value) => seed.deserialize(value), + None => Err(serde::de::Error::invalid_type( + de::Unexpected::UnitVariant, + &"newtype variant", + )), + } + } + + fn tuple_variant(self, _len: usize, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let val = self.val.expect("val is missing"); + if let Value::Array(fields) = val { + visitor.visit_seq(SeqDeserializer { + iter: fields.into_iter(), + }) + } else { + Err(de::Error::invalid_type(val.as_unexpected(), &visitor)) + } + } + + fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match self.val { + Some(Value::Object(fields)) => visitor.visit_map(MapDeserializer { + iter: fields.into_iter(), + value: None, + }), + Some(other) => Err(de::Error::invalid_type( + other.as_unexpected(), + &"struct variant", + )), + None => Err(de::Error::invalid_type( + de::Unexpected::UnitVariant, + &"struct variant", + 
)), + } + } +} + +struct SeqDeserializer { + iter: vec::IntoIter, +} + +impl<'de> de::SeqAccess<'de> for SeqDeserializer { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> + where + T: de::DeserializeSeed<'de>, + { + match self.iter.next() { + Some(value) => Ok(Some(seed.deserialize(value)?)), + None => Ok(None), + } + } + + fn size_hint(&self) -> Option { + match self.iter.size_hint() { + (lower, Some(upper)) if lower == upper => Some(upper), + _ => None, + } + } +} + +struct MapDeserializer { + iter: MapIntoIter, + value: Option, +} + +impl<'de, 'a> de::MapAccess<'de> for MapDeserializer { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: de::DeserializeSeed<'de>, + { + match self.iter.next() { + Some((key, value)) => { + self.value = Some(value); + Ok(Some(seed.deserialize(Value::String(key))?)) + } + None => Ok(None), + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + let value = self.value.take().expect("value is missing"); + seed.deserialize(value) + } + + fn size_hint(&self) -> Option { + match self.iter.size_hint() { + (lower, Some(upper)) if lower == upper => Some(upper), + _ => None, + } + } +} + +pub fn to_value(value: &T) -> Result +where + T: ser::Serialize, +{ + value.serialize(Serializer) +} + +/// Shortcut function to decode a Hjson `Value` into a `T` +pub fn from_value(value: Value) -> Result +where + T: de::DeserializeOwned, +{ + de::Deserialize::deserialize(value) +} + +/// A trait for converting values to Hjson +pub trait ToJson { + /// Converts the value of `self` to an instance of Hjson + fn to_json(&self) -> Value; +} + +impl ToJson for T +where + T: ser::Serialize, +{ + fn to_json(&self) -> Value { + to_value(&self).expect("failed to serialize") + } +} + +#[cfg(test)] +mod test { + use super::Value; + use crate::de::from_str; + + #[test] + fn number_deserialize() { + let v: Value = 
from_str("{\"a\":1}").unwrap(); + let vo = v.as_object().unwrap(); + assert_eq!(vo["a"].as_u64().unwrap(), 1); + + let v: Value = from_str("{\"a\":-1}").unwrap(); + let vo = v.as_object().unwrap(); + assert_eq!(vo["a"].as_i64().unwrap(), -1); + + let v: Value = from_str("{\"a\":1.1}").unwrap(); + let vo = v.as_object().unwrap(); + assert!(vo["a"].as_f64().unwrap() - 1.1 < std::f64::EPSILON); + + let v: Value = from_str("{\"a\":-1.1}").unwrap(); + let vo = v.as_object().unwrap(); + assert!(vo["a"].as_f64().unwrap() + 1.1 > -(std::f64::EPSILON)); + + let v: Value = from_str("{\"a\":1e6}").unwrap(); + let vo = v.as_object().unwrap(); + assert!(vo["a"].as_f64().unwrap() - 1e6 < std::f64::EPSILON); + + let v: Value = from_str("{\"a\":-1e6}").unwrap(); + let vo = v.as_object().unwrap(); + assert!(vo["a"].as_f64().unwrap() + 1e6 > -(std::f64::EPSILON)); + } +} diff --git a/crates/nu-json/tests/main.rs b/crates/nu-json/tests/main.rs new file mode 100644 index 000000000..28c264dc9 --- /dev/null +++ b/crates/nu-json/tests/main.rs @@ -0,0 +1,212 @@ +// FIXME: re-enable tests +/* +use nu_json::Value; +use regex::Regex; +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; + +fn txt(text: &str) -> String { + let out = String::from_utf8_lossy(text.as_bytes()); + + #[cfg(windows)] + { + out.replace("\r\n", "").replace("\n", "") + } + + #[cfg(not(windows))] + { + out.to_string() + } +} + +fn hjson_expectations() -> PathBuf { + let assets = nu_test_support::fs::assets().join("nu_json"); + + nu_path::canonicalize(assets.clone()).unwrap_or_else(|e| { + panic!( + "Couldn't canonicalize hjson assets path {}: {:?}", + assets.display(), + e + ) + }) +} + +fn get_test_content(name: &str) -> io::Result { + let expectations = hjson_expectations(); + + let mut p = format!("{}/{}_test.hjson", expectations.display(), name); + + if !Path::new(&p).exists() { + p = format!("{}/{}_test.json", expectations.display(), name); + } + + fs::read_to_string(&p) +} + +fn get_result_content(name: 
&str) -> io::Result<(String, String)> { + let expectations = hjson_expectations(); + + let p1 = format!("{}/{}_result.json", expectations.display(), name); + let p2 = format!("{}/{}_result.hjson", expectations.display(), name); + + Ok((fs::read_to_string(&p1)?, fs::read_to_string(&p2)?)) +} + +macro_rules! run_test { + // {{ is a workaround for rust stable + ($v: ident, $list: expr, $fix: expr) => {{ + let name = stringify!($v); + $list.push(format!("{}_test", name)); + println!("- running {}", name); + let should_fail = name.starts_with("fail"); + let test_content = get_test_content(name).unwrap(); + let data: nu_json::Result = nu_json::from_str(&test_content); + assert!(should_fail == data.is_err()); + + if !should_fail { + let udata = data.unwrap(); + let (rjson, rhjson) = get_result_content(name).unwrap(); + let rjson = txt(&rjson); + let rhjson = txt(&rhjson); + let actual_hjson = nu_json::to_string(&udata).unwrap(); + let actual_hjson = txt(&actual_hjson); + let actual_json = $fix(serde_json::to_string_pretty(&udata).unwrap()); + let actual_json = txt(&actual_json); + if rhjson != actual_hjson { + println!( + "{:?}\n---hjson expected\n{}\n---hjson actual\n{}\n---\n", + name, rhjson, actual_hjson + ); + } + if rjson != actual_json { + println!( + "{:?}\n---json expected\n{}\n---json actual\n{}\n---\n", + name, rjson, actual_json + ); + } + assert!(rhjson == actual_hjson && rjson == actual_json); + } + }}; +} + +// add fixes where rust's json differs from javascript + +fn std_fix(json: String) -> String { + // serde_json serializes integers with a superfluous .0 suffix + let re = Regex::new(r"(?m)(?P\d)\.0(?P,?)$").unwrap(); + re.replace_all(&json, "$d$s").to_string() +} + +fn fix_kan(json: String) -> String { + std_fix(json).replace(" -0,", " 0,") +} + +fn fix_pass1(json: String) -> String { + std_fix(json) + .replace("1.23456789e34", "1.23456789e+34") + .replace("2.3456789012e76", "2.3456789012e+76") +} + +#[test] +fn test_hjson() { + let mut done: Vec = 
Vec::new(); + + println!(); + run_test!(charset, done, std_fix); + run_test!(comments, done, std_fix); + run_test!(empty, done, std_fix); + run_test!(failCharset1, done, std_fix); + run_test!(failJSON02, done, std_fix); + run_test!(failJSON05, done, std_fix); + run_test!(failJSON06, done, std_fix); + run_test!(failJSON07, done, std_fix); + run_test!(failJSON08, done, std_fix); + run_test!(failJSON10, done, std_fix); + run_test!(failJSON11, done, std_fix); + run_test!(failJSON12, done, std_fix); + run_test!(failJSON13, done, std_fix); + run_test!(failJSON14, done, std_fix); + run_test!(failJSON15, done, std_fix); + run_test!(failJSON16, done, std_fix); + run_test!(failJSON17, done, std_fix); + run_test!(failJSON19, done, std_fix); + run_test!(failJSON20, done, std_fix); + run_test!(failJSON21, done, std_fix); + run_test!(failJSON22, done, std_fix); + run_test!(failJSON23, done, std_fix); + run_test!(failJSON24, done, std_fix); + run_test!(failJSON26, done, std_fix); + run_test!(failJSON28, done, std_fix); + run_test!(failJSON29, done, std_fix); + run_test!(failJSON30, done, std_fix); + run_test!(failJSON31, done, std_fix); + run_test!(failJSON32, done, std_fix); + run_test!(failJSON33, done, std_fix); + run_test!(failJSON34, done, std_fix); + run_test!(failKey1, done, std_fix); + run_test!(failKey2, done, std_fix); + run_test!(failKey3, done, std_fix); + run_test!(failKey4, done, std_fix); + run_test!(failMLStr1, done, std_fix); + run_test!(failObj1, done, std_fix); + run_test!(failObj2, done, std_fix); + run_test!(failObj3, done, std_fix); + run_test!(failStr1a, done, std_fix); + run_test!(failStr1b, done, std_fix); + run_test!(failStr1c, done, std_fix); + run_test!(failStr1d, done, std_fix); + run_test!(failStr2a, done, std_fix); + run_test!(failStr2b, done, std_fix); + run_test!(failStr2c, done, std_fix); + run_test!(failStr2d, done, std_fix); + run_test!(failStr3a, done, std_fix); + run_test!(failStr3b, done, std_fix); + run_test!(failStr3c, done, std_fix); + 
run_test!(failStr3d, done, std_fix); + run_test!(failStr4a, done, std_fix); + run_test!(failStr4b, done, std_fix); + run_test!(failStr4c, done, std_fix); + run_test!(failStr4d, done, std_fix); + run_test!(failStr5a, done, std_fix); + run_test!(failStr5b, done, std_fix); + run_test!(failStr5c, done, std_fix); + run_test!(failStr5d, done, std_fix); + run_test!(failStr6a, done, std_fix); + run_test!(failStr6b, done, std_fix); + run_test!(failStr6c, done, std_fix); + run_test!(failStr6d, done, std_fix); + run_test!(kan, done, fix_kan); + run_test!(keys, done, std_fix); + run_test!(oa, done, std_fix); + run_test!(pass1, done, fix_pass1); + run_test!(pass2, done, std_fix); + run_test!(pass3, done, std_fix); + run_test!(pass4, done, std_fix); + run_test!(passSingle, done, std_fix); + run_test!(root, done, std_fix); + run_test!(stringify1, done, std_fix); + run_test!(strings, done, std_fix); + run_test!(trail, done, std_fix); + + // check if we include all assets + let paths = fs::read_dir(hjson_expectations()).unwrap(); + + let all = paths + .map(|item| String::from(item.unwrap().path().file_stem().unwrap().to_str().unwrap())) + .filter(|x| x.contains("_test")); + + let missing = all + .into_iter() + .filter(|x| done.iter().find(|y| &x == y) == None) + .collect::>(); + + if !missing.is_empty() { + for item in missing { + println!("missing: {}", item); + } + panic!(); + } +} + +*/ diff --git a/crates/nu-path/Cargo.toml b/crates/nu-path/Cargo.toml new file mode 100644 index 000000000..e2fc2b1ab --- /dev/null +++ b/crates/nu-path/Cargo.toml @@ -0,0 +1,12 @@ +[package] +authors = ["The Nu Project Contributors"] +description = "Path handling library for Nushell" +edition = "2018" +license = "MIT" +name = "nu-path" +version = "0.37.1" + +[dependencies] +dirs-next = "2.0.0" +dunce = "1.0.1" + diff --git a/crates/nu-path/README.md b/crates/nu-path/README.md new file mode 100644 index 000000000..382fd687b --- /dev/null +++ b/crates/nu-path/README.md @@ -0,0 +1,3 @@ +# nu-path + 
+This crate takes care of path handling in Nushell, such as canonicalization and component expansion, as well as other path-related utilities. diff --git a/crates/nu-path/src/dots.rs b/crates/nu-path/src/dots.rs new file mode 100644 index 000000000..b6025c479 --- /dev/null +++ b/crates/nu-path/src/dots.rs @@ -0,0 +1,259 @@ +use std::path::{is_separator, Component, Path, PathBuf}; + +const EXPAND_STR: &str = if cfg!(windows) { r"..\" } else { "../" }; + +fn handle_dots_push(string: &mut String, count: u8) { + if count < 1 { + return; + } + + if count == 1 { + string.push('.'); + return; + } + + for _ in 0..(count - 1) { + string.push_str(EXPAND_STR); + } + + string.pop(); // remove the trailing separator +} + +/// Expands any occurrence of more than two dots into a sequence of ../ (or ..\ on windows), e.g., +/// "..." into "../..", "...." into "../../..", etc. +pub fn expand_ndots(path: impl AsRef) -> PathBuf { + // Check if path is valid UTF-8 and if not, return it as it is to avoid breaking it via string + // conversion. + let path_str = match path.as_ref().to_str() { + Some(s) => s, + None => return path.as_ref().into(), + }; + + // find if we need to expand any >2 dot paths and early exit if not + let mut dots_count = 0u8; + let ndots_present = { + for chr in path_str.chars() { + if chr == '.' { + dots_count += 1; + } else { + if is_separator(chr) && (dots_count > 2) { + // this path component had >2 dots + break; + } + + dots_count = 0; + } + } + + dots_count > 2 + }; + + if !ndots_present { + return path.as_ref().into(); + } + + let mut dots_count = 0u8; + let mut expanded = String::new(); + for chr in path_str.chars() { + if chr == '.' 
{ + dots_count += 1; + } else { + if is_separator(chr) { + // check for dots expansion only at path component boundaries + handle_dots_push(&mut expanded, dots_count); + dots_count = 0; + } else { + // got non-dot within path component => do not expand any dots + while dots_count > 0 { + expanded.push('.'); + dots_count -= 1; + } + } + expanded.push(chr); + } + } + + handle_dots_push(&mut expanded, dots_count); + + expanded.into() +} + +/// Expand "." and ".." into nothing and parent directory, respectively. +pub fn expand_dots(path: impl AsRef) -> PathBuf { + let path = path.as_ref(); + + // Early-exit if path does not contain '.' or '..' + if !path + .components() + .any(|c| std::matches!(c, Component::CurDir | Component::ParentDir)) + { + return path.into(); + } + + let mut result = PathBuf::with_capacity(path.as_os_str().len()); + + // Only pop/skip path elements if the previous one was an actual path element + let prev_is_normal = |p: &Path| -> bool { + p.components() + .next_back() + .map(|c| std::matches!(c, Component::Normal(_))) + .unwrap_or(false) + }; + + path.components().for_each(|component| match component { + Component::ParentDir if prev_is_normal(&result) => { + result.pop(); + } + Component::CurDir if prev_is_normal(&result) => {} + _ => result.push(component), + }); + + dunce::simplified(&result).to_path_buf() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn expand_two_dots() { + let path = Path::new("/foo/bar/.."); + + assert_eq!( + PathBuf::from("/foo"), // missing path + expand_dots(path) + ); + } + + #[test] + fn expand_dots_with_curdir() { + let path = Path::new("/foo/./bar/./baz"); + + assert_eq!(PathBuf::from("/foo/bar/baz"), expand_dots(path)); + } + + fn check_ndots_expansion(expected: &str, s: &str) { + let expanded = expand_ndots(Path::new(s)); + assert_eq!(Path::new(expected), &expanded); + } + + // common tests + #[test] + fn string_without_ndots() { + check_ndots_expansion("../hola", "../hola"); + } + + #[test] + fn 
string_with_three_ndots_and_chars() { + check_ndots_expansion("a...b", "a...b"); + } + + #[test] + fn string_with_two_ndots_and_chars() { + check_ndots_expansion("a..b", "a..b"); + } + + #[test] + fn string_with_one_dot_and_chars() { + check_ndots_expansion("a.b", "a.b"); + } + + #[test] + fn expand_dots_double_dots_no_change() { + // Can't resolve this as we don't know our parent dir + assert_eq!(Path::new(".."), expand_dots(Path::new(".."))); + } + + #[test] + fn expand_dots_single_dot_no_change() { + // Can't resolve this as we don't know our current dir + assert_eq!(Path::new("."), expand_dots(Path::new("."))); + } + + #[test] + fn expand_dots_multi_single_dots_no_change() { + assert_eq!(Path::new("././."), expand_dots(Path::new("././."))); + } + + #[test] + fn expand_multi_double_dots_no_change() { + assert_eq!(Path::new("../../../"), expand_dots(Path::new("../../../"))); + } + + #[test] + fn expand_dots_no_change_with_dirs() { + // Can't resolve this as we don't know our parent dir + assert_eq!( + Path::new("../../../dir1/dir2/"), + expand_dots(Path::new("../../../dir1/dir2")) + ); + } + + #[test] + fn expand_dots_simple() { + assert_eq!(Path::new("/foo"), expand_dots(Path::new("/foo/bar/.."))); + } + + #[test] + fn expand_dots_complex() { + assert_eq!( + Path::new("/test"), + expand_dots(Path::new("/foo/./bar/../../test/././test2/../")) + ); + } + + #[cfg(windows)] + mod windows { + use super::*; + + #[test] + fn string_with_three_ndots() { + check_ndots_expansion(r"..\..", "..."); + } + + #[test] + fn string_with_mixed_ndots_and_chars() { + check_ndots_expansion( + r"a...b/./c..d/../e.f/..\..\..//.", + "a...b/./c..d/../e.f/....//.", + ); + } + + #[test] + fn string_with_three_ndots_and_final_slash() { + check_ndots_expansion(r"..\../", ".../"); + } + + #[test] + fn string_with_three_ndots_and_garbage() { + check_ndots_expansion(r"ls ..\../ garbage.*[", "ls .../ garbage.*["); + } + } + + #[cfg(not(windows))] + mod non_windows { + use super::*; + #[test] + fn 
string_with_three_ndots() { + check_ndots_expansion(r"../..", "..."); + } + + #[test] + fn string_with_mixed_ndots_and_chars() { + check_ndots_expansion( + "a...b/./c..d/../e.f/../../..//.", + "a...b/./c..d/../e.f/....//.", + ); + } + + #[test] + fn string_with_three_ndots_and_final_slash() { + check_ndots_expansion("../../", ".../"); + } + + #[test] + fn string_with_three_ndots_and_garbage() { + check_ndots_expansion("ls ../../ garbage.*[", "ls .../ garbage.*["); + } + } +} diff --git a/crates/nu-path/src/expansions.rs b/crates/nu-path/src/expansions.rs new file mode 100644 index 000000000..3393a5793 --- /dev/null +++ b/crates/nu-path/src/expansions.rs @@ -0,0 +1,75 @@ +use std::io; +use std::path::{Path, PathBuf}; + +use super::dots::{expand_dots, expand_ndots}; +use super::tilde::expand_tilde; + +// Join a path relative to another path. Paths starting with tilde are considered as absolute. +fn join_path_relative(path: P, relative_to: Q) -> PathBuf +where + P: AsRef, + Q: AsRef, +{ + let path = path.as_ref(); + let relative_to = relative_to.as_ref(); + + if path == Path::new(".") { + // Joining a Path with '.' appends a '.' at the end, making the prompt + // more ugly - so we don't do anything, which should result in an equal + // path on all supported systems. + relative_to.into() + } else if path.starts_with("~") { + // do not end up with "/some/path/~" + path.into() + } else { + relative_to.join(path) + } +} + +/// Resolve all symbolic links and all components (tilde, ., .., ...+) and return the path in its +/// absolute form. +/// +/// Fails under the same conditions as +/// [std::fs::canonicalize](https://doc.rust-lang.org/std/fs/fn.canonicalize.html). 
+pub fn canonicalize(path: impl AsRef) -> io::Result { + let path = expand_tilde(path); + let path = expand_ndots(path); + + dunce::canonicalize(path) +} + +/// Same as canonicalize() but the input path is specified relative to another path +pub fn canonicalize_with(path: P, relative_to: Q) -> io::Result +where + P: AsRef, + Q: AsRef, +{ + let path = join_path_relative(path, relative_to); + + canonicalize(path) +} + +/// Resolve only path components (tilde, ., .., ...+), if possible. +/// +/// The function works in a "best effort" mode: It does not fail but rather returns the unexpanded +/// version if the expansion is not possible. +/// +/// Furthermore, unlike canonicalize(), it does not use sys calls (such as readlink). +/// +/// Does not convert to absolute form nor does it resolve symlinks. +pub fn expand_path(path: impl AsRef) -> PathBuf { + let path = expand_tilde(path); + let path = expand_ndots(path); + expand_dots(path) +} + +/// Same as expand_path() but the input path is specified relative to another path +pub fn expand_path_with(path: P, relative_to: Q) -> PathBuf +where + P: AsRef, + Q: AsRef, +{ + let path = join_path_relative(path, relative_to); + + expand_path(path) +} diff --git a/crates/nu-path/src/lib.rs b/crates/nu-path/src/lib.rs new file mode 100644 index 000000000..9606bc15c --- /dev/null +++ b/crates/nu-path/src/lib.rs @@ -0,0 +1,8 @@ +mod dots; +mod expansions; +mod tilde; +mod util; + +pub use expansions::{canonicalize, canonicalize_with, expand_path, expand_path_with}; +pub use tilde::expand_tilde; +pub use util::trim_trailing_slash; diff --git a/crates/nu-path/src/tilde.rs b/crates/nu-path/src/tilde.rs new file mode 100644 index 000000000..e1c7ec56a --- /dev/null +++ b/crates/nu-path/src/tilde.rs @@ -0,0 +1,85 @@ +use std::path::{Path, PathBuf}; + +fn expand_tilde_with(path: impl AsRef, home: Option) -> PathBuf { + let path = path.as_ref(); + + if !path.starts_with("~") { + return path.into(); + } + + match home { + None => path.into(), 
+ Some(mut h) => { + if h == Path::new("/") { + // Corner case: `h` is a root directory; + // don't prepend extra `/`, just drop the tilde. + path.strip_prefix("~").unwrap_or(path).into() + } else { + if let Ok(p) = path.strip_prefix("~/") { + h.push(p) + } + h + } + } + } +} + +/// Expand tilde ("~") into a home directory if it is the first path component +pub fn expand_tilde(path: impl AsRef) -> PathBuf { + // TODO: Extend this to work with "~user" style of home paths + expand_tilde_with(path, dirs_next::home_dir()) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn check_expanded(s: &str) { + let home = Path::new("/home"); + let buf = Some(PathBuf::from(home)); + assert!(expand_tilde_with(Path::new(s), buf).starts_with(&home)); + + // Tests the special case in expand_tilde for "/" as home + let home = Path::new("/"); + let buf = Some(PathBuf::from(home)); + assert!(!expand_tilde_with(Path::new(s), buf).starts_with("//")); + } + + fn check_not_expanded(s: &str) { + let home = PathBuf::from("/home"); + let expanded = expand_tilde_with(Path::new(s), Some(home)); + assert!(expanded == Path::new(s)); + } + + #[test] + fn string_with_tilde() { + check_expanded("~"); + } + + #[test] + fn string_with_tilde_forward_slash() { + check_expanded("~/test/"); + } + + #[test] + fn string_with_tilde_double_forward_slash() { + check_expanded("~//test/"); + } + + #[test] + fn does_not_expand_tilde_if_tilde_is_not_first_character() { + check_not_expanded("1~1"); + } + + #[cfg(windows)] + #[test] + fn string_with_tilde_backslash() { + check_expanded("~\\test/test2/test3"); + } + + #[cfg(windows)] + #[test] + fn string_with_double_tilde_backslash() { + check_expanded("~\\\\test\\test2/test3"); + } +} diff --git a/crates/nu-path/src/util.rs b/crates/nu-path/src/util.rs new file mode 100644 index 000000000..63351e6ae --- /dev/null +++ b/crates/nu-path/src/util.rs @@ -0,0 +1,4 @@ +/// Trim trailing path separator from a string +pub fn trim_trailing_slash(s: &str) -> &str { + 
s.trim_end_matches(std::path::is_separator) +} diff --git a/crates/nu-path/tests/mod.rs b/crates/nu-path/tests/mod.rs new file mode 100644 index 000000000..83c8c0aa0 --- /dev/null +++ b/crates/nu-path/tests/mod.rs @@ -0,0 +1 @@ +mod util; diff --git a/crates/nu-path/tests/util.rs b/crates/nu-path/tests/util.rs new file mode 100644 index 000000000..601d9dd43 --- /dev/null +++ b/crates/nu-path/tests/util.rs @@ -0,0 +1,45 @@ +use nu_path::trim_trailing_slash; +use std::path::MAIN_SEPARATOR; + +/// Helper function that joins string literals with '/' or '\', based on the host OS +fn join_path_sep(pieces: &[&str]) -> String { + let sep_string = String::from(MAIN_SEPARATOR); + pieces.join(&sep_string) +} + +#[test] +fn trims_trailing_slash_without_trailing_slash() { + let path = join_path_sep(&["some", "path"]); + + let actual = trim_trailing_slash(&path); + + assert_eq!(actual, &path) +} + +#[test] +fn trims_trailing_slash() { + let path = join_path_sep(&["some", "path", ""]); + + let actual = trim_trailing_slash(&path); + let expected = join_path_sep(&["some", "path"]); + + assert_eq!(actual, &expected) +} + +#[test] +fn trims_many_trailing_slashes() { + let path = join_path_sep(&["some", "path", "", "", "", ""]); + + let actual = trim_trailing_slash(&path); + let expected = join_path_sep(&["some", "path"]); + + assert_eq!(actual, &expected) +} + +#[test] +fn trims_trailing_slash_empty() { + let path = String::from(MAIN_SEPARATOR); + let actual = trim_trailing_slash(&path); + + assert_eq!(actual, "") +} diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index 1d12bba4d..d2f3a7f91 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -8,3 +8,4 @@ edition = "2018" [dependencies] thiserror = "1.0.29" miette = "3.0.0" +serde = "1.0.130" \ No newline at end of file diff --git a/crates/nu-protocol/src/ast/operator.rs b/crates/nu-protocol/src/ast/operator.rs index edd4f56fc..690390888 100644 --- 
a/crates/nu-protocol/src/ast/operator.rs +++ b/crates/nu-protocol/src/ast/operator.rs @@ -1,8 +1,9 @@ use crate::Span; +use serde::{Deserialize, Serialize}; use std::fmt::Display; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum Operator { Equal, NotEqual, @@ -49,7 +50,7 @@ impl Display for Operator { } } -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] pub enum RangeInclusion { Inclusive, RightExclusive, diff --git a/crates/nu-protocol/src/shell_error.rs b/crates/nu-protocol/src/shell_error.rs index ab8a735aa..90c6b082c 100644 --- a/crates/nu-protocol/src/shell_error.rs +++ b/crates/nu-protocol/src/shell_error.rs @@ -1,9 +1,10 @@ use miette::Diagnostic; +use serde::{Deserialize, Serialize}; use thiserror::Error; use crate::{ast::Operator, Span, Type}; -#[derive(Debug, Clone, Error, Diagnostic)] +#[derive(Debug, Clone, Error, Diagnostic, Serialize, Deserialize)] pub enum ShellError { #[error("Type mismatch during operation.")] #[diagnostic(code(nu::shell::type_mismatch), url(docsrs))] diff --git a/crates/nu-protocol/src/span.rs b/crates/nu-protocol/src/span.rs index 0a43a1239..e3a190a8c 100644 --- a/crates/nu-protocol/src/span.rs +++ b/crates/nu-protocol/src/span.rs @@ -1,6 +1,7 @@ use miette::SourceSpan; +use serde::{Deserialize, Serialize}; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct Span { pub start: usize, pub end: usize, diff --git a/crates/nu-protocol/src/ty.rs b/crates/nu-protocol/src/ty.rs index 6338a6e47..c11bafaca 100644 --- a/crates/nu-protocol/src/ty.rs +++ b/crates/nu-protocol/src/ty.rs @@ -1,6 +1,8 @@ +use serde::{Deserialize, Serialize}; + use std::fmt::Display; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum Type { Int, Float, diff --git a/crates/nu-protocol/src/value/mod.rs 
b/crates/nu-protocol/src/value/mod.rs index ce1a18a74..76f5ab35a 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -4,6 +4,7 @@ mod stream; pub use range::*; pub use row::*; +use serde::{Deserialize, Serialize}; pub use stream::*; use std::fmt::Debug; @@ -14,7 +15,7 @@ use crate::{span, BlockId, Span, Type}; use crate::ShellError; /// Core structured values that pass through the pipeline in engine-q -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub enum Value { Bool { val: bool, diff --git a/crates/nu-protocol/src/value/range.rs b/crates/nu-protocol/src/value/range.rs index 80eb56882..38fb286ac 100644 --- a/crates/nu-protocol/src/value/range.rs +++ b/crates/nu-protocol/src/value/range.rs @@ -1,3 +1,4 @@ +use serde::{Deserialize, Serialize}; use std::cmp::Ordering; use crate::{ @@ -5,7 +6,7 @@ use crate::{ *, }; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Range { pub from: Value, pub incr: Value, diff --git a/crates/nu-protocol/src/value/stream.rs b/crates/nu-protocol/src/value/stream.rs index ca56f39f0..13222a653 100644 --- a/crates/nu-protocol/src/value/stream.rs +++ b/crates/nu-protocol/src/value/stream.rs @@ -1,6 +1,8 @@ use crate::*; use std::{cell::RefCell, fmt::Debug, rc::Rc}; +use serde::{Deserialize, Serialize}; + #[derive(Clone)] pub struct ValueStream(pub Rc>>); @@ -35,6 +37,24 @@ impl Iterator for ValueStream { } } +impl Serialize for ValueStream { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} + +impl<'de> Deserialize<'de> for ValueStream { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + todo!() + } +} + pub trait IntoValueStream { fn into_value_stream(self) -> ValueStream; } diff --git a/src/tests.rs b/src/tests.rs index 22b33e632..f47daaee2 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -382,3 +382,17 @@ fn module_imports_5() 
-> TestResult { "3", ) } + +#[test] +fn from_json_1() -> TestResult { + run_test(r#"('{"name": "Fred"}' | from json).name"#, "Fred") +} + +#[test] +fn from_json_2() -> TestResult { + run_test( + r#"('{"name": "Fred"} + {"name": "Sally"}' | from json -o).name.1"#, + "Sally", + ) +} From db3e9efc4bc2dd21531f69c54d650003f9e1bc46 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 1 Oct 2021 18:20:25 +1300 Subject: [PATCH 2/2] fix warnings --- crates/nu-protocol/src/value/stream.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/nu-protocol/src/value/stream.rs b/crates/nu-protocol/src/value/stream.rs index 13222a653..f42349bf9 100644 --- a/crates/nu-protocol/src/value/stream.rs +++ b/crates/nu-protocol/src/value/stream.rs @@ -38,19 +38,21 @@ impl Iterator for ValueStream { } impl Serialize for ValueStream { - fn serialize(&self, serializer: S) -> Result + fn serialize(&self, _serializer: S) -> Result where S: serde::Serializer, { + // FIXME: implement these todo!() } } impl<'de> Deserialize<'de> for ValueStream { - fn deserialize(deserializer: D) -> Result + fn deserialize(_deserializer: D) -> Result where D: serde::Deserializer<'de>, { + // FIXME: implement these todo!() } }