mirror of
https://github.com/nushell/nushell.git
synced 2024-11-22 08:23:24 +01:00
Strict JSON parsing (#11592)
# Description Adds the `--strict` flag for `from json` which will try to parse text while following the exact JSON specification (e.g., no comments or trailing commas allowed). Fixes issue #11548.
This commit is contained in:
parent
6530403ff8
commit
4e0a65c822
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -5104,6 +5104,7 @@ version = "1.0.112"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4d1bd37ce2324cf3bf85e5a25f96eb4baf0d5aa6eba43e7ae8958870c4ec48ed"
|
checksum = "4d1bd37ce2324cf3bf85e5a25f96eb4baf0d5aa6eba43e7ae8958870c4ec48ed"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"indexmap",
|
||||||
"itoa",
|
"itoa",
|
||||||
"ryu",
|
"ryu",
|
||||||
"serde",
|
"serde",
|
||||||
|
@ -75,7 +75,7 @@ roxmltree = "0.18"
|
|||||||
rusqlite = { version = "0.29", features = ["bundled", "backup", "chrono"], optional = true }
|
rusqlite = { version = "0.29", features = ["bundled", "backup", "chrono"], optional = true }
|
||||||
same-file = "1.0"
|
same-file = "1.0"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = { version = "1.0", features = ["preserve_order"] }
|
||||||
serde_urlencoded = "0.7"
|
serde_urlencoded = "0.7"
|
||||||
serde_yaml = "0.9"
|
serde_yaml = "0.9"
|
||||||
sha2 = "0.10"
|
sha2 = "0.10"
|
||||||
|
@ -22,6 +22,7 @@ impl Command for FromJson {
|
|||||||
Signature::build("from json")
|
Signature::build("from json")
|
||||||
.input_output_types(vec![(Type::String, Type::Any)])
|
.input_output_types(vec![(Type::String, Type::Any)])
|
||||||
.switch("objects", "treat each line as a separate value", Some('o'))
|
.switch("objects", "treat each line as a separate value", Some('o'))
|
||||||
|
.switch("strict", "follow the json specification exactly", Some('s'))
|
||||||
.category(Category::Formats)
|
.category(Category::Formats)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -42,6 +43,14 @@ impl Command for FromJson {
|
|||||||
"b" => Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
|
"b" => Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
|
||||||
})),
|
})),
|
||||||
},
|
},
|
||||||
|
Example {
|
||||||
|
example: r#"'{ "a": 1, "b": 2 }' | from json -s"#,
|
||||||
|
description: "Parse json strictly which will error on comments and trailing commas",
|
||||||
|
result: Some(Value::test_record(record! {
|
||||||
|
"a" => Value::test_int(1),
|
||||||
|
"b" => Value::test_int(2),
|
||||||
|
})),
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,52 +68,61 @@ impl Command for FromJson {
|
|||||||
return Ok(PipelineData::new_with_metadata(metadata, span));
|
return Ok(PipelineData::new_with_metadata(metadata, span));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let strict = call.has_flag(engine_state, stack, "strict")?;
|
||||||
|
|
||||||
// TODO: turn this into a structured underline of the nu_json error
|
// TODO: turn this into a structured underline of the nu_json error
|
||||||
if call.has_flag(engine_state, stack, "objects")? {
|
if call.has_flag(engine_state, stack, "objects")? {
|
||||||
let converted_lines: Vec<Value> = string_input
|
let lines = string_input.lines().filter(|line| !line.trim().is_empty());
|
||||||
.lines()
|
|
||||||
.filter_map(move |x| {
|
let converted_lines: Vec<_> = if strict {
|
||||||
if x.trim() == "" {
|
lines
|
||||||
None
|
.map(|line| {
|
||||||
} else {
|
convert_string_to_value_strict(line, span)
|
||||||
match convert_string_to_value(x.to_string(), span) {
|
.unwrap_or_else(|err| Value::error(err, span))
|
||||||
Ok(v) => Some(v),
|
})
|
||||||
Err(error) => Some(Value::error(error, span)),
|
.collect()
|
||||||
}
|
} else {
|
||||||
}
|
lines
|
||||||
})
|
.map(|line| {
|
||||||
.collect();
|
convert_string_to_value(line, span)
|
||||||
|
.unwrap_or_else(|err| Value::error(err, span))
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
Ok(converted_lines
|
Ok(converted_lines
|
||||||
.into_pipeline_data_with_metadata(metadata, engine_state.ctrlc.clone()))
|
.into_pipeline_data_with_metadata(metadata, engine_state.ctrlc.clone()))
|
||||||
|
} else if strict {
|
||||||
|
Ok(convert_string_to_value_strict(&string_input, span)?
|
||||||
|
.into_pipeline_data_with_metadata(metadata))
|
||||||
} else {
|
} else {
|
||||||
Ok(convert_string_to_value(string_input, span)?
|
Ok(convert_string_to_value(&string_input, span)?
|
||||||
.into_pipeline_data_with_metadata(metadata))
|
.into_pipeline_data_with_metadata(metadata))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value {
|
fn convert_nujson_to_value(value: nu_json::Value, span: Span) -> Value {
|
||||||
match value {
|
match value {
|
||||||
nu_json::Value::Array(array) => {
|
nu_json::Value::Array(array) => Value::list(
|
||||||
let v: Vec<Value> = array
|
array
|
||||||
.iter()
|
.into_iter()
|
||||||
.map(|x| convert_nujson_to_value(x, span))
|
.map(|x| convert_nujson_to_value(x, span))
|
||||||
.collect();
|
.collect(),
|
||||||
|
span,
|
||||||
Value::list(v, span)
|
),
|
||||||
}
|
nu_json::Value::Bool(b) => Value::bool(b, span),
|
||||||
nu_json::Value::Bool(b) => Value::bool(*b, span),
|
nu_json::Value::F64(f) => Value::float(f, span),
|
||||||
nu_json::Value::F64(f) => Value::float(*f, span),
|
nu_json::Value::I64(i) => Value::int(i, span),
|
||||||
nu_json::Value::I64(i) => Value::int(*i, span),
|
|
||||||
nu_json::Value::Null => Value::nothing(span),
|
nu_json::Value::Null => Value::nothing(span),
|
||||||
nu_json::Value::Object(k) => Value::record(
|
nu_json::Value::Object(k) => Value::record(
|
||||||
k.iter()
|
k.into_iter()
|
||||||
.map(|(k, v)| (k.clone(), convert_nujson_to_value(v, span)))
|
.map(|(k, v)| (k, convert_nujson_to_value(v, span)))
|
||||||
.collect(),
|
.collect(),
|
||||||
span,
|
span,
|
||||||
),
|
),
|
||||||
nu_json::Value::U64(u) => {
|
nu_json::Value::U64(u) => {
|
||||||
if *u > i64::MAX as u64 {
|
if u > i64::MAX as u64 {
|
||||||
Value::error(
|
Value::error(
|
||||||
ShellError::CantConvert {
|
ShellError::CantConvert {
|
||||||
to_type: "i64 sized integer".into(),
|
to_type: "i64 sized integer".into(),
|
||||||
@ -115,22 +133,22 @@ fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value {
|
|||||||
span,
|
span,
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
Value::int(*u as i64, span)
|
Value::int(u as i64, span)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nu_json::Value::String(s) => Value::string(s.clone(), span),
|
nu_json::Value::String(s) => Value::string(s, span),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Converts row+column to a Span, assuming bytes (1-based rows)
|
// Converts row+column to a Span, assuming bytes (1-based rows)
|
||||||
fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
|
fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
|
||||||
let mut cur_row = 1;
|
let mut cur_row = 1;
|
||||||
let mut cur_col = 0;
|
let mut cur_col = 1;
|
||||||
|
|
||||||
for (offset, curr_byte) in contents.bytes().enumerate() {
|
for (offset, curr_byte) in contents.bytes().enumerate() {
|
||||||
if curr_byte == b'\n' {
|
if curr_byte == b'\n' {
|
||||||
cur_row += 1;
|
cur_row += 1;
|
||||||
cur_col = 0;
|
cur_col = 1;
|
||||||
}
|
}
|
||||||
if cur_row >= row && cur_col >= col {
|
if cur_row >= row && cur_col >= col {
|
||||||
return Span::new(offset, offset);
|
return Span::new(offset, offset);
|
||||||
@ -142,22 +160,21 @@ fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
|
|||||||
Span::new(contents.len(), contents.len())
|
Span::new(contents.len(), contents.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, ShellError> {
|
fn convert_string_to_value(string_input: &str, span: Span) -> Result<Value, ShellError> {
|
||||||
let result: Result<nu_json::Value, nu_json::Error> = nu_json::from_str(&string_input);
|
match nu_json::from_str(string_input) {
|
||||||
match result {
|
Ok(value) => Ok(convert_nujson_to_value(value, span)),
|
||||||
Ok(value) => Ok(convert_nujson_to_value(&value, span)),
|
|
||||||
|
|
||||||
Err(x) => match x {
|
Err(x) => match x {
|
||||||
nu_json::Error::Syntax(_, row, col) => {
|
nu_json::Error::Syntax(_, row, col) => {
|
||||||
let label = x.to_string();
|
let label = x.to_string();
|
||||||
let label_span = convert_row_column_to_span(row, col, &string_input);
|
let label_span = convert_row_column_to_span(row, col, string_input);
|
||||||
Err(ShellError::GenericError {
|
Err(ShellError::GenericError {
|
||||||
error: "Error while parsing JSON text".into(),
|
error: "Error while parsing JSON text".into(),
|
||||||
msg: "error parsing JSON text".into(),
|
msg: "error parsing JSON text".into(),
|
||||||
span: Some(span),
|
span: Some(span),
|
||||||
help: None,
|
help: None,
|
||||||
inner: vec![ShellError::OutsideSpannedLabeledError {
|
inner: vec![ShellError::OutsideSpannedLabeledError {
|
||||||
src: string_input,
|
src: string_input.into(),
|
||||||
error: "Error while parsing JSON text".into(),
|
error: "Error while parsing JSON text".into(),
|
||||||
msg: label,
|
msg: label,
|
||||||
span: label_span,
|
span: label_span,
|
||||||
@ -174,6 +191,35 @@ fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, Sh
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn convert_string_to_value_strict(string_input: &str, span: Span) -> Result<Value, ShellError> {
|
||||||
|
match serde_json::from_str(string_input) {
|
||||||
|
Ok(value) => Ok(convert_nujson_to_value(value, span)),
|
||||||
|
Err(err) => Err(if err.is_syntax() {
|
||||||
|
let label = err.to_string();
|
||||||
|
let label_span = convert_row_column_to_span(err.line(), err.column(), string_input);
|
||||||
|
ShellError::GenericError {
|
||||||
|
error: "Error while parsing JSON text".into(),
|
||||||
|
msg: "error parsing JSON text".into(),
|
||||||
|
span: Some(span),
|
||||||
|
help: None,
|
||||||
|
inner: vec![ShellError::OutsideSpannedLabeledError {
|
||||||
|
src: string_input.into(),
|
||||||
|
error: "Error while parsing JSON text".into(),
|
||||||
|
msg: label,
|
||||||
|
span: label_span,
|
||||||
|
}],
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ShellError::CantConvert {
|
||||||
|
to_type: format!("structured json data ({err})"),
|
||||||
|
from_type: "string".into(),
|
||||||
|
span,
|
||||||
|
help: None,
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -43,6 +43,32 @@ fn from_json_text_to_table() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn from_json_text_to_table_strict() {
|
||||||
|
Playground::setup("filter_from_json_test_1_strict", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
|
"katz.txt",
|
||||||
|
r#"
|
||||||
|
{
|
||||||
|
"katz": [
|
||||||
|
{"name": "Yehuda", "rusty_luck": 1},
|
||||||
|
{"name": "JT", "rusty_luck": 1},
|
||||||
|
{"name": "Andres", "rusty_luck": 1},
|
||||||
|
{"name":"GorbyPuff", "rusty_luck": 1}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
"#,
|
||||||
|
)]);
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(),
|
||||||
|
"open katz.txt | from json -s | get katz | get rusty_luck | length "
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "4");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_json_text_recognizing_objects_independently_to_table() {
|
fn from_json_text_recognizing_objects_independently_to_table() {
|
||||||
Playground::setup("filter_from_json_test_2", |dirs, sandbox| {
|
Playground::setup("filter_from_json_test_2", |dirs, sandbox| {
|
||||||
@ -70,6 +96,33 @@ fn from_json_text_recognizing_objects_independently_to_table() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn from_json_text_recognizing_objects_independently_to_table_strict() {
|
||||||
|
Playground::setup("filter_from_json_test_2_strict", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
|
"katz.txt",
|
||||||
|
r#"
|
||||||
|
{"name": "Yehuda", "rusty_luck": 1}
|
||||||
|
{"name": "JT", "rusty_luck": 1}
|
||||||
|
{"name": "Andres", "rusty_luck": 1}
|
||||||
|
{"name":"GorbyPuff", "rusty_luck": 3}
|
||||||
|
"#,
|
||||||
|
)]);
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open katz.txt
|
||||||
|
| from json -o -s
|
||||||
|
| where name == "GorbyPuff"
|
||||||
|
| get rusty_luck.0
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "3");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn table_to_json_text() {
|
fn table_to_json_text() {
|
||||||
Playground::setup("filter_to_json_test", |dirs, sandbox| {
|
Playground::setup("filter_to_json_test", |dirs, sandbox| {
|
||||||
@ -99,6 +152,35 @@ fn table_to_json_text() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn table_to_json_text_strict() {
|
||||||
|
Playground::setup("filter_to_json_test_strict", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
|
"sample.txt",
|
||||||
|
r#"
|
||||||
|
JonAndrehudaTZ,3
|
||||||
|
GorbyPuff,100
|
||||||
|
"#,
|
||||||
|
)]);
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open sample.txt
|
||||||
|
| lines
|
||||||
|
| split column "," name luck
|
||||||
|
| select name
|
||||||
|
| to json
|
||||||
|
| from json -s
|
||||||
|
| get 0
|
||||||
|
| get name
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "JonAndrehudaTZ");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn top_level_values_from_json() {
|
fn top_level_values_from_json() {
|
||||||
for (value, type_name) in [("null", "nothing"), ("true", "bool"), ("false", "bool")] {
|
for (value, type_name) in [("null", "nothing"), ("true", "bool"), ("false", "bool")] {
|
||||||
@ -109,6 +191,28 @@ fn top_level_values_from_json() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn top_level_values_from_json_strict() {
|
||||||
|
for (value, type_name) in [("null", "nothing"), ("true", "bool"), ("false", "bool")] {
|
||||||
|
let actual = nu!(r#""{}" | from json -s | to json"#, value);
|
||||||
|
assert_eq!(actual.out, value);
|
||||||
|
let actual = nu!(r#""{}" | from json -s | describe"#, value);
|
||||||
|
assert_eq!(actual.out, type_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn strict_parsing_fails_on_comment() {
|
||||||
|
let actual = nu!(r#"'{ "a": 1, /* comment */ "b": 2 }' | from json -s"#);
|
||||||
|
assert!(actual.err.contains("error parsing JSON text"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn strict_parsing_fails_on_trailing_comma() {
|
||||||
|
let actual = nu!(r#"'{ "a": 1, "b": 2, }' | from json -s"#);
|
||||||
|
assert!(actual.err.contains("error parsing JSON text"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn ranges_to_json_as_array() {
|
fn ranges_to_json_as_array() {
|
||||||
let value = r#"[ 1, 2, 3]"#;
|
let value = r#"[ 1, 2, 3]"#;
|
||||||
|
@ -26,15 +26,6 @@ pub struct Deserializer<Iter: Iterator<Item = u8>> {
|
|||||||
state: State,
|
state: State,
|
||||||
}
|
}
|
||||||
|
|
||||||
// macro_rules! try_or_invalid {
|
|
||||||
// ($self_:expr, $e:expr) => {
|
|
||||||
// match $e {
|
|
||||||
// Some(v) => v,
|
|
||||||
// None => { return Err($self_.error(ErrorCode::InvalidNumber)); }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
impl<Iter> Deserializer<Iter>
|
impl<Iter> Deserializer<Iter>
|
||||||
where
|
where
|
||||||
Iter: Iterator<Item = u8>,
|
Iter: Iterator<Item = u8>,
|
||||||
|
Loading…
Reference in New Issue
Block a user