Strict JSON parsing (#11592)

# Description
Adds the `--strict` flag for `from json` which will try to parse text
while following the exact JSON specification (e.g., no comments or
trailing commas allowed). Fixes issue #11548.
This commit is contained in:
Ian Manske 2024-01-30 14:10:19 +00:00 committed by GitHub
parent 6530403ff8
commit 4e0a65c822
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 190 additions and 48 deletions

1
Cargo.lock generated
View File

@ -5104,6 +5104,7 @@ version = "1.0.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d1bd37ce2324cf3bf85e5a25f96eb4baf0d5aa6eba43e7ae8958870c4ec48ed"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",

View File

@ -75,7 +75,7 @@ roxmltree = "0.18"
rusqlite = { version = "0.29", features = ["bundled", "backup", "chrono"], optional = true }
same-file = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_json = { version = "1.0", features = ["preserve_order"] }
serde_urlencoded = "0.7"
serde_yaml = "0.9"
sha2 = "0.10"

View File

@ -22,6 +22,7 @@ impl Command for FromJson {
Signature::build("from json")
.input_output_types(vec![(Type::String, Type::Any)])
.switch("objects", "treat each line as a separate value", Some('o'))
.switch("strict", "follow the json specification exactly", Some('s'))
.category(Category::Formats)
}
@ -42,6 +43,14 @@ impl Command for FromJson {
"b" => Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
})),
},
Example {
example: r#"'{ "a": 1, "b": 2 }' | from json -s"#,
description: "Parse json strictly which will error on comments and trailing commas",
result: Some(Value::test_record(record! {
"a" => Value::test_int(1),
"b" => Value::test_int(2),
})),
},
]
}
@ -59,52 +68,61 @@ impl Command for FromJson {
return Ok(PipelineData::new_with_metadata(metadata, span));
}
let strict = call.has_flag(engine_state, stack, "strict")?;
// TODO: turn this into a structured underline of the nu_json error
if call.has_flag(engine_state, stack, "objects")? {
let converted_lines: Vec<Value> = string_input
.lines()
.filter_map(move |x| {
if x.trim() == "" {
None
} else {
match convert_string_to_value(x.to_string(), span) {
Ok(v) => Some(v),
Err(error) => Some(Value::error(error, span)),
}
}
let lines = string_input.lines().filter(|line| !line.trim().is_empty());
let converted_lines: Vec<_> = if strict {
lines
.map(|line| {
convert_string_to_value_strict(line, span)
.unwrap_or_else(|err| Value::error(err, span))
})
.collect();
.collect()
} else {
lines
.map(|line| {
convert_string_to_value(line, span)
.unwrap_or_else(|err| Value::error(err, span))
})
.collect()
};
Ok(converted_lines
.into_pipeline_data_with_metadata(metadata, engine_state.ctrlc.clone()))
} else if strict {
Ok(convert_string_to_value_strict(&string_input, span)?
.into_pipeline_data_with_metadata(metadata))
} else {
Ok(convert_string_to_value(string_input, span)?
Ok(convert_string_to_value(&string_input, span)?
.into_pipeline_data_with_metadata(metadata))
}
}
}
fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value {
fn convert_nujson_to_value(value: nu_json::Value, span: Span) -> Value {
match value {
nu_json::Value::Array(array) => {
let v: Vec<Value> = array
.iter()
nu_json::Value::Array(array) => Value::list(
array
.into_iter()
.map(|x| convert_nujson_to_value(x, span))
.collect();
Value::list(v, span)
}
nu_json::Value::Bool(b) => Value::bool(*b, span),
nu_json::Value::F64(f) => Value::float(*f, span),
nu_json::Value::I64(i) => Value::int(*i, span),
.collect(),
span,
),
nu_json::Value::Bool(b) => Value::bool(b, span),
nu_json::Value::F64(f) => Value::float(f, span),
nu_json::Value::I64(i) => Value::int(i, span),
nu_json::Value::Null => Value::nothing(span),
nu_json::Value::Object(k) => Value::record(
k.iter()
.map(|(k, v)| (k.clone(), convert_nujson_to_value(v, span)))
k.into_iter()
.map(|(k, v)| (k, convert_nujson_to_value(v, span)))
.collect(),
span,
),
nu_json::Value::U64(u) => {
if *u > i64::MAX as u64 {
if u > i64::MAX as u64 {
Value::error(
ShellError::CantConvert {
to_type: "i64 sized integer".into(),
@ -115,22 +133,22 @@ fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value {
span,
)
} else {
Value::int(*u as i64, span)
Value::int(u as i64, span)
}
}
nu_json::Value::String(s) => Value::string(s.clone(), span),
nu_json::Value::String(s) => Value::string(s, span),
}
}
// Converts row+column to a Span, assuming bytes (1-based rows)
fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
let mut cur_row = 1;
let mut cur_col = 0;
let mut cur_col = 1;
for (offset, curr_byte) in contents.bytes().enumerate() {
if curr_byte == b'\n' {
cur_row += 1;
cur_col = 0;
cur_col = 1;
}
if cur_row >= row && cur_col >= col {
return Span::new(offset, offset);
@ -142,22 +160,21 @@ fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
Span::new(contents.len(), contents.len())
}
fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, ShellError> {
let result: Result<nu_json::Value, nu_json::Error> = nu_json::from_str(&string_input);
match result {
Ok(value) => Ok(convert_nujson_to_value(&value, span)),
fn convert_string_to_value(string_input: &str, span: Span) -> Result<Value, ShellError> {
match nu_json::from_str(string_input) {
Ok(value) => Ok(convert_nujson_to_value(value, span)),
Err(x) => match x {
nu_json::Error::Syntax(_, row, col) => {
let label = x.to_string();
let label_span = convert_row_column_to_span(row, col, &string_input);
let label_span = convert_row_column_to_span(row, col, string_input);
Err(ShellError::GenericError {
error: "Error while parsing JSON text".into(),
msg: "error parsing JSON text".into(),
span: Some(span),
help: None,
inner: vec![ShellError::OutsideSpannedLabeledError {
src: string_input,
src: string_input.into(),
error: "Error while parsing JSON text".into(),
msg: label,
span: label_span,
@ -174,6 +191,35 @@ fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, Sh
}
}
fn convert_string_to_value_strict(string_input: &str, span: Span) -> Result<Value, ShellError> {
match serde_json::from_str(string_input) {
Ok(value) => Ok(convert_nujson_to_value(value, span)),
Err(err) => Err(if err.is_syntax() {
let label = err.to_string();
let label_span = convert_row_column_to_span(err.line(), err.column(), string_input);
ShellError::GenericError {
error: "Error while parsing JSON text".into(),
msg: "error parsing JSON text".into(),
span: Some(span),
help: None,
inner: vec![ShellError::OutsideSpannedLabeledError {
src: string_input.into(),
error: "Error while parsing JSON text".into(),
msg: label,
span: label_span,
}],
}
} else {
ShellError::CantConvert {
to_type: format!("structured json data ({err})"),
from_type: "string".into(),
span,
help: None,
}
}),
}
}
#[cfg(test)]
mod test {
use super::*;

View File

@ -43,6 +43,32 @@ fn from_json_text_to_table() {
})
}
#[test]
fn from_json_text_to_table_strict() {
Playground::setup("filter_from_json_test_1_strict", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"katz.txt",
r#"
{
"katz": [
{"name": "Yehuda", "rusty_luck": 1},
{"name": "JT", "rusty_luck": 1},
{"name": "Andres", "rusty_luck": 1},
{"name":"GorbyPuff", "rusty_luck": 1}
]
}
"#,
)]);
let actual = nu!(
cwd: dirs.test(),
"open katz.txt | from json -s | get katz | get rusty_luck | length "
);
assert_eq!(actual.out, "4");
})
}
#[test]
fn from_json_text_recognizing_objects_independently_to_table() {
Playground::setup("filter_from_json_test_2", |dirs, sandbox| {
@ -70,6 +96,33 @@ fn from_json_text_recognizing_objects_independently_to_table() {
})
}
#[test]
fn from_json_text_recognizing_objects_independently_to_table_strict() {
Playground::setup("filter_from_json_test_2_strict", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"katz.txt",
r#"
{"name": "Yehuda", "rusty_luck": 1}
{"name": "JT", "rusty_luck": 1}
{"name": "Andres", "rusty_luck": 1}
{"name":"GorbyPuff", "rusty_luck": 3}
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open katz.txt
| from json -o -s
| where name == "GorbyPuff"
| get rusty_luck.0
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn table_to_json_text() {
Playground::setup("filter_to_json_test", |dirs, sandbox| {
@ -99,6 +152,35 @@ fn table_to_json_text() {
})
}
#[test]
fn table_to_json_text_strict() {
Playground::setup("filter_to_json_test_strict", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"sample.txt",
r#"
JonAndrehudaTZ,3
GorbyPuff,100
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open sample.txt
| lines
| split column "," name luck
| select name
| to json
| from json -s
| get 0
| get name
"#
));
assert_eq!(actual.out, "JonAndrehudaTZ");
})
}
#[test]
fn top_level_values_from_json() {
for (value, type_name) in [("null", "nothing"), ("true", "bool"), ("false", "bool")] {
@ -109,6 +191,28 @@ fn top_level_values_from_json() {
}
}
#[test]
fn top_level_values_from_json_strict() {
for (value, type_name) in [("null", "nothing"), ("true", "bool"), ("false", "bool")] {
let actual = nu!(r#""{}" | from json -s | to json"#, value);
assert_eq!(actual.out, value);
let actual = nu!(r#""{}" | from json -s | describe"#, value);
assert_eq!(actual.out, type_name);
}
}
#[test]
fn strict_parsing_fails_on_comment() {
let actual = nu!(r#"'{ "a": 1, /* comment */ "b": 2 }' | from json -s"#);
assert!(actual.err.contains("error parsing JSON text"));
}
#[test]
fn strict_parsing_fails_on_trailing_comma() {
let actual = nu!(r#"'{ "a": 1, "b": 2, }' | from json -s"#);
assert!(actual.err.contains("error parsing JSON text"));
}
#[test]
fn ranges_to_json_as_array() {
let value = r#"[ 1, 2, 3]"#;

View File

@ -26,15 +26,6 @@ pub struct Deserializer<Iter: Iterator<Item = u8>> {
state: State,
}
// macro_rules! try_or_invalid {
// ($self_:expr, $e:expr) => {
// match $e {
// Some(v) => v,
// None => { return Err($self_.error(ErrorCode::InvalidNumber)); }
// }
// }
// }
impl<Iter> Deserializer<Iter>
where
Iter: Iterator<Item = u8>,