Strict JSON parsing (#11592)

# Description
Adds the `--strict` flag for `from json` which will try to parse text
while following the exact JSON specification (e.g., no comments or
trailing commas allowed). Fixes issue #11548.
This commit is contained in:
Ian Manske
2024-01-30 14:10:19 +00:00
committed by GitHub
parent 6530403ff8
commit 4e0a65c822
5 changed files with 190 additions and 48 deletions

View File

@ -22,6 +22,7 @@ impl Command for FromJson {
Signature::build("from json")
.input_output_types(vec![(Type::String, Type::Any)])
.switch("objects", "treat each line as a separate value", Some('o'))
.switch("strict", "follow the json specification exactly", Some('s'))
.category(Category::Formats)
}
@ -42,6 +43,14 @@ impl Command for FromJson {
"b" => Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
})),
},
Example {
example: r#"'{ "a": 1, "b": 2 }' | from json -s"#,
description: "Parse json strictly which will error on comments and trailing commas",
result: Some(Value::test_record(record! {
"a" => Value::test_int(1),
"b" => Value::test_int(2),
})),
},
]
}
@ -59,52 +68,61 @@ impl Command for FromJson {
return Ok(PipelineData::new_with_metadata(metadata, span));
}
let strict = call.has_flag(engine_state, stack, "strict")?;
// TODO: turn this into a structured underline of the nu_json error
if call.has_flag(engine_state, stack, "objects")? {
let converted_lines: Vec<Value> = string_input
.lines()
.filter_map(move |x| {
if x.trim() == "" {
None
} else {
match convert_string_to_value(x.to_string(), span) {
Ok(v) => Some(v),
Err(error) => Some(Value::error(error, span)),
}
}
})
.collect();
let lines = string_input.lines().filter(|line| !line.trim().is_empty());
let converted_lines: Vec<_> = if strict {
lines
.map(|line| {
convert_string_to_value_strict(line, span)
.unwrap_or_else(|err| Value::error(err, span))
})
.collect()
} else {
lines
.map(|line| {
convert_string_to_value(line, span)
.unwrap_or_else(|err| Value::error(err, span))
})
.collect()
};
Ok(converted_lines
.into_pipeline_data_with_metadata(metadata, engine_state.ctrlc.clone()))
} else if strict {
Ok(convert_string_to_value_strict(&string_input, span)?
.into_pipeline_data_with_metadata(metadata))
} else {
Ok(convert_string_to_value(string_input, span)?
Ok(convert_string_to_value(&string_input, span)?
.into_pipeline_data_with_metadata(metadata))
}
}
}
fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value {
fn convert_nujson_to_value(value: nu_json::Value, span: Span) -> Value {
match value {
nu_json::Value::Array(array) => {
let v: Vec<Value> = array
.iter()
nu_json::Value::Array(array) => Value::list(
array
.into_iter()
.map(|x| convert_nujson_to_value(x, span))
.collect();
Value::list(v, span)
}
nu_json::Value::Bool(b) => Value::bool(*b, span),
nu_json::Value::F64(f) => Value::float(*f, span),
nu_json::Value::I64(i) => Value::int(*i, span),
.collect(),
span,
),
nu_json::Value::Bool(b) => Value::bool(b, span),
nu_json::Value::F64(f) => Value::float(f, span),
nu_json::Value::I64(i) => Value::int(i, span),
nu_json::Value::Null => Value::nothing(span),
nu_json::Value::Object(k) => Value::record(
k.iter()
.map(|(k, v)| (k.clone(), convert_nujson_to_value(v, span)))
k.into_iter()
.map(|(k, v)| (k, convert_nujson_to_value(v, span)))
.collect(),
span,
),
nu_json::Value::U64(u) => {
if *u > i64::MAX as u64 {
if u > i64::MAX as u64 {
Value::error(
ShellError::CantConvert {
to_type: "i64 sized integer".into(),
@ -115,22 +133,22 @@ fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value {
span,
)
} else {
Value::int(*u as i64, span)
Value::int(u as i64, span)
}
}
nu_json::Value::String(s) => Value::string(s.clone(), span),
nu_json::Value::String(s) => Value::string(s, span),
}
}
// Converts row+column to a Span, assuming bytes (1-based rows)
fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
let mut cur_row = 1;
let mut cur_col = 0;
let mut cur_col = 1;
for (offset, curr_byte) in contents.bytes().enumerate() {
if curr_byte == b'\n' {
cur_row += 1;
cur_col = 0;
cur_col = 1;
}
if cur_row >= row && cur_col >= col {
return Span::new(offset, offset);
@ -142,22 +160,21 @@ fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
Span::new(contents.len(), contents.len())
}
fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, ShellError> {
let result: Result<nu_json::Value, nu_json::Error> = nu_json::from_str(&string_input);
match result {
Ok(value) => Ok(convert_nujson_to_value(&value, span)),
fn convert_string_to_value(string_input: &str, span: Span) -> Result<Value, ShellError> {
match nu_json::from_str(string_input) {
Ok(value) => Ok(convert_nujson_to_value(value, span)),
Err(x) => match x {
nu_json::Error::Syntax(_, row, col) => {
let label = x.to_string();
let label_span = convert_row_column_to_span(row, col, &string_input);
let label_span = convert_row_column_to_span(row, col, string_input);
Err(ShellError::GenericError {
error: "Error while parsing JSON text".into(),
msg: "error parsing JSON text".into(),
span: Some(span),
help: None,
inner: vec![ShellError::OutsideSpannedLabeledError {
src: string_input,
src: string_input.into(),
error: "Error while parsing JSON text".into(),
msg: label,
span: label_span,
@ -174,6 +191,35 @@ fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, Sh
}
}
fn convert_string_to_value_strict(string_input: &str, span: Span) -> Result<Value, ShellError> {
match serde_json::from_str(string_input) {
Ok(value) => Ok(convert_nujson_to_value(value, span)),
Err(err) => Err(if err.is_syntax() {
let label = err.to_string();
let label_span = convert_row_column_to_span(err.line(), err.column(), string_input);
ShellError::GenericError {
error: "Error while parsing JSON text".into(),
msg: "error parsing JSON text".into(),
span: Some(span),
help: None,
inner: vec![ShellError::OutsideSpannedLabeledError {
src: string_input.into(),
error: "Error while parsing JSON text".into(),
msg: label,
span: label_span,
}],
}
} else {
ShellError::CantConvert {
to_type: format!("structured json data ({err})"),
from_type: "string".into(),
span,
help: None,
}
}),
}
}
#[cfg(test)]
mod test {
use super::*;