fix: Ensure consistent vals and cols when parsing with --flexible (#10814)

# Description
`from tsv` and `from csv` both support a `--flexible` flag. This flag
can be used to "allow the number of fields in records to be variable".

Previously, a record's invariant that `rec.cols.len() == rec.vals.len()`
could be broken during parsing. This can cause runtime errors as in
#10693. Other commands, like `select` were also affected.

The inconsistencies are somewhat hard to see, as most nushell code
assumes an equal number of columns and values.

# Before

### Fewer values than columns
```nushell
> let record = (echo "one,two\n1" | from csv --flexible | first)
# There are two columns
> $record | columns | to nuon
[one, two]
# But only one value
> $record | values | to nuon
[1]
# And printing the record doesn't show the second column!
> $record | to nuon
{one: 1}
```

### More values than columns
```nushell
> let record = (echo "one,two\n1,2,3" | from csv --flexible | first)
# There are two columns
> $record | columns | to nuon
[one, two]
# But three values
> $record | values | to nuon
[1, 2, 3]
# And printing the record doesn't show the third value!
> $record | to nuon
{one: 1, two: 2}
```
# After

### Fewer values than columns
```nushell
> let record = (echo "one,two\n1" | from csv --flexible | first)
# There are two columns
> $record | columns | to nuon
[one, two]
# And a matching number of values
> $record | values | to nuon
[1, null]
# And printing the record works as expected
> $record | to nuon
{one: 1, two: null}
```

### More values than columns
```nushell
> let record = (echo "one,two\n1,2,3" | from csv --flexible | first)
# There are two columns
> $record | columns | to nuon
[one, two]
# And a matching number of values
> $record | values | to nuon
[1, 2]
# And printing the record works as expected
> $record | to nuon
{one: 1, two: 2}
```

# User-Facing Changes
Using the `--flexible` flag with `from csv` and `from tsv` will not
result in corrupted record state.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Hudson Clark 2023-10-24 16:54:26 -04:00 committed by GitHub
parent 0588a4fc19
commit cb754befe9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 14 deletions

View File

@ -35,21 +35,25 @@ fn from_delimited_string_to_value(
let mut rows = vec![];
for row in reader.records() {
let mut output_row = vec![];
for value in row?.iter() {
if no_infer {
output_row.push(Value::string(value.to_string(), span));
continue;
}
let row = row?;
let output_row = (0..headers.len())
.map(|i| {
row.get(i)
.map(|value| {
if no_infer {
Value::string(value.to_string(), span)
} else if let Ok(i) = value.parse::<i64>() {
Value::int(i, span)
} else if let Ok(f) = value.parse::<f64>() {
Value::float(f, span)
} else {
Value::string(value.to_string(), span)
}
})
.unwrap_or(Value::nothing(span))
})
.collect::<Vec<Value>>();
if let Ok(i) = value.parse::<i64>() {
output_row.push(Value::int(i, span));
} else if let Ok(f) = value.parse::<f64>() {
output_row.push(Value::float(f, span));
} else {
output_row.push(Value::string(value.to_string(), span));
}
}
rows.push(Value::record(
Record {
cols: headers.clone(),

View File

@ -460,3 +460,23 @@ fn parses_csv_with_unicode_x1f_sep() {
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_test_flexible_extra_vals() {
let actual = nu!(pipeline(
r#"
echo "a,b\n1,2,3" | from csv --flexible | first | values | to nuon
"#
));
assert_eq!(actual.out, "[1, 2]");
}
#[test]
fn from_csv_test_flexible_missing_vals() {
let actual = nu!(pipeline(
r#"
echo "a,b\n1" | from csv --flexible | first | values | to nuon
"#
));
assert_eq!(actual.out, "[1, null]");
}