nushell/crates/nu-command/tests/format_conversions/csv.rs

484 lines
12 KiB
Rust
Raw Normal View History

2019-12-17 19:54:39 +01:00
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
use nu_test_support::playground::Playground;
use nu_test_support::{nu, pipeline};
#[test]
fn table_to_csv_text_and_from_csv_text_back_into_table() {
let actual = nu!(
cwd: "tests/fixtures/formats",
"open caco3_plastics.csv | to csv | from csv | first | get origin "
);
assert_eq!(actual.out, "SPAIN");
}
#[test]
fn table_to_csv_text() {
Playground::setup("filter_to_csv_test_1", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"csv_text_sample.txt",
r#"
importer,shipper,tariff_item,name,origin
Plasticos Rival,Reverte,2509000000,Calcium carbonate,Spain
Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open csv_text_sample.txt
| lines
| str trim
2020-05-24 08:41:30 +02:00
| split column "," a b c d origin
| last 1
| to csv
| lines
| get 1
"#
));
assert!(actual
.out
.contains("Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia"));
})
}
#[test]
fn table_to_csv_text_skipping_headers_after_conversion() {
Playground::setup("filter_to_csv_test_2", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"csv_text_sample.txt",
r#"
importer,shipper,tariff_item,name,origin
Plasticos Rival,Reverte,2509000000,Calcium carbonate,Spain
Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open csv_text_sample.txt
| lines
| str trim
2020-05-24 08:41:30 +02:00
| split column "," a b c d origin
| last 1
| to csv --noheaders
"#
));
assert!(actual
.out
.contains("Tigre Ecuador,OMYA Andina,3824909999,Calcium carbonate,Colombia"));
})
}
#[test]
fn infers_types() {
Playground::setup("filter_from_csv_test_1", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_cuatro_mosqueteros.csv",
r#"
first_name,last_name,rusty_luck,d
Andrés,Robalino,1,d
JT,Turner,1,d
Yehuda,Katz,1,d
Jason,Gedge,1,d
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_cuatro_mosqueteros.csv
| where rusty_luck > 0
| length
"#
));
assert_eq!(actual.out, "4");
})
}
#[test]
fn from_csv_text_to_table() {
Playground::setup("filter_from_csv_test_2", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
JT,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_separator_to_table() {
Playground::setup("filter_from_csv_test_3", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name;last_name;rusty_luck
Andrés;Robalino;1
JT;Turner;1
Yehuda;Katz;1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator ";"
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_tab_separator_to_table() {
Playground::setup("filter_from_csv_test_4", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name last_name rusty_luck
Andrés Robalino 1
JT Turner 1
Yehuda Katz 1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator (char tab)
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
Replace `ExternalStream` with new `ByteStream` type (#12774) # Description This PR introduces a `ByteStream` type which is a `Read`-able stream of bytes. Internally, it has an enum over three different byte stream sources: ```rust pub enum ByteStreamSource { Read(Box<dyn Read + Send + 'static>), File(File), Child(ChildProcess), } ``` This is in comparison to the current `RawStream` type, which is an `Iterator<Item = Vec<u8>>` and has to allocate for each read chunk. Currently, `PipelineData::ExternalStream` serves a weird dual role where it is either external command output or a wrapper around `RawStream`. `ByteStream` makes this distinction more clear (via `ByteStreamSource`) and replaces `PipelineData::ExternalStream` in this PR: ```rust pub enum PipelineData { Empty, Value(Value, Option<PipelineMetadata>), ListStream(ListStream, Option<PipelineMetadata>), ByteStream(ByteStream, Option<PipelineMetadata>), } ``` The PR is relatively large, but a decent amount of it is just repetitive changes. This PR fixes #7017, fixes #10763, and fixes #12369. This PR also improves performance when piping external commands. Nushell should, in most cases, have competitive pipeline throughput compared to, e.g., bash. | Command | Before (MB/s) | After (MB/s) | Bash (MB/s) | | -------------------------------------------------- | -------------:| ------------:| -----------:| | `throughput \| rg 'x'` | 3059 | 3744 | 3739 | | `throughput \| nu --testbin relay o> /dev/null` | 3508 | 8087 | 8136 | # User-Facing Changes - This is a breaking change for the plugin communication protocol, because the `ExternalStreamInfo` was replaced with `ByteStreamInfo`. Plugins now only have to deal with a single input stream, as opposed to the previous three streams: stdout, stderr, and exit code. - The output of `describe` has been changed for external/byte streams. - Temporary breaking change: `bytes starts-with` no longer works with byte streams. This is to keep the PR smaller, and `bytes ends-with` already does not work on byte streams. - If a process core dumped, then instead of having a `Value::Error` in the `exit_code` column of the output returned from `complete`, it now is a `Value::Int` with the negation of the signal number. # After Submitting - Update docs and book as necessary - Release notes (e.g., plugin protocol changes) - Adapt/convert commands to work with byte streams (high priority is `str length`, `bytes starts-with`, and maybe `bytes ends-with`). - Refactor the `tee` code, Devyn has already done some work on this. --------- Co-authored-by: Devyn Cairns <devyn.cairns@gmail.com>
2024-05-16 16:11:18 +02:00
#[ignore = "csv crate has a bug when the last line is a comment: https://github.com/BurntSushi/rust-csv/issues/363"]
fn from_csv_text_with_comments_to_table() {
Playground::setup("filter_from_csv_test_5", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
# This is a comment
first_name,last_name,rusty_luck
# This one too
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
# This one also
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r##"
open los_tres_caballeros.txt
| from csv --comment "#"
| get rusty_luck
| length
"##
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_custom_quotes_to_table() {
Playground::setup("filter_from_csv_test_6", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
'And''rés',Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --quote "'"
| first
| get first_name
"#
));
assert_eq!(actual.out, "And'rés");
})
}
#[test]
fn from_csv_text_with_custom_escapes_to_table() {
Playground::setup("filter_from_csv_test_7", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
"And\"rés",Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r"
open los_tres_caballeros.txt
| from csv --escape '\'
| first
| get first_name
"
));
assert_eq!(actual.out, "And\"rés");
})
}
#[test]
fn from_csv_text_skipping_headers_to_table() {
Playground::setup("filter_from_csv_test_8", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_amigos.txt",
r#"
Andrés,Robalino,1
JT,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_amigos.txt
| from csv --noheaders
Make the subcommands (`from {csv, tsv, ssv}`) 0-based for consistency (#13209) # Description fixed #11678 The sub-commands of from command (`from {csv, tsv, ssv}`) name columns starting from index 0. This behaviour is inconsistent with other commands such as `detect columns`. This PR makes the subcommands index 0-based. # User-Facing Changes The subcommands (`from {csv, tsv, ssv}`) return a table with the columns starting at index 0 if no header data is passed. ``` ~/Development/nushell> "foo bar baz" | from ssv -n -m 1 ╭───┬─────────┬─────────┬─────────╮ │ # │ column0 │ column1 │ column2 │ ├───┼─────────┼─────────┼─────────┤ │ 0 │ foo │ bar │ baz │ ╰───┴─────────┴─────────┴─────────╯ ~/Development/nushell> "foo,bar,baz" | from csv -n ╭───┬─────────┬─────────┬─────────╮ │ # │ column0 │ column1 │ column2 │ ├───┼─────────┼─────────┼─────────┤ │ 0 │ foo │ bar │ baz │ ╰───┴─────────┴─────────┴─────────╯ ~/Development/nushell> "foo\tbar\tbaz" | from tsv -n ╭───┬─────────┬─────────┬─────────╮ │ # │ column0 │ column1 │ column2 │ ├───┼─────────┼─────────┼─────────┤ │ 0 │ foo │ bar │ baz │ ╰───┴─────────┴─────────┴─────────╯ ``` # Tests + Formatting When I ran tests, `commands::touch::change_file_mtime_to_reference` failed with the following error. The error also occurs in the master branch, so it's probably unrelated to these changes. (maybe a problem with my dev environment) ``` $ toolkit check pr ~~~~~~~~ failures: ---- commands::touch::change_file_mtime_to_reference stdout ---- === stderr thread 'commands::touch::change_file_mtime_to_reference' panicked at crates/nu-command/tests/commands/touch.rs:298:9: assertion `left == right` failed left: SystemTime { tv_sec: 1719149697, tv_nsec: 57576929 } right: SystemTime { tv_sec: 1719149697, tv_nsec: 78219489 } failures: commands::touch::change_file_mtime_to_reference test result: FAILED. 1533 passed; 1 failed; 32 ignored; 0 measured; 0 filtered out; finished in 10.87s error: test failed, to rerun pass `-p nu-command --test main` - :green_circle: `toolkit fmt` - :green_circle: `toolkit clippy` - :red_circle: `toolkit test` - :black_circle: `toolkit test stdlib` ``` # After Submitting nothing
2024-06-27 00:51:47 +02:00
| get column2
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn from_csv_text_with_missing_columns_to_table() {
Playground::setup("filter_from_csv_test_9", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --flexible
| get -i rusty_luck
| compact
| length
"#
));
assert_eq!(actual.out, "2");
})
}
#[test]
fn from_csv_text_with_multiple_char_separator() {
Playground::setup("filter_from_csv_test_10", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator "li"
"#
));
assert!(actual
.err
.contains("separator should be a single char or a 4-byte unicode"));
})
}
#[test]
fn from_csv_text_with_wrong_type_separator() {
Playground::setup("filter_from_csv_test_11", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name,last_name,rusty_luck
Andrés,Robalino,1
Jonathan,Turner,1
Yehuda,Katz,1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator ('123' | into int)
"#
));
assert!(actual.err.contains("can't convert int to string"));
})
}
#[test]
fn table_with_record_error() {
let actual = nu!(pipeline(
r#"
[[a b]; [1 2] [3 {a: 1 b: 2}]]
| to csv
"#
));
assert!(actual.err.contains("can't convert"))
}
#[test]
fn list_not_table_error() {
let actual = nu!(pipeline(
r#"
[{a: 1 b: 2} {a: 3 b: 4} 1]
| to csv
"#
));
assert!(actual.err.contains("can't convert"))
}
#[test]
fn string_to_csv_error() {
let actual = nu!(pipeline(
r#"
'qwe' | to csv
"#
));
Input output checking (#9680) # Description This PR tights input/output type-checking a bit more. There are a lot of commands that don't have correct input/output types, so part of the effort is updating them. This PR now contains updates to commands that had wrong input/output signatures. It doesn't add examples for these new signatures, but that can be follow-up work. # User-Facing Changes BREAKING CHANGE BREAKING CHANGE This work enforces many more checks on pipeline type correctness than previous nushell versions. This strictness may uncover incompatibilities in existing scripts or shortcomings in the type information for internal commands. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect -A clippy::result_large_err` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass - `cargo run -- -c "use std testing; testing run-tests --path crates/nu-std"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
2023-07-14 05:20:35 +02:00
assert!(actual.err.contains("command doesn't support"))
}
#[test]
fn parses_csv_with_unicode_sep() {
Playground::setup("filter_from_csv_unicode_sep_test_3", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name;last_name;rusty_luck
Andrés;Robalino;1
JT;Turner;1
Yehuda;Katz;1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator "003B"
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
#[test]
fn parses_csv_with_unicode_x1f_sep() {
Playground::setup("filter_from_csv_unicode_sep_x1f_test_3", |dirs, sandbox| {
sandbox.with_files(&[FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_namelast_namerusty_luck
AndrésRobalino1
JTTurner1
YehudaKatz1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.txt
| from csv --separator "001F"
| get rusty_luck
| length
"#
));
assert_eq!(actual.out, "3");
})
}
fix: Ensure consistent vals and cols when parsing with `--flexible` (#10814) # Description `from tsv` and `from csv` both support a `--flexible` flag. This flag can be used to "allow the number of fields in records to be variable". Previously, a record's invariant that `rec.cols.len() == rec.vals.len()` could be broken during parsing. This can cause runtime errors as in #10693. Other commands, like `select` were also affected. The inconsistencies are somewhat hard to see, as most nushell code assumes an equal number of columns and values. # Before ### Fewer values than columns ```nushell > let record = (echo "one,two\n1" | from csv --flexible | first) # There are two columns > $record | columns | to nuon [one, two] # But only one value > $record | values | to nuon [1] # And printing the record doesn't show the second column! > $record | to nuon {one: 1} ``` ### More values than columns ```nushell > let record = (echo "one,two\n1,2,3" | from csv --flexible | first) # There are two columns > $record | columns | to nuon [one, two] # But three values > $record | values | to nuon [1, 2, 3] # And printing the record doesn't show the third value! > $record | to nuon {one: 1, two: 2} ``` # After ### Fewer values than columns ```nushell > let record = (echo "one,two\n1" | from csv --flexible | first) # There are two columns > $record | columns | to nuon [one, two] # And a matching number of values > $record | values | to nuon [1, null] # And printing the record works as expected > $record | to nuon {one: 1, two: null} ``` ### More values than columns ```nushell > let record = (echo "one,two\n1,2,3" | from csv --flexible | first) # There are two columns > $record | columns | to nuon [one, two] # And a matching number of values > $record | values | to nuon [1, 2] # And printing the record works as expected > $record | to nuon {one: 1, two: 2} ``` # User-Facing Changes Using the `--flexible` flag with `from csv` and `from tsv` will not result in corrupted record state. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use std testing; testing run-tests --path crates/nu-std"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
2023-10-24 22:54:26 +02:00
#[test]
fn from_csv_test_flexible_extra_vals() {
let actual = nu!(pipeline(
r#"
echo "a,b\n1,2,3" | from csv --flexible | first | values | to nuon
"#
));
assert_eq!(actual.out, "[1, 2]");
}
#[test]
fn from_csv_test_flexible_missing_vals() {
let actual = nu!(pipeline(
r#"
echo "a,b\n1" | from csv --flexible | first | values | to nuon
"#
));
assert_eq!(actual.out, "[1, null]");
}