add the ability to combine columns with detect columns (#9511)

# Description

This PR adds the ability to manually combine columns in the `detect
columns` command. This is useful when items are close together but want
them to be in one column.

For example, if you want to parse the output of bash's `ls` command, the
output passed into `detect columns` would look similar to this.
```
❯ ^ls -lh
total 242K
-rw-r--r-- 1 dschroeder 197121 3.5K Mar 30 10:10 CODE_OF_CONDUCT.md
-rw-r--r-- 1 dschroeder 197121  11K Jun 21 14:16 CONTRIBUTING.md
-rw-r--r-- 1 dschroeder 197121 153K Jun 21 14:16 Cargo.lock
-rw-r--r-- 1 dschroeder 197121 5.7K Jun 21 08:47 Cargo.toml
-rw-r--r-- 1 dschroeder 197121  371 Mar 30 10:10 Cross.toml
-rw-r--r-- 1 dschroeder 197121 1.1K Apr  3 08:37 LICENSE
-rw-r--r-- 1 dschroeder 197121  13K Jun 21 14:16 README.md
drwxr-xr-x 1 dschroeder 197121    0 May 24 08:06 assets
drwxr-xr-x 1 dschroeder 197121    0 Jun 21 14:16 benches
-rw-r--r-- 1 dschroeder 197121  310 Mar 30 10:10 codecov.yml
drwxr-xr-x 1 dschroeder 197121    0 Jun  1 15:30 crates
drwxr-xr-x 1 dschroeder 197121    0 May  4 07:55 dict
drwxr-xr-x 1 dschroeder 197121    0 Mar 30 10:10 docker
-rw-r--r-- 1 dschroeder 197121 1.1K Jun 12 07:03 rust-toolchain.toml
drwxr-xr-x 1 dschroeder 197121    0 May 26 07:11 scripts
drwxr-xr-x 1 dschroeder 197121    0 Jun 15 07:14 src
drwxr-xr-x 1 dschroeder 197121    0 Jun 21 09:48 target
drwxr-xr-x 1 dschroeder 197121    0 Jun 21 12:30 tests
-rw-r--r-- 1 dschroeder 197121  16K Jun 21 14:16 toolkit.nu
drwxr-xr-x 1 dschroeder 197121    0 Jun  1 15:30 wix
```
Running it through `detect columns` is helpful but notice how the Month,
Day, and Time are in their own columns.
```
❯ ^ls -lh | detect columns --no-headers --skip 1
╭────┬────────────┬─────────┬────────────┬─────────┬─────────┬─────────┬─────────┬─────────┬─────────────────────╮
│  # │  column0   │ column1 │  column2   │ column3 │ column4 │ column5 │ column6 │ column7 │       column8       │
├────┼────────────┼─────────┼────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────────────────┤
│  0 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 3.5K    │ Mar     │ 30      │ 10:10   │ CODE_OF_CONDUCT.md  │
│  1 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 11K     │ Jun     │ 21      │ 14:16   │ CONTRIBUTING.md     │
│  2 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 153K    │ Jun     │ 21      │ 14:16   │ Cargo.lock          │
│  3 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 5.7K    │ Jun     │ 21      │ 08:47   │ Cargo.toml          │
│  4 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 371     │ Mar     │ 30      │ 10:10   │ Cross.toml          │
│  5 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 1.1K    │ Apr     │ 3       │ 08:37   │ LICENSE             │
│  6 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 13K     │ Jun     │ 21      │ 14:16   │ README.md           │
│  7 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ May     │ 24      │ 08:06   │ assets              │
│  8 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun     │ 21      │ 14:16   │ benches             │
│  9 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 310     │ Mar     │ 30      │ 10:10   │ codecov.yml         │
│ 10 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun     │ 1       │ 15:30   │ crates              │
│ 11 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ May     │ 4       │ 07:55   │ dict                │
│ 12 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Mar     │ 30      │ 10:10   │ docker              │
│ 13 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 1.1K    │ Jun     │ 12      │ 07:03   │ rust-toolchain.toml │
│ 14 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ May     │ 26      │ 07:11   │ scripts             │
│ 15 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun     │ 15      │ 07:14   │ src                 │
│ 16 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun     │ 21      │ 09:48   │ target              │
│ 17 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun     │ 21      │ 12:30   │ tests               │
│ 18 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 16K     │ Jun     │ 21      │ 14:16   │ toolkit.nu          │
│ 19 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun     │ 1       │ 15:30   │ wix                 │
├────┼────────────┼─────────┼────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────────────────┤
│  # │  column0   │ column1 │  column2   │ column3 │ column4 │ column5 │ column6 │ column7 │       column8       │
╰────┴────────────┴─────────┴────────────┴─────────┴─────────┴─────────┴─────────┴─────────┴─────────────────────╯
```
Now you can add `--combine-columns <range>` and get something like this.
```
❯ ^ls -lh | detect columns --no-headers --skip 1 --combine-columns 5..7
╭────┬────────────┬─────────┬────────────┬─────────┬─────────┬──────────────┬─────────────────────╮
│  # │  column0   │ column1 │  column2   │ column3 │ column4 │   column5    │       column6       │
├────┼────────────┼─────────┼────────────┼─────────┼─────────┼──────────────┼─────────────────────┤
│  0 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 3.5K    │ Mar 30 10:10 │ CODE_OF_CONDUCT.md  │
│  1 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 11K     │ Jun 21 14:16 │ CONTRIBUTING.md     │
│  2 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 153K    │ Jun 21 14:16 │ Cargo.lock          │
│  3 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 5.7K    │ Jun 21 08:47 │ Cargo.toml          │
│  4 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 371     │ Mar 30 10:10 │ Cross.toml          │
│  5 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 1.1K    │ Apr 3 08:37  │ LICENSE             │
│  6 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 13K     │ Jun 21 14:16 │ README.md           │
│  7 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ May 24 08:06 │ assets              │
│  8 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun 21 14:16 │ benches             │
│  9 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 310     │ Mar 30 10:10 │ codecov.yml         │
│ 10 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun 1 15:30  │ crates              │
│ 11 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ May 4 07:55  │ dict                │
│ 12 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Mar 30 10:10 │ docker              │
│ 13 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 1.1K    │ Jun 12 07:03 │ rust-toolchain.toml │
│ 14 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ May 26 07:11 │ scripts             │
│ 15 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun 15 07:14 │ src                 │
│ 16 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun 21 09:48 │ target              │
│ 17 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun 21 12:30 │ tests               │
│ 18 │ -rw-r--r-- │ 1       │ dschroeder │ 197121  │ 16K     │ Jun 21 14:16 │ toolkit.nu          │
│ 19 │ drwxr-xr-x │ 1       │ dschroeder │ 197121  │ 0       │ Jun 1 15:30  │ wix                 │
├────┼────────────┼─────────┼────────────┼─────────┼─────────┼──────────────┼─────────────────────┤
│  # │  column0   │ column1 │  column2   │ column3 │ column4 │   column5    │       column6       │
╰────┴────────────┴─────────┴────────────┴─────────┴─────────┴──────────────┴─────────────────────╯
```
Then you can fancy-it-up by tweaking the other columns and renaming
columns.
```
❯ ^ls -lh | detect columns --no-headers --skip 1 --combine-columns 5..7 | update column5 {|r| $r.column5 | into datetime} | into int column1 column3 | into filesize column4 | rename perms links user inode filesize datetime name
╭────┬────────────┬───────┬────────────┬────────┬──────────┬──────────────┬─────────────────────╮
│  # │   perms    │ links │    user    │ inode  │ filesize │   datetime   │        name         │
├────┼────────────┼───────┼────────────┼────────┼──────────┼──────────────┼─────────────────────┤
│  0 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │   3.5 KB │ 2 months ago │ CODE_OF_CONDUCT.md  │
│  1 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │  11.0 KB │ 2 days ago   │ CONTRIBUTING.md     │
│  2 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │ 153.0 KB │ 2 days ago   │ Cargo.lock          │
│  3 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │   5.7 KB │ 2 days ago   │ Cargo.toml          │
│  4 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │    371 B │ 2 months ago │ Cross.toml          │
│  5 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │   1.1 KB │ 2 months ago │ LICENSE             │
│  6 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │  13.0 KB │ 2 days ago   │ README.md           │
│  7 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ a month ago  │ assets              │
│  8 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ 2 days ago   │ benches             │
│  9 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │    310 B │ 2 months ago │ codecov.yml         │
│ 10 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ 3 weeks ago  │ crates              │
│ 11 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ 2 months ago │ dict                │
│ 12 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ 2 months ago │ docker              │
│ 13 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │   1.1 KB │ 2 weeks ago  │ rust-toolchain.toml │
│ 14 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ 4 weeks ago  │ scripts             │
│ 15 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ a week ago   │ src                 │
│ 16 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ 2 days ago   │ target              │
│ 17 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ 2 days ago   │ tests               │
│ 18 │ -rw-r--r-- │     1 │ dschroeder │ 197121 │  16.0 KB │ 2 days ago   │ toolkit.nu          │
│ 19 │ drwxr-xr-x │     1 │ dschroeder │ 197121 │      0 B │ 3 weeks ago  │ wix                 │
├────┼────────────┼───────┼────────────┼────────┼──────────┼──────────────┼─────────────────────┤
│  # │   perms    │ links │    user    │ inode  │ filesize │   datetime   │        name         │
╰────┴────────────┴───────┴────────────┴────────┴──────────┴──────────────┴─────────────────────╯
```
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect -A clippy::result_large_err` to check that
you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- crates/nu-std/tests/run.nu` to run the tests for the
standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Darren Schroeder 2023-06-23 16:19:08 -05:00 committed by GitHub
parent 0c888486c9
commit 761946c522
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,12 +1,13 @@
use std::iter::Peekable;
use std::str::CharIndices;
use itertools::Itertools;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, IntoInterruptiblePipelineData, PipelineData, ShellError, Signature, Span,
Spanned, SyntaxShape, Type, Value,
Category, Example, IntoInterruptiblePipelineData, PipelineData, Range, ShellError, Signature,
Span, Spanned, SyntaxShape, Type, Value,
};
type Input<'t> = Peekable<CharIndices<'t>>;
@ -29,6 +30,12 @@ impl Command for DetectColumns {
)
.input_output_types(vec![(Type::String, Type::Table(vec![]))])
.switch("no-headers", "don't detect headers", Some('n'))
.named(
"combine-columns",
SyntaxShape::Range,
"columns to be combined; listed as a range",
Some('c'),
)
.category(Category::Strings)
}
@ -37,7 +44,7 @@ impl Command for DetectColumns {
}
fn search_terms(&self) -> Vec<&str> {
vec!["split"]
vec!["split", "tabular"]
}
fn run(
@ -78,6 +85,11 @@ impl Command for DetectColumns {
example: "$'c1 c2 c3(char nl)a b c' | detect columns",
result: None,
},
Example {
description: "Parse external ls command and combine columns for datetime",
example: "^ls -lh | detect columns --no-headers --skip 1 --combine-columns 5..7",
result: None,
},
]
}
}
@ -91,6 +103,7 @@ fn detect_columns(
let name_span = call.head;
let num_rows_to_skip: Option<usize> = call.get_flag(engine_state, stack, "skip")?;
let noheader = call.has_flag("no-headers");
let range: Option<Range> = call.get_flag(engine_state, stack, "combine-columns")?;
let ctrlc = engine_state.ctrlc.clone();
let config = engine_state.get_config();
let input = input.collect_string("", config)?;
@ -172,10 +185,70 @@ fn detect_columns(
}
}
Value::Record {
cols,
vals,
span: name_span,
if range.is_some() {
// Destructure the range parameter
let (start_index, end_index) = if let Some(range) = &range {
match nu_cmd_base::util::process_range(range) {
Ok(r) => {
// `process_range()` returns `isize::MAX` if the range is open-ended,
// which is not ideal for us
let end = if r.1 as usize > cols.len() {
cols.len()
} else {
r.1 as usize
};
(r.0 as usize, end)
}
Err(processing_error) => {
let err = processing_error("could not find range index", name_span);
return Value::Error {
error: Box::new(err),
};
}
}
} else {
(0usize, cols.len())
};
// Merge Columns
let part1 = &cols.clone()[0..start_index];
let combined = &cols.clone()[start_index..=end_index];
let binding = combined.join("");
let part3 = &cols.clone()[end_index + 1..];
let new_cols = [part1, &[binding], part3].concat();
// Now renumber columns since we merged some
let mut renum_cols = vec![];
for (idx, _acol) in new_cols.iter().enumerate() {
renum_cols.push(format!("column{idx}"));
}
// Merge Values
let part1 = &vals.clone()[0..start_index];
let combined = &vals.clone()[start_index..=end_index];
let binding = Value::string(
combined
.iter()
.map(|f| match f.as_string() {
Ok(s) => s,
_ => "".to_string(),
})
.join(" "), // add a space between items
Span::unknown(),
);
let part3 = &vals.clone()[end_index + 1..];
let new_vals = [part1, &[binding], part3].concat();
Value::Record {
cols: renum_cols,
vals: new_vals,
span: name_span,
}
} else {
Value::Record {
cols,
vals,
span: name_span,
}
}
})
.into_pipeline_data(ctrlc))
@ -207,9 +280,9 @@ pub fn find_columns(input: &str) -> Vec<Spanned<String>> {
#[derive(Clone, Copy)]
enum BlockKind {
Paren,
CurlyBracket,
SquareBracket,
Parenthesis,
Brace,
Bracket,
}
fn baseline(src: &mut Input) -> Spanned<String> {
@ -265,27 +338,27 @@ fn baseline(src: &mut Input) -> Spanned<String> {
quote_start = Some(c);
} else if c == '[' {
// We encountered an opening `[` delimiter.
block_level.push(BlockKind::SquareBracket);
block_level.push(BlockKind::Bracket);
} else if c == ']' {
// We encountered a closing `]` delimiter. Pop off the opening `[`
// delimiter.
if let Some(BlockKind::SquareBracket) = block_level.last() {
if let Some(BlockKind::Bracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == '{' {
// We encountered an opening `{` delimiter.
block_level.push(BlockKind::CurlyBracket);
block_level.push(BlockKind::Brace);
} else if c == '}' {
// We encountered a closing `}` delimiter. Pop off the opening `{`.
if let Some(BlockKind::CurlyBracket) = block_level.last() {
if let Some(BlockKind::Brace) = block_level.last() {
let _ = block_level.pop();
}
} else if c == '(' {
// We enceountered an opening `(` delimiter.
block_level.push(BlockKind::Paren);
block_level.push(BlockKind::Parenthesis);
} else if c == ')' {
// We encountered a closing `)` delimiter. Pop off the opening `(`.
if let Some(BlockKind::Paren) = block_level.last() {
if let Some(BlockKind::Parenthesis) = block_level.last() {
let _ = block_level.pop();
}
} else if is_termination(&block_level, c) {