nushell/crates/nu-command/src/strings/str_/length.rs
JT 786ba3bf91
Input output checking (#9680)
# Description

This PR tightens input/output type-checking a bit more. There are a lot of
commands that don't have correct input/output types, so part of the
effort is updating them.

This PR now contains updates to commands that had wrong input/output
signatures. It doesn't add examples for these new signatures, but that
can be follow-up work.

# User-Facing Changes

BREAKING CHANGE BREAKING CHANGE

This work enforces many more checks on pipeline type correctness than
previous nushell versions. This strictness may uncover incompatibilities
in existing scripts or shortcomings in the type information for internal
commands.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect -A clippy::result_large_err` to check that
you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
2023-07-14 15:20:35 +12:00

148 lines
4.3 KiB
Rust

use crate::grapheme_flags;
use nu_cmd_base::input_handler::{operate, CmdArgument};
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::ast::CellPath;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::Category;
use nu_protocol::{Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value};
use unicode_segmentation::UnicodeSegmentation;
/// Arguments collected from a `str length` call and handed to `action`.
struct Arguments {
/// Cell paths given as rest arguments; `None` when none were supplied.
cell_paths: Option<Vec<CellPath>>,
/// When `true`, length is counted in grapheme clusters rather than bytes.
graphemes: bool,
}
impl CmdArgument for Arguments {
    /// Hands the stored cell paths to the caller, leaving `None` in their place.
    fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
        std::mem::take(&mut self.cell_paths)
    }
}
/// The `str length` command.
#[derive(Clone)]
pub struct SubCommand;
impl Command for SubCommand {
fn name(&self) -> &str {
"str length"
}
fn signature(&self) -> Signature {
Signature::build("str length")
.input_output_types(vec![(Type::String, Type::Int), (Type::List(Box::new(Type::String)), Type::List(Box::new(Type::Int)))])
.vectorizes_over_list(true)
.switch(
"grapheme-clusters",
"count length using grapheme clusters (all visible chars have length 1)",
Some('g'),
)
.switch(
"utf-8-bytes",
"count length using UTF-8 bytes (default; all non-ASCII chars have length 2+)",
Some('b'),
)
.rest(
"rest",
SyntaxShape::CellPath,
"For a data structure input, replace strings at the given cell paths with their length",
)
.category(Category::Strings)
}
fn usage(&self) -> &str {
"Output the length of any strings in the pipeline."
}
fn search_terms(&self) -> Vec<&str> {
vec!["size", "count"]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 0)?;
let args = Arguments {
cell_paths: (!cell_paths.is_empty()).then_some(cell_paths),
graphemes: grapheme_flags(call)?,
};
operate(action, args, input, call.head, engine_state.ctrlc.clone())
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Return the lengths of a string",
example: "'hello' | str length",
result: Some(Value::test_int(5)),
},
Example {
description: "Count length using grapheme clusters",
example: "'🇯🇵ほげ ふが ぴよ' | str length -g",
result: Some(Value::test_int(9)),
},
Example {
description: "Return the lengths of multiple strings",
example: "['hi' 'there'] | str length",
result: Some(Value::List {
vals: vec![Value::test_int(2), Value::test_int(5)],
span: Span::test_data(),
}),
},
]
}
}
fn action(input: &Value, arg: &Arguments, head: Span) -> Value {
match input {
Value::String { val, .. } => Value::int(
if arg.graphemes {
val.graphemes(true).count()
} else {
val.len()
} as i64,
head,
),
Value::Error { .. } => input.clone(),
_ => Value::Error {
error: Box::new(ShellError::OnlySupportsThisInputType {
exp_input_type: "string".into(),
wrong_type: input.get_type().to_string(),
dst_span: head,
src_span: input.expect_span(),
}),
},
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// With grapheme counting disabled, the length is the string's byte count.
    #[test]
    fn use_utf8_bytes() {
        let args = Arguments {
            graphemes: false,
            cell_paths: None,
        };
        let input = Value::String {
            span: Span::test_data(),
            val: "🇯🇵ほげ ふが ぴよ".to_string(),
        };

        let expected = Value::test_int(28);
        assert_eq!(action(&input, &args, Span::test_data()), expected);
    }

    /// Passes the command to the crate's `test_examples` helper.
    #[test]
    fn test_examples() {
        crate::test_examples(SubCommand {})
    }
}