nushell/crates/nu-command/src/strings/size.rs

174 lines
4.7 KiB
Rust
Raw Normal View History

2021-10-12 23:55:29 +02:00
extern crate unicode_segmentation;
use unicode_segmentation::UnicodeSegmentation;
use nu_protocol::ast::Call;
2021-10-25 18:58:58 +02:00
use nu_protocol::engine::{Command, EngineState, Stack};
2021-12-03 00:11:25 +01:00
use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, Span, Value};
2021-10-12 23:55:29 +02:00
2021-10-25 06:01:02 +02:00
#[derive(Clone)]
2021-10-12 23:55:29 +02:00
pub struct Size;
impl Command for Size {
fn name(&self) -> &str {
"size"
}
fn signature(&self) -> Signature {
Signature::build("size").category(Category::Strings)
2021-10-12 23:55:29 +02:00
}
fn usage(&self) -> &str {
"Gather word count statistics on the text."
}
fn run(
&self,
2021-10-28 06:13:10 +02:00
engine_state: &EngineState,
2021-10-25 08:31:39 +02:00
_stack: &mut Stack,
2021-10-12 23:55:29 +02:00
call: &Call,
2021-10-25 06:01:02 +02:00
input: PipelineData,
) -> Result<PipelineData, ShellError> {
2021-10-28 06:13:10 +02:00
size(engine_state, call, input)
2021-10-12 23:55:29 +02:00
}
2021-10-13 06:15:37 +02:00
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Count the number of words in a string",
example: r#""There are seven words in this sentence" | size"#,
result: Some(Value::Record {
cols: vec![
"lines".into(),
"words".into(),
"chars".into(),
"bytes".into(),
],
vals: vec![
Value::Int {
val: 0,
span: Span::unknown(),
},
Value::Int {
val: 7,
span: Span::unknown(),
},
Value::Int {
val: 38,
span: Span::unknown(),
},
Value::Int {
val: 38,
span: Span::unknown(),
},
],
span: Span::unknown(),
}),
},
Example {
description: "Counts Unicode characters correctly in a string",
example: r#""Amélie Amelie" | size"#,
result: Some(Value::Record {
cols: vec![
"lines".into(),
"words".into(),
"chars".into(),
"bytes".into(),
],
vals: vec![
Value::Int {
val: 0,
span: Span::unknown(),
},
Value::Int {
val: 2,
span: Span::unknown(),
},
Value::Int {
val: 13,
span: Span::unknown(),
},
Value::Int {
val: 15,
span: Span::unknown(),
},
],
span: Span::unknown(),
}),
},
]
}
2021-10-12 23:55:29 +02:00
}
2021-10-28 06:13:10 +02:00
fn size(
engine_state: &EngineState,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
2021-10-12 23:55:29 +02:00
let span = call.head;
2021-10-28 06:13:10 +02:00
input.map(
move |v| match v.as_string() {
Ok(s) => count(&s, span),
Err(_) => Value::Error {
2021-12-03 00:11:25 +01:00
error: ShellError::PipelineMismatch("string".into(), span, span),
2021-10-12 23:55:29 +02:00
},
2021-10-25 23:14:21 +02:00
},
2021-10-28 06:13:10 +02:00
engine_state.ctrlc.clone(),
)
2021-10-12 23:55:29 +02:00
}
fn count(contents: &str, span: Span) -> Value {
let mut lines: i64 = 0;
let mut words: i64 = 0;
let mut chars: i64 = 0;
let bytes = contents.len() as i64;
let mut end_of_word = true;
for c in UnicodeSegmentation::graphemes(contents, true) {
chars += 1;
match c {
"\n" => {
lines += 1;
end_of_word = true;
}
" " => end_of_word = true,
_ => {
if end_of_word {
words += 1;
}
end_of_word = false;
}
}
}
2021-10-13 06:15:37 +02:00
let mut cols = vec![];
let mut vals = vec![];
cols.push("lines".into());
vals.push(Value::Int { val: lines, span });
2021-10-12 23:55:29 +02:00
2021-10-13 06:15:37 +02:00
cols.push("words".into());
vals.push(Value::Int { val: words, span });
cols.push("chars".into());
vals.push(Value::Int { val: chars, span });
cols.push("bytes".into());
vals.push(Value::Int { val: bytes, span });
Value::Record { cols, vals, span }
2021-10-12 23:55:29 +02:00
}
2021-10-13 06:15:37 +02:00
#[cfg(test)]
mod test {
    use super::*;

    /// Runs the command's documented examples through the crate-level
    /// example checker.
    #[test]
    fn test_examples() {
        // Called by full path: this function shares its name with the
        // crate-level helper it invokes.
        crate::test_examples(Size)
    }
}