nushell/crates/nu-command/src/commands/size.rs

128 lines
3.8 KiB
Rust
Raw Normal View History

extern crate unicode_segmentation;
2019-05-26 04:04:13 +02:00
use crate::prelude::*;
use indexmap::indexmap;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{ReturnSuccess, Signature, TaggedDictBuilder, UntaggedValue, Value};
use unicode_segmentation::UnicodeSegmentation;
2019-05-26 04:04:13 +02:00
/// The `size` command: gathers word count statistics on text received from
/// the pipeline.
///
/// The type carries no state; its behaviour comes from its
/// `WholeStreamCommand` implementation.
pub struct Size;
2020-05-29 10:22:52 +02:00
#[async_trait]
impl WholeStreamCommand for Size {
fn name(&self) -> &str {
"size"
}
fn signature(&self) -> Signature {
Signature::build("size")
}
fn usage(&self) -> &str {
"Gather word count statistics on the text."
}
async fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
2021-02-12 11:13:14 +01:00
Ok(size(args))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Count the number of words in a string",
example: r#"echo "There are seven words in this sentence" | size"#,
result: Some(vec![UntaggedValue::row(indexmap! {
"lines".to_string() => UntaggedValue::int(0).into(),
"words".to_string() => UntaggedValue::int(7).into(),
"chars".to_string() => UntaggedValue::int(38).into(),
"bytes".to_string() => UntaggedValue::int(38).into(),
})
.into()]),
},
Example {
description: "Counts Unicode characters correctly in a string",
example: r#"echo "Amélie Amelie" | size"#,
result: Some(vec![UntaggedValue::row(indexmap! {
"lines".to_string() => UntaggedValue::int(0).into(),
"words".to_string() => UntaggedValue::int(2).into(),
"chars".to_string() => UntaggedValue::int(13).into(),
"bytes".to_string() => UntaggedValue::int(15).into(),
})
.into()]),
},
]
}
}
2021-02-12 11:13:14 +01:00
fn size(args: CommandArgs) -> OutputStream {
2019-07-16 21:10:25 +02:00
let input = args.input;
let tag = args.call_info.name_tag;
let name_span = tag.span;
2021-02-12 11:13:14 +01:00
input
.map(move |v| {
if let Ok(s) = v.as_string() {
ReturnSuccess::value(count(&s, &v.tag))
} else {
Err(ShellError::labeled_error_with_secondary(
"Expected a string from pipeline",
"requires string input",
name_span,
"value originates from here",
v.tag.span,
))
}
2019-07-16 21:10:25 +02:00
})
2021-02-12 11:13:14 +01:00
.to_output_stream()
2019-05-26 04:04:13 +02:00
}
fn count(contents: &str, tag: impl Into<Tag>) -> Value {
2019-05-26 04:04:13 +02:00
let mut lines: i64 = 0;
let mut words: i64 = 0;
let mut chars: i64 = 0;
let bytes = contents.len() as i64;
2019-05-26 04:04:13 +02:00
let mut end_of_word = true;
for c in UnicodeSegmentation::graphemes(contents, true) {
2019-05-26 04:04:13 +02:00
chars += 1;
match c {
"\n" => {
2019-05-26 04:04:13 +02:00
lines += 1;
end_of_word = true;
}
" " => end_of_word = true,
2019-05-26 04:04:13 +02:00
_ => {
if end_of_word {
words += 1;
}
end_of_word = false;
}
}
}
let mut dict = TaggedDictBuilder::new(tag);
//TODO: add back in name when we have it in the tag
//dict.insert("name", value::string(name));
dict.insert_untagged("lines", UntaggedValue::int(lines));
dict.insert_untagged("words", UntaggedValue::int(words));
dict.insert_untagged("chars", UntaggedValue::int(chars));
dict.insert_untagged("bytes", UntaggedValue::int(bytes));
2019-05-26 04:04:13 +02:00
dict.into_value()
2019-05-26 04:04:13 +02:00
}
#[cfg(test)]
mod tests {
    use super::{ShellError, Size};
    use crate::examples::test as test_examples;

    /// Feeds the `Size` command to the crate's examples test helper and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_examples(Size)
    }
}