2021-10-12 23:55:29 +02:00
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
|
|
|
use nu_protocol::ast::Call;
|
2021-10-25 18:58:58 +02:00
|
|
|
use nu_protocol::engine::{Command, EngineState, Stack};
|
2021-12-03 00:11:25 +01:00
|
|
|
use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, Span, Value};
|
2021-10-12 23:55:29 +02:00
|
|
|
|
2021-10-25 06:01:02 +02:00
|
|
|
/// The `size` command: a stateless marker type that carries the command's
/// `Command` implementation.
#[derive(Clone)]
pub struct Size;
|
|
|
|
|
|
|
|
impl Command for Size {
|
|
|
|
fn name(&self) -> &str {
|
|
|
|
"size"
|
|
|
|
}
|
|
|
|
|
|
|
|
fn signature(&self) -> Signature {
|
2021-11-17 05:22:37 +01:00
|
|
|
Signature::build("size").category(Category::Strings)
|
2021-10-12 23:55:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
fn usage(&self) -> &str {
|
|
|
|
"Gather word count statistics on the text."
|
|
|
|
}
|
|
|
|
|
|
|
|
fn run(
|
|
|
|
&self,
|
2021-10-28 06:13:10 +02:00
|
|
|
engine_state: &EngineState,
|
2021-10-25 08:31:39 +02:00
|
|
|
_stack: &mut Stack,
|
2021-10-12 23:55:29 +02:00
|
|
|
call: &Call,
|
2021-10-25 06:01:02 +02:00
|
|
|
input: PipelineData,
|
|
|
|
) -> Result<PipelineData, ShellError> {
|
2021-10-28 06:13:10 +02:00
|
|
|
size(engine_state, call, input)
|
2021-10-12 23:55:29 +02:00
|
|
|
}
|
|
|
|
|
2021-10-13 06:15:37 +02:00
|
|
|
fn examples(&self) -> Vec<Example> {
|
|
|
|
vec![
|
|
|
|
Example {
|
|
|
|
description: "Count the number of words in a string",
|
|
|
|
example: r#""There are seven words in this sentence" | size"#,
|
|
|
|
result: Some(Value::Record {
|
|
|
|
cols: vec![
|
|
|
|
"lines".into(),
|
|
|
|
"words".into(),
|
|
|
|
"chars".into(),
|
|
|
|
"bytes".into(),
|
|
|
|
],
|
|
|
|
vals: vec![
|
|
|
|
Value::Int {
|
|
|
|
val: 0,
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
},
|
|
|
|
Value::Int {
|
|
|
|
val: 7,
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
},
|
|
|
|
Value::Int {
|
|
|
|
val: 38,
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
},
|
|
|
|
Value::Int {
|
|
|
|
val: 38,
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
},
|
|
|
|
],
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
}),
|
|
|
|
},
|
|
|
|
Example {
|
|
|
|
description: "Counts Unicode characters correctly in a string",
|
|
|
|
example: r#""Amélie Amelie" | size"#,
|
|
|
|
result: Some(Value::Record {
|
|
|
|
cols: vec![
|
|
|
|
"lines".into(),
|
|
|
|
"words".into(),
|
|
|
|
"chars".into(),
|
|
|
|
"bytes".into(),
|
|
|
|
],
|
|
|
|
vals: vec![
|
|
|
|
Value::Int {
|
|
|
|
val: 0,
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
},
|
|
|
|
Value::Int {
|
|
|
|
val: 2,
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
},
|
|
|
|
Value::Int {
|
|
|
|
val: 13,
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
},
|
|
|
|
Value::Int {
|
|
|
|
val: 15,
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
},
|
|
|
|
],
|
2021-12-19 08:46:13 +01:00
|
|
|
span: Span::test_data(),
|
2021-10-13 06:15:37 +02:00
|
|
|
}),
|
|
|
|
},
|
|
|
|
]
|
|
|
|
}
|
2021-10-12 23:55:29 +02:00
|
|
|
}
|
|
|
|
|
2021-10-28 06:13:10 +02:00
|
|
|
fn size(
|
|
|
|
engine_state: &EngineState,
|
|
|
|
call: &Call,
|
|
|
|
input: PipelineData,
|
|
|
|
) -> Result<PipelineData, ShellError> {
|
2021-10-12 23:55:29 +02:00
|
|
|
let span = call.head;
|
2021-10-28 06:13:10 +02:00
|
|
|
input.map(
|
|
|
|
move |v| match v.as_string() {
|
|
|
|
Ok(s) => count(&s, span),
|
|
|
|
Err(_) => Value::Error {
|
2021-12-03 00:11:25 +01:00
|
|
|
error: ShellError::PipelineMismatch("string".into(), span, span),
|
2021-10-12 23:55:29 +02:00
|
|
|
},
|
2021-10-25 23:14:21 +02:00
|
|
|
},
|
2021-10-28 06:13:10 +02:00
|
|
|
engine_state.ctrlc.clone(),
|
|
|
|
)
|
2021-10-12 23:55:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
fn count(contents: &str, span: Span) -> Value {
|
|
|
|
let mut lines: i64 = 0;
|
|
|
|
let mut words: i64 = 0;
|
|
|
|
let mut chars: i64 = 0;
|
|
|
|
let bytes = contents.len() as i64;
|
|
|
|
let mut end_of_word = true;
|
|
|
|
|
|
|
|
for c in UnicodeSegmentation::graphemes(contents, true) {
|
|
|
|
chars += 1;
|
|
|
|
|
|
|
|
match c {
|
|
|
|
"\n" => {
|
|
|
|
lines += 1;
|
|
|
|
end_of_word = true;
|
|
|
|
}
|
|
|
|
" " => end_of_word = true,
|
|
|
|
_ => {
|
|
|
|
if end_of_word {
|
|
|
|
words += 1;
|
|
|
|
}
|
|
|
|
end_of_word = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-13 06:15:37 +02:00
|
|
|
let mut cols = vec![];
|
|
|
|
let mut vals = vec![];
|
|
|
|
|
|
|
|
cols.push("lines".into());
|
|
|
|
vals.push(Value::Int { val: lines, span });
|
2021-10-12 23:55:29 +02:00
|
|
|
|
2021-10-13 06:15:37 +02:00
|
|
|
cols.push("words".into());
|
|
|
|
vals.push(Value::Int { val: words, span });
|
|
|
|
|
|
|
|
cols.push("chars".into());
|
|
|
|
vals.push(Value::Int { val: chars, span });
|
|
|
|
|
|
|
|
cols.push("bytes".into());
|
|
|
|
vals.push(Value::Int { val: bytes, span });
|
|
|
|
|
|
|
|
Value::Record { cols, vals, span }
|
2021-10-12 23:55:29 +02:00
|
|
|
}
|
|
|
|
|
2021-10-13 06:15:37 +02:00
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// Runs the crate's shared example checker against the `Size` command.
    #[test]
    fn test_examples() {
        crate::test_examples(Size {})
    }
}
|