mirror of
https://github.com/nushell/nushell.git
synced 2025-04-28 23:28:20 +02:00
add unicode-width to str stats
(#14014)
# Description This PR adds another measure of length to `str stats`: `unicode-width`, the number of terminal columns the string occupies when displayed. ```nushell ❯ "\u{ff03}" | str stats ╭───────────────┬───╮ │ lines │ 1 │ │ words │ 0 │ │ bytes │ 3 │ │ chars │ 1 │ │ graphemes │ 1 │ │ unicode-width │ 2 │ ╰───────────────┴───╯ ❯ "Amélie Amelie" | str stats ╭───────────────┬────╮ │ lines │ 1 │ │ words │ 2 │ │ bytes │ 15 │ │ chars │ 14 │ │ graphemes │ 13 │ │ unicode-width │ 13 │ ╰───────────────┴────╯ ❯ '今天天气真好' | str stats ╭───────────────┬────╮ │ lines │ 1 │ │ words │ 6 │ │ bytes │ 18 │ │ chars │ 6 │ │ graphemes │ 6 │ │ unicode-width │ 12 │ ╰───────────────┴────╯ ❯ "Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα." | str stats ╭───────────────┬────╮ │ lines │ 1 │ │ words │ 9 │ │ bytes │ 96 │ │ chars │ 50 │ │ graphemes │ 50 │ │ unicode-width │ 50 │ ╰───────────────┴────╯ ❯ "\n" | str stats ╭───────────────┬───╮ │ lines │ 1 │ │ words │ 0 │ │ bytes │ 1 │ │ chars │ 1 │ │ graphemes │ 1 │ │ unicode-width │ 0 │ ╰───────────────┴───╯ ``` The idea for this PR came from wondering whether we could replace `#` with `\u{ff03}` (the fullwidth number sign, which is two columns wide) in tables. # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> # Tests + Formatting <!-- Don't forget to add tests that cover your changes. 
Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
d6f4e4c4fe
commit
6dc71f5ad0
@ -64,17 +64,19 @@ impl Command for SubCommand {
|
|||||||
"bytes" => Value::test_int(38),
|
"bytes" => Value::test_int(38),
|
||||||
"chars" => Value::test_int(38),
|
"chars" => Value::test_int(38),
|
||||||
"graphemes" => Value::test_int(38),
|
"graphemes" => Value::test_int(38),
|
||||||
|
"unicode-width" => Value::test_int(38),
|
||||||
})),
|
})),
|
||||||
},
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Counts unicode characters",
|
description: "Counts unicode characters",
|
||||||
example: r#"'今天天气真好' | str stats "#,
|
example: r#"'今天天气真好' | str stats"#,
|
||||||
result: Some(Value::test_record(record! {
|
result: Some(Value::test_record(record! {
|
||||||
"lines" => Value::test_int(1),
|
"lines" => Value::test_int(1),
|
||||||
"words" => Value::test_int(6),
|
"words" => Value::test_int(6),
|
||||||
"bytes" => Value::test_int(18),
|
"bytes" => Value::test_int(18),
|
||||||
"chars" => Value::test_int(6),
|
"chars" => Value::test_int(6),
|
||||||
"graphemes" => Value::test_int(6),
|
"graphemes" => Value::test_int(6),
|
||||||
|
"unicode-width" => Value::test_int(12),
|
||||||
})),
|
})),
|
||||||
},
|
},
|
||||||
Example {
|
Example {
|
||||||
@ -86,6 +88,7 @@ impl Command for SubCommand {
|
|||||||
"bytes" => Value::test_int(15),
|
"bytes" => Value::test_int(15),
|
||||||
"chars" => Value::test_int(14),
|
"chars" => Value::test_int(14),
|
||||||
"graphemes" => Value::test_int(13),
|
"graphemes" => Value::test_int(13),
|
||||||
|
"unicode-width" => Value::test_int(13),
|
||||||
})),
|
})),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -139,6 +142,7 @@ fn counter(contents: &str, span: Span) -> Value {
|
|||||||
"bytes" => get_count(&counts, Counter::Bytes, span),
|
"bytes" => get_count(&counts, Counter::Bytes, span),
|
||||||
"chars" => get_count(&counts, Counter::CodePoints, span),
|
"chars" => get_count(&counts, Counter::CodePoints, span),
|
||||||
"graphemes" => get_count(&counts, Counter::GraphemeClusters, span),
|
"graphemes" => get_count(&counts, Counter::GraphemeClusters, span),
|
||||||
|
"unicode-width" => get_count(&counts, Counter::UnicodeWidth, span),
|
||||||
};
|
};
|
||||||
|
|
||||||
Value::record(record, span)
|
Value::record(record, span)
|
||||||
@ -208,6 +212,7 @@ impl Count for Counter {
|
|||||||
}
|
}
|
||||||
Counter::Words => s.unicode_words().count(),
|
Counter::Words => s.unicode_words().count(),
|
||||||
Counter::CodePoints => s.chars().count(),
|
Counter::CodePoints => s.chars().count(),
|
||||||
|
Counter::UnicodeWidth => unicode_width::UnicodeWidthStr::width(s),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -229,15 +234,19 @@ pub enum Counter {
|
|||||||
|
|
||||||
/// Counts unicode code points
|
/// Counts unicode code points
|
||||||
CodePoints,
|
CodePoints,
|
||||||
|
|
||||||
|
/// Counts the width of the string
|
||||||
|
UnicodeWidth,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A convenience array of all counter types.
|
/// A convenience array of all counter types.
|
||||||
pub const ALL_COUNTERS: [Counter; 5] = [
|
pub const ALL_COUNTERS: [Counter; 6] = [
|
||||||
Counter::GraphemeClusters,
|
Counter::GraphemeClusters,
|
||||||
Counter::Bytes,
|
Counter::Bytes,
|
||||||
Counter::Lines,
|
Counter::Lines,
|
||||||
Counter::Words,
|
Counter::Words,
|
||||||
Counter::CodePoints,
|
Counter::CodePoints,
|
||||||
|
Counter::UnicodeWidth,
|
||||||
];
|
];
|
||||||
|
|
||||||
impl fmt::Display for Counter {
|
impl fmt::Display for Counter {
|
||||||
@ -248,6 +257,7 @@ impl fmt::Display for Counter {
|
|||||||
Counter::Lines => "lines",
|
Counter::Lines => "lines",
|
||||||
Counter::Words => "words",
|
Counter::Words => "words",
|
||||||
Counter::CodePoints => "codepoints",
|
Counter::CodePoints => "codepoints",
|
||||||
|
Counter::UnicodeWidth => "unicode-width",
|
||||||
};
|
};
|
||||||
|
|
||||||
write!(f, "{s}")
|
write!(f, "{s}")
|
||||||
@ -297,6 +307,7 @@ fn test_one_newline() {
|
|||||||
correct_counts.insert(Counter::GraphemeClusters, 1);
|
correct_counts.insert(Counter::GraphemeClusters, 1);
|
||||||
correct_counts.insert(Counter::Bytes, 1);
|
correct_counts.insert(Counter::Bytes, 1);
|
||||||
correct_counts.insert(Counter::CodePoints, 1);
|
correct_counts.insert(Counter::CodePoints, 1);
|
||||||
|
correct_counts.insert(Counter::UnicodeWidth, 0);
|
||||||
|
|
||||||
assert_eq!(correct_counts, counts);
|
assert_eq!(correct_counts, counts);
|
||||||
}
|
}
|
||||||
@ -336,6 +347,7 @@ fn test_count_counts_lines() {
|
|||||||
|
|
||||||
// one more than grapheme clusters because of \r\n
|
// one more than grapheme clusters because of \r\n
|
||||||
correct_counts.insert(Counter::CodePoints, 24);
|
correct_counts.insert(Counter::CodePoints, 24);
|
||||||
|
correct_counts.insert(Counter::UnicodeWidth, 17);
|
||||||
|
|
||||||
assert_eq!(correct_counts, counts);
|
assert_eq!(correct_counts, counts);
|
||||||
}
|
}
|
||||||
@ -353,6 +365,7 @@ fn test_count_counts_words() {
|
|||||||
correct_counts.insert(Counter::Bytes, i_can_eat_glass.len());
|
correct_counts.insert(Counter::Bytes, i_can_eat_glass.len());
|
||||||
correct_counts.insert(Counter::Words, 9);
|
correct_counts.insert(Counter::Words, 9);
|
||||||
correct_counts.insert(Counter::CodePoints, 50);
|
correct_counts.insert(Counter::CodePoints, 50);
|
||||||
|
correct_counts.insert(Counter::UnicodeWidth, 50);
|
||||||
|
|
||||||
assert_eq!(correct_counts, counts);
|
assert_eq!(correct_counts, counts);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user