mirror of
https://github.com/atuinsh/atuin.git
synced 2025-01-14 02:08:19 +01:00
fix(stats): Enable multiple command stats to be shown using unicode_segmentation (#1739)
* Enable multiple command stats to be shown Add improved pipe splitting Clean up split logic Remove unnecessary lifetime annotations Add per-column command padding * Add failing test case * Update #1054 to use unicode_segmentation This addresses feedback in PR #1054 Closes #1054 * Address cargo clippy, fmt --------- Co-authored-by: Sorenson Stallings <contact@sorenson.dev> Co-authored-by: Ellie Huxtable <ellie@elliehuxtable.com>
This commit is contained in:
parent
43a1d3a862
commit
a3743f846b
5
Cargo.lock
generated
5
Cargo.lock
generated
@ -217,6 +217,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"tracing-tree",
|
||||
"unicode-segmentation",
|
||||
"unicode-width",
|
||||
"uuid",
|
||||
"whoami",
|
||||
@ -3937,9 +3938,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.10.1"
|
||||
version = "1.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
|
||||
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
|
@ -78,6 +78,7 @@ ratatui = "0.25"
|
||||
tracing = "0.1"
|
||||
cli-clipboard = { version = "0.4.0", optional = true }
|
||||
uuid = { workspace = true }
|
||||
unicode-segmentation = "1.11.0"
|
||||
|
||||
|
||||
[dependencies.tracing-subscriber]
|
||||
|
@ -1,6 +1,5 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use atuin_common::utils::Escapable as _;
|
||||
use clap::Parser;
|
||||
use crossterm::style::{Color, ResetColor, SetAttribute, SetForegroundColor};
|
||||
use eyre::Result;
|
||||
@ -12,6 +11,7 @@ use atuin_client::{
|
||||
settings::Settings,
|
||||
};
|
||||
use time::{Duration, OffsetDateTime, Time};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(infer_subcommands = true)]
|
||||
@ -22,12 +22,60 @@ pub struct Cmd {
|
||||
/// How many top commands to list
|
||||
#[arg(long, short, default_value = "10")]
|
||||
count: usize,
|
||||
|
||||
/// The number of consecutive commands to consider
|
||||
#[arg(long, short, default_value = "1")]
|
||||
ngram_size: usize,
|
||||
}
|
||||
|
||||
fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usize, usize) {
|
||||
fn split_at_pipe(command: &str) -> Vec<&str> {
|
||||
let mut result = vec![];
|
||||
let mut quoted = false;
|
||||
let mut start = 0;
|
||||
let mut graphemes = UnicodeSegmentation::grapheme_indices(command, true);
|
||||
|
||||
while let Some((i, c)) = graphemes.next() {
|
||||
let current = i;
|
||||
match c {
|
||||
"\"" => {
|
||||
if command[start..current] != *"\"" {
|
||||
quoted = !quoted;
|
||||
}
|
||||
}
|
||||
"'" => {
|
||||
if command[start..current] != *"'" {
|
||||
quoted = !quoted;
|
||||
}
|
||||
}
|
||||
"\\" => if graphemes.next().is_some() {},
|
||||
"|" => {
|
||||
if !quoted {
|
||||
if command[start..].starts_with('|') {
|
||||
start += 1;
|
||||
}
|
||||
result.push(&command[start..current]);
|
||||
start = current;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if command[start..].starts_with('|') {
|
||||
start += 1;
|
||||
}
|
||||
result.push(&command[start..]);
|
||||
result
|
||||
}
|
||||
|
||||
fn compute_stats(
|
||||
settings: &Settings,
|
||||
history: &[History],
|
||||
count: usize,
|
||||
ngram_size: usize,
|
||||
) -> (usize, usize) {
|
||||
let mut commands = HashSet::<&str>::with_capacity(history.len());
|
||||
let mut prefixes = HashMap::<&str, usize>::with_capacity(history.len());
|
||||
let mut total_unignored = 0;
|
||||
let mut prefixes = HashMap::<Vec<&str>, usize>::with_capacity(history.len());
|
||||
for i in history {
|
||||
// just in case it somehow has a leading tab or space or something (legacy atuin didn't ignore space prefixes)
|
||||
let command = i.command.trim();
|
||||
@ -39,7 +87,21 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
|
||||
|
||||
total_unignored += 1;
|
||||
commands.insert(command);
|
||||
*prefixes.entry(prefix).or_default() += 1;
|
||||
|
||||
split_at_pipe(i.command.trim())
|
||||
.iter()
|
||||
.map(|l| {
|
||||
let command = l.trim();
|
||||
commands.insert(command);
|
||||
command
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.windows(ngram_size)
|
||||
.for_each(|w| {
|
||||
*prefixes
|
||||
.entry(w.iter().map(|c| interesting_command(settings, c)).collect())
|
||||
.or_default() += 1;
|
||||
});
|
||||
}
|
||||
|
||||
let unique = commands.len();
|
||||
@ -54,6 +116,17 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
|
||||
let max = top.iter().map(|x| x.1).max().unwrap();
|
||||
let num_pad = max.ilog10() as usize + 1;
|
||||
|
||||
// Find the length of the longest command name for each column
|
||||
let column_widths = top
|
||||
.iter()
|
||||
.map(|(commands, _)| commands.iter().map(|c| c.len()).collect::<Vec<usize>>())
|
||||
.fold(vec![0; ngram_size], |acc, item| {
|
||||
acc.iter()
|
||||
.zip(item.iter())
|
||||
.map(|(a, i)| *std::cmp::max(a, i))
|
||||
.collect()
|
||||
});
|
||||
|
||||
for (command, count) in top {
|
||||
let gray = SetForegroundColor(Color::Grey);
|
||||
let bold = SetAttribute(crossterm::style::Attribute::Bold);
|
||||
@ -74,10 +147,14 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
|
||||
print!(" ");
|
||||
}
|
||||
|
||||
println!(
|
||||
"{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{}{ResetColor}",
|
||||
command.escape_control()
|
||||
);
|
||||
let formatted_command = command
|
||||
.iter()
|
||||
.zip(column_widths.iter())
|
||||
.map(|(cmd, width)| format!("{cmd:width$}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(" | ");
|
||||
|
||||
println!("{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{formatted_command}{ResetColor}");
|
||||
}
|
||||
println!("Total commands: {total_unignored}");
|
||||
println!("Unique commands: {unique}");
|
||||
@ -120,7 +197,7 @@ impl Cmd {
|
||||
let end = start + Duration::days(1);
|
||||
db.range(start, end).await?
|
||||
};
|
||||
compute_stats(settings, &history, self.count);
|
||||
compute_stats(settings, &history, self.count, self.ngram_size);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@ -189,7 +266,7 @@ mod tests {
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::compute_stats;
|
||||
use super::interesting_command;
|
||||
use super::{interesting_command, split_at_pipe};
|
||||
|
||||
#[test]
|
||||
fn ignored_commands() {
|
||||
@ -209,7 +286,7 @@ mod tests {
|
||||
.into(),
|
||||
];
|
||||
|
||||
let (total, unique) = compute_stats(&settings, &history, 10);
|
||||
let (total, unique) = compute_stats(&settings, &history, 10, 1);
|
||||
assert_eq!(total, 1);
|
||||
assert_eq!(unique, 1);
|
||||
}
|
||||
@ -312,4 +389,49 @@ mod tests {
|
||||
"cargo build foo"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn split_simple() {
    // One unquoted pipe gives two stages; the whitespace around it is kept.
    let stages = split_at_pipe("fd | rg");
    assert_eq!(stages, ["fd ", " rg"]);
}
|
||||
|
||||
#[test]
fn split_multi() {
    // Every unquoted pipe in a longer pipeline starts a new stage.
    let stages = split_at_pipe("kubectl | jq | rg");
    let expected = ["kubectl ", " jq ", " rg"];
    assert_eq!(stages, expected);
}
|
||||
|
||||
#[test]
fn split_simple_quoted() {
    // A pipe inside single quotes belongs to the argument and must not split.
    let stages = split_at_pipe("foo | bar 'baz {} | quux' | xyzzy");
    let expected = ["foo ", " bar 'baz {} | quux' ", " xyzzy"];
    assert_eq!(stages, expected);
}
|
||||
|
||||
#[test]
fn split_multi_quoted() {
    // Double quotes nested inside single quotes must not end the quoted region.
    let stages = split_at_pipe("foo | bar 'baz \"{}\" | quux' | xyzzy");
    let expected = ["foo ", " bar 'baz \"{}\" | quux' ", " xyzzy"];
    assert_eq!(stages, expected);
}
|
||||
|
||||
#[test]
fn escaped_pipes() {
    // A backslash-escaped pipe stays inside its stage.
    let stages = split_at_pipe("foo | bar baz \\| quux");
    let expected = ["foo ", " bar baz \\| quux"];
    assert_eq!(stages, expected);
}
|
||||
|
||||
#[test]
fn emoji() {
    // Multi-byte characters must not break slicing; with no pipe the whole
    // command comes back as a single stage.
    let input = "git commit -m \"🚀\"";
    let stages = split_at_pipe(input);
    assert_eq!(stages, ["git commit -m \"🚀\""]);
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user