fix(stats): Enable multiple command stats to be shown using unicode_segmentation (#1739)

* Enable multiple command stats to be shown

Add improved pipe splitting

Clean up split logic

Remove unnecessary lifetime annotations

Add per-column command padding

* Add failing test case

* Update #1054 to use unicode_segmentation

This addresses feedback in PR #1054

Closes #1054

* Address cargo clippy, fmt

---------

Co-authored-by: Sorenson Stallings <contact@sorenson.dev>
Co-authored-by: Ellie Huxtable <ellie@elliehuxtable.com>
This commit is contained in:
Eric Hodel 2024-02-26 04:07:59 -08:00 committed by GitHub
parent 43a1d3a862
commit a3743f846b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 137 additions and 13 deletions

5
Cargo.lock generated
View File

@ -217,6 +217,7 @@ dependencies = [
"tracing",
"tracing-subscriber",
"tracing-tree",
"unicode-segmentation",
"unicode-width",
"uuid",
"whoami",
@ -3937,9 +3938,9 @@ dependencies = [
[[package]]
name = "unicode-segmentation"
version = "1.10.1"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
[[package]]
name = "unicode-width"

View File

@ -78,6 +78,7 @@ ratatui = "0.25"
tracing = "0.1"
cli-clipboard = { version = "0.4.0", optional = true }
uuid = { workspace = true }
unicode-segmentation = "1.11.0"
[dependencies.tracing-subscriber]

View File

@ -1,6 +1,5 @@
use std::collections::{HashMap, HashSet};
use atuin_common::utils::Escapable as _;
use clap::Parser;
use crossterm::style::{Color, ResetColor, SetAttribute, SetForegroundColor};
use eyre::Result;
@ -12,6 +11,7 @@ use atuin_client::{
settings::Settings,
};
use time::{Duration, OffsetDateTime, Time};
use unicode_segmentation::UnicodeSegmentation;
#[derive(Parser, Debug)]
#[command(infer_subcommands = true)]
@ -22,12 +22,60 @@ pub struct Cmd {
/// How many top commands to list
#[arg(long, short, default_value = "10")]
count: usize,
/// The number of consecutive commands to consider
#[arg(long, short, default_value = "1")]
ngram_size: usize,
}
fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usize, usize) {
/// Splits a shell command line into the segments between unquoted, unescaped
/// `|` characters.
///
/// The returned slices borrow from `command` and are NOT trimmed: whitespace
/// around each pipe is kept, so `"fd | rg"` yields `["fd ", " rg"]`. The
/// result always contains at least one element; consecutive pipes (`||`) and
/// a leading pipe produce empty segments.
///
/// Rules applied while scanning grapheme by grapheme:
/// * `"` and `'` open a quoted region that is only closed by the same quote
///   character; the other quote character inside it is treated as plain text.
/// * `\` causes the following grapheme to be skipped, so `\|` never splits.
/// * `|` outside of a quoted region ends the current segment.
fn split_at_pipe(command: &str) -> Vec<&str> {
    let mut result = vec![];
    // The quote grapheme that opened the current quoted region, if any.
    let mut open_quote: Option<&str> = None;
    // Byte offset where the current segment begins. After the first split it
    // points AT the pipe that ended the previous segment; that pipe is
    // stepped over just before the next slice is taken.
    let mut start = 0;
    let mut graphemes = UnicodeSegmentation::grapheme_indices(command, true);

    while let Some((current, grapheme)) = graphemes.next() {
        match grapheme {
            "\"" | "'" => {
                // NOTE(review): a quote is ignored when the segment text so
                // far is exactly that same quote character. This rule is kept
                // from the original implementation; its intent is unclear.
                if &command[start..current] != grapheme {
                    open_quote = match open_quote {
                        None => Some(grapheme),
                        Some(opener) if opener == grapheme => None,
                        // A different quote kind inside a quoted region is
                        // literal text and must not change the state.
                        still_open => still_open,
                    };
                }
            }
            "\\" => {
                // Consume the escaped grapheme so it is never interpreted.
                graphemes.next();
            }
            "|" if open_quote.is_none() => {
                // Step over the previous pipe, but only if this pipe is a
                // different one. When the command itself begins with `|`,
                // `start == current` and stepping would create the inverted
                // range `1..0`, which panics.
                if current > start && command[start..].starts_with('|') {
                    start += 1;
                }
                result.push(&command[start..current]);
                start = current;
            }
            _ => {}
        }
    }

    // The final segment runs to the end of the command; drop the pipe that
    // introduced it, if there is one.
    if command[start..].starts_with('|') {
        start += 1;
    }
    result.push(&command[start..]);
    result
}
fn compute_stats(
settings: &Settings,
history: &[History],
count: usize,
ngram_size: usize,
) -> (usize, usize) {
let mut commands = HashSet::<&str>::with_capacity(history.len());
let mut prefixes = HashMap::<&str, usize>::with_capacity(history.len());
let mut total_unignored = 0;
let mut prefixes = HashMap::<Vec<&str>, usize>::with_capacity(history.len());
for i in history {
// just in case it somehow has a leading tab or space or something (legacy atuin didn't ignore space prefixes)
let command = i.command.trim();
@ -39,7 +87,21 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
total_unignored += 1;
commands.insert(command);
*prefixes.entry(prefix).or_default() += 1;
split_at_pipe(i.command.trim())
.iter()
.map(|l| {
let command = l.trim();
commands.insert(command);
command
})
.collect::<Vec<_>>()
.windows(ngram_size)
.for_each(|w| {
*prefixes
.entry(w.iter().map(|c| interesting_command(settings, c)).collect())
.or_default() += 1;
});
}
let unique = commands.len();
@ -54,6 +116,17 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
let max = top.iter().map(|x| x.1).max().unwrap();
let num_pad = max.ilog10() as usize + 1;
// Find the length of the longest command name for each column
let column_widths = top
.iter()
.map(|(commands, _)| commands.iter().map(|c| c.len()).collect::<Vec<usize>>())
.fold(vec![0; ngram_size], |acc, item| {
acc.iter()
.zip(item.iter())
.map(|(a, i)| *std::cmp::max(a, i))
.collect()
});
for (command, count) in top {
let gray = SetForegroundColor(Color::Grey);
let bold = SetAttribute(crossterm::style::Attribute::Bold);
@ -74,10 +147,14 @@ fn compute_stats(settings: &Settings, history: &[History], count: usize) -> (usi
print!(" ");
}
println!(
"{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{}{ResetColor}",
command.escape_control()
);
let formatted_command = command
.iter()
.zip(column_widths.iter())
.map(|(cmd, width)| format!("{cmd:width$}"))
.collect::<Vec<_>>()
.join(" | ");
println!("{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{formatted_command}{ResetColor}");
}
println!("Total commands: {total_unignored}");
println!("Unique commands: {unique}");
@ -120,7 +197,7 @@ impl Cmd {
let end = start + Duration::days(1);
db.range(start, end).await?
};
compute_stats(settings, &history, self.count);
compute_stats(settings, &history, self.count, self.ngram_size);
Ok(())
}
}
@ -189,7 +266,7 @@ mod tests {
use time::OffsetDateTime;
use super::compute_stats;
use super::interesting_command;
use super::{interesting_command, split_at_pipe};
#[test]
fn ignored_commands() {
@ -209,7 +286,7 @@ mod tests {
.into(),
];
let (total, unique) = compute_stats(&settings, &history, 10);
let (total, unique) = compute_stats(&settings, &history, 10, 1);
assert_eq!(total, 1);
assert_eq!(unique, 1);
}
@ -312,4 +389,49 @@ mod tests {
"cargo build foo"
);
}
// One unquoted pipe produces two segments; surrounding whitespace is kept.
#[test]
fn split_simple() {
    let segments = split_at_pipe("fd | rg");
    let expected = ["fd ", " rg"];
    assert_eq!(segments, expected);
}
// Every unquoted pipe in a longer pipeline starts a new segment.
#[test]
fn split_multi() {
    let segments = split_at_pipe("kubectl | jq | rg");
    let expected = ["kubectl ", " jq ", " rg"];
    assert_eq!(segments, expected);
}
// A pipe inside single quotes belongs to its segment and does not split.
#[test]
fn split_simple_quoted() {
    let input = "foo | bar 'baz {} | quux' | xyzzy";
    let expected = ["foo ", " bar 'baz {} | quux' ", " xyzzy"];
    assert_eq!(split_at_pipe(input), expected);
}
// Double quotes nested inside single quotes still keep the inner pipe quoted.
#[test]
fn split_multi_quoted() {
    let input = "foo | bar 'baz \"{}\" | quux' | xyzzy";
    let expected = ["foo ", " bar 'baz \"{}\" | quux' ", " xyzzy"];
    assert_eq!(split_at_pipe(input), expected);
}
// A backslash-escaped pipe is left inside its segment.
#[test]
fn escaped_pipes() {
    let input = "foo | bar baz \\| quux";
    let expected = ["foo ", " bar baz \\| quux"];
    assert_eq!(split_at_pipe(input), expected);
}
// Multi-byte graphemes must not break slicing; with no pipe present the
// whole command comes back as a single segment.
#[test]
fn emoji() {
    let input = "git commit -m \"🚀\"";
    let expected = ["git commit -m \"🚀\""];
    assert_eq!(split_at_pipe(input), expected);
}
}