Rework Bash import (#747)

* Rework Bash import

Closes #745
- Imported history is now ordered correctly
- Timestamps (when `HISTTIMEFORMAT` is set) are handled correctly

* Timestamp tests now check for strict sorting
This commit is contained in:
cyqsimon 2023-03-02 18:05:19 +08:00 committed by GitHub
parent a033890506
commit 63c572104b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,8 +1,10 @@
use std::{fs::File, io::Read, path::PathBuf}; use std::{fs::File, io::Read, path::PathBuf, str};
use async_trait::async_trait; use async_trait::async_trait;
use chrono::{DateTime, Duration, NaiveDateTime, Utc};
use directories::UserDirs; use directories::UserDirs;
use eyre::{eyre, Result}; use eyre::{eyre, Result};
use itertools::Itertools;
use super::{get_histpath, unix_byte_lines, Importer, Loader}; use super::{get_histpath, unix_byte_lines, Importer, Loader};
use crate::history::History; use crate::history::History;
@ -32,37 +34,54 @@ impl Importer for Bash {
} }
async fn entries(&mut self) -> Result<usize> { async fn entries(&mut self) -> Result<usize> {
Ok(super::count_lines(&self.bytes)) let count = unix_byte_lines(&self.bytes)
.map(LineType::from)
.filter(|line| matches!(line, LineType::Command(_)))
.count();
Ok(count)
} }
async fn load(self, h: &mut impl Loader) -> Result<()> { async fn load(self, h: &mut impl Loader) -> Result<()> {
let now = chrono::Utc::now(); let lines = unix_byte_lines(&self.bytes)
let mut line = String::new(); .map(LineType::from)
.filter(|line| !matches!(line, LineType::NotUtf8)) // invalid utf8 are ignored
.collect_vec();
for (i, b) in unix_byte_lines(&self.bytes).enumerate() { let (commands_before_first_timestamp, first_timestamp) = lines
let s = match std::str::from_utf8(b) { .iter()
Ok(s) => s, .enumerate()
Err(_) => continue, // we can skip past things like invalid utf8 .find_map(|(i, line)| match line {
}; LineType::Timestamp(t) => Some((i, *t)),
_ => None,
})
// if no known timestamps, use now as base
.unwrap_or((lines.len(), Utc::now()));
if let Some(s) = s.strip_suffix('\\') { // if no timestamp is recorded, then use this increment to set an arbitrary timestamp
line.push_str(s); // to preserve ordering
line.push_str("\\\n"); let timestamp_increment = Duration::seconds(1);
} else { // make sure there is a minimum amount of time before the first known timestamp
line.push_str(s); // to fit all commands, given the default increment
let command = std::mem::take(&mut line); let mut next_timestamp =
first_timestamp - timestamp_increment * commands_before_first_timestamp as i32;
let offset = chrono::Duration::seconds(i as i64); for line in lines.into_iter() {
h.push(History::new( match line {
now - offset, // preserve ordering LineType::NotUtf8 => unreachable!(), // already filtered
command, LineType::Timestamp(t) => next_timestamp = t,
String::from("unknown"), LineType::Command(c) => {
-1, let entry = History::new(
-1, next_timestamp,
None, c.into(),
None, "unknown".into(),
)) -1,
.await?; -1,
None,
None,
);
h.push(entry).await?;
next_timestamp += timestamp_increment;
}
} }
} }
@ -70,18 +89,47 @@ impl Importer for Bash {
} }
} }
/// Classification of one raw line of the imported Bash history bytes.
#[derive(Debug, Clone)]
enum LineType<'a> {
/// The line's bytes are not valid UTF-8.
NotUtf8,
/// A timestamp line starts with a '#', followed immediately by an integer
/// that represents seconds since the UNIX epoch.
Timestamp(DateTime<Utc>),
/// Anything that doesn't look like a timestamp.
Command(&'a str),
}
impl<'a> From<&'a [u8]> for LineType<'a> {
    /// Classifies one raw line.
    ///
    /// Bytes that are not valid UTF-8 become `NotUtf8`; a line accepted by
    /// `try_parse_line_as_timestamp` becomes `Timestamp`; every other line is
    /// borrowed unchanged as a `Command`.
    fn from(bytes: &'a [u8]) -> Self {
        match str::from_utf8(bytes) {
            Err(_) => LineType::NotUtf8,
            Ok(text) => try_parse_line_as_timestamp(text)
                .map_or(LineType::Command(text), LineType::Timestamp),
        }
    }
}
/// Tries to interpret `line` as a history timestamp marker.
///
/// A marker is a `#` immediately followed by an integer number of seconds
/// since the UNIX epoch (the form written when `HISTTIMEFORMAT` is set).
///
/// Returns `None` when the line has no `#` prefix, when the remainder does
/// not parse as an `i64`, or when the number of seconds is outside the range
/// chrono can represent. Callers treat such a line as an ordinary command.
fn try_parse_line_as_timestamp(line: &str) -> Option<DateTime<Utc>> {
    let seconds = line.strip_prefix('#')?.parse::<i64>().ok()?;
    // `NaiveDateTime::from_timestamp` panics on out-of-range input; a stray
    // line such as `#99999999999999999` must not abort the whole import.
    let time = NaiveDateTime::from_timestamp_opt(seconds, 0)?;
    Some(DateTime::from_utc(time, Utc))
}
#[cfg(test)] #[cfg(test)]
mod tests { mod test {
use itertools::assert_equal; use std::cmp::Ordering;
use itertools::{assert_equal, Itertools};
use crate::import::{tests::TestLoader, Importer}; use crate::import::{tests::TestLoader, Importer};
use super::Bash; use super::Bash;
#[tokio::test] #[tokio::test]
async fn test_parse_file() { async fn parse_no_timestamps() {
let bytes = r"cargo install atuin let bytes = r"cargo install atuin
cargo install atuin; \
cargo update cargo update
cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
" "
@ -89,7 +137,7 @@ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
.to_owned(); .to_owned();
let mut bash = Bash { bytes }; let mut bash = Bash { bytes };
assert_eq!(bash.entries().await.unwrap(), 4); assert_eq!(bash.entries().await.unwrap(), 3);
let mut loader = TestLoader::default(); let mut loader = TestLoader::default();
bash.load(&mut loader).await.unwrap(); bash.load(&mut loader).await.unwrap();
@ -98,9 +146,72 @@ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
loader.buf.iter().map(|h| h.command.as_str()), loader.buf.iter().map(|h| h.command.as_str()),
[ [
"cargo install atuin", "cargo install atuin",
"cargo install atuin; \\\ncargo update", "cargo update",
"cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷",
], ],
); );
assert!(is_strictly_sorted(
loader.buf.iter().map(|h| h.timestamp.timestamp())
))
}
#[tokio::test]
async fn parse_with_timestamps() {
    // Every command is preceded by its own `#<epoch seconds>` marker.
    let bytes = b"#1672918999
git reset
#1672919006
git clean -dxf
#1672919020
cd ../
"
    .to_vec();

    let mut importer = Bash { bytes };
    // Marker lines must not be counted as entries.
    let entry_count = importer.entries().await.unwrap();
    assert_eq!(entry_count, 3);

    let mut sink = TestLoader::default();
    importer.load(&mut sink).await.unwrap();

    let commands: Vec<&str> = sink.buf.iter().map(|entry| entry.command.as_str()).collect();
    assert_eq!(commands, ["git reset", "git clean -dxf", "cd ../"]);

    // Each command carries exactly the timestamp of the marker above it.
    let timestamps: Vec<i64> = sink
        .buf
        .iter()
        .map(|entry| entry.timestamp.timestamp())
        .collect();
    assert_eq!(timestamps, [1672918999, 1672919006, 1672919020]);
}
#[tokio::test]
async fn parse_with_partial_timestamps() {
    // Only the second command has a `#<epoch seconds>` marker of its own.
    let bytes = b"git reset
#1672919006
git clean -dxf
cd ../
"
    .to_vec();

    let mut importer = Bash { bytes };
    let entry_count = importer.entries().await.unwrap();
    assert_eq!(entry_count, 3);

    let mut sink = TestLoader::default();
    importer.load(&mut sink).await.unwrap();

    let commands: Vec<&str> = sink.buf.iter().map(|entry| entry.command.as_str()).collect();
    assert_eq!(commands, ["git reset", "git clean -dxf", "cd ../"]);

    // The imported timestamps must be strictly increasing in file order.
    let timestamps = sink.buf.iter().map(|entry| entry.timestamp.timestamp());
    assert!(is_strictly_sorted(timestamps));
}
/// Returns `true` when every item is strictly less than the item after it.
///
/// Empty and single-item inputs are trivially sorted. A pair whose
/// `partial_cmp` is `None` (incomparable values) counts as not sorted.
fn is_strictly_sorted<T>(iter: impl IntoIterator<Item = T>) -> bool
where
    T: Clone + PartialOrd,
{
    let mut items = iter.into_iter();
    let mut previous = match items.next() {
        Some(first) => first,
        None => return true,
    };
    for current in items {
        // Stop at the first adjacent pair that is not strictly ascending.
        if previous.partial_cmp(&current) != Some(Ordering::Less) {
            return false;
        }
        previous = current;
    }
    true
}
} }