Improve history import performance so that we can import 100k entries in ~6 seconds (down from ~20 seconds)

This commit is contained in:
David Dworken 2023-10-02 22:27:24 -07:00
parent c7efc3364b
commit d33bd6a046
No known key found for this signature in database
3 changed files with 63 additions and 11 deletions

View File

@ -2453,4 +2453,39 @@ func testMultipleUsers(t *testing.T, tester shellTester) {
}
}
// BenchmarkImport measures how long lib.ImportHistory takes to import a
// synthetic bash history of 100,000 entries.
//
// Only the ImportHistory call (and the assertions on its result) is timed;
// resetting local state, installing hishtory and generating the history file
// all happen while the benchmark timer is stopped.
func BenchmarkImport(b *testing.B) {
	b.StopTimer()
	tester := bashTester{}
	defer testutils.BackupAndRestore(b)()

	numSyntheticEntries := 100_000
	for n := 0; n < b.N; n++ {
		// Untimed setup: start from a clean install with a large bash history
		// waiting to be imported.
		testutils.ResetLocalState(b)
		installHishtory(b, tester, "")
		writeSyntheticBashHistory(b, numSyntheticEntries)

		// Benchmarked code:
		b.StartTimer()
		ctx := hctx.MakeContext()
		numImported, err := lib.ImportHistory(ctx, false, true)
		require.NoError(b, err)
		require.GreaterOrEqual(b, numImported, numSyntheticEntries)
		b.StopTimer()
	}
}

// writeSyntheticBashHistory appends numEntries lines of the form
// "echo command-<i>" (i counting from 1) to ~/.bash_history, creating the file
// if it does not exist.
//
// It is a separate function so that the deferred Close runs at the end of
// every benchmark iteration rather than accumulating until BenchmarkImport
// itself returns.
func writeSyntheticBashHistory(b *testing.B, numEntries int) {
	homedir, err := os.UserHomeDir()
	require.NoError(b, err)
	f, err := os.OpenFile(path.Join(homedir, ".bash_history"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	require.NoError(b, err)
	// Safety net for the case where an assertion below aborts the benchmark;
	// on the success path the file is already closed and this is a no-op error.
	defer f.Close()
	for i := 1; i <= numEntries; i++ {
		_, err := fmt.Fprintf(f, "echo command-%d\n", i)
		require.NoError(b, err)
	}
	// Close explicitly so that a failed close is reported instead of ignored.
	require.NoError(b, f.Close())
}
// TODO: somehow test/confirm that hishtory works even if only bash/only zsh is installed

View File

@ -237,23 +237,25 @@ func CheckFatalError(err error) {
}
}
// ZSH_FIRST_COMMAND_BUG_REGEX matches a ": <digits>:<digits>;" prefix and
// captures everything after it (up to the end of that line). It is compiled
// once at package scope so that stripZshWeirdness does not pay for a regex
// compilation on every call.
//
// NOTE(review): this prefix has the shape of zsh's EXTENDED_HISTORY record
// format (": <start time>:<elapsed seconds>;<command>"). The second field is
// therefore matched with \d+ rather than a single \d, so entries whose second
// field has more than one digit are stripped as well.
var ZSH_FIRST_COMMAND_BUG_REGEX = regexp.MustCompile(`: \d+:\d+;(.*)`)

// stripZshWeirdness removes the ": <digits>:<digits>;" prefix that zsh
// sometimes saves in front of a command. If cmd does not contain such a
// prefix it is returned unchanged.
func stripZshWeirdness(cmd string) string {
	// Zsh has this weird behavior where sometimes commands are saved in the hishtory file
	// with a weird prefix. I've never been able to figure out why this happens, but we
	// can at least strip it.
	matches := ZSH_FIRST_COMMAND_BUG_REGEX.FindStringSubmatch(cmd)
	if len(matches) == 2 {
		return matches[1]
	}
	return cmd
}
// BASH_FIRST_COMMAND_BUG_REGEX matches a string that consists solely of "#",
// one or more digits, and at least one trailing whitespace character. It is
// compiled once at package scope so that isBashWeirdness does not pay for a
// regex compilation on every call.
//
// NOTE(review): because of the trailing \s+, a bare "#1664342754" with no
// trailing whitespace or newline does NOT match — confirm that callers always
// pass the entry with its line terminator still attached.
var BASH_FIRST_COMMAND_BUG_REGEX = regexp.MustCompile(`^#\d+\s+$`)

// isBashWeirdness reports whether cmd is one of the `#<digits>` marker lines
// that bash writes into its history file rather than a real command.
func isBashWeirdness(cmd string) bool {
	// Bash has this weird behavior where it has entries like `#1664342754` in the
	// history file. We want to skip these.
	return BASH_FIRST_COMMAND_BUG_REGEX.MatchString(cmd)
}
func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error) {
@ -287,6 +289,8 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
}
numEntriesImported := 0
var iteratorError error = nil
var batch []data.HistoryEntry
batchSize := 100
entriesIter(func(cmd string, err error) bool {
if err != nil {
iteratorError = err
@ -296,7 +300,7 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
if isBashWeirdness(cmd) || strings.HasPrefix(cmd, " ") {
return true
}
entry := data.HistoryEntry{
entry := normalizeEntryTimezone(data.HistoryEntry{
LocalUsername: currentUser.Name,
Hostname: hostname,
Command: cmd,
@ -307,18 +311,31 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
EndTime: time.Now().UTC(),
DeviceId: config.DeviceId,
EntryId: uuid.Must(uuid.NewRandom()).String(),
})
batch = append(batch, entry)
if len(batch) > batchSize {
err = RetryingDbFunction(func() error {
return db.Create(batch).Error
})
if err != nil {
iteratorError = fmt.Errorf("failed to insert imported history entry: %w", err)
return false
}
batch = make([]data.HistoryEntry, 0)
}
err = ReliableDbCreate(db, entry)
numEntriesImported += 1
if err != nil {
iteratorError = fmt.Errorf("failed to insert imported history entry: %w", err)
return false
}
return true
})
if iteratorError != nil {
return 0, iteratorError
}
// Also create any entries remaining in an unfinished batch
err = RetryingDbFunction(func() error {
return db.Create(batch).Error
})
if err != nil {
return 0, err
}
err = Reupload(ctx)
if err != nil {
return 0, fmt.Errorf("failed to upload hishtory import: %w", err)

View File

@ -48,7 +48,7 @@ func getInitialWd() string {
return dir
}
func ResetLocalState(t *testing.T) {
func ResetLocalState(t testing.TB) {
homedir, err := os.UserHomeDir()
require.NoError(t, err)
persistLog()