Improve history import performance so that we can import 100k entries in ~6 seconds (down from ~20 seconds)

This commit is contained in:
David Dworken 2023-10-02 22:27:24 -07:00
parent c7efc3364b
commit d33bd6a046
No known key found for this signature in database
3 changed files with 63 additions and 11 deletions

View File

@ -2453,4 +2453,39 @@ func testMultipleUsers(t *testing.T, tester shellTester) {
} }
} }
func BenchmarkImport(b *testing.B) {
b.StopTimer()
// Setup
tester := bashTester{}
defer testutils.BackupAndRestore(b)()
// Benchmark it
for n := 0; n < b.N; n++ {
// Setup
testutils.ResetLocalState(b)
installHishtory(b, tester, "")
// Create a large history in bash that we will pre-import
numSyntheticEntries := 100_000
homedir, err := os.UserHomeDir()
require.NoError(b, err)
f, err := os.OpenFile(path.Join(homedir, ".bash_history"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
require.NoError(b, err)
defer f.Close()
for i := 1; i <= numSyntheticEntries; i++ {
_, err := f.WriteString(fmt.Sprintf("echo command-%d\n", i))
require.NoError(b, err)
}
require.NoError(b, f.Close())
// Benchmarked code:
b.StartTimer()
ctx := hctx.MakeContext()
numImported, err := lib.ImportHistory(ctx, false, true)
require.NoError(b, err)
require.GreaterOrEqual(b, numImported, numSyntheticEntries)
b.StopTimer()
}
}
// TODO: somehow test/confirm that hishtory works even if only bash/only zsh is installed // TODO: somehow test/confirm that hishtory works even if only bash/only zsh is installed

View File

@ -237,23 +237,25 @@ func CheckFatalError(err error) {
} }
} }
// ZSH_FIRST_COMMAND_BUG_REGEX matches the marker `: <digits>:<one digit>;` and
// captures the rest of the line after the `;`. The pattern is not anchored, so
// the marker may appear anywhere in the input.
var ZSH_FIRST_COMMAND_BUG_REGEX = regexp.MustCompile(`: \d+:\d;(.*)`)
// stripZshWeirdness returns the text that follows the first
// `: <digits>:<one digit>;` marker in cmd, or cmd unchanged when no such
// marker is present.
func stripZshWeirdness(cmd string) string { func stripZshWeirdness(cmd string) string {
// Zsh has this weird behavior where sometimes commands are saved in the hishtory file // Zsh has this weird behavior where sometimes commands are saved in the hishtory file
// with a weird prefix. I've never been able to figure out why this happens, but we // with a weird prefix. I've never been able to figure out why this happens, but we
// can at least strip it. // can at least strip it.
firstCommandBugRegex := regexp.MustCompile(`: \d+:\d;(.*)`) matches := ZSH_FIRST_COMMAND_BUG_REGEX.FindStringSubmatch(cmd)
matches := firstCommandBugRegex.FindStringSubmatch(cmd)
// Two elements means the full match plus the single capture group.
if len(matches) == 2 { if len(matches) == 2 {
return matches[1] return matches[1]
} }
return cmd return cmd
} }
// BASH_FIRST_COMMAND_BUG_REGEX matches an input that consists solely of `#`,
// one or more digits, and at least one trailing whitespace character.
var BASH_FIRST_COMMAND_BUG_REGEX = regexp.MustCompile(`^#\d+\s+$`)
// isBashWeirdness reports whether cmd matches `^#\d+\s+$`.
// NOTE(review): the pattern requires trailing whitespace, so cmd presumably
// still carries its line terminator when it reaches here; confirm against the caller.
func isBashWeirdness(cmd string) bool { func isBashWeirdness(cmd string) bool {
// Bash has this weird behavior where it has entries like `#1664342754` in the // Bash has this weird behavior where it has entries like `#1664342754` in the
// history file. We want to skip these. // history file. We want to skip these.
firstCommandBugRegex := regexp.MustCompile(`^#\d+\s+$`) return BASH_FIRST_COMMAND_BUG_REGEX.MatchString(cmd)
return firstCommandBugRegex.MatchString(cmd)
} }
func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error) { func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error) {
@ -287,6 +289,8 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
} }
numEntriesImported := 0 numEntriesImported := 0
var iteratorError error = nil var iteratorError error = nil
var batch []data.HistoryEntry
batchSize := 100
entriesIter(func(cmd string, err error) bool { entriesIter(func(cmd string, err error) bool {
if err != nil { if err != nil {
iteratorError = err iteratorError = err
@ -296,7 +300,7 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
if isBashWeirdness(cmd) || strings.HasPrefix(cmd, " ") { if isBashWeirdness(cmd) || strings.HasPrefix(cmd, " ") {
return true return true
} }
entry := data.HistoryEntry{ entry := normalizeEntryTimezone(data.HistoryEntry{
LocalUsername: currentUser.Name, LocalUsername: currentUser.Name,
Hostname: hostname, Hostname: hostname,
Command: cmd, Command: cmd,
@ -307,18 +311,31 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
EndTime: time.Now().UTC(), EndTime: time.Now().UTC(),
DeviceId: config.DeviceId, DeviceId: config.DeviceId,
EntryId: uuid.Must(uuid.NewRandom()).String(), EntryId: uuid.Must(uuid.NewRandom()).String(),
})
batch = append(batch, entry)
if len(batch) > batchSize {
err = RetryingDbFunction(func() error {
return db.Create(batch).Error
})
if err != nil {
iteratorError = fmt.Errorf("failed to insert imported history entry: %w", err)
return false
}
batch = make([]data.HistoryEntry, 0)
} }
err = ReliableDbCreate(db, entry)
numEntriesImported += 1 numEntriesImported += 1
if err != nil {
iteratorError = fmt.Errorf("failed to insert imported history entry: %w", err)
return false
}
return true return true
}) })
if iteratorError != nil { if iteratorError != nil {
return 0, iteratorError return 0, iteratorError
} }
// Also create any entries remaining in an unfinished batch
err = RetryingDbFunction(func() error {
return db.Create(batch).Error
})
if err != nil {
return 0, err
}
err = Reupload(ctx) err = Reupload(ctx)
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to upload hishtory import: %w", err) return 0, fmt.Errorf("failed to upload hishtory import: %w", err)

View File

@ -48,7 +48,7 @@ func getInitialWd() string {
return dir return dir
} }
func ResetLocalState(t *testing.T) { func ResetLocalState(t testing.TB) {
homedir, err := os.UserHomeDir() homedir, err := os.UserHomeDir()
require.NoError(t, err) require.NoError(t, err)
persistLog() persistLog()