Add import-json and export-json commands for importing and exporting fully detailed histories (#271)

This commit is contained in:
David Dworken 2024-12-31 09:34:49 -05:00 committed by GitHub
parent 7afdc9f11f
commit ffc224e3d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 219 additions and 6 deletions

View File

@ -190,6 +190,8 @@ A few configuration options:
hiSHtory imports your existing shell history by default. If for some reason this didn't work (e.g. you had your shell history in a non-standard file), you can import it by piping it into `hishtory import` (e.g. `cat ~/.my_history | hishtory import`). hiSHtory imports your existing shell history by default. If for some reason this didn't work (e.g. you had your shell history in a non-standard file), you can import it by piping it into `hishtory import` (e.g. `cat ~/.my_history | hishtory import`).
If you'd like to import rich history data (e.g. because you previously tracked other history metadata with another tool), you can use `hishtory import-json`. See `hishtory import-json --help` for more information.
</blockquote></details> </blockquote></details>
<details> <details>
@ -240,7 +242,7 @@ Debug logs are stored in `~/.hishtory/hishtory.log`. If you run into any issues,
<details> <details>
<summary>Uninstalling</summary><blockquote> <summary>Uninstalling</summary><blockquote>
If you'd like to uninstall hishtory, just run `hishtory uninstall`. Note that this deletes the SQLite DB storing your history, so consider running a `hishtory export` first. If you'd like to uninstall hishtory, just run `hishtory uninstall`. Note that this deletes the SQLite DB storing your history, so consider running a `hishtory export` or a `hishtory export-json` first.
Note that if you're experiencing any issues with hiSHtory, try running `hishtory update` first! Performance and reliability are always improving, and we highly value [your feedback](https://github.com/ddworken/hishtory/issues). Note that if you're experiencing any issues with hiSHtory, try running `hishtory update` first! Performance and reliability are always improving, and we highly value [your feedback](https://github.com/ddworken/hishtory/issues).

View File

@ -3435,4 +3435,39 @@ func TestStatusFullConfig(t *testing.T) {
testutils.CompareGoldens(t, out, "TestStatusFullConfig") testutils.CompareGoldens(t, out, "TestStatusFullConfig")
} }
// TestExportJson installs hishtory in a zsh test shell, inserts two synthetic
// history entries directly into the DB, and compares the filtered output of
// `hishtory export-json` against the "TestExportJson" golden.
func TestExportJson(t *testing.T) {
markTestForSharding(t, 20)
defer testutils.BackupAndRestore(t)()
tester := zshTester{}
installHishtory(t, tester, "")
// Create some history entries
db := hctx.GetDb(hctx.MakeContext())
e1 := testutils.MakeFakeHistoryEntry("echo synth1")
e1.StartTime = time.Unix(1234567, 0)
require.NoError(t, db.Create(e1).Error)
e2 := testutils.MakeFakeHistoryEntry("echo synth2")
// NOTE(review): this assigns to e1 again, after e1 has already been passed to
// db.Create above, so e2 keeps whatever StartTime MakeFakeHistoryEntry gave it.
// It looks like `e2.StartTime` was intended. The golden file currently records
// the existing behavior, so changing this line presumably requires
// regenerating the golden — confirm before fixing.
e1.StartTime = time.Unix(1244567, 0)
require.NoError(t, db.Create(e2).Error)
// Run export-json
// The grep pipeline keeps only lines containing "synth" and drops any line
// containing "export-json" (presumably the recorded export command itself).
// NOTE(review): time.Unix returns a local-time value and the golden stores the
// synth1 start_time with a -08:00 offset, so this comparison may depend on the
// timezone of the machine running the test — verify.
out := tester.RunInteractiveShell(t, `hishtory export-json | grep synth | grep -v export-json`)
testutils.CompareGoldens(t, out, "TestExportJson")
}
// TestImportJson pipes the TestExportJson golden file into `hishtory import-json`
// and then checks that `hishtory export-json` reproduces that same golden.
func TestImportJson(t *testing.T) {
	markTestForSharding(t, 20)
	defer testutils.BackupAndRestore(t)()
	shell := zshTester{}
	installHishtory(t, shell, "")

	// Import the entries stored in the export-json golden and check the reported count.
	importOutput := shell.RunInteractiveShell(t, `cat client/testdata/TestExportJson | hishtory import-json`)
	require.Equal(t, "Imported 2 history entries\n", importOutput)

	// Export again; the filtered output must match the golden that was just imported.
	exportOutput := shell.RunInteractiveShell(t, `hishtory export-json | grep synth | grep -v export-json`)
	testutils.CompareGoldens(t, exportOutput, "TestExportJson")
}
// TODO: somehow test/confirm that hishtory works even if only bash/only zsh is installed // TODO: somehow test/confirm that hishtory works even if only bash/only zsh is installed

76
client/cmd/export.go Normal file
View File

@ -0,0 +1,76 @@
package cmd
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"github.com/ddworken/hishtory/client/data"
"github.com/ddworken/hishtory/client/hctx"
"github.com/ddworken/hishtory/client/lib"
"github.com/spf13/cobra"
)
// exportJsonCmd implements `hishtory export-json`: it writes history entries to
// stdout as JSON lines and exits fatally if the export fails.
var exportJsonCmd = &cobra.Command{
	Use:   "export-json",
	Short: "Export history entries formatted in JSON lines format (as accepted by hishtory import-json, and easily parsable by other tools)",
	Run: func(cmd *cobra.Command, args []string) {
		lib.CheckFatalError(exportToJson(hctx.MakeContext(), os.Stdout))
	},
}
// structToMap turns a history entry into a generic map keyed by the entry's
// JSON field names. It does so by encoding the entry to JSON and decoding the
// bytes back into a map, so the map holds exactly what the JSON form holds.
//
// Returns the decoded map together with any encode/decode error.
func structToMap(entry data.HistoryEntry) (map[string]interface{}, error) {
	encoded, marshalErr := json.Marshal(entry)
	if marshalErr != nil {
		return nil, marshalErr
	}

	var fields map[string]interface{}
	decodeErr := json.Unmarshal(encoded, &fields)
	return fields, decodeErr
}
// exportToJson writes history entries to w in JSON lines format, one JSON
// object per line.
//
// Entries are fetched page by page via lib.SearchWithOffset using an empty
// query, and the export stops at the first empty page. The "device_id" and
// "entry_id" keys are removed from every record before it is written.
//
// Returns an error if a page cannot be fetched, an entry cannot be converted,
// or a record cannot be written to w.
func exportToJson(ctx context.Context, w io.Writer) error {
	const chunkSize = 1000

	db := hctx.GetDb(ctx)
	// A single encoder is reused for the whole export; Encode appends the
	// newline that terminates each JSON line, so no separate newline write
	// is needed.
	enc := json.NewEncoder(w)

	for offset := 0; ; offset += chunkSize {
		entries, err := lib.SearchWithOffset(ctx, db, "", chunkSize, offset)
		if err != nil {
			return fmt.Errorf("failed to search for history entries with offset=%d: %w", offset, err)
		}
		if len(entries) == 0 {
			// An empty page means everything has been exported.
			return nil
		}
		for _, entry := range entries {
			m, err := structToMap(*entry)
			if err != nil {
				return fmt.Errorf("failed to convert history entry to a JSON map: %w", err)
			}
			// These keys are dropped from the export; import-json assigns
			// its own device and entry IDs.
			delete(m, "device_id")
			delete(m, "entry_id")
			if err := enc.Encode(m); err != nil {
				return fmt.Errorf("failed to write exported history entry: %w", err)
			}
		}
	}
}
// init registers the export-json command on the root command.
func init() {
rootCmd.AddCommand(exportJsonCmd)
}

View File

@ -1,11 +1,18 @@
package cmd package cmd
import ( import (
"context"
"encoding/json"
"fmt" "fmt"
"os"
"os/user"
"time"
"github.com/ddworken/hishtory/client/data"
"github.com/ddworken/hishtory/client/hctx" "github.com/ddworken/hishtory/client/hctx"
"github.com/ddworken/hishtory/client/lib" "github.com/ddworken/hishtory/client/lib"
"github.com/google/uuid"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
@ -13,7 +20,7 @@ var importCmd = &cobra.Command{
Use: "import", Use: "import",
Hidden: true, Hidden: true,
Short: "Re-import history entries from your existing shell history", Short: "Re-import history entries from your existing shell history",
Long: "Note that you must pipe commands to be imported in via stdin. For example `history | hishtory import`.", Long: "Note that you may also pipe commands to be imported in via stdin. For example `history | hishtory import`.",
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
ctx := hctx.MakeContext() ctx := hctx.MakeContext()
numImported, err := lib.ImportHistory(ctx, true, true) numImported, err := lib.ImportHistory(ctx, true, true)
@ -24,6 +31,95 @@ var importCmd = &cobra.Command{
}, },
} }
// importJsonCmd implements `hishtory import-json`: it imports JSON-lines
// history entries read from stdin and prints how many entries were imported.
// Any import error is fatal.
var importJsonCmd = &cobra.Command{
	Use:   "import-json",
	Short: "Import history entries formatted in JSON lines format into hiSHtory",
	Long: "Data is read from stdin. For example: `cat data.txt | hishtory import-json`.\n\nExample JSON format:\n\n```\n" +
		"{\"command\":\"echo foo\"}\n" +
		"{\"command\":\"echo bar\", \"current_working_directory\": \"/tmp/\"}\n" +
		"{\"command\":\"ls\",\"current_working_directory\":\"/tmp/\",\"local_username\":\"david\",\"hostname\":\"foo\",\"home_directory\":\"/Users/david\",\"exit_code\":0,\"start_time\":\"2024-12-30T01:14:34.656407Z\",\"end_time\":\"2024-12-30T01:14:34.657407Z\"}\n```\n",
	Run: func(cmd *cobra.Command, args []string) {
		imported, importErr := importFromJson(hctx.MakeContext())
		lib.CheckFatalError(importErr)
		fmt.Printf("Imported %v history entries\n", imported)
	},
}
// importFromJson reads JSON-lines history entries from stdin and inserts them
// into the local history DB.
//
// Every line must decode into a data.HistoryEntry with a non-empty command
// and no custom columns; otherwise the whole import is aborted before
// anything is written. Missing fields are filled in as follows:
//   - local username, hostname and home directory: values for the current
//     user/machine
//   - current working directory: the literal "Unknown"
//   - timestamps: see below
//
// The device ID and entry ID of every imported entry are always overwritten
// with this device's ID and a fresh per-import ID.
//
// Returns the number of imported entries, or an error if stdin cannot be
// read, a line is invalid, or the DB insert/checkpoint fails.
func importFromJson(ctx context.Context) (int, error) {
	// Get the data needed for filling in any missing columns
	currentUser, err := user.Current()
	if err != nil {
		return 0, fmt.Errorf("failed to look up the current user for import: %w", err)
	}
	hostname, err := os.Hostname()
	if err != nil {
		return 0, fmt.Errorf("failed to look up the hostname for import: %w", err)
	}
	homedir := hctx.GetHome(ctx)

	// Build the entries
	lines, err := lib.ReadStdin()
	if err != nil {
		return 0, fmt.Errorf("failed to read stdin for import: %w", err)
	}
	if len(lines) == 0 {
		// Nothing to import, so there is no reason to touch the DB at all.
		return 0, nil
	}

	// Loop-invariant values, computed once rather than per line.
	deviceId := hctx.GetConf(ctx).DeviceId
	importEntryId := uuid.Must(uuid.NewRandom()).String()
	importTimestamp := time.Now().UTC()

	entries := make([]data.HistoryEntry, 0, len(lines))
	for i, line := range lines {
		var entry data.HistoryEntry
		if err := json.Unmarshal([]byte(line), &entry); err != nil {
			return 0, fmt.Errorf("failed to parse JSON line %#v: %w", line, err)
		}
		if entry.Command == "" {
			return 0, fmt.Errorf("cannot import history entries without a command, JSON line: %#v", line)
		}
		if len(entry.CustomColumns) > 0 {
			return 0, fmt.Errorf("cannot import history entries with custom columns, JSON line: %#v", line)
		}
		if entry.LocalUsername == "" {
			entry.LocalUsername = currentUser.Username
		}
		if entry.Hostname == "" {
			entry.Hostname = hostname
		}
		if entry.CurrentWorkingDirectory == "" {
			entry.CurrentWorkingDirectory = "Unknown"
		}
		if entry.HomeDirectory == "" {
			entry.HomeDirectory = homedir
		}

		switch {
		case entry.StartTime.IsZero() && entry.EndTime.IsZero():
			// No timestamps at all: synthesize them from the import time so
			// that they are monotonically increasing across the input lines.
			entry.StartTime = importTimestamp.Add(time.Millisecond * time.Duration(i*2))
			entry.EndTime = entry.StartTime.Add(time.Millisecond)
		case entry.EndTime.IsZero():
			// Only the start is known: derive the end from it rather than
			// from the import time, which would give an arbitrary duration.
			entry.EndTime = entry.StartTime.Add(time.Millisecond)
		case entry.StartTime.IsZero():
			// Only the end is known: keep the start just before it so the
			// entry never ends before it starts.
			entry.StartTime = entry.EndTime.Add(-time.Millisecond)
		}

		entry.DeviceId = deviceId
		entry.EntryId = fmt.Sprintf("%s-%d", importEntryId, i)
		entries = append(entries, entry)
	}

	// Insert the entries into the DB
	db := hctx.GetDb(ctx)
	if err := db.CreateInBatches(entries, lib.ImportBatchSize).Error; err != nil {
		return 0, fmt.Errorf("failed to insert entries into DB: %w", err)
	}
	// Trigger a checkpoint so that these bulk entries are added from the WAL to the main DB
	if err := db.Exec("PRAGMA wal_checkpoint").Error; err != nil {
		return 0, fmt.Errorf("failed to checkpoint imported history: %w", err)
	}
	return len(entries), nil
}
func init() { func init() {
rootCmd.AddCommand(importCmd) rootCmd.AddCommand(importCmd)
rootCmd.AddCommand(importJsonCmd)
} }

View File

@ -48,6 +48,9 @@ var (
GitCommit string = "Unknown" GitCommit string = "Unknown"
) )
// ImportBatchSize is the batch size for the DB operations for importing history. Used by all types of imports.
var ImportBatchSize = 100
// 512KB ought to be enough for any reasonable cmd // 512KB ought to be enough for any reasonable cmd
// Funnily enough, 256KB actually wasn't enough. See https://github.com/ddworken/hishtory/issues/93 // Funnily enough, 256KB actually wasn't enough. See https://github.com/ddworken/hishtory/issues/93
var maxSupportedLineLengthForImport = 512_000 var maxSupportedLineLengthForImport = 512_000
@ -235,7 +238,7 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
return 0, fmt.Errorf("failed to count input lines during hishtory import: %w", err) return 0, fmt.Errorf("failed to count input lines during hishtory import: %w", err)
} }
if shouldReadStdin { if shouldReadStdin {
extraEntries, err := readStdin() extraEntries, err := ReadStdin()
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to read stdin: %w", err) return 0, fmt.Errorf("failed to read stdin: %w", err)
} }
@ -260,7 +263,6 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
var iteratorError error = nil var iteratorError error = nil
var batch []data.HistoryEntry var batch []data.HistoryEntry
importTimestamp := time.Now().UTC() importTimestamp := time.Now().UTC()
batchSize := 100
importEntryId := uuid.Must(uuid.NewRandom()).String() importEntryId := uuid.Must(uuid.NewRandom()).String()
var bar *progressbar.ProgressBar var bar *progressbar.ProgressBar
if totalNumEntries > NUM_IMPORTED_ENTRIES_SLOW { if totalNumEntries > NUM_IMPORTED_ENTRIES_SLOW {
@ -296,7 +298,7 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
EntryId: entryId, EntryId: entryId,
}) })
batch = append(batch, entry) batch = append(batch, entry)
if len(batch) > batchSize { if len(batch) > ImportBatchSize {
err = RetryingDbFunction(func() error { err = RetryingDbFunction(func() error {
if err := db.Create(batch).Error; err != nil { if err := db.Create(batch).Error; err != nil {
return fmt.Errorf("failed to import batch of history entries: %w", err) return fmt.Errorf("failed to import batch of history entries: %w", err)
@ -347,7 +349,7 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
return numEntriesImported, nil return numEntriesImported, nil
} }
func readStdin() ([]string, error) { func ReadStdin() ([]string, error) {
ret := make([]string, 0) ret := make([]string, 0)
in := bufio.NewReader(os.Stdin) in := bufio.NewReader(os.Stdin)
for { for {

2
client/testdata/TestExportJson vendored Normal file
View File

@ -0,0 +1,2 @@
{"command":"echo synth2","current_working_directory":"/tmp/","custom_columns":null,"end_time":"2022-10-18T04:43:24Z","exit_code":2,"home_directory":"/home/david/","hostname":"localhost","local_username":"david","start_time":"2022-10-18T04:43:21Z"}
{"command":"echo synth1","current_working_directory":"/tmp/","custom_columns":null,"end_time":"2022-10-18T04:43:19Z","exit_code":2,"home_directory":"/home/david/","hostname":"localhost","local_username":"david","start_time":"1970-01-14T22:56:07-08:00"}