From 3e0af30704101adf1c85dbf79cb866683fd6fce9 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 28 Nov 2024 17:11:33 +0000 Subject: [PATCH] list: Implement Sorter to sort directory entries Later this will be extended to do out of memory sorts --- fs/list/sorter.go | 105 +++++++++++++++++++++++++++++++++++++++++ fs/list/sorter_test.go | 104 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 209 insertions(+) create mode 100644 fs/list/sorter.go create mode 100644 fs/list/sorter_test.go diff --git a/fs/list/sorter.go b/fs/list/sorter.go new file mode 100644 index 000000000..b38d66fa3 --- /dev/null +++ b/fs/list/sorter.go @@ -0,0 +1,105 @@ +package list + +import ( + "cmp" + "context" + "slices" + "sync" + + "github.com/rclone/rclone/fs" +) + +// Sorter implements an efficient mechanism for sorting list entries. +// +// If there are a large number of entries, this may be done on disk +// instead of in memory. +// +// Supply entries with the Add method, call Send at the end to deliver +// the sorted entries and finalise with CleanUp regardless of whether +// you called Add or Send. +// +// Sorted entries are delivered to the callback supplied to NewSorter +// when the Send method is called. +type Sorter struct { + ctx context.Context + mu sync.Mutex + callback fs.ListRCallback + entries fs.DirEntries + keyFn KeyFn +} + +// KeyFn turns an entry into a sort key +type KeyFn func(entry fs.DirEntry) string + +// identityKeyFn maps an entry to its Remote +func identityKeyFn(entry fs.DirEntry) string { + return entry.Remote() +} + +// NewSorter creates a new Sorter with callback for sorted entries to +// be delivered to. keyFn is used to process each entry to get a key +// function, if nil then it will just use entry.Remote() +func NewSorter(ctx context.Context, callback fs.ListRCallback, keyFn KeyFn) (*Sorter, error) { + if keyFn == nil { + keyFn = identityKeyFn + } + return &Sorter{ + ctx: ctx, + callback: callback, + keyFn: keyFn, + }, nil +} + +// Add entries to the list sorter. +// +// Does not call the callback. +// +// Safe to call from concurrent go routines +func (ls *Sorter) Add(entries fs.DirEntries) error { + ls.mu.Lock() + defer ls.mu.Unlock() + ls.entries = append(ls.entries, entries...) + return nil +} + +// Send the sorted entries to the callback. +func (ls *Sorter) Send() error { + ls.mu.Lock() + defer ls.mu.Unlock() + + // Sort the directory entries by Remote + // + // We use a stable sort here just in case there are + // duplicates. Assuming the remote delivers the entries in a + // consistent order, this will give the best user experience + // in syncing as it will use the first entry for the sync + // comparison. + slices.SortStableFunc(ls.entries, func(a, b fs.DirEntry) int { + return cmp.Compare(ls.keyFn(a), ls.keyFn(b)) + }) + return ls.callback(ls.entries) +} + +// CleanUp the Sorter, cleaning up any memory / files. +// +// It is safe and encouraged to call this regardless of whether you +// called Send or not. +// +// This does not call the callback +func (ls *Sorter) CleanUp() { + ls.mu.Lock() + defer ls.mu.Unlock() + + ls.entries = nil +} + +// SortToChan makes a callback for the Sorter which sends the output +// to the channel provided. +func SortToChan(out chan<- fs.DirEntry) fs.ListRCallback { + return func(entries fs.DirEntries) error { + for _, entry := range entries { + out <- entry + } + return nil + } +} diff --git a/fs/list/sorter_test.go b/fs/list/sorter_test.go new file mode 100644 index 000000000..992389887 --- /dev/null +++ b/fs/list/sorter_test.go @@ -0,0 +1,104 @@ +package list + +import ( + "cmp" + "context" + "fmt" + "slices" + "testing" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fstest/mockdir" + "github.com/rclone/rclone/fstest/mockobject" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSorter(t *testing.T) { + ctx := context.Background() + da := mockdir.New("a") + oA := mockobject.Object("A") + callback := func(entries fs.DirEntries) error { + require.Equal(t, fs.DirEntries{oA, da}, entries) + return nil + } + ls, err := NewSorter(ctx, callback, nil) + require.NoError(t, err) + assert.Equal(t, fmt.Sprintf("%p", callback), fmt.Sprintf("%p", ls.callback)) + assert.Equal(t, fmt.Sprintf("%p", identityKeyFn), fmt.Sprintf("%p", ls.keyFn)) + assert.Equal(t, fs.DirEntries(nil), ls.entries) + + // Test Add + err = ls.Add(fs.DirEntries{da}) + require.NoError(t, err) + assert.Equal(t, fs.DirEntries{da}, ls.entries) + err = ls.Add(fs.DirEntries{oA}) + require.NoError(t, err) + assert.Equal(t, fs.DirEntries{da, oA}, ls.entries) + + // Test Send + err = ls.Send() + require.NoError(t, err) + + // Test Cleanup + ls.CleanUp() + assert.Equal(t, fs.DirEntries(nil), ls.entries) +} + +func TestSorterIdentity(t *testing.T) { + ctx := context.Background() + cmpFn := func(a, b fs.DirEntry) int { + return cmp.Compare(a.Remote(), b.Remote()) + } + callback := func(entries fs.DirEntries) error { + assert.True(t, slices.IsSortedFunc(entries, cmpFn)) + assert.Equal(t, "a", entries[0].Remote()) + return nil + } + ls, err := NewSorter(ctx, callback, nil) + require.NoError(t, err) + defer ls.CleanUp() + + // Add things in reverse alphabetical order + for i := 'z'; i >= 'a'; i-- { + err = ls.Add(fs.DirEntries{mockobject.Object(string(i))}) + require.NoError(t, err) + } + assert.Equal(t, "z", ls.entries[0].Remote()) + assert.False(t, slices.IsSortedFunc(ls.entries, cmpFn)) + + // Check they get sorted + err = ls.Send() + require.NoError(t, err) +} + +func TestSorterKeyFn(t *testing.T) { + ctx := context.Background() + keyFn := func(entry fs.DirEntry) string { + s := entry.Remote() + return string('z' - s[0]) + } + cmpFn := func(a, b fs.DirEntry) int { + return cmp.Compare(keyFn(a), keyFn(b)) + } + callback := func(entries fs.DirEntries) error { + assert.True(t, slices.IsSortedFunc(entries, cmpFn)) + assert.Equal(t, "z", entries[0].Remote()) + return nil + } + ls, err := NewSorter(ctx, callback, keyFn) + require.NoError(t, err) + defer ls.CleanUp() + + // Add things in reverse sorted order + for i := 'a'; i <= 'z'; i++ { + err = ls.Add(fs.DirEntries{mockobject.Object(string(i))}) + require.NoError(t, err) + } + assert.Equal(t, "a", ls.entries[0].Remote()) + assert.False(t, slices.IsSortedFunc(ls.entries, cmpFn)) + + // Check they get sorted + err = ls.Send() + require.NoError(t, err) +}