list: Implement Sorter to sort directory entries

Later this will be extended to do out of memory sorts
This commit is contained in:
Nick Craig-Wood 2024-11-28 17:11:33 +00:00
parent 1a6f575e93
commit 3e0af30704
2 changed files with 209 additions and 0 deletions

105
fs/list/sorter.go Normal file
View File

@ -0,0 +1,105 @@
package list
import (
"cmp"
"context"
"slices"
"sync"
"github.com/rclone/rclone/fs"
)
// Sorter implements an efficient mechanism for sorting list entries.
//
// If there are a large number of entries, this may be done on disk
// instead of in memory.
//
// Supply entries with the Add method, call Send at the end to deliver
// the sorted entries and finalise with CleanUp regardless of whether
// you called Add or Send.
//
// Sorted entries are delivered to the callback supplied to NewSorter
// when the Send method is called.
type Sorter struct {
ctx context.Context
mu sync.Mutex
callback fs.ListRCallback
entries fs.DirEntries
keyFn KeyFn
}
// KeyFn turns an entry into a sort key
type KeyFn func(entry fs.DirEntry) string
// identityKeyFn maps an entry to its Remote
func identityKeyFn(entry fs.DirEntry) string {
return entry.Remote()
}
// NewSorter creates a new Sorter with callback for sorted entries to
// be delivered to. keyFn is used to process each entry to get a key
// function, if nil then it will just use entry.Remote()
func NewSorter(ctx context.Context, callback fs.ListRCallback, keyFn KeyFn) (*Sorter, error) {
if keyFn == nil {
keyFn = identityKeyFn
}
return &Sorter{
ctx: ctx,
callback: callback,
keyFn: keyFn,
}, nil
}
// Add entries to the list sorter.
//
// Does not call the callback.
//
// Safe to call from concurrent go routines
func (ls *Sorter) Add(entries fs.DirEntries) error {
ls.mu.Lock()
defer ls.mu.Unlock()
ls.entries = append(ls.entries, entries...)
return nil
}
// Send the sorted entries to the callback.
func (ls *Sorter) Send() error {
ls.mu.Lock()
defer ls.mu.Unlock()
// Sort the directory entries by Remote
//
// We use a stable sort here just in case there are
// duplicates. Assuming the remote delivers the entries in a
// consistent order, this will give the best user experience
// in syncing as it will use the first entry for the sync
// comparison.
slices.SortStableFunc(ls.entries, func(a, b fs.DirEntry) int {
return cmp.Compare(ls.keyFn(a), ls.keyFn(b))
})
return ls.callback(ls.entries)
}
// CleanUp the Sorter, cleaning up any memory / files.
//
// It is safe and encouraged to call this regardless of whether you
// called Send or not.
//
// This does not call the callback
func (ls *Sorter) CleanUp() {
ls.mu.Lock()
defer ls.mu.Unlock()
ls.entries = nil
}
// SortToChan makes a callback for the Sorter which sends the output
// to the channel provided.
func SortToChan(out chan<- fs.DirEntry) fs.ListRCallback {
return func(entries fs.DirEntries) error {
for _, entry := range entries {
out <- entry
}
return nil
}
}

104
fs/list/sorter_test.go Normal file
View File

@ -0,0 +1,104 @@
package list
import (
"cmp"
"context"
"fmt"
"slices"
"testing"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fstest/mockdir"
"github.com/rclone/rclone/fstest/mockobject"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestSorter(t *testing.T) {
ctx := context.Background()
da := mockdir.New("a")
oA := mockobject.Object("A")
callback := func(entries fs.DirEntries) error {
require.Equal(t, fs.DirEntries{oA, da}, entries)
return nil
}
ls, err := NewSorter(ctx, callback, nil)
require.NoError(t, err)
assert.Equal(t, fmt.Sprintf("%p", callback), fmt.Sprintf("%p", ls.callback))
assert.Equal(t, fmt.Sprintf("%p", identityKeyFn), fmt.Sprintf("%p", ls.keyFn))
assert.Equal(t, fs.DirEntries(nil), ls.entries)
// Test Add
err = ls.Add(fs.DirEntries{da})
require.NoError(t, err)
assert.Equal(t, fs.DirEntries{da}, ls.entries)
err = ls.Add(fs.DirEntries{oA})
require.NoError(t, err)
assert.Equal(t, fs.DirEntries{da, oA}, ls.entries)
// Test Send
err = ls.Send()
require.NoError(t, err)
// Test Cleanup
ls.CleanUp()
assert.Equal(t, fs.DirEntries(nil), ls.entries)
}
func TestSorterIdentity(t *testing.T) {
ctx := context.Background()
cmpFn := func(a, b fs.DirEntry) int {
return cmp.Compare(a.Remote(), b.Remote())
}
callback := func(entries fs.DirEntries) error {
assert.True(t, slices.IsSortedFunc(entries, cmpFn))
assert.Equal(t, "a", entries[0].Remote())
return nil
}
ls, err := NewSorter(ctx, callback, nil)
require.NoError(t, err)
defer ls.CleanUp()
// Add things in reverse alphabetical order
for i := 'z'; i >= 'a'; i-- {
err = ls.Add(fs.DirEntries{mockobject.Object(string(i))})
require.NoError(t, err)
}
assert.Equal(t, "z", ls.entries[0].Remote())
assert.False(t, slices.IsSortedFunc(ls.entries, cmpFn))
// Check they get sorted
err = ls.Send()
require.NoError(t, err)
}
func TestSorterKeyFn(t *testing.T) {
ctx := context.Background()
keyFn := func(entry fs.DirEntry) string {
s := entry.Remote()
return string('z' - s[0])
}
cmpFn := func(a, b fs.DirEntry) int {
return cmp.Compare(keyFn(a), keyFn(b))
}
callback := func(entries fs.DirEntries) error {
assert.True(t, slices.IsSortedFunc(entries, cmpFn))
assert.Equal(t, "z", entries[0].Remote())
return nil
}
ls, err := NewSorter(ctx, callback, keyFn)
require.NoError(t, err)
defer ls.CleanUp()
// Add things in reverse sorted order
for i := 'a'; i <= 'z'; i++ {
err = ls.Add(fs.DirEntries{mockobject.Object(string(i))})
require.NoError(t, err)
}
assert.Equal(t, "a", ls.entries[0].Remote())
assert.False(t, slices.IsSortedFunc(ls.entries, cmpFn))
// Check they get sorted
err = ls.Send()
require.NoError(t, err)
}