rclone/fs/dirtree/dirtree_test.go
Nick Craig-Wood 73e66a3798 dirtree: fix performance with large directories of directories and --fast-list
Before this change, when using --fast-list on a directory containing
more than a few thousand directories, DirTree.CheckParents became very
slow, taking up to 24 hours for a directory with 1,000,000 directories
in it.

This is because it becomes an O(N²) operation as DirTree.Find has to
search each directory in a linear fashion as it is stored as a slice.

This patch fixes the problem by scanning the DirTree for directories
before starting the CheckParents process so it never has to call
DirTree.Find.

After the fix calling DirTree.CheckParents on a directory with
1,000,000 directories in it will take about 1 second.

Anything which calls DirTree.Find can potentially have bad performance,
so in the future we should redesign the DirTree to use a different
underlying data structure or have an index.

https://forum.rclone.org/t/almost-24-hours-cpu-compute-time-during-sync-between-two-large-s3-buckets/39375/
2023-07-03 14:12:22 +01:00

222 lines
3.8 KiB
Go

package dirtree
import (
"fmt"
"testing"
"github.com/rclone/rclone/fstest/mockdir"
"github.com/rclone/rclone/fstest/mockobject"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNew checks that a freshly constructed DirTree renders as the
// empty string.
func TestNew(t *testing.T) {
	tree := New()
	rendered := tree.String()
	assert.Equal(t, "", rendered)
}
// TestParentDir checks parentDir against a small table of paths,
// including a single-element path and the empty path, both of which
// are expected to yield "".
func TestParentDir(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{in: "root/parent/file", want: "root/parent"},
		{in: "parent/file", want: "parent"},
		{in: "parent", want: ""},
		{in: "", want: ""},
	}
	for _, tc := range cases {
		assert.Equal(t, tc.want, parentDir(tc.in))
	}
}
// TestDirTreeAdd adds two objects with Add and checks the rendered
// tree after each step.
func TestDirTreeAdd(t *testing.T) {
	tree := New()

	// A top-level object is listed under the root directory.
	tree.Add(mockobject.New("potato"))
	wantAfterPotato := `/
potato
`
	assert.Equal(t, wantAfterPotato, tree.String())

	// A nested object is listed under its immediate parent only; the
	// expected output has no separate "dir/" section.
	tree.Add(mockobject.New("dir/subdir/sausage"))
	wantAfterSausage := `/
potato
dir/subdir/
sausage
`
	assert.Equal(t, wantAfterSausage, tree.String())
}
// TestDirTreeAddDir checks the rendered tree after adding directory
// entries: first with Add, then with AddDir, and finally with AddDir
// on a directory whose remote is the empty string.
func TestDirTreeAddDir(t *testing.T) {
	tree := New()

	// Step 1: a directory entry inserted with plain Add.
	tree.Add(mockdir.New("potato"))
	wantAfterPotato := `/
potato/
`
	assert.Equal(t, wantAfterPotato, tree.String())

	// Step 2: a nested directory inserted with AddDir shows up both in
	// its parent and as a section of its own.
	tree.AddDir(mockdir.New("dir/subdir/sausage"))
	wantAfterSausage := `/
potato/
dir/subdir/
sausage/
dir/subdir/sausage/
`
	assert.Equal(t, wantAfterSausage, tree.String())

	// Step 3: AddDir with an empty remote must leave the output unchanged.
	tree.AddDir(mockdir.New(""))
	assert.Equal(t, wantAfterSausage, tree.String())
}
// TestDirTreeAddEntry adds one directory and one object with AddEntry
// and checks that the rendered tree also contains their parent
// directories.
func TestDirTreeAddEntry(t *testing.T) {
	tree := New()

	dirEntry := mockdir.New("dir/subdir/sausagedir")
	tree.AddEntry(dirEntry)

	objEntry := mockobject.New("dir/subdir2/sausage2")
	tree.AddEntry(objEntry)

	want := `/
dir/
dir/
subdir/
subdir2/
dir/subdir/
sausagedir/
dir/subdir/sausagedir/
dir/subdir2/
sausage2
`
	assert.Equal(t, want, tree.String())
}
// TestDirTreeFind checks Find before and after the looked-up object
// has been added: the parent is reported both times, while the entry
// is nil until the object exists in the tree.
func TestDirTreeFind(t *testing.T) {
	const remote = "dir/subdir/sausage"
	tree := New()

	// Lookup on an empty tree.
	missParent, missEntry := tree.Find(remote)
	assert.Equal(t, "dir/subdir", missParent)
	assert.Nil(t, missEntry)

	// Lookup once the object is present.
	obj := mockobject.New(remote)
	tree.Add(obj)

	hitParent, hitEntry := tree.Find(remote)
	assert.Equal(t, "dir/subdir", hitParent)
	assert.Equal(t, obj, hitEntry)
}
// TestDirTreeCheckParent checks that checkParent fills in the
// directory entries between the root and "dir/subdir".
func TestDirTreeCheckParent(t *testing.T) {
	tree := New()
	tree.Add(mockobject.New("dir/subdir/sausage"))

	// Before: only the object's immediate parent is rendered.
	wantBefore := `dir/subdir/
sausage
`
	assert.Equal(t, wantBefore, tree.String())

	tree.checkParent("", "dir/subdir", nil)

	// After: the root and "dir" sections are rendered too.
	wantAfter := `/
dir/
dir/
subdir/
dir/subdir/
sausage
`
	assert.Equal(t, wantAfter, tree.String())
}
// TestDirTreeCheckParents checks that CheckParents adds the missing
// parent directories for two objects in sibling subdirectories.
func TestDirTreeCheckParents(t *testing.T) {
	tree := New()
	for _, remote := range []string{
		"dir/subdir/sausage",
		"dir/subdir2/sausage2",
	} {
		tree.Add(mockobject.New(remote))
	}

	tree.CheckParents("")
	// The order in which parents get added is not defined, so sort
	// before comparing.
	tree.Sort()

	want := `/
dir/
dir/
subdir/
subdir2/
dir/subdir/
sausage
dir/subdir2/
sausage2
`
	assert.Equal(t, want, tree.String())
}
// TestDirTreeSort checks that entries render in insertion order until
// Sort is called, after which they render in sorted order.
func TestDirTreeSort(t *testing.T) {
	tree := New()
	// Deliberately insert "B" before "A".
	tree.Add(mockobject.New("dir/subdir/B"))
	tree.Add(mockobject.New("dir/subdir/A"))

	unsorted := `dir/subdir/
B
A
`
	assert.Equal(t, unsorted, tree.String())

	tree.Sort()

	sorted := `dir/subdir/
A
B
`
	assert.Equal(t, sorted, tree.String())
}
// TestDirTreeDirs checks the list returned by Dirs after CheckParents
// has been run on a tree holding two nested objects.
func TestDirTreeDirs(t *testing.T) {
	tree := New()
	tree.Add(mockobject.New("dir/subdir/sausage"))
	tree.Add(mockobject.New("dir/subdir2/sausage2"))
	tree.CheckParents("")

	want := []string{"", "dir", "dir/subdir", "dir/subdir2"}
	got := tree.Dirs()
	assert.Equal(t, want, got)
}
// TestDirTreePrune builds a small tree, prunes "dir" and checks that
// only the root file and "dir2" remain in the rendered output.
func TestDirTreePrune(t *testing.T) {
	tree := New()

	// Insertion order matches the order of this list.
	fixtures := []string{
		"file",
		"dir/subdir/sausage",
		"dir/subdir2/sausage2",
		"dir/file",
		"dir2/file",
	}
	for _, remote := range fixtures {
		tree.Add(mockobject.New(remote))
	}
	tree.CheckParents("")

	toPrune := map[string]bool{
		"dir": true,
	}
	pruneErr := tree.Prune(toPrune)
	require.NoError(t, pruneErr)

	want := `/
file
dir2/
dir2/
file
`
	assert.Equal(t, want, tree.String())
}
// BenchmarkCheckParents times DirTree.CheckParents on trees of
// increasing size. Each sub-benchmark is named after its size and
// builds that many directories with one file in each.
func BenchmarkCheckParents(b *testing.B) {
	sizes := []int{1e2, 1e3, 1e4, 1e5, 1e6}
	for _, size := range sizes {
		name := fmt.Sprintf("%d", size)
		b.Run(name, func(sb *testing.B) {
			// Keep tree construction out of the measured time.
			sb.StopTimer()
			tree := New()
			for dirNum := 0; dirNum < size; dirNum++ {
				tree.Add(mockobject.New(fmt.Sprintf("dir%09d/file%09d.txt", dirNum, 1)))
			}
			sb.StartTimer()

			for done := 0; done < sb.N; done++ {
				tree.CheckParents("")
			}
		})
	}
}