mirror of
https://github.com/rclone/rclone.git
synced 2024-12-31 19:40:25 +01:00
07133b892d
Before this change, when using --fast-list on a directory containing more than a few thousand directories, DirTree.CheckParents became very slow, taking up to 24 hours for a directory with 1,000,000 directories in it. This is because it becomes an O(N²) operation: DirTree.Find has to search each directory in a linear fashion, as it is stored as a slice. This patch fixes the problem by scanning the DirTree for directories before starting the CheckParents process, so it never has to call DirTree.Find. After the fix, calling DirTree.CheckParents on a directory with 1,000,000 directories in it will take about 1 second. Anything which calls DirTree.Find can potentially have bad performance, so in the future we should redesign the DirTree to use a different underlying data structure or to have an index. https://forum.rclone.org/t/almost-24-hours-cpu-compute-time-during-sync-between-two-large-s3-buckets/39375/
222 lines
3.8 KiB
Go
222 lines
3.8 KiB
Go
package dirtree
|
|
|
|
import (
|
|
"fmt"
|
|
"testing"
|
|
|
|
"github.com/rclone/rclone/fstest/mockdir"
|
|
"github.com/rclone/rclone/fstest/mockobject"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestNew(t *testing.T) {
|
|
dt := New()
|
|
assert.Equal(t, "", dt.String())
|
|
}
|
|
|
|
func TestParentDir(t *testing.T) {
|
|
assert.Equal(t, "root/parent", parentDir("root/parent/file"))
|
|
assert.Equal(t, "parent", parentDir("parent/file"))
|
|
assert.Equal(t, "", parentDir("parent"))
|
|
assert.Equal(t, "", parentDir(""))
|
|
}
|
|
|
|
// TestDirTreeAdd adds two mock objects to a new tree with Add and, after
// each addition, compares dt.String() with the expected listing.
//
// The first object ("potato") is expected to be listed under "/", the
// second ("dir/subdir/sausage") under "dir/subdir/".
//
// NOTE(review): the expected raw strings in this copy contain "|" lines and
// no leading whitespace; this looks like extraction damage - confirm the
// exact expected text against the upstream file before relying on it.
func TestDirTreeAdd(t *testing.T) {
|
|
dt := New()
|
|
o := mockobject.New("potato")
|
|
dt.Add(o)
|
|
assert.Equal(t, `/
|
|
potato
|
|
`, dt.String())
|
|
// Add a second object two directory levels down.
o = mockobject.New("dir/subdir/sausage")
|
|
dt.Add(o)
|
|
assert.Equal(t, `/
|
|
potato
|
|
dir/subdir/
|
|
sausage
|
|
`, dt.String())
|
|
}
|
|
|
|
// TestDirTreeAddDir adds mock directories to a new tree and compares
// dt.String() with the expected listing after each step.
//
// The first directory ("potato") is added with Add, the following two with
// AddDir. The last AddDir call uses the empty remote "" and the expected
// listing after it is the same as the one before it.
//
// NOTE(review): the expected raw strings in this copy contain "|" lines and
// no leading whitespace; this looks like extraction damage - confirm the
// exact expected text against the upstream file before relying on it.
func TestDirTreeAddDir(t *testing.T) {
|
|
dt := New()
|
|
d := mockdir.New("potato")
|
|
// Note that this first directory goes in via Add rather than AddDir.
dt.Add(d)
|
|
assert.Equal(t, `/
|
|
potato/
|
|
`, dt.String())
|
|
d = mockdir.New("dir/subdir/sausage")
|
|
dt.AddDir(d)
|
|
assert.Equal(t, `/
|
|
potato/
|
|
dir/subdir/
|
|
sausage/
|
|
dir/subdir/sausage/
|
|
`, dt.String())
|
|
// Adding the directory with the empty remote is expected to leave the
// listing as it was.
d = mockdir.New("")
|
|
dt.AddDir(d)
|
|
assert.Equal(t, `/
|
|
potato/
|
|
dir/subdir/
|
|
sausage/
|
|
dir/subdir/sausage/
|
|
`, dt.String())
|
|
}
|
|
|
|
// TestDirTreeAddEntry adds one mock directory and one mock object to a new
// tree with AddEntry and compares dt.String() with the expected listing.
//
// The expected listing contains headings for "/" and "dir/" in addition to
// the directories the entries were added under, although neither "dir" nor
// the root was added explicitly by this test.
//
// NOTE(review): the expected raw string in this copy contains "|" lines and
// no leading whitespace; this looks like extraction damage - confirm the
// exact expected text against the upstream file before relying on it.
func TestDirTreeAddEntry(t *testing.T) {
|
|
dt := New()
|
|
|
|
d := mockdir.New("dir/subdir/sausagedir")
|
|
dt.AddEntry(d)
|
|
o := mockobject.New("dir/subdir2/sausage2")
|
|
dt.AddEntry(o)
|
|
|
|
assert.Equal(t, `/
|
|
dir/
|
|
dir/
|
|
subdir/
|
|
subdir2/
|
|
dir/subdir/
|
|
sausagedir/
|
|
dir/subdir/sausagedir/
|
|
dir/subdir2/
|
|
sausage2
|
|
`, dt.String())
|
|
}
|
|
|
|
func TestDirTreeFind(t *testing.T) {
|
|
dt := New()
|
|
|
|
parent, foundObj := dt.Find("dir/subdir/sausage")
|
|
assert.Equal(t, "dir/subdir", parent)
|
|
assert.Nil(t, foundObj)
|
|
|
|
o := mockobject.New("dir/subdir/sausage")
|
|
dt.Add(o)
|
|
|
|
parent, foundObj = dt.Find("dir/subdir/sausage")
|
|
assert.Equal(t, "dir/subdir", parent)
|
|
assert.Equal(t, o, foundObj)
|
|
}
|
|
|
|
// TestDirTreeCheckParent adds a single object at "dir/subdir/sausage",
// checks that dt.String() initially lists only the "dir/subdir/" heading,
// then calls checkParent("", "dir/subdir", nil) and checks that the listing
// now also has "/" and "dir/" headings.
//
// NOTE(review): the expected raw strings in this copy contain "|" lines and
// no leading whitespace; this looks like extraction damage - confirm the
// exact expected text against the upstream file before relying on it.
func TestDirTreeCheckParent(t *testing.T) {
|
|
dt := New()
|
|
|
|
o := mockobject.New("dir/subdir/sausage")
|
|
dt.Add(o)
|
|
|
|
// Listing before the parents have been checked.
assert.Equal(t, `dir/subdir/
|
|
sausage
|
|
`, dt.String())
|
|
|
|
dt.checkParent("", "dir/subdir", nil)
|
|
|
|
// Listing after checkParent has run.
assert.Equal(t, `/
|
|
dir/
|
|
dir/
|
|
subdir/
|
|
dir/subdir/
|
|
sausage
|
|
`, dt.String())
|
|
|
|
}
|
|
|
|
// TestDirTreeCheckParents adds two objects in sibling directories
// ("dir/subdir" and "dir/subdir2"), calls CheckParents(""), sorts the tree
// and compares dt.String() with the expected listing, which includes "/"
// and "dir/" headings.
//
// NOTE(review): the expected raw string in this copy contains "|" lines and
// no leading whitespace; this looks like extraction damage - confirm the
// exact expected text against the upstream file before relying on it.
func TestDirTreeCheckParents(t *testing.T) {
|
|
dt := New()
|
|
|
|
dt.Add(mockobject.New("dir/subdir/sausage"))
|
|
dt.Add(mockobject.New("dir/subdir2/sausage2"))
|
|
|
|
dt.CheckParents("")
|
|
dt.Sort() // sort since the exact order of adding parents is not defined
|
|
|
|
assert.Equal(t, `/
|
|
dir/
|
|
dir/
|
|
subdir/
|
|
subdir2/
|
|
dir/subdir/
|
|
sausage
|
|
dir/subdir2/
|
|
sausage2
|
|
`, dt.String())
|
|
}
|
|
|
|
// TestDirTreeSort adds objects "B" then "A" to the same directory, checks
// that dt.String() lists them in that insertion order, calls Sort and
// checks that the listing is now "A" then "B".
//
// NOTE(review): the expected raw strings in this copy contain "|" lines and
// no leading whitespace; this looks like extraction damage - confirm the
// exact expected text against the upstream file before relying on it.
func TestDirTreeSort(t *testing.T) {
|
|
dt := New()
|
|
|
|
dt.Add(mockobject.New("dir/subdir/B"))
|
|
dt.Add(mockobject.New("dir/subdir/A"))
|
|
|
|
// Before sorting: insertion order.
assert.Equal(t, `dir/subdir/
|
|
B
|
|
A
|
|
`, dt.String())
|
|
|
|
dt.Sort()
|
|
|
|
// After sorting: A before B.
assert.Equal(t, `dir/subdir/
|
|
A
|
|
B
|
|
`, dt.String())
|
|
}
|
|
|
|
func TestDirTreeDirs(t *testing.T) {
|
|
dt := New()
|
|
|
|
dt.Add(mockobject.New("dir/subdir/sausage"))
|
|
dt.Add(mockobject.New("dir/subdir2/sausage2"))
|
|
|
|
dt.CheckParents("")
|
|
|
|
assert.Equal(t, []string{
|
|
"",
|
|
"dir",
|
|
"dir/subdir",
|
|
"dir/subdir2",
|
|
}, dt.Dirs())
|
|
}
|
|
|
|
// TestDirTreePrune adds five objects (one at the root, three below "dir"
// and one below "dir2"), runs CheckParents(""), then calls Prune with a map
// that marks "dir" as true. Prune must not return an error, and the
// expected listing afterwards contains only "file" and "dir2/" under "/"
// and "file" under "dir2/".
//
// NOTE(review): the expected raw string in this copy contains "|" lines and
// no leading whitespace; this looks like extraction damage - confirm the
// exact expected text against the upstream file before relying on it.
func TestDirTreePrune(t *testing.T) {
|
|
dt := New()
|
|
|
|
dt.Add(mockobject.New("file"))
|
|
dt.Add(mockobject.New("dir/subdir/sausage"))
|
|
dt.Add(mockobject.New("dir/subdir2/sausage2"))
|
|
dt.Add(mockobject.New("dir/file"))
|
|
dt.Add(mockobject.New("dir2/file"))
|
|
|
|
dt.CheckParents("")
|
|
|
|
// Ask for "dir" to be pruned.
err := dt.Prune(map[string]bool{
|
|
"dir": true,
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
assert.Equal(t, `/
|
|
file
|
|
dir2/
|
|
dir2/
|
|
file
|
|
`, dt.String())
|
|
|
|
}
|
|
|
|
func BenchmarkCheckParents(b *testing.B) {
|
|
for _, N := range []int{1e2, 1e3, 1e4, 1e5, 1e6} {
|
|
b.Run(fmt.Sprintf("%d", N), func(b *testing.B) {
|
|
b.StopTimer()
|
|
dt := New()
|
|
for i := 0; i < N; i++ {
|
|
remote := fmt.Sprintf("dir%09d/file%09d.txt", i, 1)
|
|
o := mockobject.New(remote)
|
|
dt.Add(o)
|
|
}
|
|
b.StartTimer()
|
|
for n := 0; n < b.N; n++ {
|
|
dt.CheckParents("")
|
|
}
|
|
})
|
|
}
|
|
}
|