From bc70bff12539db55b71e631b7cc5ac8aa1931e87 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 27 Jun 2019 15:54:43 +0100 Subject: [PATCH] fs/dirtree: factor DirTree out of fs/walk and add tests --- cmd/tree/tree.go | 7 +- fs/dirtree/dirtree.go | 209 +++++++++++++++++++++++++++++++++++++ fs/dirtree/dirtree_test.go | 193 ++++++++++++++++++++++++++++++++++ fs/march/march.go | 3 +- fs/walk/walk.go | 204 ++++-------------------------------- vfs/dir.go | 5 +- 6 files changed, 431 insertions(+), 190 deletions(-) create mode 100644 fs/dirtree/dirtree.go create mode 100644 fs/dirtree/dirtree_test.go diff --git a/cmd/tree/tree.go b/cmd/tree/tree.go index 13d8ec0ab..e93f5f125 100644 --- a/cmd/tree/tree.go +++ b/cmd/tree/tree.go @@ -13,6 +13,7 @@ import ( "github.com/a8m/tree" "github.com/ncw/rclone/cmd" "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/fs/dirtree" "github.com/ncw/rclone/fs/log" "github.com/ncw/rclone/fs/walk" "github.com/pkg/errors" @@ -186,10 +187,10 @@ func (to *FileInfo) String() string { } // Fs maps an fs.Fs into a tree.Fs -type Fs walk.DirTree +type Fs dirtree.DirTree // NewFs creates a new tree -func NewFs(dirs walk.DirTree) Fs { +func NewFs(dirs dirtree.DirTree) Fs { return Fs(dirs) } @@ -201,7 +202,7 @@ func (dirs Fs) Stat(filePath string) (fi os.FileInfo, err error) { if filePath == "" { return &FileInfo{fs.NewDir("", time.Now())}, nil } - _, entry := walk.DirTree(dirs).Find(filePath) + _, entry := dirtree.DirTree(dirs).Find(filePath) if entry == nil { return nil, errors.Errorf("Couldn't find %q in directory cache", filePath) } diff --git a/fs/dirtree/dirtree.go b/fs/dirtree/dirtree.go new file mode 100644 index 000000000..2cf2da9f2 --- /dev/null +++ b/fs/dirtree/dirtree.go @@ -0,0 +1,209 @@ +// Package dirtree contains the DirTree type which is used for +// building filesystem heirachies in memory. +package dirtree + +import ( + "bytes" + "fmt" + "path" + "sort" + "time" + + "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/lib/errors" +) + +// DirTree is a map of directories to entries +type DirTree map[string]fs.DirEntries + +// New returns a fresh DirTree +func New() DirTree { + return make(DirTree) +} + +// parentDir finds the parent directory of path +func parentDir(entryPath string) string { + dirPath := path.Dir(entryPath) + if dirPath == "." { + dirPath = "" + } + return dirPath +} + +// Add an entry to the tree +// it doesn't create parents +func (dt DirTree) Add(entry fs.DirEntry) { + dirPath := parentDir(entry.Remote()) + dt[dirPath] = append(dt[dirPath], entry) +} + +// AddDir adds a directory entry to the tree +// this creates the directory itself if required +// it doesn't create parents +func (dt DirTree) AddDir(entry fs.DirEntry) { + dt.Add(entry) + // create the directory itself if it doesn't exist already + dirPath := entry.Remote() + if _, ok := dt[dirPath]; !ok { + dt[dirPath] = nil + } +} + +// AddEntry adds the entry and creates the parents for it regardless +// of whether it is a file or a directory. +func (dt DirTree) AddEntry(entry fs.DirEntry) { + switch entry.(type) { + case fs.Directory: + dt.AddDir(entry) + case fs.Object: + dt.Add(entry) + default: + panic("unknown entry type") + } + remoteParent := parentDir(entry.Remote()) + dt.CheckParent("", remoteParent) +} + +// Find returns the DirEntry for filePath or nil if not found +func (dt DirTree) Find(filePath string) (parentPath string, entry fs.DirEntry) { + parentPath = parentDir(filePath) + for _, entry := range dt[parentPath] { + if entry.Remote() == filePath { + return parentPath, entry + } + } + return parentPath, nil +} + +// CheckParent checks that dirPath has a *Dir in its parent +func (dt DirTree) CheckParent(root, dirPath string) { + if dirPath == root { + return + } + parentPath, entry := dt.Find(dirPath) + if entry != nil { + return + } + dt[parentPath] = append(dt[parentPath], fs.NewDir(dirPath, time.Now())) + dt.CheckParent(root, parentPath) +} + +// CheckParents checks every directory in the tree has *Dir in its parent +func (dt DirTree) CheckParents(root string) { + for dirPath := range dt { + dt.CheckParent(root, dirPath) + } +} + +// Sort sorts all the Entries +func (dt DirTree) Sort() { + for _, entries := range dt { + sort.Stable(entries) + } +} + +// Dirs returns the directories in sorted order +func (dt DirTree) Dirs() (dirNames []string) { + for dirPath := range dt { + dirNames = append(dirNames, dirPath) + } + sort.Strings(dirNames) + return dirNames +} + +// Prune remove directories from a directory tree. dirNames contains +// all directories to remove as keys, with true as values. dirNames +// will be modified in the function. +func (dt DirTree) Prune(dirNames map[string]bool) error { + // We use map[string]bool to avoid recursion (and potential + // stack exhaustion). + + // First we need delete directories from their parents. + for dName, remove := range dirNames { + if !remove { + // Currently all values should be + // true, therefore this should not + // happen. But this makes function + // more predictable. + fs.Infof(dName, "Directory in the map for prune, but the value is false") + continue + } + if dName == "" { + // if dName is root, do nothing (no parent exist) + continue + } + parent := parentDir(dName) + // It may happen that dt does not have a dName key, + // since directory was excluded based on a filter. In + // such case the loop will be skipped. + for i, entry := range dt[parent] { + switch x := entry.(type) { + case fs.Directory: + if x.Remote() == dName { + // the slice is not sorted yet + // to delete item + // a) replace it with the last one + dt[parent][i] = dt[parent][len(dt[parent])-1] + // b) remove last + dt[parent] = dt[parent][:len(dt[parent])-1] + // we modify a slice within a loop, but we stop + // iterating immediately + break + } + case fs.Object: + // do nothing + default: + return errors.Errorf("unknown object type %T", entry) + + } + } + } + + for len(dirNames) > 0 { + // According to golang specs, if new keys were added + // during range iteration, they may be skipped. + for dName, remove := range dirNames { + if !remove { + fs.Infof(dName, "Directory in the map for prune, but the value is false") + continue + } + // First, add all subdirectories to dirNames. + + // It may happen that dt[dName] does not exist. + // If so, the loop will be skipped. + for _, entry := range dt[dName] { + switch x := entry.(type) { + case fs.Directory: + excludeDir := x.Remote() + dirNames[excludeDir] = true + case fs.Object: + // do nothing + default: + return errors.Errorf("unknown object type %T", entry) + + } + } + // Then remove current directory from DirTree + delete(dt, dName) + // and from dirNames + delete(dirNames, dName) + } + } + return nil +} + +// String emits a simple representation of the DirTree +func (dt DirTree) String() string { + out := new(bytes.Buffer) + for _, dir := range dt.Dirs() { + _, _ = fmt.Fprintf(out, "%s/\n", dir) + for _, entry := range dt[dir] { + flag := "" + if _, ok := entry.(fs.Directory); ok { + flag = "/" + } + _, _ = fmt.Fprintf(out, " %s%s\n", path.Base(entry.Remote()), flag) + } + } + return out.String() +} diff --git a/fs/dirtree/dirtree_test.go b/fs/dirtree/dirtree_test.go new file mode 100644 index 000000000..7971174b6 --- /dev/null +++ b/fs/dirtree/dirtree_test.go @@ -0,0 +1,193 @@ +package dirtree + +import ( + "testing" + + "github.com/ncw/rclone/fstest/mockdir" + "github.com/ncw/rclone/fstest/mockobject" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNew(t *testing.T) { + dt := New() + assert.Equal(t, "", dt.String()) +} + +func TestParentDir(t *testing.T) { + assert.Equal(t, "root/parent", parentDir("root/parent/file")) + assert.Equal(t, "parent", parentDir("parent/file")) + assert.Equal(t, "", parentDir("parent")) + assert.Equal(t, "", parentDir("")) +} + +func TestDirTreeAdd(t *testing.T) { + dt := New() + o := mockobject.New("potato") + dt.Add(o) + assert.Equal(t, `/ + potato +`, dt.String()) + o = mockobject.New("dir/subdir/sausage") + dt.Add(o) + assert.Equal(t, `/ + potato +dir/subdir/ + sausage +`, dt.String()) +} + +func TestDirTreeAddDir(t *testing.T) { + dt := New() + d := mockdir.New("potato") + dt.Add(d) + assert.Equal(t, `/ + potato/ +`, dt.String()) + d = mockdir.New("dir/subdir/sausage") + dt.AddDir(d) + assert.Equal(t, `/ + potato/ +dir/subdir/ + sausage/ +dir/subdir/sausage/ +`, dt.String()) +} + +func TestDirTreeAddEntry(t *testing.T) { + dt := New() + + d := mockdir.New("dir/subdir/sausagedir") + dt.AddEntry(d) + o := mockobject.New("dir/subdir2/sausage2") + dt.AddEntry(o) + + assert.Equal(t, `/ + dir/ +dir/ + subdir/ + subdir2/ +dir/subdir/ + sausagedir/ +dir/subdir/sausagedir/ +dir/subdir2/ + sausage2 +`, dt.String()) +} + +func TestDirTreeFind(t *testing.T) { + dt := New() + + parent, foundObj := dt.Find("dir/subdir/sausage") + assert.Equal(t, "dir/subdir", parent) + assert.Nil(t, foundObj) + + o := mockobject.New("dir/subdir/sausage") + dt.Add(o) + + parent, foundObj = dt.Find("dir/subdir/sausage") + assert.Equal(t, "dir/subdir", parent) + assert.Equal(t, o, foundObj) +} + +func TestDirTreeCheckParent(t *testing.T) { + dt := New() + + o := mockobject.New("dir/subdir/sausage") + dt.Add(o) + + assert.Equal(t, `dir/subdir/ + sausage +`, dt.String()) + + dt.CheckParent("", "dir/subdir") + + assert.Equal(t, `/ + dir/ +dir/ + subdir/ +dir/subdir/ + sausage +`, dt.String()) + +} + +func TestDirTreeCheckParents(t *testing.T) { + dt := New() + + dt.Add(mockobject.New("dir/subdir/sausage")) + dt.Add(mockobject.New("dir/subdir2/sausage2")) + + dt.CheckParents("") + + assert.Equal(t, `/ + dir/ +dir/ + subdir/ + subdir2/ +dir/subdir/ + sausage +dir/subdir2/ + sausage2 +`, dt.String()) +} + +func TestDirTreeSort(t *testing.T) { + dt := New() + + dt.Add(mockobject.New("dir/subdir/B")) + dt.Add(mockobject.New("dir/subdir/A")) + + assert.Equal(t, `dir/subdir/ + B + A +`, dt.String()) + + dt.Sort() + + assert.Equal(t, `dir/subdir/ + A + B +`, dt.String()) +} + +func TestDirTreeDirs(t *testing.T) { + dt := New() + + dt.Add(mockobject.New("dir/subdir/sausage")) + dt.Add(mockobject.New("dir/subdir2/sausage2")) + + dt.CheckParents("") + + assert.Equal(t, []string{ + "", + "dir", + "dir/subdir", + "dir/subdir2", + }, dt.Dirs()) +} + +func TestDirTreePrune(t *testing.T) { + dt := New() + + dt.Add(mockobject.New("file")) + dt.Add(mockobject.New("dir/subdir/sausage")) + dt.Add(mockobject.New("dir/subdir2/sausage2")) + dt.Add(mockobject.New("dir/file")) + dt.Add(mockobject.New("dir2/file")) + + dt.CheckParents("") + + err := dt.Prune(map[string]bool{ + "dir": true, + }) + require.NoError(t, err) + + assert.Equal(t, `/ + file + dir2/ +dir2/ + file +`, dt.String()) + +} diff --git a/fs/march/march.go b/fs/march/march.go index b1f95a67c..1296f9b4c 100644 --- a/fs/march/march.go +++ b/fs/march/march.go @@ -11,6 +11,7 @@ import ( "github.com/pkg/errors" "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/fs/dirtree" "github.com/ncw/rclone/fs/filter" "github.com/ncw/rclone/fs/list" "github.com/ncw/rclone/fs/walk" @@ -78,7 +79,7 @@ func (m *March) makeListDir(f fs.Fs, includeAll bool) listDirFn { var ( mu sync.Mutex started bool - dirs walk.DirTree + dirs dirtree.DirTree dirsErr error ) return func(dir string) (entries fs.DirEntries, err error) { diff --git a/fs/walk/walk.go b/fs/walk/walk.go index de4990774..ac6a92b23 100644 --- a/fs/walk/walk.go +++ b/fs/walk/walk.go @@ -2,9 +2,7 @@ package walk import ( - "bytes" "context" - "fmt" "path" "sort" "strings" @@ -12,6 +10,7 @@ import ( "time" "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/fs/dirtree" "github.com/ncw/rclone/fs/filter" "github.com/ncw/rclone/fs/list" "github.com/pkg/errors" @@ -215,6 +214,15 @@ func (dm *dirMap) add(dir string, sent bool) { } } +// parentDir finds the parent directory of path +func parentDir(entryPath string) string { + dirPath := path.Dir(entryPath) + if dirPath == "." { + dirPath = "" + } + return dirPath +} + // add all the directories in entries and their parents to the dirMap func (dm *dirMap) addEntries(entries fs.DirEntries) error { dm.mu.Lock() @@ -437,180 +445,8 @@ func walk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel i return <-errs } -// DirTree is a map of directories to entries -type DirTree map[string]fs.DirEntries - -// parentDir finds the parent directory of path -func parentDir(entryPath string) string { - dirPath := path.Dir(entryPath) - if dirPath == "." { - dirPath = "" - } - return dirPath -} - -// add an entry to the tree -func (dt DirTree) add(entry fs.DirEntry) { - dirPath := parentDir(entry.Remote()) - dt[dirPath] = append(dt[dirPath], entry) -} - -// add a directory entry to the tree -func (dt DirTree) addDir(entry fs.DirEntry) { - dt.add(entry) - // create the directory itself if it doesn't exist already - dirPath := entry.Remote() - if _, ok := dt[dirPath]; !ok { - dt[dirPath] = nil - } -} - -// Find returns the DirEntry for filePath or nil if not found -func (dt DirTree) Find(filePath string) (parentPath string, entry fs.DirEntry) { - parentPath = parentDir(filePath) - for _, entry := range dt[parentPath] { - if entry.Remote() == filePath { - return parentPath, entry - } - } - return parentPath, nil -} - -// check that dirPath has a *Dir in its parent -func (dt DirTree) checkParent(root, dirPath string) { - if dirPath == root { - return - } - parentPath, entry := dt.Find(dirPath) - if entry != nil { - return - } - dt[parentPath] = append(dt[parentPath], fs.NewDir(dirPath, time.Now())) - dt.checkParent(root, parentPath) -} - -// check every directory in the tree has *Dir in its parent -func (dt DirTree) checkParents(root string) { - for dirPath := range dt { - dt.checkParent(root, dirPath) - } -} - -// Sort sorts all the Entries -func (dt DirTree) Sort() { - for _, entries := range dt { - sort.Stable(entries) - } -} - -// Dirs returns the directories in sorted order -func (dt DirTree) Dirs() (dirNames []string) { - for dirPath := range dt { - dirNames = append(dirNames, dirPath) - } - sort.Strings(dirNames) - return dirNames -} - -// Prune remove directories from a directory tree. dirNames contains -// all directories to remove as keys, with true as values. dirNames -// will be modified in the function. -func (dt DirTree) Prune(dirNames map[string]bool) error { - // We use map[string]bool to avoid recursion (and potential - // stack exhaustion). - - // First we need delete directories from their parents. - for dName, remove := range dirNames { - if !remove { - // Currently all values should be - // true, therefore this should not - // happen. But this makes function - // more predictable. - fs.Infof(dName, "Directory in the map for prune, but the value is false") - continue - } - if dName == "" { - // if dName is root, do nothing (no parent exist) - continue - } - parent := parentDir(dName) - // It may happen that dt does not have a dName key, - // since directory was excluded based on a filter. In - // such case the loop will be skipped. - for i, entry := range dt[parent] { - switch x := entry.(type) { - case fs.Directory: - if x.Remote() == dName { - // the slice is not sorted yet - // to delete item - // a) replace it with the last one - dt[parent][i] = dt[parent][len(dt[parent])-1] - // b) remove last - dt[parent] = dt[parent][:len(dt[parent])-1] - // we modify a slice within a loop, but we stop - // iterating immediately - break - } - case fs.Object: - // do nothing - default: - return errors.Errorf("unknown object type %T", entry) - - } - } - } - - for len(dirNames) > 0 { - // According to golang specs, if new keys were added - // during range iteration, they may be skipped. - for dName, remove := range dirNames { - if !remove { - fs.Infof(dName, "Directory in the map for prune, but the value is false") - continue - } - // First, add all subdirectories to dirNames. - - // It may happen that dt[dName] does not exist. - // If so, the loop will be skipped. - for _, entry := range dt[dName] { - switch x := entry.(type) { - case fs.Directory: - excludeDir := x.Remote() - dirNames[excludeDir] = true - case fs.Object: - // do nothing - default: - return errors.Errorf("unknown object type %T", entry) - - } - } - // Then remove current directory from DirTree - delete(dt, dName) - // and from dirNames - delete(dirNames, dName) - } - } - return nil -} - -// String emits a simple representation of the DirTree -func (dt DirTree) String() string { - out := new(bytes.Buffer) - for _, dir := range dt.Dirs() { - _, _ = fmt.Fprintf(out, "%s/\n", dir) - for _, entry := range dt[dir] { - flag := "" - if _, ok := entry.(fs.Directory); ok { - flag = "/" - } - _, _ = fmt.Fprintf(out, " %s%s\n", path.Base(entry.Remote()), flag) - } - } - return out.String() -} - -func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll bool, maxLevel int, listR fs.ListRFn) (DirTree, error) { - dirs := make(DirTree) +func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll bool, maxLevel int, listR fs.ListRFn) (dirtree.DirTree, error) { + dirs := dirtree.New() // Entries can come in arbitrary order. We use toPrune to keep // all directories to exclude later. toPrune := make(map[string]bool) @@ -626,14 +462,14 @@ func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll boo // Make sure we don't delete excluded files if not required if includeAll || filter.Active.IncludeObject(ctx, x) { if maxLevel < 0 || slashes <= maxLevel-1 { - dirs.add(x) + dirs.Add(x) } else { // Make sure we include any parent directories of excluded objects dirPath := x.Remote() for ; slashes > maxLevel-1; slashes-- { dirPath = parentDir(dirPath) } - dirs.checkParent(startPath, dirPath) + dirs.CheckParent(startPath, dirPath) } } else { fs.Debugf(x, "Excluded from sync (and deletion)") @@ -656,9 +492,9 @@ func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll boo if maxLevel < 0 || slashes <= maxLevel-1 { if slashes == maxLevel-1 { // Just add the object if at maxLevel - dirs.add(x) + dirs.Add(x) } else { - dirs.addDir(x) + dirs.AddDir(x) } } } else { @@ -673,7 +509,7 @@ func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll boo if err != nil { return nil, err } - dirs.checkParents(startPath) + dirs.CheckParents(startPath) if len(dirs) == 0 { dirs[startPath] = nil } @@ -686,8 +522,8 @@ func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll boo } // Create a DirTree using List -func walkNDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listDir listDirFunc) (DirTree, error) { - dirs := make(DirTree) +func walkNDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listDir listDirFunc) (dirtree.DirTree, error) { + dirs := make(dirtree.DirTree) fn := func(dirPath string, entries fs.DirEntries, err error) error { if err == nil { dirs[dirPath] = entries @@ -716,7 +552,7 @@ func walkNDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, ma // constructed with just those files in. // // NB (f, path) to be replaced by fs.Dir at some point -func NewDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int) (DirTree, error) { +func NewDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int) (dirtree.DirTree, error) { if fs.Config.NoTraverse && filter.Active.HaveFilesFrom() { return walkRDirTree(ctx, f, path, includeAll, maxLevel, filter.Active.MakeListR(ctx, f.NewObject)) } diff --git a/vfs/dir.go b/vfs/dir.go index fda29c2a6..a6d1d4579 100644 --- a/vfs/dir.go +++ b/vfs/dir.go @@ -10,6 +10,7 @@ import ( "time" "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/fs/dirtree" "github.com/ncw/rclone/fs/list" "github.com/ncw/rclone/fs/operations" "github.com/ncw/rclone/fs/walk" @@ -227,13 +228,13 @@ func (d *Dir) _readDir() error { // update d.items for each dir in the DirTree below this one and // set the last read time - must be called with the lock held -func (d *Dir) _readDirFromDirTree(dirTree walk.DirTree, when time.Time) error { +func (d *Dir) _readDirFromDirTree(dirTree dirtree.DirTree, when time.Time) error { return d._readDirFromEntries(dirTree[d.path], dirTree, when) } // update d.items and if dirTree is not nil update each dir in the DirTree below this one and // set the last read time - must be called with the lock held -func (d *Dir) _readDirFromEntries(entries fs.DirEntries, dirTree walk.DirTree, when time.Time) error { +func (d *Dir) _readDirFromEntries(entries fs.DirEntries, dirTree dirtree.DirTree, when time.Time) error { var err error // Cache the items by name found := make(map[string]struct{})