fs: re-implement check and cryptcheck using the same traversal as sync

This makes them 100% consistent with sync and also makes them use less
memory as they no longer build the whole tree in memory first.

Fixes #1657
This commit is contained in:
Nick Craig-Wood 2017-09-01 16:33:09 +01:00
parent 261c7ad9e4
commit a8e41f081c

View File

@ -18,8 +18,7 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/spf13/pflag" "github.com/spf13/pflag"
"golang.org/x/net/context"
"golang.org/x/text/unicode/norm"
) )
// CalculateModifyWindow works out modify window for Fses passed in - // CalculateModifyWindow works out modify window for Fses passed in -
@ -682,68 +681,6 @@ func filterAndSortDir(entries DirEntries, includeAll bool, dir string,
return entries, nil return entries, nil
} }
// readFilesMap lists fs starting at dir ("" for the root) and returns
// the objects found, keyed by Object.Remote.
//
// If includeAll is set every file is added, otherwise only the files
// which pass the filter are added.
//
// While listing it logs exact duplicates (the first object seen for a
// remote is the one kept) and objects whose names only differ after
// NFC normalisation and lower casing.
//
// A listing error is returned wrapped with the name of fs, along with
// whatever had been collected so far.
func readFilesMap(fs Fs, includeAll bool, dir string) (files map[string]Object, err error) {
	files = make(map[string]Object)
	seen := make(map[string]struct{})
	record := func(obj Object) error {
		name := obj.Remote()
		key := strings.ToLower(norm.NFC.String(name))
		_, duplicate := files[name]
		switch {
		case duplicate:
			Logf(obj, "Duplicate file detected")
		default:
			files[name] = obj
			if _, clash := seen[key]; clash {
				Logf(obj, "File found with same name but different case on %v", obj.Fs())
			}
		}
		// Remember the normalised name whether or not the object was kept
		seen[key] = struct{}{}
		return nil
	}
	if err = readFilesFn(fs, includeAll, dir, record); err != nil {
		err = errors.Wrapf(err, "error listing: %s", fs)
	}
	return files, err
}
// readFilesMaps runs readFilesMap on fdst and fsrc at the same time
// dir is the start directory, "" for root
//
// Each listing runs in its own goroutine and this function returns
// once both have finished. A listing which fails is logged and leaves
// its map nil. If both listings fail the error from the fsrc listing
// is the one returned.
func readFilesMaps(fdst Fs, fdstIncludeAll bool, fsrc Fs, fsrcIncludeAll bool, dir string) (dstFiles, srcFiles map[string]Object, err error) {
	var wg sync.WaitGroup
	var srcErr, dstErr error
	// list fills in *pMap on success or *pErr on failure for one Fs
	list := func(fs Fs, includeAll bool, pMap *map[string]Object, pErr *error) {
		defer wg.Done()
		Infof(fs, "Building file list")
		files, listErr := readFilesMap(fs, includeAll, dir)
		if listErr != nil {
			Errorf(fs, "Error building file list: %v", listErr)
			*pErr = listErr
			return
		}
		Debugf(fs, "Done building file list")
		*pMap = files
	}
	wg.Add(2)
	// Each listing reports into the error variable named after it
	go list(fdst, fdstIncludeAll, &dstFiles, &dstErr)
	go list(fsrc, fsrcIncludeAll, &srcFiles, &srcErr)
	wg.Wait()
	// The source error is checked last so it wins if both listings failed
	if dstErr != nil {
		err = dstErr
	}
	if srcErr != nil {
		err = srcErr
	}
	return dstFiles, srcFiles, err
}
// SameConfig returns true if fdst and fsrc are using the same config // SameConfig returns true if fdst and fsrc are using the same config
// file entry // file entry
func SameConfig(fdst, fsrc Info) bool { func SameConfig(fdst, fsrc Info) bool {
@ -795,58 +732,56 @@ func checkIdentical(dst, src Object) (differ bool, noHash bool) {
return false, false return false, false
} }
// CheckFn checks the files in fsrc and fdst according to Size and // checkFn is the type of the checking function used in CheckFn()
// hash using checkFunction on each file to check the hashes. type checkFn func(a, b Object) (differ bool, noHash bool)
//
// checkFunction sees if dst and src are identical
//
// it returns true if differences were found
// it also returns whether it couldn't be hashed
func CheckFn(fdst, fsrc Fs, checkFunction func(a, b Object) (differ bool, noHash bool)) error {
dstFiles, srcFiles, err := readFilesMaps(fdst, false, fsrc, false, "")
if err != nil {
return err
}
differences := int32(0)
noHashes := int32(0)
// FIXME could do this as it goes along and make it use less // checkMarch is used to march over two Fses in the same way as
// memory. // sync/copy
type checkMarch struct {
// Move all the common files into commonFiles and delete them fdst, fsrc Fs
// from srcFiles and dstFiles check checkFn
commonFiles := make(map[string][2]Object) differences int32
for remote, src := range srcFiles { noHashes int32
if dst, ok := dstFiles[remote]; ok { srcFilesMissing int32
commonFiles[remote] = [2]Object{dst, src} dstFilesMissing int32
delete(srcFiles, remote)
delete(dstFiles, remote)
}
} }
Logf(fdst, "%d files not in %v", len(dstFiles), fsrc) // DstOnly have an object which is in the destination only
for _, dst := range dstFiles { func (c *checkMarch) DstOnly(dst DirEntry) (recurse bool) {
switch dst.(type) {
case Object:
Stats.Error() Stats.Error()
Errorf(dst, "File not in %v", fsrc) Errorf(dst, "File not in %v", c.fsrc)
atomic.AddInt32(&differences, 1) atomic.AddInt32(&c.differences, 1)
atomic.AddInt32(&c.srcFilesMissing, 1)
case Directory:
// Do the same thing to the entire contents of the directory
return true
default:
panic("Bad object in DirEntries")
}
return false
} }
Logf(fsrc, "%d files not in %s", len(srcFiles), fdst) // SrcOnly have an object which is in the source only
for _, src := range srcFiles { func (c *checkMarch) SrcOnly(src DirEntry) (recurse bool) {
switch src.(type) {
case Object:
Stats.Error() Stats.Error()
Errorf(src, "File not in %v", fdst) Errorf(src, "File not in %v", c.fdst)
atomic.AddInt32(&differences, 1) atomic.AddInt32(&c.differences, 1)
atomic.AddInt32(&c.dstFilesMissing, 1)
case Directory:
// Do the same thing to the entire contents of the directory
return true
default:
panic("Bad object in DirEntries")
}
return false
} }
checks := make(chan [2]Object, Config.Transfers) // check to see if two objects are identical using the check function
go func() { func (c *checkMarch) checkIdentical(dst, src Object) (differ bool, noHash bool) {
for _, check := range commonFiles {
checks <- check
}
close(checks)
}()
checkIdentical := func(dst, src Object) (differ bool, noHash bool) {
Stats.Checking(src.Remote()) Stats.Checking(src.Remote())
defer Stats.DoneChecking(src.Remote()) defer Stats.DoneChecking(src.Remote())
if !Config.IgnoreSize && src.Size() != dst.Size() { if !Config.IgnoreSize && src.Size() != dst.Size() {
@ -857,36 +792,79 @@ func CheckFn(fdst, fsrc Fs, checkFunction func(a, b Object) (differ bool, noHash
if Config.SizeOnly { if Config.SizeOnly {
return false, false return false, false
} }
return checkFunction(dst, src) return c.check(dst, src)
} }
var checkerWg sync.WaitGroup // Match is called when src and dst are present, so sync src to dst
checkerWg.Add(Config.Checkers) func (c *checkMarch) Match(dst, src DirEntry) (recurse bool) {
for i := 0; i < Config.Checkers; i++ { switch srcX := src.(type) {
go func() { case Object:
defer checkerWg.Done() dstX, ok := dst.(Object)
for check := range checks { if ok {
differ, noHash := checkIdentical(check[0], check[1]) differ, noHash := c.checkIdentical(dstX, srcX)
if differ { if differ {
atomic.AddInt32(&differences, 1) atomic.AddInt32(&c.differences, 1)
} else { } else {
Debugf(check[0], "OK") Debugf(dstX, "OK")
} }
if noHash { if noHash {
atomic.AddInt32(&noHashes, 1) atomic.AddInt32(&c.noHashes, 1)
} }
} else {
Stats.Error()
Errorf(src, "is file on %v but directory on %v", c.fsrc, c.fdst)
atomic.AddInt32(&c.differences, 1)
atomic.AddInt32(&c.dstFilesMissing, 1)
} }
}() case Directory:
// Do the same thing to the entire contents of the directory
_, ok := dst.(Directory)
if ok {
return true
}
Stats.Error()
Errorf(dst, "is file on %v but directory on %v", c.fdst, c.fsrc)
atomic.AddInt32(&c.differences, 1)
atomic.AddInt32(&c.srcFilesMissing, 1)
default:
panic("Bad object in DirEntries")
}
return false
} }
Infof(fdst, "Waiting for checks to finish") // CheckFn checks the files in fsrc and fdst according to Size and
checkerWg.Wait() // hash using checkFunction on each file to check the hashes.
Logf(fdst, "%d differences found", Stats.GetErrors()) //
if noHashes > 0 { // checkFunction sees if dst and src are identical
Logf(fdst, "%d hashes could not be checked", noHashes) //
// it returns true if differences were found
// it also returns whether it couldn't be hashed
func CheckFn(fdst, fsrc Fs, check checkFn) error {
c := &checkMarch{
fdst: fdst,
fsrc: fsrc,
check: check,
} }
if differences > 0 {
return errors.Errorf("%d differences found", differences) // set up a march over fdst and fsrc
m := newMarch(context.Background(), fdst, fsrc, "", c)
Infof(fdst, "Waiting for checks to finish")
m.run()
if c.dstFilesMissing > 0 {
Logf(fdst, "%d files missing", c.dstFilesMissing)
}
if c.srcFilesMissing > 0 {
Logf(fsrc, "%d files missing", c.srcFilesMissing)
}
Logf(fdst, "%d differences found", Stats.GetErrors())
if c.noHashes > 0 {
Logf(fdst, "%d hashes could not be checked", c.noHashes)
}
if c.differences > 0 {
return errors.Errorf("%d differences found", c.differences)
} }
return nil return nil
} }