rclone/cmd/bisync/march.go
nielash 98f539de8f bisync: refactor normalization code, fix deltas - fixes #7270
Refactored the case / unicode normalization logic to be much more efficient,
 and fix the last outstanding issue from #7270. Before this change, we were
 doing lots of for loops and re-normalizing strings we had already normalized
 earlier. Now, we leave the normalizing entirely to March and avoid
 re-transforming later, which seems to make a large difference in terms of
 performance.
2024-01-20 14:50:08 -05:00

226 lines
5.3 KiB
Go

package bisync
import (
"context"
"sync"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/march"
)
var ls1 = newFileList()
var ls2 = newFileList()
var err error
var firstErr error
var marchAliasLock sync.Mutex
var marchLsLock sync.Mutex
var marchErrLock sync.Mutex
var marchCtx context.Context
func (b *bisyncRun) makeMarchListing(ctx context.Context) (*fileList, *fileList, error) {
ci := fs.GetConfig(ctx)
marchCtx = ctx
b.setupListing()
fs.Debugf(b, "starting to march!")
// set up a march over fdst (Path2) and fsrc (Path1)
m := &march.March{
Ctx: ctx,
Fdst: b.fs2,
Fsrc: b.fs1,
Dir: "",
NoTraverse: false,
Callback: b,
DstIncludeAll: false,
NoCheckDest: false,
NoUnicodeNormalization: ci.NoUnicodeNormalization,
}
err = m.Run(ctx)
fs.Debugf(b, "march completed. err: %v", err)
if err == nil {
err = firstErr
}
if err != nil {
b.abort = true
}
// save files
err = ls1.save(ctx, b.newListing1)
if err != nil {
b.abort = true
}
err = ls2.save(ctx, b.newListing2)
if err != nil {
b.abort = true
}
return ls1, ls2, err
}
// SrcOnly have an object which is on path1 only
func (b *bisyncRun) SrcOnly(o fs.DirEntry) (recurse bool) {
fs.Debugf(o, "path1 only")
b.parse(o, true)
return isDir(o)
}
// DstOnly have an object which is on path2 only
func (b *bisyncRun) DstOnly(o fs.DirEntry) (recurse bool) {
fs.Debugf(o, "path2 only")
b.parse(o, false)
return isDir(o)
}
// Match is called when object exists on both path1 and path2 (whether equal or not)
func (b *bisyncRun) Match(ctx context.Context, o2, o1 fs.DirEntry) (recurse bool) {
fs.Debugf(o1, "both path1 and path2")
marchAliasLock.Lock()
b.aliases.Add(o1.Remote(), o2.Remote())
marchAliasLock.Unlock()
b.parse(o1, true)
b.parse(o2, false)
return isDir(o1)
}
func isDir(e fs.DirEntry) bool {
switch x := e.(type) {
case fs.Object:
fs.Debugf(x, "is Object")
return false
case fs.Directory:
fs.Debugf(x, "is Dir")
return true
default:
fs.Debugf(e, "is unknown")
}
return false
}
func (b *bisyncRun) parse(e fs.DirEntry, isPath1 bool) {
switch x := e.(type) {
case fs.Object:
b.ForObject(x, isPath1)
case fs.Directory:
if b.opt.CreateEmptySrcDirs {
b.ForDir(x, isPath1)
}
default:
fs.Debugf(e, "is unknown")
}
}
func (b *bisyncRun) setupListing() {
ls1 = newFileList()
ls2 = newFileList()
hashType1 := hash.None
hashType2 := hash.None
if !b.opt.IgnoreListingChecksum {
// Currently bisync just honors --ignore-listing-checksum
// (note that this is different from --ignore-checksum)
// TODO add full support for checksums and related flags
hashType1 = b.fs1.Hashes().GetOne()
hashType2 = b.fs2.Hashes().GetOne()
}
ls1.hash = hashType1
ls2.hash = hashType2
}
func (b *bisyncRun) ForObject(o fs.Object, isPath1 bool) {
tr := accounting.Stats(marchCtx).NewCheckingTransfer(o, "listing file - "+whichPath(isPath1))
defer func() {
tr.Done(marchCtx, nil)
}()
var (
hashVal string
hashErr error
)
ls := whichLs(isPath1)
hashType := ls.hash
if hashType != hash.None {
hashVal, hashErr = o.Hash(marchCtx, hashType)
marchErrLock.Lock()
if firstErr == nil {
firstErr = hashErr
}
marchErrLock.Unlock()
}
time := o.ModTime(marchCtx).In(TZ)
id := "" // TODO
flags := "-" // "-" for a file and "d" for a directory
marchLsLock.Lock()
ls.put(o.Remote(), o.Size(), time, hashVal, id, flags)
marchLsLock.Unlock()
}
func (b *bisyncRun) ForDir(o fs.Directory, isPath1 bool) {
tr := accounting.Stats(marchCtx).NewCheckingTransfer(o, "listing dir - "+whichPath(isPath1))
defer func() {
tr.Done(marchCtx, nil)
}()
ls := whichLs(isPath1)
time := o.ModTime(marchCtx).In(TZ)
id := "" // TODO
flags := "d" // "-" for a file and "d" for a directory
marchLsLock.Lock()
//record size as 0 instead of -1, so bisync doesn't think it's a google doc
ls.put(o.Remote(), 0, time, "", id, flags)
marchLsLock.Unlock()
}
func whichLs(isPath1 bool) *fileList {
ls := ls1
if !isPath1 {
ls = ls2
}
return ls
}
func whichPath(isPath1 bool) string {
s := "Path1"
if !isPath1 {
s = "Path2"
}
return s
}
func (b *bisyncRun) findCheckFiles(ctx context.Context) (*fileList, *fileList, error) {
ctxCheckFile, filterCheckFile := filter.AddConfig(ctx)
b.handleErr(b.opt.CheckFilename, "error adding CheckFilename to filter", filterCheckFile.Add(true, b.opt.CheckFilename), true, true)
b.handleErr(b.opt.CheckFilename, "error adding ** exclusion to filter", filterCheckFile.Add(false, "**"), true, true)
ci := fs.GetConfig(ctxCheckFile)
marchCtx = ctxCheckFile
b.setupListing()
fs.Debugf(b, "starting to march!")
// set up a march over fdst (Path2) and fsrc (Path1)
m := &march.March{
Ctx: ctxCheckFile,
Fdst: b.fs2,
Fsrc: b.fs1,
Dir: "",
NoTraverse: false,
Callback: b,
DstIncludeAll: false,
NoCheckDest: false,
NoUnicodeNormalization: ci.NoUnicodeNormalization,
}
err = m.Run(ctxCheckFile)
fs.Debugf(b, "march completed. err: %v", err)
if err == nil {
err = firstErr
}
if err != nil {
b.abort = true
}
return ls1, ls2, err
}