bisync: generate final listing from sync results, not relisting -- fixes #5676

Before this change, if there were changes to sync, bisync listed each path
twice: once before the sync and once after. The second listing caused quite
a lot of problems, in addition to making each run much slower and more
expensive. A serious side-effect was that file changes could slip through
undetected, if they happened to occur while a sync was running (between the
first and second listing snapshots.)

After this change, the second listing is eliminated by getting the underlying
sync operation to report back a list of what it changed. Not only is this more
efficient, but also much more robust to concurrent modifications. It should no
longer be necessary to avoid make changes while it's running -- bisync will
simply learn about those changes next time and handle them on the next run.
Additionally, this also makes --check-sync usable again.

For further discussion, see:
https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=5.%20Final%20listings%20should%20be%20created%20from%20initial%20snapshot%20%2B%20deltas%2C%20not%20full%20re%2Dscans%2C%20to%20avoid%20errors%20if%20files%20changed%20during%20sync
This commit is contained in:
nielash 2023-10-01 09:36:19 -04:00
parent 3a50f35df9
commit 978cbf9360
24 changed files with 664 additions and 76 deletions

View File

@ -11,7 +11,7 @@ import (
) )
// FsPath converts Fs to a suitable rclone argument // FsPath converts Fs to a suitable rclone argument
func FsPath(f fs.Fs) string { func FsPath(f fs.Info) string {
name, path, slash := f.Name(), f.Root(), "/" name, path, slash := f.Name(), f.Root(), "/"
if name == "local" { if name == "local" {
slash = string(os.PathSeparator) slash = string(os.PathSeparator)

View File

@ -641,6 +641,8 @@ func (b *bisyncTest) runBisync(ctx context.Context, args []string) (err error) {
case "subdir": case "subdir":
fs1 = addSubdir(b.path1, val) fs1 = addSubdir(b.path1, val)
fs2 = addSubdir(b.path2, val) fs2 = addSubdir(b.path2, val)
case "ignore-listing-checksum":
opt.IgnoreListingChecksum = true
default: default:
return fmt.Errorf("invalid bisync option %q", arg) return fmt.Errorf("invalid bisync option %q", arg)
} }

View File

@ -176,9 +176,11 @@ func (b *bisyncRun) findDeltas(fctx context.Context, f fs.Fs, oldListing, newLis
} else { } else {
if old.getTime(file) != now.getTime(file) { if old.getTime(file) != now.getTime(file) {
if old.beforeOther(now, file) { if old.beforeOther(now, file) {
fs.Debugf(file, "(old: %v current: %v", old.getTime(file), now.getTime(file))
b.indent(msg, file, "File is newer") b.indent(msg, file, "File is newer")
d |= deltaNewer d |= deltaNewer
} else { // Current version is older than prior sync. } else { // Current version is older than prior sync.
fs.Debugf(file, "(old: %v current: %v", old.getTime(file), now.getTime(file))
b.indent(msg, file, "File is OLDER") b.indent(msg, file, "File is OLDER")
d |= deltaOlder d |= deltaOlder
} }
@ -217,7 +219,7 @@ func (b *bisyncRun) findDeltas(fctx context.Context, f fs.Fs, oldListing, newLis
} }
// applyDeltas // applyDeltas
func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (changes1, changes2 bool, err error) { func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (changes1, changes2 bool, results2to1, results1to2 []Results, queues queues, err error) {
path1 := bilib.FsPath(b.fs1) path1 := bilib.FsPath(b.fs1)
path2 := bilib.FsPath(b.fs2) path2 := bilib.FsPath(b.fs2)
@ -226,6 +228,10 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
delete1 := bilib.Names{} delete1 := bilib.Names{}
delete2 := bilib.Names{} delete2 := bilib.Names{}
handled := bilib.Names{} handled := bilib.Names{}
renamed1 := bilib.Names{}
renamed2 := bilib.Names{}
renameSkipped := bilib.Names{}
deletedonboth := bilib.Names{}
ctxMove := b.opt.setDryRun(ctx) ctxMove := b.opt.setDryRun(ctx)
@ -299,6 +305,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
equal := matches.Has(file) equal := matches.Has(file)
if equal { if equal {
fs.Infof(nil, "Files are equal! Skipping: %s", file) fs.Infof(nil, "Files are equal! Skipping: %s", file)
renameSkipped.Add(file)
} else { } else {
fs.Debugf(nil, "Files are NOT equal: %s", file) fs.Debugf(nil, "Files are NOT equal: %s", file)
b.indent("!Path1", p1+"..path1", "Renaming Path1 copy") b.indent("!Path1", p1+"..path1", "Renaming Path1 copy")
@ -307,6 +314,11 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
b.critical = true b.critical = true
return return
} }
if b.opt.DryRun {
renameSkipped.Add(file)
} else {
renamed1.Add(file)
}
b.indent("!Path1", p2+"..path1", "Queue copy to Path2") b.indent("!Path1", p2+"..path1", "Queue copy to Path2")
copy1to2.Add(file + "..path1") copy1to2.Add(file + "..path1")
@ -315,6 +327,11 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
err = fmt.Errorf("path2 rename failed for %s: %w", file, err) err = fmt.Errorf("path2 rename failed for %s: %w", file, err)
return return
} }
if b.opt.DryRun {
renameSkipped.Add(file)
} else {
renamed2.Add(file)
}
b.indent("!Path2", p1+"..path2", "Queue copy to Path1") b.indent("!Path2", p1+"..path2", "Queue copy to Path1")
copy2to1.Add(file + "..path2") copy2to1.Add(file + "..path2")
} }
@ -334,6 +351,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
handled.Add(file) handled.Add(file)
} else if d2.is(deltaDeleted) { } else if d2.is(deltaDeleted) {
handled.Add(file) handled.Add(file)
deletedonboth.Add(file)
} }
} }
} }
@ -360,25 +378,25 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
if copy2to1.NotEmpty() { if copy2to1.NotEmpty() {
changes1 = true changes1 = true
b.indent("Path2", "Path1", "Do queued copies to") b.indent("Path2", "Path1", "Do queued copies to")
err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1") results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1")
if err != nil { if err != nil {
return return
} }
//copy empty dirs from path2 to path1 (if --create-empty-src-dirs) //copy empty dirs from path2 to path1 (if --create-empty-src-dirs)
b.syncEmptyDirs(ctx, b.fs1, copy2to1, dirs2, "make") b.syncEmptyDirs(ctx, b.fs1, copy2to1, dirs2, &results2to1, "make")
} }
if copy1to2.NotEmpty() { if copy1to2.NotEmpty() {
changes2 = true changes2 = true
b.indent("Path1", "Path2", "Do queued copies to") b.indent("Path1", "Path2", "Do queued copies to")
err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2") results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2")
if err != nil { if err != nil {
return return
} }
//copy empty dirs from path1 to path2 (if --create-empty-src-dirs) //copy empty dirs from path1 to path2 (if --create-empty-src-dirs)
b.syncEmptyDirs(ctx, b.fs2, copy1to2, dirs1, "make") b.syncEmptyDirs(ctx, b.fs2, copy1to2, dirs1, &results1to2, "make")
} }
if delete1.NotEmpty() { if delete1.NotEmpty() {
@ -386,7 +404,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
return return
} }
//propagate deletions of empty dirs from path2 to path1 (if --create-empty-src-dirs) //propagate deletions of empty dirs from path2 to path1 (if --create-empty-src-dirs)
b.syncEmptyDirs(ctx, b.fs1, delete1, dirs1, "remove") b.syncEmptyDirs(ctx, b.fs1, delete1, dirs1, &results2to1, "remove")
} }
if delete2.NotEmpty() { if delete2.NotEmpty() {
@ -394,9 +412,16 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
return return
} }
//propagate deletions of empty dirs from path1 to path2 (if --create-empty-src-dirs) //propagate deletions of empty dirs from path1 to path2 (if --create-empty-src-dirs)
b.syncEmptyDirs(ctx, b.fs2, delete2, dirs2, "remove") b.syncEmptyDirs(ctx, b.fs2, delete2, dirs2, &results1to2, "remove")
} }
queues.copy1to2 = copy1to2
queues.copy2to1 = copy2to1
queues.renamed1 = renamed1
queues.renamed2 = renamed2
queues.renameSkipped = renameSkipped
queues.deletedonboth = deletedonboth
return return
} }

View File

@ -9,14 +9,18 @@ import (
"io" "io"
"os" "os"
"regexp" "regexp"
"sort"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/hash" "github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/fs/walk" "github.com/rclone/rclone/fs/walk"
"golang.org/x/exp/slices"
) )
// ListingHeader defines first line of a listing // ListingHeader defines first line of a listing
@ -70,11 +74,29 @@ func (ls *fileList) empty() bool {
func (ls *fileList) has(file string) bool { func (ls *fileList) has(file string) bool {
_, found := ls.info[file] _, found := ls.info[file]
if !found {
//try unquoting
file, _ = strconv.Unquote(`"` + file + `"`)
_, found = ls.info[file]
}
return found return found
} }
func (ls *fileList) get(file string) *fileInfo { func (ls *fileList) get(file string) *fileInfo {
return ls.info[file] info, found := ls.info[file]
if !found {
//try unquoting
file, _ = strconv.Unquote(`"` + file + `"`)
info = ls.info[fmt.Sprint(file)]
}
return info
}
func (ls *fileList) remove(file string) {
if ls.has(file) {
ls.list = slices.Delete(ls.list, slices.Index(ls.list, file), slices.Index(ls.list, file)+1)
delete(ls.info, file)
}
} }
func (ls *fileList) put(file string, size int64, time time.Time, hash, id string, flags string) { func (ls *fileList) put(file string, size int64, time time.Time, hash, id string, flags string) {
@ -82,6 +104,9 @@ func (ls *fileList) put(file string, size int64, time time.Time, hash, id string
if fi != nil { if fi != nil {
fi.size = size fi.size = size
fi.time = time fi.time = time
fi.hash = hash
fi.id = id
fi.flags = flags
} else { } else {
fi = &fileInfo{ fi = &fileInfo{
size: size, size: size,
@ -120,12 +145,20 @@ func (ls *fileList) afterTime(file string, time time.Time) bool {
return fi.time.After(time) return fi.time.After(time)
} }
// sort by path name
func (ls *fileList) sort() {
sort.SliceStable(ls.list, func(i, j int) bool {
return ls.list[i] < ls.list[j]
})
}
// save will save listing to a file. // save will save listing to a file.
func (ls *fileList) save(ctx context.Context, listing string) error { func (ls *fileList) save(ctx context.Context, listing string) error {
file, err := os.Create(listing) file, err := os.Create(listing)
if err != nil { if err != nil {
return err return err
} }
ls.sort()
hashName := "" hashName := ""
if ls.hash != hash.None { if ls.hash != hash.None {
@ -376,3 +409,221 @@ func (b *bisyncRun) listDirsOnly(listingNum int) (*fileList, error) {
return dirsonly, err return dirsonly, err
} }
// ConvertPrecision returns the Modtime rounded to Dest's precision if lower, otherwise unchanged
// Need to use the other fs's precision (if lower) when copying
// Note: we need to use Truncate rather than Round so that After() is reliable.
// (2023-11-02 20:22:45.552679442 +0000 < UTC 2023-11-02 20:22:45.553 +0000 UTC)
func ConvertPrecision(Modtime time.Time, dst fs.Fs) time.Time {
DestPrecision := dst.Precision()
// In case it's wrapping an Fs with lower precision, try unwrapping and use the lowest.
if Modtime.Truncate(DestPrecision).After(Modtime.Truncate(fs.UnWrapFs(dst).Precision())) {
DestPrecision = fs.UnWrapFs(dst).Precision()
}
if Modtime.After(Modtime.Truncate(DestPrecision)) {
return Modtime.Truncate(DestPrecision)
}
return Modtime
}
// modifyListing will modify the listing based on the results of the sync
func (b *bisyncRun) modifyListing(ctx context.Context, src fs.Fs, dst fs.Fs, srcListing string, dstListing string, results []Results, queues queues, is1to2 bool) (err error) {
queue := queues.copy2to1
renames := queues.renamed2
direction := "2to1"
if is1to2 {
queue = queues.copy1to2
renames = queues.renamed1
direction = "1to2"
}
fs.Debugf(nil, "updating %s", direction)
fs.Debugf(nil, "RESULTS: %v", results)
fs.Debugf(nil, "QUEUE: %v", queue)
srcList, err := b.loadListing(srcListing)
if err != nil {
return fmt.Errorf("cannot read prior listing: %w", err)
}
dstList, err := b.loadListing(dstListing)
if err != nil {
return fmt.Errorf("cannot read prior listing: %w", err)
}
// set list hash type
if b.opt.Resync && !b.opt.IgnoreListingChecksum {
srcList.hash = src.Hashes().GetOne()
dstList.hash = dst.Hashes().GetOne()
}
srcWinners := newFileList()
dstWinners := newFileList()
errors := newFileList()
ctxRecheck, filterRecheck := filter.AddConfig(ctx)
for _, result := range results {
if result.Name == "" {
continue
}
if result.Flags == "d" && !b.opt.CreateEmptySrcDirs {
continue
}
// build src winners list
if result.IsSrc && result.Src != "" && (result.Winner.Err == nil || result.Flags == "d") {
srcWinners.put(result.Name, result.Size, ConvertPrecision(result.Modtime, src), result.Hash, "-", result.Flags)
fs.Debugf(nil, "winner: copy to src: %v", result)
}
// build dst winners list
if result.IsWinner && result.Winner.Side != "none" && (result.Winner.Err == nil || result.Flags == "d") {
dstWinners.put(result.Name, result.Size, ConvertPrecision(result.Modtime, dst), result.Hash, "-", result.Flags)
fs.Debugf(nil, "winner: copy to dst: %v", result)
}
// build errors list
if result.Err != nil || result.Winner.Err != nil {
errors.put(result.Name, result.Size, result.Modtime, result.Hash, "-", result.Flags)
if err := filterRecheck.AddFile(result.Name); err != nil {
fs.Debugf(result.Name, "error adding file to recheck filter: %v", err)
}
}
}
updateLists := func(side string, winners, list *fileList) {
// removals from side
for _, oldFile := range queue.ToList() {
if !winners.has(oldFile) && list.has(oldFile) && !errors.has(oldFile) {
list.remove(oldFile)
fs.Debugf(nil, "decision: removed from %s: %v", side, oldFile)
} else if winners.has(oldFile) {
// copies to side
new := winners.get(oldFile)
list.put(oldFile, new.size, new.time, new.hash, new.id, new.flags)
fs.Debugf(nil, "decision: copied to %s: %v", side, oldFile)
} else {
fs.Debugf(oldFile, "file in queue but missing from %s transfers", side)
if err := filterRecheck.AddFile(oldFile); err != nil {
fs.Debugf(oldFile, "error adding file to recheck filter: %v", err)
}
}
}
}
updateLists("src", srcWinners, srcList)
updateLists("dst", dstWinners, dstList)
// TODO: rollback on error
// account for "deltaOthers" we handled separately
if queues.deletedonboth.NotEmpty() {
for file := range queues.deletedonboth {
srcList.remove(file)
dstList.remove(file)
}
}
if renames.NotEmpty() && !b.opt.DryRun {
// renamed on src and copied to dst
renamesList := renames.ToList()
for _, file := range renamesList {
// we'll handle the other side when we go the other direction
newName := file + "..path2"
oppositeName := file + "..path1"
if is1to2 {
newName = file + "..path1"
oppositeName = file + "..path2"
}
var new *fileInfo
// we prefer to get the info from the ..path1 / ..path2 versions
// since they were actually copied as opposed to operations.MoveFile()'d.
// the size/time/hash info is therefore fresher on the renames
// but we'll settle for the original if we have to.
if srcList.has(newName) {
new = srcList.get(newName)
} else if srcList.has(oppositeName) {
new = srcList.get(oppositeName)
} else if srcList.has(file) {
new = srcList.get(file)
} else {
if err := filterRecheck.AddFile(file); err != nil {
fs.Debugf(file, "error adding file to recheck filter: %v", err)
}
}
srcList.put(newName, new.size, new.time, new.hash, new.id, new.flags)
dstList.put(newName, new.size, ConvertPrecision(new.time, src), new.hash, new.id, new.flags)
srcList.remove(file)
dstList.remove(file)
}
}
// recheck the ones we skipped because they were equal
// we never got their info because they were never synced.
// TODO: add flag to skip this for people who don't care and would rather avoid?
if queues.renameSkipped.NotEmpty() {
skippedList := queues.renameSkipped.ToList()
for _, file := range skippedList {
if err := filterRecheck.AddFile(file); err != nil {
fs.Debugf(file, "error adding file to recheck filter: %v", err)
}
}
}
if filterRecheck.HaveFilesFrom() {
b.recheck(ctxRecheck, src, dst, srcList, dstList)
}
// update files
err = srcList.save(ctx, srcListing)
if err != nil {
b.abort = true
}
err = dstList.save(ctx, dstListing)
if err != nil {
b.abort = true
}
return err
}
// recheck the ones we're not sure about
func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList, dstList *fileList) {
var srcObjs []fs.Object
var dstObjs []fs.Object
if err := operations.ListFn(ctxRecheck, src, func(obj fs.Object) {
srcObjs = append(srcObjs, obj)
}); err != nil {
fs.Debugf(src, "error recchecking src obj: %v", err)
}
if err := operations.ListFn(ctxRecheck, dst, func(obj fs.Object) {
dstObjs = append(dstObjs, obj)
}); err != nil {
fs.Debugf(dst, "error recchecking dst obj: %v", err)
}
putObj := func(obj fs.Object, f fs.Fs, list *fileList) {
hashVal := ""
if !b.opt.IgnoreListingChecksum {
hashType := f.Hashes().GetOne()
if hashType != hash.None {
hashVal, _ = obj.Hash(ctxRecheck, hashType)
}
}
list.put(obj.Remote(), obj.Size(), obj.ModTime(ctxRecheck), hashVal, "-", "-")
}
for _, srcObj := range srcObjs {
fs.Debugf(srcObj, "rechecking")
for _, dstObj := range dstObjs {
if srcObj.Remote() == dstObj.Remote() {
if operations.Equal(ctxRecheck, srcObj, dstObj) || b.opt.DryRun {
putObj(srcObj, src, srcList)
putObj(dstObj, dst, dstList)
} else {
fs.Infof(srcObj, "files not equal on recheck: %v %v", srcObj, dstObj)
}
}
}
}
}

View File

@ -16,7 +16,6 @@ import (
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/operations" "github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/fs/sync"
"github.com/rclone/rclone/lib/atexit" "github.com/rclone/rclone/lib/atexit"
) )
@ -35,6 +34,15 @@ type bisyncRun struct {
opt *Options opt *Options
} }
type queues struct {
copy1to2 bilib.Names
copy2to1 bilib.Names
renamed1 bilib.Names // renamed on 1 and copied to 2
renamed2 bilib.Names // renamed on 2 and copied to 1
renameSkipped bilib.Names // not renamed because it was equal
deletedonboth bilib.Names
}
// Bisync handles lock file, performs bisync run and checks exit status // Bisync handles lock file, performs bisync run and checks exit status
func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) { func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) {
opt := *optArg // ensure that input is never changed opt := *optArg // ensure that input is never changed
@ -256,13 +264,18 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
// Determine and apply changes to Path1 and Path2 // Determine and apply changes to Path1 and Path2
noChanges := ds1.empty() && ds2.empty() noChanges := ds1.empty() && ds2.empty()
changes1 := false changes1 := false // 2to1
changes2 := false changes2 := false // 1to2
results2to1 := []Results{}
results1to2 := []Results{}
queues := queues{}
if noChanges { if noChanges {
fs.Infof(nil, "No changes found") fs.Infof(nil, "No changes found")
} else { } else {
fs.Infof(nil, "Applying changes") fs.Infof(nil, "Applying changes")
changes1, changes2, err = b.applyDeltas(octx, ds1, ds2) changes1, changes2, results2to1, results1to2, queues, err = b.applyDeltas(octx, ds1, ds2)
if err != nil { if err != nil {
b.critical = true b.critical = true
// b.retryable = true // not sure about this one // b.retryable = true // not sure about this one
@ -277,13 +290,13 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
err1 = bilib.CopyFileIfExists(newListing1, listing1) err1 = bilib.CopyFileIfExists(newListing1, listing1)
err2 = bilib.CopyFileIfExists(newListing2, listing2) err2 = bilib.CopyFileIfExists(newListing2, listing2)
} else { } else {
if changes1 { if changes1 { // 2to1
_, err1 = b.makeListing(fctx, b.fs1, listing1) err1 = b.modifyListing(fctx, b.fs2, b.fs1, listing2, listing1, results2to1, queues, false)
} else { } else {
err1 = bilib.CopyFileIfExists(newListing1, listing1) err1 = bilib.CopyFileIfExists(newListing1, listing1)
} }
if changes2 { if changes2 { // 1to2
_, err2 = b.makeListing(fctx, b.fs2, listing2) err2 = b.modifyListing(fctx, b.fs1, b.fs2, listing1, listing2, results1to2, queues, true)
} else { } else {
err2 = bilib.CopyFileIfExists(newListing2, listing2) err2 = bilib.CopyFileIfExists(newListing2, listing2)
} }
@ -394,11 +407,15 @@ func (b *bisyncRun) resync(octx, fctx context.Context, listing1, listing2 string
copy2to1 = append(copy2to1, file) copy2to1 = append(copy2to1, file)
} }
} }
var results2to1 []Results
var results1to2 []Results
var results2to1Dirs []Results
queues := queues{}
if len(copy2to1) > 0 { if len(copy2to1) > 0 {
b.indent("Path2", "Path1", "Resync is doing queued copies to") b.indent("Path2", "Path1", "Resync is doing queued copies to")
// octx does not have extra filters! // octx does not have extra filters!
err = b.fastCopy(octx, b.fs2, b.fs1, bilib.ToNames(copy2to1), "resync-copy2to1") results2to1, err = b.fastCopy(octx, b.fs2, b.fs1, bilib.ToNames(copy2to1), "resync-copy2to1")
if err != nil { if err != nil {
b.critical = true b.critical = true
return err return err
@ -413,7 +430,7 @@ func (b *bisyncRun) resync(octx, fctx context.Context, listing1, listing2 string
// prevent overwriting Google Doc files (their size is -1) // prevent overwriting Google Doc files (their size is -1)
filterSync.Opt.MinSize = 0 filterSync.Opt.MinSize = 0
} }
if err = sync.CopyDir(ctxSync, b.fs2, b.fs1, b.opt.CreateEmptySrcDirs); err != nil { if results1to2, err = b.resyncDir(ctxSync, b.fs1, b.fs2); err != nil {
b.critical = true b.critical = true
return err return err
} }
@ -434,20 +451,39 @@ func (b *bisyncRun) resync(octx, fctx context.Context, listing1, listing2 string
fs.Infof(nil, "Resynching Path2 to Path1 (for empty dirs)") fs.Infof(nil, "Resynching Path2 to Path1 (for empty dirs)")
//note copy (not sync) and dst comes before src // note copy (not sync) and dst comes before src
if err = sync.CopyDir(ctxSync, b.fs1, b.fs2, b.opt.CreateEmptySrcDirs); err != nil { if results2to1Dirs, err = b.resyncDir(ctxSync, b.fs2, b.fs1); err != nil {
b.critical = true b.critical = true
return err return err
} }
} }
fs.Infof(nil, "Resync updating listings") fs.Infof(nil, "Resync updating listings")
if _, err = b.makeListing(fctx, b.fs1, listing1); err != nil { if err := bilib.CopyFileIfExists(newListing1, listing1); err != nil {
return err
}
if err := bilib.CopyFileIfExists(newListing2, listing2); err != nil {
return err
}
// resync 2to1
queues.copy2to1 = bilib.ToNames(copy2to1)
if err = b.modifyListing(fctx, b.fs2, b.fs1, listing2, listing1, results2to1, queues, false); err != nil {
b.critical = true b.critical = true
return err return err
} }
if _, err = b.makeListing(fctx, b.fs2, listing2); err != nil { // resync 1to2
queues.copy1to2 = bilib.ToNames(filesNow1.list)
if err = b.modifyListing(fctx, b.fs1, b.fs2, listing1, listing2, results1to2, queues, true); err != nil {
b.critical = true
return err
}
// resync 2to1 (dirs)
dirs2, _ := b.listDirsOnly(2)
queues.copy2to1 = bilib.ToNames(dirs2.list)
if err = b.modifyListing(fctx, b.fs2, b.fs1, listing2, listing1, results2to1Dirs, queues, false); err != nil {
b.critical = true b.critical = true
return err return err
} }

View File

@ -2,39 +2,187 @@ package bisync
import ( import (
"context" "context"
"encoding/json"
"fmt" "fmt"
"io"
"sort" "sort"
mutex "sync" // renamed as "sync" already in use
"time"
"github.com/rclone/rclone/cmd/bisync/bilib" "github.com/rclone/rclone/cmd/bisync/bilib"
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/operations" "github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/fs/sync" "github.com/rclone/rclone/fs/sync"
"golang.org/x/text/unicode/norm"
) )
func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib.Names, queueName string) error { // Results represents a pair of synced files, as reported by the LoggerFn
// Bisync uses this to determine what happened during the sync, and modify the listings accordingly
type Results struct {
Src string
Dst string
Name string
Size int64
Modtime time.Time
Hash string
Flags string
Sigil operations.Sigil
Err error
Winner operations.Winner
IsWinner bool
IsSrc bool
IsDst bool
}
var logger = operations.NewLoggerOpt()
var lock mutex.Mutex
var ignoreListingChecksum bool
// FsPathIfAny handles type assertions and returns a formatted bilib.FsPath if valid, otherwise ""
func FsPathIfAny(x fs.DirEntry) string {
obj, ok := x.(fs.Object)
if x != nil && ok {
return bilib.FsPath(obj.Fs())
}
return ""
}
func resultName(result Results, side, src, dst fs.DirEntry) string {
if side != nil {
return side.Remote()
} else if result.IsSrc && dst != nil {
return dst.Remote()
} else if src != nil {
return src.Remote()
}
return ""
}
// WriteResults is Bisync's LoggerFn
func WriteResults(ctx context.Context, sigil operations.Sigil, src, dst fs.DirEntry, err error) {
lock.Lock()
defer lock.Unlock()
opt := operations.GetLoggerOpt(ctx)
result := Results{
Sigil: sigil,
Src: FsPathIfAny(src),
Dst: FsPathIfAny(dst),
Err: err,
}
result.Winner = operations.WinningSide(ctx, sigil, src, dst, err)
fss := []fs.DirEntry{src, dst}
for i, side := range fss {
result.Name = resultName(result, side, src, dst)
result.IsSrc = i == 0
result.IsDst = i == 1
result.Flags = "-"
if side != nil {
result.Size = side.Size()
result.Modtime = side.ModTime(ctx).In(time.UTC)
if !ignoreListingChecksum {
sideObj, ok := side.(fs.ObjectInfo)
if ok {
result.Hash, _ = sideObj.Hash(ctx, sideObj.Fs().Hashes().GetOne())
}
}
}
result.IsWinner = result.Winner.Obj == side
// used during resync only
if err == fs.ErrorIsDir {
if src != nil {
result.Src = src.Remote()
result.Name = src.Remote()
} else {
result.Dst = dst.Remote()
result.Name = dst.Remote()
}
result.Flags = "d"
result.Size = 0
}
fs.Debugf(nil, "writing result: %v", result)
err := json.NewEncoder(opt.JSON).Encode(result)
if err != nil {
fs.Errorf(result, "Error encoding JSON: %v", err)
}
}
}
// ReadResults decodes the JSON data from WriteResults
func ReadResults(results io.Reader) []Results {
dec := json.NewDecoder(results)
var slice []Results
for {
var r Results
if err := dec.Decode(&r); err == io.EOF {
break
}
fs.Debugf(nil, "result: %v", r)
slice = append(slice, r)
}
return slice
}
func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib.Names, queueName string) ([]Results, error) {
if err := b.saveQueue(files, queueName); err != nil { if err := b.saveQueue(files, queueName); err != nil {
return err return nil, err
} }
ctxCopy, filterCopy := filter.AddConfig(b.opt.setDryRun(ctx)) ctxCopy, filterCopy := filter.AddConfig(b.opt.setDryRun(ctx))
for _, file := range files.ToList() { for _, file := range files.ToList() {
if err := filterCopy.AddFile(file); err != nil { if err := filterCopy.AddFile(file); err != nil {
return err return nil, err
}
// macOS
if err := filterCopy.AddFile(norm.NFD.String(file)); err != nil {
return nil, err
} }
} }
ignoreListingChecksum = b.opt.IgnoreListingChecksum
logger.LoggerFn = WriteResults
ctxCopyLogger := operations.WithSyncLogger(ctxCopy, logger)
var err error var err error
if b.opt.Resync { if b.opt.Resync {
err = sync.CopyDir(ctxCopy, fdst, fsrc, b.opt.CreateEmptySrcDirs) err = sync.CopyDir(ctxCopyLogger, fdst, fsrc, b.opt.CreateEmptySrcDirs)
} else { } else {
err = sync.Sync(ctxCopy, fdst, fsrc, b.opt.CreateEmptySrcDirs) err = sync.Sync(ctxCopyLogger, fdst, fsrc, b.opt.CreateEmptySrcDirs)
} }
return err fs.Debugf(nil, "logger is: %v", logger)
getResults := ReadResults(logger.JSON)
fs.Debugf(nil, "Got %v results for %v", len(getResults), queueName)
lineFormat := "%s %8d %s %s %s %q\n"
for _, result := range getResults {
fs.Debugf(nil, lineFormat, result.Flags, result.Size, result.Hash, "", result.Modtime, result.Name)
}
return getResults, err
}
func (b *bisyncRun) resyncDir(ctx context.Context, fsrc, fdst fs.Fs) ([]Results, error) {
ignoreListingChecksum = b.opt.IgnoreListingChecksum
logger.LoggerFn = WriteResults
ctxCopyLogger := operations.WithSyncLogger(ctx, logger)
err := sync.CopyDir(ctxCopyLogger, fdst, fsrc, b.opt.CreateEmptySrcDirs)
fs.Debugf(nil, "logger is: %v", logger)
getResults := ReadResults(logger.JSON)
fs.Debugf(nil, "Got %v results for %v", len(getResults), "resync")
return getResults, err
} }
// operation should be "make" or "remove" // operation should be "make" or "remove"
func (b *bisyncRun) syncEmptyDirs(ctx context.Context, dst fs.Fs, candidates bilib.Names, dirsList *fileList, operation string) { func (b *bisyncRun) syncEmptyDirs(ctx context.Context, dst fs.Fs, candidates bilib.Names, dirsList *fileList, results *[]Results, operation string) {
if b.opt.CreateEmptySrcDirs && (!b.opt.Resync || operation == "make") { if b.opt.CreateEmptySrcDirs && (!b.opt.Resync || operation == "make") {
candidatesList := candidates.ToList() candidatesList := candidates.ToList()
@ -46,18 +194,50 @@ func (b *bisyncRun) syncEmptyDirs(ctx context.Context, dst fs.Fs, candidates bil
for _, s := range candidatesList { for _, s := range candidatesList {
var direrr error var direrr error
if dirsList.has(s) { //make sure it's a dir, not a file if dirsList.has(s) { //make sure it's a dir, not a file
r := Results{}
r.Name = s
r.Size = 0
r.Modtime = dirsList.getTime(s).In(time.UTC)
r.Flags = "d"
r.Err = nil
r.Winner = operations.Winner{ // note: Obj not set
Side: "src",
Err: nil,
}
rSrc := r
rDst := r
rSrc.IsSrc = true
rSrc.IsDst = false
rDst.IsSrc = false
rDst.IsDst = true
rSrc.IsWinner = true
rDst.IsWinner = false
if operation == "remove" { if operation == "remove" {
// directories made empty by the sync will have already been deleted during the sync // directories made empty by the sync will have already been deleted during the sync
// this just catches the already-empty ones (excluded from sync by --files-from filter) // this just catches the already-empty ones (excluded from sync by --files-from filter)
direrr = operations.TryRmdir(ctx, dst, s) direrr = operations.TryRmdir(ctx, dst, s)
rSrc.Sigil = operations.MissingOnSrc
rDst.Sigil = operations.MissingOnSrc
rSrc.Dst = s
rDst.Dst = s
rSrc.Winner.Side = "none"
rDst.Winner.Side = "none"
} else if operation == "make" { } else if operation == "make" {
direrr = operations.Mkdir(ctx, dst, s) direrr = operations.Mkdir(ctx, dst, s)
rSrc.Sigil = operations.MissingOnDst
rDst.Sigil = operations.MissingOnDst
rSrc.Src = s
rDst.Src = s
} else { } else {
direrr = fmt.Errorf("invalid operation. Expected 'make' or 'remove', received '%q'", operation) direrr = fmt.Errorf("invalid operation. Expected 'make' or 'remove', received '%q'", operation)
} }
if direrr != nil { if direrr != nil {
fs.Debugf(nil, "Error syncing directory: %v", direrr) fs.Debugf(nil, "Error syncing directory: %v", direrr)
} else {
*results = append(*results, rSrc, rDst)
} }
} }
} }

View File

@ -0,0 +1 @@
"subdir/file20.txt"

View File

@ -0,0 +1,9 @@
# bisync listing v1 from test
- 109 - - 2000-01-01T00:00:00.000000000+0000 "RCLONE_TEST"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy1.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy2.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy3.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy4.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy5.txt"
- 19 - - 2001-01-02T00:00:00.000000000+0000 "file1.txt"
- 19 - - 2001-01-02T00:00:00.000000000+0000 "subdir/file20.txt"

View File

@ -0,0 +1,9 @@
# bisync listing v1 from test
- 109 - - 2000-01-01T00:00:00.000000000+0000 "RCLONE_TEST"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy1.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy2.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy3.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy4.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy5.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.txt"
- 19 - - 2001-01-02T00:00:00.000000000+0000 "subdir/file20.txt"

View File

@ -0,0 +1,9 @@
# bisync listing v1 from test
- 109 - - 2000-01-01T00:00:00.000000000+0000 "RCLONE_TEST"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy1.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy2.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy3.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy4.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy5.txt"
- 19 - - 2001-01-02T00:00:00.000000000+0000 "file1.txt"
- 19 - - 2001-01-02T00:00:00.000000000+0000 "subdir/file20.txt"

View File

@ -0,0 +1,9 @@
# bisync listing v1 from test
- 109 - - 2000-01-01T00:00:00.000000000+0000 "RCLONE_TEST"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy1.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy2.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy3.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy4.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy5.txt"
- 19 - - 2001-01-02T00:00:00.000000000+0000 "file1.txt"
- 0 - - 2000-01-01T00:00:00.000000000+0000 "subdir/file20.txt"

View File

@ -0,0 +1,39 @@
(01) : test basic
(02) : test initial bisync
(03) : bisync resync
INFO : Synching Path1 "{path1/}" with Path2 "{path2/}"
INFO : Copying unique Path2 files to Path1
INFO : Resynching Path1 to Path2
INFO : Resync updating listings
INFO : Bisync successful
(04) : bisync resync ignore-listing-checksum
INFO : Synching Path1 "{path1/}" with Path2 "{path2/}"
INFO : Copying unique Path2 files to Path1
INFO : Resynching Path1 to Path2
INFO : Resync updating listings
INFO : Bisync successful
(05) : test place newer files on both paths
(06) : touch-copy 2001-01-02 {datadir/}file1.txt {path2/}
(07) : copy-as {datadir/}file1.txt {path1/}subdir file20.txt
(08) : test bisync run
(09) : bisync ignore-listing-checksum
INFO : Synching Path1 "{path1/}" with Path2 "{path2/}"
INFO : Path1 checking for diffs
INFO : - Path1 File is newer - subdir/file20.txt
INFO : Path1: 1 changes: 0 new, 1 newer, 0 older, 0 deleted
INFO : Path2 checking for diffs
INFO : - Path2 File is newer - file1.txt
INFO : Path2: 1 changes: 0 new, 1 newer, 0 older, 0 deleted
INFO : Applying changes
INFO : - Path1 Queue copy to Path2 - {path2/}subdir/file20.txt
INFO : - Path2 Queue copy to Path1 - {path1/}file1.txt
INFO : - Path2 Do queued copies to - Path1
INFO : - Path1 Do queued copies to - Path2
INFO : Updating listings
INFO : Validating listings for Path1 "{path1/}" vs Path2 "{path2/}"
INFO : Bisync successful

View File

@ -0,0 +1 @@
This file is used for testing the health of rclone accesses to the local/remote file system. Do not delete.

View File

@ -0,0 +1 @@
This file is newer

View File

@ -0,0 +1,14 @@
test basic
# Simple test case for development
test initial bisync
bisync resync
bisync resync ignore-listing-checksum
test place newer files on both paths
# force specific modification time since file time is lost through git
touch-copy 2001-01-02 {datadir/}file1.txt {path2/}
copy-as {datadir/}file1.txt {path1/}subdir file20.txt
test bisync run
bisync ignore-listing-checksum

View File

@ -277,7 +277,39 @@ sync run times for very large numbers of files.
The check may be run manually with `--check-sync=only`. It runs only the The check may be run manually with `--check-sync=only`. It runs only the
integrity check and terminates without actually synching. integrity check and terminates without actually synching.
See also: [Concurrent modifications](#concurrent-modifications) Note that currently, `--check-sync` **only checks filenames and NOT modtime, size, or hash.**
For a more robust integrity check of the current state, consider using [`check`](commands/rclone_check/)
(or [`cryptcheck`](/commands/rclone_cryptcheck/), if at least one path is a `crypt` remote.)
For example, a possible sequence could look like this:
1. Normally scheduled bisync run:
```
rclone bisync Path1 Path2 -MPc --check-access --max-delete 10 --filters-file /path/to/filters.txt -v --no-cleanup --ignore-listing-checksum --disable ListR --checkers=16 --drive-pacer-min-sleep=10ms --create-empty-src-dirs --resilient
```
2. Periodic independent integrity check (perhaps scheduled nightly or weekly):
```
rclone check -MvPc Path1 Path2 --filter-from /path/to/filters.txt
```
3. If diffs are found, you have some choices to correct them.
If one side is more up-to-date and you want to make the other side match it, you could run:
```
rclone sync Path1 Path2 --filter-from /path/to/filters.txt --create-empty-src-dirs -MPc -v
```
(or switch Path1 and Path2 to make Path2 the source-of-truth)
Or, if neither side is totally up-to-date, you could run a `--resync` to bring them back into agreement
(but remember that this could cause deleted files to re-appear.)
*Note also that `rclone check` does not currently include empty directories,
so if you want to know if any empty directories are out of sync,
consider alternatively running the above `rclone sync` command with `--dry-run` added.
See also: [Concurrent modifications](#concurrent-modifications), [`--resilient`](#resilient)
#### --ignore-listing-checksum #### --ignore-listing-checksum
@ -299,13 +331,6 @@ if there ARE diffs.
* Unless `--ignore-listing-checksum` is passed, bisync currently computes hashes for one path * Unless `--ignore-listing-checksum` is passed, bisync currently computes hashes for one path
*even when there's no common hash with the other path* *even when there's no common hash with the other path*
(for example, a [crypt](/crypt/#modification-times-and-hashes) remote.) (for example, a [crypt](/crypt/#modification-times-and-hashes) remote.)
* If both paths support checksums and have a common hash,
AND `--ignore-listing-checksum` was not specified when creating the listings,
`--check-sync=only` can be used to compare Path1 vs. Path2 checksums (as of the time the previous listings were created.)
However, `--check-sync=only` will NOT include checksums if the previous listings
were generated on a run using `--ignore-listing-checksum`. For a more robust integrity check of the current state,
consider using [`check`](commands/rclone_check/)
(or [`cryptcheck`](/commands/rclone_cryptcheck/), if at least one path is a `crypt` remote.)
#### --resilient #### --resilient
@ -495,44 +520,18 @@ original path on the next sync, resulting in data loss.
It is therefore recommended to _omit_ `--inplace`. It is therefore recommended to _omit_ `--inplace`.
Files that **change during** a bisync run may result in data loss. Files that **change during** a bisync run may result in data loss.
This has been seen in a highly dynamic environment, where the filesystem Prior to `rclone v1.65`, this was commonly seen in highly dynamic environments, where the filesystem
is getting hammered by running processes during the sync. was getting hammered by running processes during the sync.
The currently recommended solution is to sync at quiet times or [filter out](#filtering) As of `rclone v1.65`, bisync was redesigned to use a "snapshot" model,
unnecessary directories and files. greatly reducing the risks from changes during a sync.
Changes that are not detected during the current sync will now be detected during the following sync,
As an [alternative approach](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=scans%2C%20to%20avoid-,errors%20if%20files%20changed%20during%20sync,-Given%20the%20number), and will no longer cause the entire run to throw a critical error.
consider using `--check-sync=false` (and possibly `--resilient`) to make bisync more forgiving There is additionally a mechanism to mark files as needing to be internally rechecked next time, for added safety.
of filesystems that change during the sync. It should therefore no longer be necessary to sync only at quiet times --
Be advised that this may cause bisync to miss events that occur during a bisync run, however, note that an error can still occur if a file happens to change at the exact moment it's
so it is a good idea to supplement this with a periodic independent integrity check, being read/written by bisync (same as would happen in `rclone sync`.)
and corrective sync if diffs are found. For example, a possible sequence could look like this: (See also: [`--ignore-checksum`](https://rclone.org/docs/#ignore-checksum),
[`--local-no-check-updated`](https://rclone.org/local/#local-no-check-updated))
1. Normally scheduled bisync run:
```
rclone bisync Path1 Path2 -MPc --check-access --max-delete 10 --filters-file /path/to/filters.txt -v --check-sync=false --no-cleanup --ignore-listing-checksum --disable ListR --checkers=16 --drive-pacer-min-sleep=10ms --create-empty-src-dirs --resilient
```
2. Periodic independent integrity check (perhaps scheduled nightly or weekly):
```
rclone check -MvPc Path1 Path2 --filter-from /path/to/filters.txt
```
3. If diffs are found, you have some choices to correct them.
If one side is more up-to-date and you want to make the other side match it, you could run:
```
rclone sync Path1 Path2 --filter-from /path/to/filters.txt --create-empty-src-dirs -MPc -v
```
(or switch Path1 and Path2 to make Path2 the source-of-truth)
Or, if neither side is totally up-to-date, you could run a `--resync` to bring them back into agreement
(but remember that this could cause deleted files to re-appear.)
*Note also that `rclone check` does not currently include empty directories,
so if you want to know if any empty directories are out of sync,
consider alternatively running the above `rclone sync` command with `--dry-run` added.
### Empty directories ### Empty directories
@ -1273,6 +1272,8 @@ about _Unison_ and synchronization in general.
* Copies and deletes are now handled in one operation instead of two * Copies and deletes are now handled in one operation instead of two
* `--track-renames` and `--backup-dir` are now supported * `--track-renames` and `--backup-dir` are now supported
* Partial uploads known issue on `local`/`ftp`/`sftp` has been resolved (unless using `--inplace`) * Partial uploads known issue on `local`/`ftp`/`sftp` has been resolved (unless using `--inplace`)
* Final listings are now generated from sync results, to avoid needing to re-list
* Bisync is now much more resilient to changes that happen during a bisync run, and far less prone to critical errors / undetected changes
### `v1.64` ### `v1.64`
* Fixed an [issue](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=1.%20Dry%20runs%20are%20not%20completely%20dry) * Fixed an [issue](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=1.%20Dry%20runs%20are%20not%20completely%20dry)