rclone/cmd/bisync/deltas.go
nielash 98f539de8f bisync: refactor normalization code, fix deltas - fixes #7270
Refactored the case / unicode normalization logic to be much more efficient,
and fixed the last outstanding issue from #7270. Before this change, we were
doing lots of for loops and re-normalizing strings we had already normalized
earlier. Now we leave the normalizing entirely to March and avoid
re-transforming later, which makes a large difference in performance.
2024-01-20 14:50:08 -05:00
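The heart of the change is a single transform applied to each name: NFC-normalize, then case-fold when either remote is case-insensitive, with the transformed string used as a map key from then on (see updateAliases below). A minimal, self-contained sketch of that idea (editor's illustration; the real code reads these switches from the rclone config rather than taking a boolean parameter):

    package main

    import (
        "fmt"
        "strings"

        "golang.org/x/text/unicode/norm"
    )

    // transform mirrors the idea in updateAliases: NFC-normalize first, then
    // lowercase when either side of the sync is case-insensitive.
    func transform(s string, caseInsensitive bool) string {
        s = norm.NFC.String(s)
        if caseInsensitive {
            s = strings.ToLower(s)
        }
        return s
    }

    func main() {
        // "é" typed as NFD (e + combining acute) and as NFC (single rune)
        // collapse to the same key, so the two spellings alias each other.
        nfd, nfc := "Cafe\u0301", "Caf\u00e9"
        fmt.Println(transform(nfd, true) == transform(nfc, true)) // true
    }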

// Package bisync implements bisync
// Copyright (c) 2017-2020 Chris Nelson
package bisync

import (
    "bytes"
    "context"
    "fmt"
    "path/filepath"
    "sort"
    "strings"

    "github.com/rclone/rclone/cmd/bisync/bilib"
    "github.com/rclone/rclone/cmd/check"
    "github.com/rclone/rclone/fs"
    "github.com/rclone/rclone/fs/accounting"
    "github.com/rclone/rclone/fs/filter"
    "github.com/rclone/rclone/fs/operations"
    "golang.org/x/text/unicode/norm"
)

// delta is a bitmask of the change types detected for a single file
// between the prior and current listings.
type delta uint8

const (
    deltaZero delta = 0
    deltaNew  delta = 1 << iota
    deltaNewer
    deltaOlder
    deltaSize
    deltaHash
    deltaDeleted
)

const (
    deltaModified delta = deltaNewer | deltaOlder | deltaSize | deltaHash | deltaDeleted
    deltaOther    delta = deltaNew | deltaNewer | deltaOlder
)

// is reports whether any of the bits in cond are set in d.
func (d delta) is(cond delta) bool {
    return d&cond != 0
}
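
// Illustration (editor's note, not part of the original source): because
// delta is a bitmask, one file can carry several flags at once, and the
// composite masks above classify it. A hypothetical file that grew and has a
// newer modtime:
//
//    d := deltaNewer | deltaSize
//    d.is(deltaModified) // true:  it changed
//    d.is(deltaNew)      // false: it already existed in the prior listing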
// deltaSet holds the deltas for one side of the sync, along with the
// bookkeeping needed for the safety checks.
type deltaSet struct {
    deltas     map[string]delta
    opt        *Options
    fs         fs.Fs  // base filesystem
    msg        string // filesystem name for logging
    oldCount   int    // original number of files (for "excess deletes" check)
    deleted    int    // number of deleted files (for "excess deletes" check)
    foundSame  bool   // true if found at least one unchanged file
    checkFiles bilib.Names
}

func (ds *deltaSet) empty() bool {
    return len(ds.deltas) == 0
}

func (ds *deltaSet) sort() (sorted []string) {
    if ds.empty() {
        return
    }
    sorted = make([]string, 0, len(ds.deltas))
    for file := range ds.deltas {
        sorted = append(sorted, file)
    }
    sort.Strings(sorted)
    return
}
func (ds *deltaSet) printStats() {
    if ds.empty() {
        return
    }
    nAll := len(ds.deltas)
    nNew := 0
    nNewer := 0
    nOlder := 0
    nDeleted := 0
    for _, d := range ds.deltas {
        if d.is(deltaNew) {
            nNew++
        }
        if d.is(deltaNewer) {
            nNewer++
        }
        if d.is(deltaOlder) {
            nOlder++
        }
        if d.is(deltaDeleted) {
            nDeleted++
        }
    }
    fs.Infof(nil, "%s: %4d changes: %4d new, %4d newer, %4d older, %4d deleted",
        ds.msg, nAll, nNew, nNewer, nOlder, nDeleted)
}
// checkconflicts checks potential conflicts (to avoid renaming if already identical)
func (b *bisyncRun) checkconflicts(ctxCheck context.Context, filterCheck *filter.Filter, fs1, fs2 fs.Fs) (bilib.Names, error) {
    matches := bilib.Names{}
    if filterCheck.HaveFilesFrom() {
        fs.Debugf(nil, "There are potential conflicts to check.")
        opt, close, checkopterr := check.GetCheckOpt(b.fs1, b.fs2)
        if checkopterr != nil {
            b.critical = true
            b.retryable = true
            fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)
            return matches, checkopterr
        }
        defer close()
        opt.Match = new(bytes.Buffer)
        // TODO: consider using custom CheckFn to act like cryptcheck, if either fs is a crypt remote and -c has been passed
        // note that cryptCheck() is not currently exported
        fs.Infof(nil, "Checking potential conflicts...")
        check := operations.Check(ctxCheck, opt)
        fs.Infof(nil, "Finished checking the potential conflicts. %s", check)
        // reset error count, because we don't want to count check errors as bisync errors
        accounting.Stats(ctxCheck).ResetErrors()
        // return the list of identical files to check against later
        if len(fmt.Sprint(opt.Match)) > 0 {
            matches = bilib.ToNames(strings.Split(fmt.Sprint(opt.Match), "\n"))
        }
        if matches.NotEmpty() {
            fs.Debugf(nil, "The following potential conflicts were determined to be identical. %v", matches)
        } else {
            fs.Debugf(nil, "None of the conflicts were determined to be identical.")
        }
    }
    return matches, nil
}
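
// Editor's sketch of the mechanism above (condensed from the function body):
// the --match output stream of the check is redirected into an in-memory
// buffer, so the identical pairs can be read back afterwards, one per line:
//
//    opt.Match = new(bytes.Buffer)       // collect --match output in memory
//    _ = operations.Check(ctxCheck, opt) // writes one line per identical pair
//    names := bilib.ToNames(strings.Split(fmt.Sprint(opt.Match), "\n"))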
// findDeltas compares the prior listing against the current one and builds
// the set of deltas for one path.
func (b *bisyncRun) findDeltas(fctx context.Context, f fs.Fs, oldListing string, now *fileList, msg string) (ds *deltaSet, err error) {
    var old *fileList
    newListing := oldListing + "-new"

    old, err = b.loadListing(oldListing)
    if err != nil {
        fs.Errorf(nil, "Failed loading prior %s listing: %s", msg, oldListing)
        b.abort = true
        return
    }
    if err = b.checkListing(old, oldListing, "prior "+msg); err != nil {
        return
    }
    if err = b.checkListing(now, newListing, "current "+msg); err != nil {
        return
    }

    ds = &deltaSet{
        deltas:     map[string]delta{},
        fs:         f,
        msg:        msg,
        oldCount:   len(old.list),
        opt:        b.opt,
        checkFiles: bilib.Names{},
    }

    for _, file := range old.list {
        d := deltaZero
        if !now.has(file) {
            b.indent(msg, file, "File was deleted")
            ds.deleted++
            d |= deltaDeleted
        } else {
            // skip dirs here, as we only care if they are new/deleted, not newer/older
            if !now.isDir(file) {
                if old.getTime(file) != now.getTime(file) {
                    if old.beforeOther(now, file) {
                        fs.Debugf(file, "(old: %v current: %v)", old.getTime(file), now.getTime(file))
                        b.indent(msg, file, "File is newer")
                        d |= deltaNewer
                    } else { // Current version is older than prior sync.
                        fs.Debugf(file, "(old: %v current: %v)", old.getTime(file), now.getTime(file))
                        b.indent(msg, file, "File is OLDER")
                        d |= deltaOlder
                    }
                }
                // TODO Compare sizes and hashes
            }
        }
        if d.is(deltaModified) {
            ds.deltas[file] = d
        } else {
            // Once we've found at least one unchanged file,
            // we know that not everything has changed,
            // as with a DST time change
            ds.foundSame = true
        }
    }

    for _, file := range now.list {
        if !old.has(file) {
            b.indent(msg, file, "File is new")
            ds.deltas[file] = deltaNew
        }
    }

    if b.opt.CheckAccess {
        // checkFiles is a small structure compared with the full `now` listing,
        // so we return it alone and let the full delta map be garbage collected.
        for _, file := range now.list {
            if filepath.Base(file) == b.opt.CheckFilename {
                ds.checkFiles.Add(file)
            }
        }
    }

    return
}
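
// Worked example (editor's illustration, hypothetical listings): given a
// prior listing {a, b, c} and a current listing {b, c, d} where b's modtime
// moved forward, findDeltas classifies:
//
//    a -> deltaDeleted (in old, missing from now)
//    b -> deltaNewer   (in both, current modtime is later)
//    c -> unchanged    (sets ds.foundSame; not added to ds.deltas)
//    d -> deltaNew     (in now, missing from old)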
// applyDeltas reconciles the two delta sets: it queues copies and deletes,
// renames genuine conflicts, and returns the resulting queues.
func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (changes1, changes2 bool, results2to1, results1to2 []Results, queues queues, err error) {
    path1 := bilib.FsPath(b.fs1)
    path2 := bilib.FsPath(b.fs2)

    copy1to2 := bilib.Names{}
    copy2to1 := bilib.Names{}
    delete1 := bilib.Names{}
    delete2 := bilib.Names{}
    handled := bilib.Names{}
    renamed1 := bilib.Names{}
    renamed2 := bilib.Names{}
    renameSkipped := bilib.Names{}
    deletedonboth := bilib.Names{}
    skippedDirs1 := newFileList()
    skippedDirs2 := newFileList()

    ctxMove := b.opt.setDryRun(ctx)

    // update AliasMap for deleted files, as march does not know about them
    b.updateAliases(ctx, ds1, ds2)

    // efficient isDir check
    // we load the listing just once and store only the dirs
    dirs1, dirs1Err := b.listDirsOnly(1)
    if dirs1Err != nil {
        b.critical = true
        b.retryable = true
        fs.Debugf(nil, "Error generating dirsonly list for path1: %v", dirs1Err)
        return
    }
    dirs2, dirs2Err := b.listDirsOnly(2)
    if dirs2Err != nil {
        b.critical = true
        b.retryable = true
        fs.Debugf(nil, "Error generating dirsonly list for path2: %v", dirs2Err)
        return
    }

    // build a list of only the "deltaOther"s so we don't have to check more files than necessary
    // this is essentially the same as running rclone check with a --files-from filter, then exempting the --match results from being renamed
    // we therefore avoid having to list the same directory more than once.
    // we are intentionally overriding DryRun here because we need to perform the check, even during a dry run, or the results would be inaccurate.
    // check is a read-only operation by its nature, so it's already "dry" in that sense.
    ctxNew, ciCheck := fs.AddConfig(ctx)
    ciCheck.DryRun = false
    ctxCheck, filterCheck := filter.AddConfig(ctxNew)
    for _, file := range ds1.sort() {
        alias := b.aliases.Alias(file)
        d1 := ds1.deltas[file]
        if d1.is(deltaOther) {
            d2, in2 := ds2.deltas[file]
            if !in2 && file != alias {
                d2 = ds2.deltas[alias]
            }
            if d2.is(deltaOther) {
                checkit := func(filename string) {
                    if err := filterCheck.AddFile(filename); err != nil {
                        fs.Debugf(nil, "Non-critical error adding file to list of potential conflicts to check: %s", err)
                    } else {
                        fs.Debugf(nil, "Added file to list of potential conflicts to check: %s", filename)
                    }
                }
                checkit(file)
                if file != alias {
                    checkit(alias)
                }
            }
        }
    }

    // if there are potential conflicts to check, check them all here (outside the loop) in one fell swoop
    matches, err := b.checkconflicts(ctxCheck, filterCheck, b.fs1, b.fs2)
    for _, file := range ds1.sort() {
        alias := b.aliases.Alias(file)
        p1 := path1 + file
        p2 := path2 + alias
        d1 := ds1.deltas[file]

        if d1.is(deltaOther) {
            d2, in2 := ds2.deltas[file]
            // try looking under alternate name
            if !in2 && file != alias {
                d2, in2 = ds2.deltas[alias]
            }
            if !in2 {
                b.indent("Path1", p2, "Queue copy to Path2")
                copy1to2.Add(file)
            } else if d2.is(deltaDeleted) {
                b.indent("Path1", p2, "Queue copy to Path2")
                copy1to2.Add(file)
                handled.Add(file)
            } else if d2.is(deltaOther) {
                b.indent("!WARNING", file, "New or changed in both paths")

                // if files are identical, leave them alone instead of renaming
                if (dirs1.has(file) || dirs1.has(alias)) && (dirs2.has(file) || dirs2.has(alias)) {
                    fs.Debugf(nil, "This is a directory, not a file. Skipping equality check and will not rename: %s", file)
                    ls1.getPut(file, skippedDirs1)
                    ls2.getPut(file, skippedDirs2)
                } else {
                    equal := matches.Has(file)
                    if !equal {
                        equal = matches.Has(alias)
                    }
                    if equal {
                        if ciCheck.FixCase && file != alias {
                            // the content is equal but filename still needs to be FixCase'd, so copy1to2
                            // the Path1 version is deemed "correct" in this scenario
                            fs.Infof(alias, "Files are equal but will copy anyway to fix case to %s", file)
                            copy1to2.Add(file)
                        } else {
                            fs.Infof(nil, "Files are equal! Skipping: %s", file)
                            renameSkipped.Add(file)
                            renameSkipped.Add(alias)
                        }
                    } else {
                        fs.Debugf(nil, "Files are NOT equal: %s", file)
                        b.indent("!Path1", p1+"..path1", "Renaming Path1 copy")
                        if err = operations.MoveFile(ctxMove, b.fs1, b.fs1, file+"..path1", file); err != nil {
                            err = fmt.Errorf("path1 rename failed for %s: %w", p1, err)
                            b.critical = true
                            return
                        }
                        if b.opt.DryRun {
                            renameSkipped.Add(file)
                        } else {
                            renamed1.Add(file)
                        }
                        b.indent("!Path1", p2+"..path1", "Queue copy to Path2")
                        copy1to2.Add(file + "..path1")

                        b.indent("!Path2", p2+"..path2", "Renaming Path2 copy")
                        if err = operations.MoveFile(ctxMove, b.fs2, b.fs2, alias+"..path2", alias); err != nil {
                            err = fmt.Errorf("path2 rename failed for %s: %w", alias, err)
                            return
                        }
                        if b.opt.DryRun {
                            renameSkipped.Add(alias)
                        } else {
                            renamed2.Add(alias)
                        }
                        b.indent("!Path2", p1+"..path2", "Queue copy to Path1")
                        copy2to1.Add(alias + "..path2")
                    }
                }
                handled.Add(file)
            }
        } else {
            // Path1 deleted
            d2, in2 := ds2.deltas[file]
            // try looking under alternate name
            fs.Debugf(file, "alias: %s, in2: %v", alias, in2)
            if !in2 && file != alias {
                fs.Debugf(file, "looking for alias: %s", alias)
                d2, in2 = ds2.deltas[alias]
                if in2 {
                    fs.Debugf(file, "detected alias: %s", alias)
                }
            }
            if !in2 {
                b.indent("Path2", p2, "Queue delete")
                delete2.Add(file)
                copy1to2.Add(file)
            } else if d2.is(deltaOther) {
                b.indent("Path2", p1, "Queue copy to Path1")
                copy2to1.Add(file)
                handled.Add(file)
            } else if d2.is(deltaDeleted) {
                handled.Add(file)
                deletedonboth.Add(file)
                deletedonboth.Add(alias)
            }
        }
    }
    for _, file := range ds2.sort() {
        alias := b.aliases.Alias(file)
        p1 := path1 + alias
        d2 := ds2.deltas[file]

        if handled.Has(file) || handled.Has(alias) {
            continue
        }
        if d2.is(deltaOther) {
            b.indent("Path2", p1, "Queue copy to Path1")
            copy2to1.Add(file)
        } else {
            // Deleted
            b.indent("Path1", p1, "Queue delete")
            delete1.Add(file)
            copy2to1.Add(file)
        }
    }
    // Do the batch operation
    if copy2to1.NotEmpty() {
        changes1 = true
        b.indent("Path2", "Path1", "Do queued copies to")
        results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1")
        if err != nil {
            return
        }
        // copy empty dirs from path2 to path1 (if --create-empty-src-dirs)
        b.syncEmptyDirs(ctx, b.fs1, copy2to1, dirs2, &results2to1, "make")
    }
    if copy1to2.NotEmpty() {
        changes2 = true
        b.indent("Path1", "Path2", "Do queued copies to")
        results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2")
        if err != nil {
            return
        }
        // copy empty dirs from path1 to path2 (if --create-empty-src-dirs)
        b.syncEmptyDirs(ctx, b.fs2, copy1to2, dirs1, &results1to2, "make")
    }
    if delete1.NotEmpty() {
        if err = b.saveQueue(delete1, "delete1"); err != nil {
            return
        }
        // propagate deletions of empty dirs from path2 to path1 (if --create-empty-src-dirs)
        b.syncEmptyDirs(ctx, b.fs1, delete1, dirs1, &results2to1, "remove")
    }
    if delete2.NotEmpty() {
        if err = b.saveQueue(delete2, "delete2"); err != nil {
            return
        }
        // propagate deletions of empty dirs from path1 to path2 (if --create-empty-src-dirs)
        b.syncEmptyDirs(ctx, b.fs2, delete2, dirs2, &results1to2, "remove")
    }

    queues.copy1to2 = copy1to2
    queues.copy2to1 = copy2to1
    queues.renamed1 = renamed1
    queues.renamed2 = renamed2
    queues.renameSkipped = renameSkipped
    queues.deletedonboth = deletedonboth
    queues.skippedDirs1 = skippedDirs1
    queues.skippedDirs2 = skippedDirs2

    return
}
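
// Editor's sketch of how the two halves fit together (condensed; variable
// names are approximate, modeled on the calling code in operations.go):
// deltas are computed once per side, then reconciled in a single pass:
//
//    ds1, err := b.findDeltas(fctx, b.fs1, listing1, now1, "Path1")
//    ds2, err := b.findDeltas(fctx, b.fs2, listing2, now2, "Path2")
//    changes1, changes2, results2to1, results1to2, queues, err := b.applyDeltas(ctx, ds1, ds2)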
// excessDeletes checks whether the number of deletes is within the allowed range
func (ds *deltaSet) excessDeletes() bool {
    maxDelete := ds.opt.MaxDelete
    maxRatio := float64(maxDelete) / 100.0
    curRatio := 0.0
    if ds.deleted > 0 && ds.oldCount > 0 {
        curRatio = float64(ds.deleted) / float64(ds.oldCount)
    }
    if curRatio <= maxRatio {
        return false
    }
    fs.Errorf("Safety abort",
        "too many deletes (>%d%%, %d of %d) on %s %s. Run with --force if desired.",
        maxDelete, ds.deleted, ds.oldCount, ds.msg, quotePath(bilib.FsPath(ds.fs)))
    return true
}
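
// Worked example (editor's note): with --max-delete=50, a prior listing of
// 200 files, and 120 of them deleted, curRatio = 120/200 = 0.6 and
// maxRatio = 50/100 = 0.5; since 0.6 > 0.5 the safety abort triggers, and
// the run must be retried with --force to proceed.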
// normally we build the AliasMap from march results,
// however, march does not know about deleted files, so we need to check them for aliases manually
func (b *bisyncRun) updateAliases(ctx context.Context, ds1, ds2 *deltaSet) {
    ci := fs.GetConfig(ctx)

    // skip if not needed
    if ci.NoUnicodeNormalization && !ci.IgnoreCaseSync && !b.fs1.Features().CaseInsensitive && !b.fs2.Features().CaseInsensitive {
        return
    }
    if ds1.deleted < 1 && ds2.deleted < 1 {
        return
    }

    fs.Debugf(nil, "Updating AliasMap")

    transform := func(s string) string {
        if !ci.NoUnicodeNormalization {
            s = norm.NFC.String(s)
        }
        // note: march only checks the dest, but we check both here
        if ci.IgnoreCaseSync || b.fs1.Features().CaseInsensitive || b.fs2.Features().CaseInsensitive {
            s = strings.ToLower(s)
        }
        return s
    }

    delMap1 := map[string]string{}  // [transformedname]originalname
    delMap2 := map[string]string{}  // [transformedname]originalname
    fullMap1 := map[string]string{} // [transformedname]originalname
    fullMap2 := map[string]string{} // [transformedname]originalname

    for _, name := range ls1.list {
        fullMap1[transform(name)] = name
    }
    for _, name := range ls2.list {
        fullMap2[transform(name)] = name
    }

    addDeletes := func(ds *deltaSet, delMap, fullMap map[string]string) {
        for _, file := range ds.sort() {
            d := ds.deltas[file]
            if d.is(deltaDeleted) {
                delMap[transform(file)] = file
                fullMap[transform(file)] = file
            }
        }
    }
    addDeletes(ds1, delMap1, fullMap1)
    addDeletes(ds2, delMap2, fullMap2)

    addAliases := func(delMap, fullMap map[string]string) {
        for transformedname, name := range delMap {
            matchedName, found := fullMap[transformedname]
            if found && name != matchedName {
                fs.Debugf(name, "adding alias %s", matchedName)
                b.aliases.Add(name, matchedName)
            }
        }
    }
    addAliases(delMap1, fullMap2)
    addAliases(delMap2, fullMap1)
}
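
// Editor's illustration of the alias logic above (assuming unicode
// normalization is enabled and one remote is case-insensitive): suppose
// Path1 deleted "Café" stored in NFD form while Path2 still lists it in NFC
// form. Both spellings transform to the key "café", so the lookup of
// delMap1's entry in fullMap2 succeeds, name != matchedName, and the pair is
// recorded as an alias:
//
//    delMap1["café"]  = "Cafe\u0301" // deleted on Path1 (NFD)
//    fullMap2["café"] = "Caf\u00e9"  // still present on Path2 (NFC)
//    b.aliases.Add("Cafe\u0301", "Caf\u00e9")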