bisync: allow lock file expiration/renewal with --max-lock - #7470

Background: Bisync uses lock files as a safety feature to prevent
interference from other bisync runs while it is running. Bisync normally
removes these lock files at the end of a run, but if bisync is abruptly
interrupted, these files will be left behind. By default, they will lock out
all future runs, until the user has a chance to manually check things out and
remove the lock.

Before this change, lock files blocked future runs indefinitely, so a single
interrupted run would lock out all future runs forever (absent user
intervention), and there was no way to change this behavior.

After this change, a new --max-lock flag can be used to make lock files
automatically expire after a certain period of time, so that future runs are
not locked out forever, and auto-recovery is possible. --max-lock can be any
duration 2m or greater (or 0 to disable). If set, lock files older than this
will be considered "expired", and future runs will be allowed to disregard them
and proceed. (Note that the --max-lock duration must be set by the process that
left the lock file -- not the later one interpreting it.)

If set, bisync will also "renew" these lock files at an interval of --max-lock
minus one minute throughout a run, for extra safety. (For example,
with --max-lock 5m, bisync would renew the lock file (for another 5 minutes)
every 4 minutes until the run has completed.) In other words, it should not be
possible for a lock file to pass its expiration time while the process that
created it is still running -- and you can therefore be reasonably sure that
any _expired_ lock file you may find was left there by an interrupted run, not
one that is still running and just taking a while.

If --max-lock is 0 or not set, the default is that lock files will never
expire, and will block future runs (of these same two bisync paths)
indefinitely.

For maximum resilience against disruptions, consider setting a relatively short
duration like --max-lock 2m along with --resilient and --recover, and a
relatively frequent cron schedule. The result will be a very robust
"set-it-and-forget-it" bisync run that can automatically bounce back from
almost any interruption it might encounter, without requiring the user to get
involved and run a --resync.
This commit is contained in:
nielash
2023-12-03 03:19:13 -05:00
parent 4025f42bd9
commit e9cd3e5986
4 changed files with 228 additions and 53 deletions

View File

@@ -9,7 +9,6 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
gosync "sync"
"time"
@@ -48,6 +47,7 @@ type bisyncRun struct {
SyncCI *fs.ConfigInfo
CancelSync context.CancelFunc
DebugName string
lockFile string
}
type queues struct {
@@ -102,32 +102,14 @@ func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) {
b.aliases = bilib.AliasMap{}
// Handle lock file
lockFile := ""
if !opt.DryRun {
lockFile = b.basePath + ".lck"
if bilib.FileExists(lockFile) {
errTip := Color(terminal.MagentaFg, "Tip: this indicates that another bisync run (of these same paths) either is still running or was interrupted before completion. \n")
errTip += Color(terminal.MagentaFg, "If you're SURE you want to override this safety feature, you can delete the lock file with the following command, then run bisync again: \n")
errTip += fmt.Sprintf(Color(terminal.HiRedFg, "rclone deletefile \"%s\""), lockFile)
return fmt.Errorf(Color(terminal.RedFg, "prior lock file found: %s \n")+errTip, Color(terminal.HiYellowFg, lockFile))
}
pidStr := []byte(strconv.Itoa(os.Getpid()))
if err = os.WriteFile(lockFile, pidStr, bilib.PermSecure); err != nil {
return fmt.Errorf("cannot create lock file: %s: %w", lockFile, err)
}
fs.Debugf(nil, "Lock file created: %s", lockFile)
err = b.setLockFile()
if err != nil {
return err
}
// Handle SIGINT
var finaliseOnce gosync.Once
markFailed := func(file string) {
failFile := file + "-err"
if bilib.FileExists(file) {
_ = os.Remove(failFile)
_ = os.Rename(file, failFile)
}
}
// waitFor runs fn() until it returns true or the timeout expires
waitFor := func(msg string, totalWait time.Duration, fn func() bool) (ok bool) {
const individualWait = 1 * time.Second
@@ -175,7 +157,7 @@ func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) {
markFailed(b.listing1)
markFailed(b.listing2)
}
_ = os.Remove(lockFile)
b.removeLockFile()
}
})
}
@@ -185,16 +167,7 @@ func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) {
// run bisync
err = b.runLocked(ctx)
if lockFile != "" {
errUnlock := os.Remove(lockFile)
if errUnlock == nil {
fs.Debugf(nil, "Lock file removed: %s", lockFile)
} else if err == nil {
err = errUnlock
} else {
fs.Errorf(nil, "cannot remove lockfile %s: %v", lockFile, errUnlock)
}
}
b.removeLockFile()
b.CleanupCompleted = true
if b.InGracefulShutdown {