rclone/cmd/bisync/lockfile.go

package bisync

import (
	"encoding/json"
	"fmt"
	"io"
	"os"
	"strconv"
	"sync"
	"time"

	"github.com/rclone/rclone/cmd/bisync/bilib"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/lib/terminal"
)

// basicallyforever is the lock duration substituted when --max-lock is zero
// or negative (see setLockFileExpiration): 200 years, i.e. a lock that, for
// practical purposes, never expires.
const basicallyforever = 200 * 365 * 24 * time.Hour

var (
	// stopRenewal holds the func returned by startLockRenewal. It is assigned
	// by setLockFile and invoked by removeLockFile to stop the renewal.
	stopRenewal func()

	// data is the lock file payload. renewLockFile fills it in and encodes it
	// to the lock file as JSON; lockFileIsExpired decodes the lock file back
	// into it.
	data struct {
		Session     string
		PID         string
		TimeRenewed time.Time
		TimeExpires time.Time
	}
)

// setLockFile creates the lock file for this run and starts its renewal.
//
// In dry-run mode no lock file is used and b.lockFile is left empty.
// Otherwise the lock file path is b.basePath + ".lck". If a file already
// exists there and lockFileIsExpired does not report it as expired, an error
// describing how to remove it manually is returned. If there is no prior lock
// file, or it has expired, the file is (over)written with this process's PID,
// immediately rewritten by renewLockFile, and the func returned by
// startLockRenewal is stored in stopRenewal.
func (b *bisyncRun) setLockFile() error {
	b.lockFile = ""
	b.setLockFileExpiration()
	if b.opt.DryRun {
		return nil
	}

	b.lockFile = b.basePath + ".lck"
	if bilib.FileExists(b.lockFile) && !b.lockFileIsExpired() {
		errTip := Color(terminal.MagentaFg, "Tip: this indicates that another bisync run (of these same paths) either is still running or was interrupted before completion. \n")
		errTip += Color(terminal.MagentaFg, "If you're SURE you want to override this safety feature, you can delete the lock file with the following command, then run bisync again: \n")
		errTip += fmt.Sprintf(Color(terminal.HiRedFg, "rclone deletefile \"%s\""), b.lockFile)
		// errTip already has the lock file path expanded into it, so it must
		// be passed as an argument rather than spliced into the format
		// string: a path containing '%' would otherwise be re-interpreted as
		// a formatting verb and garble the message.
		return fmt.Errorf(Color(terminal.RedFg, "prior lock file found: %s \n")+"%s", Color(terminal.HiYellowFg, b.lockFile), errTip)
	}

	pidStr := []byte(strconv.Itoa(os.Getpid()))
	// NOTE(review): err is not declared in this function; it resolves to an
	// identifier declared elsewhere in the package — confirm that assigning
	// to it here is intended.
	if err = os.WriteFile(b.lockFile, pidStr, bilib.PermSecure); err != nil {
		return fmt.Errorf(Color(terminal.RedFg, "cannot create lock file: %s: %w"), b.lockFile, err)
	}
	fs.Debugf(nil, "Lock file created: %s", b.lockFile)
	b.renewLockFile()
	stopRenewal = b.startLockRenewal()
	return nil
}

// removeLockFile stops the lock renewal and deletes the lock file.
//
// It does nothing when b.lockFile is empty. After attempting the removal it
// clears b.lockFile, so a second call is a no-op.
func (b *bisyncRun) removeLockFile() {
	if b.lockFile == "" {
		return
	}

	stopRenewal()
	// NOTE(review): err is not declared in this function; it resolves to an
	// identifier declared elsewhere in the package. A removal failure is
	// stored there when it is nil, and only logged otherwise.
	switch errUnlock := os.Remove(b.lockFile); {
	case errUnlock == nil:
		fs.Debugf(nil, "Lock file removed: %s", b.lockFile)
	case err == nil:
		err = errUnlock
	default:
		fs.Errorf(nil, "cannot remove lockfile %s: %v", b.lockFile, errUnlock)
	}
	b.lockFile = "" // block removing it again
}

// setLockFileExpiration normalises b.opt.MaxLock.
//
// A zero or negative value is replaced by basicallyforever. A positive value
// shorter than two minutes is raised to two minutes, with a log message
// reporting the change. Any other value is left untouched.
func (b *bisyncRun) setLockFileExpiration() {
	const shortestMaxLock = 2 * time.Minute

	switch requested := b.opt.MaxLock; {
	case requested <= 0:
		b.opt.MaxLock = basicallyforever
	case requested < shortestMaxLock:
		fs.Logf(nil, Color(terminal.YellowFg, "--max-lock cannot be shorter than 2 minutes (unless 0.) Changing --max-lock from %v to %v"), requested, shortestMaxLock)
		b.opt.MaxLock = shortestMaxLock
	}
}

// renewLockFile rewrites the lock file with a fresh expiration time.
//
// It does nothing unless b.lockFile is set and the file currently exists.
// Otherwise it records the session (b.basePath), this process's PID, the
// renewal time and the new expiration (renewal time + b.opt.MaxLock) in the
// package-level data value and writes it to the lock file as JSON.
//
// Each step's result is passed to b.handleErr, as before. If the file cannot
// be created the function stops there, rather than encoding to and closing a
// nil file (which only produced two extra follow-on errors). The "renewed"
// message is logged only when the file was actually written and closed
// successfully.
func (b *bisyncRun) renewLockFile() {
	if b.lockFile == "" || !bilib.FileExists(b.lockFile) {
		return
	}

	// Take the timestamp once so TimeExpires is exactly TimeRenewed + MaxLock.
	now := time.Now()
	data.Session = b.basePath
	data.PID = strconv.Itoa(os.Getpid())
	data.TimeRenewed = now
	data.TimeExpires = now.Add(b.opt.MaxLock)

	// save data file
	df, err := os.Create(b.lockFile)
	b.handleErr(b.lockFile, "error renewing lock file", err, true, true)
	if err != nil {
		return
	}
	encodeErr := json.NewEncoder(df).Encode(data)
	b.handleErr(b.lockFile, "error encoding JSON to lock file", encodeErr, true, true)
	closeErr := df.Close()
	b.handleErr(b.lockFile, "error closing lock file", closeErr, true, true)
	if encodeErr != nil || closeErr != nil {
		// The renewal did not make it to disk; do not claim that it did.
		return
	}

	// With the "never expires" default there is nothing worth reporting.
	if b.opt.MaxLock < basicallyforever {
		fs.Infof(nil, Color(terminal.HiBlueFg, "lock file renewed for %v. New expiration: %v"), b.opt.MaxLock, data.TimeExpires)
	}
}

// lockFileIsExpired reports whether the lock file at b.lockFile carries an
// expiration time that has already passed.
//
// It returns false when b.lockFile is empty, when the file does not exist,
// when it cannot be opened, or when the decoded TimeExpires is zero (no
// expiration recorded) or still in the future. When the lock has expired it
// also calls markFailed on both listings before returning true.
//
// The file is decoded into the package-level data value, which is reset
// first so that nothing left over from an earlier renewal or read can
// influence the decision for this file.
func (b *bisyncRun) lockFileIsExpired() bool {
	if b.lockFile == "" || !bilib.FileExists(b.lockFile) {
		return false
	}

	// data is shared package state. The decoder only overwrites fields that
	// are present in the file, and overwrites nothing when the content is not
	// decodable, so stale values must be cleared before reading.
	data.Session, data.PID = "", ""
	data.TimeRenewed, data.TimeExpires = time.Time{}, time.Time{}

	rdf, err := os.Open(b.lockFile)
	b.handleErr(b.lockFile, "error reading lock file", err, true, true)
	if err != nil {
		// An unreadable lock file cannot be shown to have expired, so it is
		// treated as still in force.
		return false
	}
	dec := json.NewDecoder(rdf)
	for {
		// Stop on the first error of any kind; only io.EOF is expected.
		if err := dec.Decode(&data); err != nil {
			if err != io.EOF {
				fs.Errorf(b.lockFile, "err: %v", err)
			}
			break
		}
	}
	b.handleErr(b.lockFile, "error closing file", rdf.Close(), true, true)

	if !data.TimeExpires.IsZero() && data.TimeExpires.Before(time.Now()) {
		fs.Infof(b.lockFile, Color(terminal.GreenFg, "Lock file found, but it expired at %v. Will delete it and proceed."), data.TimeExpires)
		markFailed(b.listing1) // listing is untrusted so force revert to prior (if --recover) or create new ones (if --resync)
		markFailed(b.listing2)
		return true
	}
	fs.Infof(b.lockFile, Color(terminal.RedFg, "Valid lock file found. Expires at %v. (%v from now)"), data.TimeExpires, time.Since(data.TimeExpires).Abs().Round(time.Second))
	prettyprint(data, "Lockfile info", fs.LogLevelInfo)
	return false
}

// startLockRenewal launches a goroutine that calls renewLockFile every
// --max-lock minus one minute.
//
// It returns a func which should be called to stop the renewal; that func
// signals the goroutine and waits for it to exit. When there is nothing to
// renew (no lock file, or a --max-lock that is non-positive or at least
// basicallyforever) no goroutine is started and the returned func is a no-op.
//
// The ticker interval is b.opt.MaxLock - time.Minute, and time.NewTicker
// panics on a non-positive interval, so b.opt.MaxLock must exceed one minute
// here.
func (b *bisyncRun) startLockRenewal() func() {
	renewalNeeded := b.lockFile != "" && b.opt.MaxLock > 0 && b.opt.MaxLock < basicallyforever
	if !renewalNeeded {
		return func() {}
	}

	quit := make(chan struct{})
	var running sync.WaitGroup
	running.Add(1)
	go func() {
		defer running.Done()
		renewEvery := time.NewTicker(b.opt.MaxLock - time.Minute)
		defer renewEvery.Stop()
		for {
			select {
			case <-quit:
				return
			case <-renewEvery.C:
				b.renewLockFile()
			}
		}
	}()

	return func() {
		close(quit)
		running.Wait()
	}
}

// markFailed renames file to file + "-err", first removing any existing file
// of that name. It does nothing if file does not exist. Errors from the
// removal and the rename are ignored.
func markFailed(file string) {
	if !bilib.FileExists(file) {
		return
	}
	failedName := file + "-err"
	_ = os.Remove(failedName)
	_ = os.Rename(file, failedName)
}
bisync: allow lock file expiration/renewal with --max-lock - #7470 Background: Bisync uses lock files as a safety feature to prevent interference from other bisync runs while it is running. Bisync normally removes these lock files at the end of a run, but if bisync is abruptly interrupted, these files will be left behind. By default, they will lock out all future runs, until the user has a chance to manually check things out and remove the lock. Before this change, lock files blocked future runs indefinitely, so a single interrupted run would lock out all future runs forever (absent user intervention), and there was no way to change this behavior. After this change, a new --max-lock flag can be used to make lock files automatically expire after a certain period of time, so that future runs are not locked out forever, and auto-recovery is possible. --max-lock can be any duration 2m or greater (or 0 to disable). If set, lock files older than this will be considered "expired", and future runs will be allowed to disregard them and proceed. (Note that the --max-lock duration must be set by the process that left the lock file -- not the later one interpreting it.) If set, bisync will also "renew" these lock files every --max-lock_minus_one_minute throughout a run, for extra safety. (For example, with --max-lock 5m, bisync would renew the lock file (for another 5 minutes) every 4 minutes until the run has completed.) In other words, it should not be possible for a lock file to pass its expiration time while the process that created it is still running -- and you can therefore be reasonably sure that any _expired_ lock file you may find was left there by an interrupted run, not one that is still running and just taking a while. If --max-lock is 0 or not set, the default is that lock files will never expire, and will block future runs (of these same two bisync paths) indefinitely. 
For maximum resilience from disruptions, consider setting a relatively short duration like --max-lock 2m along with --resilient and --recover, and a relatively frequent cron schedule. The result will be a very robust "set-it-and-forget-it" bisync run that can automatically bounce back from almost any interruption it might encounter, without requiring the user to get involved and run a --resync. 2023-12-03 09:19:13 +01:00			`package bisync`

			`import (`
			`"encoding/json"`
			`"fmt"`
			`"io"`
			`"os"`
			`"strconv"`
			`"sync"`
			`"time"`

			`"github.com/rclone/rclone/cmd/bisync/bilib"`
			`"github.com/rclone/rclone/fs"`
			`"github.com/rclone/rclone/lib/terminal"`
			`)`

			`const basicallyforever = 200 * 365 * 24 * time.Hour`

			`var stopRenewal func()`

			`var data = struct {`
			`Session string`
			`PID string`
			`TimeRenewed time.Time`
			`TimeExpires time.Time`
			`}{}`

			`func (b *bisyncRun) setLockFile() error {`
			`b.lockFile = ""`
			`b.setLockFileExpiration()`
			`if !b.opt.DryRun {`
			`b.lockFile = b.basePath + ".lck"`
			`if bilib.FileExists(b.lockFile) {`
			`if !b.lockFileIsExpired() {`
			`errTip := Color(terminal.MagentaFg, "Tip: this indicates that another bisync run (of these same paths) either is still running or was interrupted before completion. \n")`
			`errTip += Color(terminal.MagentaFg, "If you're SURE you want to override this safety feature, you can delete the lock file with the following command, then run bisync again: \n")`
			`errTip += fmt.Sprintf(Color(terminal.HiRedFg, "rclone deletefile \"%s\""), b.lockFile)`
			`return fmt.Errorf(Color(terminal.RedFg, "prior lock file found: %s \n")+errTip, Color(terminal.HiYellowFg, b.lockFile))`
			`}`
			`}`

			`pidStr := []byte(strconv.Itoa(os.Getpid()))`
			`if err = os.WriteFile(b.lockFile, pidStr, bilib.PermSecure); err != nil {`
			`return fmt.Errorf(Color(terminal.RedFg, "cannot create lock file: %s: %w"), b.lockFile, err)`
			`}`
			`fs.Debugf(nil, "Lock file created: %s", b.lockFile)`
			`b.renewLockFile()`
			`stopRenewal = b.startLockRenewal()`
			`}`
			`return nil`
			`}`

			`func (b *bisyncRun) removeLockFile() {`
			`if b.lockFile != "" {`
			`stopRenewal()`
			`errUnlock := os.Remove(b.lockFile)`
			`if errUnlock == nil {`
			`fs.Debugf(nil, "Lock file removed: %s", b.lockFile)`
			`} else if err == nil {`
			`err = errUnlock`
			`} else {`
			`fs.Errorf(nil, "cannot remove lockfile %s: %v", b.lockFile, errUnlock)`
			`}`
			`b.lockFile = "" // block removing it again`
			`}`
			`}`

			`func (b *bisyncRun) setLockFileExpiration() {`
			`if b.opt.MaxLock > 0 && b.opt.MaxLock < 2*time.Minute {`
			`fs.Logf(nil, Color(terminal.YellowFg, "--max-lock cannot be shorter than 2 minutes (unless 0.) Changing --max-lock from %v to %v"), b.opt.MaxLock, 2*time.Minute)`
			`b.opt.MaxLock = 2 * time.Minute`
			`} else if b.opt.MaxLock <= 0 {`
			`b.opt.MaxLock = basicallyforever`
			`}`
			`}`

			`func (b *bisyncRun) renewLockFile() {`
			`if b.lockFile != "" && bilib.FileExists(b.lockFile) {`

			`data.Session = b.basePath`
			`data.PID = strconv.Itoa(os.Getpid())`
			`data.TimeRenewed = time.Now()`
			`data.TimeExpires = time.Now().Add(b.opt.MaxLock)`

			`// save data file`
			`df, err := os.Create(b.lockFile)`
			`b.handleErr(b.lockFile, "error renewing lock file", err, true, true)`
			`b.handleErr(b.lockFile, "error encoding JSON to lock file", json.NewEncoder(df).Encode(data), true, true)`
			`b.handleErr(b.lockFile, "error closing lock file", df.Close(), true, true)`
			`if b.opt.MaxLock < basicallyforever {`
			`fs.Infof(nil, Color(terminal.HiBlueFg, "lock file renewed for %v. New expiration: %v"), b.opt.MaxLock, data.TimeExpires)`
			`}`
			`}`
			`}`

			`func (b *bisyncRun) lockFileIsExpired() bool {`
			`if b.lockFile != "" && bilib.FileExists(b.lockFile) {`
			`rdf, err := os.Open(b.lockFile)`
			`b.handleErr(b.lockFile, "error reading lock file", err, true, true)`
			`dec := json.NewDecoder(rdf)`
			`for {`
bisync: fix endless loop if lockfile decoder errors Before this change, the decoder looked only for `io.EOF`, and if any other error was returned, it could cause an infinite loop. This change fixes the issue by breaking for any non-nil error. 2024-04-09 13:17:00 +02:00			`if err := dec.Decode(&data); err != nil {`
			`if err != io.EOF {`
			`fs.Errorf(b.lockFile, "err: %v", err)`
			`}`
bisync: allow lock file expiration/renewal with --max-lock - #7470 Background: Bisync uses lock files as a safety feature to prevent interference from other bisync runs while it is running. Bisync normally removes these lock files at the end of a run, but if bisync is abruptly interrupted, these files will be left behind. By default, they will lock out all future runs, until the user has a chance to manually check things out and remove the lock. Before this change, lock files blocked future runs indefinitely, so a single interrupted run would lock out all future runs forever (absent user intervention), and there was no way to change this behavior. After this change, a new --max-lock flag can be used to make lock files automatically expire after a certain period of time, so that future runs are not locked out forever, and auto-recovery is possible. --max-lock can be any duration 2m or greater (or 0 to disable). If set, lock files older than this will be considered "expired", and future runs will be allowed to disregard them and proceed. (Note that the --max-lock duration must be set by the process that left the lock file -- not the later one interpreting it.) If set, bisync will also "renew" these lock files every --max-lock_minus_one_minute throughout a run, for extra safety. (For example, with --max-lock 5m, bisync would renew the lock file (for another 5 minutes) every 4 minutes until the run has completed.) In other words, it should not be possible for a lock file to pass its expiration time while the process that created it is still running -- and you can therefore be reasonably sure that any _expired_ lock file you may find was left there by an interrupted run, not one that is still running and just taking a while. If --max-lock is 0 or not set, the default is that lock files will never expire, and will block future runs (of these same two bisync paths) indefinitely. 
For maximum resilience from disruptions, consider setting a relatively short duration like --max-lock 2m along with --resilient and --recover, and a relatively frequent cron schedule. The result will be a very robust "set-it-and-forget-it" bisync run that can automatically bounce back from almost any interruption it might encounter, without requiring the user to get involved and run a --resync. 2023-12-03 09:19:13 +01:00			`break`
			`}`
			`}`
			`b.handleErr(b.lockFile, "error closing file", rdf.Close(), true, true)`
			`if !data.TimeExpires.IsZero() && data.TimeExpires.Before(time.Now()) {`
			`fs.Infof(b.lockFile, Color(terminal.GreenFg, "Lock file found, but it expired at %v. Will delete it and proceed."), data.TimeExpires)`
			`markFailed(b.listing1) // listing is untrusted so force revert to prior (if --recover) or create new ones (if --resync)`
			`markFailed(b.listing2)`
			`return true`
			`}`
			`fs.Infof(b.lockFile, Color(terminal.RedFg, "Valid lock file found. Expires at %v. (%v from now)"), data.TimeExpires, time.Since(data.TimeExpires).Abs().Round(time.Second))`
			`prettyprint(data, "Lockfile info", fs.LogLevelInfo)`
			`}`
			`return false`
			`}`

			`// StartLockRenewal renews the lockfile every --max-lock minus one minute.`
			`//`
			`// It returns a func which should be called to stop the renewal.`
			`func (b *bisyncRun) startLockRenewal() func() {`
			`if b.opt.MaxLock <= 0 \|\| b.opt.MaxLock >= basicallyforever \|\| b.lockFile == "" {`
			`return func() {}`
			`}`
			`stopLockRenewal := make(chan struct{})`
			`var wg sync.WaitGroup`
			`wg.Add(1)`
			`go func() {`
			`defer wg.Done()`
			`ticker := time.NewTicker(b.opt.MaxLock - time.Minute)`
			`for {`
			`select {`
			`case <-ticker.C:`
			`b.renewLockFile()`
			`case <-stopLockRenewal:`
			`ticker.Stop()`
			`return`
			`}`
			`}`
			`}()`
			`return func() {`
			`close(stopLockRenewal)`
			`wg.Wait()`
			`}`
			`}`

			`func markFailed(file string) {`
			`failFile := file + "-err"`
			`if bilib.FileExists(file) {`
			`_ = os.Remove(failFile)`
			`_ = os.Rename(file, failFile)`
			`}`
			`}`