2021-05-16 18:39:33 +02:00
// TestBisync is a test engine for bisync test cases.
// See https://rclone.org/bisync/#testing for documentation.
// Test cases are organized in subdirs beneath ./testdata
// Results are compared against golden listings and log file.
package bisync_test
import (
"bytes"
"context"
2021-11-04 11:12:57 +01:00
"errors"
2021-05-16 18:39:33 +02:00
"flag"
"fmt"
"log"
"os"
"path"
"path/filepath"
"regexp"
"runtime"
"sort"
"strconv"
"strings"
"testing"
"time"
2023-11-17 18:14:38 +01:00
"unicode/utf8"
2021-05-16 18:39:33 +02:00
"github.com/rclone/rclone/cmd/bisync"
"github.com/rclone/rclone/cmd/bisync/bilib"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/cache"
"github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/fspath"
2023-11-17 18:14:38 +01:00
"github.com/rclone/rclone/fs/hash"
2021-05-16 18:39:33 +02:00
"github.com/rclone/rclone/fs/object"
"github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/fs/sync"
"github.com/rclone/rclone/fstest"
"github.com/rclone/rclone/lib/atexit"
2023-11-17 18:14:38 +01:00
"github.com/rclone/rclone/lib/encoder"
2021-05-16 18:39:33 +02:00
"github.com/rclone/rclone/lib/random"
2023-11-06 12:59:41 +01:00
"github.com/rclone/rclone/lib/terminal"
2023-10-01 10:12:39 +02:00
"golang.org/x/text/unicode/norm"
2021-05-16 18:39:33 +02:00
"github.com/pmezard/go-difflib/difflib"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
_ "github.com/rclone/rclone/backend/all" // for integration tests
)
// Shared constants of the bisync test engine.
const (
	// touchDateFormat is a Go reference-time layout (year-month-day).
	touchDateFormat = "2006-01-02"
	// goldenCanonBase is a fixed placeholder name.
	// NOTE(review): presumably substituted for real test dirs in golden files — confirm.
	goldenCanonBase = "_testdir_"
	// logFileName is the name of the per-test log file created in the work dir.
	logFileName = "test.log"
	// dropMe is the replacement text used by logReplacements for lines to discard.
	dropMe = "*** [DROP THIS LINE] ***"
	// eol is the line terminator.
	eol = "\n"
	// slash is the OS-specific path separator as a string.
	slash = string(os.PathSeparator)
	// fixSlash is true only when running on Windows.
	fixSlash = runtime.GOOS == "windows"
)
// logReplacements make modern test logs comparable with golden dir.
// It is a string slice of even length with this structure:
2022-08-05 17:35:41 +02:00
//
// {`matching regular expression`, "mangled result string", ...}
2021-05-16 18:39:33 +02:00
var logReplacements = [ ] string {
// skip syslog facility markers
` ^(<[1-9]>)(INFO |ERROR |NOTICE|DEBUG ):(.*)$ ` , "$2:$3" ,
// skip log prefixes
` ^\d+/\d\d/\d\d \d\d:\d\d:\d\d(?:\.\d { 6})? ` , "" ,
// ignore rclone info messages
` ^INFO : .*?: (Deleted|Copied |Moved |Updated ).*$ ` , dropMe ,
` ^NOTICE: .*?: Replacing invalid UTF-8 characters in "[^"]*"$ ` , dropMe ,
// ignore rclone debug messages
` ^DEBUG : .*$ ` , dropMe ,
// ignore dropbox info messages
` ^NOTICE: too_many_(requests|write_operations)/\.*: Too many requests or write operations.*$ ` , dropMe ,
` ^NOTICE: Dropbox root .*?: Forced to upload files to set modification times on this backend.$ ` , dropMe ,
` ^INFO : .*?: src and dst identical but can't set mod time without deleting and re-uploading$ ` , dropMe ,
2023-11-17 18:14:38 +01:00
// ignore crypt info messages
` ^INFO : .*?: Crypt detected! Using cryptcheck instead of check. \(Use --size-only or --ignore-checksum to disable\)$ ` , dropMe ,
// ignore drive info messages
` ^NOTICE:.*?Files of unknown size \(such as Google Docs\) do not sync reliably with --checksum or --size-only\. Consider using modtime instead \(the default\) or --drive-skip-gdocs.*?$ ` , dropMe ,
bisync: full support for comparing checksum, size, modtime - fixes #5679 fixes #5683 fixes #5684 fixes #5675
Before this change, bisync could only detect changes based on modtime, and
would refuse to run if either path lacked modtime support. This made bisync
unavailable for many of rclone's backends. Additionally, bisync did not account
for the Fs's precision when comparing modtimes, meaning that they could only be
reliably compared within the same side -- not against the opposite side. Size
and checksum (even when available) were ignored completely for deltas.
After this change, bisync now fully supports comparing based on any combination
of size, modtime, and checksum, lifting the prior restriction on backends
without modtime support. The comparison logic considers the backend's
precision, hash types, and other features as appropriate.
The comparison features optionally use a new --compare flag (which takes any
combination of size,modtime,checksum) and even supports some combinations not
otherwise supported in `sync` (like comparing all three at the same time.) By
default (without the --compare flag), bisync inherits the same comparison
options as `sync` (that is: size and modtime by default, unless modified with
flags such as --checksum or --size-only.) If the --compare flag is set, it will
override these defaults.
If --compare includes checksum and both remotes support checksums but have no
hash types in common with each other, checksums will be considered only for
comparisons within the same side (to determine what has changed since the prior
sync), but not for comparisons against the opposite side. If one side supports
checksums and the other does not, checksums will only be considered on the side
that supports them. When comparing with checksum and/or size without modtime,
bisync cannot determine whether a file is newer or older -- only whether it is
changed or unchanged. (If it is changed on both sides, bisync still does the
standard equality-check to avoid declaring a sync conflict unless it absolutely
has to.)
Also included are some new flags to customize the checksum comparison behavior
on backends where hashes are slow or unavailable. --no-slow-hash and
--slow-hash-sync-only allow selectively ignoring checksums on backends such as
local where they are slow. --download-hash allows computing them by downloading
when (and only when) they're otherwise not available. Of course, this option
probably won't be practical with large files, but may be a good option for
syncing small-but-important files with maximum accuracy (for example, a source
code repo on a crypt remote.) An additional advantage over methods like
cryptcheck is that the original file is not required for comparison (for
example, --download-hash can be used to bisync two different crypt remotes with
different passwords.)
Additionally, all of the above are now considered during the final --check-sync
for much-improved accuracy (before this change, it only compared filenames!)
Many other details are explained in the included docs.
2023-12-01 01:44:38 +01:00
// ignore differences in backend features
` ^.*?"HashType1":.*?$ ` , dropMe ,
` ^.*?"HashType2":.*?$ ` , dropMe ,
` ^.*?"SlowHashDetected":.*?$ ` , dropMe ,
` ^.*? for same-side diffs on .*?$ ` , dropMe ,
` ^.*?Downloading hashes.*?$ ` , dropMe ,
2021-05-16 18:39:33 +02:00
}
// dryrunReplacements unifies dry-run notices whose wording depends on the
// particular remote. Entries come in pairs: a matching regular expression
// followed by its replacement template.
var dryrunReplacements = []string{
	// pattern: "Skipped copy" or "Skipped update modification time" for file5.txt
	` ^(NOTICE: file5.txt: Skipped) (copy|update modification time) (as --dry-run is set [(]size \d+[)])$ `,
	// replacement: one wording that covers both variants
	` $1 copy (or update modification time) $3 `,
}
// Some groups of log lines may appear unordered because rclone applies
// many operations in parallel to boost performance.
// logHoppers lists the regular expressions identifying such lines.
//
// NOTE(review): every pattern literal below starts and ends with a space
// inside the backticks — confirm whether the code that compiles these
// patterns trims them or whether the padding is unintended.
var logHoppers = []string{
	// Test case `dry-run` produced log mismatches due to non-deterministic
	// order of captured dry-run info messages.
	` NOTICE: \S+?: Skipped (?:copy|move|delete|copy \(or [^)]+\)|update modification time) as --dry-run is set \(size \d+\) `,
	// Test case `extended-filenames` detected difference in order of files
	// with extended unicode names between Windows and Unix or GDrive,
	// but the order is in fact not important for success.
	` (?:INFO |NOTICE): - Path[12] +File (?:was deleted|is new|is newer|is OLDER) +- .* `,
	// Test case `check-access-filters` detected listing miscompares due
	// to indeterminate order of rclone operations in presence of multiple
	// subdirectories. The order inconsistency initially showed up in the
	// listings and triggered reordering of log messages, but the actual
	// files will in fact match.
	` .* +.....Access test failed: Path[12] file not found in Path[12].* `,
	// Test case `resync` suffered from the order of queued copies.
	` (?:INFO |NOTICE): - Path2 Resync will copy to Path1 +- .* `,
	// Test case `normalization` can have random order of fix-case files.
	` (?:INFO |NOTICE): .*: Fixed case by renaming to: .* `,
	// order of files re-checked prior to a conflict rename
	` ERROR : .*: md5 differ.* `,
}
// Some log lines can contain Windows path separator that must be
// converted to "/" in every matching token to match golden logs.
// logLinesWithSlash lists the patterns that identify such lines; they are
// joined into a single alternation by regexFixSlash.
//
// NOTE(review): every pattern literal below starts and ends with a space
// inside the backticks, so once anchored by regexFixSlash each alternative
// requires the line to begin with a space — confirm this is intended.
var logLinesWithSlash = []string{
	` .*\(\d\d\) :.*(fix-names|touch-glob|touch-copy|copy-file|copy-as|copy-dir|delete-file) `,
	` INFO : - .*Path[12].* +.*Queue copy to.* Path[12].* `,
	` INFO : Synching Path1 .*? with Path2 `,
	` INFO : Validating listings for `,
}
// regexFixSlash matches, anchored at the start of the text, any one of the
// logLinesWithSlash patterns. It is compiled once at package initialisation.
var regexFixSlash = regexp.MustCompile(fmt.Sprintf("^(%s)", strings.Join(logLinesWithSlash, "|")))
// Command line flags for bisync test
var (
	// Test selection and the second remote.
	argTestCase = flag.String("case", "", "Bisync test case to run")
	argRemote2  = flag.String("remote2", "", "Path2 for bisync tests")
	argStopAt   = flag.Int("stop-at", 0, "Stop after given test step")

	// Switches controlling comparison, cleanup and verbosity.
	argNoCompare = flag.Bool("no-compare", false, "Do not compare test results with golden")
	argNoCleanup = flag.Bool("no-cleanup", false, "Keep test files")
	argGolden    = flag.Bool("golden", false, "Store results as golden")
	argDebug     = flag.Bool("debug", false, "Print debug messages")

	// Flag -refresh-times helps with Dropbox tests failing with message
	// "src and dst identical but can't set mod time without deleting and re-uploading"
	argRefreshTimes = flag.Bool("refresh-times", false, "Force refreshing the target modtime, useful for Dropbox (default: false)")
)
// bisyncTest keeps all test data in a single place
type bisyncTest struct {
// per-test state
t * testing . T
step int
stopped bool
stepStr string
testCase string
sessionName string
// test dirs
testDir string
dataDir string
initDir string
goldenDir string
workDir string
fs1 fs . Fs
path1 string
canonPath1 string
fs2 fs . Fs
path2 string
canonPath2 string
// test log
logDir string
logPath string
logFile * os . File
// global state
dataRoot string
randName string
tempDir string
parent1 fs . Fs
parent2 fs . Fs
// global flags
argRemote1 string
argRemote2 string
noCompare bool
noCleanup bool
golden bool
debug bool
stopAt int
2023-10-06 22:38:47 +02:00
TestFn bisync . TestFunc
2021-05-16 18:39:33 +02:00
}
2023-11-06 12:59:41 +01:00
// color is a package-local shorthand for bisync.Color.
var color = bisync.Color
2021-05-16 18:39:33 +02:00
// TestBisync is a test engine for bisync test cases.
func TestBisync ( t * testing . T ) {
ctx := context . Background ( )
fstest . Initialise ( )
ci := fs . GetConfig ( ctx )
ciSave := * ci
defer func ( ) {
* ci = ciSave
} ( )
if * argRefreshTimes {
ci . RefreshTimes = true
}
2023-12-23 21:31:33 +01:00
bisync . Colors = true
bisync: add --resync-mode for customizing --resync - fixes #5681
Before this change, the path1 version of a file always prevailed during
--resync, and many users requested options to automatically select the winner
based on characteristics such as newer, older, larger, and smaller. This change
adds support for such options.
Note that ideally this feature would have been implemented by allowing the
existing `--resync` flag to optionally accept string values such as `--resync
newer`. However, this would have been a breaking change, as the existing flag
is a `bool` and it does not seem to be possible to have a `string` flag that
accepts both `--resync newer` and `--resync` (with no argument.) (`NoOptDefVal`
does not work for this, as it would force an `=` like `--resync=newer`.) So
instead, the best compromise to avoid a breaking change was to add a new
`--resync-mode CHOICE` flag that implies `--resync`, while maintaining the
existing behavior of `--resync` (which implies `--resync-mode path1`. i.e. both
flags are now valid, and either can be used without the other.
--resync-mode CHOICE
In the event that a file differs on both sides during a `--resync`,
`--resync-mode` controls which version will overwrite the other. The supported
options are similar to `--conflict-resolve`. For all of the following options,
the version that is kept is referred to as the "winner", and the version that
is overwritten (deleted) is referred to as the "loser". The options are named
after the "winner":
- `path1` - (the default) - the version from Path1 is unconditionally
considered the winner (regardless of `modtime` and `size`, if any). This can be
useful if one side is more trusted or up-to-date than the other, at the time of
the `--resync`.
- `path2` - same as `path1`, except the path2 version is considered the winner.
- `newer` - the newer file (by `modtime`) is considered the winner, regardless
of which side it came from. This may result in having a mix of some winners
from Path1, and some winners from Path2. (The implementation is analagous to
running `rclone copy --update` in both directions.)
- `older` - same as `newer`, except the older file is considered the winner,
and the newer file is considered the loser.
- `larger` - the larger file (by `size`) is considered the winner (regardless
of `modtime`, if any). This can be a useful option for remotes without
`modtime` support, or with the kinds of files (such as logs) that tend to grow
but not shrink, over time.
- `smaller` - the smaller file (by `size`) is considered the winner (regardless
of `modtime`, if any).
For all of the above options, note the following:
- If either of the underlying remotes lacks support for the chosen method, it
will be ignored and will fall back to the default of `path1`. (For example, if
`--resync-mode newer` is set, but one of the paths uses a remote that doesn't
support `modtime`.)
- If a winner can't be determined because the chosen method's attribute is
missing or equal, it will be ignored, and bisync will instead try to determine
whether the files differ by looking at the other `--compare` methods in effect.
(For example, if `--resync-mode newer` is set, but the Path1 and Path2 modtimes
are identical, bisync will compare the sizes.) If bisync concludes that they
differ, preference is given to whichever is the "source" at that moment. (In
practice, this gives a slight advantage to Path2, as the 2to1 copy comes before
the 1to2 copy.) If the files _do not_ differ, nothing is copied (as both sides
are already correct).
- These options apply only to files that exist on both sides (with the same
name and relative path). Files that exist *only* on one side and not the other
are *always* copied to the other, during `--resync` (this is one of the main
differences between resync and non-resync runs.).
- `--conflict-resolve`, `--conflict-loser`, and `--conflict-suffix` do not
apply during `--resync`, and unlike these flags, nothing is renamed during
`--resync`. When a file differs on both sides during `--resync`, one version
always overwrites the other (much like in `rclone copy`.) (Consider using
`--backup-dir` to retain a backup of the losing version.)
- Unlike for `--conflict-resolve`, `--resync-mode none` is not a valid option
(or rather, it will be interpreted as "no resync", unless `--resync` has also
been specified, in which case it will be ignored.)
- Winners and losers are decided at the individual file-level only (there is
not currently an option to pick an entire winning directory atomically,
although the `path1` and `path2` options typically produce a similar result.)
- To maintain backward-compatibility, the `--resync` flag implies
`--resync-mode path1` unless a different `--resync-mode` is explicitly
specified. Similarly, all `--resync-mode` options (except `none`) imply
`--resync`, so it is not necessary to use both the `--resync` and
`--resync-mode` flags simultaneously -- either one is sufficient without the
other.
2023-12-22 20:09:35 +01:00
time . Local , _ = time . LoadLocation ( "America/New_York" )
2021-05-16 18:39:33 +02:00
baseDir , err := os . Getwd ( )
require . NoError ( t , err , "get current directory" )
randName := "bisync." + time . Now ( ) . Format ( "150405-" ) + random . String ( 5 )
tempDir := filepath . Join ( os . TempDir ( ) , randName )
workDir := filepath . Join ( tempDir , "workdir" )
b := & bisyncTest {
// per-test state
t : t ,
// global state
tempDir : tempDir ,
randName : randName ,
workDir : workDir ,
dataRoot : filepath . Join ( baseDir , "testdata" ) ,
logDir : filepath . Join ( tempDir , "logs" ) ,
logPath : filepath . Join ( workDir , logFileName ) ,
// global flags
argRemote1 : * fstest . RemoteName ,
argRemote2 : * argRemote2 ,
noCompare : * argNoCompare ,
noCleanup : * argNoCleanup ,
golden : * argGolden ,
debug : * argDebug ,
stopAt : * argStopAt ,
}
b . mkdir ( b . tempDir )
b . mkdir ( b . logDir )
fnHandle := atexit . Register ( func ( ) {
if atexit . Signalled ( ) {
b . cleanupAll ( )
}
} )
defer func ( ) {
b . cleanupAll ( )
atexit . Unregister ( fnHandle )
} ( )
argCase := * argTestCase
if argCase == "" {
argCase = "all"
if testing . Short ( ) {
// remote tests can be long, help with "go test -short"
argCase = "basic"
}
}
testList := strings . Split ( argCase , "," )
if strings . ToLower ( argCase ) == "all" {
testList = nil
for _ , testCase := range b . listDir ( b . dataRoot ) {
if strings . HasPrefix ( testCase , "test_" ) {
2023-12-23 17:06:04 +01:00
// if dir is empty, skip it (can happen due to gitignored files/dirs when checking out branch)
if len ( b . listDir ( filepath . Join ( b . dataRoot , testCase ) ) ) == 0 {
continue
}
2021-05-16 18:39:33 +02:00
testList = append ( testList , testCase )
}
}
}
require . False ( t , b . stopAt > 0 && len ( testList ) > 1 , "-stop-at is meaningful only for a single test" )
for _ , testCase := range testList {
testCase = strings . ReplaceAll ( testCase , "-" , "_" )
testCase = strings . TrimPrefix ( testCase , "test_" )
t . Run ( testCase , func ( childTest * testing . T ) {
b . runTestCase ( ctx , childTest , testCase )
} )
}
}
// cleanupAll purges both parent remotes (when set) and removes the local temp
// dir. It does nothing when noCleanup is set. All errors are deliberately
// ignored: this is best-effort cleanup.
func (b *bisyncTest) cleanupAll() {
	if b.noCleanup {
		return
	}
	ctx := context.Background()
	for _, parent := range []fs.Fs{b.parent1, b.parent2} {
		if parent == nil {
			continue
		}
		_ = operations.Purge(ctx, parent, "")
	}
	_ = os.RemoveAll(b.tempDir)
}
func ( b * bisyncTest ) runTestCase ( ctx context . Context , t * testing . T , testCase string ) {
b . t = t
b . testCase = testCase
var err error
b . fs1 , b . parent1 , b . path1 , b . canonPath1 = b . makeTempRemote ( ctx , b . argRemote1 , "path1" )
b . fs2 , b . parent2 , b . path2 , b . canonPath2 = b . makeTempRemote ( ctx , b . argRemote2 , "path2" )
b . sessionName = bilib . SessionName ( b . fs1 , b . fs2 )
b . testDir = b . ensureDir ( b . dataRoot , "test_" + b . testCase , false )
b . initDir = b . ensureDir ( b . testDir , "initial" , false )
b . goldenDir = b . ensureDir ( b . testDir , "golden" , false )
b . dataDir = b . ensureDir ( b . testDir , "modfiles" , true ) // optional
2023-10-01 10:12:39 +02:00
// normalize unicode so tests are runnable on macOS
b . sessionName = norm . NFC . String ( b . sessionName )
b . goldenDir = norm . NFC . String ( b . goldenDir )
2021-05-16 18:39:33 +02:00
// For test stability, jam initial dates to a fixed past date.
// Test cases that change files will touch specific files to fixed new dates.
initDate := time . Date ( 2000 , time . January , 1 , 0 , 0 , 0 , 0 , bisync . TZ )
err = filepath . Walk ( b . initDir , func ( path string , info os . FileInfo , err error ) error {
if err == nil && ! info . IsDir ( ) {
return os . Chtimes ( path , initDate , initDate )
}
return err
} )
require . NoError ( b . t , err , "jamming initial dates" )
// Prepare initial content
b . cleanupCase ( ctx )
initFs , err := fs . NewFs ( ctx , b . initDir )
require . NoError ( b . t , err )
require . NoError ( b . t , sync . CopyDir ( ctx , b . fs1 , initFs , true ) , "setting up path1" )
require . NoError ( b . t , sync . CopyDir ( ctx , b . fs2 , initFs , true ) , "setting up path2" )
// Create log file
b . mkdir ( b . workDir )
b . logFile , err = os . Create ( b . logPath )
require . NoError ( b . t , err , "creating log file" )
// Execute test scenario
scenFile := filepath . Join ( b . testDir , "scenario.txt" )
2022-08-20 16:38:02 +02:00
scenBuf , err := os . ReadFile ( scenFile )
2021-05-16 18:39:33 +02:00
scenReplacer := b . newReplacer ( false )
require . NoError ( b . t , err )
b . step = 0
b . stopped = false
for _ , line := range strings . Split ( string ( scenBuf ) , "\n" ) {
comment := strings . Index ( line , "#" )
if comment != - 1 {
line = line [ : comment ]
}
line = strings . TrimSpace ( line )
if line == "" {
if b . golden {
// Keep empty lines in golden logs
_ , _ = b . logFile . WriteString ( "\n" )
}
continue
}
b . step ++
b . stepStr = fmt . Sprintf ( "(%02d) :" , b . step )
line = scenReplacer . Replace ( line )
if err = b . runTestStep ( ctx , line ) ; err != nil {
require . Failf ( b . t , "test step failed" , "step %d failed: %v" , b . step , err )
return
}
if b . stopAt > 0 && b . step >= b . stopAt {
comment := ""
if b . golden {
comment = " (ignoring -golden)"
}
b . logPrintf ( "Stopping after step %d%s" , b . step , comment )
b . stopped = true
b . noCleanup = true
b . noCompare = true
break
}
}
// Perform post-run activities
require . NoError ( b . t , b . logFile . Close ( ) , "flushing test log" )
b . logFile = nil
savedLog := b . testCase + ".log"
err = bilib . CopyFile ( b . logPath , filepath . Join ( b . logDir , savedLog ) )
require . NoError ( b . t , err , "saving log file %s" , savedLog )
if b . golden && ! b . stopped {
log . Printf ( "Store results to golden directory" )
b . storeGolden ( )
return
}
errorCount := 0
if b . noCompare {
log . Printf ( "Skip comparing results with golden directory" )
errorCount = - 2
} else {
errorCount = b . compareResults ( )
}
if b . noCleanup {
log . Printf ( "Skip cleanup" )
} else {
b . cleanupCase ( ctx )
}
var msg string
var passed bool
switch errorCount {
case 0 :
2023-11-06 12:59:41 +01:00
msg = color ( terminal . GreenFg , fmt . Sprintf ( "TEST %s PASSED" , b . testCase ) )
2021-05-16 18:39:33 +02:00
passed = true
case - 2 :
2023-11-06 12:59:41 +01:00
msg = color ( terminal . YellowFg , fmt . Sprintf ( "TEST %s SKIPPED" , b . testCase ) )
2021-05-16 18:39:33 +02:00
passed = true
case - 1 :
2023-11-06 12:59:41 +01:00
msg = color ( terminal . RedFg , fmt . Sprintf ( "TEST %s FAILED - WRONG NUMBER OF FILES" , b . testCase ) )
2021-05-16 18:39:33 +02:00
passed = false
default :
2023-11-06 12:59:41 +01:00
msg = color ( terminal . RedFg , fmt . Sprintf ( "TEST %s FAILED - %d MISCOMPARED FILES" , b . testCase , errorCount ) )
2021-05-16 18:39:33 +02:00
buckets := b . fs1 . Features ( ) . BucketBased || b . fs2 . Features ( ) . BucketBased
passed = false
if b . testCase == "rmdirs" && buckets {
msg += " (expected failure on bucket remotes)"
passed = true
}
}
b . t . Log ( msg )
if ! passed {
b . t . FailNow ( )
}
}
// makeTempRemote builds the Fs for one side (subdir) of the current test case.
//
// When remote is a local path it must be "" or "local"; the actual location is
// then taken from b.tempDir and the given value is otherwise ignored. For any
// other remote, b.randName is appended (after a separator if needed) so the
// test runs below a per-run root.
//
// Results:
//   - f:      Fs rooted at <case root>/<subdir>
//   - parent: Fs rooted at b.tempDir (local) or at remote+randName
//   - path:   bilib.FsPath(f)
//   - canon:  bilib.CanonicalPath(<case root>) followed by "_"
//
// The test is skipped if f reports fs.ModTimeNotSupported as its precision.
func (b *bisyncTest) makeTempRemote(ctx context.Context, remote, subdir string) (f, parent fs.Fs, path, canon string) {
	var err error
	if !bilib.IsLocalPath(remote) {
		// Make sure the remote ends in a separator before appending the random name
		if tail := remote[len(remote)-1]; tail != ':' && tail != '/' {
			remote += "/"
		}
		remote += b.randName
		parent, err = fs.NewFs(ctx, remote)
		require.NoError(b.t, err, "parsing %s", remote)
		caseRoot := remote + "/" + b.testCase
		canon = bilib.CanonicalPath(caseRoot) + "_"
		path = caseRoot + "/" + subdir
	} else {
		if remote != "" && remote != "local" {
			b.t.Fatalf(` Missing ":" in remote %q. Use "local" to test with local filesystem. `, remote)
		}
		parent, err = fs.NewFs(ctx, b.tempDir)
		require.NoError(b.t, err, "parsing %s", b.tempDir)
		caseRoot := filepath.Join(b.tempDir, b.testCase)
		canon = bilib.CanonicalPath(caseRoot) + "_"
		path = filepath.Join(caseRoot, subdir)
	}

	f, err = fs.NewFs(ctx, path)
	require.NoError(b.t, err, "parsing %s/%s", remote, subdir)
	// Report the path as the Fs itself sees it
	path = bilib.FsPath(f)
	if f.Precision() == fs.ModTimeNotSupported {
		b.t.Skipf("modification time support is missing on %s", subdir)
	}
	return f, parent, path, canon
}
// cleanupCase purges both test remotes, removes the work directory and
// resets the accounting counters. All errors are deliberately ignored.
func (b *bisyncTest) cleanupCase(ctx context.Context) {
	// Purge with output captured and dropped, to silence
	// "directory not found" errors from the ftp backend
	quietPurge := func(f fs.Fs) {
		_ = bilib.CaptureOutput(func() {
			_ = operations.Purge(ctx, f, "")
		})
	}
	quietPurge(b.fs1)
	quietPurge(b.fs2)

	_ = os.RemoveAll(b.workDir)
	accounting.Stats(ctx).ResetCounters()
}
func ( b * bisyncTest ) runTestStep ( ctx context . Context , line string ) ( err error ) {
var fsrc , fdst fs . Fs
accounting . Stats ( ctx ) . ResetErrors ( )
2023-11-06 12:59:41 +01:00
b . logPrintf ( "%s %s" , color ( terminal . CyanFg , b . stepStr ) , color ( terminal . BlueFg , line ) )
2021-05-16 18:39:33 +02:00
ci := fs . GetConfig ( ctx )
ciSave := * ci
defer func ( ) {
* ci = ciSave
} ( )
ci . LogLevel = fs . LogLevelInfo
if b . debug {
ci . LogLevel = fs . LogLevelDebug
}
2023-10-06 22:38:47 +02:00
testFunc := func ( ) {
2023-11-17 18:14:38 +01:00
src := filepath . Join ( b . dataDir , "file7.txt" )
2023-10-06 22:38:47 +02:00
for i := 0 ; i < 50 ; i ++ {
2023-11-17 18:14:38 +01:00
dst := "file" + fmt . Sprint ( i ) + ".txt"
err := b . copyFile ( ctx , src , b . path2 , dst )
if err != nil {
fs . Errorf ( src , "error copying file: %v" , err )
}
dst = "file" + fmt . Sprint ( 100 - i ) + ".txt"
err = b . copyFile ( ctx , src , b . path1 , dst )
if err != nil {
fs . Errorf ( dst , "error copying file: %v" , err )
}
2023-10-06 22:38:47 +02:00
}
}
2021-05-16 18:39:33 +02:00
args := splitLine ( line )
switch args [ 0 ] {
case "test" :
b . checkArgs ( args , 1 , 0 )
return nil
case "copy-listings" :
b . checkArgs ( args , 1 , 1 )
return b . saveTestListings ( args [ 1 ] , true )
case "move-listings" :
b . checkArgs ( args , 1 , 1 )
return b . saveTestListings ( args [ 1 ] , false )
case "purge-children" :
b . checkArgs ( args , 1 , 1 )
if fsrc , err = fs . NewFs ( ctx , args [ 1 ] ) ; err != nil {
return err
}
2023-11-17 18:14:38 +01:00
err = purgeChildren ( ctx , fsrc , "" )
if err != nil {
return err
}
return
2021-05-16 18:39:33 +02:00
case "delete-file" :
b . checkArgs ( args , 1 , 1 )
dir , file := filepath . Split ( args [ 1 ] )
if fsrc , err = fs . NewFs ( ctx , dir ) ; err != nil {
return err
}
var obj fs . Object
if obj , err = fsrc . NewObject ( ctx , file ) ; err != nil {
return err
}
return operations . DeleteFile ( ctx , obj )
case "delete-glob" :
b . checkArgs ( args , 2 , 2 )
if fsrc , err = fs . NewFs ( ctx , args [ 1 ] ) ; err != nil {
return err
}
return deleteFiles ( ctx , fsrc , args [ 2 ] )
case "touch-glob" :
b . checkArgs ( args , 3 , 3 )
date , src , glob := args [ 1 ] , args [ 2 ] , args [ 3 ]
if fsrc , err = fs . NewFs ( ctx , src ) ; err != nil {
return err
}
_ , err = touchFiles ( ctx , date , fsrc , src , glob )
return err
case "touch-copy" :
b . checkArgs ( args , 3 , 3 )
date , src , dst := args [ 1 ] , args [ 2 ] , args [ 3 ]
dir , file := filepath . Split ( src )
if fsrc , err = fs . NewFs ( ctx , dir ) ; err != nil {
return err
}
if _ , err = touchFiles ( ctx , date , fsrc , dir , file ) ; err != nil {
return err
}
return b . copyFile ( ctx , src , dst , "" )
case "copy-file" :
b . checkArgs ( args , 2 , 2 )
return b . copyFile ( ctx , args [ 1 ] , args [ 2 ] , "" )
case "copy-as" :
b . checkArgs ( args , 3 , 3 )
return b . copyFile ( ctx , args [ 1 ] , args [ 2 ] , args [ 3 ] )
2023-10-09 05:16:23 +02:00
case "copy-as-NFC" :
b . checkArgs ( args , 3 , 3 )
ci . NoUnicodeNormalization = true
ci . FixCase = true
return b . copyFile ( ctx , args [ 1 ] , norm . NFC . String ( args [ 2 ] ) , norm . NFC . String ( args [ 3 ] ) )
case "copy-as-NFD" :
b . checkArgs ( args , 3 , 3 )
ci . NoUnicodeNormalization = true
ci . FixCase = true
return b . copyFile ( ctx , args [ 1 ] , norm . NFD . String ( args [ 2 ] ) , norm . NFD . String ( args [ 3 ] ) )
2021-05-16 18:39:33 +02:00
case "copy-dir" , "sync-dir" :
b . checkArgs ( args , 2 , 2 )
if fsrc , err = cache . Get ( ctx , args [ 1 ] ) ; err != nil {
return err
}
if fdst , err = cache . Get ( ctx , args [ 2 ] ) ; err != nil {
return err
}
switch args [ 0 ] {
case "copy-dir" :
err = sync . CopyDir ( ctx , fdst , fsrc , true )
case "sync-dir" :
err = sync . Sync ( ctx , fdst , fsrc , true )
}
return err
case "list-dirs" :
b . checkArgs ( args , 1 , 1 )
2023-11-17 18:14:38 +01:00
return b . listSubdirs ( ctx , args [ 1 ] , true )
case "list-files" :
b . checkArgs ( args , 1 , 1 )
return b . listSubdirs ( ctx , args [ 1 ] , false )
2021-05-16 18:39:33 +02:00
case "bisync" :
2023-10-09 05:16:23 +02:00
ci . NoUnicodeNormalization = false
ci . IgnoreCaseSync = false
// ci.FixCase = true
2021-05-16 18:39:33 +02:00
return b . runBisync ( ctx , args [ 1 : ] )
2023-10-06 22:38:47 +02:00
case "test-func" :
b . TestFn = testFunc
return
2023-11-17 18:14:38 +01:00
case "fix-names" :
// in case the local os converted any filenames
ci . NoUnicodeNormalization = true
ci . FixCase = true
ci . IgnoreTimes = true
reset := func ( ) {
ci . NoUnicodeNormalization = false
ci . FixCase = false
ci . IgnoreTimes = false
}
defer reset ( )
b . checkArgs ( args , 1 , 1 )
var ok bool
var remoteName string
var remotePath string
remoteName , remotePath , err = fspath . SplitFs ( args [ 1 ] )
if err != nil {
return err
}
if remoteName == "" {
remoteName = "/"
}
fsrc , err = fs . NewFs ( ctx , remoteName )
if err != nil {
return err
}
// DEBUG
fs . Debugf ( remotePath , "is NFC: %v" , norm . NFC . IsNormalString ( remotePath ) )
fs . Debugf ( remotePath , "is NFD: %v" , norm . NFD . IsNormalString ( remotePath ) )
fs . Debugf ( remotePath , "is valid UTF8: %v" , utf8 . ValidString ( remotePath ) )
// check if it's a dir, try moving it
var leaf string
_ , leaf , err = fspath . Split ( remotePath )
if err == nil && leaf == "" {
remotePath = args [ 1 ]
fs . Debugf ( remotePath , "attempting to fix directory" )
fixDirname := func ( old , new string ) {
if new != old {
oldName , err := fs . NewFs ( ctx , old )
if err != nil {
fs . Logf ( old , "error getting Fs: %v" , err )
}
fs . Debugf ( nil , "Attempting to move %s to %s" , oldName . Root ( ) , new )
// Create random name to temporarily move dir to
tmpDirName := strings . TrimSuffix ( new , slash ) + "-rclone-move-" + random . String ( 8 )
var tmpDirFs fs . Fs
tmpDirFs , _ = fs . NewFs ( ctx , tmpDirName )
err = sync . MoveDir ( ctx , tmpDirFs , oldName , true , true )
if err != nil {
fs . Debugf ( oldName , "error attempting to move folder: %v" , err )
}
// now move the temp dir to real name
fsrc , _ = fs . NewFs ( ctx , new )
err = sync . MoveDir ( ctx , fsrc , tmpDirFs , true , true )
if err != nil {
fs . Debugf ( tmpDirFs , "error attempting to move folder to %s: %v" , fsrc . Root ( ) , err )
}
} else {
fs . Debugf ( nil , "old and new are equal. Skipping. %s (%s) %s (%s)" , old , stringToHash ( old ) , new , stringToHash ( new ) )
}
}
if norm . NFC . String ( remotePath ) != remotePath && norm . NFD . String ( remotePath ) != remotePath {
fs . Debugf ( remotePath , "This is neither fully NFD or NFC -- can't fix reliably!" )
}
fixDirname ( norm . NFC . String ( remotePath ) , remotePath )
fixDirname ( norm . NFD . String ( remotePath ) , remotePath )
return
}
// if it's a file
fs . Debugf ( remotePath , "attempting to fix file -- filename hash: %s" , stringToHash ( leaf ) )
fixFilename := func ( old , new string ) {
ok , err := fs . FileExists ( ctx , fsrc , old )
if err != nil {
fs . Debugf ( remotePath , "error checking if file exists: %v" , err )
}
fs . Debugf ( old , "file exists: %v %s" , ok , stringToHash ( old ) )
fs . Debugf ( nil , "FILE old: %s new: %s equal: %v" , old , new , old == new )
fs . Debugf ( nil , "HASH old: %s new: %s equal: %v" , stringToHash ( old ) , stringToHash ( new ) , stringToHash ( old ) == stringToHash ( new ) )
if ok && new != old {
fs . Debugf ( new , "attempting to rename %s to %s" , old , new )
err = operations . MoveFile ( ctx , fsrc , fsrc , new , old )
if err != nil {
fs . Errorf ( new , "error trying to rename %s to %s - %v" , old , new , err )
}
}
}
// look for NFC version
fixFilename ( norm . NFC . String ( remotePath ) , remotePath )
// if it's in a subdir we just moved, the file and directory might have different encodings. Check for that.
mixed := strings . TrimSuffix ( norm . NFD . String ( remotePath ) , norm . NFD . String ( leaf ) ) + norm . NFC . String ( leaf )
fixFilename ( mixed , remotePath )
// Try NFD
fixFilename ( norm . NFD . String ( remotePath ) , remotePath )
// Try mixed in reverse
mixed = strings . TrimSuffix ( norm . NFC . String ( remotePath ) , norm . NFC . String ( leaf ) ) + norm . NFD . String ( leaf )
fixFilename ( mixed , remotePath )
// check if it's right now, error if not
ok , err = fs . FileExists ( ctx , fsrc , remotePath )
if ! ok || err != nil {
fs . Logf ( remotePath , "Can't find expected file %s (was it renamed by the os?) %v" , args [ 1 ] , err )
return
} else {
// include hash of filename to make unicode form differences easier to see in logs
fs . Debugf ( remotePath , "verified file exists at correct path. filename hash: %s" , stringToHash ( leaf ) )
}
return
2021-05-16 18:39:33 +02:00
default :
2021-11-04 11:12:57 +01:00
return fmt . Errorf ( "unknown command: %q" , args [ 0 ] )
2021-05-16 18:39:33 +02:00
}
}
// splitLine breaks a scenario line into whitespace-separated tokens.
// Within each token the whitespaceReplacer placeholders are expanded first,
// then every regexChar match is replaced by the single byte whose two hex
// digits sit at offsets 5-6 of the match.
// A line without tokens yields a nil slice.
func splitLine(line string) (args []string) {
	// decodeChr turns one regexChar match into the byte it names.
	decodeChr := func(match []byte) []byte {
		code, _ := strconv.ParseUint(string(match[5:7]), 16, 8)
		return []byte{byte(code)}
	}
	for _, field := range strings.Fields(line) {
		expanded := whitespaceReplacer.Replace(field)
		decoded := regexChar.ReplaceAllFunc([]byte(expanded), decodeChr)
		args = append(args, string(decoded))
	}
	return args
}
// whitespaceReplacer expands the {spc}, {tab} and {eol} placeholders
// into a space, a tab and the eol constant respectively.
var whitespaceReplacer = strings.NewReplacer("{spc}", " ", "{tab}", "\t", "{eol}", eol)
// regexChar matches a {chr:XX} placeholder, where XX is exactly two
// lowercase hex digits. The pattern must contain no stray whitespace: a match
// has to be the 8-byte form "{chr:XX}" because splitLine reads the digits
// from offsets 5-6 of the match.
var regexChar = regexp.MustCompile(`\{chr:([0-9a-f]{2})\}`)
// checkArgs validates how many arguments follow the command name in args
// and fails the test on the first violated rule:
//   - min == max: exactly that many arguments are required
//   - min > 0:    at least min arguments are required
//   - max > 0:    at most max arguments are allowed (0 means no upper limit)
func (b *bisyncTest) checkArgs(args []string, min, max int) {
	name, got := args[0], len(args)-1
	switch {
	case min == max && got != min:
		b.t.Fatalf("%q must have strictly %d args", name, min)
	case min > 0 && got < min:
		b.t.Fatalf("%q must have at least %d args", name, min)
	case max > 0 && got > max:
		b.t.Fatalf("%q must have at most %d args", name, max)
	}
}
func ( b * bisyncTest ) runBisync ( ctx context . Context , args [ ] string ) ( err error ) {
opt := & bisync . Options {
Workdir : b . workDir ,
NoCleanup : true ,
SaveQueues : true ,
MaxDelete : bisync . DefaultMaxDelete ,
CheckFilename : bisync . DefaultCheckFilename ,
CheckSync : bisync . CheckSyncTrue ,
2023-10-06 22:38:47 +02:00
TestFn : b . TestFn ,
2021-05-16 18:39:33 +02:00
}
octx , ci := fs . AddConfig ( ctx )
fs1 , fs2 := b . fs1 , b . fs2
addSubdir := func ( path , subdir string ) fs . Fs {
remote := path + subdir
f , err := fs . NewFs ( ctx , remote )
require . NoError ( b . t , err , "parsing remote %q" , remote )
return f
}
for _ , arg := range args {
val := ""
pos := strings . Index ( arg , "=" )
if pos > 0 {
arg , val = arg [ : pos ] , arg [ pos + 1 : ]
}
switch arg {
case "resync" :
opt . Resync = true
case "dry-run" :
ci . DryRun = true
opt . DryRun = true
case "force" :
opt . Force = true
2023-07-11 13:09:06 +02:00
case "create-empty-src-dirs" :
opt . CreateEmptySrcDirs = true
2021-05-16 18:39:33 +02:00
case "remove-empty-dirs" :
opt . RemoveEmptyDirs = true
case "check-sync-only" :
opt . CheckSync = bisync . CheckSyncOnly
case "no-check-sync" :
opt . CheckSync = bisync . CheckSyncFalse
case "check-access" :
opt . CheckAccess = true
case "check-filename" :
opt . CheckFilename = val
case "filters-file" :
opt . FiltersFile = val
case "max-delete" :
opt . MaxDelete , err = strconv . Atoi ( val )
require . NoError ( b . t , err , "parsing max-delete=%q" , val )
case "size-only" :
ci . SizeOnly = true
bisync: full support for comparing checksum, size, modtime - fixes #5679 fixes #5683 fixes #5684 fixes #5675
Before this change, bisync could only detect changes based on modtime, and
would refuse to run if either path lacked modtime support. This made bisync
unavailable for many of rclone's backends. Additionally, bisync did not account
for the Fs's precision when comparing modtimes, meaning that they could only be
reliably compared within the same side -- not against the opposite side. Size
and checksum (even when available) were ignored completely for deltas.
After this change, bisync now fully supports comparing based on any combination
of size, modtime, and checksum, lifting the prior restriction on backends
without modtime support. The comparison logic considers the backend's
precision, hash types, and other features as appropriate.
The comparison features optionally use a new --compare flag (which takes any
combination of size,modtime,checksum) and even supports some combinations not
otherwise supported in `sync` (like comparing all three at the same time.) By
default (without the --compare flag), bisync inherits the same comparison
options as `sync` (that is: size and modtime by default, unless modified with
flags such as --checksum or --size-only.) If the --compare flag is set, it will
override these defaults.
If --compare includes checksum and both remotes support checksums but have no
hash types in common with each other, checksums will be considered only for
comparisons within the same side (to determine what has changed since the prior
sync), but not for comparisons against the opposite side. If one side supports
checksums and the other does not, checksums will only be considered on the side
that supports them. When comparing with checksum and/or size without modtime,
bisync cannot determine whether a file is newer or older -- only whether it is
changed or unchanged. (If it is changed on both sides, bisync still does the
standard equality-check to avoid declaring a sync conflict unless it absolutely
has to.)
Also included are some new flags to customize the checksum comparison behavior
on backends where hashes are slow or unavailable. --no-slow-hash and
--slow-hash-sync-only allow selectively ignoring checksums on backends such as
local where they are slow. --download-hash allows computing them by downloading
when (and only when) they're otherwise not available. Of course, this option
probably won't be practical with large files, but may be a good option for
syncing small-but-important files with maximum accuracy (for example, a source
code repo on a crypt remote.) An additional advantage over methods like
cryptcheck is that the original file is not required for comparison (for
example, --download-hash can be used to bisync two different crypt remotes with
different passwords.)
Additionally, all of the above are now considered during the final --check-sync
for much-improved accuracy (before this change, it only compared filenames!)
Many other details are explained in the included docs.
2023-12-01 01:44:38 +01:00
case "ignore-size" :
ci . IgnoreSize = true
case "checksum" :
ci . CheckSum = true
opt . Compare . DownloadHash = true // allows us to test crypt and the like
case "compare-all" :
opt . CompareFlag = "size,modtime,checksum"
opt . Compare . DownloadHash = true // allows us to test crypt and the like
2021-05-16 18:39:33 +02:00
case "subdir" :
fs1 = addSubdir ( b . path1 , val )
fs2 = addSubdir ( b . path2 , val )
2023-12-18 19:03:14 +01:00
case "backupdir1" :
opt . BackupDir1 = val
case "backupdir2" :
opt . BackupDir2 = val
2023-10-01 15:36:19 +02:00
case "ignore-listing-checksum" :
opt . IgnoreListingChecksum = true
2023-10-09 05:16:23 +02:00
case "no-norm" :
ci . NoUnicodeNormalization = true
ci . IgnoreCaseSync = false
case "norm" :
ci . NoUnicodeNormalization = false
ci . IgnoreCaseSync = true
case "fix-case" :
ci . NoUnicodeNormalization = false
ci . IgnoreCaseSync = true
ci . FixCase = true
bisync: add options to auto-resolve conflicts - fixes #7471
Before this change, when a file was new/changed on both paths (relative to the
prior sync), and the versions on each side were not identical, bisync would
keep both versions, renaming them with ..path1 and ..path2 suffixes,
respectively. Many users have requested more control over how bisync handles
such conflicts -- including an option to automatically select one version as
the "winner" and rename or delete the "loser". This change introduces support
for such options.
--conflict-resolve CHOICE
In bisync, a "conflict" is a file that is *new* or *changed* on *both sides*
(relative to the prior run) AND is *not currently identical* on both sides.
`--conflict-resolve` controls how bisync handles such a scenario. The currently
supported options are:
- `none` - (the default) - do not attempt to pick a winner, keep and rename
both files according to `--conflict-loser` and
`--conflict-suffix` settings. For example, with the default
settings, `file.txt` on Path1 is renamed `file.txt.conflict1` and `file.txt` on
Path2 is renamed `file.txt.conflict2`. Both are copied to the opposite path
during the run, so both sides end up with a copy of both files. (As `none` is
the default, it is not necessary to specify `--conflict-resolve none` -- you
can just omit the flag.)
- `newer` - the newer file (by `modtime`) is considered the winner and is
copied without renaming. The older file (the "loser") is handled according to
`--conflict-loser` and `--conflict-suffix` settings (either renamed or
deleted.) For example, if `file.txt` on Path1 is newer than `file.txt` on
Path2, the result on both sides (with other default settings) will be `file.txt`
(winner from Path1) and `file.txt.conflict1` (loser from Path2).
- `older` - same as `newer`, except the older file is considered the winner,
and the newer file is considered the loser.
- `larger` - the larger file (by `size`) is considered the winner (regardless
of `modtime`, if any).
- `smaller` - the smaller file (by `size`) is considered the winner (regardless
of `modtime`, if any).
- `path1` - the version from Path1 is unconditionally considered the winner
(regardless of `modtime` and `size`, if any). This can be useful if one side is
usually more trusted or up-to-date than the other.
- `path2` - same as `path1`, except the path2 version is considered the
winner.
For all of the above options, note the following:
- If either of the underlying remotes lacks support for the chosen method, it
will be ignored and fall back to `none`. (For example, if `--conflict-resolve
newer` is set, but one of the paths uses a remote that doesn't support
`modtime`.)
- If a winner can't be determined because the chosen method's attribute is
missing or equal, it will be ignored and fall back to `none`. (For example, if
`--conflict-resolve newer` is set, but the Path1 and Path2 modtimes are
identical, even if the sizes may differ.)
- If the file's content is currently identical on both sides, it is not
considered a "conflict", even if new or changed on both sides since the prior
sync. (For example, if you made a change on one side and then synced it to the
other side by other means.) Therefore, none of the conflict resolution flags
apply in this scenario.
- The conflict resolution flags do not apply during a `--resync`, as there is
no "prior run" to speak of (but see `--resync-mode` for similar
options.)
--conflict-loser CHOICE
`--conflict-loser` determines what happens to the "loser" of a sync conflict
(when `--conflict-resolve` determines a winner) or to both
files (when there is no winner.) The currently supported options are:
- `num` - (the default) - auto-number the conflicts by automatically appending
the next available number to the `--conflict-suffix`, in chronological order.
For example, with the default settings, the first conflict for `file.txt` will
be renamed `file.txt.conflict1`. If `file.txt.conflict1` already exists,
`file.txt.conflict2` will be used instead (etc., up to a maximum of
9223372036854775807 conflicts.)
- `pathname` - rename the conflicts according to which side they came from,
which was the default behavior prior to `v1.66`. For example, with
`--conflict-suffix path`, `file.txt` from Path1 will be renamed
`file.txt.path1`, and `file.txt` from Path2 will be renamed `file.txt.path2`.
If two non-identical suffixes are provided (ex. `--conflict-suffix
cloud,local`), the trailing digit is omitted. Importantly, note that with
`pathname`, there is no auto-numbering beyond `2`, so if `file.txt.path2`
somehow already exists, it will be overwritten. Using a dynamic date variable
in your `--conflict-suffix` (see below) is one possible way to avoid this. Note
also that conflicts-of-conflicts are possible, if the original conflict is not
manually resolved -- for example, if for some reason you edited
`file.txt.path1` on both sides, and those edits were different, the result
would be `file.txt.path1.path1` and `file.txt.path1.path2` (in addition to
`file.txt.path2`.)
- `delete` - keep the winner only and delete the loser, instead of renaming it.
If a winner cannot be determined (see `--conflict-resolve` for details on how
this could happen), `delete` is ignored and the default `num` is used instead
(i.e. both versions are kept and renamed, and neither is deleted.) `delete` is
inherently the most destructive option, so use it only with care.
For all of the above options, note that if a winner cannot be determined (see
`--conflict-resolve` for details on how this could happen), or if
`--conflict-resolve` is not in use, *both* files will be renamed.
--conflict-suffix STRING[,STRING]
`--conflict-suffix` controls the suffix that is appended when bisync renames a
`--conflict-loser` (default: `conflict`).
`--conflict-suffix` will accept either one string or two comma-separated
strings to assign different suffixes to Path1 vs. Path2. This may be helpful
later in identifying the source of the conflict. (For example,
`--conflict-suffix dropboxconflict,laptopconflict`)
With `--conflict-loser num`, a number is always appended to the suffix. With
`--conflict-loser pathname`, a number is appended only when one suffix is
specified (or when two identical suffixes are specified.) i.e. with
`--conflict-loser pathname`, all of the following would produce exactly the
same result:
```
--conflict-suffix path
--conflict-suffix path,path
--conflict-suffix path1,path2
```
Suffixes may be as short as 1 character. By default, the suffix is appended
after any other extensions (ex. `file.jpg.conflict1`), however, this can be
changed with the `--suffix-keep-extension` flag (i.e. to instead result in
`file.conflict1.jpg`).
`--conflict-suffix` supports several *dynamic date variables* when enclosed in
curly braces as globs. This can be helpful to track the date and/or time that
each conflict was handled by bisync. For example:
```
--conflict-suffix {DateOnly}-conflict
// result: myfile.txt.2006-01-02-conflict1
```
All of the formats described [here](https://pkg.go.dev/time#pkg-constants) and
[here](https://pkg.go.dev/time#example-Time.Format) are supported, but take
care to ensure that your chosen format does not use any characters that are
illegal on your remotes (for example, macOS does not allow colons in
filenames, and slashes are also best avoided as they are often interpreted as
directory separators.) To address this particular issue, an additional
`{MacFriendlyTime}` (or just `{mac}`) option is supported, which results in
`2006-01-02 0304PM`.
Note that `--conflict-suffix` is entirely separate from rclone's main `--suffix`
flag. This is intentional, as users may wish to use both flags simultaneously,
if also using `--backup-dir`.
Finally, note that the default in bisync prior to `v1.66` was to rename
conflicts with `..path1` and `..path2` (with two periods, and `path` instead of
`conflict`.) Bisync now defaults to a single dot instead of a double dot, but
additional dots can be added by including them in the specified suffix string.
For example, for behavior equivalent to the previous default, use:
```
[--conflict-resolve none] --conflict-loser pathname --conflict-suffix .path
```
2023-12-15 13:47:15 +01:00
case "conflict-resolve" :
_ = opt . ConflictResolve . Set ( val )
case "conflict-loser" :
_ = opt . ConflictLoser . Set ( val )
case "conflict-suffix" :
opt . ConflictSuffixFlag = val
bisync: add --resync-mode for customizing --resync - fixes #5681
Before this change, the path1 version of a file always prevailed during
--resync, and many users requested options to automatically select the winner
based on characteristics such as newer, older, larger, and smaller. This change
adds support for such options.
Note that ideally this feature would have been implemented by allowing the
existing `--resync` flag to optionally accept string values such as `--resync
newer`. However, this would have been a breaking change, as the existing flag
is a `bool` and it does not seem to be possible to have a `string` flag that
accepts both `--resync newer` and `--resync` (with no argument.) (`NoOptDefVal`
does not work for this, as it would force an `=` like `--resync=newer`.) So
instead, the best compromise to avoid a breaking change was to add a new
`--resync-mode CHOICE` flag that implies `--resync`, while maintaining the
existing behavior of `--resync` (which implies `--resync-mode path1`), i.e. both
flags are now valid, and either can be used without the other.
--resync-mode CHOICE
In the event that a file differs on both sides during a `--resync`,
`--resync-mode` controls which version will overwrite the other. The supported
options are similar to `--conflict-resolve`. For all of the following options,
the version that is kept is referred to as the "winner", and the version that
is overwritten (deleted) is referred to as the "loser". The options are named
after the "winner":
- `path1` - (the default) - the version from Path1 is unconditionally
considered the winner (regardless of `modtime` and `size`, if any). This can be
useful if one side is more trusted or up-to-date than the other, at the time of
the `--resync`.
- `path2` - same as `path1`, except the path2 version is considered the winner.
- `newer` - the newer file (by `modtime`) is considered the winner, regardless
of which side it came from. This may result in having a mix of some winners
from Path1, and some winners from Path2. (The implementation is analogous to
running `rclone copy --update` in both directions.)
- `older` - same as `newer`, except the older file is considered the winner,
and the newer file is considered the loser.
- `larger` - the larger file (by `size`) is considered the winner (regardless
of `modtime`, if any). This can be a useful option for remotes without
`modtime` support, or with the kinds of files (such as logs) that tend to grow
but not shrink, over time.
- `smaller` - the smaller file (by `size`) is considered the winner (regardless
of `modtime`, if any).
For all of the above options, note the following:
- If either of the underlying remotes lacks support for the chosen method, it
will be ignored and will fall back to the default of `path1`. (For example, if
`--resync-mode newer` is set, but one of the paths uses a remote that doesn't
support `modtime`.)
- If a winner can't be determined because the chosen method's attribute is
missing or equal, it will be ignored, and bisync will instead try to determine
whether the files differ by looking at the other `--compare` methods in effect.
(For example, if `--resync-mode newer` is set, but the Path1 and Path2 modtimes
are identical, bisync will compare the sizes.) If bisync concludes that they
differ, preference is given to whichever is the "source" at that moment. (In
practice, this gives a slight advantage to Path2, as the 2to1 copy comes before
the 1to2 copy.) If the files _do not_ differ, nothing is copied (as both sides
are already correct).
- These options apply only to files that exist on both sides (with the same
name and relative path). Files that exist *only* on one side and not the other
are *always* copied to the other, during `--resync` (this is one of the main
differences between resync and non-resync runs).
- `--conflict-resolve`, `--conflict-loser`, and `--conflict-suffix` do not
apply during `--resync`, and unlike these flags, nothing is renamed during
`--resync`. When a file differs on both sides during `--resync`, one version
always overwrites the other (much like in `rclone copy`.) (Consider using
`--backup-dir` to retain a backup of the losing version.)
- Unlike for `--conflict-resolve`, `--resync-mode none` is not a valid option
(or rather, it will be interpreted as "no resync", unless `--resync` has also
been specified, in which case it will be ignored.)
- Winners and losers are decided at the individual file-level only (there is
not currently an option to pick an entire winning directory atomically,
although the `path1` and `path2` options typically produce a similar result.)
- To maintain backward-compatibility, the `--resync` flag implies
`--resync-mode path1` unless a different `--resync-mode` is explicitly
specified. Similarly, all `--resync-mode` options (except `none`) imply
`--resync`, so it is not necessary to use both the `--resync` and
`--resync-mode` flags simultaneously -- either one is sufficient without the
other.
2023-12-22 20:09:35 +01:00
case "resync-mode" :
_ = opt . ResyncMode . Set ( val )
2021-05-16 18:39:33 +02:00
default :
2021-11-04 11:12:57 +01:00
return fmt . Errorf ( "invalid bisync option %q" , arg )
2021-05-16 18:39:33 +02:00
}
}
output := bilib . CaptureOutput ( func ( ) {
err = bisync . Bisync ( octx , fs1 , fs2 , opt )
} )
_ , _ = os . Stdout . Write ( output )
_ , _ = b . logFile . Write ( output )
if err != nil {
b . logPrintf ( "Bisync error: %v" , err )
}
return nil
}
// saveTestListings copies every listing (.lst*), queue (.que) and filters
// (.flt, .flt.md5) artifact found in the work directory to
// "<prefix>.<golden name>.sav" in that same directory.
// Unless keepSource is set, each original is removed once it has been copied.
// It fails on the first copy or removal error, and also when the work
// directory contains no matching artifact at all.
func (b *bisyncTest) saveTestListings(prefix string, keepSource bool) (err error) {
	saved := 0
	for _, name := range b.listDir(b.workDir) {
		kind := fileType(name)
		if kind != "listing" && kind != "queue" && kind != "filters" {
			continue
		}
		saved++

		savName := fmt.Sprintf("%s.%s.sav", prefix, b.toGolden(name))
		from := filepath.Join(b.workDir, name)
		to := filepath.Join(b.workDir, savName)
		if err = bilib.CopyFile(from, to); err != nil {
			return err
		}
		if !keepSource {
			if err = os.Remove(from); err != nil {
				return err
			}
		}
	}
	if saved == 0 {
		return errors.New("listings not found")
	}
	return nil
}
func ( b * bisyncTest ) copyFile ( ctx context . Context , src , dst , asName string ) ( err error ) {
var fsrc , fdst fs . Fs
var srcPath , srcFile , dstPath , dstFile string
2023-11-17 18:14:38 +01:00
switch fsrc , err = fs . NewFs ( ctx , src ) ; err {
2021-05-16 18:39:33 +02:00
case fs . ErrorIsFile :
// ok
case nil :
return errors . New ( "source must be a file" )
default :
return err
}
if _ , srcPath , err = fspath . SplitFs ( src ) ; err != nil {
return err
}
srcFile = path . Base ( srcPath )
if dstPath , dstFile , err = fspath . Split ( dst ) ; err != nil {
return err
}
if dstPath == "" {
return errors . New ( "invalid destination" )
}
if dstFile != "" {
dstPath = dst // force directory
}
2023-11-17 18:14:38 +01:00
if fdst , err = fs . NewFs ( ctx , dstPath ) ; err != nil {
2021-05-16 18:39:33 +02:00
return err
}
if asName != "" {
dstFile = asName
} else {
dstFile = srcFile
}
fctx , fi := filter . AddConfig ( ctx )
if err := fi . AddFile ( srcFile ) ; err != nil {
return err
}
return operations . CopyFile ( fctx , fdst , fsrc , dstFile , srcFile )
}
2023-11-17 18:14:38 +01:00
// listSubdirs is equivalent to `rclone lsf -R [--dirs-only]`
func ( b * bisyncTest ) listSubdirs ( ctx context . Context , remote string , DirsOnly bool ) error {
2021-05-16 18:39:33 +02:00
f , err := fs . NewFs ( ctx , remote )
if err != nil {
return err
}
2023-11-17 18:14:38 +01:00
2021-05-16 18:39:33 +02:00
opt := operations . ListJSONOpt {
NoModTime : true ,
NoMimeType : true ,
2023-11-17 18:14:38 +01:00
DirsOnly : DirsOnly ,
2021-05-16 18:39:33 +02:00
Recurse : true ,
}
fmt := operations . ListFormat { }
fmt . SetDirSlash ( true )
fmt . AddPath ( )
printItem := func ( item * operations . ListJSONItem ) error {
2023-11-17 18:14:38 +01:00
b . logPrintf ( "%s - filename hash: %s" , fmt . Format ( item ) , stringToHash ( item . Name ) )
2021-05-16 18:39:33 +02:00
return nil
}
return operations . ListJSON ( ctx , f , "" , & opt , printItem )
}
// purgeChildren empties dir on f: files directly inside it are removed and
// each subdirectory is purged. All entries are attempted even after a
// failure; the first error encountered is returned.
// Note: this cannot be done with filters.
func purgeChildren(ctx context.Context, f fs.Fs, dir string) error {
	entries, err := f.List(ctx, dir)
	if err != nil {
		return err
	}

	var firstErr error
	for _, entry := range entries {
		var itemErr error
		switch item := entry.(type) {
		case fs.Object:
			fs.Debugf(item, "Remove file")
			itemErr = item.Remove(ctx)
		case fs.Directory:
			fs.Debugf(item, "Purge subdir")
			itemErr = operations.Purge(ctx, f, item.Remote())
		}
		if firstErr == nil {
			firstErr = itemErr
		}
	}
	return firstErr
}
// deleteFiles deletes the files on f whose names match glob.
// It installs two filter rules, in order: include glob, then exclude
// everything else ("/**"), and runs the delete under that filter.
func deleteFiles(ctx context.Context, f fs.Fs, glob string) error {
	fctx, fi := filter.AddConfig(ctx)

	rules := []struct {
		include bool
		pattern string
	}{
		{true, glob},
		{false, "/**"},
	}
	for _, rule := range rules {
		if err := fi.Add(rule.include, rule.pattern); err != nil {
			return err
		}
	}
	return operations.Delete(fctx, f)
}
// touchFiles sets modification time on a group of files.
// Returns names of touched files and/or error.
// Note: `rclone touch` can touch only single file, doesn't support filters.
func touchFiles ( ctx context . Context , dateStr string , f fs . Fs , dir , glob string ) ( [ ] string , error ) {
files := [ ] string { }
date , err := time . ParseInLocation ( touchDateFormat , dateStr , bisync . TZ )
if err != nil {
2021-11-04 11:12:57 +01:00
return files , fmt . Errorf ( "invalid date %q: %w" , dateStr , err )
2021-05-16 18:39:33 +02:00
}
matcher , firstErr := filter . GlobToRegexp ( glob , false )
if firstErr != nil {
2021-11-04 11:12:57 +01:00
return files , fmt . Errorf ( "invalid glob %q" , glob )
2021-05-16 18:39:33 +02:00
}
entries , firstErr := f . List ( ctx , "" )
if firstErr != nil {
return files , firstErr
}
for _ , entry := range entries {
obj , isFile := entry . ( fs . Object )
if ! isFile {
continue
}
remote := obj . Remote ( )
if ! matcher . MatchString ( remote ) {
continue
}
files = append ( files , dir + remote )
fs . Debugf ( obj , "Set modification time %s" , dateStr )
err := obj . SetModTime ( ctx , date )
if err == fs . ErrorCantSetModTimeWithoutDelete {
// Workaround for dropbox, similar to --refresh-times
err = nil
buf := new ( bytes . Buffer )
size := obj . Size ( )
2023-04-24 13:01:53 +02:00
separator := ""
2021-05-16 18:39:33 +02:00
if size > 0 {
2023-04-24 13:01:53 +02:00
err = operations . Cat ( ctx , f , buf , 0 , size , [ ] byte ( separator ) )
2021-05-16 18:39:33 +02:00
}
info := object . NewStaticObjectInfo ( remote , date , size , true , nil , f )
if err == nil {
_ = obj . Remove ( ctx )
_ , err = f . Put ( ctx , buf , info )
}
}
if firstErr == nil {
firstErr = err
}
}
return files , firstErr
}
// compareResults validates scenario results against golden dir
func ( b * bisyncTest ) compareResults ( ) int {
goldenFiles := b . listDir ( b . goldenDir )
resultFiles := b . listDir ( b . workDir )
// Adapt test file names to their golden counterparts
renamed := false
for _ , fileName := range resultFiles {
goldName := b . toGolden ( fileName )
if goldName != fileName {
filePath := filepath . Join ( b . workDir , fileName )
goldPath := filepath . Join ( b . workDir , goldName )
require . NoError ( b . t , os . Rename ( filePath , goldPath ) )
renamed = true
}
}
if renamed {
resultFiles = b . listDir ( b . workDir )
}
goldenSet := bilib . ToNames ( goldenFiles )
resultSet := bilib . ToNames ( resultFiles )
goldenNum := len ( goldenFiles )
resultNum := len ( resultFiles )
errorCount := 0
const divider = "----------------------------------------------------------"
if goldenNum != resultNum {
log . Print ( divider )
2023-11-06 12:59:41 +01:00
log . Print ( color ( terminal . RedFg , "MISCOMPARE - Number of Golden and Results files do not match:" ) )
2021-05-16 18:39:33 +02:00
log . Printf ( " Golden count: %d" , goldenNum )
log . Printf ( " Result count: %d" , resultNum )
log . Printf ( " Golden files: %s" , strings . Join ( goldenFiles , ", " ) )
log . Printf ( " Result files: %s" , strings . Join ( resultFiles , ", " ) )
}
for _ , file := range goldenFiles {
if ! resultSet . Has ( file ) {
errorCount ++
log . Printf ( " File found in Golden but not in Results: %s" , file )
}
}
for _ , file := range resultFiles {
if ! goldenSet . Has ( file ) {
errorCount ++
log . Printf ( " File found in Results but not in Golden: %s" , file )
}
}
for _ , file := range goldenFiles {
if ! resultSet . Has ( file ) {
continue
}
goldenText := b . mangleResult ( b . goldenDir , file , false )
resultText := b . mangleResult ( b . workDir , file , false )
if fileType ( file ) == "log" {
// save mangled logs so difference is easier on eyes
goldenFile := filepath . Join ( b . logDir , "mangled.golden.log" )
resultFile := filepath . Join ( b . logDir , "mangled.result.log" )
2022-08-20 16:38:02 +02:00
require . NoError ( b . t , os . WriteFile ( goldenFile , [ ] byte ( goldenText ) , bilib . PermSecure ) )
require . NoError ( b . t , os . WriteFile ( resultFile , [ ] byte ( resultText ) , bilib . PermSecure ) )
2021-05-16 18:39:33 +02:00
}
2023-10-01 10:12:39 +02:00
if goldenText == resultText || strings . Contains ( resultText , ".DS_Store" ) {
2021-05-16 18:39:33 +02:00
continue
}
errorCount ++
diff := difflib . UnifiedDiff {
A : difflib . SplitLines ( goldenText ) ,
B : difflib . SplitLines ( resultText ) ,
Context : 0 ,
}
text , err := difflib . GetUnifiedDiffString ( diff )
require . NoError ( b . t , err , "diff failed" )
log . Print ( divider )
2023-11-06 12:59:41 +01:00
log . Printf ( color ( terminal . RedFg , "| MISCOMPARE -Golden vs +Results for %s" ) , file )
2021-05-16 18:39:33 +02:00
for _ , line := range strings . Split ( strings . TrimSpace ( text ) , "\n" ) {
log . Printf ( "| %s" , strings . TrimSpace ( line ) )
}
}
if errorCount > 0 {
log . Print ( divider )
}
if errorCount == 0 && goldenNum != resultNum {
return - 1
}
return errorCount
}
// storeGolden will store workdir files to the golden directory.
// Golden results will have adapted file names and contain
// generic strings instead of local or cloud paths.
func ( b * bisyncTest ) storeGolden ( ) {
// Perform consistency checks
files := b . listDir ( b . workDir )
require . NotEmpty ( b . t , files , "nothing to store in golden dir" )
// Pass 1: validate files before storing
for _ , fileName := range files {
if fileType ( fileName ) == "lock" {
continue
}
2023-12-18 19:03:14 +01:00
if fileName == "backupdirs" {
log . Printf ( "skipping: %v" , fileName )
continue
}
2021-05-16 18:39:33 +02:00
goldName := b . toGolden ( fileName )
if goldName != fileName {
targetPath := filepath . Join ( b . workDir , goldName )
exists := bilib . FileExists ( targetPath )
require . False ( b . t , exists , "golden name overlap for file %s" , fileName )
}
text := b . mangleResult ( b . workDir , fileName , true )
if fileType ( fileName ) == "log" {
require . NotEmpty ( b . t , text , "incorrect golden log %s" , fileName )
}
}
// Pass 2: perform a verbatim copy
_ = os . RemoveAll ( b . goldenDir )
require . NoError ( b . t , bilib . CopyDir ( b . workDir , b . goldenDir ) )
// Pass 3: adapt file names and content
for _ , fileName := range files {
if fileType ( fileName ) == "lock" {
continue
}
2023-12-18 19:03:14 +01:00
if fileName == "backupdirs" {
log . Printf ( "skipping: %v" , fileName )
continue
}
2021-05-16 18:39:33 +02:00
text := b . mangleResult ( b . goldenDir , fileName , true )
goldName := b . toGolden ( fileName )
goldPath := filepath . Join ( b . goldenDir , goldName )
2022-08-20 16:38:02 +02:00
err := os . WriteFile ( goldPath , [ ] byte ( text ) , bilib . PermSecure )
2021-05-16 18:39:33 +02:00
assert . NoError ( b . t , err , "writing golden file %s" , goldName )
if goldName != fileName {
origPath := filepath . Join ( b . goldenDir , fileName )
assert . NoError ( b . t , os . Remove ( origPath ) , "removing original file %s" , fileName )
}
}
}
// mangleResult prepares test logs or listings for comparison
func ( b * bisyncTest ) mangleResult ( dir , file string , golden bool ) string {
2023-12-18 19:03:14 +01:00
if file == "backupdirs" {
return "skipping backupdirs"
}
2022-08-20 16:38:02 +02:00
buf , err := os . ReadFile ( filepath . Join ( dir , file ) )
2021-05-16 18:39:33 +02:00
require . NoError ( b . t , err )
2023-10-01 10:12:39 +02:00
// normalize unicode so tets are runnable on macOS
buf = norm . NFC . Bytes ( buf )
2021-05-16 18:39:33 +02:00
text := string ( buf )
switch fileType ( strings . TrimSuffix ( file , ".sav" ) ) {
case "queue" :
lines := strings . Split ( text , eol )
sort . Strings ( lines )
2023-11-17 18:14:38 +01:00
for i , line := range lines {
lines [ i ] = normalizeEncoding ( line )
}
2021-05-16 18:39:33 +02:00
return joinLines ( lines )
case "listing" :
2023-11-17 18:14:38 +01:00
return b . mangleListing ( text , golden , file )
2021-05-16 18:39:33 +02:00
case "log" :
// fall thru
default :
return text
}
// Adapt log lines to the golden way.
2023-11-17 18:14:38 +01:00
// First replace filenames with whitespace
// some backends (such as crypt) log them on multiple lines due to encoding differences, while others (local) do not
wsrep := [ ] string {
"subdir with" + eol + "white space.txt/file2 with" + eol + "white space.txt" ,
"subdir with white space.txt/file2 with white space.txt" ,
}
whitespaceJoiner := strings . NewReplacer ( wsrep ... )
s := whitespaceJoiner . Replace ( string ( buf ) )
lines := strings . Split ( s , eol )
2021-05-16 18:39:33 +02:00
pathReplacer := b . newReplacer ( true )
rep := logReplacements
if b . testCase == "dry_run" {
rep = append ( rep , dryrunReplacements ... )
}
repFrom := make ( [ ] * regexp . Regexp , len ( rep ) / 2 )
repTo := make ( [ ] string , len ( rep ) / 2 )
for i := 0 ; i < len ( rep ) ; i += 2 {
repFrom [ i / 2 ] = regexp . MustCompile ( rep [ i ] )
repTo [ i / 2 ] = rep [ i + 1 ]
}
hoppers := make ( [ ] * regexp . Regexp , len ( logHoppers ) )
dampers := make ( [ ] [ ] string , len ( logHoppers ) )
for i , regex := range logHoppers {
hoppers [ i ] = regexp . MustCompile ( "^" + regex + "$" )
}
// The %q format doubles backslashes, hence "{1,2}"
regexBackslash := regexp . MustCompile ( ` \\ { 1,2} ` )
emptyCount := 0
maxEmpty := 0
if b . golden {
maxEmpty = 2
}
result := make ( [ ] string , 0 , len ( lines ) )
for _ , s := range lines {
// Adapt file paths
s = pathReplacer . Replace ( strings . TrimSpace ( s ) )
// Apply regular expression replacements
for i := 0 ; i < len ( repFrom ) ; i ++ {
s = repFrom [ i ] . ReplaceAllString ( s , repTo [ i ] )
}
s = strings . TrimSpace ( s )
if s == dropMe {
continue
}
if fixSlash && regexFixSlash . MatchString ( s ) {
s = regexBackslash . ReplaceAllString ( s , "/" )
}
// Sort consecutive groups of naturally unordered lines.
// Any such group must end before the log ends or it might be lost.
absorbed := false
for i := 0 ; i < len ( dampers ) ; i ++ {
match := false
if s != "" && ! absorbed {
match = hoppers [ i ] . MatchString ( s )
}
if match {
dampers [ i ] = append ( dampers [ i ] , s )
absorbed = true
} else if len ( dampers [ i ] ) > 0 {
sort . Strings ( dampers [ i ] )
result = append ( result , dampers [ i ] ... )
dampers [ i ] = nil
}
}
if absorbed {
continue
}
// Skip empty lines unless storing to golden
if s == "" {
if emptyCount < maxEmpty {
result = append ( result , "" )
}
emptyCount ++
continue
}
result = append ( result , s )
emptyCount = 0
}
return joinLines ( result )
}
// mangleListing sorts listing lines before comparing.
2023-11-17 18:14:38 +01:00
func ( b * bisyncTest ) mangleListing ( text string , golden bool , file string ) string {
2021-05-16 18:39:33 +02:00
lines := strings . Split ( text , eol )
hasHeader := len ( lines ) > 0 && strings . HasPrefix ( lines [ 0 ] , bisync . ListingHeader )
if hasHeader {
lines = lines [ 1 : ]
}
// Split line in 4 groups: (flag, size)(hash.)( .id., .......modtime....... )(name).
regex := regexp . MustCompile ( ` ^([^ ] +\d+ )([^ ]+)( [^ ]+ [\d-]+T[\d:.]+[\d+-]+ )(".+")$ ` )
getFile := func ( s string ) string {
if match := regex . FindStringSubmatch ( strings . TrimSpace ( s ) ) ; match != nil {
if name , err := strconv . Unquote ( match [ 4 ] ) ; err == nil {
return name
}
}
return s
}
sort . SliceStable ( lines , func ( i , j int ) bool {
return getFile ( lines [ i ] ) < getFile ( lines [ j ] )
} )
2023-11-17 18:14:38 +01:00
// parse whether this is Path1 or Path2 (so we can apply per-Fs precision/hash settings)
isPath1 := strings . Contains ( file , ".path1.lst" )
f := b . fs2
if isPath1 {
f = b . fs1
}
// account for differences in backend features when comparing
2021-05-16 18:39:33 +02:00
if ! golden {
for i , s := range lines {
2023-11-17 18:14:38 +01:00
// Store hash as golden but ignore when comparing (only if no md5 support).
2021-05-16 18:39:33 +02:00
match := regex . FindStringSubmatch ( strings . TrimSpace ( s ) )
2023-11-17 18:14:38 +01:00
if match != nil && match [ 2 ] != "-" && ( ! b . fs1 . Hashes ( ) . Contains ( hash . MD5 ) || ! b . fs2 . Hashes ( ) . Contains ( hash . MD5 ) ) { // if hash is not empty and either side lacks md5
lines [ i ] = match [ 1 ] + "-" + match [ 3 ] + match [ 4 ] // replace it with "-" for comparison purposes (see #5679)
}
// account for modtime precision
var lineRegex = regexp . MustCompile ( ` ^(\S) +(-?\d+) (\S+) (\S+) (\d { 4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d { 9}[+-]\d { 4}) (".+")$ ` )
const timeFormat = "2006-01-02T15:04:05.000000000-0700"
const lineFormat = "%s %8d %s %s %s %q\n"
var TZ = time . UTC
fields := lineRegex . FindStringSubmatch ( strings . TrimSuffix ( lines [ i ] , "\n" ) )
if fields != nil {
sizeVal , sizeErr := strconv . ParseInt ( fields [ 2 ] , 10 , 64 )
if sizeErr == nil {
// account for filename encoding differences by normalizing to OS encoding
fields [ 6 ] = normalizeEncoding ( fields [ 6 ] )
timeStr := fields [ 5 ]
if f . Precision ( ) == fs . ModTimeNotSupported {
lines [ i ] = fmt . Sprintf ( lineFormat , fields [ 1 ] , sizeVal , fields [ 3 ] , fields [ 4 ] , "-" , fields [ 6 ] )
continue
}
timeVal , timeErr := time . ParseInLocation ( timeFormat , timeStr , TZ )
if timeErr == nil {
timeRound := timeVal . Round ( f . Precision ( ) * 2 )
lines [ i ] = fmt . Sprintf ( lineFormat , fields [ 1 ] , sizeVal , fields [ 3 ] , fields [ 4 ] , timeRound , fields [ 6 ] )
}
}
2021-05-16 18:39:33 +02:00
}
}
}
text = joinLines ( lines )
if hasHeader && golden {
text = bisync . ListingHeader + " test\n" + text
}
return text
}
// joinLines glues lines together with eol, strips every eol from the start
// and the end of the result, and terminates a non-empty result with exactly
// one eol. An input that joins to nothing but line breaks yields "".
func joinLines(lines []string) string {
	joined := strings.Trim(strings.Join(lines, eol), eol)
	if joined == "" {
		return ""
	}
	return joined + eol
}
// newReplacer can create two kinds of string replacers.
// If mangle is false, it will substitute macros in test scenario.
// If true then mangle paths in test log to match with golden log.
func ( b * bisyncTest ) newReplacer ( mangle bool ) * strings . Replacer {
if ! mangle {
rep := [ ] string {
"{datadir/}" , b . dataDir + slash ,
"{testdir/}" , b . testDir + slash ,
"{workdir/}" , b . workDir + slash ,
"{path1/}" , b . path1 ,
"{path2/}" , b . path2 ,
"{session}" , b . sessionName ,
"{/}" , slash ,
}
return strings . NewReplacer ( rep ... )
}
rep := [ ] string {
b . dataDir + slash , "{datadir/}" ,
b . testDir + slash , "{testdir/}" ,
b . workDir + slash , "{workdir/}" ,
2023-11-17 18:14:38 +01:00
b . fs1 . String ( ) , "{path1String}" ,
b . fs2 . String ( ) , "{path2String}" ,
2021-05-16 18:39:33 +02:00
b . path1 , "{path1/}" ,
b . path2 , "{path2/}" ,
2023-07-11 10:35:01 +02:00
"//?/" + strings . TrimSuffix ( strings . Replace ( b . path1 , slash , "/" , - 1 ) , "/" ) , "{path1}" , // fix windows-specific issue
"//?/" + strings . TrimSuffix ( strings . Replace ( b . path2 , slash , "/" , - 1 ) , "/" ) , "{path2}" ,
strings . TrimSuffix ( b . path1 , slash ) , "{path1}" , // ensure it's still recognized without trailing slash
strings . TrimSuffix ( b . path2 , slash ) , "{path2}" ,
2023-11-11 06:34:41 +01:00
b . workDir , "{workdir}" ,
2021-05-16 18:39:33 +02:00
b . sessionName , "{session}" ,
}
if fixSlash {
prep := [ ] string { }
for i := 0 ; i < len ( rep ) ; i += 2 {
// A hack for backslashes doubled by the go format "%q".
doubled := strings . ReplaceAll ( rep [ i ] , "\\" , "\\\\" )
if rep [ i ] != doubled {
prep = append ( prep , doubled , rep [ i + 1 ] )
}
}
// Put longer patterns first to ensure correct translation.
rep = append ( prep , rep ... )
}
return strings . NewReplacer ( rep ... )
}
// toGolden makes a result file name golden.
// It replaces each canonical path separately instead of using the
// session name to allow for subdirs in the extended-char-paths case.
func ( b * bisyncTest ) toGolden ( name string ) string {
name = strings . ReplaceAll ( name , b . canonPath1 , goldenCanonBase )
name = strings . ReplaceAll ( name , b . canonPath2 , goldenCanonBase )
name = strings . TrimSuffix ( name , ".sav" )
2023-10-01 10:12:39 +02:00
// normalize unicode so tets are runnable on macOS
name = norm . NFC . String ( name )
2021-05-16 18:39:33 +02:00
return name
}
// mkdir creates dir together with any missing parents and aborts the test
// if that fails.
func (b *bisyncTest) mkdir(dir string) {
	err := os.MkdirAll(dir, os.ModePerm)
	require.NoError(b.t, err)
}
// ensureDir returns parent joined with dir. Unless optional is set, the test
// is aborted when that path does not exist or is not a directory.
func (b *bisyncTest) ensureDir(parent, dir string, optional bool) string {
	full := filepath.Join(parent, dir)
	if optional {
		return full
	}
	info, err := os.Stat(full)
	require.NoError(b.t, err, "%s must exist", full)
	require.True(b.t, info.IsDir(), "%s must be a directory", full)
	return full
}
func ( b * bisyncTest ) listDir ( dir string ) ( names [ ] string ) {
2022-08-20 16:38:02 +02:00
files , err := os . ReadDir ( dir )
2021-05-16 18:39:33 +02:00
require . NoError ( b . t , err )
2023-10-06 22:38:47 +02:00
ignoreIt := func ( file string ) bool {
ignoreList := [ ] string {
// ".lst-control", ".lst-dry-control", ".lst-old", ".lst-dry-old",
".DS_Store" }
for _ , s := range ignoreList {
if strings . Contains ( file , s ) {
return true
}
}
return false
}
2021-05-16 18:39:33 +02:00
for _ , file := range files {
2023-10-06 22:38:47 +02:00
if ignoreIt ( file . Name ( ) ) {
2023-10-01 10:12:39 +02:00
continue
}
names = append ( names , filepath . Base ( norm . NFC . String ( file . Name ( ) ) ) )
2021-05-16 18:39:33 +02:00
}
// Sort files to ensure comparability.
sort . Strings ( names )
return
}
// fileType detects test artifact type.
// Notes:
// - "filtersfile.txt" will NOT be recognized as a filters file
// - only "test.log" will be recognized as a test log file
func fileType ( fileName string ) string {
if fileName == logFileName {
return "log"
}
switch filepath . Ext ( fileName ) {
2023-10-06 22:38:47 +02:00
case ".lst" , ".lst-new" , ".lst-err" , ".lst-dry" , ".lst-dry-new" , ".lst-old" , ".lst-dry-old" , ".lst-control" , ".lst-dry-control" :
2021-05-16 18:39:33 +02:00
return "listing"
case ".que" :
return "queue"
case ".lck" :
return "lock"
case ".flt" :
return "filters"
}
if strings . HasSuffix ( fileName , ".flt.md5" ) {
return "filters"
}
return "other"
}
// logPrintf formats a message, prints it through the standard logger and,
// when a test log file is open, appends it there as one line. A failed
// write to the log file aborts the test.
func (b *bisyncTest) logPrintf(text string, args ...interface{}) {
	msg := fmt.Sprintf(text, args...)
	log.Print(msg)
	if b.logFile == nil {
		return
	}
	_, err := fmt.Fprintln(b.logFile, msg)
	require.NoError(b.t, err, "writing log file")
}
2023-11-17 18:14:38 +01:00
// normalizeEncoding accounts for filename encoding differences between
// remotes by normalizing to OS encoding.
// The input may be a Go-quoted string; if it cannot be unquoted it is used
// verbatim. The name is cleaned, round-tripped through the OS encoder and
// returned Go-quoted. "" and "." are returned unchanged.
func normalizeEncoding(s string) string {
	switch s {
	case "", ".":
		return s
	}

	name := s
	if unquoted, err := strconv.Unquote(s); err == nil {
		name = unquoted
	}

	name = encoder.OS.FromStandardPath(filepath.Clean(name))
	name = encoder.OS.ToStandardPath(filepath.ToSlash(name))
	return strconv.Quote(name)
}
// stringToHash returns the MD5 sum of s as reported by the hasher's
// SumString. Failures are logged with fs.Errorf. If the hasher cannot be
// created or written to, "" is returned instead of going on with an
// unusable hasher (presumably nil after a construction error) or a partial
// digest.
func stringToHash(s string) string {
	ht := hash.MD5
	hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht))
	if err != nil {
		fs.Errorf(s, "hash unsupported: %v", err)
		return ""
	}
	if _, err = hasher.Write([]byte(s)); err != nil {
		fs.Errorf(s, "failed to write to hasher: %v", err)
		return ""
	}
	sum, err := hasher.SumString(ht, false)
	if err != nil {
		fs.Errorf(s, "hasher returned an error: %v", err)
	}
	return sum
}