sftp: add support for more hashes (crc32, sha256, blake3, xxh3, xxh128)

This commit is contained in:
albertony 2024-04-15 11:35:37 +02:00
parent 4477b8774a
commit a30b32de9a
2 changed files with 230 additions and 82 deletions

View File

@ -216,15 +216,45 @@ E.g. the second example above should be rewritten as:
Help: "Windows Command Prompt", Help: "Windows Command Prompt",
}, },
}, },
}, {
Name: "hashes",
Help: `Comma separated list of supported checksum types.`,
Default: fs.CommaSepList{},
Advanced: true,
}, { }, {
Name: "md5sum_command", Name: "md5sum_command",
Default: "", Default: "",
Help: "The command used to read md5 hashes.\n\nLeave blank for autodetect.", Help: "The command used to read MD5 hashes.\n\nLeave blank for autodetect.",
Advanced: true, Advanced: true,
}, { }, {
Name: "sha1sum_command", Name: "sha1sum_command",
Default: "", Default: "",
Help: "The command used to read sha1 hashes.\n\nLeave blank for autodetect.", Help: "The command used to read SHA-1 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "crc32sum_command",
Default: "",
Help: "The command used to read CRC-32 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "sha256sum_command",
Default: "",
Help: "The command used to read SHA-256 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "blake3sum_command",
Default: "",
Help: "The command used to read BLAKE3 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "xxh3sum_command",
Default: "",
Help: "The command used to read XXH3 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "xxh128sum_command",
Default: "",
Help: "The command used to read XXH128 hashes.\n\nLeave blank for autodetect.",
Advanced: true, Advanced: true,
}, { }, {
Name: "skip_links", Name: "skip_links",
@ -520,8 +550,14 @@ type Options struct {
PathOverride string `config:"path_override"` PathOverride string `config:"path_override"`
SetModTime bool `config:"set_modtime"` SetModTime bool `config:"set_modtime"`
ShellType string `config:"shell_type"` ShellType string `config:"shell_type"`
Hashes fs.CommaSepList `config:"hashes"`
Md5sumCommand string `config:"md5sum_command"` Md5sumCommand string `config:"md5sum_command"`
Sha1sumCommand string `config:"sha1sum_command"` Sha1sumCommand string `config:"sha1sum_command"`
Crc32sumCommand string `config:"crc32sum_command"`
Sha256sumCommand string `config:"sha256sum_command"`
Blake3sumCommand string `config:"blake3sum_command"`
Xxh3sumCommand string `config:"xxh3sum_command"`
Xxh128sumCommand string `config:"xxh128sum_command"`
SkipLinks bool `config:"skip_links"` SkipLinks bool `config:"skip_links"`
Subsystem string `config:"subsystem"` Subsystem string `config:"subsystem"`
ServerCommand string `config:"server_command"` ServerCommand string `config:"server_command"`
@ -568,13 +604,18 @@ type Fs struct {
// Object is a remote SFTP file that has been stat'd (so it exists, but is not necessarily open for reading) // Object is a remote SFTP file that has been stat'd (so it exists, but is not necessarily open for reading)
type Object struct { type Object struct {
fs *Fs fs *Fs
remote string remote string
size int64 // size of the object size int64 // size of the object
modTime uint32 // modification time of the object as unix time modTime uint32 // modification time of the object as unix time
mode os.FileMode // mode bits from the file mode os.FileMode // mode bits from the file
md5sum *string // Cached MD5 checksum md5sum *string // Cached MD5 checksum
sha1sum *string // Cached SHA1 checksum sha1sum *string // Cached SHA-1 checksum
crc32sum *string // Cached CRC-32 checksum
sha256sum *string // Cached SHA-256 checksum
blake3sum *string // Cached BLAKE3 checksum
xxh3sum *string // Cached XXH3 checksum
xxh128sum *string // Cached XXH128 checksum
} }
// conn encapsulates an ssh client and corresponding sftp client // conn encapsulates an ssh client and corresponding sftp client
@ -1609,14 +1650,105 @@ func (f *Fs) Hashes() hash.Set {
return *f.cachedHashes return *f.cachedHashes
} }
hashSet := hash.NewHashSet() hashTypesSupported := hash.NewHashSet()
f.cachedHashes = &hashSet f.cachedHashes = &hashTypesSupported
if f.opt.DisableHashCheck || f.shellType == shellTypeNotSupported { if f.opt.DisableHashCheck || f.shellType == shellTypeNotSupported {
return hashSet return hashTypesSupported
}
// Decide which hash types to probe for on the remote: the ones named in
// the `hashes` option, or MD5 and SHA-1 when that option is empty.
hashTypes := hash.NewHashSet()
if len(f.opt.Hashes) > 0 {
	for _, hashName := range f.opt.Hashes {
		var hashType hash.Type
		if err := hashType.Set(hashName); err != nil {
			fs.Infof(nil, "Invalid token %q in hash string %q", hashName, f.opt.Hashes.String())
			// The name could not be parsed, so hashType does not hold a
			// value the user asked for: skip it rather than adding it.
			continue
		}
		hashTypes.Add(hashType)
	}
} else {
	hashTypes.Add(hash.MD5, hash.SHA1)
}
// hashCommands is the per-hash-type detection table.
//
// For each hash type it holds:
//   - option: pointer to the Options field that stores the command to
//     use for this hash type (each type must point at its own field).
//   - emptyHash: the hex digest of zero-length input for this algorithm,
//     passed to checkHash as the expected value.
//   - hashCommands: candidate commands in order of preference. hashFile
//     is the form used to hash a file; hashEmpty is presumably the form
//     run against empty input while probing (checkHash is only partly
//     visible here - confirm). Both forms of a candidate must invoke the
//     same algorithm, otherwise the probe output can never equal
//     emptyHash.
hashCommands := map[hash.Type]struct {
	option       *string
	emptyHash    string
	hashCommands []struct{ hashFile, hashEmpty string }
}{
	hash.MD5: {
		&f.opt.Md5sumCommand,
		"d41d8cd98f00b204e9800998ecf8427e",
		[]struct{ hashFile, hashEmpty string }{
			{"md5sum", "md5sum"},
			{"md5 -r", "md5 -r"},
			{"rclone md5sum", "rclone md5sum"},
		},
	},
	hash.SHA1: {
		&f.opt.Sha1sumCommand,
		"da39a3ee5e6b4b0d3255bfef95601890afd80709",
		[]struct{ hashFile, hashEmpty string }{
			{"sha1sum", "sha1sum"},
			{"sha1 -r", "sha1 -r"},
			{"rclone sha1sum", "rclone sha1sum"},
		},
	},
	hash.CRC32: {
		// Must be the CRC-32 option: pointing at Sha1sumCommand would make
		// CRC-32 detection read and overwrite the SHA-1 command.
		&f.opt.Crc32sumCommand,
		"00000000",
		[]struct{ hashFile, hashEmpty string }{
			{"crc32", "crc32"},
			{"rclone hashsum crc32", "rclone hashsum crc32"},
		},
	},
	hash.SHA256: {
		&f.opt.Sha256sumCommand,
		"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
		[]struct{ hashFile, hashEmpty string }{
			// The probe form must be the SHA-256 tool as well; a SHA-1
			// tool cannot produce the SHA-256 empty digest above.
			{"sha256sum", "sha256sum"},
			{"sha256 -r", "sha256 -r"},
			{"rclone hashsum sha256", "rclone hashsum sha256"},
		},
	},
	hash.BLAKE3: {
		&f.opt.Blake3sumCommand,
		"af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262",
		[]struct{ hashFile, hashEmpty string }{
			{"b3sum", "b3sum"},
			{"rclone hashsum blake3", "rclone hashsum blake3"},
		},
	},
	hash.XXH3: {
		&f.opt.Xxh3sumCommand,
		"2d06800538d394c2",
		[]struct{ hashFile, hashEmpty string }{
			{"xxhsum -H3", "xxhsum -H3"},
			{"rclone hashsum xxh3", "rclone hashsum xxh3"},
		},
	},
	hash.XXH128: {
		&f.opt.Xxh128sumCommand,
		"99aa06d3014798d86001c324468d497f",
		[]struct{ hashFile, hashEmpty string }{
			{"xxhsum -H2", "xxhsum -H2"},
			{"rclone hashsum xxh128", "rclone hashsum xxh128"},
		},
	},
}
if f.shellType == "powershell" {
for _, hashType := range []hash.Type{hash.MD5, hash.SHA1, hash.SHA256} {
if entry, ok := hashCommands[hashType]; ok {
entry.hashCommands = append(hashCommands[hashType].hashCommands, struct {
hashFile, hashEmpty string
}{
fmt.Sprintf("&{param($Path);Get-FileHash -Algorithm %v -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}", hashType),
fmt.Sprintf("Get-FileHash -Algorithm %v -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}", hashType),
})
hashCommands[hashType] = entry
}
}
} }
// look for a hash command which works
checkHash := func(hashType hash.Type, commands []struct{ hashFile, hashEmpty string }, expected string, hashCommand *string, changed *bool) bool { checkHash := func(hashType hash.Type, commands []struct{ hashFile, hashEmpty string }, expected string, hashCommand *string, changed *bool) bool {
if *hashCommand == hashCommandNotSupported { if *hashCommand == hashCommandNotSupported {
return false return false
@ -1645,55 +1777,25 @@ func (f *Fs) Hashes() hash.Set {
} }
changed := false changed := false
md5Commands := []struct { for _, hashType := range hashTypes.Array() {
hashFile, hashEmpty string if entry, ok := hashCommands[hashType]; ok {
}{ if works := checkHash(hashType, entry.hashCommands, entry.emptyHash, entry.option, &changed); works {
{"md5sum", "md5sum"}, hashTypesSupported.Add(hashType)
{"md5 -r", "md5 -r"}, }
{"rclone md5sum", "rclone md5sum"}, }
} }
sha1Commands := []struct {
hashFile, hashEmpty string
}{
{"sha1sum", "sha1sum"},
{"sha1 -r", "sha1 -r"},
{"rclone sha1sum", "rclone sha1sum"},
}
if f.shellType == "powershell" {
md5Commands = append(md5Commands, struct {
hashFile, hashEmpty string
}{
"&{param($Path);Get-FileHash -Algorithm MD5 -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}",
"Get-FileHash -Algorithm MD5 -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}",
})
sha1Commands = append(sha1Commands, struct {
hashFile, hashEmpty string
}{
"&{param($Path);Get-FileHash -Algorithm SHA1 -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}",
"Get-FileHash -Algorithm SHA1 -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}",
})
}
md5Works := checkHash(hash.MD5, md5Commands, "d41d8cd98f00b204e9800998ecf8427e", &f.opt.Md5sumCommand, &changed)
sha1Works := checkHash(hash.SHA1, sha1Commands, "da39a3ee5e6b4b0d3255bfef95601890afd80709", &f.opt.Sha1sumCommand, &changed)
if changed { if changed {
// Save permanently in config to avoid the extra work next time // Save permanently in config to avoid the extra work next time
fs.Debugf(f, "Setting hash command for %v to %q (set sha1sum_command to override)", hash.MD5, f.opt.Md5sumCommand) for _, hashType := range hashTypes.Array() {
f.m.Set("md5sum_command", f.opt.Md5sumCommand) if entry, ok := hashCommands[hashType]; ok {
fs.Debugf(f, "Setting hash command for %v to %q (set md5sum_command to override)", hash.SHA1, f.opt.Sha1sumCommand) fs.Debugf(f, "Setting hash command for %v to %q (set %vsum_command to override)", hashType, *entry.option, hashType)
f.m.Set("sha1sum_command", f.opt.Sha1sumCommand) f.m.Set(fmt.Sprintf("%vsum_command", hashType), *entry.option)
}
}
} }
if sha1Works { return hashTypesSupported
hashSet.Add(hash.SHA1)
}
if md5Works {
hashSet.Add(hash.MD5)
}
return hashSet
} }
// About gets usage stats // About gets usage stats
@ -1828,17 +1930,43 @@ func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) {
_ = o.fs.Hashes() _ = o.fs.Hashes()
var hashCmd string var hashCmd string
if r == hash.MD5 { switch r {
case hash.MD5:
if o.md5sum != nil { if o.md5sum != nil {
return *o.md5sum, nil return *o.md5sum, nil
} }
hashCmd = o.fs.opt.Md5sumCommand hashCmd = o.fs.opt.Md5sumCommand
} else if r == hash.SHA1 { case hash.SHA1:
if o.sha1sum != nil { if o.sha1sum != nil {
return *o.sha1sum, nil return *o.sha1sum, nil
} }
hashCmd = o.fs.opt.Sha1sumCommand hashCmd = o.fs.opt.Sha1sumCommand
} else { case hash.CRC32:
if o.crc32sum != nil {
return *o.crc32sum, nil
}
hashCmd = o.fs.opt.Crc32sumCommand
case hash.SHA256:
if o.sha256sum != nil {
return *o.sha256sum, nil
}
hashCmd = o.fs.opt.Sha256sumCommand
case hash.BLAKE3:
if o.blake3sum != nil {
return *o.blake3sum, nil
}
hashCmd = o.fs.opt.Blake3sumCommand
case hash.XXH3:
if o.xxh3sum != nil {
return *o.xxh3sum, nil
}
hashCmd = o.fs.opt.Xxh3sumCommand
case hash.XXH128:
if o.xxh128sum != nil {
return *o.xxh128sum, nil
}
hashCmd = o.fs.opt.Xxh128sumCommand
default:
return "", hash.ErrUnsupported return "", hash.ErrUnsupported
} }
if hashCmd == "" || hashCmd == hashCommandNotSupported { if hashCmd == "" || hashCmd == hashCommandNotSupported {
@ -1855,10 +1983,21 @@ func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) {
} }
hashString := parseHash(outBytes) hashString := parseHash(outBytes)
fs.Debugf(o, "Parsed hash: %s", hashString) fs.Debugf(o, "Parsed hash: %s", hashString)
if r == hash.MD5 { switch r {
case hash.MD5:
o.md5sum = &hashString o.md5sum = &hashString
} else if r == hash.SHA1 { case hash.SHA1:
o.sha1sum = &hashString o.sha1sum = &hashString
case hash.CRC32:
o.crc32sum = &hashString
case hash.SHA256:
o.sha256sum = &hashString
case hash.BLAKE3:
o.blake3sum = &hashString
case hash.XXH3:
o.xxh3sum = &hashString
case hash.XXH128:
o.xxh128sum = &hashString
} }
return hashString, nil return hashString, nil
} }
@ -1923,7 +2062,7 @@ func (f *Fs) remoteShellPath(remote string) string {
} }
// Converts a byte array from the SSH session returned by // Converts a byte array from the SSH session returned by
// an invocation of md5sum/sha1sum to a hash string // an invocation of hash command to a hash string
// as expected by the rest of this application // as expected by the rest of this application
func parseHash(bytes []byte) string { func parseHash(bytes []byte) string {
// For strings with backslash *sum writes a leading \ // For strings with backslash *sum writes a leading \
@ -2152,6 +2291,11 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
// Clear the hash cache since we are about to update the object // Clear the hash cache since we are about to update the object
o.md5sum = nil o.md5sum = nil
o.sha1sum = nil o.sha1sum = nil
o.crc32sum = nil
o.sha256sum = nil
o.blake3sum = nil
o.xxh3sum = nil
o.xxh128sum = nil
c, err := o.fs.getSftpConnection(ctx) c, err := o.fs.getSftpConnection(ctx)
if err != nil { if err != nil {
return fmt.Errorf("Update: %w", err) return fmt.Errorf("Update: %w", err)

View File

@ -318,29 +318,30 @@ is able to use checksumming if the same login has shell access,
and can execute remote commands. If there is a command that can and can execute remote commands. If there is a command that can
calculate compatible checksums on the remote system, Rclone can calculate compatible checksums on the remote system, Rclone can
then be configured to execute this whenever a checksum is needed, then be configured to execute this whenever a checksum is needed,
and read back the results. Currently MD5 and SHA-1 are supported. and read back the results. By default MD5 and SHA-1 are considered,
but CRC-32, SHA-256, BLAKE3, XXH3 and XXH128 are also supported;
the option `hashes` can be set to specify which ones to consider.
Normally this requires an external utility being available on Normally this requires an external utility being available on
the server. By default rclone will try commands `md5sum`, `md5` the server. E.g. for MD5 checksums, by default rclone will try commands
and `rclone md5sum` for MD5 checksums, and the first one found usable `md5sum`, `md5` and `rclone md5sum`, and the first one found
will be picked. Same with `sha1sum`, `sha1` and `rclone sha1sum` usable will be picked. These utilities normally need to be in the
commands for SHA-1 checksums. These utilities normally need to remote's PATH to be found.
be in the remote's PATH to be found.
In some cases the shell itself is capable of calculating checksums. In some cases the shell itself is capable of calculating checksums.
PowerShell is an example of such a shell. If rclone detects that the PowerShell is an example of such a shell. If rclone detects that the
remote shell is PowerShell, which means it most probably is a remote shell is PowerShell, which means it most probably is a
Windows OpenSSH server, rclone will use a predefined script block Windows OpenSSH server, rclone will use a predefined script block
to produce the checksums when no external checksum commands are found to produce the checksums for MD5, SHA-1 and SHA-256 when no external
(see [shell access](#shell-access)). This assumes PowerShell version checksum commands are found (see [shell access](#shell-access)). This
4.0 or newer. assumes PowerShell version 4.0 or newer.
The options `md5sum_command` and `sha1_command` can be used to customize The options `md5sum_command`, `sha1sum_command`, etc. can be used to customize
the command to be executed for calculation of checksums. You can for the commands to be executed for calculation of checksums. You can for
example set a specific path to where md5sum and sha1sum executables example set a specific path to where the md5sum executable is located,
are located, or use them to specify some other tools that print checksums or specify some other tool that prints checksums in a compatible format.
in compatible format. The value can include command-line arguments, The value can include command-line arguments, or even shell script blocks
or even shell script blocks as with PowerShell. Rclone has subcommands as with PowerShell. Rclone has subcommands [hashsum](/commands/rclone_hashsum/),
[md5sum](/commands/rclone_md5sum/) and [sha1sum](/commands/rclone_sha1sum/) [md5sum](/commands/rclone_md5sum/) and [sha1sum](/commands/rclone_sha1sum/)
that use compatible format, which means if you have an rclone executable that use compatible format, which means if you have an rclone executable
on the server it can be used. As mentioned above, they will be automatically on the server it can be used. As mentioned above, they will be automatically
@ -356,11 +357,14 @@ configuration, so next time it will use the same. Value `none`
will be set if none of the default commands could be used for a specific will be set if none of the default commands could be used for a specific
algorithm, and this algorithm will not be supported by the remote. algorithm, and this algorithm will not be supported by the remote.
Disabling the checksumming may be required if you are connecting to SFTP servers Disabling the checksumming completely may be required if you are connecting to
which are not under your control, and to which the execution of remote shell SFTP servers which are not under your control, and to which the execution of
commands is prohibited. Set the configuration option `disable_hashcheck` remote shell commands is prohibited. Set the configuration option `disable_hashcheck`
to `true` to disable checksumming entirely, or set `shell_type` to `none` to `true` to disable checksumming entirely (you get the same effect by setting
to disable all functionality based on remote shell command execution. option `hashes` to `none` or options `md5sum_command`, `sha1sum_command` etc.
to `none`). Set option `shell_type` to `none` to not only disable checksumming,
but also disable all other functionality that is based on remote shell command
execution.
### Modification times and hashes ### Modification times and hashes