Add a download flag to hashsum and related commands to force rclone to download and hash files locally

This commit modifies the operations.hashSum function by adding an alternate code path. This code path is triggered by passing downloadFlag = true. When activated, rclone downloads each file from the remote and hashes it locally. downloadFlag = false preserves the existing behavior of asking the remote for the hash.

This commit modifies HashLister to support the new hashSum code path and consolidates the roles of HashLister, HashListerBase64, Md5sum, and Sha1sum into it.  The printing of hashes in HashLister has been revised to work with --progress, which required light changes to operations.syncFprintf and cmd.startProgress.

The unit test operations_test.TestHashSums is modified to support this change and test the download functionality.

The command functions hashsum, md5sum, sha1sum, and dbhashsum are modified to support this change.  A download flag has been added and an output-file flag has been added.  The output-file flag writes hashes to a file instead of stdout to avoid the need to redirect stdout.
This commit is contained in:
lostheli 2020-12-18 07:45:58 -05:00 committed by Nick Craig-Wood
parent ed7af3f370
commit c8cfa43ccc
10 changed files with 319 additions and 75 deletions

View File

@ -2,10 +2,10 @@ package dbhashsum
import (
"context"
"os"
"github.com/rclone/rclone/backend/dropbox"
"github.com/rclone/rclone/cmd"
"github.com/rclone/rclone/cmd/hashsum"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/operations"
"github.com/spf13/cobra"
@ -13,6 +13,8 @@ import (
// init adds the dbhashsum command to the root command and attaches the
// flags provided by hashsum.AddHashFlags to it.
func init() {
	cmd.Root.AddCommand(commandDefinition)
	hashsum.AddHashFlags(commandDefinition.Flags())
}
var commandDefinition = &cobra.Command{
@ -23,6 +25,11 @@ Produces a Dropbox hash file for all the objects in the path. The
hashes are calculated according to [Dropbox content hash
rules](https://www.dropbox.com/developers/reference/content-hash).
The output is in the same format as md5sum and sha1sum.
By default, the hash is requested from the remote. If Dropbox hash is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling Dropbox hash for any remote.
`,
Hidden: true,
Run: func(command *cobra.Command, args []string) {
@ -30,7 +37,15 @@ The output is in the same format as md5sum and sha1sum.
fsrc := cmd.NewFsSrc(args)
fs.Logf(nil, `"rclone dbhashsum" is deprecated, use "rclone hashsum %v %s" instead`, dropbox.DbHashType, args[0])
cmd.Run(false, false, command, func() error {
return operations.HashLister(context.Background(), dropbox.DbHashType, fsrc, os.Stdout)
if hashsum.HashsumOutfile == "" {
return operations.HashLister(context.Background(), dropbox.DbHashType, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, nil)
}
output, close, err := hashsum.GetHashsumOutput(hashsum.HashsumOutfile)
if err != nil {
return err
}
defer close()
return operations.HashLister(context.Background(), dropbox.DbHashType, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output)
})
},
}

View File

@ -2,25 +2,55 @@ package hashsum
import (
"context"
"errors"
"fmt"
"os"
"github.com/pkg/errors"
"github.com/rclone/rclone/cmd"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/config/flags"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/operations"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
)
// Global hashsum flags for reuse in md5sum, sha1sum, and dbhashsum
var (
// outputBase64 is the destination of the "base64" flag registered directly in init
outputBase64 = false
// OutputBase64 is the destination of the "base64" flag registered by AddHashFlags
OutputBase64 = false
// DownloadFlag is the destination of the "download" flag registered by AddHashFlags
DownloadFlag = false
// HashsumOutfile is the destination of the "output-file" flag registered by AddHashFlags
HashsumOutfile = ""
)
// init adds the hashsum command to the root command and registers its flags.
//
// NOTE(review): a "base64" flag is registered here directly and again inside
// AddHashFlags - confirm that only one of the two registrations is meant to
// remain.
func init() {
cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags()
flags.BoolVarP(cmdFlags, &outputBase64, "base64", "", outputBase64, "Output base64 encoded hashsum")
AddHashFlags(cmdFlags)
}
// AddHashFlags registers the flags shared by hashsum, md5sum, sha1sum and
// dbhashsum on cmdFlags:
//
//   - "base64" is stored in OutputBase64
//   - "output-file" is stored in HashsumOutfile
//   - "download" is stored in DownloadFlag
//
// The current value of each variable is used as the flag's default.
func AddHashFlags(cmdFlags *pflag.FlagSet) {
	flags.BoolVarP(
		cmdFlags, &OutputBase64, "base64", "", OutputBase64,
		"Output base64 encoded hashsum",
	)
	flags.StringVarP(
		cmdFlags, &HashsumOutfile, "output-file", "", HashsumOutfile,
		"Output hashsums to a file rather than the terminal",
	)
	flags.BoolVarP(
		cmdFlags, &DownloadFlag, "download", "", DownloadFlag,
		"Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote",
	)
}
// GetHashsumOutput creates the file named by filename for use with the
// output-file flag.
//
// On success it returns the open file together with a close function that
// the caller should invoke once writing is finished; a failure to close is
// logged with fs.Errorf rather than returned. If the file cannot be created
// the returned file and close function are nil and err describes the failure.
func GetHashsumOutput(filename string) (out *os.File, close func(), err error) {
	file, createErr := os.Create(filename)
	if createErr != nil {
		return nil, nil, errors.Wrapf(createErr, "Failed to open output file %v", filename)
	}
	closer := func() {
		if closeErr := file.Close(); closeErr != nil {
			fs.Errorf(nil, "Failed to close output file %v: %v", filename, closeErr)
		}
	}
	return file, closer, nil
}
var commandDefinition = &cobra.Command{
@ -31,7 +61,12 @@ Produces a hash file for all the objects in the path using the hash
named. The output is in the same format as the standard
md5sum/sha1sum tool.
Run without a hash to see the list of supported hashes, e.g.
By default, the hash is requested from the remote. If the hash is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling any hash for any remote.
Run without a hash to see the list of all supported hashes, e.g.
$ rclone hashsum
Supported hashes are:
@ -61,11 +96,17 @@ Then
return err
}
fsrc := cmd.NewFsSrc(args[1:])
cmd.Run(false, false, command, func() error {
if outputBase64 {
return operations.HashListerBase64(context.Background(), ht, fsrc, os.Stdout)
if HashsumOutfile == "" {
return operations.HashLister(context.Background(), ht, OutputBase64, DownloadFlag, fsrc, nil)
}
return operations.HashLister(context.Background(), ht, fsrc, os.Stdout)
output, close, err := GetHashsumOutput(HashsumOutfile)
if err != nil {
return err
}
defer close()
return operations.HashLister(context.Background(), ht, OutputBase64, DownloadFlag, fsrc, output)
})
return nil
},

View File

@ -2,15 +2,18 @@ package md5sum
import (
"context"
"os"
"github.com/rclone/rclone/cmd"
"github.com/rclone/rclone/cmd/hashsum"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/operations"
"github.com/spf13/cobra"
)
// init adds the md5sum command to the root command and attaches the flags
// provided by hashsum.AddHashFlags to it.
func init() {
	cmd.Root.AddCommand(commandDefinition)
	hashsum.AddHashFlags(commandDefinition.Flags())
}
var commandDefinition = &cobra.Command{
@ -19,12 +22,25 @@ var commandDefinition = &cobra.Command{
Long: `
Produces an md5sum file for all the objects in the path. This
is in the same format as the standard md5sum tool produces.
By default, the hash is requested from the remote. If MD5 is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling MD5 for any remote.
`,
Run: func(command *cobra.Command, args []string) {
cmd.CheckArgs(1, 1, command, args)
fsrc := cmd.NewFsSrc(args)
cmd.Run(false, false, command, func() error {
return operations.Md5sum(context.Background(), fsrc, os.Stdout)
if hashsum.HashsumOutfile == "" {
return operations.HashLister(context.Background(), hash.MD5, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, nil)
}
output, close, err := hashsum.GetHashsumOutput(hashsum.HashsumOutfile)
if err != nil {
return err
}
defer close()
return operations.HashLister(context.Background(), hash.MD5, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output)
})
},
}

View File

@ -12,6 +12,7 @@ import (
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/log"
"github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/lib/terminal"
)
@ -28,6 +29,8 @@ const (
func startProgress() func() {
stopStats := make(chan struct{})
oldLogPrint := fs.LogPrint
oldSyncPrint := operations.SyncPrintf
if !log.Redirected() {
// Intercept the log calls if not logging to file or syslog
fs.LogPrint = func(level fs.LogLevel, text string) {
@ -35,6 +38,12 @@ func startProgress() func() {
}
}
// Intercept output from functions such as HashLister to stdout
operations.SyncPrintf = func(format string, a ...interface{}) {
printProgress(fmt.Sprintf(format, a...))
}
var wg sync.WaitGroup
wg.Add(1)
go func() {
@ -52,6 +61,7 @@ func startProgress() func() {
ticker.Stop()
printProgress("")
fs.LogPrint = oldLogPrint
operations.SyncPrintf = oldSyncPrint
fmt.Println("")
return
}

View File

@ -2,15 +2,18 @@ package sha1sum
import (
"context"
"os"
"github.com/rclone/rclone/cmd"
"github.com/rclone/rclone/cmd/hashsum"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/operations"
"github.com/spf13/cobra"
)
// init adds the sha1sum command to the root command and attaches the flags
// provided by hashsum.AddHashFlags to it.
func init() {
	cmd.Root.AddCommand(commandDefinition)
	hashsum.AddHashFlags(commandDefinition.Flags())
}
var commandDefinition = &cobra.Command{
@ -19,12 +22,25 @@ var commandDefinition = &cobra.Command{
Long: `
Produces an sha1sum file for all the objects in the path. This
is in the same format as the standard sha1sum tool produces.
By default, the hash is requested from the remote. If SHA-1 is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling SHA-1 for any remote.
`,
Run: func(command *cobra.Command, args []string) {
cmd.CheckArgs(1, 1, command, args)
fsrc := cmd.NewFsSrc(args)
cmd.Run(false, false, command, func() error {
return operations.Sha1sum(context.Background(), fsrc, os.Stdout)
if hashsum.HashsumOutfile == "" {
return operations.HashLister(context.Background(), hash.SHA1, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, nil)
}
output, close, err := hashsum.GetHashsumOutput(hashsum.HashsumOutfile)
if err != nil {
return err
}
defer close()
return operations.HashLister(context.Background(), hash.SHA1, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output)
})
},
}

View File

@ -16,7 +16,12 @@ Produces a hash file for all the objects in the path using the hash
named. The output is in the same format as the standard
md5sum/sha1sum tool.
Run without a hash to see the list of supported hashes, eg
By default, the hash is requested from the remote. If the hash is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling any hash for any remote.
Run without a hash to see the list of all supported hashes, e.g.
$ rclone hashsum
Supported hashes are:
@ -38,7 +43,9 @@ rclone hashsum <hash> remote:path [flags]
```
--base64 Output base64 encoded hashsum
--download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote
-h, --help help for hashsum
--output-file string Output hashsums to a file rather than the terminal
```
See the [global flags page](/flags/) for global options not listed here.

View File

@ -15,6 +15,11 @@ Produces an md5sum file for all the objects in the path.
Produces an md5sum file for all the objects in the path. This
is in the same format as the standard md5sum tool produces.
By default, the hash is requested from the remote. If MD5 is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling MD5 for any remote.
```
rclone md5sum remote:path [flags]
@ -23,7 +28,10 @@ rclone md5sum remote:path [flags]
## Options
```
--base64 Output base64 encoded hashsum
--download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote
-h, --help help for md5sum
--output-file string Output hashsums to a file rather than the terminal
```
See the [global flags page](/flags/) for global options not listed here.

View File

@ -15,6 +15,11 @@ Produces an sha1sum file for all the objects in the path.
Produces an sha1sum file for all the objects in the path. This
is in the same format as the standard sha1sum tool produces.
By default, the hash is requested from the remote. If SHA-1 is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling SHA-1 for any remote.
```
rclone sha1sum remote:path [flags]
@ -23,7 +28,10 @@ rclone sha1sum remote:path [flags]
## Options
```
--base64 Output base64 encoded hashsum
--download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote
-h, --help help for sha1sum
--output-file string Output hashsums to a file rather than the terminal
```
See the [global flags page](/flags/) for global options not listed here.

View File

@ -797,14 +797,28 @@ func ListFn(ctx context.Context, f fs.Fs, fn func(fs.Object)) error {
// outMutex serialises every write made through syncFprintf.
var outMutex sync.Mutex

// SyncPrintf is the function syncFprintf uses when it is given no writer.
// It is a package variable so that it can be replaced; by default it prints
// with fmt.Printf.
//
// It takes no lock itself, so call syncFprintf rather than calling it
// directly.
var SyncPrintf = func(format string, a ...interface{}) {
	fmt.Printf(format, a...)
}

// syncFprintf formats its arguments according to format and writes the
// result to w while holding outMutex.
//
// When w is nil the output is passed to SyncPrintf instead, which lets the
// print-to-terminal behaviour be swapped out by replacing SyncPrintf.
// Errors returned by fmt.Fprintf are discarded.
func syncFprintf(w io.Writer, format string, a ...interface{}) {
	outMutex.Lock()
	defer outMutex.Unlock()
	if w != nil {
		// best-effort output: the write error is intentionally ignored
		_, _ = fmt.Fprintf(w, format, a...)
		return
	}
	SyncPrintf(format, a...)
}
// List the Fs to the supplied writer
//
@ -833,63 +847,103 @@ func ListLong(ctx context.Context, f fs.Fs, w io.Writer) error {
})
}
// Md5sum lists the Fs to the supplied writer
//
// Produces the same output as the md5sum command - obeys includes and
// excludes
//
// Lists in parallel which may get them out of order
//
// It simply calls HashLister with hash.MD5 and returns its error.
func Md5sum(ctx context.Context, f fs.Fs, w io.Writer) error {
return HashLister(ctx, hash.MD5, f, w)
}
// Sha1sum lists the Fs to the supplied writer
//
// Obeys includes and excludes
//
// Lists in parallel which may get them out of order
//
// It simply calls HashLister with hash.SHA1 and returns its error.
func Sha1sum(ctx context.Context, f fs.Fs, w io.Writer) error {
return HashLister(ctx, hash.SHA1, f, w)
}
// hashSum returns the human readable hash for ht passed in. This may
// be UNSUPPORTED or ERROR. If it isn't returning a valid hash it will
// return an error.
//
// With downloadFlag set the object is opened, streamed through a hasher and
// the resulting sum is hex encoded; otherwise the hash is requested from the
// object with o.Hash.
//
// NOTE(review): two signatures appear back to back below (with and without
// downloadFlag) - confirm which one is meant to remain.
func hashSum(ctx context.Context, ht hash.Type, o fs.Object) (string, error) {
func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object) (string, error) {
var sum string
var err error
// If downloadFlag is true, download and hash the file.
// If downloadFlag is false, call o.Hash asking the remote for the hash
if downloadFlag {
// Setup: Define accounting, open the file with NewReOpen to provide restarts, account for the transfer, and setup a multi-hasher with the appropriate type
// Execution: io.Copy file to hasher, get hash and encode in hex
tr := accounting.Stats(ctx).NewTransfer(o)
// The deferred call reads the function-level err declared above.
defer func() {
tr.Done(ctx, err)
}()
// Open with NewReOpen to provide restarts
var options []fs.OpenOption
for _, option := range fs.GetConfig(ctx).DownloadHeaders {
options = append(options, option)
}
// NOTE(review): ":=" declares a new err scoped to this branch, and every
// later err in the branch refers to it. The function-level err read by the
// deferred tr.Done above is therefore never assigned here, so failures in
// this branch are not passed to tr.Done - confirm whether that is intended.
in, err := NewReOpen(ctx, o, fs.GetConfig(ctx).LowLevelRetries, options...)
if err != nil {
return "ERROR", errors.Wrapf(err, "Failed to open file %v", o)
}
// Account and buffer the transfer
in = tr.Account(ctx, in).WithBuffer()
// Setup hasher
hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht))
if err != nil {
return "UNSUPPORTED", errors.Wrap(err, "Hash unsupported")
}
// Copy to hasher, downloading the file and passing directly to hash
_, err = io.Copy(hasher, in)
if err != nil {
return "ERROR", errors.Wrap(err, "Failed to copy file to hasher")
}
// Get hash and encode as hex
byteSum, err := hasher.Sum(ht)
if err != nil {
return "ERROR", errors.Wrap(err, "Hasher returned an error")
}
sum = hex.EncodeToString(byteSum)
} else {
// No download: account this as a check and ask the object for its hash
tr := accounting.Stats(ctx).NewCheckingTransfer(o)
defer func() {
tr.Done(ctx, err)
}()
// NOTE(review): the call to o.Hash appears twice, once with ":=" and once
// with "=" - confirm which one is meant to remain.
sum, err := o.Hash(ctx, ht)
sum, err = o.Hash(ctx, ht)
if err == hash.ErrUnsupported {
sum = "UNSUPPORTED"
return "UNSUPPORTED", errors.Wrap(err, "Hash unsupported")
} else if err != nil {
fs.Debugf(o, "Failed to read %v: %v", ht, err)
sum = "ERROR"
// NOTE(review): "backed" looks like a typo for "backend", and err is both
// wrapped and formatted into the message so its text will appear twice.
return "ERROR", errors.Wrapf(err, "Failed to get hash %v from backed: %v", ht, err)
}
return sum, err
}
return sum, nil
}
// HashLister does an md5sum equivalent for the hash type passed in
func HashLister(ctx context.Context, ht hash.Type, f fs.Fs, w io.Writer) error {
return ListFn(ctx, f, func(o fs.Object) {
sum, _ := hashSum(ctx, ht, o)
syncFprintf(w, "%*s %s\n", hash.Width(ht), sum, o.Remote())
})
}
// HashListerBase64 does an md5sum equivalent for the hash type passed in with base64 encoded
func HashListerBase64(ctx context.Context, ht hash.Type, f fs.Fs, w io.Writer) error {
return ListFn(ctx, f, func(o fs.Object) {
sum, err := hashSum(ctx, ht, o)
if err == nil {
// Updated to handle both standard hex encoding and base64
// Updated to perform multiple hashes concurrently
//
// For every object passed to the ListFn callback a goroutine is started which
// calls hashSum and prints one line through syncFprintf. When outputBase64 is
// set and hashSum succeeded, the hex sum is re-encoded with
// base64.URLEncoding before printing.
//
// The returned error is the one from ListFn; errors from hashSum are counted
// with fs.CountError and logged with fs.Errorf inside the goroutine and are
// not returned.
func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFlag bool, f fs.Fs, w io.Writer) error {
// Buffered channel with capacity taken from the Transfers config value: a
// slot is taken before each goroutine starts and released when it ends
concurrencyControl := make(chan struct{}, fs.GetConfig(ctx).Transfers)
var wg sync.WaitGroup
err := ListFn(ctx, f, func(o fs.Object) {
wg.Add(1)
concurrencyControl <- struct{}{}
go func() {
defer func() {
<-concurrencyControl
wg.Done()
}()
// This err is local to the goroutine, distinct from the one returned below
sum, err := hashSum(ctx, ht, downloadFlag, o)
if outputBase64 && err == nil {
// NOTE(review): the decode error is discarded - presumably sum is always
// valid hex when hashSum returns no error; confirm
hexBytes, _ := hex.DecodeString(sum)
sum = base64.URLEncoding.EncodeToString(hexBytes)
}
width := base64.URLEncoding.EncodedLen(hash.Width(ht) / 2)
syncFprintf(w, "%*s %s\n", width, sum, o.Remote())
} else {
syncFprintf(w, "%*s %s\n", hash.Width(ht), sum, o.Remote())
}
if err != nil {
err = fs.CountError(err)
fs.Errorf(o, "%v", err)
}
}()
})
// Wait for every started goroutine to finish before returning
wg.Wait()
return err
}
// Count counts the objects and their sizes in the Fs

View File

@ -193,10 +193,10 @@ func TestHashSums(t *testing.T) {
fstest.CheckItems(t, r.Fremote, file1, file2)
// MD5 Sum
// MD5 Sum without download
var buf bytes.Buffer
err := operations.Md5sum(ctx, r.Fremote, &buf)
err := operations.HashLister(ctx, hash.MD5, false, true, r.Fremote, &buf)
require.NoError(t, err)
res := buf.String()
if !strings.Contains(res, "336d5ebc5436534e61d16e63ddfca327 empty space\n") &&
@ -210,10 +210,27 @@ func TestHashSums(t *testing.T) {
t.Errorf("potato2 missing: %q", res)
}
// SHA1 Sum
// MD5 Sum with download
buf.Reset()
err = operations.Sha1sum(ctx, r.Fremote, &buf)
err = operations.HashLister(ctx, hash.MD5, false, true, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "336d5ebc5436534e61d16e63ddfca327 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "d6548b156ea68a4e003e786df99eee76 potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
// SHA1 Sum without download
buf.Reset()
err = operations.HashLister(ctx, hash.SHA1, false, false, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "3bc15c8aae3e4124dd409035f32ea2fd6835efc9 empty space\n") &&
@ -227,13 +244,30 @@ func TestHashSums(t *testing.T) {
t.Errorf("potato2 missing: %q", res)
}
// QuickXorHash Sum
// SHA1 Sum with download
buf.Reset()
err = operations.HashLister(ctx, hash.SHA1, false, true, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "3bc15c8aae3e4124dd409035f32ea2fd6835efc9 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "9dc7f7d3279715991a22853f5981df582b7f9f6d potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
// QuickXorHash Sum without download
buf.Reset()
var ht hash.Type
err = ht.Set("QuickXorHash")
require.NoError(t, err)
err = operations.HashLister(ctx, ht, r.Fremote, &buf)
err = operations.HashLister(ctx, ht, false, false, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "2d00000000000000000000000100000000000000 empty space\n") &&
@ -247,10 +281,45 @@ func TestHashSums(t *testing.T) {
t.Errorf("potato2 missing: %q", res)
}
// QuickXorHash Sum with Base64 Encoded
// QuickXorHash Sum with download
buf.Reset()
err = operations.HashListerBase64(ctx, ht, r.Fremote, &buf)
require.NoError(t, err)
err = operations.HashLister(ctx, ht, false, true, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "2d00000000000000000000000100000000000000 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "4001dad296b6b4a52d6d694b67dad296b6b4a52d potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
// QuickXorHash Sum with Base64 Encoded, without download
buf.Reset()
err = operations.HashLister(ctx, ht, true, false, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "LQAAAAAAAAAAAAAAAQAAAAAAAAA= empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "QAHa0pa2tKUtbWlLZ9rSlra0pS0= potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
// QuickXorHash Sum with Base64 Encoded and download
buf.Reset()
err = operations.HashLister(ctx, ht, true, true, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "LQAAAAAAAAAAAAAAAQAAAAAAAAA= empty space\n") &&