lsjson: Add --hash-type parameter and use it in lsf to speed up hashing

Before this change, if you specified --hash MD5 in rclone lsf, it would
calculate all the hashes and return only the MD5 hash, which was very
slow on the local backend.

Likewise, specifying --hash on rclone lsjson was equally slow.

This change introduces the --hash-type flag (and corresponding
internal parameter) so that the hashes required can be selected in
lsjson.

This is used internally in lsf when the --hash parameter is selected
to speed up the hashing by only hashing with the one hash specified.

Fixes #4181
This commit is contained in:
Nick Craig-Wood 2020-04-28 18:44:04 +01:00
parent a3f0992a22
commit f37af9afec
3 changed files with 41 additions and 15 deletions

View File

@ -185,6 +185,7 @@ func Lsf(ctx context.Context, fsrc fs.Fs, out io.Writer) error {
case 'h': case 'h':
list.AddHash(hashType) list.AddHash(hashType)
opt.ShowHash = true opt.ShowHash = true
opt.HashTypes = []string{hashType.String()}
case 'i': case 'i':
list.AddID() list.AddID()
case 'm': case 'm':

View File

@ -29,6 +29,7 @@ func init() {
flags.BoolVarP(cmdFlags, &opt.ShowOrigIDs, "original", "", false, "Show the ID of the underlying Object.") flags.BoolVarP(cmdFlags, &opt.ShowOrigIDs, "original", "", false, "Show the ID of the underlying Object.")
flags.BoolVarP(cmdFlags, &opt.FilesOnly, "files-only", "", false, "Show only files in the listing.") flags.BoolVarP(cmdFlags, &opt.FilesOnly, "files-only", "", false, "Show only files in the listing.")
flags.BoolVarP(cmdFlags, &opt.DirsOnly, "dirs-only", "", false, "Show only directories in the listing.") flags.BoolVarP(cmdFlags, &opt.DirsOnly, "dirs-only", "", false, "Show only directories in the listing.")
flags.StringArrayVarP(cmdFlags, &opt.HashTypes, "hash-type", "", nil, "Show only this hash type (may be repeated).")
} }
var commandDefinition = &cobra.Command{ var commandDefinition = &cobra.Command{
@ -58,17 +59,25 @@ The output is an array of Items, where each Item looks like this
"Tier" : "hot", "Tier" : "hot",
} }
If --hash is not specified the Hashes property won't be emitted. If --hash is not specified the Hashes property won't be emitted. The
types of hash can be specified with the --hash-type parameter (which
may be repeated). If --hash-type is set then it implies --hash.
If --no-modtime is specified then ModTime will be blank. This can speed things up on remotes where reading the ModTime takes an extra request (eg s3, swift). If --no-modtime is specified then ModTime will be blank. This can
speed things up on remotes where reading the ModTime takes an extra
request (eg s3, swift).
If --no-mimetype is specified then MimeType will be blank. This can speed things up on remotes where reading the MimeType takes an extra request (eg s3, swift). If --no-mimetype is specified then MimeType will be blank. This can
speed things up on remotes where reading the MimeType takes an extra
request (eg s3, swift).
If --encrypted is not specified the Encrypted won't be emitted. If --encrypted is not specified the Encrypted won't be emitted.
If --dirs-only is not specified files in addition to directories are returned If --dirs-only is not specified files in addition to directories are
returned
If --files-only is not specified directories in addition to the files will be returned. If --files-only is not specified directories in addition to the files
will be returned.
The Path field will only show folders below the remote path being listed. The Path field will only show folders below the remote path being listed.
If "remote:path" contains the file "subfolder/file.txt", the Path for "file.txt" If "remote:path" contains the file "subfolder/file.txt", the Path for "file.txt"

View File

@ -8,6 +8,7 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/rclone/rclone/backend/crypt" "github.com/rclone/rclone/backend/crypt"
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/walk" "github.com/rclone/rclone/fs/walk"
) )
@ -69,14 +70,15 @@ func formatForPrecision(precision time.Duration) string {
// ListJSONOpt describes the options for ListJSON
type ListJSONOpt struct {
	// Recurse is handed to ConfigMaxDepth to choose the listing depth.
	Recurse bool `json:"recurse"`
	// NoModTime presumably corresponds to --no-modtime (ModTime left
	// blank) - NOTE(review): the flag binding is not visible here.
	NoModTime bool `json:"noModTime"`
	// NoMimeType presumably corresponds to --no-mimetype (MimeType left
	// blank) - NOTE(review): the flag binding is not visible here.
	NoMimeType bool `json:"noMimeType"`
	// ShowEncrypted presumably corresponds to --encrypted -
	// NOTE(review): the flag binding is not visible here.
	ShowEncrypted bool `json:"showEncrypted"`
	// ShowOrigIDs is set by --original: show the ID of the underlying
	// Object.
	ShowOrigIDs bool `json:"showOrigIDs"`
	// ShowHash makes ListJSON fill in the Hashes map for each object.
	ShowHash bool `json:"showHash"`
	// DirsOnly is set by --dirs-only: show only directories in the
	// listing.
	DirsOnly bool `json:"dirsOnly"`
	// FilesOnly is set by --files-only: show only files in the listing.
	FilesOnly bool `json:"filesOnly"`
	// HashTypes names the hash types to show, eg "MD5", "SHA-1".
	//
	// A non-empty list turns hash output on even when ShowHash is false
	// and restricts the output to the listed types. When empty, the
	// hash types reported by the Fs being listed are used. ListJSON
	// returns an error if a name cannot be parsed as a hash type.
	HashTypes []string `json:"hashTypes"`
}
// ListJSON lists fsrc using the options in opt calling callback for each item // ListJSON lists fsrc using the options in opt calling callback for each item
@ -99,6 +101,20 @@ func ListJSON(ctx context.Context, fsrc fs.Fs, remote string, opt *ListJSONOpt,
canGetTier := features.GetTier canGetTier := features.GetTier
format := formatForPrecision(fsrc.Precision()) format := formatForPrecision(fsrc.Precision())
isBucket := features.BucketBased && remote == "" && fsrc.Root() == "" // if bucket based remote listing the root mark directories as buckets isBucket := features.BucketBased && remote == "" && fsrc.Root() == "" // if bucket based remote listing the root mark directories as buckets
showHash := opt.ShowHash
hashTypes := fsrc.Hashes().Array()
if len(opt.HashTypes) != 0 {
showHash = true
hashTypes = []hash.Type{}
for _, hashType := range opt.HashTypes {
var ht hash.Type
err := ht.Set(hashType)
if err != nil {
return err
}
hashTypes = append(hashTypes, ht)
}
}
err := walk.ListR(ctx, fsrc, remote, false, ConfigMaxDepth(opt.Recurse), walk.ListAll, func(entries fs.DirEntries) (err error) { err := walk.ListR(ctx, fsrc, remote, false, ConfigMaxDepth(opt.Recurse), walk.ListAll, func(entries fs.DirEntries) (err error) {
for _, entry := range entries { for _, entry := range entries {
switch entry.(type) { switch entry.(type) {
@ -150,9 +166,9 @@ func ListJSON(ctx context.Context, fsrc fs.Fs, remote string, opt *ListJSONOpt,
item.IsBucket = isBucket item.IsBucket = isBucket
case fs.Object: case fs.Object:
item.IsDir = false item.IsDir = false
if opt.ShowHash { if showHash {
item.Hashes = make(map[string]string) item.Hashes = make(map[string]string)
for _, hashType := range x.Fs().Hashes().Array() { for _, hashType := range hashTypes {
hash, err := x.Hash(ctx, hashType) hash, err := x.Hash(ctx, hashType)
if err != nil { if err != nil {
fs.Errorf(x, "Failed to read hash: %v", err) fs.Errorf(x, "Failed to read hash: %v", err)