mirror of
https://github.com/rclone/rclone.git
synced 2024-11-07 17:14:44 +01:00
dropbox: support Dropbox content hashing scheme - fixes #1302
* add support to hashing module * add dbhashsum to list the hashes * add support to dropbox module This means objects uploaded to and downloaded from Dropbox will have their hashes checked. Note that after this change local objects calculate MD5, SHA1 and DBHASH, which is excessive and needs to be fixed.
This commit is contained in:
parent
8916455e4f
commit
6381959850
@ -13,6 +13,7 @@ import (
|
||||
_ "github.com/ncw/rclone/cmd/copy"
|
||||
_ "github.com/ncw/rclone/cmd/copyto"
|
||||
_ "github.com/ncw/rclone/cmd/cryptcheck"
|
||||
_ "github.com/ncw/rclone/cmd/dbhashsum"
|
||||
_ "github.com/ncw/rclone/cmd/dedupe"
|
||||
_ "github.com/ncw/rclone/cmd/delete"
|
||||
_ "github.com/ncw/rclone/cmd/genautocomplete"
|
||||
|
@ -91,7 +91,7 @@ To copy a local directory to a dropbox directory called backup
|
||||
|
||||
rclone copy /home/source remote:backup
|
||||
|
||||
### Modified time and MD5SUMs ###
|
||||
### Modified time and Hashes ###
|
||||
|
||||
Dropbox supports modified times, but the only way to set a
|
||||
modification time is to re-upload the file.
|
||||
@ -102,6 +102,10 @@ decide to upload all your old data to fix the modification times. If
|
||||
you don't want this to happen use `--size-only` or `--checksum` flag
|
||||
to stop it.
|
||||
|
||||
Dropbox supports [its own hash
|
||||
type](https://www.dropbox.com/developers/reference/content-hash) which
|
||||
is checked for all transfers.
|
||||
|
||||
### Specific options ###
|
||||
|
||||
Here are the command line options specific to this cloud storage
|
||||
|
@ -20,7 +20,7 @@ Here is an overview of the major features of each cloud storage system.
|
||||
| Google Drive | MD5 | Yes | No | Yes | R/W |
|
||||
| Amazon S3 | MD5 | Yes | No | No | R/W |
|
||||
| Openstack Swift | MD5 | Yes | No | No | R/W |
|
||||
| Dropbox | - | Yes | Yes | No | - |
|
||||
| Dropbox | DBHASH †| Yes | Yes | No | - |
|
||||
| Google Cloud Storage | MD5 | Yes | No | No | R/W |
|
||||
| Amazon Drive | MD5 | No | Yes | No | R |
|
||||
| Microsoft OneDrive | SHA1 | Yes | Yes | No | R |
|
||||
@ -28,7 +28,7 @@ Here is an overview of the major features of each cloud storage system.
|
||||
| Backblaze B2 | SHA1 | Yes | No | No | R/W |
|
||||
| Yandex Disk | MD5 | Yes | No | No | R/W |
|
||||
| SFTP | - | Yes | Depends | No | - |
|
||||
| FTP | None | No | Yes | No | - |
|
||||
| FTP | - | No | Yes | No | - |
|
||||
| The local filesystem | All | Yes | Depends | No | - |
|
||||
|
||||
### Hash ###
|
||||
@ -41,6 +41,10 @@ the `check` command.
|
||||
To use the checksum checks between filesystems they must support a
|
||||
common hash type.
|
||||
|
||||
† Note that Dropbox supports [its own custom
|
||||
hash](https://www.dropbox.com/developers/reference/content-hash).
|
||||
This is the SHA-256 sum of the concatenated SHA-256 sums of each 4MB block of the file.
|
||||
|
||||
### ModTime ###
|
||||
|
||||
The cloud storage system supports setting modification times on
|
||||
|
@ -2,7 +2,6 @@
|
||||
package dropbox
|
||||
|
||||
// FIXME put low level retries in
|
||||
// FIXME add dropbox style hashes
|
||||
// FIXME dropbox for business would be quite easy to add
|
||||
|
||||
/*
|
||||
@ -99,12 +98,14 @@ type Fs struct {
|
||||
}
|
||||
|
||||
// Object describes a dropbox object
|
||||
//
|
||||
// Dropbox Objects always have full metadata
|
||||
type Object struct {
|
||||
fs *Fs // what this object is part of
|
||||
remote string // The remote path
|
||||
bytes int64 // size of the object
|
||||
modTime time.Time // time it was last modified
|
||||
hasMetadata bool // metadata is valid
|
||||
fs *Fs // what this object is part of
|
||||
remote string // The remote path
|
||||
bytes int64 // size of the object
|
||||
modTime time.Time // time it was last modified
|
||||
hash string // content_hash of the object
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
@ -640,7 +641,7 @@ func (f *Fs) DirMove(src fs.Fs, srcRemote, dstRemote string) error {
|
||||
|
||||
// Hashes returns the supported hash sets.
|
||||
func (f *Fs) Hashes() fs.HashSet {
|
||||
return fs.HashSet(fs.HashNone)
|
||||
return fs.HashSet(fs.HashDropbox)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
@ -663,9 +664,16 @@ func (o *Object) Remote() string {
|
||||
return o.remote
|
||||
}
|
||||
|
||||
// Hash is unsupported on Dropbox
|
||||
// Hash returns the dropbox special hash
|
||||
func (o *Object) Hash(t fs.HashType) (string, error) {
|
||||
return "", fs.ErrHashUnsupported
|
||||
if t != fs.HashDropbox {
|
||||
return "", fs.ErrHashUnsupported
|
||||
}
|
||||
err := o.readMetaData()
|
||||
if err != nil {
|
||||
return "", errors.Wrap(err, "failed to read hash from metadata")
|
||||
}
|
||||
return o.hash, nil
|
||||
}
|
||||
|
||||
// Size returns the size of an object in bytes
|
||||
@ -679,7 +687,7 @@ func (o *Object) Size() int64 {
|
||||
func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error {
|
||||
o.bytes = int64(info.Size)
|
||||
o.modTime = info.ClientModified
|
||||
o.hasMetadata = true
|
||||
o.hash = info.ContentHash
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -722,7 +730,7 @@ func (o *Object) metadataKey() string {
|
||||
|
||||
// readMetaData gets the info if it hasn't already been fetched
|
||||
func (o *Object) readMetaData() (err error) {
|
||||
if o.hasMetadata {
|
||||
if !o.modTime.IsZero() {
|
||||
return nil
|
||||
}
|
||||
// Last resort
|
||||
|
16
fs/hash.go
16
fs/hash.go
@ -9,6 +9,7 @@ import (
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/ncw/rclone/dropbox/dbhash"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
@ -26,18 +27,23 @@ const (
|
||||
// HashSHA1 indicates SHA-1 support
|
||||
HashSHA1
|
||||
|
||||
// HashDropbox indicates Dropbox special hash
|
||||
// https://www.dropbox.com/developers/reference/content-hash
|
||||
HashDropbox
|
||||
|
||||
// HashNone indicates no hashes are supported
|
||||
HashNone HashType = 0
|
||||
)
|
||||
|
||||
// SupportedHashes is the set of all the hashes supported by
|
||||
// HashStream and MultiHasher.
|
||||
var SupportedHashes = NewHashSet(HashMD5, HashSHA1)
|
||||
var SupportedHashes = NewHashSet(HashMD5, HashSHA1, HashDropbox)
|
||||
|
||||
// HashWidth gives the width in characters for any HashType
|
||||
var HashWidth = map[HashType]int{
|
||||
HashMD5: 32,
|
||||
HashSHA1: 40,
|
||||
HashMD5: 32,
|
||||
HashSHA1: 40,
|
||||
HashDropbox: 64,
|
||||
}
|
||||
|
||||
// HashStream will calculate hashes of all supported hash types.
|
||||
@ -73,6 +79,8 @@ func (h HashType) String() string {
|
||||
return "MD5"
|
||||
case HashSHA1:
|
||||
return "SHA-1"
|
||||
case HashDropbox:
|
||||
return "DropboxHash"
|
||||
default:
|
||||
err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
|
||||
panic(err)
|
||||
@ -94,6 +102,8 @@ func hashFromTypes(set HashSet) (map[HashType]hash.Hash, error) {
|
||||
hashers[t] = md5.New()
|
||||
case HashSHA1:
|
||||
hashers[t] = sha1.New()
|
||||
case HashDropbox:
|
||||
hashers[t] = dbhash.New()
|
||||
default:
|
||||
err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
|
||||
panic(err)
|
||||
|
@ -65,16 +65,18 @@ var hashTestSet = []hashTest{
|
||||
{
|
||||
input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
|
||||
output: map[fs.HashType]string{
|
||||
fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe",
|
||||
fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
|
||||
fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe",
|
||||
fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
|
||||
fs.HashDropbox: "214d2fcf3566e94c99ad2f59bd993daca46d8521a0c447adf4b324f53fddc0c7",
|
||||
},
|
||||
},
|
||||
// Empty data set
|
||||
{
|
||||
input: []byte{},
|
||||
output: map[fs.HashType]string{
|
||||
fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e",
|
||||
fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
|
||||
fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e",
|
||||
fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
|
||||
fs.HashDropbox: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
|
||||
},
|
||||
},
|
||||
}
|
||||
@ -88,13 +90,13 @@ func TestMultiHasher(t *testing.T) {
|
||||
sums := mh.Sums()
|
||||
for k, v := range sums {
|
||||
expect, ok := test.output[k]
|
||||
require.True(t, ok)
|
||||
require.True(t, ok, "test output for hash not found")
|
||||
assert.Equal(t, v, expect)
|
||||
}
|
||||
// Test that all are present
|
||||
for k, v := range test.output {
|
||||
expect, ok := sums[k]
|
||||
require.True(t, ok)
|
||||
require.True(t, ok, "test output for hash not found")
|
||||
assert.Equal(t, v, expect)
|
||||
}
|
||||
}
|
||||
@ -145,8 +147,8 @@ func TestHashStreamTypes(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestHashSetStringer(t *testing.T) {
|
||||
h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5)
|
||||
assert.Equal(t, h.String(), "[MD5, SHA-1]")
|
||||
h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5, fs.HashDropbox)
|
||||
assert.Equal(t, h.String(), "[MD5, SHA-1, DropboxHash]")
|
||||
h = fs.NewHashSet(fs.HashSHA1)
|
||||
assert.Equal(t, h.String(), "[SHA-1]")
|
||||
h = fs.NewHashSet()
|
||||
|
@ -978,6 +978,15 @@ func Sha1sum(f Fs, w io.Writer) error {
|
||||
return hashLister(HashSHA1, f, w)
|
||||
}
|
||||
|
||||
// DropboxHashSum lists the Fs to the supplied writer
|
||||
//
|
||||
// Obeys includes and excludes
|
||||
//
|
||||
// Lists in parallel which may get them out of order
|
||||
func DropboxHashSum(f Fs, w io.Writer) error {
|
||||
return hashLister(HashDropbox, f, w)
|
||||
}
|
||||
|
||||
func hashLister(ht HashType, f Fs, w io.Writer) error {
|
||||
return ListFn(f, func(o Object) {
|
||||
Stats.Checking(o.Remote())
|
||||
|
@ -386,7 +386,7 @@ func TestLsLong(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMd5sum(t *testing.T) {
|
||||
func TestHashSums(t *testing.T) {
|
||||
r := NewRun(t)
|
||||
defer r.Finalise()
|
||||
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
|
||||
@ -394,6 +394,8 @@ func TestMd5sum(t *testing.T) {
|
||||
|
||||
fstest.CheckItems(t, r.fremote, file1, file2)
|
||||
|
||||
// MD5 Sum
|
||||
|
||||
var buf bytes.Buffer
|
||||
err := fs.Md5sum(r.fremote, &buf)
|
||||
require.NoError(t, err)
|
||||
@ -408,20 +410,12 @@ func TestMd5sum(t *testing.T) {
|
||||
!strings.Contains(res, " potato2\n") {
|
||||
t.Errorf("potato2 missing: %q", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSha1sum(t *testing.T) {
|
||||
r := NewRun(t)
|
||||
defer r.Finalise()
|
||||
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
|
||||
file2 := r.WriteBoth("empty space", "", t2)
|
||||
// SHA1 Sum
|
||||
|
||||
fstest.CheckItems(t, r.fremote, file1, file2)
|
||||
|
||||
var buf bytes.Buffer
|
||||
err := fs.Sha1sum(r.fremote, &buf)
|
||||
err = fs.Sha1sum(r.fremote, &buf)
|
||||
require.NoError(t, err)
|
||||
res := buf.String()
|
||||
res = buf.String()
|
||||
if !strings.Contains(res, "da39a3ee5e6b4b0d3255bfef95601890afd80709 empty space\n") &&
|
||||
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
|
||||
!strings.Contains(res, " empty space\n") {
|
||||
@ -432,6 +426,22 @@ func TestSha1sum(t *testing.T) {
|
||||
!strings.Contains(res, " potato2\n") {
|
||||
t.Errorf("potato2 missing: %q", res)
|
||||
}
|
||||
|
||||
// Dropbox Hash Sum
|
||||
|
||||
err = fs.DropboxHashSum(r.fremote, &buf)
|
||||
require.NoError(t, err)
|
||||
res = buf.String()
|
||||
if !strings.Contains(res, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 empty space\n") &&
|
||||
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
|
||||
!strings.Contains(res, " empty space\n") {
|
||||
t.Errorf("empty space missing: %q", res)
|
||||
}
|
||||
if !strings.Contains(res, "a979481df794fed9c3990a6a422e0b1044ac802c15fab13af9c687f8bdbee01a potato2\n") &&
|
||||
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
|
||||
!strings.Contains(res, " potato2\n") {
|
||||
t.Errorf("potato2 missing: %q", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCount(t *testing.T) {
|
||||
|
Loading…
Reference in New Issue
Block a user