From 8916455e4f227232c87819a3dcbd0926ac247950 Mon Sep 17 00:00:00 2001
From: Nick Craig-Wood
Date: Fri, 26 May 2017 14:19:18 +0100
Subject: [PATCH] dropbox: implement dropbox hasher #1302

---
 dropbox/dbhash/dbhash.go      | 127 ++++++++++++++++++++++++++++++++++
 dropbox/dbhash/dbhash_test.go |  88 +++++++++++++++++++++++
 2 files changed, 215 insertions(+)
 create mode 100644 dropbox/dbhash/dbhash.go
 create mode 100644 dropbox/dbhash/dbhash_test.go

diff --git a/dropbox/dbhash/dbhash.go b/dropbox/dbhash/dbhash.go
new file mode 100644
index 000000000..34f06f272
--- /dev/null
+++ b/dropbox/dbhash/dbhash.go
@@ -0,0 +1,127 @@
+// Package dbhash implements the dropbox hash as described in
+//
+// https://www.dropbox.com/developers/reference/content-hash
+package dbhash
+
+import (
+	"crypto/sha256"
+	"hash"
+)
+
+const (
+	// BlockSize of the checksum in bytes.
+	BlockSize = sha256.BlockSize
+	// Size of the checksum in bytes (the length of a SHA-256 sum).
+	Size              = sha256.Size
+	bytesPerBlock     = 4 * 1024 * 1024 // size of each separately hashed block
+	hashReturnedError = "hash function returned error"
+)
+
+type digest struct {
+	n           int       // bytes written into blockHash so far
+	blockHash   hash.Hash // hash of the block currently being written
+	totalHash   hash.Hash // hash of the completed block hashes
+	sumCalled   bool      // Sum has been called since the last Reset
+	writtenMore bool      // data has been written since the last Sum or Reset
+}
+
+// New returns a new hash.Hash computing the Dropbox checksum.
+func New() hash.Hash {
+	d := &digest{}
+	d.Reset()
+	return d
+}
+
+// writeBlockHash writes the current block hash into the total hash
+func (d *digest) writeBlockHash() {
+	blockHash := d.blockHash.Sum(nil)
+	_, err := d.totalHash.Write(blockHash)
+	if err != nil {
+		panic(hashReturnedError)
+	}
+	// reset counters for blockhash
+	d.n = 0
+	d.blockHash.Reset()
+}
+
+// Write writes len(p) bytes from p to the underlying data stream. It returns
+// the number of bytes written from p (0 <= n <= len(p)) and any error
+// encountered that caused the write to stop early. Write must return a non-nil
+// error if it returns n < len(p). Write must not modify the slice data, even
+// temporarily.
+//
+// Implementations must not retain p.
+func (d *digest) Write(p []byte) (n int, err error) {
+	n = len(p)
+	for len(p) > 0 {
+		d.writtenMore = true
+		toWrite := bytesPerBlock - d.n
+		if toWrite > len(p) {
+			toWrite = len(p)
+		}
+		_, err = d.blockHash.Write(p[:toWrite])
+		if err != nil {
+			panic(hashReturnedError)
+		}
+		d.n += toWrite
+		p = p[toWrite:]
+		// Accumulate the total hash
+		if d.n == bytesPerBlock {
+			d.writeBlockHash()
+		}
+	}
+	return n, nil
+}
+
+// Sum appends the current hash to b and returns the resulting slice.
+// NB it folds any partial block into the total hash, changing the state.
+//
+// TODO(ncw) Sum() can only be called once for this type of hash.
+// If you call Sum(), then Write() then Sum() it will result in
+// a panic. Calling Write() then Sum(), then Sum() is OK.
+func (d *digest) Sum(b []byte) []byte {
+	if d.sumCalled && d.writtenMore {
+		panic("digest.Sum() called more than once")
+	}
+	d.sumCalled = true
+	d.writtenMore = false
+	if d.n != 0 {
+		d.writeBlockHash()
+	}
+	return d.totalHash.Sum(b)
+}
+
+// Reset resets the Hash to its initial state.
+func (d *digest) Reset() {
+	d.n = 0
+	d.totalHash = sha256.New()
+	d.blockHash = sha256.New()
+	d.sumCalled = false
+	d.writtenMore = false
+}
+
+// Size returns the number of bytes Sum will return.
+func (d *digest) Size() int {
+	return d.totalHash.Size()
+}
+
+// BlockSize returns the hash's underlying block size.
+// The Write method must be able to accept any amount
+// of data, but it may operate more efficiently if all writes
+// are a multiple of the block size.
+func (d *digest) BlockSize() int {
+	return d.totalHash.BlockSize()
+}
+
+// Sum returns the Dropbox checksum of the data.
+func Sum(data []byte) [Size]byte {
+	var d digest
+	d.Reset()
+	_, _ = d.Write(data) // Write always returns a nil error
+	var out [Size]byte
+	d.Sum(out[:0])
+	return out
+}
+
+// must implement this interface
+var _ hash.Hash = (*digest)(nil)
diff --git a/dropbox/dbhash/dbhash_test.go b/dropbox/dbhash/dbhash_test.go
new file mode 100644
index 000000000..f219d0961
--- /dev/null
+++ b/dropbox/dbhash/dbhash_test.go
@@ -0,0 +1,88 @@
+package dbhash_test
+
+import (
+	"encoding/hex"
+	"fmt"
+	"testing"
+
+	"github.com/ncw/rclone/dropbox/dbhash"
+	"github.com/stretchr/testify/assert"
+)
+
+func testChunk(t *testing.T, chunk int) {
+	data := make([]byte, chunk)
+	for i := 0; i < chunk; i++ {
+		data[i] = 'A'
+	}
+	for _, test := range []struct {
+		n    int
+		want string
+	}{
+		{0, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
+		{1, "1cd6ef71e6e0ff46ad2609d403dc3fee244417089aa4461245a4e4fe23a55e42"},
+		{2, "01e0655fb754d10418a73760f57515f4903b298e6d67dda6bf0987fa79c22c88"},
+		{4096, "8620913d33852befe09f16fff8fd75f77a83160d29f76f07e0276e9690903035"},
+		{4194303, "647c8627d70f7a7d13ce96b1e7710a771a55d41a62c3da490d92e56044d311fa"},
+		{4194304, "d4d63bac5b866c71620185392a8a6218ac1092454a2d16f820363b69852befa3"},
+		{4194305, "8f553da8d00d0bf509d8470e242888be33019c20c0544811f5b2b89e98360b92"},
+		{8388607, "83b30cf4fb5195b04a937727ae379cf3d06673bf8f77947f6a92858536e8369c"},
+		{8388608, "e08b3ba1f538804075c5f939accdeaa9efc7b5c01865c94a41e78ca6550a88e7"},
+		{8388609, "02c8a4aefc2bfc9036f89a7098001865885938ca580e5c9e5db672385edd303c"},
+	} {
+		d := dbhash.New()
+		var toWrite int
+		for toWrite = test.n; toWrite >= chunk; toWrite -= chunk {
+			n, err := d.Write(data)
+			assert.Nil(t, err)
+			assert.Equal(t, chunk, n)
+		}
+		n, err := d.Write(data[:toWrite])
+		assert.Nil(t, err)
+		assert.Equal(t, toWrite, n)
+		got := hex.EncodeToString(d.Sum(nil))
+		assert.Equal(t, test.want, got, fmt.Sprintf("when testing length %d", test.n))
+
+	}
+}
+
+func TestHashChunk16M(t *testing.T)  { testChunk(t, 16*1024*1024) }
+func TestHashChunk8M(t *testing.T)   { testChunk(t, 8*1024*1024) }
+func TestHashChunk4M(t *testing.T)   { testChunk(t, 4*1024*1024) }
+func TestHashChunk2M(t *testing.T)   { testChunk(t, 2*1024*1024) }
+func TestHashChunk1M(t *testing.T)   { testChunk(t, 1*1024*1024) }
+func TestHashChunk64k(t *testing.T)  { testChunk(t, 64*1024) }
+func TestHashChunk32k(t *testing.T)  { testChunk(t, 32*1024) }
+func TestHashChunk2048(t *testing.T) { testChunk(t, 2048) }
+func TestHashChunk2047(t *testing.T) { testChunk(t, 2047) }
+
+func TestSumCalledTwice(t *testing.T) {
+	d := dbhash.New()
+	assert.NotPanics(t, func() { d.Sum(nil) })
+	d.Reset()
+	assert.NotPanics(t, func() { d.Sum(nil) })
+	assert.NotPanics(t, func() { d.Sum(nil) })
+	_, _ = d.Write([]byte{1})
+	assert.Panics(t, func() { d.Sum(nil) })
+}
+
+func TestSize(t *testing.T) {
+	d := dbhash.New()
+	assert.Equal(t, 32, d.Size())
+}
+
+func TestBlockSize(t *testing.T) {
+	d := dbhash.New()
+	assert.Equal(t, 64, d.BlockSize())
+}
+
+func TestSum(t *testing.T) {
+	assert.Equal(t,
+		[dbhash.Size]byte{
+			0x1c, 0xd6, 0xef, 0x71, 0xe6, 0xe0, 0xff, 0x46,
+			0xad, 0x26, 0x09, 0xd4, 0x03, 0xdc, 0x3f, 0xee,
+			0x24, 0x44, 0x17, 0x08, 0x9a, 0xa4, 0x46, 0x12,
+			0x45, 0xa4, 0xe4, 0xfe, 0x23, 0xa5, 0x5e, 0x42,
+		},
+		dbhash.Sum([]byte{'A'}),
+	)
+}