jottacloud: docs, fixes and tests for MD5 calculation

* Add docs for --jottacloud-md5-memory-limit
  * Factor out readMD5 function and add tests
  * Fix accounting
  * Make sure temp file is deleted at the start (not Windows)
This commit is contained in:
Nick Craig-Wood 2018-08-20 15:38:21 +01:00
parent ee4485a316
commit f9cf70e3aa
3 changed files with 154 additions and 51 deletions

View File

@ -17,6 +17,7 @@ import (
"github.com/ncw/rclone/backend/jottacloud/api"
"github.com/ncw/rclone/fs"
"github.com/ncw/rclone/fs/accounting"
"github.com/ncw/rclone/fs/config"
"github.com/ncw/rclone/fs/config/configmap"
"github.com/ncw/rclone/fs/config/configstruct"
@ -67,7 +68,7 @@ func init() {
}},
}, {
Name: "md5_memory_limit",
Help: "Files with a size bigger than this value will be cached on disk otherwise they just read into memory ",
Help: "Files bigger than this will be cached on disk to calculate the MD5 if required.",
Default: fs.SizeSuffix(10 * 1024 * 1024),
Advanced: true,
}},
@ -753,67 +754,86 @@ func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) {
return resp.Body, err
}
// Read the md5 of in returning a reader which will read the same contents
//
// The cleanup function should be called when out is finished with
// regardless of whether this function returned an error or not.
func readMD5(in io.Reader, size, threshold int64) (md5sum string, out io.Reader, cleanup func(), err error) {
// we need a MD5
md5Hasher := md5.New()
// use the teeReader to write to the local file AND caclulate the MD5 while doing so
teeReader := io.TeeReader(in, md5Hasher)
// nothing to clean up by default
cleanup = func() {}
// don't cache small files on disk to reduce wear of the disk
if size > threshold {
var tempFile *os.File
// create the cache file
tempFile, err = ioutil.TempFile("", cachePrefix)
if err != nil {
return
}
_ = os.Remove(tempFile.Name()) // Delete the file - may not work on Windows
// clean up the file after we are done downloading
cleanup = func() {
// the file should normally already be close, but just to make sure
_ = tempFile.Close()
_ = os.Remove(tempFile.Name()) // delete the cache file after we are done - may be deleted already
}
// copy the ENTIRE file to disc and calculate the MD5 in the process
if _, err = io.Copy(tempFile, teeReader); err != nil {
return
}
// jump to the start of the local file so we can pass it along
if _, err = tempFile.Seek(0, 0); err != nil {
return
}
// replace the already read source with a reader of our cached file
out = tempFile
} else {
// that's a small file, just read it into memory
var inData []byte
inData, err = ioutil.ReadAll(teeReader)
if err != nil {
return
}
// set the reader to our read memory block
out = bytes.NewReader(inData)
}
return hex.EncodeToString(md5Hasher.Sum(nil)), out, cleanup, nil
}
// Update the object with the contents of the io.Reader, modTime and size
//
// If existing is set then it updates the object rather than creating a new one
//
// The new object may have been created if an error is returned
func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) {
size := src.Size()
md5String, err := src.Hash(hash.MD5)
if err != nil || md5String == "" {
// if the source can't provide a MD5 hash we are screwed and need to calculate it ourselfs
// we read the entire file and cache it in the localdir and send it AFTERwards
// we need a MD5
md5Hasher := md5.New()
// use the teeReader to write to the local file AND caclulate the MD5 while doing so
teeReader := io.TeeReader(in, md5Hasher)
// don't cache small files on disk to reduce wear of the disk
if src.Size() > int64(o.fs.opt.MD5MemoryThreshold) {
// create the cache file
tempFile, err := ioutil.TempFile("", cachePrefix)
if err != nil {
return err
}
// clean up the file after we are done downloading
defer func() {
// the file should normally already be close, but just to make sure
_ = tempFile.Close()
// delete the cache file after we are done
_ = os.Remove(tempFile.Name())
}()
// reade the ENTIRE file to disc and calculate the MD5 in the process
if _, err = io.Copy(tempFile, teeReader); err != nil {
return err
}
// jump to the start of the local file so we can pass it along
if _, err = tempFile.Seek(0, 0); err != nil {
return err
}
// replace the already read source with a reader of our cached file
in = tempFile
} else {
// that's a small file, just read it into memory
var inData []byte
inData, err = ioutil.ReadAll(teeReader)
if err != nil {
return err
}
// set the reader to our read memory block
in = bytes.NewReader(inData)
// unwrap the accounting from the input, we use wrap to put it
// back on after the buffering
var wrap accounting.WrapFn
in, wrap = accounting.UnWrap(in)
var cleanup func()
md5String, in, cleanup, err = readMD5(in, size, int64(o.fs.opt.MD5MemoryThreshold))
defer cleanup()
if err != nil {
return errors.Wrap(err, "failed to calculate MD5")
}
// YEAH, the thing we need the MD5 string
md5String = hex.EncodeToString(md5Hasher.Sum(nil))
// Wrap the accounting back onto the stream
in = wrap(in)
}
size := src.Size()
var resp *http.Response
var result api.JottaFile
opts := rest.Opts{

View File

@ -0,0 +1,67 @@
package jottacloud
import (
"crypto/md5"
"fmt"
"io"
"io/ioutil"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// A test reader to return a test pattern of size
type testReader struct {
size int64
c byte
}
// Reader is the interface that wraps the basic Read method.
func (r *testReader) Read(p []byte) (n int, err error) {
for i := range p {
if r.size <= 0 {
return n, io.EOF
}
p[i] = r.c
r.c = (r.c + 1) % 253
r.size--
n++
}
return
}
func TestReadMD5(t *testing.T) {
// smoke test the reader
b, err := ioutil.ReadAll(&testReader{size: 10})
require.NoError(t, err)
assert.Equal(t, []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, b)
// Check readMD5 for different size and threshold
for _, size := range []int64{0, 1024, 10 * 1024, 100 * 1024} {
t.Run(fmt.Sprintf("%d", size), func(t *testing.T) {
hasher := md5.New()
n, err := io.Copy(hasher, &testReader{size: size})
require.NoError(t, err)
assert.Equal(t, n, size)
wantMD5 := fmt.Sprintf("%x", hasher.Sum(nil))
for _, threshold := range []int64{512, 1024, 10 * 1024, 20 * 1024} {
t.Run(fmt.Sprintf("%d", threshold), func(t *testing.T) {
in := &testReader{size: size}
gotMD5, out, cleanup, err := readMD5(in, size, threshold)
defer cleanup()
require.NoError(t, err)
assert.Equal(t, wantMD5, gotMD5)
// check md5hash of out
hasher := md5.New()
n, err := io.Copy(hasher, out)
require.NoError(t, err)
assert.Equal(t, n, size)
outMD5 := fmt.Sprintf("%x", hasher.Sum(nil))
assert.Equal(t, wantMD5, outMD5)
})
}
})
}
}

View File

@ -91,6 +91,12 @@ not.
Jottacloud supports MD5 type hashes, so you can use the `--checksum`
flag.
Note that Jottacloud requires the MD5 hash before upload so if the
source does not have an MD5 checksum then the file will be cached
temporarily on disk (wherever the `TMPDIR` environment variable points
to) before it is uploaded. Small files will be cached in memory - see
the `--jottacloud-md5-memory-limit` flag.
### Deleting files ###
Any files you delete with rclone will end up in the trash. Due to a lack of API documentation emptying the trash is currently only possible via the Jottacloud website.
@ -108,6 +114,16 @@ There are quite a few characters that can't be in Jottacloud file names. Rclone
Jottacloud only supports filenames up to 255 characters in length.
### Specific options ###
Here are the command line options specific to this cloud storage
system.
#### --jottacloud-md5-memory-limit SizeSuffix
Files bigger than this will be cached on disk to calculate the MD5 if
required. (default 10M)
### Troubleshooting ###
Jottacloud exhibits some inconsistent behaviours regarding deleted files and folders which may cause Copy, Move and DirMove operations to previously deleted paths to fail. Emptying the trash should help in such cases.