rcat: configurable small files cutoff and implement proper upload verification

This commit is contained in:
Stefan Breunig 2017-09-11 08:26:53 +02:00
parent 57817397a0
commit 80b1f2a494
4 changed files with 170 additions and 125 deletions

View File

@ -22,13 +22,24 @@ rclone rcat reads from standard input (stdin) and copies it to a
single remote file.
echo "hello world" | rclone rcat remote:path/to/file
ffmpeg - | rclone rcat --checksum remote:path/to/file
Note that since the size is not known in advance, chunking options
will likely be ignored. The upload can also not be retried because
the data is not kept around until the upload succeeds. If you need
to transfer a lot of data, you're better off caching locally and
then ` + "`rclone move`" + ` it to the destination.
`,
If the remote file already exists, it will be overwritten.
rcat will try to upload small files in a single request, which is
usually more efficient than the streaming/chunked upload endpoints,
which use multiple requests. Exact behaviour depends on the remote.
What is considered a small file may be set through
` + "`--streaming-upload-cutoff`" + `. Uploading only starts after
the cutoff is reached or if the file ends before that. The data
must fit into RAM. The cutoff needs to be small enough to adhere
the limits of your remote, please see there. Generally speaking,
setting this cutoff too high will decrease your performance.
Note that the upload can also not be retried because the data is
not kept around until the upload succeeds. If you need to transfer
a lot of data, you're better off caching locally and then
` + "`rclone move`" + ` it to the destination.`,
Run: func(command *cobra.Command, args []string) {
cmd.CheckArgs(1, 1, command, args)

View File

@ -102,6 +102,7 @@ var (
bindAddr = StringP("bind", "", "", "Local address to bind to for outgoing connections, IPv4, IPv6 or name.")
disableFeatures = StringP("disable", "", "", "Disable a comma separated list of features. Use help to see a list.")
userAgent = StringP("user-agent", "", "rclone/"+Version, "Set the user-agent to a specified string. The default is rclone/ version")
streamingUploadCutoff = SizeSuffix(100 * 1024)
logLevel = LogLevelNotice
statsLogLevel = LogLevelInfo
bwLimit BwTimetable
@ -117,6 +118,7 @@ func init() {
VarP(&statsLogLevel, "stats-log-level", "", "Log level to show --stats output DEBUG|INFO|NOTICE|ERROR")
VarP(&bwLimit, "bwlimit", "", "Bandwidth limit in kBytes/s, or use suffix b|k|M|G or a full timetable.")
VarP(&bufferSize, "buffer-size", "", "Buffer size when copying files.")
VarP(&streamingUploadCutoff, "streaming-upload-cutoff", "", "Cutoff for switching to chunked upload if file size is unknown. Upload starts after reaching cutoff or when file ends.")
}
// crypt internals
@ -238,6 +240,7 @@ type ConfigInfo struct {
TPSLimitBurst int
BindAddr net.IP
DisableFeatures []string
StreamingUploadCutoff SizeSuffix
}
// Return the path to the configuration file
@ -377,6 +380,7 @@ func LoadConfig() {
Config.TPSLimit = *tpsLimit
Config.TPSLimitBurst = *tpsLimitBurst
Config.BufferSize = bufferSize
Config.StreamingUploadCutoff = streamingUploadCutoff
Config.TrackRenames = *trackRenames

View File

@ -65,7 +65,7 @@ func HashEquals(src, dst string) bool {
// err - may return an error which will already have been logged
//
// If an error is returned it will return equal as false
func CheckHashes(src, dst Object) (equal bool, hash HashType, err error) {
func CheckHashes(src ObjectInfo, dst Object) (equal bool, hash HashType, err error) {
common := src.Fs().Hashes().Overlap(dst.Fs().Hashes())
// Debugf(nil, "Shared hashes: %v", common)
if common.Count() == 0 {
@ -115,11 +115,11 @@ func CheckHashes(src, dst Object) (equal bool, hash HashType, err error) {
//
// Otherwise the file is considered to be not equal including if there
// were errors reading info.
func Equal(src, dst Object) bool {
func Equal(src ObjectInfo, dst Object) bool {
return equal(src, dst, Config.SizeOnly, Config.CheckSum)
}
func equal(src, dst Object, sizeOnly, checkSum bool) bool {
func equal(src ObjectInfo, dst Object, sizeOnly, checkSum bool) bool {
if !Config.IgnoreSize {
if src.Size() != dst.Size() {
Debugf(src, "Sizes differ")
@ -1587,27 +1587,44 @@ func Rcat(fdst Fs, dstFileName string, in0 io.ReadCloser, modTime time.Time) (er
Stats.Transferring(dstFileName)
defer func() {
Stats.DoneTransferring(dstFileName, err == nil)
if err := in0.Close(); err != nil {
if err = in0.Close(); err != nil {
Debugf(fdst, "Rcat: failed to close source: %v", err)
}
}()
hashOption := &HashesOption{Hashes: NewHashSet()}
in := in0
buf := make([]byte, 100*1024)
if n, err := io.ReadFull(in0, buf); err != nil {
Debugf(fdst, "File to upload is small, uploading instead of streaming")
in = ioutil.NopCloser(bytes.NewReader(buf[:n]))
in = NewAccountSizeName(in, int64(n), dstFileName).WithBuffer()
if !Config.SizeOnly {
hashOption = &HashesOption{Hashes: HashSet(fdst.Hashes().GetOne())}
}
objInfo := NewStaticObjectInfo(dstFileName, modTime, int64(n), false, nil, nil)
_, err := fdst.Put(in, objInfo, hashOption)
hashOption := &HashesOption{Hashes: fdst.Hashes()}
hash, err := NewMultiHasherTypes(fdst.Hashes())
if err != nil {
return err
}
in = ioutil.NopCloser(io.MultiReader(bytes.NewReader(buf), in0))
readCounter := NewCountingReader(in0)
trackingIn := io.TeeReader(readCounter, hash)
compare := func(dst Object) error {
src := NewStaticObjectInfo(dstFileName, modTime, int64(readCounter.BytesRead()), false, hash.Sums(), fdst)
if !Equal(src, dst) {
Stats.Error()
err = errors.Errorf("corrupted on transfer")
Errorf(dst, "%v", err)
return err
}
return nil
}
// check if file small enough for direct upload
buf := make([]byte, Config.StreamingUploadCutoff)
if n, err := io.ReadFull(trackingIn, buf); err == io.EOF || err == io.ErrUnexpectedEOF {
Debugf(fdst, "File to upload is small (%d bytes), uploading instead of streaming", n)
in := ioutil.NopCloser(bytes.NewReader(buf[:n]))
in = NewAccountSizeName(in, int64(n), dstFileName).WithBuffer()
objInfo := NewStaticObjectInfo(dstFileName, modTime, int64(n), false, nil, nil)
dst, err := fdst.Put(in, objInfo, hashOption)
if err != nil {
return err
}
return compare(dst)
}
in := ioutil.NopCloser(io.MultiReader(bytes.NewReader(buf), trackingIn))
fStreamTo := fdst
canStream := fdst.Features().PutStream != nil
@ -1626,10 +1643,6 @@ func Rcat(fdst Fs, dstFileName string, in0 io.ReadCloser, modTime time.Time) (er
fStreamTo = tmpLocalFs
}
if !Config.SizeOnly {
hashOption = &HashesOption{Hashes: HashSet(fStreamTo.Hashes().GetOne())}
}
in = NewAccountSizeName(in, -1, dstFileName).WithBuffer()
if Config.DryRun {
@ -1641,11 +1654,17 @@ func Rcat(fdst Fs, dstFileName string, in0 io.ReadCloser, modTime time.Time) (er
objInfo := NewStaticObjectInfo(dstFileName, modTime, -1, false, nil, nil)
tmpObj, err := fStreamTo.Features().PutStream(in, objInfo, hashOption)
if err == nil && !canStream {
err = Copy(fdst, nil, dstFileName, tmpObj)
}
if err != nil {
return err
}
if err = compare(tmpObj); err != nil {
return err
}
if !canStream {
return Copy(fdst, nil, dstFileName, tmpObj)
}
return nil
}
// Rmdirs removes any empty directories (or directories only
// containing empty directories) under f, including f.

View File

@ -732,6 +732,10 @@ func TestCat(t *testing.T) {
}
func TestRcat(t *testing.T) {
checkSumBefore := fs.Config.CheckSum
defer func() { fs.Config.CheckSum = checkSumBefore }()
check := func() {
r := NewRun(t)
defer r.Finalise()
@ -740,7 +744,7 @@ func TestRcat(t *testing.T) {
data1 := "this is some really nice test data"
path1 := "small_file_from_pipe"
data2 := string(make([]byte, 100*1024+1))
data2 := string(make([]byte, fs.Config.StreamingUploadCutoff+1))
path2 := "big_file_from_pipe"
in := ioutil.NopCloser(strings.NewReader(data1))
@ -756,6 +760,13 @@ func TestRcat(t *testing.T) {
fstest.CheckItems(t, r.fremote, file1, file2)
}
fs.Config.CheckSum = true
check()
fs.Config.CheckSum = false
check()
}
func TestRmdirs(t *testing.T) {
r := NewRun(t)
defer r.Finalise()