From 9049bb62cac19058da7e4ca133c2aae67d8002d6 Mon Sep 17 00:00:00 2001 From: Ivan Andreev Date: Wed, 9 Oct 2019 12:21:45 +0300 Subject: [PATCH] chunker: prevent chunk corruption, survive meta-like input --- backend/chunker/chunker.go | 217 ++++++++++----- backend/chunker/chunker_internal_test.go | 321 +++++++++++++++++++++++ docs/content/chunker.md | 28 +- fstest/fstests/fstests.go | 19 +- 4 files changed, 506 insertions(+), 79 deletions(-) diff --git a/backend/chunker/chunker.go b/backend/chunker/chunker.go index ba44fdb64..f83dc1a5b 100644 --- a/backend/chunker/chunker.go +++ b/backend/chunker/chunker.go @@ -98,6 +98,14 @@ const optimizeFirstChunk = false // revealHidden is a stub until chunker lands the `reveal hidden` option. const revealHidden = false +// Prevent memory overflow due to specially crafted chunk name +const maxSafeChunkNumber = 10000000 + +// standard chunker errors +var ( + ErrChunkOverflow = errors.New("chunk number overflow") +) + // Note: metadata logic is tightly coupled with chunker code in many // places, eg. in checks whether a file should have meta object or is // eligible for chunking. @@ -176,18 +184,17 @@ falling back to SHA1 if unsupported. Requires "simplejson".`, Help: `Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".`, }}, }, { - Name: "fail_on_bad_chunks", + Name: "fail_hard", Advanced: true, Default: false, - Help: `The list command might encounter files with missinng or invalid chunks. -This boolean flag tells what rclone should do in such cases.`, + Help: `Choose how chunker should handle files with missing or invalid chunks.`, Examples: []fs.OptionExample{ { Value: "true", - Help: "Fail with error.", + Help: "Report errors and abort current command.", }, { Value: "false", - Help: "Silently ignore invalid object.", + Help: "Warn user, skip incomplete file and proceed.", }, }, }}, @@ -231,6 +238,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) { root: rpath, opt: *opt, } + f.dirSort = true // processEntries requires that meta Objects prerun data chunks atm. switch opt.MetaFormat { case "none": @@ -298,13 +306,13 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) { // Options defines the configuration for this backend type Options struct { - Remote string `config:"remote"` - ChunkSize fs.SizeSuffix `config:"chunk_size"` - NameFormat string `config:"name_format"` - StartFrom int `config:"start_from"` - MetaFormat string `config:"meta_format"` - HashType string `config:"hash_type"` - FailOnBadChunks bool `config:"fail_on_bad_chunks"` + Remote string `config:"remote"` + ChunkSize fs.SizeSuffix `config:"chunk_size"` + NameFormat string `config:"name_format"` + StartFrom int `config:"start_from"` + MetaFormat string `config:"meta_format"` + HashType string `config:"hash_type"` + FailHard bool `config:"fail_hard"` } // Fs represents a wrapped fs.Fs @@ -322,6 +330,7 @@ type Fs struct { nameRegexp *regexp.Regexp // regular expression to match chunk names opt Options // copy of Options features *fs.Features // optional features + dirSort bool // reserved for future, ignored } // setChunkNameFormat converts pattern based chunk name format @@ -454,6 +463,20 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrl return } +// forbidChunk prints error message or raises error if file is chunk. +// First argument sets log prefix, use `false` to suppress message. 
+func (f *Fs) forbidChunk(o interface{}, filePath string) error {
+	if mainPath, _, _, _ := f.parseChunkName(filePath); mainPath != "" {
+		if f.opt.FailHard {
+			return fmt.Errorf("chunk overlap with %q", mainPath)
+		}
+		if boolVal, isBool := o.(bool); !isBool || boolVal {
+			fs.Errorf(o, "chunk overlap with %q", mainPath)
+		}
+	}
+	return nil
+}
+
 // List the objects and directories in dir into entries.
 // The entries can be returned in any order but should be
 // for a complete directory.
@@ -480,7 +503,7 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
 	if err != nil {
 		return nil, err
 	}
-	return f.chunkEntries(ctx, entries, f.opt.FailOnBadChunks)
+	return f.processEntries(ctx, entries, dir)
 }
 
 // ListR lists the objects and directories of the Fs starting
@@ -498,11 +521,11 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
 // immediately.
 //
 // Don't implement this unless you have a more efficient way
-// of listing recursively that doing a directory traversal.
+// of listing recursively than doing a directory traversal.
 func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
 	do := f.base.Features().ListR
 	return do(ctx, dir, func(entries fs.DirEntries) error {
-		newEntries, err := f.chunkEntries(ctx, entries, f.opt.FailOnBadChunks)
+		newEntries, err := f.processEntries(ctx, entries, dir)
 		if err != nil {
 			return err
 		}
@@ -510,13 +533,15 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
 	})
 }
 
-// chunkEntries is called by List(R). It assembles chunk entries from
-// wrapped remote into composite directory entries.
-func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardErrors bool) (chunkedEntries fs.DirEntries, err error) {
+// processEntries assembles chunk entries into composite entries
+func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirPath string) (newEntries fs.DirEntries, err error) {
 	// sort entries, so that meta objects (if any) appear before their chunks
-	sortedEntries := make(fs.DirEntries, len(origEntries))
-	copy(sortedEntries, origEntries)
-	sort.Sort(sortedEntries)
+	sortedEntries := origEntries
+	if f.dirSort {
+		sortedEntries = make(fs.DirEntries, len(origEntries))
+		copy(sortedEntries, origEntries)
+		sort.Sort(sortedEntries)
+	}
 
 	byRemote := make(map[string]*Object)
 	badEntry := make(map[string]bool)
@@ -554,7 +579,7 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
 			}
 		}
 		if err := mainObject.addChunk(entry, chunkNo); err != nil {
-			if hardErrors {
+			if f.opt.FailHard {
 				return nil, err
 			}
 			badEntry[mainRemote] = true
@@ -570,7 +595,7 @@
 			wrapDir.SetRemote(entry.Remote())
 			tempEntries = append(tempEntries, wrapDir)
 		default:
-			if hardErrors {
+			if f.opt.FailHard {
 				return nil, fmt.Errorf("Unknown object type %T", entry)
 			}
 			fs.Debugf(f, "unknown object type %T", entry)
@@ -581,7 +606,7 @@
 		if object, ok := entry.(*Object); ok {
 			remote := object.Remote()
 			if isSubdir[remote] {
-				if hardErrors {
+				if f.opt.FailHard {
 					return nil, fmt.Errorf("%q is both meta object and directory", remote)
 				}
 				badEntry[remote] = true // fall thru
@@ -591,17 +616,20 @@
 				continue
 			}
 			if err := object.validate(); err != nil {
-				if hardErrors {
+				if 
f.opt.FailHard { return nil, err } fs.Debugf(f, "invalid chunks in object %q", remote) continue } } - chunkedEntries = append(chunkedEntries, entry) + newEntries = append(newEntries, entry) } - return chunkedEntries, nil + if f.dirSort { + sort.Sort(newEntries) + } + return newEntries, nil } // NewObject finds the Object at remote. @@ -615,8 +643,8 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr // but opening even a small file can be slow on some backends. // func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { - if mainRemote, _, _, _ := f.parseChunkName(remote); mainRemote != "" { - return nil, fmt.Errorf("%q should be meta object, not a chunk", remote) + if err := f.forbidChunk(false, remote); err != nil { + return nil, errors.Wrap(err, "can't access") } var ( @@ -734,12 +762,12 @@ func (o *Object) readMetadata(ctx context.Context) error { if err != nil { return err } + _ = reader.Close() // ensure file handle is freed on windows switch o.f.opt.MetaFormat { case "simplejson": - metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metadata) + metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metadata, true) if err != nil { - // TODO: in a rare case we might mistake a small file for metadata return errors.Wrap(err, "invalid metadata") } if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks { @@ -775,8 +803,12 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st baseRemote := remote // Transfer chunks data - for chunkNo := 0; !c.done; chunkNo++ { - tempRemote := f.makeChunkName(baseRemote, chunkNo, "", xactNo) + for c.chunkNo = 0; !c.done; c.chunkNo++ { + if c.chunkNo > maxSafeChunkNumber { + return nil, ErrChunkOverflow + } + + tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactNo) size := c.sizeLeft if size > c.chunkSize { size = c.chunkSize @@ -785,7 +817,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st // If a single chunk is expected, avoid the extra rename operation chunkRemote := tempRemote - if c.expectSingle && chunkNo == 0 && optimizeFirstChunk { + if c.expectSingle && c.chunkNo == 0 && optimizeFirstChunk { chunkRemote = baseRemote } info := f.wrapInfo(src, chunkRemote, size) @@ -836,8 +868,17 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st return nil, fmt.Errorf("Incorrect upload size %d != %d", c.readCount, c.sizeTotal) } - // Finalize the non-chunked object - if len(c.chunks) == 1 { + // Check for input that looks like valid metadata + needMeta := len(c.chunks) > 1 + if c.readCount <= maxMetadataSize && len(c.chunks) == 1 { + _, err := unmarshalSimpleJSON(ctx, c.chunks[0], c.smallHead, false) + needMeta = err == nil + } + + // Finalize small object as non-chunked. + // This can be bypassed, and single chunk with metadata will be + // created due to unsafe input. + if !needMeta && f.useMeta { // If previous object was chunked, remove its chunks f.removeOldChunks(ctx, baseRemote) @@ -918,10 +959,12 @@ type chunkingReader struct { readCount int64 chunkSize int64 chunkLimit int64 + chunkNo int err error done bool chunks []fs.Object expectSingle bool + smallHead []byte fs *Fs hasher gohash.Hash md5 string @@ -1001,6 +1044,9 @@ func (c *chunkingReader) Read(buf []byte) (bytesRead int, err error) { return } c.accountBytes(int64(bytesRead)) + if c.chunkNo == 0 && c.expectSingle && bytesRead > 0 && c.readCount <= maxMetadataSize { + c.smallHead = append(c.smallHead, buf[:bytesRead]...) 
+ } if bytesRead == 0 && c.sizeLeft == 0 { err = io.EOF // Force EOF when no data left. } @@ -1048,16 +1094,25 @@ func (f *Fs) removeOldChunks(ctx context.Context, remote string) { // will return the object and the error, otherwise will return // nil and the error func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + if err := f.forbidChunk(src, src.Remote()); err != nil { + return nil, errors.Wrap(err, "refusing to put") + } return f.put(ctx, in, src, src.Remote(), options, f.base.Put) } // PutStream uploads to the remote path with the modTime given of indeterminate size func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + if err := f.forbidChunk(src, src.Remote()); err != nil { + return nil, errors.Wrap(err, "refusing to upload") + } return f.put(ctx, in, src, src.Remote(), options, f.base.Features().PutStream) } // Update in to the object with the modTime given of the given size func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { + if err := o.f.forbidChunk(o, o.Remote()); err != nil { + return errors.Wrap(err, "update refused") + } if err := o.readMetadata(ctx); err != nil { // refuse to update a file of unsupported format return errors.Wrap(err, "refusing to update") @@ -1080,13 +1135,12 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op // // This will create a duplicate if we upload a new file without // checking to see if there is one already - use Put() for that. -// TODO: really split stream here func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { do := f.base.Features().PutUnchecked if do == nil { return nil, errors.New("can't PutUnchecked") } - // TODO: handle options and chunking! + // TODO: handle range/limit options and really chunk stream here! o, err := do(ctx, in, f.wrapInfo(src, "", -1)) if err != nil { return nil, err @@ -1117,6 +1171,9 @@ func (f *Fs) Hashes() hash.Set { // // Shouldn't return an error if it already exists func (f *Fs) Mkdir(ctx context.Context, dir string) error { + if err := f.forbidChunk(dir, dir); err != nil { + return errors.Wrap(err, "can't mkdir") + } return f.base.Mkdir(ctx, dir) } @@ -1181,6 +1238,11 @@ func (f *Fs) Purge(ctx context.Context) error { // the `delete hidden` flag above or at least the user has been warned. // func (o *Object) Remove(ctx context.Context) (err error) { + if err := o.f.forbidChunk(o, o.Remote()); err != nil { + // operations.Move can still call Remove if chunker's Move refuses + // to corrupt file in hard mode. Hence, refuse to Remove, too. + return errors.Wrap(err, "refuse to corrupt") + } if err := o.readMetadata(ctx); err != nil { // Proceed but warn user that unexpected things can happen. 
fs.Errorf(o, "Removing a file with unsupported metadata: %v", err) @@ -1206,6 +1268,9 @@ func (o *Object) Remove(ctx context.Context) (err error) { // copyOrMove implements copy or move func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMoveFn, md5, sha1, opName string) (fs.Object, error) { + if err := f.forbidChunk(o, remote); err != nil { + return nil, errors.Wrapf(err, "can't %s", opName) + } if !o.isComposite() { fs.Debugf(o, "%s non-chunked object...", opName) oResult, err := do(ctx, o.mainChunk(), remote) // chain operation to a single wrapped chunk @@ -1493,6 +1558,9 @@ func (o *Object) addChunk(chunk fs.Object, chunkNo int) error { o.chunks = append(o.chunks, chunk) return nil } + if chunkNo > maxSafeChunkNumber { + return ErrChunkOverflow + } if chunkNo > len(o.chunks) { newChunks := make([]fs.Object, (chunkNo + 1), (chunkNo+1)*2) copy(newChunks, o.chunks) @@ -1897,20 +1965,31 @@ func (o *Object) ID() string { // Meta format `simplejson` type metaSimpleJSON struct { - Version int `json:"ver"` - Size int64 `json:"size"` // total size of data chunks - NChunks int `json:"nchunks"` // number of data chunks - MD5 string `json:"md5"` - SHA1 string `json:"sha1"` + // required core fields + Version *int `json:"ver"` + Size *int64 `json:"size"` // total size of data chunks + ChunkNum *int `json:"nchunks"` // number of data chunks + // optional extra fields + MD5 string `json:"md5,omitempty"` + SHA1 string `json:"sha1,omitempty"` } +// marshalSimpleJSON +// +// Current implementation creates metadata in two cases: +// - for files larger than chunk size +// - if file contents can be mistaken as meta object +// func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) { + version := metadataVersion metadata := metaSimpleJSON{ - Version: metadataVersion, - Size: size, - NChunks: nChunks, - MD5: md5, - SHA1: sha1, + // required core fields + Version: &version, + Size: &size, + ChunkNum: &nChunks, + // optional extra fields + MD5: md5, + SHA1: sha1, } data, err := json.Marshal(&metadata) if err == nil && data != nil && len(data) >= maxMetadataSize { @@ -1920,6 +1999,7 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s return data, err } +// unmarshalSimpleJSON // Note: only metadata format version 1 is supported atm. // // Current implementation creates metadata only for files larger than @@ -1931,22 +2011,37 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s // handled by current implementation. // The version check below will then explicitly ask user to upgrade rclone. // -func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, err error) { - if len(data) > maxMetadataSize { +func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte, strictChecks bool) (info *ObjectInfo, err error) { + // Be strict about JSON format + // to reduce possibility that a random small file resembles metadata. + if data != nil && len(data) > maxMetadataSize { return nil, errors.New("too big") } + if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' { + return nil, errors.New("invalid json") + } var metadata metaSimpleJSON err = json.Unmarshal(data, &metadata) if err != nil { return nil, err } - + // Basic fields are strictly required + // to reduce possibility that a random small file resembles metadata. 
+	if metadata.Version == nil || metadata.Size == nil || metadata.ChunkNum == nil {
+		return nil, errors.New("missing required field")
+	}
 	// Perform strict checks, avoid corruption of future metadata formats.
-	if metadata.Size < 0 {
+	if *metadata.Version < 1 {
+		return nil, errors.New("wrong version")
+	}
+	if *metadata.Size < 0 {
 		return nil, errors.New("negative file size")
 	}
-	if metadata.NChunks <= 0 {
-		return nil, errors.New("wrong number of chunks")
+	if *metadata.ChunkNum < 0 {
+		return nil, errors.New("negative number of chunks")
+	}
+	if *metadata.ChunkNum > maxSafeChunkNumber {
+		return nil, ErrChunkOverflow
 	}
 	if metadata.MD5 != "" {
 		_, err = hex.DecodeString(metadata.MD5)
@@ -1960,18 +2055,20 @@ func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte)
 			return nil, errors.New("wrong sha1 hash")
 		}
 	}
-	if metadata.Version <= 0 {
-		return nil, errors.New("wrong version number")
+	// ChunkNum is allowed to be 0 in future versions
+	if *metadata.ChunkNum < 1 && *metadata.Version <= metadataVersion {
+		return nil, errors.New("wrong number of chunks")
 	}
-	if metadata.Version != metadataVersion {
-		return nil, errors.Errorf("version %d is not supported, please upgrade rclone", metadata.Version)
+	// Non-strict mode also accepts future metadata versions
+	if *metadata.Version > metadataVersion && strictChecks {
+		return nil, fmt.Errorf("version %d is not supported, please upgrade rclone", *metadata.Version)
 	}
 
 	var nilFs *Fs // nil object triggers appropriate type method
-	info = nilFs.wrapInfo(metaObject, "", metadata.Size)
+	info = nilFs.wrapInfo(metaObject, "", *metadata.Size)
+	info.nChunks = *metadata.ChunkNum
 	info.md5 = metadata.MD5
 	info.sha1 = metadata.SHA1
-	info.nChunks = metadata.NChunks
 	return info, nil
 }
 
diff --git a/backend/chunker/chunker_internal_test.go b/backend/chunker/chunker_internal_test.go
index 79becb545..75a78795a 100644
--- a/backend/chunker/chunker_internal_test.go
+++ b/backend/chunker/chunker_internal_test.go
@@ -1,15 +1,23 @@
 package chunker
 
 import (
+	"bytes"
 	"context"
 	"flag"
 	"fmt"
+	"io/ioutil"
+	"path"
+	"regexp"
+	"strings"
 	"testing"
 
 	"github.com/rclone/rclone/fs"
+	"github.com/rclone/rclone/fs/operations"
 	"github.com/rclone/rclone/fstest"
 	"github.com/rclone/rclone/fstest/fstests"
+	"github.com/rclone/rclone/lib/random"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 // Command line flags
@@ -240,6 +248,307 @@ func testChunkNameFormat(t *testing.T, f *Fs) {
 	assertMakeNamePanics("fish", -2, "bind.", 0)
 }
 
+func testSmallFileInternals(t *testing.T, f *Fs) {
+	const dir = "small"
+	ctx := context.Background()
+	saveOpt := f.opt
+	defer func() {
+		f.opt.FailHard = false
+		_ = operations.Purge(ctx, f.base, dir)
+		f.opt = saveOpt
+	}()
+	f.opt.FailHard = false
+
+	modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")
+
+	checkSmallFileInternals := func(obj fs.Object) {
+		assert.NotNil(t, obj)
+		o, ok := obj.(*Object)
+		assert.True(t, ok)
+		assert.NotNil(t, o)
+		if o == nil {
+			return
+		}
+		switch {
+		case !f.useMeta:
+			// If meta format is "none", non-chunked file (even empty)
+			// internally is a single chunk without meta object.
+ assert.Nil(t, o.main) + assert.True(t, o.isComposite()) // sorry, sometimes a name is misleading + assert.Equal(t, 1, len(o.chunks)) + default: + // normally non-chunked file is kept in the Object's main field + assert.NotNil(t, o.main) + assert.False(t, o.isComposite()) + assert.Equal(t, 0, len(o.chunks)) + } + } + + checkContents := func(obj fs.Object, contents string) { + assert.NotNil(t, obj) + assert.Equal(t, int64(len(contents)), obj.Size()) + + r, err := obj.Open(ctx) + assert.NoError(t, err) + assert.NotNil(t, r) + if r == nil { + return + } + data, err := ioutil.ReadAll(r) + assert.NoError(t, err) + assert.Equal(t, contents, string(data)) + _ = r.Close() + } + + checkSmallFile := func(name, contents string) { + filename := path.Join(dir, name) + item := fstest.Item{Path: filename, ModTime: modTime} + _, put := fstests.PutTestContents(ctx, t, f, &item, contents, false) + assert.NotNil(t, put) + checkSmallFileInternals(put) + checkContents(put, contents) + + // objects returned by Put and NewObject must have similar structure + obj, err := f.NewObject(ctx, filename) + assert.NoError(t, err) + assert.NotNil(t, obj) + checkSmallFileInternals(obj) + checkContents(obj, contents) + + _ = obj.Remove(ctx) + _ = put.Remove(ctx) // for good + } + + checkSmallFile("emptyfile", "") + checkSmallFile("smallfile", "Ok") +} + +func testPreventCorruption(t *testing.T, f *Fs) { + if f.opt.ChunkSize > 50 { + t.Skip("this test requires small chunks") + } + const dir = "corrupted" + ctx := context.Background() + saveOpt := f.opt + defer func() { + f.opt.FailHard = false + _ = operations.Purge(ctx, f.base, dir) + f.opt = saveOpt + }() + f.opt.FailHard = true + + contents := random.String(250) + modTime := fstest.Time("2001-02-03T04:05:06.499999999Z") + const overlapMessage = "chunk overlap" + + assertOverlapError := func(err error) { + assert.Error(t, err) + if err != nil { + assert.Contains(t, err.Error(), overlapMessage) + } + } + + newFile := func(name string) fs.Object { + item := fstest.Item{Path: path.Join(dir, name), ModTime: modTime} + _, obj := fstests.PutTestContents(ctx, t, f, &item, contents, true) + require.NotNil(t, obj) + return obj + } + billyObj := newFile("billy") + + billyChunkName := func(chunkNo int) string { + return f.makeChunkName(billyObj.Remote(), chunkNo, "", -1) + } + + err := f.Mkdir(ctx, billyChunkName(1)) + assertOverlapError(err) + + _, err = f.Move(ctx, newFile("silly1"), billyChunkName(2)) + assert.Error(t, err) + assert.True(t, err == fs.ErrorCantMove || (err != nil && strings.Contains(err.Error(), overlapMessage))) + + _, err = f.Copy(ctx, newFile("silly2"), billyChunkName(3)) + assert.Error(t, err) + assert.True(t, err == fs.ErrorCantCopy || (err != nil && strings.Contains(err.Error(), overlapMessage))) + + // accessing chunks in strict mode is prohibited + f.opt.FailHard = true + billyChunk4Name := billyChunkName(4) + billyChunk4, err := f.NewObject(ctx, billyChunk4Name) + assertOverlapError(err) + + f.opt.FailHard = false + billyChunk4, err = f.NewObject(ctx, billyChunk4Name) + assert.NoError(t, err) + require.NotNil(t, billyChunk4) + + f.opt.FailHard = true + _, err = f.Put(ctx, bytes.NewBufferString(contents), billyChunk4) + assertOverlapError(err) + + // you can freely read chunks (if you have an object) + r, err := billyChunk4.Open(ctx) + assert.NoError(t, err) + var chunkContents []byte + assert.NotPanics(t, func() { + chunkContents, err = ioutil.ReadAll(r) + }) + assert.NoError(t, err) + assert.NotEqual(t, contents, string(chunkContents)) + + // but you 
can't change them + err = billyChunk4.Update(ctx, bytes.NewBufferString(contents), newFile("silly3")) + assertOverlapError(err) + + // Remove isn't special, you can't corrupt files even if you have an object + err = billyChunk4.Remove(ctx) + assertOverlapError(err) + + // recreate billy in case it was anyhow corrupted + willyObj := newFile("willy") + willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", -1) + f.opt.FailHard = false + willyChunk, err := f.NewObject(ctx, willyChunkName) + f.opt.FailHard = true + assert.NoError(t, err) + require.NotNil(t, willyChunk) + + _, err = operations.Copy(ctx, f, willyChunk, willyChunkName, newFile("silly4")) + assertOverlapError(err) + + // operations.Move will return error when chunker's Move refused + // to corrupt target file, but reverts to copy/delete method + // still trying to delete target chunk. Chunker must come to rescue. + _, err = operations.Move(ctx, f, willyChunk, willyChunkName, newFile("silly5")) + assertOverlapError(err) + r, err = willyChunk.Open(ctx) + assert.NoError(t, err) + assert.NotPanics(t, func() { + _, err = ioutil.ReadAll(r) + }) + assert.NoError(t, err) +} + +func testChunkNumberOverflow(t *testing.T, f *Fs) { + if f.opt.ChunkSize > 50 { + t.Skip("this test requires small chunks") + } + const dir = "wreaked" + const wreakNumber = 10200300 + ctx := context.Background() + saveOpt := f.opt + defer func() { + f.opt.FailHard = false + _ = operations.Purge(ctx, f.base, dir) + f.opt = saveOpt + }() + + modTime := fstest.Time("2001-02-03T04:05:06.499999999Z") + contents := random.String(100) + + newFile := func(f fs.Fs, name string) (fs.Object, string) { + filename := path.Join(dir, name) + item := fstest.Item{Path: filename, ModTime: modTime} + _, obj := fstests.PutTestContents(ctx, t, f, &item, contents, true) + require.NotNil(t, obj) + return obj, filename + } + + f.opt.FailHard = false + file, fileName := newFile(f, "wreaker") + wreak, _ := newFile(f.base, f.makeChunkName("wreaker", wreakNumber, "", -1)) + + f.opt.FailHard = false + fstest.CheckListingWithRoot(t, f, dir, nil, nil, f.Precision()) + _, err := f.NewObject(ctx, fileName) + assert.Error(t, err) + + f.opt.FailHard = true + _, err = f.List(ctx, dir) + assert.Error(t, err) + _, err = f.NewObject(ctx, fileName) + assert.Error(t, err) + + f.opt.FailHard = false + _ = wreak.Remove(ctx) + _ = file.Remove(ctx) +} + +func testMetadataInput(t *testing.T, f *Fs) { + const minChunkForTest = 50 + if f.opt.ChunkSize < minChunkForTest { + t.Skip("this test requires chunks that fit metadata") + } + + const dir = "usermeta" + ctx := context.Background() + saveOpt := f.opt + defer func() { + f.opt.FailHard = false + _ = operations.Purge(ctx, f.base, dir) + f.opt = saveOpt + }() + f.opt.FailHard = false + + modTime := fstest.Time("2001-02-03T04:05:06.499999999Z") + + putFile := func(f fs.Fs, name, contents, message string, check bool) fs.Object { + item := fstest.Item{Path: name, ModTime: modTime} + _, obj := fstests.PutTestContents(ctx, t, f, &item, contents, check) + assert.NotNil(t, obj, message) + return obj + } + + runSubtest := func(contents, name string) { + description := fmt.Sprintf("file with %s metadata", name) + filename := path.Join(dir, name) + require.True(t, len(contents) > 2 && len(contents) < minChunkForTest, description+" test data is correct") + + part := putFile(f.base, f.makeChunkName(filename, 0, "", -1), "oops", "", true) + _ = putFile(f, filename, contents, "upload "+description, false) + + obj, err := f.NewObject(ctx, filename) + assert.NoError(t, 
err, "access "+description)
+		assert.NotNil(t, obj)
+		assert.Equal(t, int64(len(contents)), obj.Size(), "size "+description)
+
+		o, ok := obj.(*Object)
+		assert.True(t, ok)
+		if o != nil {
+			assert.True(t, o.isComposite() && len(o.chunks) == 1, description+" is forced composite")
+			o = nil
+		}
+
+		defer func() {
+			_ = obj.Remove(ctx)
+			_ = part.Remove(ctx)
+		}()
+
+		r, err := obj.Open(ctx)
+		assert.NoError(t, err, "open "+description)
+		assert.NotNil(t, r, "open stream of "+description)
+		if err == nil && r != nil {
+			data, err := ioutil.ReadAll(r)
+			assert.NoError(t, err, "read all of "+description)
+			assert.Equal(t, contents, string(data), description+" contents is ok")
+			_ = r.Close()
+		}
+	}
+
+	metaData, err := marshalSimpleJSON(ctx, 3, 1, "", "")
+	require.NoError(t, err)
+	todaysMeta := string(metaData)
+	runSubtest(todaysMeta, "today")
+
+	pastMeta := regexp.MustCompile(`"ver":[0-9]+`).ReplaceAllLiteralString(todaysMeta, `"ver":1`)
+	pastMeta = regexp.MustCompile(`"size":[0-9]+`).ReplaceAllLiteralString(pastMeta, `"size":0`)
+	runSubtest(pastMeta, "past")
+
+	futureMeta := regexp.MustCompile(`"ver":[0-9]+`).ReplaceAllLiteralString(todaysMeta, `"ver":999`)
+	futureMeta = regexp.MustCompile(`"nchunks":[0-9]+`).ReplaceAllLiteralString(futureMeta, `"nchunks":0,"x":"y"`)
+	runSubtest(futureMeta, "future")
+}
+
 // InternalTest dispatches all internal tests
 func (f *Fs) InternalTest(t *testing.T) {
 	t.Run("PutLarge", func(t *testing.T) {
@@ -251,6 +560,18 @@ func (f *Fs) InternalTest(t *testing.T) {
 	t.Run("ChunkNameFormat", func(t *testing.T) {
 		testChunkNameFormat(t, f)
 	})
+	t.Run("SmallFileInternals", func(t *testing.T) {
+		testSmallFileInternals(t, f)
+	})
+	t.Run("PreventCorruption", func(t *testing.T) {
+		testPreventCorruption(t, f)
+	})
+	t.Run("ChunkNumberOverflow", func(t *testing.T) {
+		testChunkNumberOverflow(t, f)
+	})
+	t.Run("MetadataInput", func(t *testing.T) {
+		testMetadataInput(t, f)
+	})
 }
 
 var _ fstests.InternalTester = (*Fs)(nil)
diff --git a/docs/content/chunker.md b/docs/content/chunker.md
index d05eba606..8921c595a 100644
--- a/docs/content/chunker.md
+++ b/docs/content/chunker.md
@@ -115,11 +115,16 @@ original content.
 When the `list` rclone command scans a directory on wrapped remote,
 the potential chunk files are accounted for, grouped and assembled into
 composite directory entries. Any temporary chunks are hidden.
-`list` can sometimes come across composite files with missing or invalid
-chunks, eg if wrapped file system has been tampered with or damaged.
-If chunker detects a missing chunk it will by default silently ignore
-the whole group. You can use the `--chunker-fail-on-bad-chunks`
-rclone option to make `list` fail with a loud error message.
+
+List and other commands can sometimes come across composite files with
+missing or invalid chunks, e.g. shadowed by a like-named directory or
+another file. This usually means that the wrapped file system has been
+directly tampered with or damaged. If chunker detects a missing chunk it
+will by default print a warning, skip the whole incomplete group of
+chunks, and proceed with the current command.
+You can set the `--chunker-fail-hard` flag to make commands abort with
+an error message in such cases.
+
 
 #### Chunk names
 
@@ -368,19 +373,18 @@ Metadata is a small JSON file named after the composite file.
 - Simple JSON supports hash sums and chunk validation.
 - It has the following fields: ver, size, nchunks, md5, sha1.
-#### --chunker-fail-on-bad-chunks
+#### --chunker-fail-hard
 
-The list command might encounter files with missinng or invalid chunks.
-This boolean flag tells what rclone should do in such cases.
+Choose how chunker should handle files with missing or invalid chunks.
 
-- Config: fail_on_bad_chunks
-- Env Var: RCLONE_CHUNKER_FAIL_ON_BAD_CHUNKS
+- Config: fail_hard
+- Env Var: RCLONE_CHUNKER_FAIL_HARD
 - Type: bool
 - Default: false
 - Examples:
     - "true"
-        - Fail with error.
+        - Report errors and abort current command.
     - "false"
-        - Silently ignore invalid object.
+        - Warn user, skip incomplete file and proceed.
 
diff --git a/fstest/fstests/fstests.go b/fstest/fstests/fstests.go
index 547f68895..9768a6f65 100644
--- a/fstest/fstests/fstests.go
+++ b/fstest/fstests/fstests.go
@@ -151,16 +151,19 @@ func retry(t *testing.T, what string, f func() error) {
 	require.NoError(t, err, what)
 }
 
-// testPut puts file to the remote
+// testPut puts a file with random contents to the remote
 func testPut(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item) (string, fs.Object) {
+	return PutTestContents(ctx, t, f, file, random.String(100), true)
+}
+
+// PutTestContents puts a file with the given contents to the remote and checks it, but unlike TestPutLarge doesn't remove it
+func PutTestContents(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item, contents string, check bool) (string, fs.Object) {
 	var (
 		err        error
 		obj        fs.Object
 		uploadHash *hash.MultiHasher
-		contents   string
 	)
 	retry(t, "Put", func() error {
-		contents = random.String(100)
 		buf := bytes.NewBufferString(contents)
 		uploadHash = hash.NewMultiHasher()
 		in := io.TeeReader(buf, uploadHash)
@@ -171,10 +174,12 @@ func testPut(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item) (str
 		return err
 	})
 	file.Hashes = uploadHash.Sums()
-	file.Check(t, obj, f.Precision())
-	// Re-read the object and check again
-	obj = findObject(ctx, t, f, file.Path)
-	file.Check(t, obj, f.Precision())
+	if check {
+		file.Check(t, obj, f.Precision())
+		// Re-read the object and check again
+		obj = findObject(ctx, t, f, file.Path)
+		file.Check(t, obj, f.Precision())
+	}
 	return contents, obj
 }
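
To see why this patch switches metaSimpleJSON to pointer fields, consider how chunker must tell real metadata apart from a small user file that merely looks like it. Below is a minimal, self-contained sketch of that technique, not the rclone implementation: the helper name looksLikeMetadata is invented here, and only the three core fields are modeled. Pointer fields let json.Unmarshal report an absent key as nil rather than as a zero value, so the strict checks can reject meta-like input:

package main

import (
	"encoding/json"
	"errors"
	"fmt"
)

// Cap taken from the patch: reject absurd chunk counts early.
const maxSafeChunkNumber = 10000000

// Pointer fields distinguish "key absent" (nil) from "zero value",
// so a file missing any core field is rejected outright.
type metaSimpleJSON struct {
	Version  *int   `json:"ver"`
	Size     *int64 `json:"size"`
	ChunkNum *int   `json:"nchunks"`
}

// looksLikeMetadata mirrors the spirit of unmarshalSimpleJSON's strict
// checks; it returns nil only for plausible chunker metadata.
func looksLikeMetadata(data []byte) error {
	// Cheap structural check before parsing, as in the patch.
	if len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
		return errors.New("invalid json")
	}
	var m metaSimpleJSON
	if err := json.Unmarshal(data, &m); err != nil {
		return err
	}
	if m.Version == nil || m.Size == nil || m.ChunkNum == nil {
		return errors.New("missing required field")
	}
	if *m.Version < 1 {
		return errors.New("wrong version")
	}
	if *m.Size < 0 {
		return errors.New("negative file size")
	}
	if *m.ChunkNum < 0 {
		return errors.New("negative number of chunks")
	}
	if *m.ChunkNum > maxSafeChunkNumber {
		return errors.New("chunk number overflow")
	}
	return nil
}

func main() {
	fmt.Println(looksLikeMetadata([]byte(`{"ver":1,"size":3,"nchunks":1}`))) // <nil>
	fmt.Println(looksLikeMetadata([]byte(`{"size":3}`)))                     // missing required field
}

This is why Put buffers the head of a single-chunk upload (smallHead) and probes it with unmarshalSimpleJSON in non-strict mode: if user data passes these checks, the file is forced to get a real meta object, so a later read cannot confuse the two.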
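A related guard, maxSafeChunkNumber, protects addChunk and the Put loop from a specially crafted chunk name: the wrapped remote is untrusted, and a name that parses to a huge chunk number would otherwise size a slice allocation directly. Here is a small sketch under the same assumptions (invented names, a plain string slice standing in for []fs.Object) of the growth strategy and the cap:

package main

import (
	"errors"
	"fmt"
)

const maxSafeChunkNumber = 10000000 // same cap as the patch

var errChunkOverflow = errors.New("chunk number overflow")

// addChunk mimics the patch's growth strategy: extend the slice to
// chunkNo+1 elements, reserving double capacity for chunks yet to come.
// Without the cap, a chunkNo taken from a hostile file name would drive
// the allocation below and could exhaust memory.
func addChunk(chunks []string, chunk string, chunkNo int) ([]string, error) {
	if chunkNo > maxSafeChunkNumber {
		return chunks, errChunkOverflow // reject crafted names early
	}
	if chunkNo >= len(chunks) {
		grown := make([]string, chunkNo+1, (chunkNo+1)*2)
		copy(grown, chunks)
		chunks = grown
	}
	chunks[chunkNo] = chunk
	return chunks, nil
}

func main() {
	chunks, err := addChunk(nil, "part1", 0)
	fmt.Println(len(chunks), err) // 1 <nil>
	_, err = addChunk(chunks, "evil", 1000000000)
	fmt.Println(err) // chunk number overflow
}

The testChunkNumberOverflow test above exercises the same scenario end to end by planting a chunk numbered 10200300 next to a real file and checking that both List and NewObject refuse it in fail-hard mode.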