mirror of
https://github.com/rclone/rclone.git
synced 2025-01-13 17:58:30 +01:00
s3: add provider quirk --s3-might-gzip to fix corrupted on transfer: sizes differ
Before this change, some files were giving this error when downloaded from Cloudflare and other providers. ERROR corrupted on transfer: sizes differ NNN vs MMM This is because these providers auto gzips the object when rclone wasn't expecting it to. (AWS does not gzip objects without their being uploaded gzipped). This patch adds a quirk to for fix the problem and a flag to control it. The quirk `might_gzip` is set to `true` for all providers except AWS. See: https://forum.rclone.org/t/s3-error-corrupted-on-transfer-sizes-differ-nnn-vs-mmm/33694/ Fixes: #6533
This commit is contained in:
parent
e8002c66a9
commit
e2d2a05361
@ -2164,6 +2164,31 @@ can't check the size and hash but the file contents will be decompressed.
|
||||
`,
|
||||
Advanced: true,
|
||||
Default: false,
|
||||
}, {
|
||||
Name: "might_gzip",
|
||||
Help: strings.ReplaceAll(`Set this if the backend might gzip objects.
|
||||
|
||||
Normally providers will not alter objects when they are downloaded. If
|
||||
an object was not uploaded with |Content-Encoding: gzip| then it won't
|
||||
be set on download.
|
||||
|
||||
However some providers may gzip objects even if they weren't uploaded
|
||||
with |Content-Encoding: gzip| (eg Cloudflare).
|
||||
|
||||
A symptom of this would be receiving errors like
|
||||
|
||||
ERROR corrupted on transfer: sizes differ NNN vs MMM
|
||||
|
||||
If you set this flag and rclone downloads an object with
|
||||
Content-Encoding: gzip set and chunked transfer encoding, then rclone
|
||||
will decompress the object on the fly.
|
||||
|
||||
If this is set to unset (the default) then rclone will choose
|
||||
according to the provider setting what to apply, but you can override
|
||||
rclone's choice here.
|
||||
`, "|", "`"),
|
||||
Default: fs.Tristate{},
|
||||
Advanced: true,
|
||||
}, {
|
||||
Name: "no_system_metadata",
|
||||
Help: `Suppress setting and reading of system metadata`,
|
||||
@ -2295,6 +2320,7 @@ type Options struct {
|
||||
Versions bool `config:"versions"`
|
||||
VersionAt fs.Time `config:"version_at"`
|
||||
Decompress bool `config:"decompress"`
|
||||
MightGzip fs.Tristate `config:"might_gzip"`
|
||||
NoSystemMetadata bool `config:"no_system_metadata"`
|
||||
}
|
||||
|
||||
@ -2655,10 +2681,12 @@ func setQuirks(opt *Options) {
|
||||
virtualHostStyle = true
|
||||
urlEncodeListings = true
|
||||
useMultipartEtag = true
|
||||
mightGzip = true // assume all providers might gzip until proven otherwise
|
||||
)
|
||||
switch opt.Provider {
|
||||
case "AWS":
|
||||
// No quirks
|
||||
mightGzip = false // Never auto gzips objects
|
||||
case "Alibaba":
|
||||
useMultipartEtag = false // Alibaba seems to calculate multipart Etags differently from AWS
|
||||
case "HuaweiOBS":
|
||||
@ -2772,6 +2800,12 @@ func setQuirks(opt *Options) {
|
||||
opt.UseMultipartEtag.Valid = true
|
||||
opt.UseMultipartEtag.Value = useMultipartEtag
|
||||
}
|
||||
|
||||
// set MightGzip if not manually set
|
||||
if !opt.MightGzip.Valid {
|
||||
opt.MightGzip.Valid = true
|
||||
opt.MightGzip.Value = mightGzip
|
||||
}
|
||||
}
|
||||
|
||||
// setRoot changes the root of the Fs
|
||||
@ -4858,7 +4892,7 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
|
||||
|
||||
// Decompress body if necessary
|
||||
if aws.StringValue(resp.ContentEncoding) == "gzip" {
|
||||
if o.fs.opt.Decompress {
|
||||
if o.fs.opt.Decompress || (resp.ContentLength == nil && o.fs.opt.MightGzip.Value) {
|
||||
return readers.NewGzipReader(resp.Body)
|
||||
}
|
||||
o.fs.warnCompressed.Do(func() {
|
||||
|
Loading…
Reference in New Issue
Block a user