mirror of
https://github.com/rclone/rclone.git
synced 2025-01-02 04:19:35 +01:00
s3: implement alternate hashes - fixes #7445 FIXME WIP
This commit is contained in:
parent
d65d1a44b3
commit
bdf1777185
backend/s3
@ -102,4 +102,5 @@ import (
|
||||
genSetFrom(new(s3.CreateMultipartUploadInput), new(s3.PutObjectInput))
|
||||
genSetFrom(new(s3.HeadObjectOutput), new(s3.PutObjectInput))
|
||||
genSetFrom(new(s3.CopyObjectInput), new(s3.PutObjectInput))
|
||||
genSetFrom(new(types.CompletedPart), new(s3.UploadPartOutput))
|
||||
}
|
||||
|
@ -2724,6 +2724,26 @@ use |-vv| to see the debug level logs.
|
||||
`, "|", "`"),
|
||||
Default: sdkLogMode(0),
|
||||
Advanced: true,
|
||||
}, {
|
||||
Name: "hash",
|
||||
Help: strings.ReplaceAll(`Set to change the hash/checksum in use
|
||||
|
||||
This can be set to one of:
|
||||
|
||||
- |`+hash.MD5.String()+`|
|
||||
- |`+hash.SHA1.String()+`|
|
||||
- |`+hash.SHA256.String()+`|
|
||||
- |`+hash.CRC32.String()+`|
|
||||
|
||||
To choose the checksum algorithm used by S3 to validate your data. Once the data
|
||||
is uploaded the checksum algorithm can only be changed by copying the data.
|
||||
|
||||
This also sets the Hash that rclone uses.
|
||||
|
||||
See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
|
||||
`, "|", "`"),
|
||||
Default: hash.MD5,
|
||||
Advanced: true,
|
||||
},
|
||||
}})
|
||||
}
|
||||
@ -2878,6 +2898,7 @@ type Options struct {
|
||||
UseUnsignedPayload fs.Tristate `config:"use_unsigned_payload"`
|
||||
SDKLogMode sdkLogMode `config:"sdk_log_mode"`
|
||||
DirectoryBucket bool `config:"directory_bucket"`
|
||||
Hash hash.Type `config:"hash"`
|
||||
}
|
||||
|
||||
// Fs represents a remote s3 server
|
||||
@ -2899,6 +2920,7 @@ type Fs struct {
|
||||
versioningMu sync.Mutex
|
||||
versioning fs.Tristate // if set bucket is using versions
|
||||
warnCompressed sync.Once // warn once about compressed files
|
||||
checksum types.ChecksumAlgorithm // for uploading new objects
|
||||
}
|
||||
|
||||
// Object describes a s3 object
|
||||
@ -2922,6 +2944,7 @@ type Object struct {
|
||||
contentDisposition *string // Content-Disposition: header
|
||||
contentEncoding *string // Content-Encoding: header
|
||||
contentLanguage *string // Content-Language: header
|
||||
hash *string // if hash is set, then this is the alternate hash
|
||||
}
|
||||
|
||||
// safely dereference the pointer, returning a zero T if nil
|
||||
@ -3662,6 +3685,18 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
|
||||
// The normal API doesn't work for creating directory buckets, so don't try
|
||||
f.opt.NoCheckBucket = true
|
||||
}
|
||||
switch opt.Hash {
|
||||
case hash.MD5:
|
||||
f.checksum = ""
|
||||
case hash.CRC32:
|
||||
f.checksum = types.ChecksumAlgorithmCrc32
|
||||
case hash.SHA1:
|
||||
f.checksum = types.ChecksumAlgorithmSha1
|
||||
case hash.SHA256:
|
||||
f.checksum = types.ChecksumAlgorithmSha256
|
||||
default:
|
||||
return nil, fmt.Errorf("%s is not supported for hash/checksum algorithm", opt.Hash)
|
||||
}
|
||||
f.setRoot(root)
|
||||
f.features = (&fs.Features{
|
||||
ReadMimeType: true,
|
||||
@ -4895,7 +4930,7 @@ func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object,
|
||||
|
||||
// Hashes returns the supported hash sets.
|
||||
func (f *Fs) Hashes() hash.Set {
|
||||
return hash.Set(hash.MD5)
|
||||
return hash.Set(hash.MD5 | f.opt.Hash)
|
||||
}
|
||||
|
||||
// PublicLink generates a public link to the remote path (usually readable by anyone)
|
||||
@ -5608,6 +5643,25 @@ func (o *Object) setMD5FromEtag(etag string) {
|
||||
// Hash returns the Md5sum of an object returning a lowercase hex string
|
||||
func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
|
||||
if t != hash.MD5 {
|
||||
if t == o.fs.opt.Hash {
|
||||
if o.hash == nil {
|
||||
err := o.readMetaData(ctx)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
if o.hash == nil {
|
||||
return "", nil
|
||||
}
|
||||
// FIXME check multipart
|
||||
hashBytes, err := base64.StdEncoding.DecodeString(*o.hash)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read hash from response %q: %v", *o.hash, err)
|
||||
} else if 2*len(hashBytes) != hash.Width(o.fs.opt.Hash, false) {
|
||||
return "", fmt.Errorf("failed to read hash from response %q: wrong length", *o.hash)
|
||||
}
|
||||
return hex.EncodeToString(hashBytes), nil
|
||||
}
|
||||
return "", hash.ErrUnsupported
|
||||
}
|
||||
// If decompressing, erase the hash
|
||||
@ -5652,6 +5706,9 @@ func (f *Fs) headObject(ctx context.Context, req *s3.HeadObjectInput) (resp *s3.
|
||||
if f.opt.SSECustomerKeyMD5 != "" {
|
||||
req.SSECustomerKeyMD5 = &f.opt.SSECustomerKeyMD5
|
||||
}
|
||||
if f.checksum != "" {
|
||||
req.ChecksumMode = types.ChecksumModeEnabled
|
||||
}
|
||||
err = f.pacer.Call(func() (bool, error) {
|
||||
var err error
|
||||
resp, err = f.c.HeadObject(ctx, req)
|
||||
@ -5741,11 +5798,21 @@ func (o *Object) setMetaData(resp *s3.HeadObjectOutput) {
|
||||
o.contentDisposition = resp.ContentDisposition
|
||||
o.contentEncoding = resp.ContentEncoding
|
||||
o.contentLanguage = resp.ContentLanguage
|
||||
if o.fs.opt.Hash == hash.CRC32 {
|
||||
o.hash = resp.ChecksumCRC32
|
||||
}
|
||||
if o.fs.opt.Hash == hash.SHA1 {
|
||||
o.hash = resp.ChecksumSHA1
|
||||
}
|
||||
if o.fs.opt.Hash == hash.SHA256 {
|
||||
o.hash = resp.ChecksumSHA256
|
||||
}
|
||||
|
||||
// If decompressing then size and md5sum are unknown
|
||||
if o.fs.opt.Decompress && deref(o.contentEncoding) == "gzip" {
|
||||
o.bytes = -1
|
||||
o.md5 = ""
|
||||
o.hash = nil
|
||||
}
|
||||
}
|
||||
|
||||
@ -5910,6 +5977,9 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
|
||||
if o.fs.opt.SSECustomerKeyMD5 != "" {
|
||||
req.SSECustomerKeyMD5 = &o.fs.opt.SSECustomerKeyMD5
|
||||
}
|
||||
if o.fs.checksum != "" {
|
||||
req.ChecksumMode = types.ChecksumModeEnabled
|
||||
}
|
||||
// httpReq, err := s3.NewPresignClient(o.fs.c).PresignGetObject(ctx, &req)
|
||||
// if err != nil {
|
||||
// return nil, err
|
||||
@ -6088,13 +6158,13 @@ func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectIn
|
||||
}
|
||||
|
||||
// add a part number and etag to the completed parts
|
||||
func (w *s3ChunkWriter) addCompletedPart(partNum *int32, eTag *string) {
|
||||
func (w *s3ChunkWriter) addCompletedPart(partNum *int32, uout *s3.UploadPartOutput) {
|
||||
w.completedPartsMu.Lock()
|
||||
defer w.completedPartsMu.Unlock()
|
||||
w.completedParts = append(w.completedParts, types.CompletedPart{
|
||||
PartNumber: partNum,
|
||||
ETag: eTag,
|
||||
})
|
||||
var part types.CompletedPart
|
||||
setFrom_typesCompletedPart_s3UploadPartOutput(&part, uout)
|
||||
part.PartNumber = partNum
|
||||
w.completedParts = append(w.completedParts, part)
|
||||
}
|
||||
|
||||
// addMd5 adds a binary md5 to the md5 calculated so far
|
||||
@ -6158,6 +6228,7 @@ func (w *s3ChunkWriter) WriteChunk(ctx context.Context, chunkNumber int, reader
|
||||
SSECustomerAlgorithm: w.multiPartUploadInput.SSECustomerAlgorithm,
|
||||
SSECustomerKey: w.multiPartUploadInput.SSECustomerKey,
|
||||
SSECustomerKeyMD5: w.multiPartUploadInput.SSECustomerKeyMD5,
|
||||
ChecksumAlgorithm: w.f.checksum,
|
||||
}
|
||||
if w.f.opt.DirectoryBucket {
|
||||
// Directory buckets do not support "Content-Md5" header
|
||||
@ -6184,7 +6255,7 @@ func (w *s3ChunkWriter) WriteChunk(ctx context.Context, chunkNumber int, reader
|
||||
return -1, fmt.Errorf("failed to upload chunk %d with %v bytes: %w", chunkNumber+1, currentChunkSize, err)
|
||||
}
|
||||
|
||||
w.addCompletedPart(s3PartNumber, uout.ETag)
|
||||
w.addCompletedPart(s3PartNumber, uout)
|
||||
|
||||
fs.Debugf(w.o, "multipart upload wrote chunk %d with %v bytes and etag %v", chunkNumber+1, currentChunkSize, *uout.ETag)
|
||||
return currentChunkSize, err
|
||||
@ -6371,6 +6442,7 @@ func (o *Object) prepareUpload(ctx context.Context, src fs.ObjectInfo, options [
|
||||
Bucket: &bucket,
|
||||
ACL: types.ObjectCannedACL(o.fs.opt.ACL),
|
||||
Key: &bucketPath,
|
||||
ChecksumAlgorithm: o.fs.checksum,
|
||||
}
|
||||
|
||||
// Fetch metadata if --metadata is in use
|
||||
|
@ -285,3 +285,12 @@ func setFrom_s3CopyObjectInput_s3PutObjectInput(a *s3.CopyObjectInput, b *s3.Put
|
||||
a.Tagging = b.Tagging
|
||||
a.WebsiteRedirectLocation = b.WebsiteRedirectLocation
|
||||
}
|
||||
|
||||
// setFrom_typesCompletedPart_s3UploadPartOutput copies matching elements from a to b
|
||||
func setFrom_typesCompletedPart_s3UploadPartOutput(a *types.CompletedPart, b *s3.UploadPartOutput) {
|
||||
a.ChecksumCRC32 = b.ChecksumCRC32
|
||||
a.ChecksumCRC32C = b.ChecksumCRC32C
|
||||
a.ChecksumSHA1 = b.ChecksumSHA1
|
||||
a.ChecksumSHA256 = b.ChecksumSHA256
|
||||
a.ETag = b.ETag
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user