mirror of https://github.com/rclone/rclone.git
synced 2025-07-19 05:23:32 +02:00
This commit introduces a significant rewrite of PikPak's upload path, handling file uploads directly rather than relying on the generic S3 upload manager. The primary motivation is to address the critical upload failures reported in #8629.

* Added a new `multipart.go` file implementing multipart uploads with the AWS S3 SDK.
* Removed the dependency on the AWS S3 upload manager and replaced it with custom handling.
* Updated the PikPak test package with new multipart upload tests, including configurable chunk size and upload cutoff.
* Added a new configuration option, `upload_cutoff`, to control when chunked uploads are used.
* Defined constraints for `chunk_size` and `upload_cutoff` (min/max values, validation).
* Reduced the default `upload_concurrency` from 5 to 4.
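
For illustration only (the option names come from the list above; the values are placeholders rather than the defaults set by this commit, and credentials are omitted), a PikPak remote using the new settings might look like this in rclone.conf:

    [pikpak]
    type = pikpak
    upload_cutoff = 10Mi
    chunk_size = 5Mi
    upload_concurrency = 4
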
334 lines
9.0 KiB
Go
package pikpak

import (
	"context"
	"fmt"
	"io"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/aws/aws-sdk-go-v2/service/s3/types"
	"github.com/rclone/rclone/backend/pikpak/api"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/chunksize"
	"github.com/rclone/rclone/fs/fserrors"
	"github.com/rclone/rclone/lib/atexit"
	"github.com/rclone/rclone/lib/pacer"
	"github.com/rclone/rclone/lib/pool"
	"golang.org/x/sync/errgroup"
)

const (
	bufferSize           = 1024 * 1024     // default size of the pages used in the reader
	bufferCacheSize      = 64              // max number of buffers to keep in cache
	bufferCacheFlushTime = 5 * time.Second // flush the cached buffers after this long
)

// bufferPool is a global pool of buffers
var (
	bufferPool     *pool.Pool
	bufferPoolOnce sync.Once
)

// get a buffer pool
func getPool() *pool.Pool {
	bufferPoolOnce.Do(func() {
		ci := fs.GetConfig(context.Background())
		// Initialise the buffer pool when used
		bufferPool = pool.New(bufferCacheFlushTime, bufferSize, bufferCacheSize, ci.UseMmap)
	})
	return bufferPool
}

// NewRW gets a pool.RW using the multipart pool
func NewRW() *pool.RW {
	return pool.NewRW(getPool())
}
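
// The pieces in this file fit together roughly as follows (an illustrative
// sketch only; the actual call site lives elsewhere in this backend):
//
//	w, err := f.newChunkWriter(ctx, remote, size, resumableParams, in)
//	if err != nil {
//		return err
//	}
//	err = w.Upload(ctx) // uploads the parts concurrently, then finalises via Close
//
// Upload keeps at most w.con parts in flight and each in-flight part buffers up
// to chunkSize bytes in memory, so peak buffer usage is roughly
// upload_concurrency * chunk_size.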

// Upload does a multipart upload in parallel
func (w *pikpakChunkWriter) Upload(ctx context.Context) (err error) {
	// make concurrency machinery
	tokens := pacer.NewTokenDispenser(w.con)

	uploadCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	defer atexit.OnError(&err, func() {
		cancel()
		fs.Debugf(w.o, "multipart upload: Cancelling...")
		errCancel := w.Abort(ctx)
		if errCancel != nil {
			fs.Debugf(w.o, "multipart upload: failed to cancel: %v", errCancel)
		}
	})()

	var (
		g, gCtx   = errgroup.WithContext(uploadCtx)
		finished  = false
		off       int64
		size      = w.size
		chunkSize = w.chunkSize
	)

	// Do the accounting manually
	in, acc := accounting.UnWrapAccounting(w.in)

	for partNum := int64(0); !finished; partNum++ {
		// Get a block of memory from the pool and a token which limits concurrency.
		tokens.Get()
		rw := NewRW()
		if acc != nil {
			rw.SetAccounting(acc.AccountRead)
		}

		free := func() {
			// return the memory and token
			_ = rw.Close() // Can't return an error
			tokens.Put()
		}

		// Fail fast: if an errgroup managed function has returned an error then
		// gCtx is cancelled and there is no point in uploading the remaining parts.
		if gCtx.Err() != nil {
			free()
			break
		}

		// Read the chunk
		var n int64
		n, err = io.CopyN(rw, in, chunkSize)
		if err == io.EOF {
			if n == 0 && partNum != 0 { // end if no data and if not first chunk
				free()
				break
			}
			finished = true
		} else if err != nil {
			free()
			return fmt.Errorf("multipart upload: failed to read source: %w", err)
		}

		partNum := partNum // capture the loop variable for the goroutine below
		partOff := off
		off += n
		g.Go(func() (err error) {
			defer free()
			fs.Debugf(w.o, "multipart upload: starting chunk %d size %v offset %v/%v", partNum, fs.SizeSuffix(n), fs.SizeSuffix(partOff), fs.SizeSuffix(size))
			_, err = w.WriteChunk(gCtx, int32(partNum), rw)
			return err
		})
	}

	err = g.Wait()
	if err != nil {
		return err
	}

	err = w.Close(ctx)
	if err != nil {
		return fmt.Errorf("multipart upload: failed to finalise: %w", err)
	}

	return nil
}

// warnStreamUpload ensures the streaming upload size warning is only logged once
var warnStreamUpload sync.Once

// pikpakChunkWriter holds the state of a chunk writer
type pikpakChunkWriter struct {
	chunkSize      int64                           // size of each chunk in bytes
	size           int64                           // total size of the source, or -1 if unknown
	con            int                             // number of parts to upload concurrently
	f              *Fs                             // parent Fs
	o              *Object                         // object being uploaded
	in             io.Reader                       // source of the data to upload
	mu             sync.Mutex                      // protects completedParts
	completedParts []types.CompletedPart           // parts uploaded so far
	client         *s3.Client                      // S3 client for the upload session
	mOut           *s3.CreateMultipartUploadOutput // bucket, key and upload ID of the session
}

// newChunkWriter creates a pikpakChunkWriter and initiates the multipart upload
func (f *Fs) newChunkWriter(ctx context.Context, remote string, size int64, p *api.ResumableParams, in io.Reader, options ...fs.OpenOption) (w *pikpakChunkWriter, err error) {
	// Temporary Object under construction
	o := &Object{
		fs:     f,
		remote: remote,
	}

	// calculate size of parts
	chunkSize := f.opt.ChunkSize

	// size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
	// buffers here (default 5 MiB). With a maximum number of parts (10,000) this will be a file of
	// 48 GiB which seems like a not too unreasonable limit.
	if size == -1 {
		warnStreamUpload.Do(func() {
			fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v",
				f.opt.ChunkSize, fs.SizeSuffix(int64(chunkSize)*int64(maxUploadParts)))
		})
	} else {
		chunkSize = chunksize.Calculator(o, size, maxUploadParts, chunkSize)
	}

	client, err := f.newS3Client(ctx, p)
	if err != nil {
		return nil, fmt.Errorf("failed to create upload client: %w", err)
	}
	w = &pikpakChunkWriter{
		chunkSize:      int64(chunkSize),
		size:           size,
		con:            max(1, f.opt.UploadConcurrency),
		f:              f,
		o:              o,
		in:             in,
		completedParts: make([]types.CompletedPart, 0),
		client:         client,
	}

	req := &s3.CreateMultipartUploadInput{
		Bucket: &p.Bucket,
		Key:    &p.Key,
	}
	// Apply upload options
	for _, option := range options {
		key, value := option.Header()
		lowerKey := strings.ToLower(key)
		switch lowerKey {
		case "":
			// ignore
		case "cache-control":
			req.CacheControl = aws.String(value)
		case "content-disposition":
			req.ContentDisposition = aws.String(value)
		case "content-encoding":
			req.ContentEncoding = aws.String(value)
		case "content-type":
			req.ContentType = aws.String(value)
		}
	}
	err = w.f.pacer.Call(func() (bool, error) {
		w.mOut, err = w.client.CreateMultipartUpload(ctx, req)
		return w.shouldRetry(ctx, err)
	})
	if err != nil {
		return nil, fmt.Errorf("create multipart upload failed: %w", err)
	}
	fs.Debugf(w.o, "multipart upload: %q initiated", *w.mOut.UploadId)
	return
}

// shouldRetry returns a boolean as to whether this err
// deserves to be retried. It returns the err as a convenience
func (w *pikpakChunkWriter) shouldRetry(ctx context.Context, err error) (bool, error) {
	if fserrors.ContextError(ctx, &err) {
		return false, err
	}
	if fserrors.ShouldRetry(err) {
		return true, err
	}
	return false, err
}

// add a part number and etag to the completed parts
func (w *pikpakChunkWriter) addCompletedPart(part types.CompletedPart) {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.completedParts = append(w.completedParts, part)
}

// WriteChunk will write chunk number with reader bytes, where chunk number >= 0
func (w *pikpakChunkWriter) WriteChunk(ctx context.Context, chunkNumber int32, reader io.ReadSeeker) (currentChunkSize int64, err error) {
	if chunkNumber < 0 {
		err := fmt.Errorf("invalid chunk number provided: %v", chunkNumber)
		return -1, err
	}

	// S3 part numbers are 1-based while chunk numbers are 0-based
	partNumber := chunkNumber + 1
	var res *s3.UploadPartOutput
	err = w.f.pacer.Call(func() (bool, error) {
		// Discover the size by seeking to the end
		currentChunkSize, err = reader.Seek(0, io.SeekEnd)
		if err != nil {
			return false, err
		}
		// rewind the reader on retry and after discovering the size
		_, err := reader.Seek(0, io.SeekStart)
		if err != nil {
			return false, err
		}
		res, err = w.client.UploadPart(ctx, &s3.UploadPartInput{
			Bucket:     w.mOut.Bucket,
			Key:        w.mOut.Key,
			UploadId:   w.mOut.UploadId,
			PartNumber: &partNumber,
			Body:       reader,
		})
		if err != nil {
			if chunkNumber <= 8 {
				return w.shouldRetry(ctx, err)
			}
			// retry all chunks once we have done the first few
			return true, err
		}
		return false, nil
	})
	if err != nil {
		return -1, fmt.Errorf("failed to upload chunk %d with %v bytes: %w", partNumber, currentChunkSize, err)
	}

	w.addCompletedPart(types.CompletedPart{
		PartNumber: &partNumber,
		ETag:       res.ETag,
	})

	fs.Debugf(w.o, "multipart upload: wrote chunk %d with %v bytes", partNumber, currentChunkSize)
	return currentChunkSize, err
}

// Abort the multipart upload
func (w *pikpakChunkWriter) Abort(ctx context.Context) (err error) {
	// Abort the upload session
	err = w.f.pacer.Call(func() (bool, error) {
		_, err = w.client.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{
			Bucket:   w.mOut.Bucket,
			Key:      w.mOut.Key,
			UploadId: w.mOut.UploadId,
		})
		return w.shouldRetry(ctx, err)
	})
	if err != nil {
		return fmt.Errorf("failed to abort multipart upload %q: %w", *w.mOut.UploadId, err)
	}
	fs.Debugf(w.o, "multipart upload: %q aborted", *w.mOut.UploadId)
	return
}

// Close and finalise the multipart upload
func (w *pikpakChunkWriter) Close(ctx context.Context) (err error) {
	// sort the completed parts by part number
	sort.Slice(w.completedParts, func(i, j int) bool {
		return *w.completedParts[i].PartNumber < *w.completedParts[j].PartNumber
	})
	// Finalise the upload session
	err = w.f.pacer.Call(func() (bool, error) {
		_, err = w.client.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{
			Bucket:   w.mOut.Bucket,
			Key:      w.mOut.Key,
			UploadId: w.mOut.UploadId,
			MultipartUpload: &types.CompletedMultipartUpload{
				Parts: w.completedParts,
			},
		})
		return w.shouldRetry(ctx, err)
	})
	if err != nil {
		return fmt.Errorf("failed to complete multipart upload: %w", err)
	}
	fs.Debugf(w.o, "multipart upload: %q finished", *w.mOut.UploadId)
	return
}