s3: Reduce memory usage streaming files by reducing max stream upload size

Before this change rclone would allow the user to stream (eg with rclone mount, rclone rcat or uploading google photos or docs) 5TB files. This meant that rclone allocated 4 * 525 MB buffers per transfer which is way too much memory by default. This change makes rclone use the configured chunk size for streamed uploads. This is 5MB by default which means that rclone can stream upload files up to 48GB by default staying below the 10,000 chunks limit. This can be increased with --s3-chunk-size if necessary. If rclone detects that a file is being streamed to s3 it will make a single NOTICE level log stating the limitation. This fixes the enormous memory usage. Fixes #3568 See: https://forum.rclone.org/t/how-much-memory-does-rclone-need/12743
2025-02-22 13:31:45 +01:00 · 2019-11-06 10:41:03 +00:00 · 2019-11-06 10:41:03 +00:00 · 9b5308144f
commit 9b5308144f
parent 4b20afa94a
1 changed files with 29 additions and 12 deletions
--- a/backend/s3/s3.go
+++ b/backend/s3/s3.go
@ -26,6 +26,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"sync"
 	"time"

 	"github.com/aws/aws-sdk-go/aws"
@ -693,14 +694,25 @@ The minimum is 0 and the maximum is 5GB.`,
 			Name: "chunk_size",
 			Help: `Chunk size to use for uploading.

-When uploading files larger than upload_cutoff they will be uploaded
-as multipart uploads using this chunk size.
+When uploading files larger than upload_cutoff or files with unknown
+size (eg from "rclone rcat" or uploaded with "rclone mount" or google
+photos or google docs) they will be uploaded as multipart uploads
+using this chunk size.

 Note that "--s3-upload-concurrency" chunks of this size are buffered
 in memory per transfer.

 If you are transferring large files over high speed links and you have
-enough memory, then increasing this will speed up the transfers.`,
+enough memory, then increasing this will speed up the transfers.
+
+Rclone will automatically increase the chunk size when uploading a
+large file of known size to stay below the 10,000 chunks limit.
+
+Files of unknown size are uploaded with the configured
+chunk_size. Since the default chunk size is 5MB and there can be at
+most 10,000 chunks, this means that by default the maximum size of
+file you can stream upload is 48GB.  If you wish to stream upload
+larger files then you will need to increase chunk_size.`,
 			Default:  minChunkSize,
 			Advanced: true,
 		}, {
@ -771,12 +783,11 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us

 // Constants
 const (
-	metaMtime           = "Mtime"                       // the meta key to store mtime in - eg X-Amz-Meta-Mtime
-	metaMD5Hash         = "Md5chksum"                   // the meta key to store md5hash in
-	listChunkSize       = 1000                          // number of items to read at once
-	maxRetries          = 10                            // number of retries to make of operations
-	maxSizeForCopy      = 5 * 1024 * 1024 * 1024        // The maximum size of object we can COPY
-	maxFileSize         = 5 * 1024 * 1024 * 1024 * 1024 // largest possible upload file size
+	metaMtime           = "Mtime"                // the meta key to store mtime in - eg X-Amz-Meta-Mtime
+	metaMD5Hash         = "Md5chksum"            // the meta key to store md5hash in
+	listChunkSize       = 1000                   // number of items to read at once
+	maxRetries          = 10                     // number of retries to make of operations
+	maxSizeForCopy      = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY
 	minChunkSize        = fs.SizeSuffix(s3manager.MinUploadPartSize)
 	defaultUploadCutoff = fs.SizeSuffix(200 * 1024 * 1024)
 	maxUploadCutoff     = fs.SizeSuffix(5 * 1024 * 1024 * 1024)
@ -1982,6 +1993,8 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
 	return resp.Body, nil
 }

+var warnStreamUpload sync.Once
+
 // Update the Object from in with modTime and size
 func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
 	bucket, bucketPath := o.split()
@ -2001,10 +2014,14 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 			u.S3 = o.fs.c
 			u.PartSize = int64(o.fs.opt.ChunkSize)

+			// size can be -1 here meaning we don't know the size of the incoming file.  We use ChunkSize
+			// buffers here (default 5MB). With a maximum number of parts (10,000) this will be a file of
+			// 48GB which seems like a not too unreasonable limit.
 			if size == -1 {
-				// Make parts as small as possible while still being able to upload to the
-				// S3 file size limit. Rounded up to nearest MB.
-				u.PartSize = (((maxFileSize / s3manager.MaxUploadParts) >> 20) + 1) << 20
+				warnStreamUpload.Do(func() {
+					fs.Logf(o.fs, "Streaming uploads using chunk size %v will have maximum file size of %v",
+						o.fs.opt.ChunkSize, fs.SizeSuffix(u.PartSize*s3manager.MaxUploadParts))
+				})
 				return
 			}
 			// Adjust PartSize until the number of parts is small enough.