From c20e4bd99c09b3a94ab6a57f4e6aad99a38df354 Mon Sep 17 00:00:00 2001 From: liubingrun Date: Mon, 14 Jul 2025 12:25:10 -0400 Subject: [PATCH] backend/s3: Fix memory leak by cloning strings #8683 This commit addresses a potential memory leak in the S3 backend where strings extracted from large API responses were keeping the entire response in memory. The issue occurs because a Go substring shares its backing memory with the string it was sliced from, so retaining even a small substring prevents garbage collection of the whole large XML response. Cloning each retained string into its own allocation lets the response be freed. Signed-off-by: liubingrun --- backend/s3/s3.go | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 1bed705dc..fefdd6e96 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -4458,7 +4458,7 @@ func (f *Fs) list(ctx context.Context, opt listOpt, fn listFn) error { } foundItems += len(resp.Contents) for i, object := range resp.Contents { - remote := deref(object.Key) + remote := *stringClone(deref(object.Key)) if urlEncodeListings { remote, err = url.QueryUnescape(remote) if err != nil { @@ -5911,7 +5911,7 @@ func (o *Object) readMetaData(ctx context.Context) (err error) { func s3MetadataToMap(s3Meta map[string]string) map[string]string { meta := make(map[string]string, len(s3Meta)) for k, v := range s3Meta { - meta[strings.ToLower(k)] = v + meta[strings.ToLower(k)] = *stringClone(v) } return meta } @@ -5954,14 +5954,14 @@ func (o *Object) setMetaData(resp *s3.HeadObjectOutput) { o.lastModified = *resp.LastModified } } - o.mimeType = deref(resp.ContentType) + o.mimeType = strings.Clone(deref(resp.ContentType)) // Set system metadata - o.storageClass = (*string)(&resp.StorageClass) - o.cacheControl = resp.CacheControl - o.contentDisposition = resp.ContentDisposition - o.contentEncoding = resp.ContentEncoding - o.contentLanguage = resp.ContentLanguage + o.storageClass = stringClone(string(resp.StorageClass)) + o.cacheControl = 
stringClonePointer(resp.CacheControl) + o.contentDisposition = stringClonePointer(resp.ContentDisposition) + o.contentEncoding = stringClonePointer(resp.ContentEncoding) + o.contentLanguage = stringClonePointer(resp.ContentLanguage) // If decompressing then size and md5sum are unknown if o.fs.opt.Decompress && deref(o.contentEncoding) == "gzip" { @@ -6476,8 +6476,8 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R } var s3cw *s3ChunkWriter = chunkWriter.(*s3ChunkWriter) - gotETag = s3cw.eTag - versionID = aws.String(s3cw.versionID) + gotETag = *stringClone(s3cw.eTag) + versionID = stringClone(s3cw.versionID) hashOfHashes := md5.Sum(s3cw.md5s) wantETag = fmt.Sprintf("%s-%d", hex.EncodeToString(hashOfHashes[:]), len(s3cw.completedParts)) @@ -6509,8 +6509,8 @@ func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjec } lastModified = time.Now() if resp != nil { - etag = deref(resp.ETag) - versionID = resp.VersionId + etag = *stringClone(deref(resp.ETag)) + versionID = stringClonePointer(resp.VersionId) } return etag, lastModified, versionID, nil } @@ -6562,8 +6562,8 @@ func (o *Object) uploadSinglepartPresignedRequest(ctx context.Context, req *s3.P if date, err := http.ParseTime(resp.Header.Get("Date")); err != nil { lastModified = date } - etag = resp.Header.Get("Etag") - vID := resp.Header.Get("x-amz-version-id") + etag = *stringClone(resp.Header.Get("Etag")) + vID := *stringClone(resp.Header.Get("x-amz-version-id")) if vID != "" { versionID = &vID }