chunkedreader: add --vfs-read-chunk-streams to parallel read chunks

This converts the ChunkedReader into an interface and provides two
implementations: one sequential and one parallel.

This can be used to improve the performance of the VFS on
high-bandwidth or high-latency links.

Fixes #4760
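
As a rough sketch of the new API surface (illustrative only, not part of
the commit; the helper name and chunk sizes here are made up), a caller
now passes a stream count as the final argument of New and gets back the
ChunkedReader interface:

package example

import (
	"context"
	"io"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/chunkedreader"
)

// readObject reads the whole object through a chunked reader.
// Per the New function in the diff below, streams <= 1 (or an object
// of unknown size) selects the sequential implementation, while
// streams > 1 selects the parallel one.
func readObject(ctx context.Context, o fs.Object) ([]byte, error) {
	cr := chunkedreader.New(ctx, o, 128*1024, 16*1024*1024, 4)
	defer func() { _ = cr.Close() }()
	return io.ReadAll(cr)
}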
Nick Craig-Wood
2024-03-12 16:57:16 +00:00
parent 10270a4354
commit 27b281ef69
11 changed files with 835 additions and 236 deletions


@@ -1,14 +1,12 @@
-// Package chunkedreader provides functionality for reading in chunks.
+// Package chunkedreader provides functionality for reading a stream in chunks.
 package chunkedreader
 
 import (
 	"context"
 	"errors"
 	"io"
-	"sync"
 
 	"github.com/rclone/rclone/fs"
-	"github.com/rclone/rclone/fs/hash"
 )
 
 // io related errors returned by ChunkedReader
@@ -17,22 +15,13 @@ var (
 	ErrorInvalidSeek = errors.New("invalid seek position")
 )
 
-// ChunkedReader is a reader for an Object with the possibility
-// of reading the source in chunks of given size
-//
-// An initialChunkSize of <= 0 will disable chunked reading.
-type ChunkedReader struct {
-	ctx              context.Context
-	mu               sync.Mutex    // protects following fields
-	o                fs.Object     // source to read from
-	rc               io.ReadCloser // reader for the current open chunk
-	offset           int64         // offset the next Read will start. -1 forces a reopen of o
-	chunkOffset      int64         // beginning of the current or next chunk
-	chunkSize        int64         // length of the current or next chunk. -1 will open o from chunkOffset to the end
-	initialChunkSize int64         // default chunkSize after the chunk specified by RangeSeek is complete
-	maxChunkSize     int64         // consecutive read chunks will double in size until reached. -1 means no limit
-	customChunkSize  bool          // is the current chunkSize set by RangeSeek?
-	closed           bool          // has Close been called?
-}
+// ChunkedReader describes what a chunked reader can do.
+type ChunkedReader interface {
+	io.Reader
+	io.Seeker
+	io.Closer
+	fs.RangeSeeker
+	Open() (ChunkedReader, error)
+}
 
 // New returns a ChunkedReader for the Object.
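
Because both implementations satisfy the interface above, calling code
does not need to know which one it got. A minimal sketch of
interface-level use (readRange is a hypothetical helper, not code from
this commit; it relies on the RangeSeek behaviour documented further
down, which at least the sequential implementation provides):

package example

import (
	"context"
	"io"

	"github.com/rclone/rclone/fs/chunkedreader"
)

// readRange reads length bytes starting at off. The RangeSeek only
// records the new position and chunk length; the source is not
// reopened until the first Read.
func readRange(ctx context.Context, cr chunkedreader.ChunkedReader, off, length int64) ([]byte, error) {
	if _, err := cr.RangeSeek(ctx, off, io.SeekStart, length); err != nil {
		return nil, err
	}
	buf := make([]byte, length)
	n, err := io.ReadFull(cr, buf)
	if err != nil && err != io.ErrUnexpectedEOF {
		return nil, err
	}
	return buf[:n], nil
}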
@@ -41,210 +30,18 @@ type ChunkedReader struct {
 // If maxChunkSize is greater than initialChunkSize, the chunk size will be
 // doubled after each chunk read with a maximum of maxChunkSize.
 // A Seek or RangeSeek will reset the chunk size to its initial value
-func New(ctx context.Context, o fs.Object, initialChunkSize int64, maxChunkSize int64) *ChunkedReader {
+func New(ctx context.Context, o fs.Object, initialChunkSize int64, maxChunkSize int64, streams int) ChunkedReader {
 	if initialChunkSize <= 0 {
 		initialChunkSize = -1
 	}
 	if maxChunkSize != -1 && maxChunkSize < initialChunkSize {
 		maxChunkSize = initialChunkSize
 	}
-	return &ChunkedReader{
-		ctx:              ctx,
-		o:                o,
-		offset:           -1,
-		chunkSize:        initialChunkSize,
-		initialChunkSize: initialChunkSize,
-		maxChunkSize:     maxChunkSize,
-	}
+	if streams < 0 {
+		streams = 0
+	}
+	if streams <= 1 || o.Size() < 0 {
+		return newSequential(ctx, o, initialChunkSize, maxChunkSize)
+	}
+	return newParallel(ctx, o, initialChunkSize, streams)
 }
-
-// Read from the file - for details see io.Reader
-func (cr *ChunkedReader) Read(p []byte) (n int, err error) {
-	cr.mu.Lock()
-	defer cr.mu.Unlock()
-	if cr.closed {
-		return 0, ErrorFileClosed
-	}
-	for reqSize := int64(len(p)); reqSize > 0; reqSize = int64(len(p)) {
-		// the current chunk boundary. valid only when chunkSize > 0
-		chunkEnd := cr.chunkOffset + cr.chunkSize
-		fs.Debugf(cr.o, "ChunkedReader.Read at %d length %d chunkOffset %d chunkSize %d", cr.offset, reqSize, cr.chunkOffset, cr.chunkSize)
-		switch {
-		case cr.chunkSize > 0 && cr.offset == chunkEnd: // last chunk read completely
-			cr.chunkOffset = cr.offset
-			if cr.customChunkSize { // last chunkSize was set by RangeSeek
-				cr.customChunkSize = false
-				cr.chunkSize = cr.initialChunkSize
-			} else {
-				cr.chunkSize *= 2
-				if cr.chunkSize > cr.maxChunkSize && cr.maxChunkSize != -1 {
-					cr.chunkSize = cr.maxChunkSize
-				}
-			}
-			// recalculate the chunk boundary. valid only when chunkSize > 0
-			chunkEnd = cr.chunkOffset + cr.chunkSize
-			fallthrough
-		case cr.offset == -1: // first Read or Read after RangeSeek
-			err = cr.openRange()
-			if err != nil {
-				return
-			}
-		}
-		var buf []byte
-		chunkRest := chunkEnd - cr.offset
-		// limit read to chunk boundaries if chunkSize > 0
-		if reqSize > chunkRest && cr.chunkSize > 0 {
-			buf, p = p[0:chunkRest], p[chunkRest:]
-		} else {
-			buf, p = p, nil
-		}
-		var rn int
-		rn, err = io.ReadFull(cr.rc, buf)
-		n += rn
-		cr.offset += int64(rn)
-		if err != nil {
-			if err == io.ErrUnexpectedEOF {
-				err = io.EOF
-			}
-			return
-		}
-	}
-	return n, nil
-}
-
-// Close the file - for details see io.Closer
-//
-// All methods on ChunkedReader will return ErrorFileClosed afterwards
-func (cr *ChunkedReader) Close() error {
-	cr.mu.Lock()
-	defer cr.mu.Unlock()
-	if cr.closed {
-		return ErrorFileClosed
-	}
-	cr.closed = true
-	return cr.resetReader(nil, 0)
-}
-
-// Seek the file - for details see io.Seeker
-func (cr *ChunkedReader) Seek(offset int64, whence int) (int64, error) {
-	return cr.RangeSeek(context.TODO(), offset, whence, -1)
-}
-
-// RangeSeek the file - for details see RangeSeeker
-//
-// The specified length will only apply to the next chunk opened.
-// RangeSeek will not reopen the source until Read is called.
-func (cr *ChunkedReader) RangeSeek(ctx context.Context, offset int64, whence int, length int64) (int64, error) {
-	cr.mu.Lock()
-	defer cr.mu.Unlock()
-	fs.Debugf(cr.o, "ChunkedReader.RangeSeek from %d to %d length %d", cr.offset, offset, length)
-	if cr.closed {
-		return 0, ErrorFileClosed
-	}
-	size := cr.o.Size()
-	switch whence {
-	case io.SeekStart:
-		cr.offset = 0
-	case io.SeekEnd:
-		cr.offset = size
-	}
-	// set the new chunk start
-	cr.chunkOffset = cr.offset + offset
-	// force reopen on next Read
-	cr.offset = -1
-	if length > 0 {
-		cr.customChunkSize = true
-		cr.chunkSize = length
-	} else {
-		cr.chunkSize = cr.initialChunkSize
-	}
-	if cr.chunkOffset < 0 || cr.chunkOffset >= size {
-		cr.chunkOffset = 0
-		return 0, ErrorInvalidSeek
-	}
-	return cr.chunkOffset, nil
-}
-
-// Open forces the connection to be opened
-func (cr *ChunkedReader) Open() (*ChunkedReader, error) {
-	cr.mu.Lock()
-	defer cr.mu.Unlock()
-	if cr.rc != nil && cr.offset != -1 {
-		return cr, nil
-	}
-	return cr, cr.openRange()
-}
-
-// openRange will open the source Object with the current chunk range
-//
-// If the current open reader implements RangeSeeker, it is tried first.
-// When RangeSeek fails, o.Open with a RangeOption is used.
-//
-// A length <= 0 will request till the end of the file
-func (cr *ChunkedReader) openRange() error {
-	offset, length := cr.chunkOffset, cr.chunkSize
-	fs.Debugf(cr.o, "ChunkedReader.openRange at %d length %d", offset, length)
-	if cr.closed {
-		return ErrorFileClosed
-	}
-	if rs, ok := cr.rc.(fs.RangeSeeker); ok {
-		n, err := rs.RangeSeek(cr.ctx, offset, io.SeekStart, length)
-		if err == nil && n == offset {
-			cr.offset = offset
-			return nil
-		}
-		if err != nil {
-			fs.Debugf(cr.o, "ChunkedReader.openRange seek failed (%s). Trying Open", err)
-		} else {
-			fs.Debugf(cr.o, "ChunkedReader.openRange seeked to wrong offset. Wanted %d, got %d. Trying Open", offset, n)
-		}
-	}
-	var rc io.ReadCloser
-	var err error
-	if length <= 0 {
-		if offset == 0 {
-			rc, err = cr.o.Open(cr.ctx, &fs.HashesOption{Hashes: hash.Set(hash.None)})
-		} else {
-			rc, err = cr.o.Open(cr.ctx, &fs.HashesOption{Hashes: hash.Set(hash.None)}, &fs.RangeOption{Start: offset, End: -1})
-		}
-	} else {
-		rc, err = cr.o.Open(cr.ctx, &fs.HashesOption{Hashes: hash.Set(hash.None)}, &fs.RangeOption{Start: offset, End: offset + length - 1})
-	}
-	if err != nil {
-		return err
-	}
-	return cr.resetReader(rc, offset)
-}
-
-// resetReader switches the current reader to the given reader.
-// The old reader will be Close'd before setting the new reader.
-func (cr *ChunkedReader) resetReader(rc io.ReadCloser, offset int64) error {
-	if cr.rc != nil {
-		if err := cr.rc.Close(); err != nil {
-			return err
-		}
-	}
-	cr.rc = rc
-	cr.offset = offset
-	return nil
-}
-
-var (
-	_ io.ReadCloser  = (*ChunkedReader)(nil)
-	_ io.Seeker      = (*ChunkedReader)(nil)
-	_ fs.RangeSeeker = (*ChunkedReader)(nil)
-)
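
For reference, the chunk sizing rule that the removed Read method above
implements (and which, per the commit message, the sequential
implementation carries forward) can be stated in isolation. This is an
illustrative sketch, not code from the commit:

package example

// nextChunkSize returns the size of the chunk that follows a completed
// chunk of size current: a length set explicitly by RangeSeek applies
// to that one chunk only, after which the size falls back to initial;
// otherwise the size doubles until it reaches max (-1 means no limit).
func nextChunkSize(current, initial, max int64, setByRangeSeek bool) int64 {
	if setByRangeSeek {
		return initial
	}
	next := current * 2
	if max != -1 && next > max {
		next = max
	}
	return next
}

With initial = 128 KiB and max = 1 MiB this yields chunks of 128 KiB,
256 KiB, 512 KiB, 1 MiB, 1 MiB, and so on.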