mirror of
https://github.com/rclone/rclone.git
synced 2025-08-09 13:55:15 +02:00
operations: make Open() return an io.ReadSeekCloser #7350
As part of reducing memory usage in rclone, we need a raw handle to an object that we can seek on.
This commit is contained in:
@ -10,23 +10,42 @@ import (
|
||||
"github.com/rclone/rclone/fs/fserrors"
|
||||
)
|
||||
|
||||
// AccountFn is a function which will be called after every read
// from the ReOpen, with n set to the number of bytes returned by
// that read.
//
// It may return an error which will be passed back to the user.
type AccountFn func(n int) error
|
||||
|
||||
// ReOpen is a wrapper for an object reader which reopens the stream on error
|
||||
type ReOpen struct {
|
||||
ctx context.Context
|
||||
mu sync.Mutex // mutex to protect the below
|
||||
src fs.Object // object to open
|
||||
options []fs.OpenOption // option to pass to initial open
|
||||
rc io.ReadCloser // underlying stream
|
||||
read int64 // number of bytes read from this stream
|
||||
maxTries int // maximum number of retries
|
||||
tries int // number of retries we've had so far in this stream
|
||||
err error // if this is set then Read/Close calls will return it
|
||||
opened bool // if set then rc is valid and needs closing
|
||||
ctx context.Context
|
||||
mu sync.Mutex // mutex to protect the below
|
||||
src fs.Object // object to open
|
||||
baseOptions []fs.OpenOption // options to pass to initial open and where offset == 0
|
||||
options []fs.OpenOption // option to pass on subsequent opens where offset != 0
|
||||
rangeOption fs.RangeOption // adjust this range option on re-opens
|
||||
rc io.ReadCloser // underlying stream
|
||||
size int64 // total size of object - can be -ve
|
||||
start int64 // absolute position to start reading from
|
||||
end int64 // absolute position to end reading (exclusive)
|
||||
offset int64 // offset in the file we are at, offset from start
|
||||
newOffset int64 // if different to offset, reopen needed
|
||||
maxTries int // maximum number of retries
|
||||
tries int // number of retries we've had so far in this stream
|
||||
err error // if this is set then Read/Close calls will return it
|
||||
opened bool // if set then rc is valid and needs closing
|
||||
account AccountFn // account for a read
|
||||
reads int // count how many times the data has been read
|
||||
accountOn int // only account on or after this read
|
||||
}
|
||||
|
||||
// Errors returned by ReOpen.
var (
	// errFileClosed is returned by Close on a handle which isn't open
	// and is stored in the handle by a successful Close.
	errFileClosed = errors.New("file already closed")
	// errTooManyTries is set when the number of opens exceeds maxTries.
	errTooManyTries = errors.New("failed to reopen: too many retries")
	// The following are returned by Seek for invalid arguments.
	errInvalidWhence = errors.New("reopen Seek: invalid whence")
	errNegativeSeek  = errors.New("reopen Seek: negative position")
	errSeekPastEnd   = errors.New("reopen Seek: attempt to seek past end of data")
	errBadEndSeek    = errors.New("reopen Seek: can't seek from end with unknown sized object")
)
|
||||
|
||||
// NewReOpen makes a handle which will reopen itself and seek to where
|
||||
@ -37,15 +56,49 @@ var (
|
||||
//
|
||||
// If an fs.RangeOption is set then this will applied when reading from
|
||||
// the start, and updated on retries.
|
||||
func NewReOpen(ctx context.Context, src fs.Object, maxTries int, options ...fs.OpenOption) (rc io.ReadCloser, err error) {
|
||||
func NewReOpen(ctx context.Context, src fs.Object, maxTries int, options ...fs.OpenOption) (rc *ReOpen, err error) {
|
||||
h := &ReOpen{
|
||||
ctx: ctx,
|
||||
src: src,
|
||||
maxTries: maxTries,
|
||||
options: options,
|
||||
ctx: ctx,
|
||||
src: src,
|
||||
maxTries: maxTries,
|
||||
baseOptions: options,
|
||||
size: src.Size(),
|
||||
start: 0,
|
||||
offset: 0,
|
||||
newOffset: -1, // -1 means no seek required
|
||||
}
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
|
||||
// Filter the options for subsequent opens
|
||||
h.options = make([]fs.OpenOption, 0, len(options)+1)
|
||||
var limit int64 = -1
|
||||
for _, option := range options {
|
||||
switch x := option.(type) {
|
||||
case *fs.HashesOption:
|
||||
// leave hash option out when ranging
|
||||
case *fs.RangeOption:
|
||||
h.start, limit = x.Decode(h.end)
|
||||
case *fs.SeekOption:
|
||||
h.start, limit = x.Offset, -1
|
||||
default:
|
||||
h.options = append(h.options, option)
|
||||
}
|
||||
}
|
||||
|
||||
// Put our RangeOption on the end
|
||||
h.rangeOption.Start = h.start
|
||||
h.options = append(h.options, &h.rangeOption)
|
||||
|
||||
// If a size range is set then set the end point of the file to that
|
||||
if limit >= 0 && h.size >= 0 {
|
||||
h.end = h.start + limit
|
||||
h.rangeOption.End = h.end - 1 // remember range options are inclusive
|
||||
} else {
|
||||
h.end = h.size
|
||||
h.rangeOption.End = -1
|
||||
}
|
||||
|
||||
err = h.open()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -66,7 +119,7 @@ func NewReOpen(ctx context.Context, src fs.Object, maxTries int, options ...fs.O
|
||||
// tries.
|
||||
//
|
||||
// Use this instead of calling the Open method on fs.Objects
|
||||
func Open(ctx context.Context, src fs.Object, options ...fs.OpenOption) (rc io.ReadCloser, err error) {
|
||||
func Open(ctx context.Context, src fs.Object, options ...fs.OpenOption) (rc *ReOpen, err error) {
|
||||
maxTries := fs.GetConfig(ctx).LowLevelRetries
|
||||
return NewReOpen(ctx, src, maxTries, options...)
|
||||
}
|
||||
@ -75,49 +128,25 @@ func Open(ctx context.Context, src fs.Object, options ...fs.OpenOption) (rc io.R
|
||||
//
|
||||
// we don't retry here as the Open() call will itself have low level retries
|
||||
func (h *ReOpen) open() error {
|
||||
opts := []fs.OpenOption{}
|
||||
var hashOption *fs.HashesOption
|
||||
var rangeOption *fs.RangeOption
|
||||
for _, option := range h.options {
|
||||
switch option := option.(type) {
|
||||
case *fs.HashesOption:
|
||||
hashOption = option
|
||||
case *fs.RangeOption:
|
||||
rangeOption = option
|
||||
case *fs.HTTPOption:
|
||||
opts = append(opts, option)
|
||||
default:
|
||||
if option.Mandatory() {
|
||||
fs.Logf(h.src, "Unsupported mandatory option: %v", option)
|
||||
}
|
||||
}
|
||||
}
|
||||
if h.read == 0 {
|
||||
if rangeOption != nil {
|
||||
opts = append(opts, rangeOption)
|
||||
}
|
||||
if hashOption != nil {
|
||||
// put hashOption on if reading from the start, ditch otherwise
|
||||
opts = append(opts, hashOption)
|
||||
}
|
||||
var opts []fs.OpenOption
|
||||
if h.offset == 0 {
|
||||
// if reading from the start using the initial options
|
||||
opts = h.baseOptions
|
||||
} else {
|
||||
if rangeOption != nil {
|
||||
// range to the read point
|
||||
opts = append(opts, &fs.RangeOption{Start: rangeOption.Start + h.read, End: rangeOption.End})
|
||||
} else {
|
||||
// seek to the read point
|
||||
opts = append(opts, &fs.SeekOption{Offset: h.read})
|
||||
}
|
||||
// otherwise use the filtered options
|
||||
opts = h.options
|
||||
// Adjust range start to where we have got to
|
||||
h.rangeOption.Start = h.start + h.offset
|
||||
}
|
||||
h.tries++
|
||||
if h.tries > h.maxTries {
|
||||
h.err = errorTooManyTries
|
||||
h.err = errTooManyTries
|
||||
} else {
|
||||
h.rc, h.err = h.src.Open(h.ctx, opts...)
|
||||
}
|
||||
if h.err != nil {
|
||||
if h.tries > 1 {
|
||||
fs.Debugf(h.src, "Reopen failed after %d bytes read: %v", h.read, h.err)
|
||||
fs.Debugf(h.src, "Reopen failed after offset %d bytes read: %v", h.offset, h.err)
|
||||
}
|
||||
return h.err
|
||||
}
|
||||
@ -125,6 +154,31 @@ func (h *ReOpen) open() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// reopen the underlying handle by closing it and reopening it.
|
||||
func (h *ReOpen) reopen() (err error) {
|
||||
// close underlying stream if needed
|
||||
if h.opened {
|
||||
h.opened = false
|
||||
_ = h.rc.Close()
|
||||
}
|
||||
return h.open()
|
||||
}
|
||||
|
||||
// account for n bytes being read
|
||||
func (h *ReOpen) accountRead(n int) error {
|
||||
if h.account == nil {
|
||||
return nil
|
||||
}
|
||||
// Don't start accounting until we've reached this many reads
|
||||
//
|
||||
// rw.reads will be 1 the first time this is called
|
||||
// rw.accountOn 2 means start accounting on the 2nd read through
|
||||
if h.reads >= h.accountOn {
|
||||
return h.account(n)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read bytes retrying as necessary
|
||||
func (h *ReOpen) Read(p []byte) (n int, err error) {
|
||||
h.mu.Lock()
|
||||
@ -133,32 +187,128 @@ func (h *ReOpen) Read(p []byte) (n int, err error) {
|
||||
// return a previous error if there is one
|
||||
return n, h.err
|
||||
}
|
||||
n, err = h.rc.Read(p)
|
||||
if err != nil {
|
||||
h.err = err
|
||||
|
||||
// re-open if seek needed
|
||||
if h.newOffset >= 0 {
|
||||
if h.offset != h.newOffset {
|
||||
fs.Debugf(h.src, "Seek from %d to %d", h.offset, h.newOffset)
|
||||
h.offset = h.newOffset
|
||||
err = h.reopen()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
h.newOffset = -1
|
||||
}
|
||||
h.read += int64(n)
|
||||
if err != nil && err != io.EOF && !fserrors.IsNoLowLevelRetryError(err) {
|
||||
// close underlying stream
|
||||
h.opened = false
|
||||
_ = h.rc.Close()
|
||||
// reopen stream, clearing error if successful
|
||||
fs.Debugf(h.src, "Reopening on read failure after %d bytes: retry %d/%d: %v", h.read, h.tries, h.maxTries, err)
|
||||
if h.open() == nil {
|
||||
err = nil
|
||||
|
||||
// Read a full buffer
|
||||
startOffset := h.offset
|
||||
var nn int
|
||||
for n < len(p) && err == nil {
|
||||
nn, err = h.rc.Read(p[n:])
|
||||
n += nn
|
||||
h.offset += int64(nn)
|
||||
if err != nil && err != io.EOF {
|
||||
h.err = err
|
||||
if !fserrors.IsNoLowLevelRetryError(err) {
|
||||
fs.Debugf(h.src, "Reopening on read failure after offset %d bytes: retry %d/%d: %v", h.offset, h.tries, h.maxTries, err)
|
||||
if h.reopen() == nil {
|
||||
err = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Count a read of the data if we read from the start successfully
|
||||
if startOffset == 0 && n != 0 {
|
||||
h.reads++
|
||||
}
|
||||
// Account the read
|
||||
accErr := h.accountRead(n)
|
||||
if err == nil {
|
||||
err = accErr
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Seek sets the offset for the next Read or Write to offset, interpreted
|
||||
// according to whence: SeekStart means relative to the start of the file,
|
||||
// SeekCurrent means relative to the current offset, and SeekEnd means relative
|
||||
// to the end (for example, offset = -2 specifies the penultimate byte of the
|
||||
// file). Seek returns the new offset relative to the start of the file or an
|
||||
// error, if any.
|
||||
//
|
||||
// Seeking to an offset before the start of the file is an error. Seeking
|
||||
// to any positive offset may be allowed, but if the new offset exceeds the
|
||||
// size of the underlying object the behavior of subsequent I/O operations is
|
||||
// implementation-dependent.
|
||||
func (h *ReOpen) Seek(offset int64, whence int) (int64, error) {
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
if h.err != nil {
|
||||
// return a previous error if there is one
|
||||
return 0, h.err
|
||||
}
|
||||
var abs int64
|
||||
var size = h.end - h.start
|
||||
switch whence {
|
||||
case io.SeekStart:
|
||||
abs = offset
|
||||
case io.SeekCurrent:
|
||||
if h.newOffset >= 0 {
|
||||
abs = h.newOffset + offset
|
||||
} else {
|
||||
abs = h.offset + offset
|
||||
}
|
||||
case io.SeekEnd:
|
||||
if h.size < 0 {
|
||||
return 0, errBadEndSeek
|
||||
}
|
||||
abs = size + offset
|
||||
default:
|
||||
return 0, errInvalidWhence
|
||||
}
|
||||
if abs < 0 {
|
||||
return 0, errNegativeSeek
|
||||
}
|
||||
if h.size >= 0 && abs > size {
|
||||
return size, errSeekPastEnd
|
||||
}
|
||||
|
||||
h.tries = 0 // Reset open count on seek
|
||||
h.newOffset = abs // New offset - applied in Read
|
||||
return abs, nil
|
||||
}
|
||||
|
||||
// Close the stream
|
||||
func (h *ReOpen) Close() error {
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
if !h.opened {
|
||||
return errorFileClosed
|
||||
return errFileClosed
|
||||
}
|
||||
h.opened = false
|
||||
h.err = errorFileClosed
|
||||
h.err = errFileClosed
|
||||
return h.rc.Close()
|
||||
}
|
||||
|
||||
// SetAccounting should be provided with a function which will be
|
||||
// called after every read from the RW.
|
||||
//
|
||||
// It may return an error which will be passed back to the user.
|
||||
func (h *ReOpen) SetAccounting(account AccountFn) *ReOpen {
|
||||
h.account = account
|
||||
return h
|
||||
}
|
||||
|
||||
// DelayAccounting makes sure the accounting function only gets called
|
||||
// on the i-th or later read of the data from this point (counting
|
||||
// from 1).
|
||||
//
|
||||
// This is useful so that we don't account initial reads of the data
|
||||
// e.g. when calculating hashes.
|
||||
//
|
||||
// Set this to 0 to account everything.
|
||||
func (h *ReOpen) DelayAccounting(i int) {
|
||||
h.accountOn = i
|
||||
h.reads = 0
|
||||
}
|
||||
|
Reference in New Issue
Block a user