rclone/fs/asyncreader/asyncreader.go

// Package asyncreader provides an asynchronous reader which reads ahead
// from its input independently of the reads made by its consumer
package asyncreader
import (
"context"
"errors"
"io"
"sync"
"time"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/lib/pool"
"github.com/rclone/rclone/lib/readers"
)
const (
// BufferSize is the default size of the async buffer
BufferSize = 1024 * 1024
softStartInitial = 4 * 1024
bufferCacheSize = 64 // max number of buffers to keep in cache
bufferCacheFlushTime = 5 * time.Second // flush the cached buffers after this long
)
// ErrorStreamAbandoned is returned when the input is closed before the end of the stream
var ErrorStreamAbandoned = errors.New("stream abandoned")
// AsyncReader will do async read-ahead from the input reader
// and make the data available as an io.Reader.
// This should be fully transparent, except that once an error
// has been returned from the Reader, it will not recover.
type AsyncReader struct {
in io.ReadCloser // Input reader
ready chan *buffer // Buffers ready to be handed to the reader
token chan struct{} // Tokens which allow a buffer to be taken
exit chan struct{} // Closes when finished
buffers int // Number of buffers
err error // If an error has occurred it is here
cur *buffer // Current buffer being served
exited chan struct{} // Channel is closed when the async reader shuts down
size int // size of buffer to use
closed bool // whether we have closed the underlying stream
mu sync.Mutex // lock for Read/WriteTo/Abandon/Close
ci *fs.ConfigInfo // for reading config
}
// New returns a reader that will asynchronously read from
// the supplied Reader into a number of buffers each of size BufferSize
// It will start reading from the input immediately, possibly before this
// function has returned.
// The input can be read from the returned reader.
// When done use Close to release the buffers and close the supplied input.
func New(ctx context.Context, rd io.ReadCloser, buffers int) (*AsyncReader, error) {
if buffers <= 0 {
return nil, errors.New("number of buffers too small")
}
if rd == nil {
return nil, errors.New("nil reader supplied")
}
a := &AsyncReader{
ci: fs.GetConfig(ctx),
}
a.init(rd, buffers)
return a, nil
}
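// A minimal usage sketch from a caller's point of view (illustrative only,
// not part of this file): wrap an io.ReadCloser, read through the
// read-ahead buffers, then Close to release the buffers and the input.
//
//    in := io.NopCloser(strings.NewReader("example data"))
//    ar, err := asyncreader.New(context.Background(), in, 4)
//    if err != nil {
//        return err
//    }
//    data, err := io.ReadAll(ar) // served from the async buffers
//    cerr := ar.Close()          // also closes in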
func (a *AsyncReader) init(rd io.ReadCloser, buffers int) {
a.in = rd
a.ready = make(chan *buffer, buffers)
a.token = make(chan struct{}, buffers)
a.exit = make(chan struct{})
a.exited = make(chan struct{})
a.buffers = buffers
a.cur = nil
a.size = softStartInitial
// Create tokens
for i := 0; i < buffers; i++ {
a.token <- struct{}{}
}
// Start async reader
go func() {
// Ensure that when we exit this is signalled.
defer close(a.exited)
defer close(a.ready)
for {
select {
case <-a.token:
b := a.getBuffer()
if a.size < BufferSize {
b.buf = b.buf[:a.size]
a.size <<= 1
}
err := b.read(a.in)
a.ready <- b
if err != nil {
return
}
case <-a.exit:
return
}
}
}()
}
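// The read loop above implements a soft start: the first buffer handed to
// the input is trimmed to softStartInitial bytes and the size doubles on
// every read until it reaches BufferSize, so short streams do not wait for
// a full 1 MiB read. A worked sketch of the progression (illustrative only):
//
//    size := softStartInitial // 4 KiB
//    for size < BufferSize {
//        // reads of 4096, 8192, 16384, ... 524288 bytes
//        size <<= 1
//    }
//    // after that every read fills a full BufferSize (1 MiB) buffer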
// bufferPool is a global pool of buffers
var bufferPool *pool.Pool
var bufferPoolOnce sync.Once
// return the buffer to the pool (clearing it)
func (a *AsyncReader) putBuffer(b *buffer) {
bufferPool.Put(b.buf)
b.buf = nil
}
// get a buffer from the pool
func (a *AsyncReader) getBuffer() *buffer {
bufferPoolOnce.Do(func() {
// Initialise the buffer pool when used
bufferPool = pool.New(bufferCacheFlushTime, BufferSize, bufferCacheSize, a.ci.UseMmap)
})
return &buffer{
buf: bufferPool.Get(),
}
}
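// A hedged sketch of the pool behaviour these two helpers rely on (assuming
// the pool.Pool API exactly as used above: Get hands out a []byte of
// BufferSize bytes, Put returns it for reuse):
//
//    p := pool.New(bufferCacheFlushTime, BufferSize, bufferCacheSize, false)
//    buf := p.Get()   // []byte ready to read into
//    defer p.Put(buf) // cached for reuse, dropped after bufferCacheFlushTime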
// fill swaps in the next ready buffer once the current one is exhausted,
// returning any error stored for the stream.
func (a *AsyncReader) fill() (err error) {
if a.cur.isEmpty() {
if a.cur != nil {
a.putBuffer(a.cur)
a.token <- struct{}{}
a.cur = nil
}
b, ok := <-a.ready
if !ok {
// Return an error to show fill failed
if a.err == nil {
return ErrorStreamAbandoned
}
return a.err
}
a.cur = b
}
return nil
}
// Read will return the next available data.
func (a *AsyncReader) Read(p []byte) (n int, err error) {
a.mu.Lock()
defer a.mu.Unlock()
// Swap buffer and maybe return error
err = a.fill()
if err != nil {
return 0, err
}
// Copy what we can
n = copy(p, a.cur.buffer())
a.cur.increment(n)
// If at end of buffer, return any error, if present
if a.cur.isEmpty() {
a.err = a.cur.err
return n, a.err
}
return n, nil
}
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *AsyncReader) WriteTo(w io.Writer) (n int64, err error) {
a.mu.Lock()
defer a.mu.Unlock()
n = 0
for {
err = a.fill()
if err == io.EOF {
return n, nil
}
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.increment(n2)
n += int64(n2)
if err != nil {
return n, err
}
if a.cur.err == io.EOF {
a.err = a.cur.err
return n, err
}
if a.cur.err != nil {
a.err = a.cur.err
return n, a.cur.err
}
}
}
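// Because AsyncReader implements io.WriterTo, io.Copy will call WriteTo and
// stream each ready buffer straight to the destination without an extra
// copy through an intermediate buffer. Illustrative only:
//
//    n, err := io.Copy(dst, ar) // dispatches to ar.WriteTo(dst)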
// SkipBytes will try to seek 'skip' bytes relative to the current position.
// On success it returns true. If 'skip' is outside the current buffer data or
// an error occurs, Abandon is called and false is returned.
func (a *AsyncReader) SkipBytes(skip int) (ok bool) {
a.mu.Lock()
defer func() {
a.mu.Unlock()
if !ok {
a.Abandon()
}
}()
if a.err != nil {
return false
}
if skip < 0 {
// seek backwards if skip is inside current buffer
if a.cur != nil && a.cur.offset+skip >= 0 {
a.cur.offset += skip
return true
}
return false
}
// early return if skip is past the maximum buffer capacity
if skip >= (len(a.ready)+1)*BufferSize {
return false
}
refillTokens := 0
for {
if a.cur.isEmpty() {
if a.cur != nil {
a.putBuffer(a.cur)
refillTokens++
a.cur = nil
}
select {
case b, ok := <-a.ready:
if !ok {
return false
}
a.cur = b
default:
return false
}
}
n := len(a.cur.buffer())
if n > skip {
n = skip
}
a.cur.increment(n)
skip -= n
if skip == 0 {
for ; refillTokens > 0; refillTokens-- {
a.token <- struct{}{}
}
// If at end of buffer, store any error, if present
if a.cur.isEmpty() && a.cur.err != nil {
a.err = a.cur.err
}
return true
}
if a.cur.err != nil {
a.err = a.cur.err
return false
}
}
}
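// A sketch of the intended use (illustrative only): skip a small amount of
// already-buffered data, for example when resuming slightly ahead of the
// current position, and fall back to reopening the source when the data is
// not buffered:
//
//    if !ar.SkipBytes(512) {
//        // Abandon has already been called; reopen the input and retry
//    }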
// StopBuffering will ensure that the underlying async reader is shut
// down so no more is read from the input.
//
// This does not free the memory, so Abandon() or Close() still needs to be
// called to release the buffers (and, in the case of Close, the input).
//
// This does not wait for Read/WriteTo to complete so can be called
// concurrently to those.
func (a *AsyncReader) StopBuffering() {
select {
case <-a.exit:
// Do nothing if reader routine already exited
return
default:
}
// Close and wait for go routine
close(a.exit)
<-a.exited
}
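// Since StopBuffering does not wait for Read or WriteTo, it can be called
// from another goroutine, for example a cancellation watcher (ctx and ar
// are the caller's own variables, illustrative only):
//
//    go func() {
//        <-ctx.Done()
//        ar.StopBuffering() // stop reading from the input straight away
//    }()
//    // the owner still calls ar.Abandon() or ar.Close() afterwards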
// Abandon will ensure that the underlying async reader is shut down
// and memory is returned. It does everything but close the input.
//
// It will NOT close the input supplied on New.
func (a *AsyncReader) Abandon() {
a.StopBuffering()
// take the lock to wait for Read/WriteTo to complete
a.mu.Lock()
defer a.mu.Unlock()
// Return any outstanding buffers to the Pool
if a.cur != nil {
a.putBuffer(a.cur)
a.cur = nil
}
for b := range a.ready {
a.putBuffer(b)
}
}
// Close will ensure that the underlying async reader is shut down.
// It will also close the input supplied on New.
func (a *AsyncReader) Close() (err error) {
a.Abandon()
if a.closed {
return nil
}
a.closed = true
return a.in.Close()
}
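// A sketch of the two shutdown paths (illustrative only): use Close when the
// AsyncReader should also close the input, use Abandon when the caller will
// close the input itself:
//
//    ar, _ := asyncreader.New(ctx, rc, 4)
//    defer ar.Close() // releases the buffers and closes rc
//
//    // or, when rc is closed elsewhere:
//    ar.Abandon()     // releases the buffers, leaves rc open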
// Internal buffer
// If an error is present, it must be returned
// once all buffer content has been served.
type buffer struct {
buf []byte
err error
offset int
}
// isEmpty returns true if the buffer is nil or the offset has reached the
// end of the buffered data.
func (b *buffer) isEmpty() bool {
if b == nil {
return true
}
if len(b.buf)-b.offset <= 0 {
return true
}
return false
}
// read into start of the buffer from the supplied reader,
// resets the offset and updates the size of the buffer.
// Any error encountered during the read is returned.
func (b *buffer) read(rd io.Reader) error {
var n int
n, b.err = readers.ReadFill(rd, b.buf)
b.buf = b.buf[0:n]
b.offset = 0
return b.err
}
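// A hedged sketch of the ReadFill call above (assuming readers.ReadFill
// keeps reading until the buffer is full or the reader returns an error, as
// the truncation of b.buf to n relies on):
//
//    buf := make([]byte, 4096)
//    n, err := readers.ReadFill(rd, buf)
//    // buf[:n] holds the data read; err may be io.EOF on the final fill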
// Return the buffer at current offset
func (b *buffer) buffer() []byte {
return b.buf[b.offset:]
}
// increment the offset
func (b *buffer) increment(n int) {
b.offset += n
}