rclone/lib/cache/cache.go
nielash 9b4b3033da fs/cache: fix parent not getting pinned when remote is a file
Before this change, when cache.GetFn was called on a file rather than a
directory, two cache entries would be added (the file + its parent) but only one
of them would get pinned if the caller then called Pin(f). This left the other
one exposed to expiration if the ci.FsCacheExpireDuration was reached. This was
problematic because both entries point to the same Fs, and if one entry expires
while the other is pinned, the Shutdown method gets erroneously called on an Fs
that is still in use.

An example of the problem showed up in the Hasher backend, which uses the
Shutdown method to stop the bolt db used to store hashes. If a command was run
on a Hasher file (ex. `rclone md5sum --download hasher:somelargefile.zip`) and
hashing the file took longer than the --fs-cache-expire-duration (5m by default), the
bolt db was stopped before the hashing operation completed, resulting in an
error.

This change fixes the issue by ensuring that:
1. only one entry is added to the cache (the file's parent, not the file).
2. future lookups correctly find the entry regardless of whether they are called
	with the parent name or one of its children.
3. fs.ErrorIsFile is returned when (and only when) fsString points to a file
	(preserving the fix from 8d5bc7f28b).

Note that f.Root() should always point to the parent dir as of c69eb84573
2024-09-28 13:49:56 +01:00

279 lines
6.8 KiB
Go

// Package cache implements a simple cache where the entries are
// expired after a given time (5 minutes of disuse by default).
package cache
import (
"strings"
"sync"
"time"
)
// Cache holds values indexed by string key. Entries which are not
// pinned are expired once they have gone unused for longer than
// expireDuration (5 minutes by default).
type Cache struct {
mu sync.Mutex // held by the methods while they read or modify cache, expireRunning and finalize
cache map[string]*cacheEntry // the entries, indexed by key
expireRunning bool // true while a cacheExpire timer is scheduled
expireDuration time.Duration // expire the cache entry when it is older than this
expireInterval time.Duration // interval to run the cache expire
finalize func(value interface{}) // called with the value of an entry when it is dropped from the cache
}
// New returns an empty Cache set up with the defaults: entries expire
// after 5 minutes of disuse, the expiry pass runs once a minute and
// the finalizer does nothing.
func New() *Cache {
	c := new(Cache) // expireRunning starts out false
	c.cache = make(map[string]*cacheEntry)
	c.expireDuration = 5 * time.Minute
	c.expireInterval = time.Minute
	c.finalize = func(interface{}) {}
	return c
}
// SetExpireDuration sets how long an entry may go unused before the
// expiry pass removes it.
//
// If it is less than or equal to 0 then things are never cached
//
// It returns c so that calls can be chained. Note that the field is
// written without taking c.mu.
func (c *Cache) SetExpireDuration(d time.Duration) *Cache {
c.expireDuration = d
return c
}
// noCache returns true if we aren't to cache anything, which is the
// case when expireDuration is zero or negative.
func (c *Cache) noCache() bool {
return c.expireDuration <= 0
}
// SetExpireInterval sets how often the cache expiry pass runs.
//
// Passing 0 or a negative duration disables the pass, which is done
// by scheduling it 100 years apart.
//
// It returns c so that calls can be chained.
func (c *Cache) SetExpireInterval(d time.Duration) *Cache {
	const effectivelyNever = 100 * 365 * 24 * time.Hour

	interval := d
	if interval <= 0 {
		interval = effectivelyNever
	}
	c.expireInterval = interval
	return c
}
// cacheEntry is the record stored in the cache map for each key
type cacheEntry struct {
value interface{} // cached item
err error // creation error, returned alongside value by Get
key string // key the entry was created under
lastUsed time.Time // time of last use, compared against expireDuration when expiring
pinCount int // the entry is not expired while this is greater than zero
}
// CreateFunc is called by Get to create the value for a key which is
// not in the cache.
//
// If it returns a nil error the value is stored (unless caching is
// disabled). If it returns a non-nil error then the value and error
// are stored together when ok is true, allowing negative caching,
// otherwise nothing is stored and the error is just returned to the
// caller of Get.
type CreateFunc func(key string) (value interface{}, ok bool, error error)
// used stamps entry with the current time and makes sure an expiry
// pass is scheduled.
//
// It must be called with c.mu held.
func (c *Cache) used(entry *cacheEntry) {
	entry.lastUsed = time.Now()
	if c.expireRunning {
		// A timer is already pending, nothing more to do
		return
	}
	c.expireRunning = true
	time.AfterFunc(c.expireInterval, c.cacheExpire)
}
// Get returns the value stored under key, calling create to make it
// if it isn't in the cache yet.
//
// The cache lock is not held while create runs, so create may itself
// call Get. This also means that concurrent calls for a missing key
// can each run create; the entry stored last is the one that stays.
//
// A create error is stored along with the value only if create
// returned ok, otherwise it is returned without storing anything.
// Nothing is stored if caching is disabled (see noCache).
func (c *Cache) Get(key string, create CreateFunc) (value interface{}, err error) {
	c.mu.Lock()
	if cached, hit := c.cache[key]; hit {
		defer c.mu.Unlock()
		c.used(cached)
		return cached.value, cached.err
	}
	c.mu.Unlock() // Unlock in case Get is called recursively

	created, cacheable, createErr := create(key)
	if createErr != nil && !cacheable {
		return created, createErr
	}
	fresh := &cacheEntry{
		value: created,
		key:   key,
		err:   createErr,
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	if !c.noCache() {
		c.cache[key] = fresh
	}
	c.used(fresh)
	return fresh.value, fresh.err
}
// addPin adds count (which may be negative) to the pin count of the
// entry stored under key and marks the entry as used.
//
// It does nothing if key is not in the cache.
func (c *Cache) addPin(key string, count int) {
	c.mu.Lock()
	defer c.mu.Unlock()

	entry, ok := c.cache[key]
	if !ok {
		return
	}
	entry.pinCount += count
	c.used(entry)
}
// Pin a value in the cache if it exists
//
// This adds one to the pin count of the entry, and entries with a pin
// count greater than zero are skipped by the expiry pass. Pins are
// counted, so an entry pinned N times needs N calls to Unpin before
// it can expire again.
func (c *Cache) Pin(key string) {
c.addPin(key, 1)
}
// Unpin a value in the cache if it exists
//
// This takes one off the pin count of the entry, undoing one call to
// Pin. The count is not clamped, so unpinning more often than pinning
// leaves it negative.
func (c *Cache) Unpin(key string) {
c.addPin(key, -1)
}
// PutErr stores value and err under key, marking the new entry as
// used now.
//
// An entry already stored under key is replaced by a fresh one; the
// old value is not passed to the finalizer and its pin count is not
// carried over. Nothing is stored if caching is disabled.
func (c *Cache) PutErr(key string, value interface{}, err error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if !c.noCache() {
		fresh := &cacheEntry{value: value, key: key, err: err}
		c.used(fresh)
		c.cache[key] = fresh
	}
}
// Put puts a value named key into the cache
//
// It is shorthand for PutErr with a nil error.
func (c *Cache) Put(key string, value interface{}) {
c.PutErr(key, value, nil)
}
// GetMaybe looks key up without creating anything.
//
// It returns the stored value and true if key is in the cache, marking
// the entry as used, or nil and false if it is not. Any error stored
// with the entry is not returned.
func (c *Cache) GetMaybe(key string) (value interface{}, found bool) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if entry, ok := c.cache[key]; ok {
		c.used(entry)
		return entry.value, true
	}
	return nil, false
}
// Delete removes the entry stored under key, passing its value to the
// finalizer first.
//
// Returns true if the entry was found
//
// The finalizer runs with the cache lock held, so it must not call
// back into the cache. The lock is released with defer so that a
// panicking finalizer cannot leave the cache locked for good.
func (c *Cache) Delete(key string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	entry, found := c.cache[key]
	if !found {
		return false
	}
	c.finalize(entry.value)
	delete(c.cache, key)
	return true
}
// DeletePrefix deletes all entries whose key starts with the given
// prefix, passing each value to the finalizer before it is removed.
//
// Returns number of entries deleted
//
// The finalizer runs with the cache lock held, so it must not call
// back into the cache. The lock is released with defer so that a
// panicking finalizer cannot leave the cache locked for good.
func (c *Cache) DeletePrefix(prefix string) (deleted int) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Deleting from a map while ranging over it is allowed in Go
	for key, entry := range c.cache {
		if !strings.HasPrefix(key, prefix) {
			continue
		}
		c.finalize(entry.value)
		delete(c.cache, key)
		deleted++
	}
	return deleted
}
// Rename renames the item at oldKey to newKey.
//
// If there was an existing item at newKey then it takes precedence
// and is returned otherwise the item (if any) at oldKey is returned.
// found is false if neither key is in the cache.
//
// When both keys exist and hold different values, the value at oldKey
// is passed to the finalizer before its entry is dropped. Renaming a
// key to itself leaves the entry in place and just returns it.
//
// The finalizer runs with the cache lock held, so it must not call
// back into the cache. The lock is released with defer so that a
// panic (from the finalizer, or from comparing values of an
// uncomparable type) cannot leave the cache locked for good.
func (c *Cache) Rename(oldKey, newKey string) (value interface{}, found bool) {
	c.mu.Lock()
	defer c.mu.Unlock()

	oldEntry, oldFound := c.cache[oldKey]
	newEntry, newFound := c.cache[newKey]

	switch {
	case newFound:
		// If new entry is found use that
		if oldKey != newKey {
			// If there's an old entry that is different we must finalize it
			if oldFound && newEntry.value != oldEntry.value {
				c.finalize(oldEntry.value)
			}
			delete(c.cache, oldKey)
		}
		// When oldKey == newKey there is nothing to move: deleting
		// oldKey would throw away the very entry being returned.
		c.used(newEntry)
		return newEntry.value, true
	case oldFound:
		// If old entry is found rename it to new and use that
		//
		// NOTE(review): oldEntry.key still holds oldKey after the
		// move - confirm nothing relies on it matching the map key.
		c.cache[newKey] = oldEntry
		// No need to shutdown here, as value lives on under newKey
		delete(c.cache, oldKey)
		c.used(oldEntry)
		return oldEntry.value, true
	}
	return nil, false
}
// cacheExpire is the expiry pass, run from the timer started by used.
//
// It finalizes and removes every entry which is not pinned and which
// was last used more than expireDuration ago. If any entries are left
// afterwards it schedules itself again after expireInterval,
// otherwise it clears expireRunning so that the next call to used
// restarts the timer.
func (c *Cache) cacheExpire() {
	c.mu.Lock()
	defer c.mu.Unlock()

	now := time.Now()
	for key, entry := range c.cache {
		if entry.pinCount > 0 {
			continue // pinned entries never expire
		}
		if now.Sub(entry.lastUsed) <= c.expireDuration {
			continue // used recently enough
		}
		c.finalize(entry.value)
		delete(c.cache, key)
	}

	c.expireRunning = len(c.cache) != 0
	if c.expireRunning {
		time.AfterFunc(c.expireInterval, c.cacheExpire)
	}
}
// Clear removes everything from the cache, pinned or not, passing
// each value to the finalizer before it is removed.
//
// The finalizer runs with the cache lock held, so it must not call
// back into the cache. The lock is released with defer so that a
// panicking finalizer cannot leave the cache locked for good.
func (c *Cache) Clear() {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Deleting from a map while ranging over it is allowed in Go
	for key, entry := range c.cache {
		c.finalize(entry.value)
		delete(c.cache, key)
	}
}
// Entries reports how many entries the cache holds at the moment,
// pinned ones included.
func (c *Cache) Entries() int {
	c.mu.Lock()
	defer c.mu.Unlock()
	return len(c.cache)
}
// SetFinalizer installs finalize as the function which is called with
// the value of an entry when that entry drops out of the cache,
// replacing the one set before.
func (c *Cache) SetFinalizer(finalize func(interface{})) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.finalize = finalize
}
// EntriesWithPinCount reports how many entries in the cache are
// pinned (pin count greater than zero) and how many are not.
//
// An entry counts once however many times it has been pinned.
func (c *Cache) EntriesWithPinCount() (pinned, unpinned int) {
	c.mu.Lock()
	defer c.mu.Unlock()

	for _, entry := range c.cache {
		if entry.pinCount > 0 {
			pinned++
		}
	}
	// Whatever isn't pinned is unpinned
	return pinned, len(c.cache) - pinned
}