replication: fix retry wait behavior

An fsrep.Replication is either Ready, Retry or in a terminal state.
The queue prefers Ready over Retry:

Ready is sorted by nextStepDate to progress evenly.
Retry is sorted by error count, to de-prioritize filesystems that fail
often. This way we don't get stuck on individual failing filesystems
and don't lose the other, working filesystems to the watchdog.

fsrep.Replication no longer blocks in the Retry state; we have
replication.WorkingWait for that.
Christian Schwarz 2018-10-19 15:53:58 +02:00
parent 69bfcb7bed
commit 45373168ad
4 changed files with 53 additions and 52 deletions
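
For illustration, the ordering described in the commit message amounts to a two-level sort: state class first (Ready before Retry), then a per-class key (nextStepDate for Ready, error count for Retry). The standalone Go sketch below mirrors that logic; the type and dataset names are made up for the sketch, and the actual implementation is the lessmap change in the queue diff further down.

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

type class int

const (
	ready class = iota // preferred
	retry              // de-prioritized
)

// item is a stand-in for the queue entries in the diff below.
type item struct {
	fs                   string
	class                class
	nextStepDate         time.Time // ordering key while Ready
	errorStateEnterCount int       // ordering key while Retry
}

// less implements: Ready before Retry; Ready by nextStepDate,
// Retry by how often the filesystem has entered an error state.
func less(a, b item) bool {
	if a.class != b.class {
		return a.class < b.class
	}
	if a.class == ready {
		return a.nextStepDate.Before(b.nextStepDate)
	}
	return a.errorStateEnterCount < b.errorStateEnterCount
}

func main() {
	now := time.Now()
	q := []item{
		{fs: "zroot/b", class: retry, errorStateEnterCount: 5},
		{fs: "zroot/a", class: ready, nextStepDate: now.Add(2 * time.Hour)},
		{fs: "zroot/c", class: ready, nextStepDate: now.Add(1 * time.Hour)},
		{fs: "zroot/d", class: retry, errorStateEnterCount: 1},
	}
	sort.Slice(q, func(i, j int) bool { return less(q[i], q[j]) })
	for _, it := range q {
		fmt.Println(it.fs) // zroot/c, zroot/a, zroot/d, zroot/b
	}
}
```

Sorting Retry by error count instead of a wall-clock deadline is what keeps one repeatedly failing filesystem from starving the healthy ones until the watchdog fires.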


@@ -76,7 +76,7 @@ type State uint
 const (
 	Ready State = 1 << iota
-	RetryWait
+	Retry
 	PermanentError
 	Completed
 )
@@ -84,13 +84,17 @@ const (
 func (s State) fsrsf() state {
 	m := map[State]state{
 		Ready: stateReady,
-		RetryWait: stateRetryWait,
+		Retry: stateRetry,
 		PermanentError: nil,
 		Completed: nil,
 	}
 	return m[s]
 }
 
+func (s State) IsErrorState() bool {
+	return s & (Retry|PermanentError) != 0
+}
+
 type Replication struct {
 	promBytesReplicated prometheus.Counter
@@ -99,7 +103,6 @@ type Replication struct {
 	state State
 	fs string
 	err error
-	retryWaitUntil time.Time
 	completed, pending []*ReplicationStep
 }
@@ -109,6 +112,15 @@ func (f *Replication) State() State {
 	return f.state
 }
 
+func (f *Replication) Err() error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	if f.state & (Retry|PermanentError) != 0 {
+		return f.err
+	}
+	return nil
+}
+
 func (f *Replication) UpdateSizeEsitmate(ctx context.Context, sender Sender) error {
 	f.lock.Lock()
 	defer f.lock.Unlock()
@@ -192,7 +204,7 @@ type ReplicationStep struct {
 	expectedSize int64 // 0 means no size estimate present / possible
 }
 
-func (f *Replication) TakeStep(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) (post State, nextStepDate, retryWaitUntil time.Time) {
+func (f *Replication) TakeStep(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) (post State, nextStepDate time.Time) {
 
 	var u updater = func(fu func(*Replication)) State {
 		f.lock.Lock()
@@ -214,7 +226,6 @@ func (f *Replication) TakeStep(ctx context.Context, ka *watchdog.KeepAlive, send
 			return
 		}
 		nextStepDate = f.pending[0].to.SnapshotTime()
-		retryWaitUntil = f.retryWaitUntil
 	})
 
 	getLogger(ctx).
@@ -223,21 +234,13 @@ func (f *Replication) TakeStep(ctx context.Context, ka *watchdog.KeepAlive, send
 		WithField("duration", delta).
 		Debug("fsr step taken")
 
-	return post, nextStepDate, retryWaitUntil
-}
-
-func (f *Replication) RetryWaitUntil() time.Time {
-	f.lock.Lock()
-	defer f.lock.Unlock()
-	return f.retryWaitUntil
+	return post, nextStepDate
 }
 
 type updater func(func(fsr *Replication)) State
 type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state
 
-var RetrySleepDuration = 10 * time.Second // FIXME make configurable
-
 func stateReady(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
 
 	var current *ReplicationStep
@@ -267,8 +270,7 @@ func stateReady(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, rece
 		case StepReplicationRetry:
 			fallthrough
 		case StepMarkReplicatedRetry:
-			f.retryWaitUntil = time.Now().Add(RetrySleepDuration)
-			f.state = RetryWait
+			f.state = Retry
 		case StepPermanentError:
 			f.state = PermanentError
 			f.err = errors.New("a replication step failed with a permanent error")
@@ -278,16 +280,9 @@ func stateReady(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, rece
 	}).fsrsf()
 }
 
-func stateRetryWait(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
-	var sleepUntil time.Time
-	u(func(f *Replication) {
-		sleepUntil = f.retryWaitUntil
-	})
-	if time.Now().Before(sleepUntil) {
-		return u(nil).fsrsf()
-	}
-	return u(func(f *Replication) {
-		f.state = Ready
+func stateRetry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
+	return u(func(fsr *Replication) {
+		fsr.state = Ready
 	}).fsrsf()
 }
@@ -314,8 +309,8 @@ func (fsr *Replication) Report() *Report {
 		rep.Pending[i] = fsr.pending[i].Report()
 	}
 
-	if fsr.state&RetryWait != 0 {
-		if len(rep.Pending) != 0 { // should always be true for RetryWait == true?
+	if fsr.state&Retry != 0 {
+		if len(rep.Pending) != 0 { // should always be true for Retry == true?
 			rep.Problem = rep.Pending[0].Problem
 		}
 	}


@@ -5,13 +5,13 @@ package fsrep
 import "strconv"
 
 const (
-	_State_name_0 = "ReadyRetryWait"
+	_State_name_0 = "ReadyRetry"
 	_State_name_1 = "PermanentError"
 	_State_name_2 = "Completed"
 )
 
 var (
-	_State_index_0 = [...]uint8{0, 5, 14}
+	_State_index_0 = [...]uint8{0, 5, 10}
 )
 
 func (i State) String() string {


@@ -12,8 +12,7 @@ type replicationQueueItem struct {
 	state State
 	// duplicates fsr.current.nextStepDate to avoid accessing & locking fsr
 	nextStepDate time.Time
-	// duplicates fsr.retryWaitUntil to avoid accessing & locking fsr
-	retryWaitUntil time.Time
+	errorStateEnterCount int
 
 	fsr *Replication
 }
@@ -40,10 +39,10 @@ var lessmap = map[State]lessmapEntry{
 			return a.nextStepDate.Before(b.nextStepDate)
 		},
 	},
-	RetryWait: {
+	Retry: {
 		prio: 1,
 		less: func(a, b *replicationQueueItem) bool {
-			return a.retryWaitUntil.Before(b.retryWaitUntil)
+			return a.errorStateEnterCount < b.errorStateEnterCount
 		},
 	},
 }
@@ -114,8 +113,10 @@ func (h ReplicationQueueItemHandle) GetFSReplication() *Replication {
 	return h.i.fsr
 }
 
-func (h ReplicationQueueItemHandle) Update(newState State, nextStepDate, retryWaitUntil time.Time) {
+func (h ReplicationQueueItemHandle) Update(newState State, nextStepDate time.Time) {
 	h.i.state = newState
 	h.i.nextStepDate = nextStepDate
-	h.i.retryWaitUntil = retryWaitUntil
+	if h.i.state.IsErrorState() {
+		h.i.errorStateEnterCount++
+	}
 }


@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/zrepl/zrepl/daemon/job/wakeup"
+	"github.com/zrepl/zrepl/util/envconst"
 	"github.com/zrepl/zrepl/util/watchdog"
 	"math/bits"
 	"net"
@@ -192,7 +193,7 @@ func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string)
 	return nil, "no automated way to handle conflict type"
 }
 
-var PlanningRetryInterval = 10 * time.Second // FIXME make constant onfigurable
+var RetryInterval = envconst.Duration("ZREPL_REPLICATION_RETRY_INTERVAL", 4 * time.Second)
 
 func isPermanent(err error) bool {
 	switch err {
@@ -217,7 +218,7 @@ func statePlanning(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, r
 			if isPermanent(err) {
 				r.state = PermanentError
 			} else {
-				r.sleepUntil = time.Now().Add(PlanningRetryInterval)
+				r.sleepUntil = time.Now().Add(RetryInterval)
 				r.state = PlanningError
 			}
 		}).rsf()
@@ -367,17 +368,9 @@ func stateWorking(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, re
 		return rsfNext
 	}
 
-	retryWaitUntil := active.GetFSReplication().RetryWaitUntil()
-	if retryWaitUntil.After(time.Now()) {
-		return u(func(r *Replication) {
-			r.sleepUntil = retryWaitUntil
-			r.state = WorkingWait
-		}).rsf()
-	}
-
-	state, nextStepDate, retryWaitUntil := active.GetFSReplication().TakeStep(ctx, ka, sender, receiver)
-	return u(func(r *Replication) {
-		active.Update(state, nextStepDate, retryWaitUntil)
+	state, nextStepDate := active.GetFSReplication().TakeStep(ctx, ka, sender, receiver)
+	u(func(r *Replication) {
+		active.Update(state, nextStepDate)
 		r.active = nil
 	}).rsf()
@@ -390,6 +383,18 @@ func stateWorking(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, re
 	default:
 	}
 
+	if err := active.GetFSReplication().Err(); err != nil {
+		return u(func(r *Replication) {
+			r.err = err
+			if isPermanent(err) {
+				r.state = PermanentError
+			} else {
+				r.sleepUntil = time.Now().Add(RetryInterval)
+				r.state = WorkingWait
+			}
+		}).rsf()
+	}
+
 	return u(nil).rsf()
 }
@@ -398,8 +403,8 @@ func stateWorkingWait(ctx context.Context, ka *watchdog.KeepAlive, sender Sender
 	u(func(r *Replication) {
 		sleepUntil = r.sleepUntil
 	})
-	t := time.NewTimer(PlanningRetryInterval)
-	getLogger(ctx).WithField("until", sleepUntil).Info("retry wait because no filesystems are ready")
+	t := time.NewTimer(RetryInterval)
+	getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after replication step error")
 	defer t.Stop()
 	select {
 	case <-ctx.Done():