mirror of
https://github.com/zrepl/zrepl.git
synced 2024-11-25 09:54:47 +01:00
replication/driver: simplify second-attempt step correlation code & fix statekeeping
Before this change, the step correlation code returned early in several cases and, as a result:
- did not set f.planning.done in the cases where it was a no-op
- did not set f.planning.err in the cases where correlation did not succeed

Reported-by: InsanePrawn <insane.prawny@gmail.com>
This commit is contained in:
parent
c3d87289bb
commit
0b48ba54f8
@ -492,57 +492,37 @@ func (f *fs) do(ctx context.Context, pq *stepQueue, prev *fs) {
|
|||||||
// => don't set f.planning.done just yet
|
// => don't set f.planning.done just yet
|
||||||
f.debug("initial len(fs.planned.steps) = %d", len(f.planned.steps))
|
f.debug("initial len(fs.planned.steps) = %d", len(f.planned.steps))
|
||||||
|
|
||||||
// for not-first attempts, only allow fs.planned.steps
|
// for not-first attempts that succeeded in planning, only allow fs.planned.steps
|
||||||
// up to including the originally planned target snapshot
|
// up to and including the originally planned target snapshot
|
||||||
if prev != nil && prev.planning.done && prev.planning.err == nil {
|
if prev != nil && prev.planning.done && prev.planning.err == nil {
|
||||||
|
f.debug("attempting to correlate plan with previous attempt to find out what is left to do")
|
||||||
|
// find the highest of the previously uncompleted steps for which we can also find a step
|
||||||
|
// in our current plan
|
||||||
prevUncompleted := prev.planned.steps[prev.planned.step:]
|
prevUncompleted := prev.planned.steps[prev.planned.step:]
|
||||||
if len(prevUncompleted) == 0 {
|
var target struct{ prev, cur int }
|
||||||
f.debug("prevUncompleted is empty")
|
target.prev = -1
|
||||||
return
|
target.cur = -1
|
||||||
}
|
out:
|
||||||
if len(f.planned.steps) == 0 {
|
for p := len(prevUncompleted) - 1; p >= 0; p-- {
|
||||||
f.debug("fs.planned.steps is empty")
|
for q := len(f.planned.steps) - 1; q >= 0; q-- {
|
||||||
return
|
if prevUncompleted[p].step.TargetEquals(f.planned.steps[q].step) {
|
||||||
}
|
target.prev = p
|
||||||
prevFailed := prevUncompleted[0]
|
target.cur = q
|
||||||
curFirst := f.planned.steps[0]
|
break out
|
||||||
// we assume that PlanFS retries prevFailed (using curFirst)
|
|
||||||
if !prevFailed.step.TargetEquals(curFirst.step) {
|
|
||||||
f.debug("Targets don't match")
|
|
||||||
// Two options:
|
|
||||||
// A: planning algorithm is broken
|
|
||||||
// B: manual user intervention inbetween
|
|
||||||
// Neither way will we make progress, so let's error out
|
|
||||||
stepFmt := func(step *step) string {
|
|
||||||
r := step.report()
|
|
||||||
s := r.Info
|
|
||||||
if r.IsIncremental() {
|
|
||||||
return fmt.Sprintf("%s=>%s", s.From, s.To)
|
|
||||||
} else {
|
|
||||||
return fmt.Sprintf("full=>%s", s.To)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
msg := fmt.Sprintf("last attempt's uncompleted step %s does not correspond to this attempt's first planned step %s",
|
}
|
||||||
stepFmt(prevFailed), stepFmt(curFirst))
|
if target.prev == -1 || target.cur == -1 {
|
||||||
f.planned.stepErr = newTimedError(errors.New(msg), time.Now())
|
f.debug("no correlation possible between previous attempt and this attempt's plan")
|
||||||
|
f.planning.err = newTimedError(fmt.Errorf("cannot correlate previously failed attempt to current plan"), time.Now())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// only allow until step targets diverge
|
|
||||||
min := len(prevUncompleted)
|
f.planned.steps = f.planned.steps[0:target.cur]
|
||||||
if min > len(f.planned.steps) {
|
f.debug("found correlation, new steps are len(fs.planned.steps) = %d", len(f.planned.steps))
|
||||||
min = len(f.planned.steps)
|
} else {
|
||||||
}
|
f.debug("previous attempt does not exist or did not finish planning, no correlation possible, taking this attempt's plan as is")
|
||||||
diverge := 0
|
|
||||||
for ; diverge < min; diverge++ {
|
|
||||||
f.debug("diverge compare iteration %d", diverge)
|
|
||||||
if !f.planned.steps[diverge].step.TargetEquals(prevUncompleted[diverge].step) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
f.debug("diverge is %d", diverge)
|
|
||||||
f.planned.steps = f.planned.steps[0:diverge]
|
|
||||||
}
|
}
|
||||||
f.debug("post-prev-merge len(fs.planned.steps) = %d", len(f.planned.steps))
|
|
||||||
|
|
||||||
// now we are done planning (f.planned.steps won't change from now on)
|
// now we are done planning (f.planned.steps won't change from now on)
|
||||||
f.planning.done = true
|
f.planning.done = true
|
||||||
|
Loading…
Reference in New Issue
Block a user