replication: report a filesystem is active vs. blocked on something

- `BlockedOn` prop in JSON report
- Bring back the `*` in front of the filesystem report as an activity indicator.

fixes https://github.com/zrepl/zrepl/issues/505
This commit is contained in:
Christian Schwarz 2021-11-14 17:19:20 +01:00
parent 1850a332ed
commit ac147b5a6f
3 changed files with 37 additions and 8 deletions

View File

@ -216,7 +216,7 @@ func drawJob(t *stringbuilder.B, name string, v *job.Status, history *bytesProgr
}
}
func printFilesystemStatus(t *stringbuilder.B, rep *report.FilesystemReport, active bool, maxFS int) {
func printFilesystemStatus(t *stringbuilder.B, rep *report.FilesystemReport, maxFS int) {
expected, replicated, containsInvalidSizeEstimates := rep.BytesSum()
sizeEstimationImpreciseNotice := ""
@ -235,7 +235,8 @@ func printFilesystemStatus(t *stringbuilder.B, rep *report.FilesystemReport, act
)
activeIndicator := " "
if active {
if rep.BlockedOn == report.FsBlockedOnNothing &&
(rep.State == report.FilesystemPlanning || rep.State == report.FilesystemStepping) {
activeIndicator = "*"
}
t.AddIndent(1)
@ -385,7 +386,7 @@ func renderReplicationReport(t *stringbuilder.B, rep *report.Report, history *by
}
}
for _, fs := range latest.Filesystems {
printFilesystemStatus(t, fs, false, maxFSLen) // FIXME bring 'active' flag back
printFilesystemStatus(t, fs, maxFSLen)
}
}

View File

@ -151,6 +151,8 @@ type fs struct {
l *chainlock.L
blockedOn report.FsBlockedOn
// ordering relationship that must be maintained for initial replication
initialRepOrd struct {
parents, children []*fs
@ -158,6 +160,7 @@ type fs struct {
}
planning struct {
waitingForStepQueue bool
done bool
err *timedError
}
@ -339,6 +342,7 @@ func (a *attempt) doGlobalPlanning(ctx context.Context, prev *attempt) map[*fs]*
fs := &fs{
fs: pfs,
l: a.l,
blockedOn: report.FsBlockedOnNothing,
}
fs.initialRepOrd.parentDidUpdate = make(chan struct{}, 1)
a.fss = append(a.fss, fs)
@ -448,6 +452,10 @@ func (a *attempt) doFilesystems(ctx context.Context, prevs map[*fs]*fs) {
ctx, endTask := trace.WithTaskAndSpan(ctx, "repl-fs", f.report().Info.Name)
defer endTask()
f.do(ctx, stepQueue, prevs[f])
f.l.HoldWhile(func() {
// every return from f means it's unblocked...
f.blockedOn = report.FsBlockedOnNothing
})
}(f)
}
a.l.DropWhile(func() {
@ -486,11 +494,16 @@ func (f *fs) do(ctx context.Context, pq *stepQueue, prev *fs) {
var psteps []Step
var errTime time.Time
var err error
f.blockedOn = report.FsBlockedOnPlanningStepQueue
f.l.DropWhile(func() {
// TODO hacky
// choose target time that is earlier than any snapshot, so fs planning is always prioritized
targetDate := time.Unix(0, 0)
defer pq.WaitReady(ctx, f, targetDate)()
f.l.HoldWhile(func() {
// transition before we call PlanFS
f.blockedOn = report.FsBlockedOnNothing
})
psteps, err = f.fs.PlanFS(ctx) // no shadow
errTime = time.Now() // no shadow
})
@ -545,6 +558,7 @@ func (f *fs) do(ctx context.Context, pq *stepQueue, prev *fs) {
f.planning.done = true
// wait for parents' initial replication
f.blockedOn = report.FsBlockedOnParentInitialRepl
var parents []string
for _, p := range f.initialRepOrd.parents {
parents = append(parents, p.fs.ReportInfo().Name)
@ -613,7 +627,6 @@ func (f *fs) do(ctx context.Context, pq *stepQueue, prev *fs) {
select {
case <-ctx.Done():
f.planned.stepErr = newTimedError(ctx.Err(), time.Now())
return
case <-f.initialRepOrd.parentDidUpdate:
// loop
}
@ -631,7 +644,9 @@ func (f *fs) do(ctx context.Context, pq *stepQueue, prev *fs) {
f.l.DropWhile(func() {
// wait for parallel replication
targetDate := s.step.TargetDate()
f.l.HoldWhile(func() { f.blockedOn = report.FsBlockedOnReplStepQueue })
defer pq.WaitReady(ctx, f, targetDate)()
f.l.HoldWhile(func() { f.blockedOn = report.FsBlockedOnNothing })
// do the step
ctx, endSpan := trace.WithSpan(ctx, fmt.Sprintf("%#v", s.step.ReportInfo()))
defer endSpan()
@ -725,6 +740,7 @@ func (f *fs) report() *report.FilesystemReport {
r := &report.FilesystemReport{
Info: f.fs.ReportInfo(),
State: state,
BlockedOn: f.blockedOn,
PlanError: f.planning.err.IntoReportError(),
StepError: f.planned.stepErr.IntoReportError(),
Steps: make([]*report.StepReport, len(f.planned.steps)),

View File

@ -62,11 +62,23 @@ const (
FilesystemDone FilesystemState = "done"
)
type FsBlockedOn string
const (
FsBlockedOnNothing FsBlockedOn = "nothing"
FsBlockedOnPlanningStepQueue FsBlockedOn = "plan-queue"
FsBlockedOnParentInitialRepl FsBlockedOn = "parent-initial-repl"
FsBlockedOnReplStepQueue FsBlockedOn = "repl-queue"
)
type FilesystemReport struct {
Info *FilesystemInfo
State FilesystemState
// Always valid.
BlockedOn FsBlockedOn
// Valid in State = FilesystemPlanningErrored
PlanError *TimedError
// Valid in State = FilesystemSteppingErrored