package pruner

import (
	"context"
	"fmt"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/zrepl/zrepl/config"
	"github.com/zrepl/zrepl/daemon/logging"
	"github.com/zrepl/zrepl/logger"
	"github.com/zrepl/zrepl/pruning"
	"github.com/zrepl/zrepl/replication/logic/pdu"
	"github.com/zrepl/zrepl/util/envconst"
)

// The sender in the replication setup.
// The pruner uses the Sender to determine which of the Target's filesystems need to be pruned.
// Also, it asks the Sender about the replication cursor of each filesystem
// to enable the 'not_replicated' pruning rule.
//
// Try to keep it compatible with github.com/zrepl/zrepl/endpoint.Endpoint
type Sender interface {
	ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
	ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
}

// The pruning target, i.e., the side on which snapshots are destroyed.
// This can be a replication sender or receiver.
//
// Try to keep it compatible with github.com/zrepl/zrepl/endpoint.Endpoint
type Target interface {
	ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
	ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error)
	DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error)
}

type Logger = logger.Logger

type contextKey int

const (
	contextKeyPruneSide contextKey = 1 + iota
)

func GetLogger(ctx context.Context) Logger {
	pruneSide := ctx.Value(contextKeyPruneSide).(string)
	return logging.GetLogger(ctx, logging.SubsysPruning).WithField("prune_side", pruneSide)
}

type args struct {
	ctx                            context.Context
	target                         Target
	sender                         Sender
	rules                          []pruning.KeepRule
	retryWait                      time.Duration
	considerSnapAtCursorReplicated bool
	promPruneSecs                  prometheus.Observer
}

type Pruner struct {
	args args

	mtx sync.RWMutex

	state State

	// State PlanErr
	err error

	// State Exec
	execQueue *execQueue
}

type PrunerFactory struct {
	senderRules                    []pruning.KeepRule
	receiverRules                  []pruning.KeepRule
	retryWait                      time.Duration
	considerSnapAtCursorReplicated bool
	promPruneSecs                  *prometheus.HistogramVec
}

type LocalPrunerFactory struct {
	keepRules     []pruning.KeepRule
	retryWait     time.Duration
	promPruneSecs *prometheus.HistogramVec
}

func NewLocalPrunerFactory(in config.PruningLocal, promPruneSecs *prometheus.HistogramVec) (*LocalPrunerFactory, error) {
	rules, err := pruning.RulesFromConfig(in.Keep)
	if err != nil {
		return nil, errors.Wrap(err, "cannot build pruning rules")
	}
	for _, r := range in.Keep {
		if _, ok := r.Ret.(*config.PruneKeepNotReplicated); ok {
			// rule NotReplicated for a local pruner doesn't make sense
			// because no replication happens with that job type
			return nil, fmt.Errorf("single-site pruner cannot support `not_replicated` keep rule")
		}
	}
	f := &LocalPrunerFactory{
		keepRules:     rules,
		retryWait:     envconst.Duration("ZREPL_PRUNER_RETRY_INTERVAL", 10*time.Second),
		promPruneSecs: promPruneSecs,
	}
	return f, nil
}

func NewPrunerFactory(in config.PruningSenderReceiver, promPruneSecs *prometheus.HistogramVec) (*PrunerFactory, error) {
	keepRulesReceiver, err := pruning.RulesFromConfig(in.KeepReceiver)
	if err != nil {
		return nil, errors.Wrap(err, "cannot build receiver pruning rules")
	}

	keepRulesSender, err := pruning.RulesFromConfig(in.KeepSender)
	if err != nil {
		return nil, errors.Wrap(err, "cannot build sender pruning rules")
	}

	considerSnapAtCursorReplicated := false
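	// If any NotReplicated keep rule disables KeepSnapshotAtCursor, the snapshot
	// at the replication cursor is treated as replicated during planning and is
	// therefore not protected by that rule.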
	for _, r := range in.KeepSender {
		knr, ok := r.Ret.(*config.PruneKeepNotReplicated)
		if !ok {
			continue
		}
		considerSnapAtCursorReplicated = considerSnapAtCursorReplicated || !knr.KeepSnapshotAtCursor
	}
	f := &PrunerFactory{
		senderRules:                    keepRulesSender,
		receiverRules:                  keepRulesReceiver,
		retryWait:                      envconst.Duration("ZREPL_PRUNER_RETRY_INTERVAL", 10*time.Second),
		considerSnapAtCursorReplicated: considerSnapAtCursorReplicated,
		promPruneSecs:                  promPruneSecs,
	}
	return f, nil
}

func (f *PrunerFactory) BuildSenderPruner(ctx context.Context, target Target, sender Sender) *Pruner {
	p := &Pruner{
		args: args{
			context.WithValue(ctx, contextKeyPruneSide, "sender"),
			target,
			sender,
			f.senderRules,
			f.retryWait,
			f.considerSnapAtCursorReplicated,
			f.promPruneSecs.WithLabelValues("sender"),
		},
		state: Plan,
	}
	return p
}

func (f *PrunerFactory) BuildReceiverPruner(ctx context.Context, target Target, sender Sender) *Pruner {
	p := &Pruner{
		args: args{
			context.WithValue(ctx, contextKeyPruneSide, "receiver"),
			target,
			sender,
			f.receiverRules,
			f.retryWait,
			false, // considerSnapAtCursorReplicated is not relevant on the receiver side
			f.promPruneSecs.WithLabelValues("receiver"),
		},
		state: Plan,
	}
	return p
}

func (f *LocalPrunerFactory) BuildLocalPruner(ctx context.Context, target Target, history Sender) *Pruner {
	p := &Pruner{
		args: args{
			context.WithValue(ctx, contextKeyPruneSide, "local"),
			target,
			history,
			f.keepRules,
			f.retryWait,
			false, // considerSnapAtCursorReplicated is not relevant for local pruning
			f.promPruneSecs.WithLabelValues("local"),
		},
		state: Plan,
	}
	return p
}

//go:generate enumer -type=State
type State int

const (
	Plan State = 1 << iota
	PlanErr
	Exec
	ExecErr
	Done
)

type updater func(func(*Pruner))

func (p *Pruner) Prune() {
	p.prune(p.args)
}

func (p *Pruner) prune(args args) {
	u := func(f func(*Pruner)) {
		p.mtx.Lock()
		defer p.mtx.Unlock()
		f(p)
	}
	// TODO support automatic retries
	// It is advisable to merge this code with package replication/driver before doing so.
	// That will likely require re-modelling struct fs like replication/driver.attempt,
	// including figuring out how to resume a plan after being interrupted by network errors.
	// The non-retrying code in this package should move straight to replication/logic.
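	//
	// A single plan+exec attempt: planning and execution errors are reflected in
	// the pruner's state and per-filesystem reports rather than retried.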
	doOneAttempt(&args, u)
}

type Report struct {
	State              string
	Error              string
	Pending, Completed []FSReport
}

type FSReport struct {
	Filesystem                string
	SnapshotList, DestroyList []SnapshotReport
	SkipReason                FSSkipReason
	LastError                 string
}

type SnapshotReport struct {
	Name       string
	Replicated bool
	Date       time.Time
}

func (p *Pruner) Report() *Report {
	p.mtx.Lock()
	defer p.mtx.Unlock()

	r := Report{State: p.state.String()}

	if p.err != nil {
		r.Error = p.err.Error()
	}

	if p.execQueue != nil {
		r.Pending, r.Completed = p.execQueue.Report()
	}

	return &r
}

func (p *Pruner) State() State {
	p.mtx.Lock()
	defer p.mtx.Unlock()
	return p.state
}

type fs struct {
	path string

	// permanent error during planning
	planErr        error
	planErrContext string

	// if != "", the fs was skipped for planning and the field
	// contains the reason
	skipReason FSSkipReason

	// snapshots presented by target
	// (type snapshot)
	snaps []pruning.Snapshot
	// destroy list returned by pruning.PruneSnapshots(snaps)
	// (type snapshot)
	destroyList []pruning.Snapshot

	mtx sync.RWMutex

	// only during Exec state, also used by execQueue
	execErrLast error
}

type FSSkipReason string

const (
	NotSkipped                   = ""
	SkipPlaceholder              = "filesystem is placeholder"
	SkipNoCorrespondenceOnSender = "filesystem has no correspondence on sender"
)

func (r FSSkipReason) NotSkipped() bool {
	return r == NotSkipped
}

func (f *fs) Report() FSReport {
	f.mtx.Lock()
	defer f.mtx.Unlock()

	r := FSReport{}
	r.Filesystem = f.path
	r.SkipReason = f.skipReason
	if !r.SkipReason.NotSkipped() {
		return r
	}

	if f.planErr != nil {
		r.LastError = f.planErr.Error()
	} else if f.execErrLast != nil {
		r.LastError = f.execErrLast.Error()
	}

	r.SnapshotList = make([]SnapshotReport, len(f.snaps))
	for i, snap := range f.snaps {
		r.SnapshotList[i] = snap.(snapshot).Report()
	}

	r.DestroyList = make([]SnapshotReport, len(f.destroyList))
	for i, snap := range f.destroyList {
		r.DestroyList[i] = snap.(snapshot).Report()
	}

	return r
}

type snapshot struct {
	replicated bool
	date       time.Time
	fsv        *pdu.FilesystemVersion
}

func (s snapshot) Report() SnapshotReport {
	return SnapshotReport{
		Name:       s.Name(),
		Replicated: s.Replicated(),
		Date:       s.Date(),
	}
}

var _ pruning.Snapshot = snapshot{}

func (s snapshot) Name() string { return s.fsv.Name }

func (s snapshot) Replicated() bool { return s.replicated }

func (s snapshot) Date() time.Time { return s.date }

func doOneAttempt(a *args, u updater) {
	ctx, target, sender := a.ctx, a.target, a.sender

	sfssres, err := sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
	if err != nil {
		u(func(p *Pruner) {
			p.state = PlanErr
			p.err = err
		})
		return
	}
	sfss := make(map[string]*pdu.Filesystem)
	for _, sfs := range sfssres.GetFilesystems() {
		sfss[sfs.GetPath()] = sfs
	}

	tfssres, err := target.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
	if err != nil {
		u(func(p *Pruner) {
			p.state = PlanErr
			p.err = err
		})
		return
	}
	tfss := tfssres.GetFilesystems()

	pfss := make([]*fs, len(tfss))
tfss_loop:
	for i, tfs := range tfss {

		l := GetLogger(ctx).WithField("fs", tfs.Path)
		l.Debug("plan filesystem")

		pfs := &fs{
			path: tfs.Path,
		}
		pfss[i] = pfs

		if tfs.GetIsPlaceholder() {
			pfs.skipReason = SkipPlaceholder
			l.WithField("skip_reason", pfs.skipReason).Debug("skipping filesystem")
			continue
		} else if sfs := sfss[tfs.GetPath()]; sfs == nil {
			pfs.skipReason = SkipNoCorrespondenceOnSender
			l.WithField("skip_reason", pfs.skipReason).WithField("sfs", sfs.GetPath()).Debug("skipping filesystem")
			continue
		}

		pfsPlanErrAndLog := func(err error, message string) {
			t := fmt.Sprintf("%T", err)
			pfs.planErr = err
			pfs.planErrContext = message
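			// record the error on the fs so it appears in its report, then log it
			// together with the concrete error type for easier debugging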
			l.WithField("orig_err_type", t).WithError(err).Error(fmt.Sprintf("%s: plan error, skipping filesystem", message))
		}

		tfsvsres, err := target.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: tfs.Path})
		if err != nil {
			pfsPlanErrAndLog(err, "cannot list filesystem versions")
			continue tfss_loop
		}
		tfsvs := tfsvsres.GetVersions()
		// no progress here since we could run in a live-lock (must have used target AND receiver before progress)

		pfs.snaps = make([]pruning.Snapshot, 0, len(tfsvs))

		rcReq := &pdu.ReplicationCursorReq{
			Filesystem: tfs.Path,
		}
		rc, err := sender.ReplicationCursor(ctx, rcReq)
		if err != nil {
			pfsPlanErrAndLog(err, "cannot get replication cursor bookmark")
			continue tfss_loop
		}
		if rc.GetNotexist() {
			err := errors.New("replication cursor bookmark does not exist (one successful replication is required before pruning works)")
			pfsPlanErrAndLog(err, "")
			continue tfss_loop
		}

		// scan from older to newer, all snapshots older than cursor are interpreted as replicated
		sort.Slice(tfsvs, func(i, j int) bool {
			return tfsvs[i].CreateTXG < tfsvs[j].CreateTXG
		})

		haveCursorSnapshot := false
		for _, tfsv := range tfsvs {
			if tfsv.Type != pdu.FilesystemVersion_Snapshot {
				continue
			}
			if tfsv.Guid == rc.GetGuid() {
				haveCursorSnapshot = true
			}
		}
		preCursor := haveCursorSnapshot
		for _, tfsv := range tfsvs {
			if tfsv.Type != pdu.FilesystemVersion_Snapshot {
				continue
			}
			creation, err := tfsv.CreationAsTime()
			if err != nil {
				err := fmt.Errorf("%s: %s", tfsv.RelName(), err)
				pfsPlanErrAndLog(err, "fs version with invalid creation date")
				continue tfss_loop
			}
			// note that we cannot use CreateTXG because target and receiver could be on different pools
			atCursor := tfsv.Guid == rc.GetGuid()
			preCursor = preCursor && !atCursor
			pfs.snaps = append(pfs.snaps, snapshot{
				replicated: preCursor || (a.considerSnapAtCursorReplicated && atCursor),
				date:       creation,
				fsv:        tfsv,
			})
		}
		if preCursor {
			pfsPlanErrAndLog(fmt.Errorf("prune target has no snapshot that corresponds to sender replication cursor bookmark"), "")
			continue tfss_loop
		}

		// Apply prune rules
		pfs.destroyList = pruning.PruneSnapshots(pfs.snaps, a.rules)
	}

	u(func(pruner *Pruner) {
		pruner.execQueue = newExecQueue(len(pfss))
		for _, pfs := range pfss {
			pruner.execQueue.Put(pfs, nil, false)
		}
		pruner.state = Exec
	})

	for {
		var pfs *fs
		u(func(pruner *Pruner) {
			pfs = pruner.execQueue.Pop()
		})
		if pfs == nil {
			break
		}
		doOneAttemptExec(a, u, pfs)
	}

	var rep *Report
	{
		// must not hold lock for report
		var pruner *Pruner
		u(func(p *Pruner) {
			pruner = p
		})
		rep = pruner.Report()
	}
	u(func(p *Pruner) {
		if len(rep.Pending) > 0 {
			panic("queue should not have pending items at this point")
		}
		hadErr := false
		for _, fsr := range rep.Completed {
			hadErr = hadErr || fsr.SkipReason.NotSkipped() && fsr.LastError != ""
		}
		if hadErr {
			p.state = ExecErr
		} else {
			p.state = Done
		}
	})
}

// attempts to exec pfs, puts it back into the queue with the result
func doOneAttemptExec(a *args, u updater, pfs *fs) {
	destroyList := make([]*pdu.FilesystemVersion, len(pfs.destroyList))
	for i := range destroyList {
		destroyList[i] = pfs.destroyList[i].(snapshot).fsv
		GetLogger(a.ctx).
			WithField("fs", pfs.path).
			WithField("destroy_snap", destroyList[i].Name).
			Debug("policy destroys snapshot")
	}
	req := pdu.DestroySnapshotsReq{
		Filesystem: pfs.path,
		Snapshots:  destroyList,
	}
	GetLogger(a.ctx).WithField("fs", pfs.path).Debug("destroying snapshots")
	res, err := a.target.DestroySnapshots(a.ctx, &req)
	if err != nil {
		u(func(pruner *Pruner) {
			pruner.execQueue.Put(pfs, err, false)
		})
		return
	}

	// check if all snapshots were destroyed
	destroyResults := make(map[string]*pdu.DestroySnapshotRes)
	for _, fsres := range res.Results {
		destroyResults[fsres.Snapshot.Name] = fsres
	}
	err = nil
	destroyFails := make([]*pdu.DestroySnapshotRes, 0)
	for _, reqDestroy := range destroyList {
		res, ok := destroyResults[reqDestroy.Name]
		if !ok {
			err = fmt.Errorf("missing destroy-result for %s", reqDestroy.RelName())
			break
		} else if res.Error != "" {
			destroyFails = append(destroyFails, res)
		}
	}
	if err == nil && len(destroyFails) > 0 {
		names := make([]string, len(destroyFails))
		pairs := make([]string, len(destroyFails))
		allSame := true
		lastMsg := destroyFails[0].Error
		for i := 0; i < len(destroyFails); i++ {
			allSame = allSame && destroyFails[i].Error == lastMsg
			relname := destroyFails[i].Snapshot.RelName()
			names[i] = relname
			pairs[i] = fmt.Sprintf("(%s: %s)", relname, destroyFails[i].Error)
		}
		if allSame {
			err = fmt.Errorf("destroys failed %s: %s", strings.Join(names, ", "), lastMsg)
		} else {
			err = fmt.Errorf("destroys failed: %s", strings.Join(pairs, ", "))
		}
	}
	u(func(pruner *Pruner) {
		pruner.execQueue.Put(pfs, err, err == nil)
	})
	if err != nil {
		GetLogger(a.ctx).WithError(err).Error("target could not destroy snapshots")
		return
	}
}
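
// Example usage (a minimal sketch): it shows how a job could wire up and drive a
// sender-side pruner. The metric name and the pruningCfg/target/sender parameters
// are illustrative placeholders; registering the HistogramVec with a prometheus
// registry is up to the caller. The vec must carry exactly one label because the
// factory calls WithLabelValues with a single "prune_side" value.
func examplePruneSenderSide(ctx context.Context, pruningCfg config.PruningSenderReceiver, target Target, sender Sender) error {
	promPruneSecs := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name: "example_pruning_seconds", // hypothetical metric name
	}, []string{"prune_side"})

	f, err := NewPrunerFactory(pruningCfg, promPruneSecs)
	if err != nil {
		return err
	}

	p := f.BuildSenderPruner(ctx, target, sender)
	p.Prune() // blocks until the plan+exec attempt is done

	rep := p.Report()
	if rep.Error != "" || p.State() != Done {
		return fmt.Errorf("pruning did not complete cleanly: state=%s error=%q", rep.State, rep.Error)
	}
	return nil
}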