mirror of
https://github.com/zrepl/zrepl.git
synced 2024-11-29 11:55:03 +01:00
remove JobStatus, Task abstraction and 'control status' subcommand
Control status will be replaced by job-specific output at some point. Task was not useful anymore with state machine, may reintroduce something similar at a later point, but consider alternatives: - opentracing.io - embedding everything in ctx - activity stack would work easily - log entries via proxy logger.Logger object - progress reporting should be in status reports of individial jobs
This commit is contained in:
parent
7ff72fb6d9
commit
a0f72b585b
@ -9,29 +9,24 @@ import (
|
||||
)
|
||||
|
||||
type IntervalAutosnap struct {
|
||||
task *Task
|
||||
DatasetFilter zfs.DatasetFilter
|
||||
Prefix string
|
||||
SnapshotInterval time.Duration
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) filterFilesystems() (fss []*zfs.DatasetPath, stop bool) {
|
||||
a.task.Enter("filter_filesystems")
|
||||
defer a.task.Finish()
|
||||
func (a *IntervalAutosnap) filterFilesystems(ctx context.Context) (fss []*zfs.DatasetPath, stop bool) {
|
||||
fss, err := zfs.ZFSListMapping(a.DatasetFilter)
|
||||
stop = err != nil
|
||||
if err != nil {
|
||||
a.task.Log().WithError(err).Error("cannot list datasets")
|
||||
getLogger(ctx).WithError(err).Error("cannot list datasets")
|
||||
}
|
||||
if len(fss) == 0 {
|
||||
a.task.Log().Warn("no filesystem matching filesystem filter")
|
||||
getLogger(ctx).Warn("no filesystem matching filesystem filter")
|
||||
}
|
||||
return fss, stop
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) findSyncPoint(fss []*zfs.DatasetPath) (syncPoint time.Time, err error) {
|
||||
a.task.Enter("find_sync_point")
|
||||
defer a.task.Finish()
|
||||
func (a *IntervalAutosnap) findSyncPoint(log Logger, fss []*zfs.DatasetPath) (syncPoint time.Time, err error) {
|
||||
type snapTime struct {
|
||||
ds *zfs.DatasetPath
|
||||
time time.Time
|
||||
@ -45,10 +40,10 @@ func (a *IntervalAutosnap) findSyncPoint(fss []*zfs.DatasetPath) (syncPoint time
|
||||
|
||||
now := time.Now()
|
||||
|
||||
a.task.Log().Debug("examine filesystem state")
|
||||
log.Debug("examine filesystem state")
|
||||
for _, d := range fss {
|
||||
|
||||
l := a.task.Log().WithField("fs", d.ToString())
|
||||
l := log.WithField("fs", d.ToString())
|
||||
|
||||
fsvs, err := zfs.ZFSListFilesystemVersions(d, NewPrefixFilter(a.Prefix))
|
||||
if err != nil {
|
||||
@ -96,76 +91,71 @@ func (a *IntervalAutosnap) findSyncPoint(fss []*zfs.DatasetPath) (syncPoint time
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) waitForSyncPoint(ctx context.Context, syncPoint time.Time) {
|
||||
a.task.Enter("wait_sync_point")
|
||||
defer a.task.Finish()
|
||||
|
||||
const LOG_TIME_FMT string = time.ANSIC
|
||||
|
||||
a.task.Log().WithField("sync_point", syncPoint.Format(LOG_TIME_FMT)).
|
||||
getLogger(ctx).
|
||||
WithField("sync_point", syncPoint.Format(LOG_TIME_FMT)).
|
||||
Info("wait for sync point")
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
a.task.Log().WithError(ctx.Err()).Info("context done")
|
||||
getLogger(ctx).WithError(ctx.Err()).Info("context done")
|
||||
return
|
||||
case <-time.After(syncPoint.Sub(time.Now())):
|
||||
}
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) syncUpRun(ctx context.Context, didSnaps chan struct{}) (stop bool) {
|
||||
a.task.Enter("sync_up")
|
||||
defer a.task.Finish()
|
||||
|
||||
fss, stop := a.filterFilesystems()
|
||||
fss, stop := a.filterFilesystems(ctx)
|
||||
if stop {
|
||||
return true
|
||||
}
|
||||
|
||||
syncPoint, err := a.findSyncPoint(fss)
|
||||
syncPoint, err := a.findSyncPoint(getLogger(ctx), fss)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
a.waitForSyncPoint(ctx, syncPoint)
|
||||
|
||||
a.task.Log().Debug("snapshot all filesystems to enable further snaps in lockstep")
|
||||
a.doSnapshots(didSnaps)
|
||||
getLogger(ctx).Debug("snapshot all filesystems to enable further snaps in lockstep")
|
||||
a.doSnapshots(ctx, didSnaps)
|
||||
return false
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) Run(ctx context.Context, didSnaps chan struct{}) {
|
||||
|
||||
log := getLogger(ctx)
|
||||
|
||||
if a.syncUpRun(ctx, didSnaps) {
|
||||
a.task.Log().Error("stoppping autosnap after error in sync up")
|
||||
log.Error("stoppping autosnap after error in sync up")
|
||||
return
|
||||
}
|
||||
|
||||
// task drops back to idle here
|
||||
|
||||
a.task.Log().Debug("setting up ticker in SnapshotInterval")
|
||||
log.Debug("setting up ticker in SnapshotInterval")
|
||||
ticker := time.NewTicker(a.SnapshotInterval)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
ticker.Stop()
|
||||
a.task.Log().WithError(ctx.Err()).Info("context done")
|
||||
log.WithError(ctx.Err()).Info("context done")
|
||||
return
|
||||
|
||||
case <-ticker.C:
|
||||
a.doSnapshots(didSnaps)
|
||||
a.doSnapshots(ctx, didSnaps)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) doSnapshots(didSnaps chan struct{}) {
|
||||
|
||||
a.task.Enter("do_snapshots")
|
||||
defer a.task.Finish()
|
||||
func (a *IntervalAutosnap) doSnapshots(ctx context.Context, didSnaps chan struct{}) {
|
||||
log := getLogger(ctx)
|
||||
|
||||
// don't cache the result from previous run in case the user added
|
||||
// a new dataset in the meantime
|
||||
ds, stop := a.filterFilesystems()
|
||||
ds, stop := a.filterFilesystems(ctx)
|
||||
if stop {
|
||||
return
|
||||
}
|
||||
@ -175,20 +165,20 @@ func (a *IntervalAutosnap) doSnapshots(didSnaps chan struct{}) {
|
||||
suffix := time.Now().In(time.UTC).Format("20060102_150405_000")
|
||||
snapname := fmt.Sprintf("%s%s", a.Prefix, suffix)
|
||||
|
||||
l := a.task.Log().
|
||||
l := log.
|
||||
WithField("fs", d.ToString()).
|
||||
WithField("snapname", snapname)
|
||||
|
||||
l.Info("create snapshot")
|
||||
err := zfs.ZFSSnapshot(d, snapname, false)
|
||||
if err != nil {
|
||||
a.task.Log().WithError(err).Error("cannot create snapshot")
|
||||
l.WithError(err).Error("cannot create snapshot")
|
||||
}
|
||||
|
||||
l.Info("create corresponding bookmark")
|
||||
err = zfs.ZFSBookmark(d, snapname, snapname)
|
||||
if err != nil {
|
||||
a.task.Log().WithError(err).Error("cannot create bookmark")
|
||||
l.WithError(err).Error("cannot create bookmark")
|
||||
}
|
||||
|
||||
}
|
||||
@ -196,7 +186,7 @@ func (a *IntervalAutosnap) doSnapshots(didSnaps chan struct{}) {
|
||||
select {
|
||||
case didSnaps <- struct{}{}:
|
||||
default:
|
||||
a.task.Log().Error("warning: callback channel is full, discarding")
|
||||
log.Error("warning: callback channel is full, discarding")
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -57,7 +57,7 @@ type PrunePolicy interface {
|
||||
}
|
||||
|
||||
type PruningJob interface {
|
||||
Pruner(task *Task, side PrunePolicySide, dryRun bool) (Pruner, error)
|
||||
Pruner(side PrunePolicySide, dryRun bool) (Pruner, error)
|
||||
}
|
||||
|
||||
// A type for constants describing different prune policies of a PruningJob
|
||||
|
@ -34,10 +34,6 @@ func (j *ControlJob) JobName() string {
|
||||
|
||||
func (j *ControlJob) JobType() JobType { return JobTypeControl }
|
||||
|
||||
func (j *ControlJob) JobStatus(ctx context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{Tasks: nil}, nil
|
||||
}
|
||||
|
||||
const (
|
||||
ControlJobEndpointPProf string = "/debug/pprof"
|
||||
ControlJobEndpointVersion string = "/version"
|
||||
@ -76,7 +72,7 @@ func (j *ControlJob) JobStart(ctx context.Context) {
|
||||
}}})
|
||||
mux.Handle(ControlJobEndpointStatus,
|
||||
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
|
||||
return daemon.Status(), nil
|
||||
panic("FIXME") // FIXME
|
||||
}}})
|
||||
mux.Handle("/pulljobreport",
|
||||
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
|
||||
|
@ -20,11 +20,6 @@ type LocalJob struct {
|
||||
PruneLHS PrunePolicy
|
||||
PruneRHS PrunePolicy
|
||||
Debug JobDebugSettings
|
||||
snapperTask *Task
|
||||
mainTask *Task
|
||||
handlerTask *Task
|
||||
pruneRHSTask *Task
|
||||
pruneLHSTask *Task
|
||||
}
|
||||
|
||||
func parseLocalJob(c JobParsingContext, name string, i map[string]interface{}) (j *LocalJob, err error) {
|
||||
@ -84,13 +79,7 @@ func (j *LocalJob) JobType() JobType { return JobTypeLocal }
|
||||
|
||||
func (j *LocalJob) JobStart(ctx context.Context) {
|
||||
|
||||
rootLog := getLogger(ctx)
|
||||
|
||||
j.snapperTask = NewTask("snapshot", j, rootLog)
|
||||
j.mainTask = NewTask("main", j, rootLog)
|
||||
j.handlerTask = NewTask("handler", j, rootLog)
|
||||
j.pruneRHSTask = NewTask("prune_rhs", j, rootLog)
|
||||
j.pruneLHSTask = NewTask("prune_lhs", j, rootLog)
|
||||
log := getLogger(ctx)
|
||||
|
||||
// Allow access to any dataset since we control what mapping
|
||||
// is passed to the pull routine.
|
||||
@ -103,89 +92,67 @@ func (j *LocalJob) JobStart(ctx context.Context) {
|
||||
|
||||
receiver, err := endpoint.NewReceiver(j.Mapping, NewPrefixFilter(j.SnapshotPrefix))
|
||||
if err != nil {
|
||||
rootLog.WithError(err).Error("unexpected error setting up local handler")
|
||||
log.WithError(err).Error("unexpected error setting up local handler")
|
||||
}
|
||||
|
||||
snapper := IntervalAutosnap{
|
||||
task: j.snapperTask,
|
||||
DatasetFilter: j.Mapping.AsFilter(),
|
||||
Prefix: j.SnapshotPrefix,
|
||||
SnapshotInterval: j.Interval,
|
||||
}
|
||||
|
||||
plhs, err := j.Pruner(j.pruneLHSTask, PrunePolicySideLeft, false)
|
||||
plhs, err := j.Pruner(PrunePolicySideLeft, false)
|
||||
if err != nil {
|
||||
rootLog.WithError(err).Error("error creating lhs pruner")
|
||||
log.WithError(err).Error("error creating lhs pruner")
|
||||
return
|
||||
}
|
||||
prhs, err := j.Pruner(j.pruneRHSTask, PrunePolicySideRight, false)
|
||||
prhs, err := j.Pruner(PrunePolicySideRight, false)
|
||||
if err != nil {
|
||||
rootLog.WithError(err).Error("error creating rhs pruner")
|
||||
log.WithError(err).Error("error creating rhs pruner")
|
||||
return
|
||||
}
|
||||
|
||||
didSnaps := make(chan struct{})
|
||||
go snapper.Run(ctx, didSnaps)
|
||||
go snapper.Run(WithLogger(ctx, log.WithField(logSubsysField, "snap")), didSnaps)
|
||||
|
||||
outer:
|
||||
for {
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
j.mainTask.Log().WithError(ctx.Err()).Info("context")
|
||||
log.WithError(ctx.Err()).Info("context")
|
||||
break outer
|
||||
case <-didSnaps:
|
||||
j.mainTask.Log().Debug("finished taking snapshots")
|
||||
j.mainTask.Log().Info("starting replication procedure")
|
||||
log.Debug("finished taking snapshots")
|
||||
log.Info("starting replication procedure")
|
||||
}
|
||||
|
||||
j.mainTask.Log().Debug("replicating from lhs to rhs")
|
||||
j.mainTask.Enter("replicate")
|
||||
|
||||
{
|
||||
ctx := WithLogger(ctx, log.WithField(logSubsysField, "replication"))
|
||||
rep := replication.NewReplication()
|
||||
rep.Drive(ctx, sender, receiver)
|
||||
|
||||
j.mainTask.Finish()
|
||||
|
||||
// use a ctx as soon as Pull gains ctx support
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
break outer
|
||||
default:
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
j.mainTask.Log().Info("pruning lhs")
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
plhs.Run(ctx)
|
||||
plhs.Run(WithLogger(ctx, log.WithField(logSubsysField, "prune_lhs")))
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
j.mainTask.Log().Info("pruning rhs")
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
prhs.Run(ctx)
|
||||
prhs.Run(WithLogger(ctx, log.WithField(logSubsysField, "prune_rhs")))
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
wg.Wait()
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (j *LocalJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{Tasks: []*TaskStatus{
|
||||
j.snapperTask.Status(),
|
||||
j.pruneLHSTask.Status(),
|
||||
j.pruneRHSTask.Status(),
|
||||
j.mainTask.Status(),
|
||||
}}, nil
|
||||
}
|
||||
|
||||
func (j *LocalJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
func (j *LocalJob) Pruner(side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
|
||||
var dsfilter zfs.DatasetFilter
|
||||
var pp PrunePolicy
|
||||
@ -206,7 +173,6 @@ func (j *LocalJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Prun
|
||||
}
|
||||
|
||||
p = Pruner{
|
||||
task,
|
||||
time.Now(),
|
||||
dryRun,
|
||||
dsfilter,
|
||||
|
@ -70,8 +70,6 @@ func (j *PrometheusJob) JobStart(ctx context.Context) {
|
||||
}
|
||||
|
||||
log := getLogger(ctx)
|
||||
task := NewTask("main", j, log)
|
||||
log = task.Log()
|
||||
|
||||
l, err := net.Listen("tcp", j.Listen)
|
||||
if err != nil {
|
||||
@ -94,6 +92,3 @@ func (j *PrometheusJob) JobStart(ctx context.Context) {
|
||||
|
||||
}
|
||||
|
||||
func (*PrometheusJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{}, nil
|
||||
}
|
||||
|
@ -26,7 +26,6 @@ type PullJob struct {
|
||||
Prune PrunePolicy
|
||||
Debug JobDebugSettings
|
||||
|
||||
task *Task
|
||||
rep *replication.Replication
|
||||
}
|
||||
|
||||
@ -107,7 +106,6 @@ func (j *PullJob) JobStart(ctx context.Context) {
|
||||
|
||||
log := getLogger(ctx)
|
||||
defer log.Info("exiting")
|
||||
j.task = NewTask("main", j, log)
|
||||
|
||||
// j.task is idle here idle here
|
||||
usr1 := make(chan os.Signal)
|
||||
@ -120,14 +118,14 @@ func (j *PullJob) JobStart(ctx context.Context) {
|
||||
j.doRun(ctx)
|
||||
duration := time.Now().Sub(begin)
|
||||
if duration > j.Interval {
|
||||
j.task.Log().
|
||||
log.
|
||||
WithField("actual_duration", duration).
|
||||
WithField("configured_interval", j.Interval).
|
||||
Warn("pull run took longer than configured interval")
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
j.task.Log().WithError(ctx.Err()).Info("context")
|
||||
log.WithError(ctx.Err()).Info("context")
|
||||
return
|
||||
case <-ticker.C:
|
||||
case <-usr1:
|
||||
@ -150,9 +148,7 @@ var STREAMRPC_CONFIG = &streamrpc.ConnConfig{ // FIXME oversight and configurabi
|
||||
|
||||
func (j *PullJob) doRun(ctx context.Context) {
|
||||
|
||||
j.task.Enter("run")
|
||||
defer j.task.Finish()
|
||||
|
||||
log := getLogger(ctx)
|
||||
// FIXME
|
||||
clientConf := &streamrpc.ClientConfig{
|
||||
ConnConfig: STREAMRPC_CONFIG,
|
||||
@ -161,8 +157,6 @@ func (j *PullJob) doRun(ctx context.Context) {
|
||||
client, err := streamrpc.NewClient(j.Connect, clientConf)
|
||||
defer client.Close()
|
||||
|
||||
j.task.Enter("pull")
|
||||
|
||||
sender := endpoint.NewRemote(client)
|
||||
|
||||
puller, err := endpoint.NewReceiver(
|
||||
@ -170,43 +164,37 @@ func (j *PullJob) doRun(ctx context.Context) {
|
||||
NewPrefixFilter(j.SnapshotPrefix),
|
||||
)
|
||||
if err != nil {
|
||||
j.task.Log().WithError(err).Error("error creating receiver endpoint")
|
||||
j.task.Finish()
|
||||
log.WithError(err).Error("error creating receiver endpoint")
|
||||
return
|
||||
}
|
||||
|
||||
ctx = replication.WithLogger(ctx, replicationLogAdaptor{j.task.Log().WithField(logSubsysField, "replication")})
|
||||
ctx = streamrpc.ContextWithLogger(ctx, streamrpcLogAdaptor{j.task.Log().WithField(logSubsysField, "rpc.protocol")})
|
||||
ctx = endpoint.WithLogger(ctx, j.task.Log().WithField(logSubsysField, "rpc.endpoint"))
|
||||
|
||||
{
|
||||
ctx := replication.WithLogger(ctx, replicationLogAdaptor{log.WithField(logSubsysField, "replication")})
|
||||
ctx = streamrpc.ContextWithLogger(ctx, streamrpcLogAdaptor{log.WithField(logSubsysField, "rpc")})
|
||||
ctx = endpoint.WithLogger(ctx, log.WithField(logSubsysField, "endpoint"))
|
||||
j.rep = replication.NewReplication()
|
||||
j.rep.Drive(ctx, sender, puller)
|
||||
}
|
||||
|
||||
client.Close()
|
||||
j.task.Finish()
|
||||
|
||||
j.task.Enter("prune")
|
||||
pruner, err := j.Pruner(j.task, PrunePolicySideDefault, false)
|
||||
{
|
||||
ctx := WithLogger(ctx, log.WithField(logSubsysField, "prune"))
|
||||
pruner, err := j.Pruner(PrunePolicySideDefault, false)
|
||||
if err != nil {
|
||||
j.task.Log().WithError(err).Error("error creating pruner")
|
||||
log.WithError(err).Error("error creating pruner")
|
||||
} else {
|
||||
pruner.Run(ctx)
|
||||
}
|
||||
j.task.Finish()
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func (j *PullJob) Report() *replication.Report {
|
||||
return j.rep.Report()
|
||||
}
|
||||
|
||||
func (j *PullJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{Tasks: []*TaskStatus{j.task.Status()}}, nil
|
||||
}
|
||||
|
||||
func (j *PullJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
func (j *PullJob) Pruner(side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
p = Pruner{
|
||||
task,
|
||||
time.Now(),
|
||||
dryRun,
|
||||
j.pruneFilter,
|
||||
|
@ -19,9 +19,6 @@ type SourceJob struct {
|
||||
Interval time.Duration
|
||||
Prune PrunePolicy
|
||||
Debug JobDebugSettings
|
||||
serveTask *Task
|
||||
autosnapTask *Task
|
||||
pruneTask *Task
|
||||
}
|
||||
|
||||
func parseSourceJob(c JobParsingContext, name string, i map[string]interface{}) (j *SourceJob, err error) {
|
||||
@ -92,12 +89,8 @@ func (j *SourceJob) JobStart(ctx context.Context) {
|
||||
log := getLogger(ctx)
|
||||
defer log.Info("exiting")
|
||||
|
||||
j.autosnapTask = NewTask("autosnap", j, log)
|
||||
j.pruneTask = NewTask("prune", j, log)
|
||||
j.serveTask = NewTask("serve", j, log)
|
||||
|
||||
a := IntervalAutosnap{j.autosnapTask, j.Filesystems, j.SnapshotPrefix, j.Interval}
|
||||
p, err := j.Pruner(j.pruneTask, PrunePolicySideDefault, false)
|
||||
a := IntervalAutosnap{j.Filesystems, j.SnapshotPrefix, j.Interval}
|
||||
p, err := j.Pruner(PrunePolicySideDefault, false)
|
||||
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error creating pruner")
|
||||
@ -106,8 +99,8 @@ func (j *SourceJob) JobStart(ctx context.Context) {
|
||||
|
||||
didSnaps := make(chan struct{})
|
||||
|
||||
go j.serve(ctx, j.serveTask)
|
||||
go a.Run(ctx, didSnaps)
|
||||
go j.serve(ctx) // logSubsysField set by handleConnection
|
||||
go a.Run(WithLogger(ctx, log.WithField(logSubsysField, "snap")), didSnaps)
|
||||
|
||||
outer:
|
||||
for {
|
||||
@ -115,27 +108,15 @@ outer:
|
||||
case <-ctx.Done():
|
||||
break outer
|
||||
case <-didSnaps:
|
||||
log.Info("starting pruner")
|
||||
p.Run(ctx)
|
||||
log.Info("pruner done")
|
||||
p.Run(WithLogger(ctx, log.WithField(logSubsysField, "prune")))
|
||||
}
|
||||
}
|
||||
log.WithError(ctx.Err()).Info("context")
|
||||
|
||||
}
|
||||
|
||||
func (j *SourceJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{
|
||||
Tasks: []*TaskStatus{
|
||||
j.autosnapTask.Status(),
|
||||
j.pruneTask.Status(),
|
||||
j.serveTask.Status(),
|
||||
}}, nil
|
||||
}
|
||||
|
||||
func (j *SourceJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
func (j *SourceJob) Pruner(side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
p = Pruner{
|
||||
task,
|
||||
time.Now(),
|
||||
dryRun,
|
||||
j.Filesystems,
|
||||
@ -145,11 +126,13 @@ func (j *SourceJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pru
|
||||
return
|
||||
}
|
||||
|
||||
func (j *SourceJob) serve(ctx context.Context, task *Task) {
|
||||
func (j *SourceJob) serve(ctx context.Context) {
|
||||
|
||||
log := getLogger(ctx)
|
||||
|
||||
listener, err := j.Serve.Listen()
|
||||
if err != nil {
|
||||
task.Log().WithError(err).Error("error listening")
|
||||
getLogger(ctx).WithError(err).Error("error listening")
|
||||
return
|
||||
}
|
||||
|
||||
@ -173,48 +156,40 @@ outer:
|
||||
case rwcMsg := <-connChan:
|
||||
|
||||
if rwcMsg.err != nil {
|
||||
task.Log().WithError(rwcMsg.err).Error("error accepting connection")
|
||||
log.WithError(rwcMsg.err).Error("error accepting connection")
|
||||
continue
|
||||
}
|
||||
|
||||
j.handleConnection(rwcMsg.conn, task)
|
||||
j.handleConnection(ctx, rwcMsg.conn)
|
||||
|
||||
case <-ctx.Done():
|
||||
task.Log().WithError(ctx.Err()).Info("context")
|
||||
log.WithError(ctx.Err()).Info("context")
|
||||
break outer
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
task.Log().Info("closing listener")
|
||||
|
||||
task.Enter("close_listener")
|
||||
defer task.Finish()
|
||||
log.Info("closing listener")
|
||||
err = listener.Close()
|
||||
if err != nil {
|
||||
task.Log().WithError(err).Error("error closing listener")
|
||||
log.WithError(err).Error("error closing listener")
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (j *SourceJob) handleConnection(conn net.Conn, task *Task) {
|
||||
|
||||
task.Enter("handle_connection")
|
||||
defer task.Finish()
|
||||
|
||||
task.Log().Info("handling client connection")
|
||||
func (j *SourceJob) handleConnection(ctx context.Context, conn net.Conn) {
|
||||
log := getLogger(ctx)
|
||||
log.Info("handling client connection")
|
||||
|
||||
senderEP := endpoint.NewSender(j.Filesystems, NewPrefixFilter(j.SnapshotPrefix))
|
||||
|
||||
ctx := context.Background()
|
||||
ctx = endpoint.WithLogger(ctx, task.Log().WithField(logSubsysField, "rpc.endpoint"))
|
||||
ctx = streamrpc.ContextWithLogger(ctx, streamrpcLogAdaptor{task.Log().WithField(logSubsysField, "rpc.protocol")})
|
||||
ctx = endpoint.WithLogger(ctx, log.WithField(logSubsysField, "serve"))
|
||||
ctx = streamrpc.ContextWithLogger(ctx, streamrpcLogAdaptor{log.WithField(logSubsysField, "rpc")})
|
||||
handler := endpoint.NewHandler(senderEP)
|
||||
if err := streamrpc.ServeConn(ctx, conn, STREAMRPC_CONFIG, handler.Handle); err != nil {
|
||||
task.Log().WithError(err).Error("error serving connection")
|
||||
log.WithError(err).Error("error serving connection")
|
||||
} else {
|
||||
task.Log().Info("client closed connection")
|
||||
log.Info("client closed connection")
|
||||
}
|
||||
|
||||
}
|
||||
|
151
cmd/control.go
151
cmd/control.go
@ -5,7 +5,6 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
@ -14,9 +13,6 @@ import (
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
var controlCmd = &cobra.Command{
|
||||
@ -67,20 +63,11 @@ var controlStatusCmdArgs struct {
|
||||
onlyShowJob string
|
||||
}
|
||||
|
||||
var controlStatusCmd = &cobra.Command{
|
||||
Use: "status [JOB_NAME]",
|
||||
Short: "get current status",
|
||||
Run: doControlStatusCmd,
|
||||
}
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(controlCmd)
|
||||
controlCmd.AddCommand(pprofCmd)
|
||||
controlCmd.AddCommand(controlVersionCmd)
|
||||
controlCmd.AddCommand(controlStatusCmd)
|
||||
controlStatusCmd.Flags().StringVar(&controlStatusCmdArgs.format, "format", "human", "output format (human|raw)")
|
||||
controlStatusCmdArgs.level = logger.Warn
|
||||
controlStatusCmd.Flags().Var(&controlStatusCmdArgs.level, "level", "minimum log level to show")
|
||||
}
|
||||
|
||||
func controlHttpClient() (client http.Client, err error) {
|
||||
@ -165,141 +152,3 @@ func doControLVersionCmd(cmd *cobra.Command, args []string) {
|
||||
fmt.Println(info.String())
|
||||
|
||||
}
|
||||
|
||||
func doControlStatusCmd(cmd *cobra.Command, args []string) {
|
||||
|
||||
log := golog.New(os.Stderr, "", 0)
|
||||
|
||||
die := func() {
|
||||
log.Print("exiting after error")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if len(args) == 1 {
|
||||
controlStatusCmdArgs.onlyShowJob = args[0]
|
||||
} else if len(args) > 1 {
|
||||
log.Print("can only specify one job as positional argument")
|
||||
cmd.Usage()
|
||||
die()
|
||||
}
|
||||
|
||||
httpc, err := controlHttpClient()
|
||||
if err != nil {
|
||||
log.Printf("could not connect to daemon: %s", err)
|
||||
die()
|
||||
}
|
||||
|
||||
resp, err := httpc.Get("http://unix" + ControlJobEndpointStatus)
|
||||
if err != nil {
|
||||
log.Printf("error: %s", err)
|
||||
die()
|
||||
} else if resp.StatusCode != http.StatusOK {
|
||||
var msg bytes.Buffer
|
||||
io.CopyN(&msg, resp.Body, 4096)
|
||||
log.Printf("error: %s", msg.String())
|
||||
die()
|
||||
}
|
||||
|
||||
var status DaemonStatus
|
||||
err = json.NewDecoder(resp.Body).Decode(&status)
|
||||
if err != nil {
|
||||
log.Printf("error unmarshaling response: %s", err)
|
||||
die()
|
||||
}
|
||||
|
||||
switch controlStatusCmdArgs.format {
|
||||
case "raw":
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
if err := enc.Encode(status); err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
case "human":
|
||||
|
||||
formatter := HumanFormatter{}
|
||||
formatter.SetMetadataFlags(MetadataAll)
|
||||
formatter.SetIgnoreFields([]string{
|
||||
logJobField,
|
||||
})
|
||||
jobNames := make([]string, 0, len(status.Jobs))
|
||||
for name, _ := range status.Jobs {
|
||||
jobNames = append(jobNames, name)
|
||||
}
|
||||
sort.Slice(jobNames, func(i, j int) bool {
|
||||
return strings.Compare(jobNames[i], jobNames[j]) == -1
|
||||
})
|
||||
now := time.Now()
|
||||
for _, name := range jobNames {
|
||||
|
||||
if controlStatusCmdArgs.onlyShowJob != "" && name != controlStatusCmdArgs.onlyShowJob {
|
||||
continue
|
||||
}
|
||||
|
||||
job := status.Jobs[name]
|
||||
jobLogEntries := make([]logger.Entry, 0)
|
||||
informAboutError := false
|
||||
|
||||
fmt.Printf("Job '%s':\n", name)
|
||||
for _, task := range job.Tasks {
|
||||
|
||||
var header bytes.Buffer
|
||||
fmt.Fprintf(&header, " Task '%s': ", task.Name)
|
||||
if !task.Idle {
|
||||
fmt.Fprint(&header, strings.Join(task.ActivityStack, "."))
|
||||
} else {
|
||||
fmt.Fprint(&header, "<idle>")
|
||||
}
|
||||
fmt.Fprint(&header, " ")
|
||||
const TASK_STALLED_HOLDOFF_DURATION = 10 * time.Second
|
||||
sinceLastUpdate := now.Sub(task.LastUpdate)
|
||||
if !task.Idle || task.ProgressRx != 0 || task.ProgressTx != 0 {
|
||||
fmt.Fprintf(&header, "(%s / %s , Rx/Tx",
|
||||
humanize.Bytes(uint64(task.ProgressRx)),
|
||||
humanize.Bytes(uint64(task.ProgressTx)))
|
||||
if task.Idle {
|
||||
fmt.Fprint(&header, ", values from last run")
|
||||
}
|
||||
fmt.Fprint(&header, ")")
|
||||
}
|
||||
fmt.Fprint(&header, "\n")
|
||||
if !task.Idle && !task.LastUpdate.IsZero() && sinceLastUpdate >= TASK_STALLED_HOLDOFF_DURATION {
|
||||
informAboutError = true
|
||||
fmt.Fprintf(&header, " WARNING: last update %s ago at %s)",
|
||||
sinceLastUpdate.String(),
|
||||
task.LastUpdate.Format(HumanFormatterDateFormat))
|
||||
fmt.Fprint(&header, "\n")
|
||||
}
|
||||
io.Copy(os.Stdout, &header)
|
||||
|
||||
jobLogEntries = append(jobLogEntries, task.LogEntries...)
|
||||
informAboutError = informAboutError || task.MaxLogLevel >= logger.Warn
|
||||
}
|
||||
|
||||
sort.Slice(jobLogEntries, func(i, j int) bool {
|
||||
return jobLogEntries[i].Time.Before(jobLogEntries[j].Time)
|
||||
})
|
||||
if informAboutError {
|
||||
fmt.Println(" WARNING: Some tasks encountered problems since the last time they left idle state:")
|
||||
fmt.Println(" check the logs below or your log file for more information.")
|
||||
fmt.Println(" Use the --level flag if you need debug information.")
|
||||
fmt.Println()
|
||||
}
|
||||
for _, e := range jobLogEntries {
|
||||
if e.Level < controlStatusCmdArgs.level {
|
||||
continue
|
||||
}
|
||||
formatted, err := formatter.Format(&e)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Printf(" %s\n", string(formatted))
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
}
|
||||
default:
|
||||
log.Printf("invalid output format '%s'", controlStatusCmdArgs.format)
|
||||
die()
|
||||
}
|
||||
|
||||
}
|
||||
|
318
cmd/daemon.go
318
cmd/daemon.go
@ -1,16 +1,13 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"io"
|
||||
"os"
|
||||
"os/signal"
|
||||
//"strings"
|
||||
"sync"
|
||||
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
@ -30,7 +27,6 @@ type Job interface {
|
||||
JobName() string
|
||||
JobType() JobType
|
||||
JobStart(ctxt context.Context)
|
||||
JobStatus(ctxt context.Context) (*JobStatus, error)
|
||||
}
|
||||
|
||||
type JobType string
|
||||
@ -155,315 +151,3 @@ outer:
|
||||
log.Info("exiting")
|
||||
|
||||
}
|
||||
|
||||
// Representation of a Job's status that is composed of Tasks
|
||||
type JobStatus struct {
|
||||
// Statuses of all tasks of this job
|
||||
Tasks []*TaskStatus
|
||||
// Error != "" if JobStatus() returned an error
|
||||
JobStatusError string
|
||||
}
|
||||
|
||||
// Representation of a Daemon's status that is composed of Jobs
|
||||
type DaemonStatus struct {
|
||||
StartedAt time.Time
|
||||
Jobs map[string]*JobStatus
|
||||
}
|
||||
|
||||
func (d *Daemon) Status() (s *DaemonStatus) {
|
||||
|
||||
s = &DaemonStatus{}
|
||||
s.StartedAt = d.startedAt
|
||||
|
||||
s.Jobs = make(map[string]*JobStatus, len(d.conf.Jobs))
|
||||
|
||||
for name, j := range d.conf.Jobs {
|
||||
status, err := j.JobStatus(context.TODO())
|
||||
if err != nil {
|
||||
s.Jobs[name] = &JobStatus{nil, err.Error()}
|
||||
continue
|
||||
}
|
||||
s.Jobs[name] = status
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Representation of a Task's status
|
||||
type TaskStatus struct {
|
||||
Name string
|
||||
// Whether the task is idle.
|
||||
Idle bool
|
||||
// The stack of activities the task is currently executing.
|
||||
// The first element is the root activity and equal to Name.
|
||||
ActivityStack []string
|
||||
// Number of bytes received by the task since it last left idle state.
|
||||
ProgressRx int64
|
||||
// Number of bytes sent by the task since it last left idle state.
|
||||
ProgressTx int64
|
||||
// Log entries emitted by the task since it last left idle state.
|
||||
// Only contains the log entries emitted through the task's logger
|
||||
// (provided by Task.Log()).
|
||||
LogEntries []logger.Entry
|
||||
// The maximum log level of LogEntries.
|
||||
// Only valid if len(LogEntries) > 0.
|
||||
MaxLogLevel logger.Level
|
||||
// Last time something about the Task changed
|
||||
LastUpdate time.Time
|
||||
}
|
||||
|
||||
// An instance of Task tracks a single thread of activity that is part of a Job.
|
||||
type Task struct {
|
||||
name string // immutable
|
||||
parent Job // immutable
|
||||
|
||||
// Stack of activities the task is currently in
|
||||
// Members are instances of taskActivity
|
||||
activities *list.List
|
||||
// Last time activities was changed (not the activities inside, the list)
|
||||
activitiesLastUpdate time.Time
|
||||
// Protects Task members from modification
|
||||
rwl sync.RWMutex
|
||||
}
|
||||
|
||||
// Structure that describes the progress a Task has made
|
||||
type taskProgress struct {
|
||||
rx int64
|
||||
tx int64
|
||||
creation time.Time
|
||||
lastUpdate time.Time
|
||||
logEntries []logger.Entry
|
||||
mtx sync.RWMutex
|
||||
}
|
||||
|
||||
func newTaskProgress() (p *taskProgress) {
|
||||
return &taskProgress{
|
||||
creation: time.Now(),
|
||||
logEntries: make([]logger.Entry, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (p *taskProgress) UpdateIO(drx, dtx int64) {
|
||||
p.mtx.Lock()
|
||||
defer p.mtx.Unlock()
|
||||
p.rx += drx
|
||||
p.tx += dtx
|
||||
p.lastUpdate = time.Now()
|
||||
}
|
||||
|
||||
func (p *taskProgress) UpdateLogEntry(entry logger.Entry) {
|
||||
p.mtx.Lock()
|
||||
defer p.mtx.Unlock()
|
||||
// FIXME: ensure maximum size (issue #48)
|
||||
p.logEntries = append(p.logEntries, entry)
|
||||
p.lastUpdate = time.Now()
|
||||
}
|
||||
|
||||
func (p *taskProgress) DeepCopy() (out taskProgress) {
|
||||
p.mtx.RLock()
|
||||
defer p.mtx.RUnlock()
|
||||
out.rx, out.tx = p.rx, p.tx
|
||||
out.creation = p.creation
|
||||
out.lastUpdate = p.lastUpdate
|
||||
out.logEntries = make([]logger.Entry, len(p.logEntries))
|
||||
for i := range p.logEntries {
|
||||
out.logEntries[i] = p.logEntries[i]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// returns a copy of this taskProgress, the mutex carries no semantic value
|
||||
func (p *taskProgress) Read() (out taskProgress) {
|
||||
p.mtx.RLock()
|
||||
defer p.mtx.RUnlock()
|
||||
return p.DeepCopy()
|
||||
}
|
||||
|
||||
// Element of a Task's activity stack
|
||||
type taskActivity struct {
|
||||
name string
|
||||
idle bool
|
||||
logger logger.Logger
|
||||
// The progress of the task that is updated by UpdateIO() and UpdateLogEntry()
|
||||
//
|
||||
// Progress happens on a task-level and is thus global to the task.
|
||||
// That's why progress is just a pointer to the current taskProgress:
|
||||
// we reset progress when leaving the idle root activity
|
||||
progress *taskProgress
|
||||
}
|
||||
|
||||
func NewTask(name string, parent Job, lg logger.Logger) *Task {
|
||||
t := &Task{
|
||||
name: name,
|
||||
parent: parent,
|
||||
activities: list.New(),
|
||||
}
|
||||
rootLogger := lg.ReplaceField(logTaskField, name).
|
||||
WithOutlet(t, logger.Debug)
|
||||
rootAct := &taskActivity{name, true, rootLogger, newTaskProgress()}
|
||||
t.activities.PushFront(rootAct)
|
||||
return t
|
||||
}
|
||||
|
||||
// callers must hold t.rwl
|
||||
func (t *Task) cur() *taskActivity {
|
||||
return t.activities.Front().Value.(*taskActivity)
|
||||
}
|
||||
|
||||
// buildActivityStack returns the stack of activity names
|
||||
// t.rwl must be held, but the slice can be returned since strings are immutable
|
||||
func (t *Task) buildActivityStack() []string {
|
||||
comps := make([]string, 0, t.activities.Len())
|
||||
for e := t.activities.Back(); e != nil; e = e.Prev() {
|
||||
act := e.Value.(*taskActivity)
|
||||
comps = append(comps, act.name)
|
||||
}
|
||||
return comps
|
||||
}
|
||||
|
||||
// Start a sub-activity.
|
||||
// Must always be matched with a call to t.Finish()
|
||||
// --- consider using defer for this purpose.
|
||||
func (t *Task) Enter(activity string) {
|
||||
t.rwl.Lock()
|
||||
defer t.rwl.Unlock()
|
||||
|
||||
prev := t.cur()
|
||||
if prev.idle {
|
||||
// reset progress when leaving idle task
|
||||
// we leave the old progress dangling to have the user not worry about
|
||||
prev.progress = newTaskProgress()
|
||||
|
||||
prom.taskLastActiveStart.WithLabelValues(
|
||||
t.parent.JobName(),
|
||||
t.parent.JobType().String(),
|
||||
t.name).
|
||||
Set(float64(prev.progress.creation.UnixNano()) / 1e9)
|
||||
|
||||
}
|
||||
act := &taskActivity{activity, false, nil, prev.progress}
|
||||
t.activities.PushFront(act)
|
||||
//stack := t.buildActivityStack()
|
||||
//activityField := strings.Join(stack, ".")
|
||||
act.logger = prev.logger
|
||||
// act.logger = prev.logger.ReplaceField(logTaskField, activityField)
|
||||
|
||||
t.activitiesLastUpdate = time.Now()
|
||||
}
|
||||
|
||||
func (t *Task) UpdateProgress(dtx, drx int64) {
|
||||
t.rwl.RLock()
|
||||
p := t.cur().progress // protected by own rwlock
|
||||
t.rwl.RUnlock()
|
||||
p.UpdateIO(dtx, drx)
|
||||
}
|
||||
|
||||
// Returns a wrapper io.Reader that updates this task's _current_ progress value.
|
||||
// Progress updates after this task resets its progress value are discarded.
|
||||
func (t *Task) ProgressUpdater(r io.Reader) *IOProgressUpdater {
|
||||
t.rwl.RLock()
|
||||
defer t.rwl.RUnlock()
|
||||
return &IOProgressUpdater{r, t.cur().progress}
|
||||
}
|
||||
|
||||
func (t *Task) Status() *TaskStatus {
|
||||
t.rwl.RLock()
|
||||
defer t.rwl.RUnlock()
|
||||
// NOTE
|
||||
// do not return any state in TaskStatus that is protected by t.rwl
|
||||
|
||||
cur := t.cur()
|
||||
stack := t.buildActivityStack()
|
||||
prog := cur.progress.Read()
|
||||
|
||||
var maxLevel logger.Level
|
||||
for _, entry := range prog.logEntries {
|
||||
if maxLevel < entry.Level {
|
||||
maxLevel = entry.Level
|
||||
}
|
||||
}
|
||||
|
||||
lastUpdate := prog.lastUpdate
|
||||
if lastUpdate.Before(t.activitiesLastUpdate) {
|
||||
lastUpdate = t.activitiesLastUpdate
|
||||
}
|
||||
|
||||
s := &TaskStatus{
|
||||
Name: stack[0],
|
||||
ActivityStack: stack,
|
||||
Idle: cur.idle,
|
||||
ProgressRx: prog.rx,
|
||||
ProgressTx: prog.tx,
|
||||
LogEntries: prog.logEntries,
|
||||
MaxLogLevel: maxLevel,
|
||||
LastUpdate: lastUpdate,
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// Finish a sub-activity.
|
||||
// Corresponds to a preceding call to t.Enter()
|
||||
func (t *Task) Finish() {
|
||||
t.rwl.Lock()
|
||||
defer t.rwl.Unlock()
|
||||
top := t.activities.Front()
|
||||
if top.Next() == nil {
|
||||
return // cannot remove root activity
|
||||
}
|
||||
t.activities.Remove(top)
|
||||
t.activitiesLastUpdate = time.Now()
|
||||
|
||||
// prometheus
|
||||
front := t.activities.Front()
|
||||
if front != nil && front == t.activities.Back() {
|
||||
idleAct := front.Value.(*taskActivity)
|
||||
if !idleAct.idle {
|
||||
panic("inconsistent implementation")
|
||||
}
|
||||
progress := idleAct.progress.Read()
|
||||
non_idle_time := t.activitiesLastUpdate.Sub(progress.creation) // use same time
|
||||
prom.taskLastActiveDuration.WithLabelValues(
|
||||
t.parent.JobName(),
|
||||
t.parent.JobType().String(),
|
||||
t.name).Set(non_idle_time.Seconds())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Returns a logger derived from the logger passed to the constructor function.
|
||||
// The logger's task field contains the current activity stack joined by '.'.
|
||||
func (t *Task) Log() logger.Logger {
|
||||
t.rwl.RLock()
|
||||
defer t.rwl.RUnlock()
|
||||
// FIXME should influence TaskStatus's LastUpdate field
|
||||
return t.cur().logger
|
||||
}
|
||||
|
||||
// implement logger.Outlet interface
|
||||
func (t *Task) WriteEntry(entry logger.Entry) error {
|
||||
t.rwl.RLock()
|
||||
defer t.rwl.RUnlock()
|
||||
t.cur().progress.UpdateLogEntry(entry)
|
||||
|
||||
prom.taskLogEntries.WithLabelValues(
|
||||
t.parent.JobName(),
|
||||
t.parent.JobType().String(),
|
||||
t.name,
|
||||
entry.Level.String()).
|
||||
Inc()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type IOProgressUpdater struct {
|
||||
r io.Reader
|
||||
p *taskProgress
|
||||
}
|
||||
|
||||
func (u *IOProgressUpdater) Read(p []byte) (n int, err error) {
|
||||
n, err = u.r.Read(p)
|
||||
u.p.UpdateIO(int64(n), 0)
|
||||
return
|
||||
|
||||
}
|
||||
|
36
cmd/prune.go
36
cmd/prune.go
@ -8,7 +8,6 @@ import (
|
||||
)
|
||||
|
||||
type Pruner struct {
|
||||
task *Task
|
||||
Now time.Time
|
||||
DryRun bool
|
||||
DatasetFilter zfs.DatasetFilter
|
||||
@ -23,25 +22,21 @@ type PruneResult struct {
|
||||
Remove []zfs.FilesystemVersion
|
||||
}
|
||||
|
||||
func (p *Pruner) filterFilesystems() (filesystems []*zfs.DatasetPath, stop bool) {
|
||||
p.task.Enter("filter_fs")
|
||||
defer p.task.Finish()
|
||||
func (p *Pruner) filterFilesystems(ctx context.Context) (filesystems []*zfs.DatasetPath, stop bool) {
|
||||
filesystems, err := zfs.ZFSListMapping(p.DatasetFilter)
|
||||
if err != nil {
|
||||
p.task.Log().WithError(err).Error("error applying filesystem filter")
|
||||
getLogger(ctx).WithError(err).Error("error applying filesystem filter")
|
||||
return nil, true
|
||||
}
|
||||
if len(filesystems) <= 0 {
|
||||
p.task.Log().Info("no filesystems matching filter")
|
||||
getLogger(ctx).Info("no filesystems matching filter")
|
||||
return nil, true
|
||||
}
|
||||
return filesystems, false
|
||||
}
|
||||
|
||||
func (p *Pruner) filterVersions(fs *zfs.DatasetPath) (fsversions []zfs.FilesystemVersion, stop bool) {
|
||||
p.task.Enter("filter_versions")
|
||||
defer p.task.Finish()
|
||||
log := p.task.Log().WithField("fs", fs.ToString())
|
||||
func (p *Pruner) filterVersions(ctx context.Context, fs *zfs.DatasetPath) (fsversions []zfs.FilesystemVersion, stop bool) {
|
||||
log := getLogger(ctx).WithField("fs", fs.ToString())
|
||||
|
||||
filter := NewPrefixFilter(p.SnapshotPrefix)
|
||||
fsversions, err := zfs.ZFSListFilesystemVersions(fs, filter)
|
||||
@ -56,19 +51,15 @@ func (p *Pruner) filterVersions(fs *zfs.DatasetPath) (fsversions []zfs.Filesyste
|
||||
return fsversions, false
|
||||
}
|
||||
|
||||
func (p *Pruner) pruneFilesystem(fs *zfs.DatasetPath) (r PruneResult, valid bool) {
|
||||
p.task.Enter("prune_fs")
|
||||
defer p.task.Finish()
|
||||
log := p.task.Log().WithField("fs", fs.ToString())
|
||||
func (p *Pruner) pruneFilesystem(ctx context.Context, fs *zfs.DatasetPath) (r PruneResult, valid bool) {
|
||||
log := getLogger(ctx).WithField("fs", fs.ToString())
|
||||
|
||||
fsversions, stop := p.filterVersions(fs)
|
||||
fsversions, stop := p.filterVersions(ctx, fs)
|
||||
if stop {
|
||||
return
|
||||
}
|
||||
|
||||
p.task.Enter("prune_policy")
|
||||
keep, remove, err := p.PrunePolicy.Prune(fs, fsversions)
|
||||
p.task.Finish()
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error evaluating prune policy")
|
||||
return
|
||||
@ -100,9 +91,7 @@ func (p *Pruner) pruneFilesystem(fs *zfs.DatasetPath) (r PruneResult, valid bool
|
||||
// TODO special handling for EBUSY (zfs hold)
|
||||
// TODO error handling for clones? just echo to cli, skip over, and exit with non-zero status code (we're idempotent)
|
||||
if !p.DryRun {
|
||||
p.task.Enter("destroy")
|
||||
err := zfs.ZFSDestroyFilesystemVersion(fs, v)
|
||||
p.task.Finish()
|
||||
if err != nil {
|
||||
log.WithFields(fields).WithError(err).Error("error destroying version")
|
||||
}
|
||||
@ -112,14 +101,11 @@ func (p *Pruner) pruneFilesystem(fs *zfs.DatasetPath) (r PruneResult, valid bool
|
||||
}
|
||||
|
||||
func (p *Pruner) Run(ctx context.Context) (r []PruneResult, err error) {
|
||||
p.task.Enter("run")
|
||||
defer p.task.Finish()
|
||||
|
||||
if p.DryRun {
|
||||
p.task.Log().Info("doing dry run")
|
||||
getLogger(ctx).Info("doing dry run")
|
||||
}
|
||||
|
||||
filesystems, stop := p.filterFilesystems()
|
||||
filesystems, stop := p.filterFilesystems(ctx)
|
||||
if stop {
|
||||
return
|
||||
}
|
||||
@ -127,7 +113,7 @@ func (p *Pruner) Run(ctx context.Context) (r []PruneResult, err error) {
|
||||
r = make([]PruneResult, 0, len(filesystems))
|
||||
|
||||
for _, fs := range filesystems {
|
||||
res, ok := p.pruneFilesystem(fs)
|
||||
res, ok := p.pruneFilesystem(ctx, fs)
|
||||
if ok {
|
||||
r = append(r, res)
|
||||
}
|
||||
|
@ -170,8 +170,7 @@ func doTestPrunePolicy(cmd *cobra.Command, args []string) {
|
||||
|
||||
log.Printf("job dump:\n%s", pretty.Sprint(jobp))
|
||||
|
||||
task := NewTask("", jobi, log)
|
||||
pruner, err := jobp.Pruner(task, testPrunePolicyArgs.side, true)
|
||||
pruner, err := jobp.Pruner(testPrunePolicyArgs.side, true)
|
||||
if err != nil {
|
||||
log.Printf("cannot create test pruner: %s", err)
|
||||
os.Exit(1)
|
||||
|
Loading…
Reference in New Issue
Block a user