mirror of
https://github.com/zrepl/zrepl.git
synced 2024-12-31 11:28:56 +01:00
WIP: zrepl wait active JOB INVOCATION_ID WHAT (callback-based implementation)
This commit is contained in:
parent
5c6d69a69c
commit
68b895d0bc
@ -73,10 +73,11 @@ func (j *controlJob) RegisterMetrics(registerer prometheus.Registerer) {
|
||||
}
|
||||
|
||||
const (
|
||||
ControlJobEndpointPProf string = "/debug/pprof"
|
||||
ControlJobEndpointVersion string = "/version"
|
||||
ControlJobEndpointStatus string = "/status"
|
||||
ControlJobEndpointSignal string = "/signal"
|
||||
ControlJobEndpointPProf string = "/debug/pprof"
|
||||
ControlJobEndpointVersion string = "/version"
|
||||
ControlJobEndpointStatus string = "/status"
|
||||
ControlJobEndpointSignal string = "/signal"
|
||||
ControlJobEndpointWaitActive string = "/wait/active"
|
||||
)
|
||||
|
||||
func (j *controlJob) Run(ctx context.Context) {
|
||||
@ -130,6 +131,52 @@ func (j *controlJob) Run(ctx context.Context) {
|
||||
return s, nil
|
||||
}})
|
||||
|
||||
mux.Handle(ControlJobEndpointWaitActive, requestLogger{log: log, handler: jsonRequestResponder{log, func(decoder jsonDecoder) (v interface{}, err error) {
|
||||
type reqT struct {
|
||||
Job string
|
||||
InvocationId uint64
|
||||
What string
|
||||
}
|
||||
var req reqT
|
||||
if decoder(&req) != nil {
|
||||
return nil, errors.Errorf("decode failed")
|
||||
}
|
||||
|
||||
j.jobs.m.RLock()
|
||||
|
||||
jo, ok := j.jobs.jobs[req.Job]
|
||||
if !ok {
|
||||
j.jobs.m.RUnlock()
|
||||
return struct{}{}, fmt.Errorf("unknown job name %q", req.Job)
|
||||
}
|
||||
|
||||
ajo, ok := jo.(*job.ActiveSide)
|
||||
if !ok {
|
||||
v, err = struct{}{}, fmt.Errorf("job %q is not an active side (it's a %T)", jo.Name(), jo)
|
||||
j.jobs.m.RUnlock()
|
||||
return v, err
|
||||
}
|
||||
|
||||
cbCalled := make(chan struct{})
|
||||
err = ajo.AddActiveSideWaiter(req.InvocationId, req.What, func() {
|
||||
log.WithField("request", req).Debug("active side waiter done")
|
||||
close(cbCalled)
|
||||
})
|
||||
|
||||
j.jobs.m.RUnlock() // unlock before waiting!
|
||||
|
||||
if err != nil {
|
||||
return struct{}{}, err
|
||||
}
|
||||
|
||||
select {
|
||||
// TODO ctx with timeout!
|
||||
case <-cbCalled:
|
||||
return struct{}{}, nil
|
||||
}
|
||||
|
||||
}}})
|
||||
|
||||
mux.Handle(ControlJobEndpointSignal,
|
||||
requestLogger{log: log, handler: jsonRequestResponder{log, func(decoder jsonDecoder) (interface{}, error) {
|
||||
type reqT struct {
|
||||
@ -155,6 +202,7 @@ func (j *controlJob) Run(ctx context.Context) {
|
||||
|
||||
return struct{}{}, err
|
||||
}}})
|
||||
|
||||
server := http.Server{
|
||||
Handler: mux,
|
||||
// control socket is local, 1s timeout should be more than sufficient, even on a loaded system
|
||||
|
@ -43,8 +43,10 @@ type ActiveSide struct {
|
||||
promBytesReplicated *prometheus.CounterVec // labels: filesystem
|
||||
promReplicationErrors prometheus.Gauge
|
||||
|
||||
tasksMtx sync.Mutex
|
||||
tasks activeSideTasks
|
||||
tasksMtx sync.Mutex
|
||||
tasks activeSideTasks
|
||||
activeInvocationId uint64 // 0 <=> inactive
|
||||
doneWaiters, nextWaiters []func()
|
||||
}
|
||||
|
||||
//go:generate enumer -type=ActiveSideState
|
||||
@ -433,6 +435,7 @@ func (j *ActiveSide) Run(ctx context.Context) {
|
||||
go j.mode.RunPeriodic(periodicCtx, periodicDone)
|
||||
|
||||
invocationCount := 0
|
||||
|
||||
outer:
|
||||
for {
|
||||
log.Info("wait for replications")
|
||||
@ -446,12 +449,59 @@ outer:
|
||||
case <-periodicDone:
|
||||
}
|
||||
invocationCount++
|
||||
|
||||
j.tasksMtx.Lock()
|
||||
j.activeInvocationId = uint64(invocationCount)
|
||||
j.doneWaiters = j.nextWaiters
|
||||
j.nextWaiters = nil
|
||||
j.tasksMtx.Unlock()
|
||||
|
||||
invocationCtx, endSpan := trace.WithSpan(ctx, fmt.Sprintf("invocation-%d", invocationCount))
|
||||
j.do(invocationCtx)
|
||||
|
||||
j.tasksMtx.Lock()
|
||||
j.activeInvocationId = 0
|
||||
for _, f := range j.doneWaiters {
|
||||
go f()
|
||||
}
|
||||
j.doneWaiters = nil
|
||||
j.tasksMtx.Unlock()
|
||||
|
||||
endSpan()
|
||||
}
|
||||
}
|
||||
|
||||
type AddActiveSideWaiterRequest struct {
|
||||
InvocationId uint64
|
||||
What string
|
||||
}
|
||||
|
||||
func (j *ActiveSide) AddActiveSideWaiter(invocationId uint64, what string, cb func()) error {
|
||||
j.tasksMtx.Lock()
|
||||
defer j.tasksMtx.Unlock()
|
||||
|
||||
var targetQueue *[]func()
|
||||
if invocationId == 0 {
|
||||
if j.activeInvocationId != 0 {
|
||||
targetQueue = &j.doneWaiters
|
||||
} else {
|
||||
targetQueue = &j.nextWaiters
|
||||
}
|
||||
} else if j.activeInvocationId == invocationId {
|
||||
targetQueue = &j.nextWaiters
|
||||
} else {
|
||||
return fmt.Errorf("invocation %d is not the current invocation, current invocation is %d (0 means no active invocation); pass id '0' to wait for the next invocation", invocationId, j.activeInvocationId)
|
||||
}
|
||||
|
||||
switch what {
|
||||
case "invocation-done":
|
||||
*targetQueue = append(*targetQueue, cb)
|
||||
default:
|
||||
return fmt.Errorf("unknown wait target %q", what)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (j *ActiveSide) do(ctx context.Context) {
|
||||
|
||||
j.mode.ConnectEndpoints(ctx, j.connecter)
|
||||
@ -492,7 +542,6 @@ func (j *ActiveSide) do(ctx context.Context) {
|
||||
GetLogger(ctx).Info("start replication")
|
||||
repWait(true) // wait blocking
|
||||
repCancel() // always cancel to free up context resources
|
||||
|
||||
replicationReport := j.tasks.replicationReport()
|
||||
j.promReplicationErrors.Set(float64(replicationReport.GetFailedFilesystemsCountInLatestAttempt()))
|
||||
|
||||
|
@ -19,6 +19,7 @@ func GetLogger(ctx context.Context) Logger {
|
||||
return logging.GetLogger(ctx, logging.SubsysJob)
|
||||
}
|
||||
|
||||
|
||||
type Job interface {
|
||||
Name() string
|
||||
Run(ctx context.Context)
|
||||
|
1
main.go
1
main.go
@ -12,6 +12,7 @@ func init() {
|
||||
cli.AddSubcommand(daemon.DaemonCmd)
|
||||
cli.AddSubcommand(status.Subcommand)
|
||||
cli.AddSubcommand(client.SignalCmd)
|
||||
cli.AddSubcommand(client.WaitCmd)
|
||||
cli.AddSubcommand(client.StdinserverCmd)
|
||||
cli.AddSubcommand(client.ConfigcheckCmd)
|
||||
cli.AddSubcommand(client.VersionCmd)
|
||||
|
Loading…
Reference in New Issue
Block a user