mirror of
https://github.com/zrepl/zrepl.git
synced 2025-01-05 13:59:06 +01:00
WIP: zrepl wait active JOB INVOCATION_ID WHAT (callback-based implementation)
This commit is contained in:
parent
5c6d69a69c
commit
68b895d0bc
@ -73,10 +73,11 @@ func (j *controlJob) RegisterMetrics(registerer prometheus.Registerer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
ControlJobEndpointPProf string = "/debug/pprof"
|
ControlJobEndpointPProf string = "/debug/pprof"
|
||||||
ControlJobEndpointVersion string = "/version"
|
ControlJobEndpointVersion string = "/version"
|
||||||
ControlJobEndpointStatus string = "/status"
|
ControlJobEndpointStatus string = "/status"
|
||||||
ControlJobEndpointSignal string = "/signal"
|
ControlJobEndpointSignal string = "/signal"
|
||||||
|
ControlJobEndpointWaitActive string = "/wait/active"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (j *controlJob) Run(ctx context.Context) {
|
func (j *controlJob) Run(ctx context.Context) {
|
||||||
@ -130,6 +131,52 @@ func (j *controlJob) Run(ctx context.Context) {
|
|||||||
return s, nil
|
return s, nil
|
||||||
}})
|
}})
|
||||||
|
|
||||||
|
mux.Handle(ControlJobEndpointWaitActive, requestLogger{log: log, handler: jsonRequestResponder{log, func(decoder jsonDecoder) (v interface{}, err error) {
|
||||||
|
type reqT struct {
|
||||||
|
Job string
|
||||||
|
InvocationId uint64
|
||||||
|
What string
|
||||||
|
}
|
||||||
|
var req reqT
|
||||||
|
if decoder(&req) != nil {
|
||||||
|
return nil, errors.Errorf("decode failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
j.jobs.m.RLock()
|
||||||
|
|
||||||
|
jo, ok := j.jobs.jobs[req.Job]
|
||||||
|
if !ok {
|
||||||
|
j.jobs.m.RUnlock()
|
||||||
|
return struct{}{}, fmt.Errorf("unknown job name %q", req.Job)
|
||||||
|
}
|
||||||
|
|
||||||
|
ajo, ok := jo.(*job.ActiveSide)
|
||||||
|
if !ok {
|
||||||
|
v, err = struct{}{}, fmt.Errorf("job %q is not an active side (it's a %T)", jo.Name(), jo)
|
||||||
|
j.jobs.m.RUnlock()
|
||||||
|
return v, err
|
||||||
|
}
|
||||||
|
|
||||||
|
cbCalled := make(chan struct{})
|
||||||
|
err = ajo.AddActiveSideWaiter(req.InvocationId, req.What, func() {
|
||||||
|
log.WithField("request", req).Debug("active side waiter done")
|
||||||
|
close(cbCalled)
|
||||||
|
})
|
||||||
|
|
||||||
|
j.jobs.m.RUnlock() // unlock before waiting!
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return struct{}{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
// TODO ctx with timeout!
|
||||||
|
case <-cbCalled:
|
||||||
|
return struct{}{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
}}})
|
||||||
|
|
||||||
mux.Handle(ControlJobEndpointSignal,
|
mux.Handle(ControlJobEndpointSignal,
|
||||||
requestLogger{log: log, handler: jsonRequestResponder{log, func(decoder jsonDecoder) (interface{}, error) {
|
requestLogger{log: log, handler: jsonRequestResponder{log, func(decoder jsonDecoder) (interface{}, error) {
|
||||||
type reqT struct {
|
type reqT struct {
|
||||||
@ -155,6 +202,7 @@ func (j *controlJob) Run(ctx context.Context) {
|
|||||||
|
|
||||||
return struct{}{}, err
|
return struct{}{}, err
|
||||||
}}})
|
}}})
|
||||||
|
|
||||||
server := http.Server{
|
server := http.Server{
|
||||||
Handler: mux,
|
Handler: mux,
|
||||||
// control socket is local, 1s timeout should be more than sufficient, even on a loaded system
|
// control socket is local, 1s timeout should be more than sufficient, even on a loaded system
|
||||||
|
@ -43,8 +43,10 @@ type ActiveSide struct {
|
|||||||
promBytesReplicated *prometheus.CounterVec // labels: filesystem
|
promBytesReplicated *prometheus.CounterVec // labels: filesystem
|
||||||
promReplicationErrors prometheus.Gauge
|
promReplicationErrors prometheus.Gauge
|
||||||
|
|
||||||
tasksMtx sync.Mutex
|
tasksMtx sync.Mutex
|
||||||
tasks activeSideTasks
|
tasks activeSideTasks
|
||||||
|
activeInvocationId uint64 // 0 <=> inactive
|
||||||
|
doneWaiters, nextWaiters []func()
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:generate enumer -type=ActiveSideState
|
//go:generate enumer -type=ActiveSideState
|
||||||
@ -433,6 +435,7 @@ func (j *ActiveSide) Run(ctx context.Context) {
|
|||||||
go j.mode.RunPeriodic(periodicCtx, periodicDone)
|
go j.mode.RunPeriodic(periodicCtx, periodicDone)
|
||||||
|
|
||||||
invocationCount := 0
|
invocationCount := 0
|
||||||
|
|
||||||
outer:
|
outer:
|
||||||
for {
|
for {
|
||||||
log.Info("wait for replications")
|
log.Info("wait for replications")
|
||||||
@ -446,12 +449,59 @@ outer:
|
|||||||
case <-periodicDone:
|
case <-periodicDone:
|
||||||
}
|
}
|
||||||
invocationCount++
|
invocationCount++
|
||||||
|
|
||||||
|
j.tasksMtx.Lock()
|
||||||
|
j.activeInvocationId = uint64(invocationCount)
|
||||||
|
j.doneWaiters = j.nextWaiters
|
||||||
|
j.nextWaiters = nil
|
||||||
|
j.tasksMtx.Unlock()
|
||||||
|
|
||||||
invocationCtx, endSpan := trace.WithSpan(ctx, fmt.Sprintf("invocation-%d", invocationCount))
|
invocationCtx, endSpan := trace.WithSpan(ctx, fmt.Sprintf("invocation-%d", invocationCount))
|
||||||
j.do(invocationCtx)
|
j.do(invocationCtx)
|
||||||
|
|
||||||
|
j.tasksMtx.Lock()
|
||||||
|
j.activeInvocationId = 0
|
||||||
|
for _, f := range j.doneWaiters {
|
||||||
|
go f()
|
||||||
|
}
|
||||||
|
j.doneWaiters = nil
|
||||||
|
j.tasksMtx.Unlock()
|
||||||
|
|
||||||
endSpan()
|
endSpan()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type AddActiveSideWaiterRequest struct {
|
||||||
|
InvocationId uint64
|
||||||
|
What string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (j *ActiveSide) AddActiveSideWaiter(invocationId uint64, what string, cb func()) error {
|
||||||
|
j.tasksMtx.Lock()
|
||||||
|
defer j.tasksMtx.Unlock()
|
||||||
|
|
||||||
|
var targetQueue *[]func()
|
||||||
|
if invocationId == 0 {
|
||||||
|
if j.activeInvocationId != 0 {
|
||||||
|
targetQueue = &j.doneWaiters
|
||||||
|
} else {
|
||||||
|
targetQueue = &j.nextWaiters
|
||||||
|
}
|
||||||
|
} else if j.activeInvocationId == invocationId {
|
||||||
|
targetQueue = &j.nextWaiters
|
||||||
|
} else {
|
||||||
|
return fmt.Errorf("invocation %d is not the current invocation, current invocation is %d (0 means no active invocation); pass id '0' to wait for the next invocation", invocationId, j.activeInvocationId)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch what {
|
||||||
|
case "invocation-done":
|
||||||
|
*targetQueue = append(*targetQueue, cb)
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unknown wait target %q", what)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (j *ActiveSide) do(ctx context.Context) {
|
func (j *ActiveSide) do(ctx context.Context) {
|
||||||
|
|
||||||
j.mode.ConnectEndpoints(ctx, j.connecter)
|
j.mode.ConnectEndpoints(ctx, j.connecter)
|
||||||
@ -492,7 +542,6 @@ func (j *ActiveSide) do(ctx context.Context) {
|
|||||||
GetLogger(ctx).Info("start replication")
|
GetLogger(ctx).Info("start replication")
|
||||||
repWait(true) // wait blocking
|
repWait(true) // wait blocking
|
||||||
repCancel() // always cancel to free up context resources
|
repCancel() // always cancel to free up context resources
|
||||||
|
|
||||||
replicationReport := j.tasks.replicationReport()
|
replicationReport := j.tasks.replicationReport()
|
||||||
j.promReplicationErrors.Set(float64(replicationReport.GetFailedFilesystemsCountInLatestAttempt()))
|
j.promReplicationErrors.Set(float64(replicationReport.GetFailedFilesystemsCountInLatestAttempt()))
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ func GetLogger(ctx context.Context) Logger {
|
|||||||
return logging.GetLogger(ctx, logging.SubsysJob)
|
return logging.GetLogger(ctx, logging.SubsysJob)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
type Job interface {
|
type Job interface {
|
||||||
Name() string
|
Name() string
|
||||||
Run(ctx context.Context)
|
Run(ctx context.Context)
|
||||||
|
1
main.go
1
main.go
@ -12,6 +12,7 @@ func init() {
|
|||||||
cli.AddSubcommand(daemon.DaemonCmd)
|
cli.AddSubcommand(daemon.DaemonCmd)
|
||||||
cli.AddSubcommand(status.Subcommand)
|
cli.AddSubcommand(status.Subcommand)
|
||||||
cli.AddSubcommand(client.SignalCmd)
|
cli.AddSubcommand(client.SignalCmd)
|
||||||
|
cli.AddSubcommand(client.WaitCmd)
|
||||||
cli.AddSubcommand(client.StdinserverCmd)
|
cli.AddSubcommand(client.StdinserverCmd)
|
||||||
cli.AddSubcommand(client.ConfigcheckCmd)
|
cli.AddSubcommand(client.ConfigcheckCmd)
|
||||||
cli.AddSubcommand(client.VersionCmd)
|
cli.AddSubcommand(client.VersionCmd)
|
||||||
|
Loading…
Reference in New Issue
Block a user