mirror of
https://github.com/zrepl/zrepl.git
synced 2024-12-22 15:11:16 +01:00
replication: refactor driving logic (no more explicit state machine)
This commit is contained in:
parent
0230c6321f
commit
07b43bffa4
38
Gopkg.lock
generated
38
Gopkg.lock
generated
@ -161,6 +161,14 @@
|
||||
revision = "3247c84500bff8d9fb6d579d800f20b3e091582c"
|
||||
version = "v1.0.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:4ff67dde814694496d7aa31be44b900f9717a10c8bc9136b13f49c8ef97f439a"
|
||||
name = "github.com/montanaflynn/stats"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "63fbb2597b7a13043b453a4b819945badb8f8926"
|
||||
version = "v0.5.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:f60ff065b58bd53e641112b38bbda9d2684deb828393c7ffb89c69a1ee301d17"
|
||||
@ -245,6 +253,14 @@
|
||||
pruneopts = ""
|
||||
revision = "8b1c2da0d56deffdbb9e48d4414b4e674bd8083e"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:3962f553b77bf6c03fc07cd687a22dd3b00fe11aa14d31194f5505f5bb65cdc8"
|
||||
name = "github.com/sergi/go-diff"
|
||||
packages = ["diffmatchpatch"]
|
||||
pruneopts = ""
|
||||
revision = "1744e2970ca51c86172c8190fadad617561ed6e7"
|
||||
version = "v1.0.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:146327ce93be37e68bd3ff8541090d96da8cb3adc9e35d57570e9170a29f6bf6"
|
||||
@ -280,6 +296,25 @@
|
||||
revision = "93babf24513d0e8277635da8169fcc5a46ae3f6a"
|
||||
version = "v1.11.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:529ed3f98838f69e13761788d0cc71b44e130058fab13bae2ce09f7a176bced4"
|
||||
name = "github.com/yudai/gojsondiff"
|
||||
packages = [
|
||||
".",
|
||||
"formatter",
|
||||
]
|
||||
pruneopts = ""
|
||||
revision = "7b1b7adf999dab73a6eb02669c3d82dbb27a3dd6"
|
||||
version = "1.0.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:9857bb2293f372b2181004d8b62179bbdb4ab0982ec6f762abe6cf2bfedaff85"
|
||||
name = "github.com/yudai/golcs"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "ecda9a501e8220fae3b4b600c3db4b0ba22cfc68"
|
||||
|
||||
[[projects]]
|
||||
branch = "v2"
|
||||
digest = "1:6b8a6afafde7ed31cd0c577ba40d88ce39e8f1c5eb76d7836be7d5b74f1c534a"
|
||||
@ -406,6 +441,7 @@
|
||||
"github.com/jinzhu/copier",
|
||||
"github.com/kr/pretty",
|
||||
"github.com/mattn/go-isatty",
|
||||
"github.com/montanaflynn/stats",
|
||||
"github.com/pkg/errors",
|
||||
"github.com/pkg/profile",
|
||||
"github.com/problame/go-netssh",
|
||||
@ -415,6 +451,8 @@
|
||||
"github.com/spf13/pflag",
|
||||
"github.com/stretchr/testify/assert",
|
||||
"github.com/stretchr/testify/require",
|
||||
"github.com/yudai/gojsondiff",
|
||||
"github.com/yudai/gojsondiff/formatter",
|
||||
"github.com/zrepl/yaml-config",
|
||||
"golang.org/x/net/context",
|
||||
"golang.org/x/sys/unix",
|
||||
|
2
Makefile
2
Makefile
@ -27,7 +27,7 @@ vendordeps:
|
||||
dep ensure -v -vendor-only
|
||||
|
||||
generate: #not part of the build, must do that manually
|
||||
protoc -I=replication/pdu --go_out=plugins=grpc:replication/pdu replication/pdu/pdu.proto
|
||||
protoc -I=replication/logic/pdu --go_out=plugins=grpc:replication/logic/pdu replication/logic/pdu/pdu.proto
|
||||
go generate -x ./...
|
||||
|
||||
build:
|
||||
|
157
client/status.go
157
client/status.go
@ -10,8 +10,7 @@ import (
|
||||
"github.com/zrepl/zrepl/daemon"
|
||||
"github.com/zrepl/zrepl/daemon/job"
|
||||
"github.com/zrepl/zrepl/daemon/pruner"
|
||||
"github.com/zrepl/zrepl/replication"
|
||||
"github.com/zrepl/zrepl/replication/fsrep"
|
||||
"github.com/zrepl/zrepl/replication/report"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
@ -342,74 +341,58 @@ func (t *tui) draw() {
|
||||
termbox.Flush()
|
||||
}
|
||||
|
||||
func (t *tui) renderReplicationReport(rep *replication.Report, history *bytesProgressHistory) {
|
||||
func (t *tui) renderReplicationReport(rep *report.Report, history *bytesProgressHistory) {
|
||||
if rep == nil {
|
||||
t.printf("...\n")
|
||||
return
|
||||
}
|
||||
|
||||
all := make([]*fsrep.Report, 0, len(rep.Completed)+len(rep.Pending) + 1)
|
||||
all = append(all, rep.Completed...)
|
||||
all = append(all, rep.Pending...)
|
||||
if rep.Active != nil {
|
||||
all = append(all, rep.Active)
|
||||
}
|
||||
sort.Slice(all, func(i, j int) bool {
|
||||
return all[i].Filesystem < all[j].Filesystem
|
||||
})
|
||||
|
||||
state, err := replication.StateString(rep.Status)
|
||||
if err != nil {
|
||||
t.printf("Status: %q (parse error: %q)\n", rep.Status, err)
|
||||
// TODO visualize more than the latest attempt by folding all attempts into one
|
||||
if len(rep.Attempts) == 0 {
|
||||
t.printf("no attempts made yet")
|
||||
return
|
||||
}
|
||||
|
||||
t.printf("Status: %s", state)
|
||||
latest := rep.Attempts[len(rep.Attempts)-1]
|
||||
sort.Slice(latest.Filesystems, func(i, j int) bool {
|
||||
return latest.Filesystems[i].Info.Name < latest.Filesystems[j].Info.Name
|
||||
})
|
||||
|
||||
t.printf("Status: %s", latest.State)
|
||||
t.newline()
|
||||
if rep.Problem != "" {
|
||||
if latest.State == report.AttemptPlanningError {
|
||||
t.printf("Problem: ")
|
||||
t.printfDrawIndentedAndWrappedIfMultiline("%s", rep.Problem)
|
||||
t.printfDrawIndentedAndWrappedIfMultiline("%s", latest.PlanError)
|
||||
t.newline()
|
||||
} else if latest.State == report.AttemptFanOutError {
|
||||
t.printf("Problem: one or more of the filesystems encountered errors")
|
||||
t.newline()
|
||||
}
|
||||
if rep.SleepUntil.After(time.Now()) && !state.IsTerminal() {
|
||||
t.printf("Sleeping until %s (%s left)\n", rep.SleepUntil, rep.SleepUntil.Sub(time.Now()))
|
||||
}
|
||||
|
||||
if state != replication.Planning && state != replication.PlanningError {
|
||||
// TODO report sleep time between retry attempts once that is implemented
|
||||
|
||||
if latest.State != report.AttemptPlanning && latest.State != report.AttemptPlanningError {
|
||||
// Draw global progress bar
|
||||
// Progress: [---------------]
|
||||
sumUpFSRep := func(rep *fsrep.Report) (transferred, total int64) {
|
||||
for _, s := range rep.Pending {
|
||||
transferred += s.Bytes
|
||||
total += s.ExpectedBytes
|
||||
}
|
||||
for _, s := range rep.Completed {
|
||||
transferred += s.Bytes
|
||||
total += s.ExpectedBytes
|
||||
}
|
||||
return
|
||||
}
|
||||
var transferred, total int64
|
||||
for _, fs := range all {
|
||||
fstx, fstotal := sumUpFSRep(fs)
|
||||
transferred += fstx
|
||||
total += fstotal
|
||||
}
|
||||
rate, changeCount := history.Update(transferred)
|
||||
expected, replicated := latest.BytesSum()
|
||||
rate, changeCount := history.Update(replicated)
|
||||
t.write("Progress: ")
|
||||
t.drawBar(50, transferred, total, changeCount)
|
||||
t.write(fmt.Sprintf(" %s / %s @ %s/s", ByteCountBinary(transferred), ByteCountBinary(total), ByteCountBinary(rate)))
|
||||
t.drawBar(50, replicated, expected, changeCount)
|
||||
t.write(fmt.Sprintf(" %s / %s @ %s/s", ByteCountBinary(replicated), ByteCountBinary(expected), ByteCountBinary(rate)))
|
||||
t.newline()
|
||||
|
||||
var maxFSLen int
|
||||
for _, fs := range latest.Filesystems {
|
||||
if len(fs.Info.Name) > maxFSLen {
|
||||
maxFSLen = len(fs.Info.Name)
|
||||
}
|
||||
}
|
||||
for _, fs := range latest.Filesystems {
|
||||
t.printFilesystemStatus(fs, false, maxFSLen) // FIXME bring 'active' flag back
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
var maxFSLen int
|
||||
for _, fs := range all {
|
||||
if len(fs.Filesystem) > maxFSLen {
|
||||
maxFSLen = len(fs.Filesystem)
|
||||
}
|
||||
}
|
||||
for _, fs := range all {
|
||||
t.printFilesystemStatus(fs, fs == rep.Active, maxFSLen)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tui) renderPrunerReport(r *pruner.Report) {
|
||||
@ -513,25 +496,6 @@ func (t *tui) renderPrunerReport(r *pruner.Report) {
|
||||
|
||||
}
|
||||
|
||||
const snapshotIndent = 1
|
||||
func calculateMaxFSLength(all []*fsrep.Report) (maxFS, maxStatus int) {
|
||||
for _, e := range all {
|
||||
if len(e.Filesystem) > maxFS {
|
||||
maxFS = len(e.Filesystem)
|
||||
}
|
||||
all2 := make([]*fsrep.StepReport, 0, len(e.Pending) + len(e.Completed))
|
||||
all2 = append(all2, e.Pending...)
|
||||
all2 = append(all2, e.Completed...)
|
||||
for _, e2 := range all2 {
|
||||
elen := len(e2.Problem) + len(e2.From) + len(e2.To) + 60 // random spacing, units, labels, etc
|
||||
if elen > maxStatus {
|
||||
maxStatus = elen
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func times(str string, n int) (out string) {
|
||||
for i := 0; i < n; i++ {
|
||||
out += str
|
||||
@ -575,35 +539,13 @@ func (t *tui) drawBar(length int, bytes, totalBytes int64, changeCount int) {
|
||||
t.write("]")
|
||||
}
|
||||
|
||||
func StringStepState(s fsrep.StepState) string {
|
||||
switch s {
|
||||
case fsrep.StepReplicationReady: return "Ready"
|
||||
case fsrep.StepMarkReplicatedReady: return "MarkReady"
|
||||
case fsrep.StepCompleted: return "Completed"
|
||||
default:
|
||||
return fmt.Sprintf("UNKNOWN %d", s)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tui) printFilesystemStatus(rep *fsrep.Report, active bool, maxFS int) {
|
||||
|
||||
bytes := int64(0)
|
||||
totalBytes := int64(0)
|
||||
for _, s := range rep.Pending {
|
||||
bytes += s.Bytes
|
||||
totalBytes += s.ExpectedBytes
|
||||
}
|
||||
for _, s := range rep.Completed {
|
||||
bytes += s.Bytes
|
||||
totalBytes += s.ExpectedBytes
|
||||
}
|
||||
|
||||
func (t *tui) printFilesystemStatus(rep *report.FilesystemReport, active bool, maxFS int) {
|
||||
|
||||
expected, replicated := rep.BytesSum()
|
||||
status := fmt.Sprintf("%s (step %d/%d, %s/%s)",
|
||||
rep.Status,
|
||||
len(rep.Completed), len(rep.Pending) + len(rep.Completed),
|
||||
ByteCountBinary(bytes), ByteCountBinary(totalBytes),
|
||||
|
||||
strings.ToUpper(string(rep.State)),
|
||||
rep.CurrentStep, len(rep.Steps),
|
||||
ByteCountBinary(replicated), ByteCountBinary(expected),
|
||||
)
|
||||
|
||||
activeIndicator := " "
|
||||
@ -612,18 +554,23 @@ func (t *tui) printFilesystemStatus(rep *fsrep.Report, active bool, maxFS int) {
|
||||
}
|
||||
t.printf("%s %s %s ",
|
||||
activeIndicator,
|
||||
rightPad(rep.Filesystem, maxFS, " "),
|
||||
rightPad(rep.Info.Name, maxFS, " "),
|
||||
status)
|
||||
|
||||
next := ""
|
||||
if rep.Problem != "" {
|
||||
next = rep.Problem
|
||||
} else if len(rep.Pending) > 0 {
|
||||
if rep.Pending[0].From != "" {
|
||||
next = fmt.Sprintf("next: %s => %s", rep.Pending[0].From, rep.Pending[0].To)
|
||||
if err := rep.Error(); err != nil {
|
||||
next = err.Err
|
||||
} else if rep.State != report.FilesystemDone {
|
||||
if nextStep := rep.NextStep(); nextStep != nil {
|
||||
if nextStep.IsIncremental() {
|
||||
next = fmt.Sprintf("next: %s => %s", nextStep.Info.From, nextStep.Info.To)
|
||||
} else {
|
||||
next = fmt.Sprintf("next: %s (full)", nextStep.Info.To)
|
||||
}
|
||||
} else {
|
||||
next = fmt.Sprintf("next: %s (full)", rep.Pending[0].To)
|
||||
next = "" // individual FSes may still be in planning state
|
||||
}
|
||||
|
||||
}
|
||||
t.printfDrawIndentedAndWrappedIfMultiline("%s", next)
|
||||
|
||||
|
@ -17,10 +17,12 @@ import (
|
||||
"github.com/zrepl/zrepl/daemon/snapper"
|
||||
"github.com/zrepl/zrepl/endpoint"
|
||||
"github.com/zrepl/zrepl/replication"
|
||||
"github.com/zrepl/zrepl/replication/driver"
|
||||
"github.com/zrepl/zrepl/replication/logic"
|
||||
"github.com/zrepl/zrepl/replication/report"
|
||||
"github.com/zrepl/zrepl/rpc"
|
||||
"github.com/zrepl/zrepl/transport"
|
||||
"github.com/zrepl/zrepl/transport/fromconfig"
|
||||
"github.com/zrepl/zrepl/util/envconst"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
)
|
||||
|
||||
@ -53,7 +55,7 @@ type activeSideTasks struct {
|
||||
state ActiveSideState
|
||||
|
||||
// valid for state ActiveSideReplicating, ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone
|
||||
replication *replication.Replication
|
||||
replicationReport driver.ReportFunc
|
||||
replicationCancel context.CancelFunc
|
||||
|
||||
// valid for state ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone
|
||||
@ -79,7 +81,7 @@ func (a *ActiveSide) updateTasks(u func(*activeSideTasks)) activeSideTasks {
|
||||
type activeMode interface {
|
||||
ConnectEndpoints(rpcLoggers rpc.Loggers, connecter transport.Connecter)
|
||||
DisconnectEndpoints()
|
||||
SenderReceiver() (replication.Sender, replication.Receiver)
|
||||
SenderReceiver() (logic.Sender, logic.Receiver)
|
||||
Type() Type
|
||||
RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{})
|
||||
ResetConnectBackoff()
|
||||
@ -111,7 +113,7 @@ func (m *modePush) DisconnectEndpoints() {
|
||||
m.receiver = nil
|
||||
}
|
||||
|
||||
func (m *modePush) SenderReceiver() (replication.Sender, replication.Receiver) {
|
||||
func (m *modePush) SenderReceiver() (logic.Sender, logic.Receiver) {
|
||||
m.setupMtx.Lock()
|
||||
defer m.setupMtx.Unlock()
|
||||
return m.sender, m.receiver
|
||||
@ -172,7 +174,7 @@ func (m *modePull) DisconnectEndpoints() {
|
||||
m.receiver = nil
|
||||
}
|
||||
|
||||
func (m *modePull) SenderReceiver() (replication.Sender, replication.Receiver) {
|
||||
func (m *modePull) SenderReceiver() (logic.Sender, logic.Receiver) {
|
||||
m.setupMtx.Lock()
|
||||
defer m.setupMtx.Unlock()
|
||||
return m.sender, m.receiver
|
||||
@ -274,7 +276,7 @@ func (j *ActiveSide) RegisterMetrics(registerer prometheus.Registerer) {
|
||||
func (j *ActiveSide) Name() string { return j.name }
|
||||
|
||||
type ActiveSideStatus struct {
|
||||
Replication *replication.Report
|
||||
Replication *report.Report
|
||||
PruningSender, PruningReceiver *pruner.Report
|
||||
}
|
||||
|
||||
@ -283,8 +285,8 @@ func (j *ActiveSide) Status() *Status {
|
||||
|
||||
s := &ActiveSideStatus{}
|
||||
t := j.mode.Type()
|
||||
if tasks.replication != nil {
|
||||
s.Replication = tasks.replication.Report()
|
||||
if tasks.replicationReport != nil {
|
||||
s.Replication = tasks.replicationReport()
|
||||
}
|
||||
if tasks.prunerSender != nil {
|
||||
s.PruningSender = tasks.prunerSender.Report()
|
||||
@ -345,78 +347,6 @@ func (j *ActiveSide) do(ctx context.Context) {
|
||||
}
|
||||
}()
|
||||
|
||||
// The code after this watchdog goroutine is sequential and transitions the state from
|
||||
// ActiveSideReplicating -> ActiveSidePruneSender -> ActiveSidePruneReceiver -> ActiveSideDone
|
||||
// If any of those sequential tasks 'gets stuck' (livelock, no progress), the watchdog will eventually
|
||||
// cancel its context.
|
||||
// If the task is written to support context cancellation, it will return immediately (in permanent error state),
|
||||
// and the sequential code above transitions to the next state.
|
||||
go func() {
|
||||
|
||||
wdto := envconst.Duration("ZREPL_JOB_WATCHDOG_TIMEOUT", 10*time.Minute)
|
||||
jitter := envconst.Duration("ZREPL_JOB_WATCHDOG_JITTER", 1*time.Second)
|
||||
// shadowing!
|
||||
log := log.WithField("watchdog_timeout", wdto.String())
|
||||
|
||||
log.Debug("starting watchdog")
|
||||
defer log.Debug("watchdog stopped")
|
||||
|
||||
t := time.NewTicker(wdto)
|
||||
defer t.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-t.C: // fall
|
||||
}
|
||||
|
||||
j.updateTasks(func(tasks *activeSideTasks) {
|
||||
// Since cancelling a task will cause the sequential code to transition to the next state immediately,
|
||||
// we cannot check for its progress right then (no fallthrough).
|
||||
// Instead, we return (not continue because we are in a closure) and give the new state another
|
||||
// ZREPL_JOB_WATCHDOG_TIMEOUT interval to try make some progress.
|
||||
|
||||
log.WithField("state", tasks.state).Debug("watchdog firing")
|
||||
|
||||
const WATCHDOG_ENVCONST_NOTICE = " (adjust ZREPL_JOB_WATCHDOG_TIMEOUT env variable if inappropriate)"
|
||||
|
||||
switch tasks.state {
|
||||
case ActiveSideReplicating:
|
||||
log.WithField("replication_progress", tasks.replication.Progress.String()).
|
||||
Debug("check replication progress")
|
||||
if tasks.replication.Progress.CheckTimeout(wdto, jitter) {
|
||||
log.Error("replication did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE)
|
||||
tasks.replicationCancel()
|
||||
return
|
||||
}
|
||||
case ActiveSidePruneSender:
|
||||
log.WithField("prune_sender_progress", tasks.replication.Progress.String()).
|
||||
Debug("check pruner_sender progress")
|
||||
if tasks.prunerSender.Progress.CheckTimeout(wdto, jitter) {
|
||||
log.Error("pruner_sender did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE)
|
||||
tasks.prunerSenderCancel()
|
||||
return
|
||||
}
|
||||
case ActiveSidePruneReceiver:
|
||||
log.WithField("prune_receiver_progress", tasks.replication.Progress.String()).
|
||||
Debug("check pruner_receiver progress")
|
||||
if tasks.prunerReceiver.Progress.CheckTimeout(wdto, jitter) {
|
||||
log.Error("pruner_receiver did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE)
|
||||
tasks.prunerReceiverCancel()
|
||||
return
|
||||
}
|
||||
case ActiveSideDone:
|
||||
// ignore, ctx will be Done() in a few milliseconds and the watchdog will exit
|
||||
default:
|
||||
log.WithField("state", tasks.state).
|
||||
Error("watchdog implementation error: unknown active side state")
|
||||
}
|
||||
})
|
||||
|
||||
}
|
||||
}()
|
||||
|
||||
sender, receiver := j.mode.SenderReceiver()
|
||||
|
||||
{
|
||||
@ -426,16 +356,19 @@ func (j *ActiveSide) do(ctx context.Context) {
|
||||
default:
|
||||
}
|
||||
ctx, repCancel := context.WithCancel(ctx)
|
||||
tasks := j.updateTasks(func(tasks *activeSideTasks) {
|
||||
var repWait driver.WaitFunc
|
||||
j.updateTasks(func(tasks *activeSideTasks) {
|
||||
// reset it
|
||||
*tasks = activeSideTasks{}
|
||||
tasks.replicationCancel = repCancel
|
||||
tasks.replication = replication.NewReplication(j.promRepStateSecs, j.promBytesReplicated)
|
||||
tasks.replicationReport, repWait = replication.Do(
|
||||
ctx, logic.NewPlanner(j.promRepStateSecs, j.promBytesReplicated, sender, receiver),
|
||||
)
|
||||
tasks.state = ActiveSideReplicating
|
||||
})
|
||||
log.Info("start replication")
|
||||
tasks.replication.Drive(ctx, sender, receiver)
|
||||
repCancel() // always cancel to free up context resources
|
||||
repWait(true) // wait blocking
|
||||
repCancel() // always cancel to free up context resources
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -14,7 +14,6 @@ import (
|
||||
"github.com/zrepl/zrepl/daemon/snapper"
|
||||
"github.com/zrepl/zrepl/endpoint"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/replication"
|
||||
"github.com/zrepl/zrepl/rpc"
|
||||
"github.com/zrepl/zrepl/rpc/transportmux"
|
||||
"github.com/zrepl/zrepl/tlsconf"
|
||||
@ -78,7 +77,6 @@ const (
|
||||
)
|
||||
|
||||
func WithSubsystemLoggers(ctx context.Context, log logger.Logger) context.Context {
|
||||
ctx = replication.WithLogger(ctx, log.WithField(SubsysField, SubsysReplication))
|
||||
ctx = endpoint.WithLogger(ctx, log.WithField(SubsysField, SubsyEndpoint))
|
||||
ctx = pruner.WithLogger(ctx, log.WithField(SubsysField, SubsysPruning))
|
||||
ctx = snapper.WithLogger(ctx, log.WithField(SubsysField, SubsysSnapshot))
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/pruning"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/util/envconst"
|
||||
"github.com/zrepl/zrepl/util/watchdog"
|
||||
"net"
|
||||
|
@ -6,7 +6,7 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/pruning"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -7,8 +7,7 @@ import (
|
||||
"path"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/replication"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
)
|
||||
|
||||
@ -34,7 +33,7 @@ func (s *Sender) filterCheckFS(fs string) (*zfs.DatasetPath, error) {
|
||||
return nil, err
|
||||
}
|
||||
if !pass {
|
||||
return nil, replication.NewFilteredError(fs)
|
||||
return nil, fmt.Errorf("endpoint does not allow access to filesystem %s", fs)
|
||||
}
|
||||
return dp, nil
|
||||
}
|
||||
|
301
replication/driver/replication_driver.go
Normal file
301
replication/driver/replication_driver.go
Normal file
@ -0,0 +1,301 @@
|
||||
package driver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/zrepl/zrepl/replication/report"
|
||||
"github.com/zrepl/zrepl/util/chainlock"
|
||||
)
|
||||
|
||||
type run struct {
|
||||
l *chainlock.L
|
||||
|
||||
startedAt, finishedAt time.Time
|
||||
|
||||
// the attempts attempted so far:
|
||||
// All but the last in this slice must have finished with some errors.
|
||||
// The last attempt may not be finished and may not have errors.
|
||||
attempts []*attempt
|
||||
}
|
||||
|
||||
type Planner interface {
|
||||
Plan(context.Context) ([]FS, error)
|
||||
}
|
||||
|
||||
// an attempt represents a single planning & execution of fs replications
|
||||
type attempt struct {
|
||||
planner Planner
|
||||
|
||||
l *chainlock.L
|
||||
|
||||
startedAt, finishedAt time.Time
|
||||
// after Planner.Plan was called, planErr and fss are mutually exclusive with regards to nil-ness
|
||||
// if both are nil, it must be assumed that Planner.Plan is active
|
||||
planErr *timedError
|
||||
fss []*fs
|
||||
}
|
||||
|
||||
type timedError struct {
|
||||
Err error
|
||||
Time time.Time
|
||||
}
|
||||
|
||||
func newTimedError(err error, t time.Time) *timedError {
|
||||
if err == nil {
|
||||
panic("error must be non-nil")
|
||||
}
|
||||
if t.IsZero() {
|
||||
panic("t must be non-zero")
|
||||
}
|
||||
return &timedError{err, t}
|
||||
}
|
||||
|
||||
func (e *timedError) IntoReportError() *report.TimedError {
|
||||
if e == nil {
|
||||
return nil
|
||||
}
|
||||
return report.NewTimedError(e.Err.Error(), e.Time)
|
||||
}
|
||||
|
||||
type FS interface {
|
||||
PlanFS(context.Context) ([]Step, error)
|
||||
ReportInfo() *report.FilesystemInfo
|
||||
}
|
||||
|
||||
type Step interface {
|
||||
TargetDate() time.Time
|
||||
Step(context.Context) error
|
||||
ReportInfo() *report.StepInfo
|
||||
}
|
||||
|
||||
type fs struct {
|
||||
fs FS
|
||||
|
||||
l *chainlock.L
|
||||
|
||||
planning struct {
|
||||
done bool
|
||||
err *timedError
|
||||
}
|
||||
|
||||
// valid iff planning.done && planning.err == nil
|
||||
planned struct {
|
||||
// valid iff planning.done && planning.err == nil
|
||||
stepErr *timedError
|
||||
// all steps, in the order in which they must be completed
|
||||
steps []*step
|
||||
// index into steps, pointing at the step that is currently executing
|
||||
// if step >= len(steps), no more work needs to be done
|
||||
step int
|
||||
}
|
||||
}
|
||||
|
||||
type step struct {
|
||||
l *chainlock.L
|
||||
step Step
|
||||
}
|
||||
|
||||
type ReportFunc func() *report.Report
|
||||
type WaitFunc func(block bool) (done bool)
|
||||
|
||||
func Do(ctx context.Context, planner Planner) (ReportFunc, WaitFunc) {
|
||||
l := chainlock.New()
|
||||
run := &run{
|
||||
l: l,
|
||||
startedAt: time.Now(),
|
||||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(done)
|
||||
|
||||
run.l.Lock()
|
||||
a1 := &attempt{
|
||||
l: l,
|
||||
startedAt: time.Now(),
|
||||
planner: planner,
|
||||
}
|
||||
run.attempts = append(run.attempts, a1)
|
||||
run.l.Unlock()
|
||||
a1.do(ctx)
|
||||
|
||||
}()
|
||||
|
||||
wait := func(block bool) bool {
|
||||
if block {
|
||||
<-done
|
||||
}
|
||||
select {
|
||||
case <-done:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
report := func() *report.Report {
|
||||
defer run.l.Lock().Unlock()
|
||||
return run.report()
|
||||
}
|
||||
return report, wait
|
||||
}
|
||||
|
||||
func (a *attempt) do(ctx context.Context) {
|
||||
pfss, err := a.planner.Plan(ctx)
|
||||
errTime := time.Now()
|
||||
defer a.l.Lock().Unlock()
|
||||
if err != nil {
|
||||
a.planErr = newTimedError(err, errTime)
|
||||
a.fss = nil
|
||||
a.finishedAt = time.Now()
|
||||
return
|
||||
}
|
||||
|
||||
for _, pfs := range pfss {
|
||||
fs := &fs{
|
||||
fs: pfs,
|
||||
l: a.l,
|
||||
}
|
||||
a.fss = append(a.fss, fs)
|
||||
}
|
||||
|
||||
stepQueue := newStepQueue()
|
||||
defer stepQueue.Start(1)()
|
||||
var fssesDone sync.WaitGroup
|
||||
for _, f := range a.fss {
|
||||
fssesDone.Add(1)
|
||||
go func(f *fs) {
|
||||
defer fssesDone.Done()
|
||||
f.do(ctx, stepQueue)
|
||||
}(f)
|
||||
}
|
||||
a.l.DropWhile(func() {
|
||||
fssesDone.Wait()
|
||||
})
|
||||
a.finishedAt = time.Now()
|
||||
}
|
||||
|
||||
func (fs *fs) do(ctx context.Context, pq *stepQueue) {
|
||||
psteps, err := fs.fs.PlanFS(ctx)
|
||||
errTime := time.Now()
|
||||
defer fs.l.Lock().Unlock()
|
||||
fs.planning.done = true
|
||||
if err != nil {
|
||||
fs.planning.err = newTimedError(err, errTime)
|
||||
return
|
||||
}
|
||||
for _, pstep := range psteps {
|
||||
step := &step{
|
||||
l: fs.l,
|
||||
step: pstep,
|
||||
}
|
||||
fs.planned.steps = append(fs.planned.steps, step)
|
||||
}
|
||||
for i, s := range fs.planned.steps {
|
||||
var (
|
||||
err error
|
||||
errTime time.Time
|
||||
)
|
||||
// lock must not be held while executing step in order for reporting to work
|
||||
fs.l.DropWhile(func() {
|
||||
targetDate := s.step.TargetDate()
|
||||
defer pq.WaitReady(fs, targetDate)()
|
||||
err = s.step.Step(ctx) // no shadow
|
||||
errTime = time.Now() // no shadow
|
||||
})
|
||||
if err != nil {
|
||||
fs.planned.stepErr = newTimedError(err, errTime)
|
||||
break
|
||||
}
|
||||
fs.planned.step = i + 1 // fs.planned.step must be == len(fs.planned.steps) if all went OK
|
||||
}
|
||||
}
|
||||
|
||||
// caller must hold lock l
|
||||
func (r *run) report() *report.Report {
|
||||
report := &report.Report{
|
||||
Attempts: make([]*report.AttemptReport, len(r.attempts)),
|
||||
StartAt: r.startedAt,
|
||||
FinishAt: r.finishedAt,
|
||||
}
|
||||
for i := range report.Attempts {
|
||||
report.Attempts[i] = r.attempts[i].report()
|
||||
}
|
||||
return report
|
||||
}
|
||||
|
||||
// caller must hold lock l
|
||||
func (a *attempt) report() *report.AttemptReport {
|
||||
|
||||
r := &report.AttemptReport{
|
||||
// State is set below
|
||||
Filesystems: make([]*report.FilesystemReport, len(a.fss)),
|
||||
StartAt: a.startedAt,
|
||||
FinishAt: a.finishedAt,
|
||||
PlanError: a.planErr.IntoReportError(),
|
||||
}
|
||||
|
||||
for i := range r.Filesystems {
|
||||
r.Filesystems[i] = a.fss[i].report()
|
||||
}
|
||||
|
||||
state := report.AttemptPlanning
|
||||
if a.planErr != nil {
|
||||
state = report.AttemptPlanningError
|
||||
} else if a.fss != nil {
|
||||
if a.finishedAt.IsZero() {
|
||||
state = report.AttemptFanOutFSs
|
||||
} else {
|
||||
fsWithError := false
|
||||
for _, s := range r.Filesystems {
|
||||
fsWithError = fsWithError || s.Error() != nil
|
||||
}
|
||||
state = report.AttemptDone
|
||||
if fsWithError {
|
||||
state = report.AttemptFanOutError
|
||||
}
|
||||
}
|
||||
}
|
||||
r.State = state
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// caller must hold lock l
|
||||
func (f *fs) report() *report.FilesystemReport {
|
||||
state := report.FilesystemPlanningErrored
|
||||
if f.planning.err == nil {
|
||||
if f.planning.done {
|
||||
if f.planned.stepErr != nil {
|
||||
state = report.FilesystemSteppingErrored
|
||||
} else if f.planned.step < len(f.planned.steps) {
|
||||
state = report.FilesystemStepping
|
||||
} else {
|
||||
state = report.FilesystemDone
|
||||
}
|
||||
} else {
|
||||
state = report.FilesystemPlanning
|
||||
}
|
||||
}
|
||||
r := &report.FilesystemReport{
|
||||
Info: f.fs.ReportInfo(),
|
||||
State: state,
|
||||
PlanError: f.planning.err.IntoReportError(),
|
||||
StepError: f.planned.stepErr.IntoReportError(),
|
||||
Steps: make([]*report.StepReport, len(f.planned.steps)),
|
||||
CurrentStep: f.planned.step,
|
||||
}
|
||||
for i := range r.Steps {
|
||||
r.Steps[i] = f.planned.steps[i].report()
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// caller must hold lock l
|
||||
func (s *step) report() *report.StepReport {
|
||||
r := &report.StepReport{
|
||||
Info: s.step.ReportInfo(),
|
||||
}
|
||||
return r
|
||||
}
|
204
replication/driver/replication_driver_test.go
Normal file
204
replication/driver/replication_driver_test.go
Normal file
@ -0,0 +1,204 @@
|
||||
package driver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/zrepl/zrepl/replication/report"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
jsondiff "github.com/yudai/gojsondiff"
|
||||
jsondiffformatter "github.com/yudai/gojsondiff/formatter"
|
||||
)
|
||||
|
||||
type mockPlanner struct {
|
||||
stepCounter uint32
|
||||
fss []FS // *mockFS
|
||||
}
|
||||
|
||||
func (p *mockPlanner) Plan(ctx context.Context) ([]FS, error) {
|
||||
time.Sleep(1 * time.Second)
|
||||
p.fss = []FS{
|
||||
&mockFS{
|
||||
&p.stepCounter,
|
||||
"zroot/one",
|
||||
nil,
|
||||
},
|
||||
&mockFS{
|
||||
&p.stepCounter,
|
||||
"zroot/two",
|
||||
nil,
|
||||
},
|
||||
}
|
||||
return p.fss, nil
|
||||
}
|
||||
|
||||
type mockFS struct {
|
||||
globalStepCounter *uint32
|
||||
name string
|
||||
steps []Step
|
||||
}
|
||||
|
||||
func (f *mockFS) PlanFS(ctx context.Context) ([]Step, error) {
|
||||
if f.steps != nil {
|
||||
panic("PlanFS used twice")
|
||||
}
|
||||
switch f.name {
|
||||
case "zroot/one":
|
||||
f.steps = []Step{
|
||||
&mockStep{
|
||||
fs: f,
|
||||
ident: "a",
|
||||
duration: 1 * time.Second,
|
||||
targetDate: time.Unix(2, 0),
|
||||
},
|
||||
&mockStep{
|
||||
fs: f,
|
||||
ident: "b",
|
||||
duration: 1 * time.Second,
|
||||
targetDate: time.Unix(10, 0),
|
||||
},
|
||||
&mockStep{
|
||||
fs: f,
|
||||
ident: "c",
|
||||
duration: 1 * time.Second,
|
||||
targetDate: time.Unix(20, 0),
|
||||
},
|
||||
}
|
||||
case "zroot/two":
|
||||
f.steps = []Step{
|
||||
&mockStep{
|
||||
fs: f,
|
||||
ident: "u",
|
||||
duration: 500 * time.Millisecond,
|
||||
targetDate: time.Unix(15, 0),
|
||||
},
|
||||
&mockStep{
|
||||
fs: f,
|
||||
duration: 500 * time.Millisecond,
|
||||
ident: "v",
|
||||
targetDate: time.Unix(30, 0),
|
||||
},
|
||||
}
|
||||
default:
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
return f.steps, nil
|
||||
}
|
||||
|
||||
func (f *mockFS) ReportInfo() *report.FilesystemInfo {
|
||||
return &report.FilesystemInfo{Name: f.name}
|
||||
}
|
||||
|
||||
type mockStep struct {
|
||||
fs *mockFS
|
||||
ident string
|
||||
duration time.Duration
|
||||
targetDate time.Time
|
||||
|
||||
// filled by method Step
|
||||
globalCtr uint32
|
||||
}
|
||||
|
||||
func (f *mockStep) String() string {
|
||||
return fmt.Sprintf("%s{%s} targetDate=%s globalCtr=%v", f.fs.name, f.ident, f.targetDate, f.globalCtr)
|
||||
}
|
||||
|
||||
func (f *mockStep) Step(ctx context.Context) error {
|
||||
f.globalCtr = atomic.AddUint32(f.fs.globalStepCounter, 1)
|
||||
time.Sleep(f.duration)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *mockStep) TargetDate() time.Time {
|
||||
return f.targetDate
|
||||
}
|
||||
|
||||
func (f *mockStep) ReportInfo() *report.StepInfo {
|
||||
return &report.StepInfo{From: f.ident, To: f.ident, BytesExpected: 100, BytesReplicated: 25}
|
||||
}
|
||||
|
||||
// TODO: add meaningful validation (i.e. actual checks)
|
||||
// Since the stepqueue is not deterministic due to scheduler jitter,
|
||||
// we cannot test for any definitive sequence of steps here.
|
||||
// Such checks would further only be sensible for a non-concurrent step-queue,
|
||||
// but we're going to have concurrent replication in the future.
|
||||
//
|
||||
// For the time being, let's just exercise the code a bit.
|
||||
func TestReplication(t *testing.T) {
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
mp := &mockPlanner{}
|
||||
getReport, wait := Do(ctx, mp)
|
||||
begin := time.Now()
|
||||
fireAt := []time.Duration{
|
||||
// the following values are relative to the start
|
||||
500 * time.Millisecond, // planning
|
||||
1500 * time.Millisecond, // nothing is done, a is running
|
||||
2500 * time.Millisecond, // a done, b running
|
||||
3250 * time.Millisecond, // a,b done, u running
|
||||
3750 * time.Millisecond, // a,b,u done, c running
|
||||
4750 * time.Millisecond, // a,b,u,c done, v running
|
||||
5250 * time.Millisecond, // a,b,u,c,v done
|
||||
}
|
||||
reports := make([]*report.Report, len(fireAt))
|
||||
for i := range fireAt {
|
||||
sleepUntil := begin.Add(fireAt[i])
|
||||
time.Sleep(sleepUntil.Sub(time.Now()))
|
||||
reports[i] = getReport()
|
||||
// uncomment for viewing non-diffed results
|
||||
// t.Logf("report @ %6.4f:\n%s", fireAt[i].Seconds(), pretty.Sprint(reports[i]))
|
||||
}
|
||||
waitBegin := time.Now()
|
||||
wait(true)
|
||||
waitDuration := time.Now().Sub(waitBegin)
|
||||
assert.True(t, waitDuration < 10*time.Millisecond, "%v", waitDuration) // and that's gratious
|
||||
|
||||
prev, err := json.Marshal(reports[0])
|
||||
require.NoError(t, err)
|
||||
for _, r := range reports[1:] {
|
||||
this, err := json.Marshal(r)
|
||||
require.NoError(t, err)
|
||||
differ := jsondiff.New()
|
||||
diff, err := differ.Compare(prev, this)
|
||||
require.NoError(t, err)
|
||||
df := jsondiffformatter.NewDeltaFormatter()
|
||||
res, err := df.Format(diff)
|
||||
res = res
|
||||
require.NoError(t, err)
|
||||
// uncomment the following line to get json diffs between each captured step
|
||||
// t.Logf("%s", res)
|
||||
prev, err = json.Marshal(r)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
steps := make([]*mockStep, 0)
|
||||
for _, fs := range mp.fss {
|
||||
for _, step := range fs.(*mockFS).steps {
|
||||
steps = append(steps, step.(*mockStep))
|
||||
}
|
||||
}
|
||||
|
||||
// sort steps in pq order (although, remember, pq is not deterministic)
|
||||
sort.Slice(steps, func(i, j int) bool {
|
||||
return steps[i].targetDate.Before(steps[j].targetDate)
|
||||
})
|
||||
|
||||
// manual inspection of the globalCtr value should show that, despite
|
||||
// scheduler-dependent behavior of pq, steps should generally be taken
|
||||
// from oldest to newest target date (globally, not per FS).
|
||||
t.Logf("steps sorted by target date:")
|
||||
for _, step := range steps {
|
||||
t.Logf("\t%s", step)
|
||||
}
|
||||
|
||||
}
|
163
replication/driver/replication_stepqueue.go
Normal file
163
replication/driver/replication_stepqueue.go
Normal file
@ -0,0 +1,163 @@
|
||||
package driver
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"time"
|
||||
|
||||
"github.com/zrepl/zrepl/util/chainlock"
|
||||
)
|
||||
|
||||
type stepQueueRec struct {
|
||||
ident interface{}
|
||||
targetDate time.Time
|
||||
wakeup chan StepCompletedFunc
|
||||
}
|
||||
|
||||
type stepQueue struct {
|
||||
stop chan struct{}
|
||||
reqs chan stepQueueRec
|
||||
}
|
||||
|
||||
type stepQueueHeapItem struct {
|
||||
idx int
|
||||
req stepQueueRec
|
||||
}
|
||||
type stepQueueHeap []*stepQueueHeapItem
|
||||
|
||||
func (h stepQueueHeap) Less(i, j int) bool {
|
||||
return h[i].req.targetDate.Before(h[j].req.targetDate)
|
||||
}
|
||||
|
||||
func (h stepQueueHeap) Swap(i, j int) {
|
||||
h[i], h[j] = h[j], h[i]
|
||||
h[i].idx = i
|
||||
h[j].idx = j
|
||||
}
|
||||
|
||||
func (h stepQueueHeap) Len() int {
|
||||
return len(h)
|
||||
}
|
||||
|
||||
func (h *stepQueueHeap) Push(elem interface{}) {
|
||||
hitem := elem.(*stepQueueHeapItem)
|
||||
hitem.idx = h.Len()
|
||||
*h = append(*h, hitem)
|
||||
}
|
||||
|
||||
func (h *stepQueueHeap) Pop() interface{} {
|
||||
elem := (*h)[h.Len()-1]
|
||||
elem.idx = -1
|
||||
*h = (*h)[:h.Len()-1]
|
||||
return elem
|
||||
}
|
||||
|
||||
// returned stepQueue must be closed with method Close
|
||||
func newStepQueue() *stepQueue {
|
||||
q := &stepQueue{
|
||||
stop: make(chan struct{}),
|
||||
reqs: make(chan stepQueueRec),
|
||||
}
|
||||
return q
|
||||
}
|
||||
|
||||
// the returned done function must be called to free resources
|
||||
// allocated by the call to Start
|
||||
//
|
||||
// No WaitReady calls must be active at the time done is called
|
||||
// The behavior of calling WaitReady after done was called is undefined
|
||||
func (q *stepQueue) Start(concurrency int) (done func()) {
|
||||
if concurrency < 1 {
|
||||
panic("concurrency must be >= 1")
|
||||
}
|
||||
// l protects pending and queueItems
|
||||
l := chainlock.New()
|
||||
pendingCond := l.NewCond()
|
||||
// priority queue
|
||||
pending := &stepQueueHeap{}
|
||||
// ident => queueItem
|
||||
queueItems := make(map[interface{}]*stepQueueHeapItem)
|
||||
// stopped is used for cancellation of "wake" goroutine
|
||||
stopped := false
|
||||
active := 0
|
||||
go func() { // "stopper" goroutine
|
||||
<-q.stop
|
||||
defer l.Lock().Unlock()
|
||||
stopped = true
|
||||
pendingCond.Broadcast()
|
||||
}()
|
||||
go func() { // "reqs" goroutine
|
||||
for {
|
||||
select {
|
||||
case <-q.stop:
|
||||
select {
|
||||
case <-q.reqs:
|
||||
panic("WaitReady call active while calling Close")
|
||||
default:
|
||||
return
|
||||
}
|
||||
case req := <-q.reqs:
|
||||
func() {
|
||||
defer l.Lock().Unlock()
|
||||
if _, ok := queueItems[req.ident]; ok {
|
||||
panic("WaitReady must not be called twice for the same ident")
|
||||
}
|
||||
qitem := &stepQueueHeapItem{
|
||||
req: req,
|
||||
}
|
||||
queueItems[req.ident] = qitem
|
||||
heap.Push(pending, qitem)
|
||||
pendingCond.Broadcast()
|
||||
}()
|
||||
}
|
||||
}
|
||||
}()
|
||||
go func() { // "wake" goroutine
|
||||
defer l.Lock().Unlock()
|
||||
for {
|
||||
|
||||
for !stopped && (active >= concurrency || pending.Len() == 0) {
|
||||
pendingCond.Wait()
|
||||
}
|
||||
if stopped {
|
||||
return
|
||||
}
|
||||
if pending.Len() <= 0 {
|
||||
return
|
||||
}
|
||||
active++
|
||||
next := heap.Pop(pending).(*stepQueueHeapItem).req
|
||||
delete(queueItems, next.ident)
|
||||
|
||||
next.wakeup <- func() {
|
||||
defer l.Lock().Unlock()
|
||||
active--
|
||||
pendingCond.Broadcast()
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
done = func() {
|
||||
close(q.stop)
|
||||
}
|
||||
return done
|
||||
}
|
||||
|
||||
type StepCompletedFunc func()
|
||||
|
||||
func (q *stepQueue) sendAndWaitForWakeup(ident interface{}, targetDate time.Time) StepCompletedFunc {
|
||||
req := stepQueueRec{
|
||||
ident,
|
||||
targetDate,
|
||||
make(chan StepCompletedFunc),
|
||||
}
|
||||
q.reqs <- req
|
||||
return <-req.wakeup
|
||||
}
|
||||
|
||||
// Wait for the ident with targetDate to be selected to run.
|
||||
func (q *stepQueue) WaitReady(ident interface{}, targetDate time.Time) StepCompletedFunc {
|
||||
if targetDate.IsZero() {
|
||||
panic("targetDate of zero is reserved for marking Done")
|
||||
}
|
||||
return q.sendAndWaitForWakeup(ident, targetDate)
|
||||
}
|
166
replication/driver/replication_stepqueue_test.go
Normal file
166
replication/driver/replication_stepqueue_test.go
Normal file
@ -0,0 +1,166 @@
|
||||
package driver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/montanaflynn/stats"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestPqNotconcurrent(t *testing.T) {
|
||||
|
||||
var ctr uint32
|
||||
q := newStepQueue()
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(3)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer q.WaitReady("1", time.Unix(1, 0))()
|
||||
ret := atomic.AddUint32(&ctr, 1)
|
||||
assert.Equal(t, uint32(1), ret)
|
||||
}()
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer q.WaitReady("2", time.Unix(2, 0))()
|
||||
ret := atomic.AddUint32(&ctr, 1)
|
||||
assert.Equal(t, uint32(2), ret)
|
||||
}()
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer q.WaitReady("3", time.Unix(3, 0))()
|
||||
ret := atomic.AddUint32(&ctr, 1)
|
||||
assert.Equal(t, uint32(3), ret)
|
||||
}()
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
defer q.Start(1)()
|
||||
wg.Wait()
|
||||
|
||||
}
|
||||
|
||||
type record struct {
|
||||
fs int
|
||||
step int
|
||||
globalCtr uint32
|
||||
wakeAt time.Duration // relative to begin
|
||||
}
|
||||
|
||||
func (r record) String() string {
|
||||
return fmt.Sprintf("fs %08d step %08d globalCtr %08d wakeAt %2.8f", r.fs, r.step, r.globalCtr, r.wakeAt.Seconds())
|
||||
}
|
||||
|
||||
// This tests uses stepPq concurrently, simulating the following scenario:
|
||||
// Given a number of filesystems F, each filesystem has N steps to take.
|
||||
// The number of concurrent steps is limited to C.
|
||||
// The target date for each step is the step number N.
|
||||
// Hence, there are always F filesystems runnable (calling WaitReady)
|
||||
// The priority queue prioritizes steps with lower target data (= lower step number).
|
||||
// Hence, all steps with lower numbers should be woken up before steps with higher numbers.
|
||||
// However, scheduling is not 100% deterministic (runtime, OS scheduler, etc).
|
||||
// Hence, perform some statistics on the wakeup times and assert that the mean wakeup
|
||||
// times for each step are close together.
|
||||
func TestPqConcurrent(t *testing.T) {
|
||||
|
||||
q := newStepQueue()
|
||||
var wg sync.WaitGroup
|
||||
filesystems := 100
|
||||
stepsPerFS := 20
|
||||
sleepTimePerStep := 50 * time.Millisecond
|
||||
wg.Add(filesystems)
|
||||
var globalCtr uint32
|
||||
|
||||
begin := time.Now()
|
||||
records := make(chan []record, filesystems)
|
||||
for fs := 0; fs < filesystems; fs++ {
|
||||
go func(fs int) {
|
||||
defer wg.Done()
|
||||
recs := make([]record, 0)
|
||||
for step := 0; step < stepsPerFS; step++ {
|
||||
pos := atomic.AddUint32(&globalCtr, 1)
|
||||
t := time.Unix(int64(step), 0)
|
||||
done := q.WaitReady(fs, t)
|
||||
wakeAt := time.Now().Sub(begin)
|
||||
time.Sleep(sleepTimePerStep)
|
||||
done()
|
||||
recs = append(recs, record{fs, step, pos, wakeAt})
|
||||
}
|
||||
records <- recs
|
||||
}(fs)
|
||||
}
|
||||
concurrency := 5
|
||||
defer q.Start(concurrency)()
|
||||
wg.Wait()
|
||||
close(records)
|
||||
t.Logf("loop done")
|
||||
|
||||
flattenedRecs := make([]record, 0)
|
||||
for recs := range records {
|
||||
flattenedRecs = append(flattenedRecs, recs...)
|
||||
}
|
||||
|
||||
sort.Slice(flattenedRecs, func(i, j int) bool {
|
||||
return flattenedRecs[i].globalCtr < flattenedRecs[j].globalCtr
|
||||
})
|
||||
|
||||
wakeTimesByStep := map[int][]float64{}
|
||||
for _, rec := range flattenedRecs {
|
||||
wakeTimes, ok := wakeTimesByStep[rec.step]
|
||||
if !ok {
|
||||
wakeTimes = []float64{}
|
||||
}
|
||||
wakeTimes = append(wakeTimes, rec.wakeAt.Seconds())
|
||||
wakeTimesByStep[rec.step] = wakeTimes
|
||||
}
|
||||
|
||||
meansByStepId := make([]float64, stepsPerFS)
|
||||
interQuartileRangesByStepIdx := make([]float64, stepsPerFS)
|
||||
for step := 0; step < stepsPerFS; step++ {
|
||||
t.Logf("step %d", step)
|
||||
mean, _ := stats.Mean(wakeTimesByStep[step])
|
||||
meansByStepId[step] = mean
|
||||
t.Logf("\tmean: %v", mean)
|
||||
median, _ := stats.Median(wakeTimesByStep[step])
|
||||
t.Logf("\tmedian: %v", median)
|
||||
midhinge, _ := stats.Midhinge(wakeTimesByStep[step])
|
||||
t.Logf("\tmidhinge: %v", midhinge)
|
||||
min, _ := stats.Min(wakeTimesByStep[step])
|
||||
t.Logf("\tmin: %v", min)
|
||||
max, _ := stats.Max(wakeTimesByStep[step])
|
||||
t.Logf("\tmax: %v", max)
|
||||
quartiles, _ := stats.Quartile(wakeTimesByStep[step])
|
||||
t.Logf("\t%#v", quartiles)
|
||||
interQuartileRange, _ := stats.InterQuartileRange(wakeTimesByStep[step])
|
||||
t.Logf("\tinter-quartile range: %v", interQuartileRange)
|
||||
interQuartileRangesByStepIdx[step] = interQuartileRange
|
||||
}
|
||||
|
||||
iqrMean, _ := stats.Mean(interQuartileRangesByStepIdx)
|
||||
t.Logf("inter-quartile-range mean: %v", iqrMean)
|
||||
iqrDev, _ := stats.StandardDeviation(interQuartileRangesByStepIdx)
|
||||
t.Logf("inter-quartile-range deviation: %v", iqrDev)
|
||||
|
||||
// each step should have the same "distribution" (=~ "spread")
|
||||
assert.True(t, iqrDev < 0.01)
|
||||
|
||||
minTimeForAllStepsWithIdxI := sleepTimePerStep.Seconds() * float64(filesystems) / float64(concurrency)
|
||||
t.Logf("minTimeForAllStepsWithIdxI = %11.8f", minTimeForAllStepsWithIdxI)
|
||||
for i, mean := range meansByStepId {
|
||||
// we can't just do (i + 0.5) * minTimeforAllStepsWithIdxI
|
||||
// because this doesn't account for drift
|
||||
idealMean := 0.5 * minTimeForAllStepsWithIdxI
|
||||
if i > 0 {
|
||||
previousMean := meansByStepId[i-1]
|
||||
idealMean = previousMean + minTimeForAllStepsWithIdxI
|
||||
}
|
||||
deltaFromIdeal := idealMean - mean
|
||||
t.Logf("step %02d delta from ideal mean wake time: %11.8f - %11.8f = %11.8f", i, idealMean, mean, deltaFromIdeal)
|
||||
assert.True(t, math.Abs(deltaFromIdeal) < 0.05)
|
||||
}
|
||||
|
||||
}
|
@ -1,557 +0,0 @@
|
||||
// Package fsrep implements replication of a single file system with existing versions
|
||||
// from a sender to a receiver.
|
||||
package fsrep
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/util/bytecounter"
|
||||
"github.com/zrepl/zrepl/util/watchdog"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
)
|
||||
|
||||
type contextKey int
|
||||
|
||||
const (
|
||||
contextKeyLogger contextKey = iota
|
||||
)
|
||||
|
||||
type Logger = logger.Logger
|
||||
|
||||
func WithLogger(ctx context.Context, log Logger) context.Context {
|
||||
return context.WithValue(ctx, contextKeyLogger, log)
|
||||
}
|
||||
|
||||
func getLogger(ctx context.Context) Logger {
|
||||
l, ok := ctx.Value(contextKeyLogger).(Logger)
|
||||
if !ok {
|
||||
l = logger.NewNullLogger()
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
// A Sender is usually part of a github.com/zrepl/zrepl/replication.Endpoint.
|
||||
type Sender interface {
|
||||
// If a non-nil io.ReadCloser is returned, it is guaranteed to be closed before
|
||||
// any next call to the parent github.com/zrepl/zrepl/replication.Endpoint.
|
||||
// If the send request is for dry run the io.ReadCloser will be nil
|
||||
Send(ctx context.Context, r *pdu.SendReq) (*pdu.SendRes, zfs.StreamCopier, error)
|
||||
ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
|
||||
}
|
||||
|
||||
// A Sender is usually part of a github.com/zrepl/zrepl/replication.Endpoint.
|
||||
type Receiver interface {
|
||||
// Receive sends r and sendStream (the latter containing a ZFS send stream)
|
||||
// to the parent github.com/zrepl/zrepl/replication.Endpoint.
|
||||
Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error)
|
||||
}
|
||||
|
||||
type StepReport struct {
|
||||
From, To string
|
||||
Status StepState
|
||||
Problem string
|
||||
Bytes int64
|
||||
ExpectedBytes int64 // 0 means no size estimate possible
|
||||
}
|
||||
|
||||
type Report struct {
|
||||
Filesystem string
|
||||
Status string
|
||||
Problem string
|
||||
Completed, Pending []*StepReport
|
||||
}
|
||||
|
||||
//go:generate enumer -type=State
|
||||
type State uint
|
||||
|
||||
const (
|
||||
Ready State = 1 << iota
|
||||
Completed
|
||||
)
|
||||
|
||||
type Error interface {
|
||||
error
|
||||
Temporary() bool
|
||||
ContextErr() bool
|
||||
LocalToFS() bool
|
||||
}
|
||||
|
||||
type Replication struct {
|
||||
promBytesReplicated prometheus.Counter
|
||||
|
||||
fs string
|
||||
|
||||
// lock protects all fields below it in this struct, but not the data behind pointers
|
||||
lock sync.Mutex
|
||||
state State
|
||||
err Error
|
||||
completed, pending []*ReplicationStep
|
||||
}
|
||||
|
||||
func (f *Replication) State() State {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
return f.state
|
||||
}
|
||||
|
||||
func (f *Replication) FS() string { return f.fs }
|
||||
|
||||
// returns zero value time.Time{} if no more pending steps
|
||||
func (f *Replication) NextStepDate() time.Time {
|
||||
if len(f.pending) == 0 {
|
||||
return time.Time{}
|
||||
}
|
||||
return f.pending[0].to.SnapshotTime()
|
||||
}
|
||||
|
||||
func (f *Replication) Err() Error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
return f.err
|
||||
}
|
||||
|
||||
func (f *Replication) CanRetry() bool {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
if f.state == Completed {
|
||||
return false
|
||||
}
|
||||
if f.state != Ready {
|
||||
panic(fmt.Sprintf("implementation error: %v", f.state))
|
||||
}
|
||||
if f.err == nil {
|
||||
return true
|
||||
}
|
||||
return f.err.Temporary()
|
||||
}
|
||||
|
||||
func (f *Replication) UpdateSizeEsitmate(ctx context.Context, sender Sender) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
for _, e := range f.pending {
|
||||
if err := e.updateSizeEstimate(ctx, sender); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type ReplicationBuilder struct {
|
||||
r *Replication
|
||||
}
|
||||
|
||||
func BuildReplication(fs string, promBytesReplicated prometheus.Counter) *ReplicationBuilder {
|
||||
return &ReplicationBuilder{&Replication{fs: fs, promBytesReplicated: promBytesReplicated}}
|
||||
}
|
||||
|
||||
func (b *ReplicationBuilder) AddStep(from, to FilesystemVersion) *ReplicationBuilder {
|
||||
step := &ReplicationStep{
|
||||
state: StepReplicationReady,
|
||||
parent: b.r,
|
||||
from: from,
|
||||
to: to,
|
||||
}
|
||||
b.r.pending = append(b.r.pending, step)
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *ReplicationBuilder) Done() (r *Replication) {
|
||||
if len(b.r.pending) > 0 {
|
||||
b.r.state = Ready
|
||||
} else {
|
||||
b.r.state = Completed
|
||||
}
|
||||
r = b.r
|
||||
b.r = nil
|
||||
return r
|
||||
}
|
||||
|
||||
type ReplicationConflictError struct {
|
||||
Err error
|
||||
}
|
||||
|
||||
func (e *ReplicationConflictError) Timeout() bool { return false }
|
||||
|
||||
func (e *ReplicationConflictError) Temporary() bool { return false }
|
||||
|
||||
func (e *ReplicationConflictError) Error() string { return fmt.Sprintf("permanent error: %s", e.Err.Error()) }
|
||||
|
||||
func (e *ReplicationConflictError) LocalToFS() bool { return true }
|
||||
|
||||
func (e *ReplicationConflictError) ContextErr() bool { return false }
|
||||
|
||||
func NewReplicationConflictError(fs string, err error) *Replication {
|
||||
return &Replication{
|
||||
state: Completed,
|
||||
fs: fs,
|
||||
err: &ReplicationConflictError{Err: err},
|
||||
}
|
||||
}
|
||||
|
||||
//go:generate enumer -type=StepState
|
||||
type StepState uint
|
||||
|
||||
const (
|
||||
StepReplicationReady StepState = 1 << iota
|
||||
StepMarkReplicatedReady
|
||||
StepCompleted
|
||||
)
|
||||
|
||||
func (s StepState) IsTerminal() bool { return s == StepCompleted }
|
||||
|
||||
type FilesystemVersion interface {
|
||||
SnapshotTime() time.Time
|
||||
GetName() string // name without @ or #
|
||||
RelName() string // name with @ or #
|
||||
}
|
||||
|
||||
type ReplicationStep struct {
|
||||
// only protects state, err
|
||||
// from, to and parent are assumed to be immutable
|
||||
lock sync.Mutex
|
||||
|
||||
state StepState
|
||||
from, to FilesystemVersion
|
||||
parent *Replication
|
||||
|
||||
// both retry and permanent error
|
||||
err error
|
||||
|
||||
byteCounter bytecounter.StreamCopier
|
||||
expectedSize int64 // 0 means no size estimate present / possible
|
||||
}
|
||||
|
||||
func (f *Replication) Retry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) Error {
|
||||
|
||||
var u updater = func(fu func(*Replication)) State {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
if fu != nil {
|
||||
fu(f)
|
||||
}
|
||||
return f.state
|
||||
}
|
||||
|
||||
|
||||
var current *ReplicationStep
|
||||
pre := u(nil)
|
||||
getLogger(ctx).WithField("fsrep_state", pre).Debug("begin fsrep.Retry")
|
||||
defer func() {
|
||||
post := u(nil)
|
||||
getLogger(ctx).WithField("fsrep_transition", post).Debug("end fsrep.Retry")
|
||||
}()
|
||||
|
||||
st := u(func(f *Replication) {
|
||||
if len(f.pending) == 0 {
|
||||
f.state = Completed
|
||||
return
|
||||
}
|
||||
current = f.pending[0]
|
||||
})
|
||||
if st == Completed {
|
||||
return nil
|
||||
}
|
||||
if st != Ready {
|
||||
panic(fmt.Sprintf("implementation error: %v", st))
|
||||
}
|
||||
|
||||
stepCtx := WithLogger(ctx, getLogger(ctx).WithField("step", current))
|
||||
getLogger(stepCtx).Debug("take step")
|
||||
err := current.Retry(stepCtx, ka, sender, receiver)
|
||||
if err != nil {
|
||||
getLogger(stepCtx).WithError(err).Error("step could not be completed")
|
||||
}
|
||||
|
||||
u(func(fsr *Replication) {
|
||||
if err != nil {
|
||||
f.err = &StepError{stepStr: current.String(), err: err}
|
||||
return
|
||||
}
|
||||
if err == nil && current.state != StepCompleted {
|
||||
panic(fmt.Sprintf("implementation error: %v", current.state))
|
||||
}
|
||||
f.err = nil
|
||||
f.completed = append(f.completed, current)
|
||||
f.pending = f.pending[1:]
|
||||
if len(f.pending) > 0 {
|
||||
f.state = Ready
|
||||
} else {
|
||||
f.state = Completed
|
||||
}
|
||||
})
|
||||
var retErr Error = nil
|
||||
u(func(fsr *Replication) {
|
||||
retErr = fsr.err
|
||||
})
|
||||
return retErr
|
||||
}
|
||||
|
||||
type updater func(func(fsr *Replication)) State
|
||||
|
||||
type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state
|
||||
|
||||
type StepError struct {
|
||||
stepStr string
|
||||
err error
|
||||
}
|
||||
|
||||
var _ Error = &StepError{}
|
||||
|
||||
func (e StepError) Error() string {
|
||||
if e.LocalToFS() {
|
||||
return fmt.Sprintf("step %s failed: %s", e.stepStr, e.err)
|
||||
}
|
||||
return e.err.Error()
|
||||
}
|
||||
|
||||
func (e StepError) Timeout() bool {
|
||||
if neterr, ok := e.err.(net.Error); ok {
|
||||
return neterr.Timeout()
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e StepError) Temporary() bool {
|
||||
if neterr, ok := e.err.(net.Error); ok {
|
||||
return neterr.Temporary()
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e StepError) LocalToFS() bool {
|
||||
if _, ok := e.err.(net.Error); ok {
|
||||
return false
|
||||
}
|
||||
return true // conservative approximation: we'd like to check for specific errors returned over RPC here...
|
||||
}
|
||||
|
||||
func (e StepError) ContextErr() bool {
|
||||
switch e.err {
|
||||
case context.Canceled:
|
||||
return true
|
||||
case context.DeadlineExceeded:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (fsr *Replication) Report() *Report {
|
||||
fsr.lock.Lock()
|
||||
defer fsr.lock.Unlock()
|
||||
|
||||
rep := Report{
|
||||
Filesystem: fsr.fs,
|
||||
Status: fsr.state.String(),
|
||||
}
|
||||
|
||||
if fsr.err != nil && fsr.err.LocalToFS() {
|
||||
rep.Problem = fsr.err.Error()
|
||||
}
|
||||
|
||||
rep.Completed = make([]*StepReport, len(fsr.completed))
|
||||
for i := range fsr.completed {
|
||||
rep.Completed[i] = fsr.completed[i].Report()
|
||||
}
|
||||
rep.Pending = make([]*StepReport, len(fsr.pending))
|
||||
for i := range fsr.pending {
|
||||
rep.Pending[i] = fsr.pending[i].Report()
|
||||
}
|
||||
|
||||
return &rep
|
||||
}
|
||||
|
||||
func (s *ReplicationStep) Retry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) error {
|
||||
switch s.state {
|
||||
case StepReplicationReady:
|
||||
return s.doReplication(ctx, ka, sender, receiver)
|
||||
case StepMarkReplicatedReady:
|
||||
return s.doMarkReplicated(ctx, ka, sender)
|
||||
case StepCompleted:
|
||||
return nil
|
||||
}
|
||||
panic(fmt.Sprintf("implementation error: %v", s.state))
|
||||
}
|
||||
|
||||
func (s *ReplicationStep) Error() error {
|
||||
if s.state & (StepReplicationReady|StepMarkReplicatedReady) != 0 {
|
||||
return s.err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *ReplicationStep) doReplication(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) error {
|
||||
|
||||
if s.state != StepReplicationReady {
|
||||
panic(fmt.Sprintf("implementation error: %v", s.state))
|
||||
}
|
||||
|
||||
fs := s.parent.fs
|
||||
|
||||
log := getLogger(ctx)
|
||||
sr := s.buildSendRequest(false)
|
||||
|
||||
log.Debug("initiate send request")
|
||||
sres, sstreamCopier, err := sender.Send(ctx, sr)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("send request failed")
|
||||
return err
|
||||
}
|
||||
if sstreamCopier == nil {
|
||||
err := errors.New("send request did not return a stream, broken endpoint implementation")
|
||||
return err
|
||||
}
|
||||
defer sstreamCopier.Close()
|
||||
|
||||
// Install a byte counter to track progress + for status report
|
||||
s.byteCounter = bytecounter.NewStreamCopier(sstreamCopier)
|
||||
byteCounterStopProgress := make(chan struct{})
|
||||
defer close(byteCounterStopProgress)
|
||||
go func() {
|
||||
var lastCount int64
|
||||
t := time.NewTicker(1 * time.Second)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-byteCounterStopProgress:
|
||||
return
|
||||
case <-t.C:
|
||||
newCount := s.byteCounter.Count()
|
||||
if lastCount != newCount {
|
||||
ka.MadeProgress()
|
||||
} else {
|
||||
lastCount = newCount
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
defer func() {
|
||||
s.parent.promBytesReplicated.Add(float64(s.byteCounter.Count()))
|
||||
}()
|
||||
|
||||
rr := &pdu.ReceiveReq{
|
||||
Filesystem: fs,
|
||||
ClearResumeToken: !sres.UsedResumeToken,
|
||||
}
|
||||
log.Debug("initiate receive request")
|
||||
_, err = receiver.Receive(ctx, rr, s.byteCounter)
|
||||
if err != nil {
|
||||
log.
|
||||
WithError(err).
|
||||
WithField("errType", fmt.Sprintf("%T", err)).
|
||||
Error("receive request failed (might also be error on sender)")
|
||||
// This failure could be due to
|
||||
// - an unexpected exit of ZFS on the sending side
|
||||
// - an unexpected exit of ZFS on the receiving side
|
||||
// - a connectivity issue
|
||||
return err
|
||||
}
|
||||
log.Debug("receive finished")
|
||||
ka.MadeProgress()
|
||||
|
||||
s.state = StepMarkReplicatedReady
|
||||
return s.doMarkReplicated(ctx, ka, sender)
|
||||
|
||||
}
|
||||
|
||||
func (s *ReplicationStep) doMarkReplicated(ctx context.Context, ka *watchdog.KeepAlive, sender Sender) error {
|
||||
|
||||
if s.state != StepMarkReplicatedReady {
|
||||
panic(fmt.Sprintf("implementation error: %v", s.state))
|
||||
}
|
||||
|
||||
log := getLogger(ctx)
|
||||
|
||||
log.Debug("advance replication cursor")
|
||||
req := &pdu.ReplicationCursorReq{
|
||||
Filesystem: s.parent.fs,
|
||||
Op: &pdu.ReplicationCursorReq_Set{
|
||||
Set: &pdu.ReplicationCursorReq_SetOp{
|
||||
Snapshot: s.to.GetName(),
|
||||
},
|
||||
},
|
||||
}
|
||||
_, err := sender.ReplicationCursor(ctx, req)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error advancing replication cursor")
|
||||
return err
|
||||
}
|
||||
ka.MadeProgress()
|
||||
|
||||
s.state = StepCompleted
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *ReplicationStep) updateSizeEstimate(ctx context.Context, sender Sender) error {
|
||||
|
||||
log := getLogger(ctx)
|
||||
|
||||
sr := s.buildSendRequest(true)
|
||||
|
||||
log.Debug("initiate dry run send request")
|
||||
sres, _, err := sender.Send(ctx, sr)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("dry run send request failed")
|
||||
return err
|
||||
}
|
||||
s.expectedSize = sres.ExpectedSize
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *ReplicationStep) buildSendRequest(dryRun bool) (sr *pdu.SendReq) {
|
||||
fs := s.parent.fs
|
||||
if s.from == nil {
|
||||
sr = &pdu.SendReq{
|
||||
Filesystem: fs,
|
||||
To: s.to.RelName(),
|
||||
DryRun: dryRun,
|
||||
}
|
||||
} else {
|
||||
sr = &pdu.SendReq{
|
||||
Filesystem: fs,
|
||||
From: s.from.RelName(),
|
||||
To: s.to.RelName(),
|
||||
DryRun: dryRun,
|
||||
}
|
||||
}
|
||||
return sr
|
||||
}
|
||||
|
||||
func (s *ReplicationStep) String() string {
|
||||
if s.from == nil { // FIXME: ZFS semantics are that to is nil on non-incremental send
|
||||
return fmt.Sprintf("%s%s (full)", s.parent.fs, s.to.RelName())
|
||||
} else {
|
||||
return fmt.Sprintf("%s(%s => %s)", s.parent.fs, s.from.RelName(), s.to.RelName())
|
||||
}
|
||||
}
|
||||
|
||||
func (s *ReplicationStep) Report() *StepReport {
|
||||
var from string // FIXME follow same convention as ZFS: to should be nil on full send
|
||||
if s.from != nil {
|
||||
from = s.from.RelName()
|
||||
}
|
||||
bytes := int64(0)
|
||||
if s.byteCounter != nil {
|
||||
bytes = s.byteCounter.Count()
|
||||
}
|
||||
problem := ""
|
||||
if s.err != nil {
|
||||
problem = s.err.Error()
|
||||
}
|
||||
rep := StepReport{
|
||||
From: from,
|
||||
To: s.to.RelName(),
|
||||
Status: s.state,
|
||||
Problem: problem,
|
||||
Bytes: bytes,
|
||||
ExpectedBytes: s.expectedSize,
|
||||
}
|
||||
return &rep
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
// Code generated by "enumer -type=State"; DO NOT EDIT.
|
||||
|
||||
package fsrep
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const _StateName = "ReadyCompleted"
|
||||
|
||||
var _StateIndex = [...]uint8{0, 5, 14}
|
||||
|
||||
func (i State) String() string {
|
||||
i -= 1
|
||||
if i >= State(len(_StateIndex)-1) {
|
||||
return fmt.Sprintf("State(%d)", i+1)
|
||||
}
|
||||
return _StateName[_StateIndex[i]:_StateIndex[i+1]]
|
||||
}
|
||||
|
||||
var _StateValues = []State{1, 2}
|
||||
|
||||
var _StateNameToValueMap = map[string]State{
|
||||
_StateName[0:5]: 1,
|
||||
_StateName[5:14]: 2,
|
||||
}
|
||||
|
||||
// StateString retrieves an enum value from the enum constants string name.
|
||||
// Throws an error if the param is not part of the enum.
|
||||
func StateString(s string) (State, error) {
|
||||
if val, ok := _StateNameToValueMap[s]; ok {
|
||||
return val, nil
|
||||
}
|
||||
return 0, fmt.Errorf("%s does not belong to State values", s)
|
||||
}
|
||||
|
||||
// StateValues returns all values of the enum
|
||||
func StateValues() []State {
|
||||
return _StateValues
|
||||
}
|
||||
|
||||
// IsAState returns "true" if the value is listed in the enum definition. "false" otherwise
|
||||
func (i State) IsAState() bool {
|
||||
for _, v := range _StateValues {
|
||||
if i == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
@ -1,61 +0,0 @@
|
||||
// Code generated by "enumer -type=StepState"; DO NOT EDIT.
|
||||
|
||||
package fsrep
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const (
|
||||
_StepStateName_0 = "StepReplicationReadyStepMarkReplicatedReady"
|
||||
_StepStateName_1 = "StepCompleted"
|
||||
)
|
||||
|
||||
var (
|
||||
_StepStateIndex_0 = [...]uint8{0, 20, 43}
|
||||
_StepStateIndex_1 = [...]uint8{0, 13}
|
||||
)
|
||||
|
||||
func (i StepState) String() string {
|
||||
switch {
|
||||
case 1 <= i && i <= 2:
|
||||
i -= 1
|
||||
return _StepStateName_0[_StepStateIndex_0[i]:_StepStateIndex_0[i+1]]
|
||||
case i == 4:
|
||||
return _StepStateName_1
|
||||
default:
|
||||
return fmt.Sprintf("StepState(%d)", i)
|
||||
}
|
||||
}
|
||||
|
||||
var _StepStateValues = []StepState{1, 2, 4}
|
||||
|
||||
var _StepStateNameToValueMap = map[string]StepState{
|
||||
_StepStateName_0[0:20]: 1,
|
||||
_StepStateName_0[20:43]: 2,
|
||||
_StepStateName_1[0:13]: 4,
|
||||
}
|
||||
|
||||
// StepStateString retrieves an enum value from the enum constants string name.
|
||||
// Throws an error if the param is not part of the enum.
|
||||
func StepStateString(s string) (StepState, error) {
|
||||
if val, ok := _StepStateNameToValueMap[s]; ok {
|
||||
return val, nil
|
||||
}
|
||||
return 0, fmt.Errorf("%s does not belong to StepState values", s)
|
||||
}
|
||||
|
||||
// StepStateValues returns all values of the enum
|
||||
func StepStateValues() []StepState {
|
||||
return _StepStateValues
|
||||
}
|
||||
|
||||
// IsAStepState returns "true" if the value is listed in the enum definition. "false" otherwise
|
||||
func (i StepState) IsAStepState() bool {
|
||||
for _, v := range _StepStateValues {
|
||||
if i == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
@ -3,7 +3,7 @@ package mainfsm
|
||||
import (
|
||||
"sort"
|
||||
|
||||
. "github.com/zrepl/zrepl/replication/pdu"
|
||||
. "github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
)
|
||||
|
||||
type ConflictNoCommonAncestor struct {
|
430
replication/logic/replication_logic.go
Normal file
430
replication/logic/replication_logic.go
Normal file
@ -0,0 +1,430 @@
|
||||
package logic
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
||||
"github.com/zrepl/zrepl/replication/driver"
|
||||
. "github.com/zrepl/zrepl/replication/logic/diff"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/replication/report"
|
||||
"github.com/zrepl/zrepl/util/bytecounter"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
)
|
||||
|
||||
// Endpoint represents one side of the replication.
|
||||
//
|
||||
// An endpoint is either in Sender or Receiver mode, represented by the correspondingly
|
||||
// named interfaces defined in this package.
|
||||
type Endpoint interface {
|
||||
// Does not include placeholder filesystems
|
||||
ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
|
||||
ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error)
|
||||
DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error)
|
||||
}
|
||||
|
||||
type Sender interface {
|
||||
Endpoint
|
||||
// If a non-nil io.ReadCloser is returned, it is guaranteed to be closed before
|
||||
// any next call to the parent github.com/zrepl/zrepl/replication.Endpoint.
|
||||
// If the send request is for dry run the io.ReadCloser will be nil
|
||||
Send(ctx context.Context, r *pdu.SendReq) (*pdu.SendRes, zfs.StreamCopier, error)
|
||||
ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
|
||||
}
|
||||
|
||||
type Receiver interface {
|
||||
Endpoint
|
||||
// Receive sends r and sendStream (the latter containing a ZFS send stream)
|
||||
// to the parent github.com/zrepl/zrepl/replication.Endpoint.
|
||||
Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error)
|
||||
}
|
||||
|
||||
type Planner struct {
|
||||
sender Sender
|
||||
receiver Receiver
|
||||
|
||||
promSecsPerState *prometheus.HistogramVec // labels: state
|
||||
promBytesReplicated *prometheus.CounterVec // labels: filesystem
|
||||
}
|
||||
|
||||
func (p *Planner) Plan(ctx context.Context) ([]driver.FS, error) {
|
||||
fss, err := p.doPlanning(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dfss := make([]driver.FS, len(fss))
|
||||
for i := range dfss {
|
||||
dfss[i] = fss[i]
|
||||
}
|
||||
return dfss, nil
|
||||
}
|
||||
|
||||
type Filesystem struct {
|
||||
sender Sender
|
||||
receiver Receiver
|
||||
|
||||
Path string // compat
|
||||
receiverFSExists bool // compat
|
||||
promBytesReplicated prometheus.Counter // compat
|
||||
}
|
||||
|
||||
func (f *Filesystem) PlanFS(ctx context.Context) ([]driver.Step, error) {
|
||||
steps, err := f.doPlanning(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dsteps := make([]driver.Step, len(steps))
|
||||
for i := range dsteps {
|
||||
dsteps[i] = steps[i]
|
||||
}
|
||||
return dsteps, nil
|
||||
}
|
||||
func (f *Filesystem) ReportInfo() *report.FilesystemInfo {
|
||||
return &report.FilesystemInfo{Name: f.Path} // FIXME compat name
|
||||
}
|
||||
|
||||
type Step struct {
|
||||
sender Sender
|
||||
receiver Receiver
|
||||
|
||||
parent *Filesystem
|
||||
from, to *pdu.FilesystemVersion // compat
|
||||
|
||||
byteCounter bytecounter.StreamCopier
|
||||
expectedSize int64 // 0 means no size estimate present / possible
|
||||
}
|
||||
|
||||
func (s *Step) TargetDate() time.Time {
|
||||
return s.to.SnapshotTime() // FIXME compat name
|
||||
}
|
||||
|
||||
func (s *Step) Step(ctx context.Context) error {
|
||||
return s.doReplication(ctx)
|
||||
}
|
||||
|
||||
func (s *Step) ReportInfo() *report.StepInfo {
|
||||
var byteCounter int64
|
||||
if s.byteCounter != nil {
|
||||
byteCounter = s.byteCounter.Count()
|
||||
}
|
||||
return &report.StepInfo{
|
||||
From: s.from.RelName(),
|
||||
To: s.to.RelName(),
|
||||
BytesExpected: s.expectedSize,
|
||||
BytesReplicated: byteCounter,
|
||||
}
|
||||
}
|
||||
|
||||
func NewPlanner(secsPerState *prometheus.HistogramVec, bytesReplicated *prometheus.CounterVec, sender Sender, receiver Receiver) *Planner {
|
||||
return &Planner{
|
||||
sender: sender,
|
||||
receiver: receiver,
|
||||
promSecsPerState: secsPerState,
|
||||
promBytesReplicated: bytesReplicated,
|
||||
}
|
||||
}
|
||||
func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string) {
|
||||
if noCommonAncestor, ok := conflict.(*ConflictNoCommonAncestor); ok {
|
||||
if len(noCommonAncestor.SortedReceiverVersions) == 0 {
|
||||
// TODO this is hard-coded replication policy: most recent snapshot as source
|
||||
var mostRecentSnap *pdu.FilesystemVersion
|
||||
for n := len(noCommonAncestor.SortedSenderVersions) - 1; n >= 0; n-- {
|
||||
if noCommonAncestor.SortedSenderVersions[n].Type == pdu.FilesystemVersion_Snapshot {
|
||||
mostRecentSnap = noCommonAncestor.SortedSenderVersions[n]
|
||||
break
|
||||
}
|
||||
}
|
||||
if mostRecentSnap == nil {
|
||||
return nil, "no snapshots available on sender side"
|
||||
}
|
||||
return []*pdu.FilesystemVersion{mostRecentSnap}, fmt.Sprintf("start replication at most recent snapshot %s", mostRecentSnap.RelName())
|
||||
}
|
||||
}
|
||||
return nil, "no automated way to handle conflict type"
|
||||
}
|
||||
|
||||
func (p *Planner) doPlanning(ctx context.Context) ([]*Filesystem, error) {
|
||||
|
||||
log := getLogger(ctx)
|
||||
|
||||
log.Info("start planning")
|
||||
|
||||
slfssres, err := p.sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
||||
if err != nil {
|
||||
log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing sender filesystems")
|
||||
return nil, err
|
||||
}
|
||||
sfss := slfssres.GetFilesystems()
|
||||
// no progress here since we could run in a live-lock on connectivity issues
|
||||
|
||||
rlfssres, err := p.receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
||||
if err != nil {
|
||||
log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing receiver filesystems")
|
||||
return nil, err
|
||||
}
|
||||
rfss := rlfssres.GetFilesystems()
|
||||
|
||||
q := make([]*Filesystem, 0, len(sfss))
|
||||
for _, fs := range sfss {
|
||||
|
||||
receiverFSExists := false
|
||||
for _, rfs := range rfss {
|
||||
if rfs.Path == fs.Path {
|
||||
receiverFSExists = true
|
||||
}
|
||||
}
|
||||
|
||||
ctr := p.promBytesReplicated.WithLabelValues(fs.Path)
|
||||
|
||||
q = append(q, &Filesystem{
|
||||
sender: p.sender,
|
||||
receiver: p.receiver,
|
||||
Path: fs.Path,
|
||||
receiverFSExists: receiverFSExists,
|
||||
promBytesReplicated: ctr,
|
||||
})
|
||||
}
|
||||
|
||||
return q, nil
|
||||
}
|
||||
|
||||
func (fs *Filesystem) doPlanning(ctx context.Context) ([]*Step, error) {
|
||||
|
||||
log := getLogger(ctx).WithField("filesystem", fs.Path)
|
||||
|
||||
log.Debug("assessing filesystem")
|
||||
|
||||
sfsvsres, err := fs.sender.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path})
|
||||
if err != nil {
|
||||
log.WithError(err).Error("cannot get remote filesystem versions")
|
||||
return nil, err
|
||||
}
|
||||
sfsvs := sfsvsres.GetVersions()
|
||||
|
||||
if len(sfsvs) < 1 {
|
||||
err := errors.New("sender does not have any versions")
|
||||
log.Error(err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var rfsvs []*pdu.FilesystemVersion
|
||||
if fs.receiverFSExists {
|
||||
rfsvsres, err := fs.receiver.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path})
|
||||
if err != nil {
|
||||
log.WithError(err).Error("receiver error")
|
||||
return nil, err
|
||||
}
|
||||
rfsvs = rfsvsres.GetVersions()
|
||||
} else {
|
||||
rfsvs = []*pdu.FilesystemVersion{}
|
||||
}
|
||||
|
||||
path, conflict := IncrementalPath(rfsvs, sfsvs)
|
||||
if conflict != nil {
|
||||
var msg string
|
||||
path, msg = resolveConflict(conflict) // no shadowing allowed!
|
||||
if path != nil {
|
||||
log.WithField("conflict", conflict).Info("conflict")
|
||||
log.WithField("resolution", msg).Info("automatically resolved")
|
||||
} else {
|
||||
log.WithField("conflict", conflict).Error("conflict")
|
||||
log.WithField("problem", msg).Error("cannot resolve conflict")
|
||||
}
|
||||
}
|
||||
if path == nil {
|
||||
return nil, conflict
|
||||
}
|
||||
|
||||
steps := make([]*Step, 0, len(path))
|
||||
// FIXME unify struct declarations => initializer?
|
||||
if len(path) == 1 {
|
||||
steps = append(steps, &Step{
|
||||
parent: fs,
|
||||
sender: fs.sender,
|
||||
receiver: fs.receiver,
|
||||
from: nil,
|
||||
to: path[0],
|
||||
})
|
||||
} else {
|
||||
for i := 0; i < len(path)-1; i++ {
|
||||
steps = append(steps, &Step{
|
||||
parent: fs,
|
||||
sender: fs.sender,
|
||||
receiver: fs.receiver,
|
||||
from: path[i],
|
||||
to: path[i+1],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
log.Debug("compute send size estimate")
|
||||
errs := make(chan error, len(steps))
|
||||
var wg sync.WaitGroup
|
||||
fanOutCtx, fanOutCancel := context.WithCancel(ctx)
|
||||
defer fanOutCancel()
|
||||
for _, step := range steps {
|
||||
wg.Add(1)
|
||||
go func(step *Step) {
|
||||
defer wg.Done()
|
||||
err := step.updateSizeEstimate(fanOutCtx)
|
||||
if err != nil {
|
||||
log.WithError(err).WithField("step", step).Error("error computing size estimate")
|
||||
fanOutCancel()
|
||||
}
|
||||
errs <- err
|
||||
}(step)
|
||||
}
|
||||
wg.Wait()
|
||||
close(errs)
|
||||
var significantErr error = nil
|
||||
for err := range errs {
|
||||
if err != nil {
|
||||
if significantErr == nil || significantErr == context.Canceled {
|
||||
significantErr = err
|
||||
}
|
||||
}
|
||||
}
|
||||
if significantErr != nil {
|
||||
return nil, significantErr
|
||||
}
|
||||
|
||||
log.Debug("filesystem planning finished")
|
||||
return steps, nil
|
||||
}
|
||||
|
||||
// type FilesystemsReplicationFailedError struct {
|
||||
// FilesystemsWithError []*fsrep.Replication
|
||||
// }
|
||||
|
||||
// func (e FilesystemsReplicationFailedError) Error() string {
|
||||
// allSame := true
|
||||
// lastErr := e.FilesystemsWithError[0].Err().Error()
|
||||
// for _, fs := range e.FilesystemsWithError {
|
||||
// fsErr := fs.Err().Error()
|
||||
// allSame = allSame && lastErr == fsErr
|
||||
// }
|
||||
|
||||
// fsstr := "multiple filesystems"
|
||||
// if len(e.FilesystemsWithError) == 1 {
|
||||
// fsstr = fmt.Sprintf("filesystem %s", e.FilesystemsWithError[0].FS())
|
||||
// }
|
||||
// errorStr := lastErr
|
||||
// if !allSame {
|
||||
// errorStr = "multiple different errors"
|
||||
// }
|
||||
// return fmt.Sprintf("%s could not be replicated: %s", fsstr, errorStr)
|
||||
// }
|
||||
|
||||
func (s *Step) updateSizeEstimate(ctx context.Context) error {
|
||||
|
||||
log := getLogger(ctx)
|
||||
|
||||
sr := s.buildSendRequest(true)
|
||||
|
||||
log.Debug("initiate dry run send request")
|
||||
sres, _, err := s.sender.Send(ctx, sr)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("dry run send request failed")
|
||||
return err
|
||||
}
|
||||
s.expectedSize = sres.ExpectedSize
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Step) buildSendRequest(dryRun bool) (sr *pdu.SendReq) {
|
||||
fs := s.parent.Path
|
||||
if s.from == nil {
|
||||
sr = &pdu.SendReq{
|
||||
Filesystem: fs,
|
||||
To: s.to.RelName(),
|
||||
DryRun: dryRun,
|
||||
}
|
||||
} else {
|
||||
sr = &pdu.SendReq{
|
||||
Filesystem: fs,
|
||||
From: s.from.RelName(),
|
||||
To: s.to.RelName(),
|
||||
DryRun: dryRun,
|
||||
}
|
||||
}
|
||||
return sr
|
||||
}
|
||||
|
||||
func (s *Step) doReplication(ctx context.Context) error {
|
||||
|
||||
fs := s.parent.Path
|
||||
|
||||
log := getLogger(ctx)
|
||||
sr := s.buildSendRequest(false)
|
||||
|
||||
log.Debug("initiate send request")
|
||||
sres, sstreamCopier, err := s.sender.Send(ctx, sr)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("send request failed")
|
||||
return err
|
||||
}
|
||||
if sstreamCopier == nil {
|
||||
err := errors.New("send request did not return a stream, broken endpoint implementation")
|
||||
return err
|
||||
}
|
||||
defer sstreamCopier.Close()
|
||||
|
||||
// Install a byte counter to track progress + for status report
|
||||
s.byteCounter = bytecounter.NewStreamCopier(sstreamCopier)
|
||||
defer func() {
|
||||
s.parent.promBytesReplicated.Add(float64(s.byteCounter.Count()))
|
||||
}()
|
||||
|
||||
rr := &pdu.ReceiveReq{
|
||||
Filesystem: fs,
|
||||
ClearResumeToken: !sres.UsedResumeToken,
|
||||
}
|
||||
log.Debug("initiate receive request")
|
||||
_, err = s.receiver.Receive(ctx, rr, s.byteCounter)
|
||||
if err != nil {
|
||||
log.
|
||||
WithError(err).
|
||||
WithField("errType", fmt.Sprintf("%T", err)).
|
||||
Error("receive request failed (might also be error on sender)")
|
||||
// This failure could be due to
|
||||
// - an unexpected exit of ZFS on the sending side
|
||||
// - an unexpected exit of ZFS on the receiving side
|
||||
// - a connectivity issue
|
||||
return err
|
||||
}
|
||||
log.Debug("receive finished")
|
||||
|
||||
log.Debug("advance replication cursor")
|
||||
req := &pdu.ReplicationCursorReq{
|
||||
Filesystem: fs,
|
||||
Op: &pdu.ReplicationCursorReq_Set{
|
||||
Set: &pdu.ReplicationCursorReq_SetOp{
|
||||
Snapshot: s.to.GetName(),
|
||||
},
|
||||
},
|
||||
}
|
||||
_, err = s.sender.ReplicationCursor(ctx, req)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error advancing replication cursor")
|
||||
// If this fails and replication planning restarts, the diff algorithm will find
|
||||
// that cursor out of place. This is not a problem because then, it would just use another FS
|
||||
// However, we FIXME have no means to just update the cursor in a
|
||||
// second replication attempt right after this one where we don't have new snaps yet
|
||||
return err
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Step) String() string {
|
||||
if s.from == nil { // FIXME: ZFS semantics are that to is nil on non-incremental send
|
||||
return fmt.Sprintf("%s%s (full)", s.parent.Path, s.to.RelName())
|
||||
} else {
|
||||
return fmt.Sprintf("%s(%s => %s)", s.parent.Path, s.from.RelName(), s.to.RelName())
|
||||
}
|
||||
}
|
@ -1,9 +1,9 @@
|
||||
package replication
|
||||
package logic
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/replication/fsrep"
|
||||
)
|
||||
|
||||
type contextKey int
|
||||
@ -16,7 +16,6 @@ type Logger = logger.Logger
|
||||
|
||||
func WithLogger(ctx context.Context, l Logger) context.Context {
|
||||
ctx = context.WithValue(ctx, contextKeyLog, l)
|
||||
ctx = fsrep.WithLogger(ctx, l)
|
||||
return ctx
|
||||
}
|
||||
|
@ -1,560 +0,0 @@
|
||||
// Package replication implements replication of filesystems with existing
|
||||
// versions (snapshots) from a sender to a receiver.
|
||||
package replication
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/zrepl/zrepl/daemon/job/wakeup"
|
||||
"github.com/zrepl/zrepl/util/envconst"
|
||||
"github.com/zrepl/zrepl/util/watchdog"
|
||||
"math/bits"
|
||||
"net"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/zrepl/zrepl/replication/fsrep"
|
||||
. "github.com/zrepl/zrepl/replication/internal/diff"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
)
|
||||
|
||||
//go:generate enumer -type=State
|
||||
type State uint
|
||||
|
||||
const (
|
||||
Planning State = 1 << iota
|
||||
PlanningError
|
||||
Working
|
||||
WorkingWait
|
||||
Completed
|
||||
PermanentError
|
||||
)
|
||||
|
||||
func (s State) rsf() state {
|
||||
idx := bits.TrailingZeros(uint(s))
|
||||
if idx == bits.UintSize {
|
||||
panic(s) // invalid value
|
||||
}
|
||||
m := []state{
|
||||
statePlanning,
|
||||
statePlanningError,
|
||||
stateWorking,
|
||||
stateWorkingWait,
|
||||
nil,
|
||||
nil,
|
||||
}
|
||||
return m[idx]
|
||||
}
|
||||
|
||||
func (s State) IsTerminal() bool {
|
||||
return s.rsf() == nil
|
||||
}
|
||||
|
||||
// Replication implements the replication of multiple file systems from a Sender to a Receiver.
|
||||
//
|
||||
// It is a state machine that is driven by the Drive method
|
||||
// and provides asynchronous reporting via the Report method (i.e. from another goroutine).
|
||||
type Replication struct {
|
||||
// not protected by lock
|
||||
promSecsPerState *prometheus.HistogramVec // labels: state
|
||||
promBytesReplicated *prometheus.CounterVec // labels: filesystem
|
||||
|
||||
Progress watchdog.KeepAlive
|
||||
|
||||
// lock protects all fields of this struct (but not the fields behind pointers!)
|
||||
lock sync.Mutex
|
||||
|
||||
state State
|
||||
|
||||
// Working, WorkingWait, Completed, ContextDone
|
||||
queue []*fsrep.Replication
|
||||
completed []*fsrep.Replication
|
||||
active *fsrep.Replication // == queue[0] or nil, unlike in Report
|
||||
|
||||
// for PlanningError, WorkingWait and ContextError and Completed
|
||||
err error
|
||||
|
||||
// PlanningError, WorkingWait
|
||||
sleepUntil time.Time
|
||||
}
|
||||
|
||||
type Report struct {
|
||||
Status string
|
||||
Problem string
|
||||
SleepUntil time.Time
|
||||
Completed []*fsrep.Report
|
||||
Pending []*fsrep.Report
|
||||
Active *fsrep.Report // not contained in Pending, unlike in struct Replication
|
||||
}
|
||||
|
||||
func NewReplication(secsPerState *prometheus.HistogramVec, bytesReplicated *prometheus.CounterVec) *Replication {
|
||||
r := Replication{
|
||||
promSecsPerState: secsPerState,
|
||||
promBytesReplicated: bytesReplicated,
|
||||
state: Planning,
|
||||
}
|
||||
return &r
|
||||
}
|
||||
|
||||
// Endpoint represents one side of the replication.
|
||||
//
|
||||
// An endpoint is either in Sender or Receiver mode, represented by the correspondingly
|
||||
// named interfaces defined in this package.
|
||||
type Endpoint interface {
|
||||
// Does not include placeholder filesystems
|
||||
ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
|
||||
ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error)
|
||||
DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error)
|
||||
}
|
||||
|
||||
type Sender interface {
|
||||
Endpoint
|
||||
fsrep.Sender
|
||||
}
|
||||
|
||||
type Receiver interface {
|
||||
Endpoint
|
||||
fsrep.Receiver
|
||||
}
|
||||
|
||||
type FilteredError struct{ fs string }
|
||||
|
||||
func NewFilteredError(fs string) *FilteredError {
|
||||
return &FilteredError{fs}
|
||||
}
|
||||
|
||||
func (f FilteredError) Error() string { return "endpoint does not allow access to filesystem " + f.fs }
|
||||
|
||||
type updater func(func(*Replication)) (newState State)
|
||||
type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state
|
||||
|
||||
// Drive starts the state machine and returns only after replication has finished (with or without errors).
|
||||
// The Logger in ctx is used for both debug and error logging, but is not guaranteed to be stable
|
||||
// or end-user friendly.
|
||||
// User-facing replication progress reports and can be obtained using the Report method,
|
||||
// whose output will not change after Drive returns.
|
||||
//
|
||||
// FIXME: Drive may be only called once per instance of Replication
|
||||
func (r *Replication) Drive(ctx context.Context, sender Sender, receiver Receiver) {
|
||||
|
||||
var u updater = func(f func(*Replication)) State {
|
||||
r.lock.Lock()
|
||||
defer r.lock.Unlock()
|
||||
if f != nil {
|
||||
f(r)
|
||||
}
|
||||
return r.state
|
||||
}
|
||||
|
||||
var s state = statePlanning
|
||||
var pre, post State
|
||||
for s != nil {
|
||||
preTime := time.Now()
|
||||
pre = u(nil)
|
||||
s = s(ctx, &r.Progress, sender, receiver, u)
|
||||
delta := time.Now().Sub(preTime)
|
||||
r.promSecsPerState.WithLabelValues(pre.String()).Observe(delta.Seconds())
|
||||
post = u(nil)
|
||||
getLogger(ctx).
|
||||
WithField("transition", fmt.Sprintf("%s => %s", pre, post)).
|
||||
WithField("duration", delta).
|
||||
Debug("main state transition")
|
||||
if post == Working && pre != post {
|
||||
getLogger(ctx).Info("start working")
|
||||
}
|
||||
}
|
||||
|
||||
getLogger(ctx).
|
||||
WithField("final_state", post).
|
||||
Debug("main final state")
|
||||
}
|
||||
|
||||
func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string) {
|
||||
if noCommonAncestor, ok := conflict.(*ConflictNoCommonAncestor); ok {
|
||||
if len(noCommonAncestor.SortedReceiverVersions) == 0 {
|
||||
// TODO this is hard-coded replication policy: most recent snapshot as source
|
||||
var mostRecentSnap *pdu.FilesystemVersion
|
||||
for n := len(noCommonAncestor.SortedSenderVersions) - 1; n >= 0; n-- {
|
||||
if noCommonAncestor.SortedSenderVersions[n].Type == pdu.FilesystemVersion_Snapshot {
|
||||
mostRecentSnap = noCommonAncestor.SortedSenderVersions[n]
|
||||
break
|
||||
}
|
||||
}
|
||||
if mostRecentSnap == nil {
|
||||
return nil, "no snapshots available on sender side"
|
||||
}
|
||||
return []*pdu.FilesystemVersion{mostRecentSnap}, fmt.Sprintf("start replication at most recent snapshot %s", mostRecentSnap.RelName())
|
||||
}
|
||||
}
|
||||
return nil, "no automated way to handle conflict type"
|
||||
}
|
||||
|
||||
var RetryInterval = envconst.Duration("ZREPL_REPLICATION_RETRY_INTERVAL", 10 * time.Second)
|
||||
|
||||
type Error interface {
|
||||
error
|
||||
Temporary() bool
|
||||
}
|
||||
|
||||
var _ Error = fsrep.Error(nil)
|
||||
var _ Error = net.Error(nil)
|
||||
|
||||
func isPermanent(err error) bool {
|
||||
if e, ok := err.(Error); ok {
|
||||
return !e.Temporary()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func statePlanning(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
|
||||
|
||||
log := getLogger(ctx)
|
||||
|
||||
log.Info("start planning")
|
||||
|
||||
handlePlanningError := func(err error) state {
|
||||
return u(func(r *Replication) {
|
||||
ge := GlobalError{Err: err, Temporary: !isPermanent(err)}
|
||||
log.WithError(ge).Error("encountered global error while planning replication")
|
||||
r.err = ge
|
||||
if !ge.Temporary {
|
||||
r.state = PermanentError
|
||||
} else {
|
||||
r.sleepUntil = time.Now().Add(RetryInterval)
|
||||
r.state = PlanningError
|
||||
}
|
||||
}).rsf()
|
||||
}
|
||||
|
||||
slfssres, err := sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
||||
if err != nil {
|
||||
log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing sender filesystems")
|
||||
return handlePlanningError(err)
|
||||
}
|
||||
sfss := slfssres.GetFilesystems()
|
||||
// no progress here since we could run in a live-lock on connectivity issues
|
||||
|
||||
rlfssres, err := receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
||||
if err != nil {
|
||||
log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing receiver filesystems")
|
||||
return handlePlanningError(err)
|
||||
}
|
||||
rfss := rlfssres.GetFilesystems()
|
||||
ka.MadeProgress() // for both sender and receiver
|
||||
|
||||
q := make([]*fsrep.Replication, 0, len(sfss))
|
||||
mainlog := log
|
||||
for _, fs := range sfss {
|
||||
|
||||
log := mainlog.WithField("filesystem", fs.Path)
|
||||
|
||||
log.Debug("assessing filesystem")
|
||||
|
||||
sfsvsres, err := sender.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path})
|
||||
if err != nil {
|
||||
log.WithError(err).Error("cannot get remote filesystem versions")
|
||||
return handlePlanningError(err)
|
||||
}
|
||||
sfsvs := sfsvsres.GetVersions()
|
||||
ka.MadeProgress()
|
||||
|
||||
if len(sfsvs) < 1 {
|
||||
err := errors.New("sender does not have any versions")
|
||||
log.Error(err.Error())
|
||||
q = append(q, fsrep.NewReplicationConflictError(fs.Path, err))
|
||||
continue
|
||||
}
|
||||
|
||||
receiverFSExists := false
|
||||
for _, rfs := range rfss {
|
||||
if rfs.Path == fs.Path {
|
||||
receiverFSExists = true
|
||||
}
|
||||
}
|
||||
|
||||
var rfsvs []*pdu.FilesystemVersion
|
||||
if receiverFSExists {
|
||||
rfsvsres, err := receiver.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path})
|
||||
if err != nil {
|
||||
if _, ok := err.(*FilteredError); ok {
|
||||
log.Info("receiver ignores filesystem")
|
||||
continue
|
||||
}
|
||||
log.WithError(err).Error("receiver error")
|
||||
return handlePlanningError(err)
|
||||
}
|
||||
rfsvs = rfsvsres.GetVersions()
|
||||
} else {
|
||||
rfsvs = []*pdu.FilesystemVersion{}
|
||||
}
|
||||
ka.MadeProgress()
|
||||
|
||||
path, conflict := IncrementalPath(rfsvs, sfsvs)
|
||||
if conflict != nil {
|
||||
var msg string
|
||||
path, msg = resolveConflict(conflict) // no shadowing allowed!
|
||||
if path != nil {
|
||||
log.WithField("conflict", conflict).Info("conflict")
|
||||
log.WithField("resolution", msg).Info("automatically resolved")
|
||||
} else {
|
||||
log.WithField("conflict", conflict).Error("conflict")
|
||||
log.WithField("problem", msg).Error("cannot resolve conflict")
|
||||
}
|
||||
}
|
||||
ka.MadeProgress()
|
||||
if path == nil {
|
||||
q = append(q, fsrep.NewReplicationConflictError(fs.Path, conflict))
|
||||
continue
|
||||
}
|
||||
|
||||
var promBytesReplicated *prometheus.CounterVec
|
||||
u(func(replication *Replication) { // FIXME args struct like in pruner (also use for sender and receiver)
|
||||
promBytesReplicated = replication.promBytesReplicated
|
||||
})
|
||||
fsrfsm := fsrep.BuildReplication(fs.Path, promBytesReplicated.WithLabelValues(fs.Path))
|
||||
if len(path) == 1 {
|
||||
fsrfsm.AddStep(nil, path[0])
|
||||
} else {
|
||||
for i := 0; i < len(path)-1; i++ {
|
||||
fsrfsm.AddStep(path[i], path[i+1])
|
||||
}
|
||||
}
|
||||
qitem := fsrfsm.Done()
|
||||
ka.MadeProgress()
|
||||
|
||||
log.Debug("compute send size estimate")
|
||||
if err = qitem.UpdateSizeEsitmate(ctx, sender); err != nil {
|
||||
log.WithError(err).Error("error computing size estimate")
|
||||
return handlePlanningError(err)
|
||||
}
|
||||
ka.MadeProgress()
|
||||
|
||||
q = append(q, qitem)
|
||||
}
|
||||
|
||||
ka.MadeProgress()
|
||||
|
||||
return u(func(r *Replication) {
|
||||
r.completed = nil
|
||||
r.queue = q
|
||||
r.err = nil
|
||||
r.state = Working
|
||||
}).rsf()
|
||||
}
|
||||
|
||||
func statePlanningError(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
|
||||
var sleepUntil time.Time
|
||||
u(func(r *Replication) {
|
||||
sleepUntil = r.sleepUntil
|
||||
})
|
||||
t := time.NewTimer(sleepUntil.Sub(time.Now()))
|
||||
getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after planning error")
|
||||
defer t.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return u(func(r *Replication) {
|
||||
r.state = PermanentError
|
||||
r.err = ctx.Err()
|
||||
}).rsf()
|
||||
case <-t.C:
|
||||
case <-wakeup.Wait(ctx):
|
||||
}
|
||||
return u(func(r *Replication) {
|
||||
r.state = Planning
|
||||
}).rsf()
|
||||
}
|
||||
|
||||
type GlobalError struct {
|
||||
Err error
|
||||
Temporary bool
|
||||
}
|
||||
|
||||
func (e GlobalError) Error() string {
|
||||
errClass := "temporary"
|
||||
if !e.Temporary {
|
||||
errClass = "permanent"
|
||||
}
|
||||
return fmt.Sprintf("%s global error: %s", errClass, e.Err)
|
||||
}
|
||||
|
||||
type FilesystemsReplicationFailedError struct {
|
||||
FilesystemsWithError []*fsrep.Replication
|
||||
}
|
||||
|
||||
func (e FilesystemsReplicationFailedError) Error() string {
|
||||
allSame := true
|
||||
lastErr := e.FilesystemsWithError[0].Err().Error()
|
||||
for _, fs := range e.FilesystemsWithError {
|
||||
fsErr := fs.Err().Error()
|
||||
allSame = allSame && lastErr == fsErr
|
||||
}
|
||||
|
||||
fsstr := "multiple filesystems"
|
||||
if len(e.FilesystemsWithError) == 1 {
|
||||
fsstr = fmt.Sprintf("filesystem %s", e.FilesystemsWithError[0].FS())
|
||||
}
|
||||
errorStr := lastErr
|
||||
if !allSame {
|
||||
errorStr = "multiple different errors"
|
||||
}
|
||||
return fmt.Sprintf("%s could not be replicated: %s", fsstr, errorStr)
|
||||
}
|
||||
|
||||
func stateWorking(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
|
||||
|
||||
var active *fsrep.Replication
|
||||
rsfNext := u(func(r *Replication) {
|
||||
|
||||
r.err = nil
|
||||
|
||||
newq := make([]*fsrep.Replication, 0, len(r.queue))
|
||||
for i := range r.queue {
|
||||
if r.queue[i].CanRetry() {
|
||||
newq = append(newq, r.queue[i])
|
||||
} else {
|
||||
r.completed = append(r.completed, r.queue[i])
|
||||
}
|
||||
}
|
||||
sort.SliceStable(newq, func(i, j int) bool {
|
||||
return newq[i].NextStepDate().Before(newq[j].NextStepDate())
|
||||
})
|
||||
r.queue = newq
|
||||
|
||||
if len(r.queue) == 0 {
|
||||
r.state = Completed
|
||||
fsWithErr := FilesystemsReplicationFailedError{ // prepare it
|
||||
FilesystemsWithError: make([]*fsrep.Replication, 0, len(r.completed)),
|
||||
}
|
||||
for _, fs := range r.completed {
|
||||
if fs.CanRetry() {
|
||||
panic(fmt.Sprintf("implementation error: completed contains retryable FS %s %#v",
|
||||
fs.FS(), fs.Err()))
|
||||
}
|
||||
if fs.Err() != nil {
|
||||
fsWithErr.FilesystemsWithError = append(fsWithErr.FilesystemsWithError, fs)
|
||||
}
|
||||
}
|
||||
if len(fsWithErr.FilesystemsWithError) > 0 {
|
||||
r.err = fsWithErr
|
||||
r.state = PermanentError
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
active = r.queue[0] // do not dequeue: if it's done, it will be sorted the next time we check for more work
|
||||
r.active = active
|
||||
}).rsf()
|
||||
|
||||
if active == nil {
|
||||
return rsfNext
|
||||
}
|
||||
|
||||
activeCtx := fsrep.WithLogger(ctx, getLogger(ctx).WithField("fs", active.FS()))
|
||||
err := active.Retry(activeCtx, ka, sender, receiver)
|
||||
u(func(r *Replication) {
|
||||
r.active = nil
|
||||
}).rsf()
|
||||
|
||||
if err != nil {
|
||||
if err.ContextErr() && ctx.Err() != nil {
|
||||
getLogger(ctx).WithError(err).
|
||||
Info("filesystem replication was cancelled")
|
||||
u(func(r*Replication) {
|
||||
r.err = GlobalError{Err: err, Temporary: false}
|
||||
r.state = PermanentError
|
||||
})
|
||||
} else if err.LocalToFS() {
|
||||
getLogger(ctx).WithError(err).
|
||||
Error("filesystem replication encountered a filesystem-specific error")
|
||||
// we stay in this state and let the queuing logic above de-prioritize this failing FS
|
||||
} else if err.Temporary() {
|
||||
getLogger(ctx).WithError(err).
|
||||
Error("filesystem encountered a non-filesystem-specific temporary error, enter retry-wait")
|
||||
u(func(r *Replication) {
|
||||
r.err = GlobalError{Err: err, Temporary: true}
|
||||
r.sleepUntil = time.Now().Add(RetryInterval)
|
||||
r.state = WorkingWait
|
||||
}).rsf()
|
||||
} else {
|
||||
getLogger(ctx).WithError(err).
|
||||
Error("encountered a permanent non-filesystem-specific error")
|
||||
u(func(r *Replication) {
|
||||
r.err = GlobalError{Err: err, Temporary: false}
|
||||
r.state = PermanentError
|
||||
}).rsf()
|
||||
}
|
||||
}
|
||||
|
||||
return u(nil).rsf()
|
||||
}
|
||||
|
||||
func stateWorkingWait(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
|
||||
var sleepUntil time.Time
|
||||
u(func(r *Replication) {
|
||||
sleepUntil = r.sleepUntil
|
||||
})
|
||||
t := time.NewTimer(RetryInterval)
|
||||
getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after error")
|
||||
defer t.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return u(func(r *Replication) {
|
||||
r.state = PermanentError
|
||||
r.err = ctx.Err()
|
||||
}).rsf()
|
||||
|
||||
case <-t.C:
|
||||
case <-wakeup.Wait(ctx):
|
||||
}
|
||||
return u(func(r *Replication) {
|
||||
r.state = Working
|
||||
}).rsf()
|
||||
}
|
||||
|
||||
// Report provides a summary of the progress of the Replication,
|
||||
// i.e., a condensed dump of the internal state machine.
|
||||
// Report is safe to be called asynchronously while Drive is running.
|
||||
func (r *Replication) Report() *Report {
|
||||
r.lock.Lock()
|
||||
defer r.lock.Unlock()
|
||||
|
||||
rep := Report{
|
||||
Status: r.state.String(),
|
||||
SleepUntil: r.sleepUntil,
|
||||
}
|
||||
|
||||
if r.err != nil {
|
||||
rep.Problem = r.err.Error()
|
||||
}
|
||||
|
||||
if r.state&(Planning|PlanningError) != 0 {
|
||||
return &rep
|
||||
}
|
||||
|
||||
rep.Pending = make([]*fsrep.Report, 0, len(r.queue))
|
||||
rep.Completed = make([]*fsrep.Report, 0, len(r.completed)) // room for active (potentially)
|
||||
|
||||
// since r.active == r.queue[0], do not contain it in pending output
|
||||
pending := r.queue
|
||||
if r.active != nil {
|
||||
rep.Active = r.active.Report()
|
||||
pending = r.queue[1:]
|
||||
}
|
||||
for _, fsr := range pending {
|
||||
rep.Pending= append(rep.Pending, fsr.Report())
|
||||
}
|
||||
for _, fsr := range r.completed {
|
||||
rep.Completed = append(rep.Completed, fsr.Report())
|
||||
}
|
||||
|
||||
return &rep
|
||||
}
|
||||
|
||||
func (r *Replication) State() State {
|
||||
r.lock.Lock()
|
||||
defer r.lock.Unlock()
|
||||
return r.state
|
||||
}
|
13
replication/replication.go
Normal file
13
replication/replication.go
Normal file
@ -0,0 +1,13 @@
|
||||
// Package replication implements replication of filesystems with existing
|
||||
// versions (snapshots) from a sender to a receiver.
|
||||
package replication
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/zrepl/zrepl/replication/driver"
|
||||
)
|
||||
|
||||
func Do(ctx context.Context, planner driver.Planner) (driver.ReportFunc, driver.WaitFunc) {
|
||||
return driver.Do(ctx, planner)
|
||||
}
|
150
replication/report/replication_report.go
Normal file
150
replication/report/replication_report.go
Normal file
@ -0,0 +1,150 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Report struct {
|
||||
StartAt, FinishAt time.Time
|
||||
Attempts []*AttemptReport
|
||||
}
|
||||
|
||||
var _, _ = json.Marshal(&Report{})
|
||||
|
||||
type TimedError struct {
|
||||
Err string
|
||||
Time time.Time
|
||||
}
|
||||
|
||||
func NewTimedError(err string, t time.Time) *TimedError {
|
||||
if err == "" {
|
||||
panic("error must be empty")
|
||||
}
|
||||
if t.IsZero() {
|
||||
panic("t must be non-zero")
|
||||
}
|
||||
return &TimedError{err, t}
|
||||
}
|
||||
|
||||
func (s *TimedError) Error() string {
|
||||
return s.Err
|
||||
}
|
||||
|
||||
var _, _ = json.Marshal(&TimedError{})
|
||||
|
||||
type AttemptReport struct {
|
||||
State AttemptState
|
||||
StartAt, FinishAt time.Time
|
||||
PlanError *TimedError
|
||||
Filesystems []*FilesystemReport
|
||||
}
|
||||
|
||||
type AttemptState string
|
||||
|
||||
const (
|
||||
AttemptPlanning AttemptState = "planning"
|
||||
AttemptPlanningError AttemptState = "planning-error"
|
||||
AttemptFanOutFSs AttemptState = "fan-out-filesystems"
|
||||
AttemptFanOutError AttemptState = "filesystem-error"
|
||||
AttemptDone AttemptState = "done"
|
||||
)
|
||||
|
||||
type FilesystemState string
|
||||
|
||||
const (
|
||||
FilesystemPlanning FilesystemState = "planning"
|
||||
FilesystemPlanningErrored FilesystemState = "planning-error"
|
||||
FilesystemStepping FilesystemState = "stepping"
|
||||
FilesystemSteppingErrored FilesystemState = "step-error"
|
||||
FilesystemDone FilesystemState = "done"
|
||||
)
|
||||
|
||||
type FilesystemReport struct {
|
||||
Info *FilesystemInfo
|
||||
|
||||
State FilesystemState
|
||||
|
||||
// Valid in State = FilesystemPlanningErrored
|
||||
PlanError *TimedError
|
||||
// Valid in State = FilesystemSteppingErrored
|
||||
StepError *TimedError
|
||||
|
||||
// Valid in State = FilesystemStepping
|
||||
CurrentStep int
|
||||
Steps []*StepReport
|
||||
}
|
||||
|
||||
type FilesystemInfo struct {
|
||||
Name string
|
||||
}
|
||||
|
||||
type StepReport struct {
|
||||
Info *StepInfo
|
||||
}
|
||||
|
||||
type StepInfo struct {
|
||||
From, To string
|
||||
BytesExpected int64
|
||||
BytesReplicated int64
|
||||
}
|
||||
|
||||
func (a *AttemptReport) BytesSum() (expected, replicated int64) {
|
||||
for _, fs := range a.Filesystems {
|
||||
e, r := fs.BytesSum()
|
||||
expected += e
|
||||
replicated += r
|
||||
}
|
||||
return expected, replicated
|
||||
}
|
||||
|
||||
func (f *FilesystemReport) BytesSum() (expected, replicated int64) {
|
||||
for _, step := range f.Steps {
|
||||
expected += step.Info.BytesExpected
|
||||
replicated += step.Info.BytesReplicated
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (f *AttemptReport) FilesystemsByState() map[FilesystemState][]*FilesystemReport {
|
||||
r := make(map[FilesystemState][]*FilesystemReport, 4)
|
||||
for _, fs := range f.Filesystems {
|
||||
l := r[fs.State]
|
||||
l = append(l, fs)
|
||||
r[fs.State] = l
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func (f *FilesystemReport) Error() *TimedError {
|
||||
switch f.State {
|
||||
case FilesystemPlanningErrored:
|
||||
return f.PlanError
|
||||
case FilesystemSteppingErrored:
|
||||
return f.StepError
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// may return nil
|
||||
func (f *FilesystemReport) NextStep() *StepReport {
|
||||
switch f.State {
|
||||
case FilesystemDone:
|
||||
return nil
|
||||
case FilesystemPlanningErrored:
|
||||
return nil
|
||||
case FilesystemSteppingErrored:
|
||||
return nil
|
||||
case FilesystemPlanning:
|
||||
return nil
|
||||
case FilesystemStepping:
|
||||
// invariant is that this is always correct
|
||||
// TODO what about 0-length Steps but short intermediary state?
|
||||
return f.Steps[f.CurrentStep]
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
func (f *StepReport) IsIncremental() bool {
|
||||
return f.Info.From != "" // FIXME change to ZFS semantics (To != "")
|
||||
}
|
@ -1,76 +0,0 @@
|
||||
// Code generated by "enumer -type=State"; DO NOT EDIT.
|
||||
|
||||
package replication
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const (
|
||||
_StateName_0 = "PlanningPlanningError"
|
||||
_StateName_1 = "Working"
|
||||
_StateName_2 = "WorkingWait"
|
||||
_StateName_3 = "Completed"
|
||||
_StateName_4 = "PermanentError"
|
||||
)
|
||||
|
||||
var (
|
||||
_StateIndex_0 = [...]uint8{0, 8, 21}
|
||||
_StateIndex_1 = [...]uint8{0, 7}
|
||||
_StateIndex_2 = [...]uint8{0, 11}
|
||||
_StateIndex_3 = [...]uint8{0, 9}
|
||||
_StateIndex_4 = [...]uint8{0, 14}
|
||||
)
|
||||
|
||||
func (i State) String() string {
|
||||
switch {
|
||||
case 1 <= i && i <= 2:
|
||||
i -= 1
|
||||
return _StateName_0[_StateIndex_0[i]:_StateIndex_0[i+1]]
|
||||
case i == 4:
|
||||
return _StateName_1
|
||||
case i == 8:
|
||||
return _StateName_2
|
||||
case i == 16:
|
||||
return _StateName_3
|
||||
case i == 32:
|
||||
return _StateName_4
|
||||
default:
|
||||
return fmt.Sprintf("State(%d)", i)
|
||||
}
|
||||
}
|
||||
|
||||
var _StateValues = []State{1, 2, 4, 8, 16, 32}
|
||||
|
||||
var _StateNameToValueMap = map[string]State{
|
||||
_StateName_0[0:8]: 1,
|
||||
_StateName_0[8:21]: 2,
|
||||
_StateName_1[0:7]: 4,
|
||||
_StateName_2[0:11]: 8,
|
||||
_StateName_3[0:9]: 16,
|
||||
_StateName_4[0:14]: 32,
|
||||
}
|
||||
|
||||
// StateString retrieves an enum value from the enum constants string name.
|
||||
// Throws an error if the param is not part of the enum.
|
||||
func StateString(s string) (State, error) {
|
||||
if val, ok := _StateNameToValueMap[s]; ok {
|
||||
return val, nil
|
||||
}
|
||||
return 0, fmt.Errorf("%s does not belong to State values", s)
|
||||
}
|
||||
|
||||
// StateValues returns all values of the enum
|
||||
func StateValues() []State {
|
||||
return _StateValues
|
||||
}
|
||||
|
||||
// IsAState returns "true" if the value is listed in the enum definition. "false" otherwise
|
||||
func (i State) IsAState() bool {
|
||||
for _, v := range _StateValues {
|
||||
if i == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
@ -7,7 +7,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/golang/protobuf/proto"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/rpc/dataconn/stream"
|
||||
"github.com/zrepl/zrepl/transport"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
|
@ -7,7 +7,7 @@ import (
|
||||
|
||||
"github.com/golang/protobuf/proto"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/rpc/dataconn/stream"
|
||||
"github.com/zrepl/zrepl/transport"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
|
@ -24,7 +24,7 @@ import (
|
||||
"github.com/pkg/profile"
|
||||
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/rpc/dataconn"
|
||||
"github.com/zrepl/zrepl/rpc/dataconn/timeoutconn"
|
||||
"github.com/zrepl/zrepl/transport"
|
||||
|
@ -36,7 +36,7 @@ func ClientConn(cn transport.Connecter, log Logger) *grpc.ClientConn {
|
||||
})
|
||||
dialerOption := grpc.WithDialer(grpcclientidentity.NewDialer(log, cn))
|
||||
cred := grpc.WithTransportCredentials(grpcclientidentity.NewTransportCredentials(log))
|
||||
ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second)
|
||||
ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second) // FIXME constant
|
||||
defer cancel()
|
||||
cc, err := grpc.DialContext(ctx, "doesn't matter done by dialer", dialerOption, cred, ka)
|
||||
if err != nil {
|
||||
|
@ -7,8 +7,8 @@ import (
|
||||
|
||||
"google.golang.org/grpc"
|
||||
|
||||
"github.com/zrepl/zrepl/replication"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/replication/logic"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/rpc/dataconn"
|
||||
"github.com/zrepl/zrepl/rpc/grpcclientidentity/grpchelper"
|
||||
"github.com/zrepl/zrepl/rpc/versionhandshake"
|
||||
@ -26,9 +26,9 @@ type Client struct {
|
||||
loggers Loggers
|
||||
}
|
||||
|
||||
var _ replication.Endpoint = &Client{}
|
||||
var _ replication.Sender = &Client{}
|
||||
var _ replication.Receiver = &Client{}
|
||||
var _ logic.Endpoint = &Client{}
|
||||
var _ logic.Sender = &Client{}
|
||||
var _ logic.Receiver = &Client{}
|
||||
|
||||
type DialContextFunc = func(ctx context.Context, network string, addr string) (net.Conn, error)
|
||||
|
||||
|
@ -7,7 +7,7 @@ import (
|
||||
"google.golang.org/grpc"
|
||||
|
||||
"github.com/zrepl/zrepl/endpoint"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/rpc/dataconn"
|
||||
"github.com/zrepl/zrepl/rpc/grpcclientidentity"
|
||||
"github.com/zrepl/zrepl/rpc/netadaptor"
|
||||
|
42
util/chainlock/chainlock.go
Normal file
42
util/chainlock/chainlock.go
Normal file
@ -0,0 +1,42 @@
|
||||
// package chainlock implements a mutex whose Lock and Unlock
|
||||
// methods return the lock itself, to enable chaining.
|
||||
//
|
||||
// Intended Usage
|
||||
//
|
||||
// defer s.lock().unlock()
|
||||
// // drop lock while waiting for wait group
|
||||
// func() {
|
||||
// defer a.l.Unlock().Lock()
|
||||
// fssesDone.Wait()
|
||||
// }()
|
||||
//
|
||||
package chainlock
|
||||
|
||||
import "sync"
|
||||
|
||||
type L struct {
|
||||
mtx sync.Mutex
|
||||
}
|
||||
|
||||
func New() *L {
|
||||
return &L{}
|
||||
}
|
||||
|
||||
func (l *L) Lock() *L {
|
||||
l.mtx.Lock()
|
||||
return l
|
||||
}
|
||||
|
||||
func (l *L) Unlock() *L {
|
||||
l.mtx.Unlock()
|
||||
return l
|
||||
}
|
||||
|
||||
func (l *L) NewCond() *sync.Cond {
|
||||
return sync.NewCond(&l.mtx)
|
||||
}
|
||||
|
||||
func (l *L) DropWhile(f func()) {
|
||||
defer l.Unlock().Lock()
|
||||
f()
|
||||
}
|
Loading…
Reference in New Issue
Block a user