diff --git a/.travis.yml b/.travis.yml index 8ced506..aec423b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,7 @@ dist: xenial services: - docker +env: # for allow_failures: https://docs.travis-ci.com/user/customizing-the-build/ matrix: include: @@ -15,45 +16,35 @@ matrix: --user "$(id -u):$(id -g)" \ zrepl_build make vendordeps release - # all go entries vary only by go version - - language: go + - &zrepl_build_template + language: go + go_import_path: github.com/zrepl/zrepl + before_install: + - wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip + - echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c + - sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip + - ./lazy.sh godep + - make vendordeps + script: + - make + - make vet + - make test + - make artifacts/zrepl-freebsd-amd64 + - make artifacts/zrepl-linux-amd64 + - make artifacts/zrepl-darwin-amd64 go: - "1.11" - go_import_path: github.com/zrepl/zrepl - before_install: - - wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip - - echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c - - sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip - - ./lazy.sh godep - - make vendordeps - script: - - make - - make vet - - make test - - make artifacts/zrepl-freebsd-amd64 - - make artifacts/zrepl-linux-amd64 - - make artifacts/zrepl-darwin-amd64 - - language: go + - <<: *zrepl_build_template + go: + - "1.12" + + - <<: *zrepl_build_template go: - "master" - go_import_path: github.com/zrepl/zrepl - before_install: - - wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip - - echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c - - sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip - - 
./lazy.sh godep - - make vendordeps - script: - - make - - make vet - - make test - - make artifacts/zrepl-freebsd-amd64 - - make artifacts/zrepl-linux-amd64 - - make artifacts/zrepl-darwin-amd64 - # all python entries vary only by python version - - language: python + - &zrepl_docs_template + language: python python: - "3.4" install: @@ -61,29 +52,18 @@ matrix: - pip install -r docs/requirements.txt script: - make docs - - language: python + - <<: *zrepl_docs_template python: - "3.5" - install: - - sudo apt-get install libgirepository1.0-dev - - pip install -r docs/requirements.txt - script: - - make docs - - language: python + - <<: *zrepl_docs_template python: - "3.6" - install: - - sudo apt-get install libgirepository1.0-dev - - pip install -r docs/requirements.txt - script: - - make docs - - language: python + - <<: *zrepl_docs_template python: - "3.7" - install: - - sudo apt-get install libgirepository1.0-dev - - pip install -r docs/requirements.txt - script: - - make docs - + + allow_failures: + - <<: *zrepl_build_template + go: + - "master" diff --git a/Gopkg.lock b/Gopkg.lock index 4674a95..7df6bf1 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -89,6 +89,14 @@ revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5" version = "v1.2.0" +[[projects]] + digest = "1:ad92aa49f34cbc3546063c7eb2cabb55ee2278b72842eda80e2a20a8a06a8d73" + name = "github.com/google/uuid" + packages = ["."] + pruneopts = "" + revision = "0cd6bf5da1e1c83f8b45653022c74f71af0538a4" + version = "v1.1.1" + [[projects]] branch = "master" digest = "1:cb09475f771b9167fb9333629f5d6a7161572602ea040f1094602b0dc8709878" @@ -161,6 +169,14 @@ revision = "3247c84500bff8d9fb6d579d800f20b3e091582c" version = "v1.0.0" +[[projects]] + digest = "1:4ff67dde814694496d7aa31be44b900f9717a10c8bc9136b13f49c8ef97f439a" + name = "github.com/montanaflynn/stats" + packages = ["."] + pruneopts = "" + revision = "63fbb2597b7a13043b453a4b819945badb8f8926" + version = "v0.5.0" + [[projects]] branch = "master" digest = 
"1:f60ff065b58bd53e641112b38bbda9d2684deb828393c7ffb89c69a1ee301d17" @@ -245,6 +261,14 @@ pruneopts = "" revision = "8b1c2da0d56deffdbb9e48d4414b4e674bd8083e" +[[projects]] + digest = "1:3962f553b77bf6c03fc07cd687a22dd3b00fe11aa14d31194f5505f5bb65cdc8" + name = "github.com/sergi/go-diff" + packages = ["diffmatchpatch"] + pruneopts = "" + revision = "1744e2970ca51c86172c8190fadad617561ed6e7" + version = "v1.0.0" + [[projects]] branch = "master" digest = "1:146327ce93be37e68bd3ff8541090d96da8cb3adc9e35d57570e9170a29f6bf6" @@ -280,6 +304,25 @@ revision = "93babf24513d0e8277635da8169fcc5a46ae3f6a" version = "v1.11.0" +[[projects]] + digest = "1:529ed3f98838f69e13761788d0cc71b44e130058fab13bae2ce09f7a176bced4" + name = "github.com/yudai/gojsondiff" + packages = [ + ".", + "formatter", + ] + pruneopts = "" + revision = "7b1b7adf999dab73a6eb02669c3d82dbb27a3dd6" + version = "1.0.0" + +[[projects]] + branch = "master" + digest = "1:9857bb2293f372b2181004d8b62179bbdb4ab0982ec6f762abe6cf2bfedaff85" + name = "github.com/yudai/golcs" + packages = ["."] + pruneopts = "" + revision = "ecda9a501e8220fae3b4b600c3db4b0ba22cfc68" + [[projects]] branch = "v2" digest = "1:6b8a6afafde7ed31cd0c577ba40d88ce39e8f1c5eb76d7836be7d5b74f1c534a" @@ -403,9 +446,11 @@ "github.com/go-logfmt/logfmt", "github.com/golang/protobuf/proto", "github.com/golang/protobuf/protoc-gen-go", + "github.com/google/uuid", "github.com/jinzhu/copier", "github.com/kr/pretty", "github.com/mattn/go-isatty", + "github.com/montanaflynn/stats", "github.com/pkg/errors", "github.com/pkg/profile", "github.com/problame/go-netssh", @@ -415,14 +460,18 @@ "github.com/spf13/pflag", "github.com/stretchr/testify/assert", "github.com/stretchr/testify/require", + "github.com/yudai/gojsondiff", + "github.com/yudai/gojsondiff/formatter", "github.com/zrepl/yaml-config", "golang.org/x/net/context", "golang.org/x/sys/unix", "golang.org/x/tools/cmd/stringer", "google.golang.org/grpc", + "google.golang.org/grpc/codes", 
"google.golang.org/grpc/credentials", "google.golang.org/grpc/keepalive", "google.golang.org/grpc/peer", + "google.golang.org/grpc/status", ] solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index 01e2aae..55c0a9b 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -59,3 +59,7 @@ required = [ [[constraint]] name = "google.golang.org/grpc" version = "1" + +[[constraint]] + version = "1.1.0" + name = "github.com/google/uuid" diff --git a/Makefile b/Makefile index 120eb28..42b6990 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ vendordeps: dep ensure -v -vendor-only generate: #not part of the build, must do that manually - protoc -I=replication/pdu --go_out=plugins=grpc:replication/pdu replication/pdu/pdu.proto + protoc -I=replication/logic/pdu --go_out=plugins=grpc:replication/logic/pdu replication/logic/pdu/pdu.proto go generate -x ./... build: diff --git a/README.md b/README.md index dd329aa..2126dc7 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,6 @@ Make sure to develop an understanding how zrepl is typically used by studying th │   ├── prometheus │   ├── pruner # pruner implementation │   ├── snapper # snapshotter implementation -│   ├── streamrpcconfig # abstraction for configuration of go-streamrpc -│   └── transport # transports implementation ├── docs # sphinx-based documentation ├── dist # supplemental material for users & package maintainers │   ├── **/*.rst # documentation in reStructuredText @@ -74,10 +72,24 @@ Make sure to develop an understanding how zrepl is typically used by studying th ├── logger # our own logger package ├── pruning # pruning rules (the logic, not the actual execution) │   └── retentiongrid -├── replication # the fsm that implements replication of multiple file systems -│   ├── fsrep # replication of a single filesystem -│   └── pdu # the protobuf-generated structs + helpers passed to an endpoint +├── replication +│ ├── driver # the driver of the replication logic (status reporting, error 
handling) +│ ├── logic # planning & executing replication steps via rpc +│ │ └── pdu # the generated gRPC & protobuf code used in replication (and endpoints) +│ └── report # the JSON-serializable report data structures exposed to the client +├── rpc # the hybrid gRPC + ./dataconn RPC client: connects to a remote replication.Endpoint +│ ├── dataconn # Bulk data-transfer RPC protocol +│ ├── grpcclientidentity # adaptor to inject package transport's 'client identity' concept into gRPC contexts +│ ├── netadaptor # adaptor to convert a package transport's Connecter and Listener into net.* primitives +│ ├── transportmux # TCP connecter and listener used to split control & data traffic +│ └── versionhandshake # replication protocol version handshake performed on newly established connections ├── tlsconf # abstraction for Go TLS server + client config +├── transport # transports implementation +│ ├── fromconfig +│ ├── local +│ ├── ssh +│ ├── tcp +│ └── tls ├── util ├── vendor # managed by dep ├── version # abstraction for versions (filled during build by Makefile) diff --git a/client/status.go b/client/status.go index bf882f7..f1bc9c0 100644 --- a/client/status.go +++ b/client/status.go @@ -10,8 +10,7 @@ import ( "github.com/zrepl/zrepl/daemon" "github.com/zrepl/zrepl/daemon/job" "github.com/zrepl/zrepl/daemon/pruner" - "github.com/zrepl/zrepl/replication" - "github.com/zrepl/zrepl/replication/fsrep" + "github.com/zrepl/zrepl/replication/report" "io" "math" "net/http" @@ -122,7 +121,7 @@ func wrap(s string, width int) string { if idx := strings.IndexAny(s, "\n\r"); idx != -1 && idx < rem { rem = idx+1 } - untilNewline := strings.TrimSpace(s[:rem]) + untilNewline := strings.TrimRight(s[:rem], "\n\r") s = s[rem:] if len(untilNewline) == 0 { continue @@ -130,7 +129,7 @@ func wrap(s string, width int) string { b.WriteString(untilNewline) b.WriteString("\n") } - return strings.TrimSpace(b.String()) + return strings.TrimRight(b.String(), "\n\r") } func (t *tui) 
printfDrawIndentedAndWrappedIfMultiline(format string, a ...interface{}) { @@ -342,74 +341,91 @@ func (t *tui) draw() { termbox.Flush() } -func (t *tui) renderReplicationReport(rep *replication.Report, history *bytesProgressHistory) { +func (t *tui) renderReplicationReport(rep *report.Report, history *bytesProgressHistory) { if rep == nil { t.printf("...\n") return } - all := make([]*fsrep.Report, 0, len(rep.Completed)+len(rep.Pending) + 1) - all = append(all, rep.Completed...) - all = append(all, rep.Pending...) - if rep.Active != nil { - all = append(all, rep.Active) + if rep.WaitReconnectError != nil { + t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: %s", rep.WaitReconnectError) + t.newline() } - sort.Slice(all, func(i, j int) bool { - return all[i].Filesystem < all[j].Filesystem + if !rep.WaitReconnectSince.IsZero() { + delta := rep.WaitReconnectUntil.Sub(time.Now()).Round(time.Second) + if rep.WaitReconnectUntil.IsZero() || delta > 0 { + var until string + if rep.WaitReconnectUntil.IsZero() { + until = "waiting indefinitely" + } else { + until = fmt.Sprintf("hard fail in %s @ %s", delta, rep.WaitReconnectUntil) + } + t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: reconnecting with exponential backoff (since %s) (%s)", + rep.WaitReconnectSince, until) + } else { + t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: reconnects reached hard-fail timeout @ %s", rep.WaitReconnectUntil) + } + t.newline() + } + + // TODO visualize more than the latest attempt by folding all attempts into one + if len(rep.Attempts) == 0 { + t.printf("no attempts made yet") + return + } else { + t.printf("Attempt #%d", len(rep.Attempts)) + if len(rep.Attempts) > 1 { + t.printf(". 
Previous attempts failed with the follwing statuses:") + t.newline() + t.addIndent(1) + for i, a := range rep.Attempts[:len(rep.Attempts)-1] { + t.printfDrawIndentedAndWrappedIfMultiline("#%d: %s (failed at %s) (ran %s)", i + 1, a.State, a.FinishAt, a.FinishAt.Sub(a.StartAt)) + t.newline() + } + t.addIndent(-1) + } else { + t.newline() + } + } + + latest := rep.Attempts[len(rep.Attempts)-1] + sort.Slice(latest.Filesystems, func(i, j int) bool { + return latest.Filesystems[i].Info.Name < latest.Filesystems[j].Info.Name }) - state, err := replication.StateString(rep.Status) - if err != nil { - t.printf("Status: %q (parse error: %q)\n", rep.Status, err) - return - } - - t.printf("Status: %s", state) + t.printf("Status: %s", latest.State) t.newline() - if rep.Problem != "" { + if latest.State == report.AttemptPlanningError { t.printf("Problem: ") - t.printfDrawIndentedAndWrappedIfMultiline("%s", rep.Problem) + t.printfDrawIndentedAndWrappedIfMultiline("%s", latest.PlanError) + t.newline() + } else if latest.State == report.AttemptFanOutError { + t.printf("Problem: one or more of the filesystems encountered errors") t.newline() } - if rep.SleepUntil.After(time.Now()) && !state.IsTerminal() { - t.printf("Sleeping until %s (%s left)\n", rep.SleepUntil, rep.SleepUntil.Sub(time.Now())) - } - if state != replication.Planning && state != replication.PlanningError { + if latest.State != report.AttemptPlanning && latest.State != report.AttemptPlanningError { + // Draw global progress bar // Progress: [---------------] - sumUpFSRep := func(rep *fsrep.Report) (transferred, total int64) { - for _, s := range rep.Pending { - transferred += s.Bytes - total += s.ExpectedBytes - } - for _, s := range rep.Completed { - transferred += s.Bytes - total += s.ExpectedBytes - } - return - } - var transferred, total int64 - for _, fs := range all { - fstx, fstotal := sumUpFSRep(fs) - transferred += fstx - total += fstotal - } - rate, changeCount := history.Update(transferred) + expected, 
replicated := latest.BytesSum() + rate, changeCount := history.Update(replicated) t.write("Progress: ") - t.drawBar(50, transferred, total, changeCount) - t.write(fmt.Sprintf(" %s / %s @ %s/s", ByteCountBinary(transferred), ByteCountBinary(total), ByteCountBinary(rate))) + t.drawBar(50, replicated, expected, changeCount) + t.write(fmt.Sprintf(" %s / %s @ %s/s", ByteCountBinary(replicated), ByteCountBinary(expected), ByteCountBinary(rate))) t.newline() + + var maxFSLen int + for _, fs := range latest.Filesystems { + if len(fs.Info.Name) > maxFSLen { + maxFSLen = len(fs.Info.Name) + } + } + for _, fs := range latest.Filesystems { + t.printFilesystemStatus(fs, false, maxFSLen) // FIXME bring 'active' flag back + } + } - var maxFSLen int - for _, fs := range all { - if len(fs.Filesystem) > maxFSLen { - maxFSLen = len(fs.Filesystem) - } - } - for _, fs := range all { - t.printFilesystemStatus(fs, fs == rep.Active, maxFSLen) - } } func (t *tui) renderPrunerReport(r *pruner.Report) { @@ -430,9 +446,6 @@ func (t *tui) renderPrunerReport(r *pruner.Report) { if r.Error != "" { t.printf("Error: %s\n", r.Error) } - if r.SleepUntil.After(time.Now()) { - t.printf("Sleeping until %s (%s left)\n", r.SleepUntil, r.SleepUntil.Sub(time.Now())) - } type commonFS struct { *pruner.FSReport @@ -448,8 +461,7 @@ func (t *tui) renderPrunerReport(r *pruner.Report) { switch state { case pruner.Plan: fallthrough - case pruner.PlanWait: fallthrough - case pruner.ErrPerm: + case pruner.PlanErr: return } @@ -489,8 +501,18 @@ func (t *tui) renderPrunerReport(r *pruner.Report) { for _, fs := range all { t.write(rightPad(fs.Filesystem, maxFSname, " ")) t.write(" ") + if !fs.SkipReason.NotSkipped() { + t.printf("skipped: %s\n", fs.SkipReason) + continue + } if fs.LastError != "" { - t.printf("ERROR (%d): %s\n", fs.ErrorCount, fs.LastError) // whitespace is padding + if strings.ContainsAny(fs.LastError, "\r\n") { + t.printf("ERROR:") + t.printfDrawIndentedAndWrappedIfMultiline("%s\n", fs.LastError) + 
} else { + t.printfDrawIndentedAndWrappedIfMultiline("ERROR: %s\n", fs.LastError) + } + t.newline() continue } @@ -513,25 +535,6 @@ func (t *tui) renderPrunerReport(r *pruner.Report) { } -const snapshotIndent = 1 -func calculateMaxFSLength(all []*fsrep.Report) (maxFS, maxStatus int) { - for _, e := range all { - if len(e.Filesystem) > maxFS { - maxFS = len(e.Filesystem) - } - all2 := make([]*fsrep.StepReport, 0, len(e.Pending) + len(e.Completed)) - all2 = append(all2, e.Pending...) - all2 = append(all2, e.Completed...) - for _, e2 := range all2 { - elen := len(e2.Problem) + len(e2.From) + len(e2.To) + 60 // random spacing, units, labels, etc - if elen > maxStatus { - maxStatus = elen - } - } - } - return -} - func times(str string, n int) (out string) { for i := 0; i < n; i++ { out += str @@ -575,35 +578,13 @@ func (t *tui) drawBar(length int, bytes, totalBytes int64, changeCount int) { t.write("]") } -func StringStepState(s fsrep.StepState) string { - switch s { - case fsrep.StepReplicationReady: return "Ready" - case fsrep.StepMarkReplicatedReady: return "MarkReady" - case fsrep.StepCompleted: return "Completed" - default: - return fmt.Sprintf("UNKNOWN %d", s) - } -} - -func (t *tui) printFilesystemStatus(rep *fsrep.Report, active bool, maxFS int) { - - bytes := int64(0) - totalBytes := int64(0) - for _, s := range rep.Pending { - bytes += s.Bytes - totalBytes += s.ExpectedBytes - } - for _, s := range rep.Completed { - bytes += s.Bytes - totalBytes += s.ExpectedBytes - } - +func (t *tui) printFilesystemStatus(rep *report.FilesystemReport, active bool, maxFS int) { + expected, replicated := rep.BytesSum() status := fmt.Sprintf("%s (step %d/%d, %s/%s)", - rep.Status, - len(rep.Completed), len(rep.Pending) + len(rep.Completed), - ByteCountBinary(bytes), ByteCountBinary(totalBytes), - + strings.ToUpper(string(rep.State)), + rep.CurrentStep, len(rep.Steps), + ByteCountBinary(replicated), ByteCountBinary(expected), ) activeIndicator := " " @@ -612,18 +593,23 @@ func 
(t *tui) printFilesystemStatus(rep *fsrep.Report, active bool, maxFS int) { } t.printf("%s %s %s ", activeIndicator, - rightPad(rep.Filesystem, maxFS, " "), + rightPad(rep.Info.Name, maxFS, " "), status) next := "" - if rep.Problem != "" { - next = rep.Problem - } else if len(rep.Pending) > 0 { - if rep.Pending[0].From != "" { - next = fmt.Sprintf("next: %s => %s", rep.Pending[0].From, rep.Pending[0].To) + if err := rep.Error(); err != nil { + next = err.Err + } else if rep.State != report.FilesystemDone { + if nextStep := rep.NextStep(); nextStep != nil { + if nextStep.IsIncremental() { + next = fmt.Sprintf("next: %s => %s", nextStep.Info.From, nextStep.Info.To) + } else { + next = fmt.Sprintf("next: %s (full)", nextStep.Info.To) + } } else { - next = fmt.Sprintf("next: %s (full)", rep.Pending[0].To) + next = "" // individual FSes may still be in planning state } + } t.printfDrawIndentedAndWrappedIfMultiline("%s", next) diff --git a/config/config.go b/config/config.go index 7ff0ca9..dbc0aad 100644 --- a/config/config.go +++ b/config/config.go @@ -68,7 +68,38 @@ type PushJob struct { type PullJob struct { ActiveJob `yaml:",inline"` RootFS string `yaml:"root_fs"` - Interval time.Duration `yaml:"interval,positive"` + Interval PositiveDurationOrManual `yaml:"interval"` +} + +type PositiveDurationOrManual struct { + Interval time.Duration + Manual bool +} + +var _ yaml.Unmarshaler = (*PositiveDurationOrManual)(nil) + +func (i *PositiveDurationOrManual) UnmarshalYAML(u func(interface{}, bool) error) (err error) { + var s string + if err := u(&s, true); err != nil { + return err + } + switch s { + case "manual": + i.Manual = true + i.Interval = 0 + case "": + return fmt.Errorf("value must not be empty") + default: + i.Manual = false + i.Interval, err = time.ParseDuration(s) + if err != nil { + return err + } + if i.Interval <= 0 { + return fmt.Errorf("value must be a positive duration, got %q", s) + } + } + return nil } type SinkJob struct { diff --git 
a/config/config_positiveintervalormanual_test.go b/config/config_positiveintervalormanual_test.go new file mode 100644 index 0000000..237813e --- /dev/null +++ b/config/config_positiveintervalormanual_test.go @@ -0,0 +1,41 @@ +package config + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/zrepl/yaml-config" +) + +func TestPositiveDurationOrManual(t *testing.T) { + cases := []struct { + Comment, Input string + Result *PositiveDurationOrManual + }{ + {"empty is error", "", nil}, + {"negative is error", "-1s", nil}, + {"zero seconds is error", "0s", nil}, + {"zero is error", "0", nil}, + {"non-manual is error", "something", nil}, + {"positive seconds works", "1s", &PositiveDurationOrManual{Manual: false, Interval: 1 * time.Second}}, + {"manual works", "manual", &PositiveDurationOrManual{Manual: true, Interval: 0}}, + } + for _, tc := range cases { + t.Run(tc.Comment, func(t *testing.T) { + var out struct { + FieldName PositiveDurationOrManual `yaml:"fieldname"` + } + input := fmt.Sprintf("\nfieldname: %s\n", tc.Input) + err := yaml.UnmarshalStrict([]byte(input), &out) + if tc.Result == nil { + assert.Error(t, err) + t.Logf("%#v", out) + } else { + assert.Equal(t, *tc.Result, out.FieldName) + } + }) + } + +} diff --git a/daemon/job/active.go b/daemon/job/active.go index 907f4be..d70184f 100644 --- a/daemon/job/active.go +++ b/daemon/job/active.go @@ -17,10 +17,12 @@ import ( "github.com/zrepl/zrepl/daemon/snapper" "github.com/zrepl/zrepl/endpoint" "github.com/zrepl/zrepl/replication" + "github.com/zrepl/zrepl/replication/driver" + "github.com/zrepl/zrepl/replication/logic" + "github.com/zrepl/zrepl/replication/report" "github.com/zrepl/zrepl/rpc" "github.com/zrepl/zrepl/transport" "github.com/zrepl/zrepl/transport/fromconfig" - "github.com/zrepl/zrepl/util/envconst" "github.com/zrepl/zrepl/zfs" ) @@ -53,7 +55,7 @@ type activeSideTasks struct { state ActiveSideState // valid for state ActiveSideReplicating, 
ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone - replication *replication.Replication + replicationReport driver.ReportFunc replicationCancel context.CancelFunc // valid for state ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone @@ -79,7 +81,7 @@ func (a *ActiveSide) updateTasks(u func(*activeSideTasks)) activeSideTasks { type activeMode interface { ConnectEndpoints(rpcLoggers rpc.Loggers, connecter transport.Connecter) DisconnectEndpoints() - SenderReceiver() (replication.Sender, replication.Receiver) + SenderReceiver() (logic.Sender, logic.Receiver) Type() Type RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{}) ResetConnectBackoff() @@ -111,7 +113,7 @@ func (m *modePush) DisconnectEndpoints() { m.receiver = nil } -func (m *modePush) SenderReceiver() (replication.Sender, replication.Receiver) { +func (m *modePush) SenderReceiver() (logic.Sender, logic.Receiver) { m.setupMtx.Lock() defer m.setupMtx.Unlock() return m.sender, m.receiver @@ -151,7 +153,7 @@ type modePull struct { receiver *endpoint.Receiver sender *rpc.Client rootFS *zfs.DatasetPath - interval time.Duration + interval config.PositiveDurationOrManual } func (m *modePull) ConnectEndpoints(loggers rpc.Loggers, connecter transport.Connecter) { @@ -172,7 +174,7 @@ func (m *modePull) DisconnectEndpoints() { m.receiver = nil } -func (m *modePull) SenderReceiver() (replication.Sender, replication.Receiver) { +func (m *modePull) SenderReceiver() (logic.Sender, logic.Receiver) { m.setupMtx.Lock() defer m.setupMtx.Unlock() return m.sender, m.receiver @@ -181,7 +183,12 @@ func (m *modePull) SenderReceiver() (replication.Sender, replication.Receiver) { func (*modePull) Type() Type { return TypePull } func (m *modePull) RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{}) { - t := time.NewTicker(m.interval) + if m.interval.Manual { + GetLogger(ctx).Info("manual pull configured, periodic pull disabled") + // "waiting for wakeups" is printed in common ActiveSide.do 
+ return + } + t := time.NewTicker(m.interval.Interval) defer t.Stop() for { select { @@ -210,9 +217,6 @@ func (m *modePull) ResetConnectBackoff() { func modePullFromConfig(g *config.Global, in *config.PullJob) (m *modePull, err error) { m = &modePull{} - if in.Interval <= 0 { - return nil, errors.New("interval must be positive") - } m.interval = in.Interval m.rootFS, err = zfs.NewDatasetPath(in.RootFS) @@ -274,7 +278,7 @@ func (j *ActiveSide) RegisterMetrics(registerer prometheus.Registerer) { func (j *ActiveSide) Name() string { return j.name } type ActiveSideStatus struct { - Replication *replication.Report + Replication *report.Report PruningSender, PruningReceiver *pruner.Report } @@ -283,8 +287,8 @@ func (j *ActiveSide) Status() *Status { s := &ActiveSideStatus{} t := j.mode.Type() - if tasks.replication != nil { - s.Replication = tasks.replication.Report() + if tasks.replicationReport != nil { + s.Replication = tasks.replicationReport() } if tasks.prunerSender != nil { s.PruningSender = tasks.prunerSender.Report() @@ -345,78 +349,6 @@ func (j *ActiveSide) do(ctx context.Context) { } }() - // The code after this watchdog goroutine is sequential and transitions the state from - // ActiveSideReplicating -> ActiveSidePruneSender -> ActiveSidePruneReceiver -> ActiveSideDone - // If any of those sequential tasks 'gets stuck' (livelock, no progress), the watchdog will eventually - // cancel its context. - // If the task is written to support context cancellation, it will return immediately (in permanent error state), - // and the sequential code above transitions to the next state. - go func() { - - wdto := envconst.Duration("ZREPL_JOB_WATCHDOG_TIMEOUT", 10*time.Minute) - jitter := envconst.Duration("ZREPL_JOB_WATCHDOG_JITTER", 1*time.Second) - // shadowing! 
- log := log.WithField("watchdog_timeout", wdto.String()) - - log.Debug("starting watchdog") - defer log.Debug("watchdog stopped") - - t := time.NewTicker(wdto) - defer t.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-t.C: // fall - } - - j.updateTasks(func(tasks *activeSideTasks) { - // Since cancelling a task will cause the sequential code to transition to the next state immediately, - // we cannot check for its progress right then (no fallthrough). - // Instead, we return (not continue because we are in a closure) and give the new state another - // ZREPL_JOB_WATCHDOG_TIMEOUT interval to try make some progress. - - log.WithField("state", tasks.state).Debug("watchdog firing") - - const WATCHDOG_ENVCONST_NOTICE = " (adjust ZREPL_JOB_WATCHDOG_TIMEOUT env variable if inappropriate)" - - switch tasks.state { - case ActiveSideReplicating: - log.WithField("replication_progress", tasks.replication.Progress.String()). - Debug("check replication progress") - if tasks.replication.Progress.CheckTimeout(wdto, jitter) { - log.Error("replication did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE) - tasks.replicationCancel() - return - } - case ActiveSidePruneSender: - log.WithField("prune_sender_progress", tasks.replication.Progress.String()). - Debug("check pruner_sender progress") - if tasks.prunerSender.Progress.CheckTimeout(wdto, jitter) { - log.Error("pruner_sender did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE) - tasks.prunerSenderCancel() - return - } - case ActiveSidePruneReceiver: - log.WithField("prune_receiver_progress", tasks.replication.Progress.String()). 
- Debug("check pruner_receiver progress") - if tasks.prunerReceiver.Progress.CheckTimeout(wdto, jitter) { - log.Error("pruner_receiver did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE) - tasks.prunerReceiverCancel() - return - } - case ActiveSideDone: - // ignore, ctx will be Done() in a few milliseconds and the watchdog will exit - default: - log.WithField("state", tasks.state). - Error("watchdog implementation error: unknown active side state") - } - }) - - } - }() - sender, receiver := j.mode.SenderReceiver() { @@ -426,16 +358,19 @@ func (j *ActiveSide) do(ctx context.Context) { default: } ctx, repCancel := context.WithCancel(ctx) - tasks := j.updateTasks(func(tasks *activeSideTasks) { + var repWait driver.WaitFunc + j.updateTasks(func(tasks *activeSideTasks) { // reset it *tasks = activeSideTasks{} tasks.replicationCancel = repCancel - tasks.replication = replication.NewReplication(j.promRepStateSecs, j.promBytesReplicated) + tasks.replicationReport, repWait = replication.Do( + ctx, logic.NewPlanner(j.promRepStateSecs, j.promBytesReplicated, sender, receiver), + ) tasks.state = ActiveSideReplicating }) log.Info("start replication") - tasks.replication.Drive(ctx, sender, receiver) - repCancel() // always cancel to free up context resources + repWait(true) // wait blocking + repCancel() // always cancel to free up context resources } { diff --git a/daemon/logging/build_logging.go b/daemon/logging/build_logging.go index ce90d3c..52b7e15 100644 --- a/daemon/logging/build_logging.go +++ b/daemon/logging/build_logging.go @@ -15,7 +15,7 @@ import ( "github.com/zrepl/zrepl/daemon/snapper" "github.com/zrepl/zrepl/endpoint" "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication" + "github.com/zrepl/zrepl/replication/driver" "github.com/zrepl/zrepl/rpc" "github.com/zrepl/zrepl/rpc/transportmux" "github.com/zrepl/zrepl/tlsconf" @@ -79,7 +79,7 @@ const ( ) func WithSubsystemLoggers(ctx context.Context, log logger.Logger) context.Context { - ctx = 
replication.WithLogger(ctx, log.WithField(SubsysField, SubsysReplication)) + ctx = driver.WithLogger(ctx, log.WithField(SubsysField, SubsysReplication)) ctx = endpoint.WithLogger(ctx, log.WithField(SubsysField, SubsyEndpoint)) ctx = pruner.WithLogger(ctx, log.WithField(SubsysField, SubsysPruning)) ctx = snapper.WithLogger(ctx, log.WithField(SubsysField, SubsysSnapshot)) diff --git a/daemon/pruner/pruner.go b/daemon/pruner/pruner.go index bb515ad..47cb08e 100644 --- a/daemon/pruner/pruner.go +++ b/daemon/pruner/pruner.go @@ -8,10 +8,8 @@ import ( "github.com/zrepl/zrepl/config" "github.com/zrepl/zrepl/logger" "github.com/zrepl/zrepl/pruning" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/util/envconst" - "github.com/zrepl/zrepl/util/watchdog" - "net" "sort" "strings" "sync" @@ -21,6 +19,7 @@ import ( // Try to keep it compatible with gitub.com/zrepl/zrepl/endpoint.Endpoint type History interface { ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) + ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) } // Try to keep it compatible with gitub.com/zrepl/zrepl/endpoint.Endpoint @@ -60,14 +59,11 @@ type args struct { type Pruner struct { args args - Progress watchdog.KeepAlive - mtx sync.RWMutex state State - // State ErrWait|ErrPerm - sleepUntil time.Time + // State PlanErr err error // State Exec @@ -161,71 +157,43 @@ type State int const ( Plan State = 1 << iota - PlanWait + PlanErr Exec - ExecWait - ErrPerm + ExecErr Done ) -func (s State) statefunc() state { - var statemap = map[State]state{ - Plan: statePlan, - PlanWait: statePlanWait, - Exec: stateExec, - ExecWait: stateExecWait, - ErrPerm: nil, - Done: nil, - } - return statemap[s] -} - -func (s State) IsTerminal() bool { - return s.statefunc() == nil -} - -type updater func(func(*Pruner)) State -type state func(args *args, u updater) state +type 
updater func(func(*Pruner)) func (p *Pruner) Prune() { p.prune(p.args) } func (p *Pruner) prune(args args) { - s := p.state.statefunc() - for s != nil { - pre := p.state - s = s(&args, func(f func(*Pruner)) State { + u := func(f func(*Pruner)) { p.mtx.Lock() defer p.mtx.Unlock() f(p) - return p.state - }) - post := p.state - GetLogger(args.ctx). - WithField("transition", fmt.Sprintf("%s=>%s", pre, post)). - Debug("state transition") - if err := p.Error(); err != nil { - GetLogger(args.ctx). - WithError(p.err). - WithField("state", post.String()). - Error("entering error state after error") } + // TODO support automatic retries + // It is advisable to merge this code with package replication/driver before + // That will likely require re-modelling struct fs like replication/driver.attempt, + // including figuring out how to resume a plan after being interrupted by network errors + // The non-retrying code in this package should move straight to replication/logic. + doOneAttempt(&args, u) } -} type Report struct { - State string - SleepUntil time.Time - Error string + State string + Error string Pending, Completed []FSReport } type FSReport struct { - Filesystem string + Filesystem string SnapshotList, DestroyList []SnapshotReport - ErrorCount int - LastError string + SkipReason FSSkipReason + LastError string } type SnapshotReport struct { @@ -240,14 +208,9 @@ func (p *Pruner) Report() *Report { r := Report{State: p.state.String()} - if p.state & (PlanWait|ExecWait) != 0 { - r.SleepUntil = p.sleepUntil - } - if p.state & (PlanWait|ExecWait|ErrPerm) != 0 { if p.err != nil { r.Error = p.err.Error() } - } if p.execQueue != nil { r.Pending, r.Completed = p.execQueue.Report() @@ -262,20 +225,16 @@ func (p *Pruner) State() State { return p.state } -func (p *Pruner) Error() error { - p.mtx.Lock() - defer p.mtx.Unlock() - if p.state & (PlanWait|ExecWait|ErrPerm) != 0 { - return p.err - } - return nil -} - type fs struct { path string // permanent error during planning - 
planErr error + planErr error + planErrContext string + + // if != "", the fs was skipped for planning and the field + // contains the reason + skipReason FSSkipReason // snapshots presented by target // (type snapshot) @@ -288,8 +247,18 @@ type fs struct { // only during Exec state, also used by execQueue execErrLast error - execErrCount int +} +type FSSkipReason string + +const ( + NotSkipped = "" + SkipPlaceholder = "filesystem is placeholder" + SkipNoCorrespondenceOnSender = "filesystem has no correspondence on sender" +) + +func (r FSSkipReason) NotSkipped() bool { + return r == NotSkipped } func (f *fs) Report() FSReport { @@ -298,7 +267,11 @@ func (f *fs) Report() FSReport { r := FSReport{} r.Filesystem = f.path - r.ErrorCount = f.execErrCount + r.SkipReason = f.skipReason + if !r.SkipReason.NotSkipped() { + return r + } + if f.planErr != nil { r.LastError = f.planErr.Error() } else if f.execErrLast != nil { @@ -340,68 +313,66 @@ func (s snapshot) Replicated() bool { return s.replicated } func (s snapshot) Date() time.Time { return s.date } -type Error interface { - error - Temporary() bool -} - -var _ Error = net.Error(nil) - -func shouldRetry(e error) bool { - if neterr, ok := e.(net.Error); ok { - return neterr.Temporary() - } - return false -} - -func onErr(u updater, e error) state { - return u(func(p *Pruner) { - p.err = e - if !shouldRetry(e) { - p.state = ErrPerm - return - } - switch p.state { - case Plan: - p.state = PlanWait - case Exec: - p.state = ExecWait - default: - panic(p.state) - } - }).statefunc() -} - -func statePlan(a *args, u updater) state { +func doOneAttempt(a *args, u updater) { ctx, target, receiver := a.ctx, a.target, a.receiver - var ka *watchdog.KeepAlive - u(func(pruner *Pruner) { - ka = &pruner.Progress - }) + + sfssres, err := receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) + if err != nil { + u(func(p *Pruner) { + p.state = PlanErr + p.err = err + }) + return + } + sfss := make(map[string]*pdu.Filesystem) + for _, 
sfs := range sfssres.GetFilesystems() { + sfss[sfs.GetPath()] = sfs + } tfssres, err := target.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) if err != nil { - return onErr(u, err) + u(func(p *Pruner) { + p.state = PlanErr + p.err = err + }) + return } tfss := tfssres.GetFilesystems() pfss := make([]*fs, len(tfss)) +tfss_loop: for i, tfs := range tfss { l := GetLogger(ctx).WithField("fs", tfs.Path) l.Debug("plan filesystem") - pfs := &fs{ - path: tfs.Path, + path: tfs.Path, } pfss[i] = pfs + if tfs.GetIsPlaceholder() { + pfs.skipReason = SkipPlaceholder + l.WithField("skip_reason", pfs.skipReason).Debug("skipping filesystem") + continue + } else if sfs := sfss[tfs.GetPath()]; sfs == nil { + pfs.skipReason = SkipNoCorrespondenceOnSender + l.WithField("skip_reason", pfs.skipReason).WithField("sfs", sfs.GetPath()).Debug("skipping filesystem") + continue + } + + pfsPlanErrAndLog := func(err error, message string) { + t := fmt.Sprintf("%T", err) + pfs.planErr = err + pfs.planErrContext = message + l.WithField("orig_err_type", t).WithError(err).Error(fmt.Sprintf("%s: plan error, skipping filesystem", message)) + } + tfsvsres, err := target.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: tfs.Path}) if err != nil { - l.WithError(err).Error("cannot list filesystem versions") - return onErr(u, err) + pfsPlanErrAndLog(err, "cannot list filesystem versions") + continue tfss_loop } tfsvs := tfsvsres.GetVersions() // no progress here since we could run in a live-lock (must have used target AND receiver before progress) @@ -410,24 +381,21 @@ func statePlan(a *args, u updater) state { rcReq := &pdu.ReplicationCursorReq{ Filesystem: tfs.Path, - Op: &pdu.ReplicationCursorReq_Get{ + Op: &pdu.ReplicationCursorReq_Get{ Get: &pdu.ReplicationCursorReq_GetOp{}, }, } rc, err := receiver.ReplicationCursor(ctx, rcReq) if err != nil { - l.WithError(err).Error("cannot get replication cursor") - return onErr(u, err) + pfsPlanErrAndLog(err, "cannot get replication cursor 
bookmark") + continue tfss_loop } - ka.MadeProgress() - if rc.GetNotexist() { - l.Error("replication cursor does not exist, skipping") - pfs.destroyList = []pruning.Snapshot{} - pfs.planErr = fmt.Errorf("replication cursor bookmark does not exist (one successful replication is required before pruning works)") - continue + if rc.GetNotexist() { + err := errors.New("replication cursor bookmark does not exist (one successful replication is required before pruning works)") + pfsPlanErrAndLog(err, "") + continue tfss_loop } - // scan from older to newer, all snapshots older than cursor are interpreted as replicated sort.Slice(tfsvs, func(i, j int) bool { return tfsvs[i].CreateTXG < tfsvs[j].CreateTXG @@ -449,11 +417,9 @@ func statePlan(a *args, u updater) state { } creation, err := tfsv.CreationAsTime() if err != nil { - err := fmt.Errorf("%s%s has invalid creation date: %s", tfs, tfsv.RelName(), err) - l.WithError(err). - WithField("tfsv", tfsv.RelName()). - Error("error with fileesystem version") - return onErr(u, err) + err := fmt.Errorf("%s: %s", tfsv.RelName(), err) + pfsPlanErrAndLog(err, "fs version with invalid creation date") + continue tfss_loop } // note that we cannot use CreateTXG because target and receiver could be on different pools atCursor := tfsv.Guid == rc.GetGuid() @@ -465,44 +431,63 @@ func statePlan(a *args, u updater) state { }) } if preCursor { - err := fmt.Errorf("replication cursor not found in prune target filesystem versions") - l.Error(err.Error()) - return onErr(u, err) + pfsPlanErrAndLog(fmt.Errorf("replication cursor not found in prune target filesystem versions"), "") + continue tfss_loop } // Apply prune rules pfs.destroyList = pruning.PruneSnapshots(pfs.snaps, a.rules) - ka.MadeProgress() } - return u(func(pruner *Pruner) { - pruner.Progress.MadeProgress() + u(func(pruner *Pruner) { pruner.execQueue = newExecQueue(len(pfss)) for _, pfs := range pfss { pruner.execQueue.Put(pfs, nil, false) } pruner.state = Exec - }).statefunc() -} - 
-func stateExec(a *args, u updater) state { + }) + for { var pfs *fs - state := u(func(pruner *Pruner) { + u(func(pruner *Pruner) { pfs = pruner.execQueue.Pop() + }) if pfs == nil { - nextState := Done - if pruner.execQueue.HasCompletedFSWithErrors() { - nextState = ErrPerm + break + } + doOneAttemptExec(a, u, pfs) + } + + var rep *Report + { + // must not hold lock for report + var pruner *Pruner + u(func(p *Pruner) { + pruner = p + }) + rep = pruner.Report() + } + u(func(p *Pruner) { + if len(rep.Pending) > 0 { + panic("queue should not have pending items at this point") + } + hadErr := false + for _, fsr := range rep.Completed { + hadErr = hadErr || fsr.SkipReason.NotSkipped() && fsr.LastError != "" } - pruner.state = nextState - return + if hadErr { + p.state = ExecErr + } else { + p.state = Done } }) - if state != Exec { - return state.statefunc() + + } +// attempts to exec pfs, puts it back into the queue with the result +func doOneAttemptExec(a *args, u updater, pfs *fs) { + destroyList := make([]*pdu.FilesystemVersion, len(pfs.destroyList)) for i := range destroyList { destroyList[i] = pfs.destroyList[i].(snapshot).fsv @@ -521,7 +506,7 @@ func stateExec(a *args, u updater) state { u(func(pruner *Pruner) { pruner.execQueue.Put(pfs, err, false) }) - return onErr(u, err) + return } // check if all snapshots were destroyed destroyResults := make(map[string]*pdu.DestroySnapshotRes) @@ -562,31 +547,6 @@ func stateExec(a *args, u updater) state { }) if err != nil { GetLogger(a.ctx).WithError(err).Error("target could not destroy snapshots") - return onErr(u, err) - } - - return u(func(pruner *Pruner) { - pruner.Progress.MadeProgress() - }).statefunc() -} - -func stateExecWait(a *args, u updater) state { - return doWait(Exec, a, u) -} - -func statePlanWait(a *args, u updater) state { - return doWait(Plan, a, u) -} - -func doWait(goback State, a *args, u updater) state { - timer := time.NewTimer(a.retryWait) - defer timer.Stop() - select { - case <-timer.C: - return 
u(func(pruner *Pruner) { - pruner.state = goback - }).statefunc() - case <-a.ctx.Done(): - return onErr(u, a.ctx.Err()) + return } } diff --git a/daemon/pruner/pruner_queue.go b/daemon/pruner/pruner_queue.go index 063bcf7..840e93b 100644 --- a/daemon/pruner/pruner_queue.go +++ b/daemon/pruner/pruner_queue.go @@ -58,10 +58,7 @@ func (q *execQueue) Pop() *fs { func(q *execQueue) Put(fs *fs, err error, done bool) { fs.mtx.Lock() fs.execErrLast = err - if err != nil { - fs.execErrCount++ - } - if done || (err != nil && !shouldRetry(fs.execErrLast)) { + if done || err != nil { fs.mtx.Unlock() q.mtx.Lock() q.completed = append(q.completed, fs) @@ -78,9 +75,6 @@ func(q *execQueue) Put(fs *fs, err error, done bool) { defer q.pending[i].mtx.Unlock() q.pending[j].mtx.Lock() defer q.pending[j].mtx.Unlock() - if q.pending[i].execErrCount != q.pending[j].execErrCount { - return q.pending[i].execErrCount < q.pending[j].execErrCount - } return strings.Compare(q.pending[i].path, q.pending[j].path) == -1 }) q.mtx.Unlock() diff --git a/daemon/pruner/pruner_test.go b/daemon/pruner/pruner_test.go deleted file mode 100644 index 23a10e8..0000000 --- a/daemon/pruner/pruner_test.go +++ /dev/null @@ -1,206 +0,0 @@ -package pruner - -import ( - "context" - "fmt" - "github.com/stretchr/testify/assert" - "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/pruning" - "github.com/zrepl/zrepl/replication/pdu" - "net" - "testing" - "time" -) - -type mockFS struct { - path string - snaps []string -} - -func (m *mockFS) Filesystem() *pdu.Filesystem { - return &pdu.Filesystem{ - Path: m.path, - } -} - -func (m *mockFS) FilesystemVersions() []*pdu.FilesystemVersion { - versions := make([]*pdu.FilesystemVersion, len(m.snaps)) - for i, v := range m.snaps { - versions[i] = &pdu.FilesystemVersion{ - Type: pdu.FilesystemVersion_Snapshot, - Name: v, - Creation: pdu.FilesystemVersionCreation(time.Unix(0, 0)), - Guid: uint64(i), - } - } - return versions -} - -type mockTarget struct { - fss []mockFS - 
destroyed map[string][]string - listVersionsErrs map[string][]error - listFilesystemsErr []error - destroyErrs map[string][]error -} - -func (t *mockTarget) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) { - if len(t.listFilesystemsErr) > 0 { - e := t.listFilesystemsErr[0] - t.listFilesystemsErr = t.listFilesystemsErr[1:] - return nil, e - } - fss := make([]*pdu.Filesystem, len(t.fss)) - for i := range fss { - fss[i] = t.fss[i].Filesystem() - } - return &pdu.ListFilesystemRes{Filesystems: fss}, nil -} - -func (t *mockTarget) ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error) { - fs := req.Filesystem - if len(t.listVersionsErrs[fs]) != 0 { - e := t.listVersionsErrs[fs][0] - t.listVersionsErrs[fs] = t.listVersionsErrs[fs][1:] - return nil, e - } - - for _, mfs := range t.fss { - if mfs.path != fs { - continue - } - return &pdu.ListFilesystemVersionsRes{Versions: mfs.FilesystemVersions()}, nil - } - return nil, fmt.Errorf("filesystem %s does not exist", fs) -} - -func (t *mockTarget) DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) { - fs, snaps := req.Filesystem, req.Snapshots - if len(t.destroyErrs[fs]) != 0 { - e := t.destroyErrs[fs][0] - t.destroyErrs[fs] = t.destroyErrs[fs][1:] - return nil, e - } - destroyed := t.destroyed[fs] - res := make([]*pdu.DestroySnapshotRes, len(snaps)) - for i, s := range snaps { - destroyed = append(destroyed, s.Name) - res[i] = &pdu.DestroySnapshotRes{Error: "", Snapshot: s} - } - t.destroyed[fs] = destroyed - return &pdu.DestroySnapshotsRes{Results: res}, nil -} - -type mockCursor struct { - snapname string - guid uint64 -} -type mockHistory struct { - errs map[string][]error - cursors map[string]*mockCursor -} - -func (r *mockHistory) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) { - fs := req.Filesystem 
- if len(r.errs[fs]) > 0 { - e := r.errs[fs][0] - r.errs[fs] = r.errs[fs][1:] - return nil, e - } - return &pdu.ReplicationCursorRes{Result: &pdu.ReplicationCursorRes_Guid{Guid: 0}}, nil -} - -type stubNetErr struct { - msg string - temporary, timeout bool -} - -var _ net.Error = stubNetErr{} - -func (e stubNetErr) Error() string { - return e.msg -} - -func (e stubNetErr) Temporary() bool { return e.temporary } - -func (e stubNetErr) Timeout() bool { return e.timeout } - -func TestPruner_Prune(t *testing.T) { - - var _ net.Error = &net.OpError{} // we use it below - target := &mockTarget{ - listFilesystemsErr: []error{ - stubNetErr{msg: "fakerror0", temporary: true}, - }, - listVersionsErrs: map[string][]error{ - "zroot/foo": { - stubNetErr{msg: "fakeerror1", temporary: true}, - stubNetErr{msg: "fakeerror2", temporary: true,}, - }, - }, - destroyErrs: map[string][]error{ - "zroot/baz": { - stubNetErr{msg: "fakeerror3", temporary: true}, // first error puts it back in the queue - stubNetErr{msg:"permanent error"}, // so it will be last when pruner gives up due to permanent err - }, - }, - destroyed: make(map[string][]string), - fss: []mockFS{ - { - path: "zroot/foo", - snaps: []string{ - "keep_a", - "keep_b", - "drop_c", - "keep_d", - }, - }, - { - path: "zroot/bar", - snaps: []string{ - "keep_e", - "keep_f", - "drop_g", - }, - }, - { - path: "zroot/baz", - snaps: []string{ - "keep_h", - "drop_i", - }, - }, - }, - } - history := &mockHistory{ - errs: map[string][]error{ - "zroot/foo": { - stubNetErr{msg: "fakeerror4", temporary: true}, - }, - }, - } - - keepRules := []pruning.KeepRule{pruning.MustKeepRegex("^keep", false)} - - p := Pruner{ - args: args{ - ctx: WithLogger(context.Background(), logger.NewTestLogger(t)), - target: target, - receiver: history, - rules: keepRules, - retryWait: 10*time.Millisecond, - }, - state: Plan, - } - p.Prune() - - exp := map[string][]string{ - "zroot/foo": {"drop_c"}, - "zroot/bar": {"drop_g"}, - } - - assert.Equal(t, exp, 
target.destroyed) - - //assert.Equal(t, map[string][]error{}, target.listVersionsErrs, "retried") - -} diff --git a/daemon/pruner/state_enumer.go b/daemon/pruner/state_enumer.go index 8c396ab..0a616ea 100644 --- a/daemon/pruner/state_enumer.go +++ b/daemon/pruner/state_enumer.go @@ -7,19 +7,17 @@ import ( ) const ( - _StateName_0 = "PlanPlanWait" + _StateName_0 = "PlanPlanErr" _StateName_1 = "Exec" - _StateName_2 = "ExecWait" - _StateName_3 = "ErrPerm" - _StateName_4 = "Done" + _StateName_2 = "ExecErr" + _StateName_3 = "Done" ) var ( - _StateIndex_0 = [...]uint8{0, 4, 12} + _StateIndex_0 = [...]uint8{0, 4, 11} _StateIndex_1 = [...]uint8{0, 4} - _StateIndex_2 = [...]uint8{0, 8} - _StateIndex_3 = [...]uint8{0, 7} - _StateIndex_4 = [...]uint8{0, 4} + _StateIndex_2 = [...]uint8{0, 7} + _StateIndex_3 = [...]uint8{0, 4} ) func (i State) String() string { @@ -33,22 +31,19 @@ func (i State) String() string { return _StateName_2 case i == 16: return _StateName_3 - case i == 32: - return _StateName_4 default: return fmt.Sprintf("State(%d)", i) } } -var _StateValues = []State{1, 2, 4, 8, 16, 32} +var _StateValues = []State{1, 2, 4, 8, 16} var _StateNameToValueMap = map[string]State{ _StateName_0[0:4]: 1, - _StateName_0[4:12]: 2, + _StateName_0[4:11]: 2, _StateName_1[0:4]: 4, - _StateName_2[0:8]: 8, - _StateName_3[0:7]: 16, - _StateName_4[0:4]: 32, + _StateName_2[0:7]: 8, + _StateName_3[0:4]: 16, } // StateString retrieves an enum value from the enum constants string name. diff --git a/docs/changelog.rst b/docs/changelog.rst index 5230fcb..06e302a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -48,9 +48,6 @@ Notes to Package Maintainers This functionality will cause SIGABRT on panics and can be used to capture a coredump of the panicking process. To that extend, make sure that your package build system, your OS's coredump collection and the Go delve debugger work together. 
Use your build system to package the Go program in `this tutorial on Go coredumps and the delve debugger `_ , and make sure the symbol resolution etc. work on coredumps captured from the binary produced by your build system. (Special focus on symbol stripping, etc.) -* Use of ``ssh+stdinserver`` :ref:`transport ` is no longer encouraged. - Please encourage users to use the new ``tcp`` or ``tls`` transports. - You might as well mention some of the :ref:`tunneling options listed here `. Changes ~~~~~~~ @@ -58,16 +55,26 @@ Changes * |feature| :issue:`55` : Push replication (see :ref:`push job ` and :ref:`sink job `) * |feature| :ref:`TCP Transport ` * |feature| :ref:`TCP + TLS client authentication transport ` -* |feature| :issue:`78` :commit:`074f989` : Replication protocol rewrite +* |feature| :issue:`111`: RPC protocol rewrite - * Uses ``github.com/problame/go-streamrpc`` for RPC layer - * |break| Protocol breakage, update and restart of all zrepl daemons is required - * |feature| :issue:`83`: Improved error handling of network-level errors (zrepl retries instead of failing the entire job) - * |bugfix| :issue:`75` :issue:`81`: use connection timeouts and protocol-level heartbeats - * |break| |break_config|: mappings are no longer supported + * |break| Protocol breakage; Update and restart of all zrepl daemons is required. + * Use `gRPC `_ for control RPCs and a custom protocol for bulk data transfer. + * Automatic retries for network-temporary errors - * Receiving sides (``pull`` and ``sink`` job) specify a single ``root_fs``. - Received filesystems are then stored *per client* in ``${root_fs}/${client_identity}``. + * Limited to errors during replication for this release. + Addresses the common problem of ISP-forced reconnection at night, but will become + way more useful with resumable send & recv support. + Pruning errors are handled per FS, i.e., a prune RPC is attempted at least once per FS. 
+ +* |feature| Proper timeout handling for the :ref:`SSH transport ` + + * |break| Requires Go 1.11 or later. + +* |break| |break_config|: mappings are no longer supported + + * Receiving sides (``pull`` and ``sink`` job) specify a single ``root_fs``. + Received filesystems are then stored *per client* in ``${root_fs}/${client_identity}``. + See :ref:`job-overview` for details. * |feature| |break| |break_config| Manual snapshotting + triggering of replication diff --git a/docs/configuration/jobs.rst b/docs/configuration/jobs.rst index 9a2ea89..0cf55e5 100644 --- a/docs/configuration/jobs.rst +++ b/docs/configuration/jobs.rst @@ -11,6 +11,8 @@ Job Types & Replication ======================= +.. _job-overview: + Overview & Terminology ---------------------- @@ -234,7 +236,8 @@ Job Type ``pull`` - ZFS dataset path are received to ``$root_fs/$client_identity`` * - ``interval`` - - Interval at which to pull from the source job + - | Interval at which to pull from the source job (e.g. ``10m``). + | ``manual`` disables periodic pulling, replication then only happens on :ref:`wakeup `. * - ``pruning`` - |pruning-spec| diff --git a/docs/index.rst b/docs/index.rst index 8fc5b50..f7aaa5f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,6 +33,7 @@ Main Features * Advanced replication features + * [x] Automatic retries for temporary network errors * [ ] Resumable send & receive * [ ] Compressed send & receive * [ ] Raw encrypted send & receive diff --git a/docs/usage.rst b/docs/usage.rst index 5cefaa7..9a400ea 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -13,6 +13,8 @@ CLI Overview The zrepl binary is self-documenting: run ``zrepl help`` for an overview of the available subcommands or ``zrepl SUBCOMMAND --help`` for information on available flags, etc. +.. _cli-signal-wakeup: + .. 
list-table:: :widths: 30 70 :header-rows: 1 diff --git a/endpoint/endpoint.go b/endpoint/endpoint.go index b90f3f7..d64ca88 100644 --- a/endpoint/endpoint.go +++ b/endpoint/endpoint.go @@ -7,8 +7,7 @@ import ( "path" "github.com/pkg/errors" - "github.com/zrepl/zrepl/replication" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/zfs" ) @@ -34,7 +33,7 @@ func (s *Sender) filterCheckFS(fs string) (*zfs.DatasetPath, error) { return nil, err } if !pass { - return nil, replication.NewFilteredError(fs) + return nil, fmt.Errorf("endpoint does not allow access to filesystem %s", fs) } return dp, nil } @@ -49,9 +48,10 @@ func (s *Sender) ListFilesystems(ctx context.Context, r *pdu.ListFilesystemReq) rfss[i] = &pdu.Filesystem{ Path: fss[i].ToString(), // FIXME: not supporting ResumeToken yet + IsPlaceholder: false, // sender FSs are never placeholders } } - res := &pdu.ListFilesystemRes{Filesystems: rfss, Empty: len(rfss) == 0} + res := &pdu.ListFilesystemRes{Filesystems: rfss} return res, nil } @@ -108,6 +108,21 @@ func (p *Sender) DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshots return doDestroySnapshots(ctx, dp, req.Snapshots) } +func (p *Sender) Ping(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + res := pdu.PingRes{ + Echo: req.GetMessage(), + } + return &res, nil +} + +func (p *Sender) PingDataconn(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + return p.Ping(ctx, req) +} + +func (p *Sender) WaitForConnectivity(ctx context.Context) error { + return nil +} + func (p *Sender) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) { dp, err := p.filterCheckFS(req.Filesystem) if err != nil { @@ -229,7 +244,7 @@ func (s *Receiver) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemR if err != nil { return nil, err } - // present without prefix, and only those that are not placeholders + // present 
filesystem without the root_fs prefix fss := make([]*pdu.Filesystem, 0, len(filtered)) for _, a := range filtered { ph, err := zfs.ZFSIsPlaceholderFilesystem(a) @@ -240,21 +255,16 @@ func (s *Receiver) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemR Error("inconsistent placeholder property") return nil, errors.New("server error: inconsistent placeholder property") // don't leak path } - if ph { - getLogger(ctx). - WithField("fs", a.ToString()). - Debug("ignoring placeholder filesystem") - continue - } getLogger(ctx). WithField("fs", a.ToString()). - Debug("non-placeholder filesystem") + WithField("is_placeholder", ph). + Debug("filesystem") a.TrimPrefix(root) - fss = append(fss, &pdu.Filesystem{Path: a.ToString()}) + fss = append(fss, &pdu.Filesystem{Path: a.ToString(), IsPlaceholder: ph}) } if len(fss) == 0 { - getLogger(ctx).Debug("no non-placeholder filesystems") - return &pdu.ListFilesystemRes{Empty: true}, nil + getLogger(ctx).Debug("no filesystems found") + return &pdu.ListFilesystemRes{}, nil } return &pdu.ListFilesystemRes{Filesystems: fss}, nil } @@ -279,6 +289,21 @@ func (s *Receiver) ListFilesystemVersions(ctx context.Context, req *pdu.ListFile return &pdu.ListFilesystemVersionsRes{Versions: rfsvs}, nil } +func (s *Receiver) Ping(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + res := pdu.PingRes{ + Echo: req.GetMessage(), + } + return &res, nil +} + +func (s *Receiver) PingDataconn(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + return s.Ping(ctx, req) +} + +func (s *Receiver) WaitForConnectivity(ctx context.Context) error { + return nil +} + func (s *Receiver) ReplicationCursor(context.Context, *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) { return nil, fmt.Errorf("ReplicationCursor not implemented for Receiver") } @@ -324,28 +349,30 @@ func (s *Receiver) Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs getLogger(ctx).WithField("visitErr", visitErr).Debug("complete tree-walk") 
if visitErr != nil { - return nil, err + return nil, visitErr } - needForceRecv := false + var clearPlaceholderProperty bool + var recvOpts zfs.RecvOptions props, err := zfs.ZFSGet(lp, []string{zfs.ZREPL_PLACEHOLDER_PROPERTY_NAME}) if err == nil { if isPlaceholder, _ := zfs.IsPlaceholder(lp, props.Get(zfs.ZREPL_PLACEHOLDER_PROPERTY_NAME)); isPlaceholder { - needForceRecv = true + recvOpts.RollbackAndForceRecv = true + clearPlaceholderProperty = true + } + } + if clearPlaceholderProperty { + if err := zfs.ZFSSetNoPlaceholder(lp); err != nil { + return nil, fmt.Errorf("cannot clear placeholder property for forced receive: %s", err) } } - args := make([]string, 0, 1) - if needForceRecv { - args = append(args, "-F") - } + getLogger(ctx).WithField("opts", fmt.Sprintf("%#v", recvOpts)).Debug("start receive command") - getLogger(ctx).Debug("start receive command") - - if err := zfs.ZFSRecv(ctx, lp.ToString(), receive, args...); err != nil { + if err := zfs.ZFSRecv(ctx, lp.ToString(), receive, recvOpts); err != nil { getLogger(ctx). WithError(err). - WithField("args", args). + WithField("opts", recvOpts). Error("zfs receive failed") return nil, err } diff --git a/replication/driver/errorclass_enumer.go b/replication/driver/errorclass_enumer.go new file mode 100644 index 0000000..0a56c0e --- /dev/null +++ b/replication/driver/errorclass_enumer.go @@ -0,0 +1,50 @@ +// Code generated by "enumer -type=errorClass"; DO NOT EDIT. 
+ +package driver + +import ( + "fmt" +) + +const _errorClassName = "errorClassUnknownerrorClassPermanenterrorClassTemporaryConnectivityRelated" + +var _errorClassIndex = [...]uint8{0, 17, 36, 74} + +func (i errorClass) String() string { + if i < 0 || i >= errorClass(len(_errorClassIndex)-1) { + return fmt.Sprintf("errorClass(%d)", i) + } + return _errorClassName[_errorClassIndex[i]:_errorClassIndex[i+1]] +} + +var _errorClassValues = []errorClass{0, 1, 2} + +var _errorClassNameToValueMap = map[string]errorClass{ + _errorClassName[0:17]: 0, + _errorClassName[17:36]: 1, + _errorClassName[36:74]: 2, +} + +// errorClassString retrieves an enum value from the enum constants string name. +// Throws an error if the param is not part of the enum. +func errorClassString(s string) (errorClass, error) { + if val, ok := _errorClassNameToValueMap[s]; ok { + return val, nil + } + return 0, fmt.Errorf("%s does not belong to errorClass values", s) +} + +// errorClassValues returns all values of the enum +func errorClassValues() []errorClass { + return _errorClassValues +} + +// IsAerrorClass returns "true" if the value is listed in the enum definition. 
"false" otherwise +func (i errorClass) IsAerrorClass() bool { + for _, v := range _errorClassValues { + if i == v { + return true + } + } + return false +} diff --git a/replication/driver/replication_driver.go b/replication/driver/replication_driver.go new file mode 100644 index 0000000..6a5a9ea --- /dev/null +++ b/replication/driver/replication_driver.go @@ -0,0 +1,638 @@ +package driver + +import ( + "context" + "errors" + "fmt" + "net" + "sort" + "strings" + "sync" + "time" + + "github.com/zrepl/zrepl/replication/report" + "github.com/zrepl/zrepl/util/chainlock" + "github.com/zrepl/zrepl/util/envconst" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type interval struct { + begin time.Time + end time.Time +} + +func (w *interval) SetZero() { + w.begin = time.Time{} + w.end = time.Time{} +} + +// Duration of 0 means indefinite length +func (w *interval) Set(begin time.Time, duration time.Duration) { + if begin.IsZero() { + panic("zero begin time now allowed") + } + w.begin = begin + w.end = begin.Add(duration) +} + +// Returns the End of the interval if it has a defined length. +// For indefinite lengths, returns the zero value. +func (w *interval) End() time.Time { + return w.end +} + +// Return a context with a deadline at the interval's end. +// If the interval has indefinite length (duration 0 on Set), return ctx as is. +// The returned context.CancelFunc can be called either way. 
+func (w *interval) ContextWithDeadlineAtEnd(ctx context.Context) (context.Context, context.CancelFunc) { + if w.begin.IsZero() { + panic("must call Set before ContextWIthDeadlineAtEnd") + } + if w.end.IsZero() { + // indefinite length, just return context as is + return ctx, func() {} + } else { + return context.WithDeadline(ctx, w.end) + } +} + +type run struct { + l *chainlock.L + + startedAt, finishedAt time.Time + + waitReconnect interval + waitReconnectError *timedError + + // the attempts attempted so far: + // All but the last in this slice must have finished with some errors. + // The last attempt may not be finished and may not have errors. + attempts []*attempt +} + +type Planner interface { + Plan(context.Context) ([]FS, error) + WaitForConnectivity(context.Context) error +} + +// an attempt represents a single planning & execution of fs replications +type attempt struct { + planner Planner + + l *chainlock.L + + startedAt, finishedAt time.Time + + // after Planner.Plan was called, planErr and fss are mutually exclusive with regards to nil-ness + // if both are nil, it must be assumed that Planner.Plan is active + planErr *timedError + fss []*fs +} + +type timedError struct { + Err error + Time time.Time +} + +func newTimedError(err error, t time.Time) *timedError { + if err == nil { + panic("error must be non-nil") + } + if t.IsZero() { + panic("t must be non-zero") + } + return &timedError{err, t} +} + +func (e *timedError) IntoReportError() *report.TimedError { + if e == nil { + return nil + } + return report.NewTimedError(e.Err.Error(), e.Time) +} + +type FS interface { + // Returns true if this FS and fs refer to the same filesystem returned + // by Planner.Plan in a previous attempt. + EqualToPreviousAttempt(fs FS) bool + // The returned steps are assumed to be dependent on exactly + // their direct predecessors in the returned list. 
+ PlanFS(context.Context) ([]Step, error) + ReportInfo() *report.FilesystemInfo +} + +type Step interface { + // Returns true iff the target snapshot is the same for this Step and other. + // We do not use TargetDate to avoid problems with wrong system time on + // snapshot creation. + // + // Implementations can assume that `other` is a step of the same filesystem, + // although maybe from a previous attempt. + // (`same` as defined by FS.EqualToPreviousAttempt) + // + // Note that TargetEquals should return true in a situation with one + // originally sent snapshot and a subsequent attempt's step that uses + // resumable send & recv. + TargetEquals(other Step) bool + TargetDate() time.Time + Step(context.Context) error + ReportInfo() *report.StepInfo +} + +type fs struct { + fs FS + + l *chainlock.L + + planning struct { + done bool + err *timedError + } + + // valid iff planning.done && planning.err == nil + planned struct { + // valid iff planning.done && planning.err == nil + stepErr *timedError + // all steps, in the order in which they must be completed + steps []*step + // index into steps, pointing at the step that is currently executing + // if step >= len(steps), no more work needs to be done + step int + } +} + +type step struct { + l *chainlock.L + step Step +} + +type ReportFunc func() *report.Report +type WaitFunc func(block bool) (done bool) + +var maxAttempts = envconst.Int64("ZREPL_REPLICATION_MAX_ATTEMPTS", 3) +var reconnectHardFailTimeout = envconst.Duration("ZREPL_REPLICATION_RECONNECT_HARD_FAIL_TIMEOUT", 10*time.Minute) + +func Do(ctx context.Context, planner Planner) (ReportFunc, WaitFunc) { + log := getLog(ctx) + l := chainlock.New() + run := &run{ + l: l, + startedAt: time.Now(), + } + + done := make(chan struct{}) + go func() { + defer close(done) + + defer run.l.Lock().Unlock() + log.Debug("begin run") + defer log.Debug("run ended") + var prev *attempt + mainLog := log + for ano := 0; ano < int(maxAttempts) || maxAttempts == 0; ano++ { + 
log := mainLog.WithField("attempt_number", ano) + log.Debug("start attempt") + + run.waitReconnect.SetZero() + run.waitReconnectError = nil + + // do current attempt + cur := &attempt{ + l: l, + startedAt: time.Now(), + planner: planner, + } + run.attempts = append(run.attempts, cur) + run.l.DropWhile(func() { + cur.do(ctx, prev) + }) + prev = cur + if ctx.Err() != nil { + log.WithError(ctx.Err()).Info("context error") + return + } + + // error classification, bail out if done / permanent error + rep := cur.report() + log.WithField("attempt_state", rep.State).Debug("attempt state") + errRep := cur.errorReport() + + if rep.State == report.AttemptDone { + log.Debug("attempt completed successfully") + break + } + + mostRecentErr, mostRecentErrClass := errRep.MostRecent() + log.WithField("most_recent_err", mostRecentErr).WithField("most_recent_err_class", mostRecentErrClass).Debug("most recent error used for re-connect decision") + if mostRecentErr == nil { + // inconsistent reporting, let's bail out + log.Warn("attempt does not report done but error report does not report errors, aborting run") + break + } + log.WithError(mostRecentErr.Err).Error("most recent error in this attempt") + shouldReconnect := mostRecentErrClass == errorClassTemporaryConnectivityRelated + log.WithField("reconnect_decision", shouldReconnect).Debug("reconnect decision made") + if shouldReconnect { + run.waitReconnect.Set(time.Now(), reconnectHardFailTimeout) + log.WithField("deadline", run.waitReconnect.End()).Error("temporary connectivity-related error identified, start waiting for reconnect") + var connectErr error + var connectErrTime time.Time + run.l.DropWhile(func() { + ctx, cancel := run.waitReconnect.ContextWithDeadlineAtEnd(ctx) + defer cancel() + connectErr = planner.WaitForConnectivity(ctx) + connectErrTime = time.Now() + }) + if connectErr == nil { + log.Error("reconnect successful") // same level as 'begin with reconnect' message above + continue + } else { + 
run.waitReconnectError = newTimedError(connectErr, connectErrTime) + log.WithError(connectErr).Error("reconnecting failed, aborting run") + break + } + } else { + log.Error("most recent error cannot be solved by reconnecting, aborting run") + return + } + + } + + }() + + wait := func(block bool) bool { + if block { + <-done + } + select { + case <-done: + return true + default: + return false + } + } + report := func() *report.Report { + defer run.l.Lock().Unlock() + return run.report() + } + return report, wait +} + +func (a *attempt) do(ctx context.Context, prev *attempt) { + pfss, err := a.planner.Plan(ctx) + errTime := time.Now() + defer a.l.Lock().Unlock() + if err != nil { + a.planErr = newTimedError(err, errTime) + a.fss = nil + a.finishedAt = time.Now() + return + } + + for _, pfs := range pfss { + fs := &fs{ + fs: pfs, + l: a.l, + } + a.fss = append(a.fss, fs) + } + + prevs := make(map[*fs]*fs) + { + prevFSs := make(map[*fs][]*fs, len(pfss)) + if prev != nil { + debug("previous attempt has %d fss", len(a.fss)) + for _, fs := range a.fss { + for _, prevFS := range prev.fss { + if fs.fs.EqualToPreviousAttempt(prevFS.fs) { + l := prevFSs[fs] + l = append(l, prevFS) + prevFSs[fs] = l + } + } + } + } + type inconsistency struct { + cur *fs + prevs []*fs + } + var inconsistencies []inconsistency + for cur, fss := range prevFSs { + if len(fss) > 1 { + inconsistencies = append(inconsistencies, inconsistency{cur, fss}) + } + } + sort.SliceStable(inconsistencies, func(i, j int) bool { + return inconsistencies[i].cur.fs.ReportInfo().Name < inconsistencies[j].cur.fs.ReportInfo().Name + }) + if len(inconsistencies) > 0 { + var msg strings.Builder + msg.WriteString("cannot determine filesystem correspondences between different attempts:\n") + var inconsistencyLines []string + for _, i := range inconsistencies { + var prevNames []string + for _, prev := range i.prevs { + prevNames = append(prevNames, prev.fs.ReportInfo().Name) + } + l := fmt.Sprintf(" %s => %v", 
i.cur.fs.ReportInfo().Name, prevNames) + inconsistencyLines = append(inconsistencyLines, l) + } + fmt.Fprintf(&msg, strings.Join(inconsistencyLines, "\n")) + now := time.Now() + a.planErr = newTimedError(errors.New(msg.String()), now) + a.fss = nil + a.finishedAt = now + return + } + for cur, fss := range prevFSs { + if len(fss) > 0 { + prevs[cur] = fss[0] + } + } + } + // invariant: prevs contains an entry for each unambigious correspondence + + stepQueue := newStepQueue() + defer stepQueue.Start(1)() // TODO parallel replication + var fssesDone sync.WaitGroup + for _, f := range a.fss { + fssesDone.Add(1) + go func(f *fs) { + defer fssesDone.Done() + f.do(ctx, stepQueue, prevs[f]) + }(f) + } + a.l.DropWhile(func() { + fssesDone.Wait() + }) + a.finishedAt = time.Now() +} + +func (fs *fs) do(ctx context.Context, pq *stepQueue, prev *fs) { + psteps, err := fs.fs.PlanFS(ctx) + errTime := time.Now() + defer fs.l.Lock().Unlock() + debug := debugPrefix("fs=%s", fs.fs.ReportInfo().Name) + fs.planning.done = true + if err != nil { + fs.planning.err = newTimedError(err, errTime) + return + } + for _, pstep := range psteps { + step := &step{ + l: fs.l, + step: pstep, + } + fs.planned.steps = append(fs.planned.steps, step) + } + debug("iniital len(fs.planned.steps) = %d", len(fs.planned.steps)) + + // for not-first attempts, only allow fs.planned.steps + // up to including the originally planned target snapshot + if prev != nil && prev.planning.done && prev.planning.err == nil { + prevUncompleted := prev.planned.steps[prev.planned.step:] + if len(prevUncompleted) == 0 { + debug("prevUncompleted is empty") + return + } + if len(fs.planned.steps) == 0 { + debug("fs.planned.steps is empty") + return + } + prevFailed := prevUncompleted[0] + curFirst := fs.planned.steps[0] + // we assume that PlanFS retries prevFailed (using curFirst) + if !prevFailed.step.TargetEquals(curFirst.step) { + debug("Targets don't match") + // Two options: + // A: planning algorithm is broken + // B: 
manual user intervention inbetween + // Neither way will we make progress, so let's error out + stepFmt := func(step *step) string { + r := step.report() + s := r.Info + if r.IsIncremental() { + return fmt.Sprintf("%s=>%s", s.From, s.To) + } else { + return fmt.Sprintf("full=>%s", s.To) + } + } + msg := fmt.Sprintf("last attempt's uncompleted step %s does not correspond to this attempt's first planned step %s", + stepFmt(prevFailed), stepFmt(curFirst)) + fs.planned.stepErr = newTimedError(errors.New(msg), time.Now()) + return + } + // only allow until step targets diverge + min := len(prevUncompleted) + if min > len(fs.planned.steps) { + min = len(fs.planned.steps) + } + diverge := 0 + for ; diverge < min; diverge++ { + debug("diverge compare iteration %d", diverge) + if !fs.planned.steps[diverge].step.TargetEquals(prevUncompleted[diverge].step) { + break + } + } + debug("diverge is %d", diverge) + fs.planned.steps = fs.planned.steps[0:diverge] + } + debug("post-prev-merge len(fs.planned.steps) = %d", len(fs.planned.steps)) + + for i, s := range fs.planned.steps { + var ( + err error + errTime time.Time + ) + // lock must not be held while executing step in order for reporting to work + fs.l.DropWhile(func() { + targetDate := s.step.TargetDate() + defer pq.WaitReady(fs, targetDate)() + err = s.step.Step(ctx) // no shadow + errTime = time.Now() // no shadow + }) + if err != nil { + fs.planned.stepErr = newTimedError(err, errTime) + break + } + fs.planned.step = i + 1 // fs.planned.step must be == len(fs.planned.steps) if all went OK + } +} + +// caller must hold lock l +func (r *run) report() *report.Report { + report := &report.Report{ + Attempts: make([]*report.AttemptReport, len(r.attempts)), + StartAt: r.startedAt, + FinishAt: r.finishedAt, + WaitReconnectSince: r.waitReconnect.begin, + WaitReconnectUntil: r.waitReconnect.end, + WaitReconnectError: r.waitReconnectError.IntoReportError(), + } + for i := range report.Attempts { + report.Attempts[i] = 
r.attempts[i].report() + } + return report +} + +// caller must hold lock l +func (a *attempt) report() *report.AttemptReport { + + r := &report.AttemptReport{ + // State is set below + Filesystems: make([]*report.FilesystemReport, len(a.fss)), + StartAt: a.startedAt, + FinishAt: a.finishedAt, + PlanError: a.planErr.IntoReportError(), + } + + for i := range r.Filesystems { + r.Filesystems[i] = a.fss[i].report() + } + + state := report.AttemptPlanning + if a.planErr != nil { + state = report.AttemptPlanningError + } else if a.fss != nil { + if a.finishedAt.IsZero() { + state = report.AttemptFanOutFSs + } else { + fsWithError := false + for _, s := range r.Filesystems { + fsWithError = fsWithError || s.Error() != nil + } + state = report.AttemptDone + if fsWithError { + state = report.AttemptFanOutError + } + } + } + r.State = state + + return r +} + +// caller must hold lock l +func (f *fs) report() *report.FilesystemReport { + state := report.FilesystemPlanningErrored + if f.planning.err == nil { + if f.planning.done { + if f.planned.stepErr != nil { + state = report.FilesystemSteppingErrored + } else if f.planned.step < len(f.planned.steps) { + state = report.FilesystemStepping + } else { + state = report.FilesystemDone + } + } else { + state = report.FilesystemPlanning + } + } + r := &report.FilesystemReport{ + Info: f.fs.ReportInfo(), + State: state, + PlanError: f.planning.err.IntoReportError(), + StepError: f.planned.stepErr.IntoReportError(), + Steps: make([]*report.StepReport, len(f.planned.steps)), + CurrentStep: f.planned.step, + } + for i := range r.Steps { + r.Steps[i] = f.planned.steps[i].report() + } + return r +} + +// caller must hold lock l +func (s *step) report() *report.StepReport { + r := &report.StepReport{ + Info: s.step.ReportInfo(), + } + return r +} + +type stepErrorReport struct { + err *timedError + step int +} + +//go:generate enumer -type=errorClass +type errorClass int + +const ( + errorClassUnknown errorClass = iota + 
// errorReport is the collection of errors of a single attempt,
// as assembled by (*attempt).errorReport.
type errorReport struct {
	// flattened holds the attempt's planning error (if any), followed by at
	// most one error per filesystem: its planning error or, failing that,
	// its step error.
	flattened []*timedError
	// byClass groups the entries of flattened by their errorClass.
	// Each slice is sorted DESCending by err time.
	byClass map[errorClass][]*timedError
}
// debugEnabled controls whether debug produces any output.
// It is switched on by init if ZREPL_REPLICATION_DRIVER_DEBUG is non-empty.
var debugEnabled bool = false

func init() {
	if os.Getenv("ZREPL_REPLICATION_DRIVER_DEBUG") != "" {
		debugEnabled = true
	}
}

// debug writes the formatted message, prefixed with "repl: driver: " and
// terminated by a newline, to os.Stderr if debugging is enabled.
func debug(format string, args ...interface{}) {
	if debugEnabled {
		fmt.Fprintf(os.Stderr, "repl: driver: %s\n", fmt.Sprintf(format, args...))
	}
}

// debugFunc is a printf-style debug logging function.
type debugFunc func(format string, args ...interface{})

// debugPrefix returns a debugFunc that behaves like debug but prepends the
// given (formatted) prefix, followed by ": ", to every message.
func debugPrefix(prefixFormat string, prefixFormatArgs ...interface{}) debugFunc {
	prefix := fmt.Sprintf(prefixFormat, prefixFormatArgs...)
	return func(format string, args ...interface{}) {
		// args must be expanded: passing the slice itself would hand Sprintf
		// a single []interface{} operand and garble the message.
		debug("%s: %s", prefix, fmt.Sprintf(format, args...))
	}
}
other.(*mockFS).name +} + +func (f *mockFS) PlanFS(ctx context.Context) ([]Step, error) { + if f.steps != nil { + panic("PlanFS used twice") + } + switch f.name { + case "zroot/one": + f.steps = []Step{ + &mockStep{ + fs: f, + ident: "a", + duration: 1 * time.Second, + targetDate: time.Unix(2, 0), + }, + &mockStep{ + fs: f, + ident: "b", + duration: 1 * time.Second, + targetDate: time.Unix(10, 0), + }, + &mockStep{ + fs: f, + ident: "c", + duration: 1 * time.Second, + targetDate: time.Unix(20, 0), + }, + } + case "zroot/two": + f.steps = []Step{ + &mockStep{ + fs: f, + ident: "u", + duration: 500 * time.Millisecond, + targetDate: time.Unix(15, 0), + }, + &mockStep{ + fs: f, + duration: 500 * time.Millisecond, + ident: "v", + targetDate: time.Unix(30, 0), + }, + } + default: + panic("unimplemented") + } + + return f.steps, nil +} + +func (f *mockFS) ReportInfo() *report.FilesystemInfo { + return &report.FilesystemInfo{Name: f.name} +} + +type mockStep struct { + fs *mockFS + ident string + duration time.Duration + targetDate time.Time + + // filled by method Step + globalCtr uint32 +} + +func (f *mockStep) String() string { + return fmt.Sprintf("%s{%s} targetDate=%s globalCtr=%v", f.fs.name, f.ident, f.targetDate, f.globalCtr) +} + +func (f *mockStep) Step(ctx context.Context) error { + f.globalCtr = atomic.AddUint32(f.fs.globalStepCounter, 1) + time.Sleep(f.duration) + return nil +} + +func (f *mockStep) TargetEquals(s Step) bool { + return f.ident == s.(*mockStep).ident +} + +func (f *mockStep) TargetDate() time.Time { + return f.targetDate +} + +func (f *mockStep) ReportInfo() *report.StepInfo { + return &report.StepInfo{From: f.ident, To: f.ident, BytesExpected: 100, BytesReplicated: 25} +} + +// TODO: add meaningful validation (i.e. actual checks) +// Since the stepqueue is not deterministic due to scheduler jitter, +// we cannot test for any definitive sequence of steps here. 
+// Such checks would further only be sensible for a non-concurrent step-queue, +// but we're going to have concurrent replication in the future. +// +// For the time being, let's just exercise the code a bit. +func TestReplication(t *testing.T) { + + ctx := context.Background() + + mp := &mockPlanner{} + getReport, wait := Do(ctx, mp) + begin := time.Now() + fireAt := []time.Duration{ + // the following values are relative to the start + 500 * time.Millisecond, // planning + 1500 * time.Millisecond, // nothing is done, a is running + 2500 * time.Millisecond, // a done, b running + 3250 * time.Millisecond, // a,b done, u running + 3750 * time.Millisecond, // a,b,u done, c running + 4750 * time.Millisecond, // a,b,u,c done, v running + 5250 * time.Millisecond, // a,b,u,c,v done + } + reports := make([]*report.Report, len(fireAt)) + for i := range fireAt { + sleepUntil := begin.Add(fireAt[i]) + time.Sleep(sleepUntil.Sub(time.Now())) + reports[i] = getReport() + // uncomment for viewing non-diffed results + // t.Logf("report @ %6.4f:\n%s", fireAt[i].Seconds(), pretty.Sprint(reports[i])) + } + waitBegin := time.Now() + wait(true) + waitDuration := time.Now().Sub(waitBegin) + assert.True(t, waitDuration < 10*time.Millisecond, "%v", waitDuration) // and that's gratious + + prev, err := json.Marshal(reports[0]) + require.NoError(t, err) + for _, r := range reports[1:] { + this, err := json.Marshal(r) + require.NoError(t, err) + differ := jsondiff.New() + diff, err := differ.Compare(prev, this) + require.NoError(t, err) + df := jsondiffformatter.NewDeltaFormatter() + _, err = df.Format(diff) + require.NoError(t, err) + // uncomment the following line to get json diffs between each captured step + // t.Logf("%s", res) + prev, err = json.Marshal(r) + require.NoError(t, err) + } + + steps := make([]*mockStep, 0) + for _, fs := range mp.fss { + for _, step := range fs.(*mockFS).steps { + steps = append(steps, step.(*mockStep)) + } + } + + // sort steps in pq order (although, 
remember, pq is not deterministic) + sort.Slice(steps, func(i, j int) bool { + return steps[i].targetDate.Before(steps[j].targetDate) + }) + + // manual inspection of the globalCtr value should show that, despite + // scheduler-dependent behavior of pq, steps should generally be taken + // from oldest to newest target date (globally, not per FS). + t.Logf("steps sorted by target date:") + for _, step := range steps { + t.Logf("\t%s", step) + } + +} diff --git a/replication/driver/replication_stepqueue.go b/replication/driver/replication_stepqueue.go new file mode 100644 index 0000000..a6486c0 --- /dev/null +++ b/replication/driver/replication_stepqueue.go @@ -0,0 +1,163 @@ +package driver + +import ( + "container/heap" + "time" + + "github.com/zrepl/zrepl/util/chainlock" +) + +type stepQueueRec struct { + ident interface{} + targetDate time.Time + wakeup chan StepCompletedFunc +} + +type stepQueue struct { + stop chan struct{} + reqs chan stepQueueRec +} + +type stepQueueHeapItem struct { + idx int + req stepQueueRec +} +type stepQueueHeap []*stepQueueHeapItem + +func (h stepQueueHeap) Less(i, j int) bool { + return h[i].req.targetDate.Before(h[j].req.targetDate) +} + +func (h stepQueueHeap) Swap(i, j int) { + h[i], h[j] = h[j], h[i] + h[i].idx = i + h[j].idx = j +} + +func (h stepQueueHeap) Len() int { + return len(h) +} + +func (h *stepQueueHeap) Push(elem interface{}) { + hitem := elem.(*stepQueueHeapItem) + hitem.idx = h.Len() + *h = append(*h, hitem) +} + +func (h *stepQueueHeap) Pop() interface{} { + elem := (*h)[h.Len()-1] + elem.idx = -1 + *h = (*h)[:h.Len()-1] + return elem +} + +// returned stepQueue must be closed with method Close +func newStepQueue() *stepQueue { + q := &stepQueue{ + stop: make(chan struct{}), + reqs: make(chan stepQueueRec), + } + return q +} + +// the returned done function must be called to free resources +// allocated by the call to Start +// +// No WaitReady calls must be active at the time done is called +// The behavior of 
// Start launches the goroutines that serve WaitReady requests for q.
// At most concurrency requests are woken up and not yet completed at any time;
// among the pending requests, the one with the earliest target date is woken first.
// Start panics if concurrency < 1.
//
// the returned done function must be called to free resources
// allocated by the call to Start
//
// No WaitReady calls must be active at the time done is called
// The behavior of calling WaitReady after done was called is undefined
func (q *stepQueue) Start(concurrency int) (done func()) {
	if concurrency < 1 {
		panic("concurrency must be >= 1")
	}
	// l protects pending and queueItems
	// (stopped and active are only accessed with l held as well)
	l := chainlock.New()
	pendingCond := l.NewCond()
	// priority queue
	pending := &stepQueueHeap{}
	// ident => queueItem
	queueItems := make(map[interface{}]*stepQueueHeapItem)
	// stopped is used for cancellation of "wake" goroutine
	stopped := false
	// active counts the requests that were woken up but have not yet
	// called their StepCompletedFunc
	active := 0
	go func() { // "stopper" goroutine
		// q.stop is closed by the returned done function
		<-q.stop
		defer l.Lock().Unlock()
		stopped = true
		pendingCond.Broadcast()
	}()
	go func() { // "reqs" goroutine
		for {
			select {
			case <-q.stop:
				// non-blocking check for a request that arrived concurrently
				// with the stop, which violates the contract documented above
				select {
				case <-q.reqs:
					panic("WaitReady call active while calling Close")
				default:
					return
				}
			case req := <-q.reqs:
				// closure so that the deferred unlock runs per request
				func() {
					defer l.Lock().Unlock()
					if _, ok := queueItems[req.ident]; ok {
						panic("WaitReady must not be called twice for the same ident")
					}
					qitem := &stepQueueHeapItem{
						req: req,
					}
					queueItems[req.ident] = qitem
					heap.Push(pending, qitem)
					// let the "wake" goroutine re-evaluate its wait condition
					pendingCond.Broadcast()
				}()
			}
		}
	}()
	go func() { // "wake" goroutine
		// l is held for the lifetime of this goroutine
		// (presumably released while blocked in pendingCond.Wait, like sync.Cond — confirm against chainlock)
		defer l.Lock().Unlock()
		for {

			// wait until stopped, or until there is both a free slot and a pending request
			for !stopped && (active >= concurrency || pending.Len() == 0) {
				pendingCond.Wait()
			}
			if stopped {
				return
			}
			// not expected to trigger: the wait loop above only exits
			// un-stopped if pending is non-empty
			if pending.Len() <= 0 {
				return
			}
			active++
			next := heap.Pop(pending).(*stepQueueHeapItem).req
			delete(queueItems, next.ident)

			// Hand the completion callback to the waiting WaitReady caller.
			// NOTE: this send happens with l held.
			next.wakeup <- func() {
				defer l.Lock().Unlock()
				active--
				pendingCond.Broadcast()
			}
		}
	}()

	done = func() {
		close(q.stop)
	}
	return done
}
+func (q *stepQueue) WaitReady(ident interface{}, targetDate time.Time) StepCompletedFunc { + if targetDate.IsZero() { + panic("targetDate of zero is reserved for marking Done") + } + return q.sendAndWaitForWakeup(ident, targetDate) +} diff --git a/replication/driver/replication_stepqueue_test.go b/replication/driver/replication_stepqueue_test.go new file mode 100644 index 0000000..fc0f316 --- /dev/null +++ b/replication/driver/replication_stepqueue_test.go @@ -0,0 +1,177 @@ +package driver + +import ( + "fmt" + "math" + "sort" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/montanaflynn/stats" + "github.com/stretchr/testify/assert" +) + +// FIXME: this test relies on timing and is thus rather flaky +// (relies on scheduler responsivity of < 500ms) +func TestPqNotconcurrent(t *testing.T) { + var ctr uint32 + q := newStepQueue() + var wg sync.WaitGroup + wg.Add(4) + go func() { + defer wg.Done() + defer q.WaitReady("1", time.Unix(9999, 0))() + ret := atomic.AddUint32(&ctr, 1) + assert.Equal(t, uint32(1), ret) + time.Sleep(1 * time.Second) + }() + + // give goroutine "1" 500ms to enter queue, get the active slot and enter time.Sleep + defer q.Start(1)() + time.Sleep(500 * time.Millisecond) + + // while "1" is still running, queue in "2", "3" and "4" + go func() { + defer wg.Done() + defer q.WaitReady("2", time.Unix(2, 0))() + ret := atomic.AddUint32(&ctr, 1) + assert.Equal(t, uint32(2), ret) + }() + go func() { + defer wg.Done() + defer q.WaitReady("3", time.Unix(3, 0))() + ret := atomic.AddUint32(&ctr, 1) + assert.Equal(t, uint32(3), ret) + }() + go func() { + defer wg.Done() + defer q.WaitReady("4", time.Unix(4, 0))() + ret := atomic.AddUint32(&ctr, 1) + assert.Equal(t, uint32(4), ret) + }() + + wg.Wait() +} + +type record struct { + fs int + step int + globalCtr uint32 + wakeAt time.Duration // relative to begin +} + +func (r record) String() string { + return fmt.Sprintf("fs %08d step %08d globalCtr %08d wakeAt %2.8f", r.fs, r.step, r.globalCtr, 
// This test uses the step queue concurrently, simulating the following scenario:
// Given a number of filesystems F, each filesystem has N steps to take.
// The number of concurrent steps is limited to C.
// The target date for each step is derived from the step number.
// Hence, there are always F filesystems runnable (calling WaitReady).
// The priority queue prioritizes steps with lower target date (= lower step number).
// Hence, all steps with lower numbers should be woken up before steps with higher numbers.
// However, scheduling is not 100% deterministic (runtime, OS scheduler, etc).
// Hence, perform some statistics on the wakeup times and assert that the mean wakeup
// times for each step are close together.
func TestPqConcurrent(t *testing.T) {

	q := newStepQueue()
	var wg sync.WaitGroup
	filesystems := 100
	stepsPerFS := 20
	sleepTimePerStep := 50 * time.Millisecond
	wg.Add(filesystems)
	var globalCtr uint32

	begin := time.Now()
	// buffered so that no filesystem goroutine blocks when delivering its records
	records := make(chan []record, filesystems)
	for fs := 0; fs < filesystems; fs++ {
		go func(fs int) {
			defer wg.Done()
			recs := make([]record, 0)
			for step := 0; step < stepsPerFS; step++ {
				pos := atomic.AddUint32(&globalCtr, 1)
				// NOTE: t shadows the *testing.T of the test within this loop body
				t := time.Unix(int64(step), 0)
				done := q.WaitReady(fs, t)
				wakeAt := time.Now().Sub(begin)
				// simulate the work of the step while occupying a concurrency slot
				time.Sleep(sleepTimePerStep)
				done()
				recs = append(recs, record{fs, step, pos, wakeAt})
			}
			records <- recs
		}(fs)
	}
	concurrency := 5
	// the queue is started only after the filesystem goroutines were spawned
	defer q.Start(concurrency)()
	wg.Wait()
	close(records)
	t.Logf("loop done")

	flattenedRecs := make([]record, 0)
	for recs := range records {
		flattenedRecs = append(flattenedRecs, recs...)
	}

	sort.Slice(flattenedRecs, func(i, j int) bool {
		return flattenedRecs[i].globalCtr < flattenedRecs[j].globalCtr
	})

	// step number => wake-up times (in seconds since begin) of that step across all filesystems
	wakeTimesByStep := map[int][]float64{}
	for _, rec := range flattenedRecs {
		wakeTimes, ok := wakeTimesByStep[rec.step]
		if !ok {
			wakeTimes = []float64{}
		}
		wakeTimes = append(wakeTimes, rec.wakeAt.Seconds())
		wakeTimesByStep[rec.step] = wakeTimes
	}

	meansByStepId := make([]float64, stepsPerFS)
	interQuartileRangesByStepIdx := make([]float64, stepsPerFS)
	for step := 0; step < stepsPerFS; step++ {
		// errors returned by the stats functions are ignored throughout this loop
		t.Logf("step %d", step)
		mean, _ := stats.Mean(wakeTimesByStep[step])
		meansByStepId[step] = mean
		t.Logf("\tmean: %v", mean)
		median, _ := stats.Median(wakeTimesByStep[step])
		t.Logf("\tmedian: %v", median)
		midhinge, _ := stats.Midhinge(wakeTimesByStep[step])
		t.Logf("\tmidhinge: %v", midhinge)
		min, _ := stats.Min(wakeTimesByStep[step])
		t.Logf("\tmin: %v", min)
		max, _ := stats.Max(wakeTimesByStep[step])
		t.Logf("\tmax: %v", max)
		quartiles, _ := stats.Quartile(wakeTimesByStep[step])
		t.Logf("\t%#v", quartiles)
		interQuartileRange, _ := stats.InterQuartileRange(wakeTimesByStep[step])
		t.Logf("\tinter-quartile range: %v", interQuartileRange)
		interQuartileRangesByStepIdx[step] = interQuartileRange
	}

	iqrMean, _ := stats.Mean(interQuartileRangesByStepIdx)
	t.Logf("inter-quartile-range mean: %v", iqrMean)
	iqrDev, _ := stats.StandardDeviation(interQuartileRangesByStepIdx)
	t.Logf("inter-quartile-range deviation: %v", iqrDev)

	// each step should have the same "distribution" (=~ "spread")
	assert.True(t, iqrDev < 0.01)

	// lower bound for the time it takes all filesystems to complete one step number,
	// given that only `concurrency` steps sleep at the same time
	minTimeForAllStepsWithIdxI := sleepTimePerStep.Seconds() * float64(filesystems) / float64(concurrency)
	t.Logf("minTimeForAllStepsWithIdxI = %11.8f", minTimeForAllStepsWithIdxI)
	for i, mean := range meansByStepId {
		// we can't just do (i + 0.5) * minTimeforAllStepsWithIdxI
		// because this doesn't account for drift
		idealMean := 0.5 * minTimeForAllStepsWithIdxI
		if i > 0 {
			previousMean := meansByStepId[i-1]
			idealMean = previousMean + minTimeForAllStepsWithIdxI
		}
		deltaFromIdeal := idealMean - mean
		t.Logf("step %02d delta from ideal mean wake time: %11.8f - %11.8f = %11.8f", i, idealMean, mean, deltaFromIdeal)
		assert.True(t, math.Abs(deltaFromIdeal) < 0.05)
	}

}
-type Receiver interface { - // Receive sends r and sendStream (the latter containing a ZFS send stream) - // to the parent github.com/zrepl/zrepl/replication.Endpoint. - Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error) -} - -type StepReport struct { - From, To string - Status StepState - Problem string - Bytes int64 - ExpectedBytes int64 // 0 means no size estimate possible -} - -type Report struct { - Filesystem string - Status string - Problem string - Completed, Pending []*StepReport -} - -//go:generate enumer -type=State -type State uint - -const ( - Ready State = 1 << iota - Completed -) - -type Error interface { - error - Temporary() bool - ContextErr() bool - LocalToFS() bool -} - -type Replication struct { - promBytesReplicated prometheus.Counter - - fs string - - // lock protects all fields below it in this struct, but not the data behind pointers - lock sync.Mutex - state State - err Error - completed, pending []*ReplicationStep -} - -func (f *Replication) State() State { - f.lock.Lock() - defer f.lock.Unlock() - return f.state -} - -func (f *Replication) FS() string { return f.fs } - -// returns zero value time.Time{} if no more pending steps -func (f *Replication) NextStepDate() time.Time { - if len(f.pending) == 0 { - return time.Time{} - } - return f.pending[0].to.SnapshotTime() -} - -func (f *Replication) Err() Error { - f.lock.Lock() - defer f.lock.Unlock() - return f.err -} - -func (f *Replication) CanRetry() bool { - f.lock.Lock() - defer f.lock.Unlock() - if f.state == Completed { - return false - } - if f.state != Ready { - panic(fmt.Sprintf("implementation error: %v", f.state)) - } - if f.err == nil { - return true - } - return f.err.Temporary() -} - -func (f *Replication) UpdateSizeEsitmate(ctx context.Context, sender Sender) error { - f.lock.Lock() - defer f.lock.Unlock() - for _, e := range f.pending { - if err := e.updateSizeEstimate(ctx, sender); err != nil { - return err - } - } - 
return nil -} - -type ReplicationBuilder struct { - r *Replication -} - -func BuildReplication(fs string, promBytesReplicated prometheus.Counter) *ReplicationBuilder { - return &ReplicationBuilder{&Replication{fs: fs, promBytesReplicated: promBytesReplicated}} -} - -func (b *ReplicationBuilder) AddStep(from, to FilesystemVersion) *ReplicationBuilder { - step := &ReplicationStep{ - state: StepReplicationReady, - parent: b.r, - from: from, - to: to, - } - b.r.pending = append(b.r.pending, step) - return b -} - -func (b *ReplicationBuilder) Done() (r *Replication) { - if len(b.r.pending) > 0 { - b.r.state = Ready - } else { - b.r.state = Completed - } - r = b.r - b.r = nil - return r -} - -type ReplicationConflictError struct { - Err error -} - -func (e *ReplicationConflictError) Timeout() bool { return false } - -func (e *ReplicationConflictError) Temporary() bool { return false } - -func (e *ReplicationConflictError) Error() string { return fmt.Sprintf("permanent error: %s", e.Err.Error()) } - -func (e *ReplicationConflictError) LocalToFS() bool { return true } - -func (e *ReplicationConflictError) ContextErr() bool { return false } - -func NewReplicationConflictError(fs string, err error) *Replication { - return &Replication{ - state: Completed, - fs: fs, - err: &ReplicationConflictError{Err: err}, - } -} - -//go:generate enumer -type=StepState -type StepState uint - -const ( - StepReplicationReady StepState = 1 << iota - StepMarkReplicatedReady - StepCompleted -) - -func (s StepState) IsTerminal() bool { return s == StepCompleted } - -type FilesystemVersion interface { - SnapshotTime() time.Time - GetName() string // name without @ or # - RelName() string // name with @ or # -} - -type ReplicationStep struct { - // only protects state, err - // from, to and parent are assumed to be immutable - lock sync.Mutex - - state StepState - from, to FilesystemVersion - parent *Replication - - // both retry and permanent error - err error - - byteCounter 
bytecounter.StreamCopier - expectedSize int64 // 0 means no size estimate present / possible -} - -func (f *Replication) Retry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) Error { - - var u updater = func(fu func(*Replication)) State { - f.lock.Lock() - defer f.lock.Unlock() - if fu != nil { - fu(f) - } - return f.state - } - - - var current *ReplicationStep - pre := u(nil) - getLogger(ctx).WithField("fsrep_state", pre).Debug("begin fsrep.Retry") - defer func() { - post := u(nil) - getLogger(ctx).WithField("fsrep_transition", post).Debug("end fsrep.Retry") - }() - - st := u(func(f *Replication) { - if len(f.pending) == 0 { - f.state = Completed - return - } - current = f.pending[0] - }) - if st == Completed { - return nil - } - if st != Ready { - panic(fmt.Sprintf("implementation error: %v", st)) - } - - stepCtx := WithLogger(ctx, getLogger(ctx).WithField("step", current)) - getLogger(stepCtx).Debug("take step") - err := current.Retry(stepCtx, ka, sender, receiver) - if err != nil { - getLogger(stepCtx).WithError(err).Error("step could not be completed") - } - - u(func(fsr *Replication) { - if err != nil { - f.err = &StepError{stepStr: current.String(), err: err} - return - } - if err == nil && current.state != StepCompleted { - panic(fmt.Sprintf("implementation error: %v", current.state)) - } - f.err = nil - f.completed = append(f.completed, current) - f.pending = f.pending[1:] - if len(f.pending) > 0 { - f.state = Ready - } else { - f.state = Completed - } - }) - var retErr Error = nil - u(func(fsr *Replication) { - retErr = fsr.err - }) - return retErr -} - -type updater func(func(fsr *Replication)) State - -type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state - -type StepError struct { - stepStr string - err error -} - -var _ Error = &StepError{} - -func (e StepError) Error() string { - if e.LocalToFS() { - return fmt.Sprintf("step %s failed: %s", e.stepStr, e.err) - } 
- return e.err.Error() -} - -func (e StepError) Timeout() bool { - if neterr, ok := e.err.(net.Error); ok { - return neterr.Timeout() - } - return false -} - -func (e StepError) Temporary() bool { - if neterr, ok := e.err.(net.Error); ok { - return neterr.Temporary() - } - return false -} - -func (e StepError) LocalToFS() bool { - if _, ok := e.err.(net.Error); ok { - return false - } - return true // conservative approximation: we'd like to check for specific errors returned over RPC here... -} - -func (e StepError) ContextErr() bool { - switch e.err { - case context.Canceled: - return true - case context.DeadlineExceeded: - return true - } - return false -} - -func (fsr *Replication) Report() *Report { - fsr.lock.Lock() - defer fsr.lock.Unlock() - - rep := Report{ - Filesystem: fsr.fs, - Status: fsr.state.String(), - } - - if fsr.err != nil && fsr.err.LocalToFS() { - rep.Problem = fsr.err.Error() - } - - rep.Completed = make([]*StepReport, len(fsr.completed)) - for i := range fsr.completed { - rep.Completed[i] = fsr.completed[i].Report() - } - rep.Pending = make([]*StepReport, len(fsr.pending)) - for i := range fsr.pending { - rep.Pending[i] = fsr.pending[i].Report() - } - - return &rep -} - -func (s *ReplicationStep) Retry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) error { - switch s.state { - case StepReplicationReady: - return s.doReplication(ctx, ka, sender, receiver) - case StepMarkReplicatedReady: - return s.doMarkReplicated(ctx, ka, sender) - case StepCompleted: - return nil - } - panic(fmt.Sprintf("implementation error: %v", s.state)) -} - -func (s *ReplicationStep) Error() error { - if s.state & (StepReplicationReady|StepMarkReplicatedReady) != 0 { - return s.err - } - return nil -} - -func (s *ReplicationStep) doReplication(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) error { - - if s.state != StepReplicationReady { - panic(fmt.Sprintf("implementation error: %v", s.state)) - } - - fs 
:= s.parent.fs - - log := getLogger(ctx) - sr := s.buildSendRequest(false) - - log.Debug("initiate send request") - sres, sstreamCopier, err := sender.Send(ctx, sr) - if err != nil { - log.WithError(err).Error("send request failed") - return err - } - if sstreamCopier == nil { - err := errors.New("send request did not return a stream, broken endpoint implementation") - return err - } - defer sstreamCopier.Close() - - // Install a byte counter to track progress + for status report - s.byteCounter = bytecounter.NewStreamCopier(sstreamCopier) - byteCounterStopProgress := make(chan struct{}) - defer close(byteCounterStopProgress) - go func() { - var lastCount int64 - t := time.NewTicker(1 * time.Second) - defer t.Stop() - for { - select { - case <-byteCounterStopProgress: - return - case <-t.C: - newCount := s.byteCounter.Count() - if lastCount != newCount { - ka.MadeProgress() - } else { - lastCount = newCount - } - } - } - }() - defer func() { - s.parent.promBytesReplicated.Add(float64(s.byteCounter.Count())) - }() - - rr := &pdu.ReceiveReq{ - Filesystem: fs, - ClearResumeToken: !sres.UsedResumeToken, - } - log.Debug("initiate receive request") - _, err = receiver.Receive(ctx, rr, s.byteCounter) - if err != nil { - log. - WithError(err). - WithField("errType", fmt.Sprintf("%T", err)). 
- Error("receive request failed (might also be error on sender)") - // This failure could be due to - // - an unexpected exit of ZFS on the sending side - // - an unexpected exit of ZFS on the receiving side - // - a connectivity issue - return err - } - log.Debug("receive finished") - ka.MadeProgress() - - s.state = StepMarkReplicatedReady - return s.doMarkReplicated(ctx, ka, sender) - -} - -func (s *ReplicationStep) doMarkReplicated(ctx context.Context, ka *watchdog.KeepAlive, sender Sender) error { - - if s.state != StepMarkReplicatedReady { - panic(fmt.Sprintf("implementation error: %v", s.state)) - } - - log := getLogger(ctx) - - log.Debug("advance replication cursor") - req := &pdu.ReplicationCursorReq{ - Filesystem: s.parent.fs, - Op: &pdu.ReplicationCursorReq_Set{ - Set: &pdu.ReplicationCursorReq_SetOp{ - Snapshot: s.to.GetName(), - }, - }, - } - _, err := sender.ReplicationCursor(ctx, req) - if err != nil { - log.WithError(err).Error("error advancing replication cursor") - return err - } - ka.MadeProgress() - - s.state = StepCompleted - return err -} - -func (s *ReplicationStep) updateSizeEstimate(ctx context.Context, sender Sender) error { - - log := getLogger(ctx) - - sr := s.buildSendRequest(true) - - log.Debug("initiate dry run send request") - sres, _, err := sender.Send(ctx, sr) - if err != nil { - log.WithError(err).Error("dry run send request failed") - return err - } - s.expectedSize = sres.ExpectedSize - return nil -} - -func (s *ReplicationStep) buildSendRequest(dryRun bool) (sr *pdu.SendReq) { - fs := s.parent.fs - if s.from == nil { - sr = &pdu.SendReq{ - Filesystem: fs, - To: s.to.RelName(), - DryRun: dryRun, - } - } else { - sr = &pdu.SendReq{ - Filesystem: fs, - From: s.from.RelName(), - To: s.to.RelName(), - DryRun: dryRun, - } - } - return sr -} - -func (s *ReplicationStep) String() string { - if s.from == nil { // FIXME: ZFS semantics are that to is nil on non-incremental send - return fmt.Sprintf("%s%s (full)", s.parent.fs, 
s.to.RelName()) - } else { - return fmt.Sprintf("%s(%s => %s)", s.parent.fs, s.from.RelName(), s.to.RelName()) - } -} - -func (s *ReplicationStep) Report() *StepReport { - var from string // FIXME follow same convention as ZFS: to should be nil on full send - if s.from != nil { - from = s.from.RelName() - } - bytes := int64(0) - if s.byteCounter != nil { - bytes = s.byteCounter.Count() - } - problem := "" - if s.err != nil { - problem = s.err.Error() - } - rep := StepReport{ - From: from, - To: s.to.RelName(), - Status: s.state, - Problem: problem, - Bytes: bytes, - ExpectedBytes: s.expectedSize, - } - return &rep -} diff --git a/replication/fsrep/state_enumer.go b/replication/fsrep/state_enumer.go deleted file mode 100644 index 6e38ece..0000000 --- a/replication/fsrep/state_enumer.go +++ /dev/null @@ -1,50 +0,0 @@ -// Code generated by "enumer -type=State"; DO NOT EDIT. - -package fsrep - -import ( - "fmt" -) - -const _StateName = "ReadyCompleted" - -var _StateIndex = [...]uint8{0, 5, 14} - -func (i State) String() string { - i -= 1 - if i >= State(len(_StateIndex)-1) { - return fmt.Sprintf("State(%d)", i+1) - } - return _StateName[_StateIndex[i]:_StateIndex[i+1]] -} - -var _StateValues = []State{1, 2} - -var _StateNameToValueMap = map[string]State{ - _StateName[0:5]: 1, - _StateName[5:14]: 2, -} - -// StateString retrieves an enum value from the enum constants string name. -// Throws an error if the param is not part of the enum. -func StateString(s string) (State, error) { - if val, ok := _StateNameToValueMap[s]; ok { - return val, nil - } - return 0, fmt.Errorf("%s does not belong to State values", s) -} - -// StateValues returns all values of the enum -func StateValues() []State { - return _StateValues -} - -// IsAState returns "true" if the value is listed in the enum definition. 
"false" otherwise -func (i State) IsAState() bool { - for _, v := range _StateValues { - if i == v { - return true - } - } - return false -} diff --git a/replication/fsrep/stepstate_enumer.go b/replication/fsrep/stepstate_enumer.go deleted file mode 100644 index 287515c..0000000 --- a/replication/fsrep/stepstate_enumer.go +++ /dev/null @@ -1,61 +0,0 @@ -// Code generated by "enumer -type=StepState"; DO NOT EDIT. - -package fsrep - -import ( - "fmt" -) - -const ( - _StepStateName_0 = "StepReplicationReadyStepMarkReplicatedReady" - _StepStateName_1 = "StepCompleted" -) - -var ( - _StepStateIndex_0 = [...]uint8{0, 20, 43} - _StepStateIndex_1 = [...]uint8{0, 13} -) - -func (i StepState) String() string { - switch { - case 1 <= i && i <= 2: - i -= 1 - return _StepStateName_0[_StepStateIndex_0[i]:_StepStateIndex_0[i+1]] - case i == 4: - return _StepStateName_1 - default: - return fmt.Sprintf("StepState(%d)", i) - } -} - -var _StepStateValues = []StepState{1, 2, 4} - -var _StepStateNameToValueMap = map[string]StepState{ - _StepStateName_0[0:20]: 1, - _StepStateName_0[20:43]: 2, - _StepStateName_1[0:13]: 4, -} - -// StepStateString retrieves an enum value from the enum constants string name. -// Throws an error if the param is not part of the enum. -func StepStateString(s string) (StepState, error) { - if val, ok := _StepStateNameToValueMap[s]; ok { - return val, nil - } - return 0, fmt.Errorf("%s does not belong to StepState values", s) -} - -// StepStateValues returns all values of the enum -func StepStateValues() []StepState { - return _StepStateValues -} - -// IsAStepState returns "true" if the value is listed in the enum definition. 
"false" otherwise -func (i StepState) IsAStepState() bool { - for _, v := range _StepStateValues { - if i == v { - return true - } - } - return false -} diff --git a/replication/internal/diff/diff.go b/replication/logic/diff/diff.go similarity index 75% rename from replication/internal/diff/diff.go rename to replication/logic/diff/diff.go index 6af5246..f9ad46b 100644 --- a/replication/internal/diff/diff.go +++ b/replication/logic/diff/diff.go @@ -1,9 +1,11 @@ -package mainfsm +package diff import ( + "fmt" "sort" + "strings" - . "github.com/zrepl/zrepl/replication/pdu" + . "github.com/zrepl/zrepl/replication/logic/pdu" ) type ConflictNoCommonAncestor struct { @@ -11,7 +13,19 @@ type ConflictNoCommonAncestor struct { } func (c *ConflictNoCommonAncestor) Error() string { - return "no common snapshot or suitable bookmark between sender and receiver" + var buf strings.Builder + buf.WriteString("no common snapshot or suitable bookmark between sender and receiver") + if len(c.SortedReceiverVersions) > 0 || len(c.SortedSenderVersions) > 0 { + buf.WriteString(":\n sorted sender versions:\n") + for _, v := range c.SortedSenderVersions { + fmt.Fprintf(&buf, " %s\n", v.RelName()) + } + buf.WriteString(" sorted receiver versions:\n") + for _, v := range c.SortedReceiverVersions { + fmt.Fprintf(&buf, " %s\n", v.RelName()) + } + } + return buf.String() } type ConflictDiverged struct { @@ -21,7 +35,18 @@ type ConflictDiverged struct { } func (c *ConflictDiverged) Error() string { - return "the receiver's latest snapshot is not present on sender" + var buf strings.Builder + buf.WriteString("the receiver's latest snapshot is not present on sender:\n") + fmt.Fprintf(&buf, " last common: %s\n", c.CommonAncestor.RelName()) + fmt.Fprintf(&buf, " sender-only:\n") + for _, v := range c.SenderOnly { + fmt.Fprintf(&buf, " %s\n", v.RelName()) + } + fmt.Fprintf(&buf, " receiver-only:\n") + for _, v := range c.ReceiverOnly { + fmt.Fprintf(&buf, " %s\n", v.RelName()) + } + return buf.String() 
} func SortVersionListByCreateTXGThenBookmarkLTSnapshot(fsvslice []*FilesystemVersion) []*FilesystemVersion { diff --git a/replication/logic/diff/diff_test.go b/replication/logic/diff/diff_test.go new file mode 100644 index 0000000..46200b3 --- /dev/null +++ b/replication/logic/diff/diff_test.go @@ -0,0 +1,130 @@ +package diff + +import ( + "strconv" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "github.com/zrepl/zrepl/replication/logic/pdu" +) + +func fsvlist(fsv ...string) (r []*FilesystemVersion) { + + r = make([]*FilesystemVersion, len(fsv)) + for i, f := range fsv { + + // parse the id from fsvlist. it is used to derivce Guid,CreateTXG and Creation attrs + split := strings.Split(f, ",") + if len(split) != 2 { + panic("invalid fsv spec") + } + id, err := strconv.Atoi(split[1]) + if err != nil { + panic(err) + } + creation := func(id int) string { + return FilesystemVersionCreation(time.Unix(0, 0).Add(time.Duration(id) * time.Second)) + } + if strings.HasPrefix(f, "#") { + r[i] = &FilesystemVersion{ + Name: strings.TrimPrefix(f, "#"), + Type: FilesystemVersion_Bookmark, + Guid: uint64(id), + CreateTXG: uint64(id), + Creation: creation(id), + } + } else if strings.HasPrefix(f, "@") { + r[i] = &FilesystemVersion{ + Name: strings.TrimPrefix(f, "@"), + Type: FilesystemVersion_Snapshot, + Guid: uint64(id), + CreateTXG: uint64(id), + Creation: creation(id), + } + } else { + panic("invalid character") + } + } + return +} + +func doTest(receiver, sender []*FilesystemVersion, validate func(incpath []*FilesystemVersion, conflict error)) { + p, err := IncrementalPath(receiver, sender) + validate(p, err) +} + +func TestIncrementalPath_SnapshotsOnly(t *testing.T) { + + l := fsvlist + + // basic functionality + doTest(l("@a,1", "@b,2"), l("@a,1", "@b,2", "@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("@b,2", "@c,3", "@d,4"), path) + }) + + // no common ancestor + 
doTest(l(), l("@a,1"), func(path []*FilesystemVersion, conflict error) { + assert.Nil(t, path) + ca, ok := conflict.(*ConflictNoCommonAncestor) + require.True(t, ok) + assert.Equal(t, l("@a,1"), ca.SortedSenderVersions) + }) + doTest(l("@a,1", "@b,2"), l("@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) { + assert.Nil(t, path) + ca, ok := conflict.(*ConflictNoCommonAncestor) + require.True(t, ok) + assert.Equal(t, l("@a,1", "@b,2"), ca.SortedReceiverVersions) + assert.Equal(t, l("@c,3", "@d,4"), ca.SortedSenderVersions) + }) + + // divergence is detected + doTest(l("@a,1", "@b1,2"), l("@a,1", "@b2,3"), func(path []*FilesystemVersion, conflict error) { + assert.Nil(t, path) + cd, ok := conflict.(*ConflictDiverged) + require.True(t, ok) + assert.Equal(t, l("@a,1")[0], cd.CommonAncestor) + assert.Equal(t, l("@b1,2"), cd.ReceiverOnly) + assert.Equal(t, l("@b2,3"), cd.SenderOnly) + }) + + // gaps before most recent common ancestor do not matter + doTest(l("@a,1", "@b,2", "@c,3"), l("@a,1", "@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("@c,3", "@d,4"), path) + }) + + // sender with earlier but also current version as sender is not a conflict + doTest(l("@c,3"), l("@a,1", "@b,2", "@c,3") , func(path []*FilesystemVersion, conflict error) { + t.Logf("path: %#v", path) + t.Logf("conflict: %#v", conflict) + assert.Empty(t, path) + assert.Nil(t, conflict) + }) + +} + +func TestIncrementalPath_BookmarkSupport(t *testing.T) { + l := fsvlist + + // bookmarks are used + doTest(l("@a,1"), l("#a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("#a,1", "@b,2"), path) + }) + + // boomarks are stripped from IncrementalPath (cannot send incrementally) + doTest(l("@a,1"), l("#a,1", "#b,2", "@c,3"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("#a,1", "@c,3"), path) + }) + + // test that snapshots are preferred over bookmarks in IncrementalPath + doTest(l("@a,1"), l("#a,1", 
"@a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("@a,1", "@b,2"), path) + }) + doTest(l("@a,1"), l("@a,1", "#a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("@a,1", "@b,2"), path) + }) + +} diff --git a/replication/pdu/pdu.pb.go b/replication/logic/pdu/pdu.pb.go similarity index 81% rename from replication/pdu/pdu.pb.go rename to replication/logic/pdu/pdu.pb.go index 6b7fd86..b783f3d 100644 --- a/replication/pdu/pdu.pb.go +++ b/replication/logic/pdu/pdu.pb.go @@ -43,7 +43,7 @@ func (x FilesystemVersion_VersionType) String() string { return proto.EnumName(FilesystemVersion_VersionType_name, int32(x)) } func (FilesystemVersion_VersionType) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{5, 0} + return fileDescriptor_pdu_83b7e2a28d820622, []int{5, 0} } type ListFilesystemReq struct { @@ -56,7 +56,7 @@ func (m *ListFilesystemReq) Reset() { *m = ListFilesystemReq{} } func (m *ListFilesystemReq) String() string { return proto.CompactTextString(m) } func (*ListFilesystemReq) ProtoMessage() {} func (*ListFilesystemReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{0} + return fileDescriptor_pdu_83b7e2a28d820622, []int{0} } func (m *ListFilesystemReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ListFilesystemReq.Unmarshal(m, b) @@ -78,7 +78,6 @@ var xxx_messageInfo_ListFilesystemReq proto.InternalMessageInfo type ListFilesystemRes struct { Filesystems []*Filesystem `protobuf:"bytes,1,rep,name=Filesystems,proto3" json:"Filesystems,omitempty"` - Empty bool `protobuf:"varint,2,opt,name=Empty,proto3" json:"Empty,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` @@ -88,7 +87,7 @@ func (m *ListFilesystemRes) Reset() { *m = ListFilesystemRes{} } func (m *ListFilesystemRes) String() string { return proto.CompactTextString(m) } func (*ListFilesystemRes) 
ProtoMessage() {} func (*ListFilesystemRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{1} + return fileDescriptor_pdu_83b7e2a28d820622, []int{1} } func (m *ListFilesystemRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ListFilesystemRes.Unmarshal(m, b) @@ -115,16 +114,10 @@ func (m *ListFilesystemRes) GetFilesystems() []*Filesystem { return nil } -func (m *ListFilesystemRes) GetEmpty() bool { - if m != nil { - return m.Empty - } - return false -} - type Filesystem struct { Path string `protobuf:"bytes,1,opt,name=Path,proto3" json:"Path,omitempty"` ResumeToken string `protobuf:"bytes,2,opt,name=ResumeToken,proto3" json:"ResumeToken,omitempty"` + IsPlaceholder bool `protobuf:"varint,3,opt,name=IsPlaceholder,proto3" json:"IsPlaceholder,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` @@ -134,7 +127,7 @@ func (m *Filesystem) Reset() { *m = Filesystem{} } func (m *Filesystem) String() string { return proto.CompactTextString(m) } func (*Filesystem) ProtoMessage() {} func (*Filesystem) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{2} + return fileDescriptor_pdu_83b7e2a28d820622, []int{2} } func (m *Filesystem) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_Filesystem.Unmarshal(m, b) @@ -168,6 +161,13 @@ func (m *Filesystem) GetResumeToken() string { return "" } +func (m *Filesystem) GetIsPlaceholder() bool { + if m != nil { + return m.IsPlaceholder + } + return false +} + type ListFilesystemVersionsReq struct { Filesystem string `protobuf:"bytes,1,opt,name=Filesystem,proto3" json:"Filesystem,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` @@ -179,7 +179,7 @@ func (m *ListFilesystemVersionsReq) Reset() { *m = ListFilesystemVersion func (m *ListFilesystemVersionsReq) String() string { return proto.CompactTextString(m) } func (*ListFilesystemVersionsReq) ProtoMessage() {} func (*ListFilesystemVersionsReq) 
Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{3} + return fileDescriptor_pdu_83b7e2a28d820622, []int{3} } func (m *ListFilesystemVersionsReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ListFilesystemVersionsReq.Unmarshal(m, b) @@ -217,7 +217,7 @@ func (m *ListFilesystemVersionsRes) Reset() { *m = ListFilesystemVersion func (m *ListFilesystemVersionsRes) String() string { return proto.CompactTextString(m) } func (*ListFilesystemVersionsRes) ProtoMessage() {} func (*ListFilesystemVersionsRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{4} + return fileDescriptor_pdu_83b7e2a28d820622, []int{4} } func (m *ListFilesystemVersionsRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ListFilesystemVersionsRes.Unmarshal(m, b) @@ -259,7 +259,7 @@ func (m *FilesystemVersion) Reset() { *m = FilesystemVersion{} } func (m *FilesystemVersion) String() string { return proto.CompactTextString(m) } func (*FilesystemVersion) ProtoMessage() {} func (*FilesystemVersion) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{5} + return fileDescriptor_pdu_83b7e2a28d820622, []int{5} } func (m *FilesystemVersion) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_FilesystemVersion.Unmarshal(m, b) @@ -339,7 +339,7 @@ func (m *SendReq) Reset() { *m = SendReq{} } func (m *SendReq) String() string { return proto.CompactTextString(m) } func (*SendReq) ProtoMessage() {} func (*SendReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{6} + return fileDescriptor_pdu_83b7e2a28d820622, []int{6} } func (m *SendReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_SendReq.Unmarshal(m, b) @@ -420,7 +420,7 @@ func (m *Property) Reset() { *m = Property{} } func (m *Property) String() string { return proto.CompactTextString(m) } func (*Property) ProtoMessage() {} func (*Property) Descriptor() ([]byte, []int) { - return 
fileDescriptor_pdu_89315d819a6e0938, []int{7} + return fileDescriptor_pdu_83b7e2a28d820622, []int{7} } func (m *Property) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_Property.Unmarshal(m, b) @@ -470,7 +470,7 @@ func (m *SendRes) Reset() { *m = SendRes{} } func (m *SendRes) String() string { return proto.CompactTextString(m) } func (*SendRes) ProtoMessage() {} func (*SendRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{8} + return fileDescriptor_pdu_83b7e2a28d820622, []int{8} } func (m *SendRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_SendRes.Unmarshal(m, b) @@ -524,7 +524,7 @@ func (m *ReceiveReq) Reset() { *m = ReceiveReq{} } func (m *ReceiveReq) String() string { return proto.CompactTextString(m) } func (*ReceiveReq) ProtoMessage() {} func (*ReceiveReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{9} + return fileDescriptor_pdu_83b7e2a28d820622, []int{9} } func (m *ReceiveReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReceiveReq.Unmarshal(m, b) @@ -568,7 +568,7 @@ func (m *ReceiveRes) Reset() { *m = ReceiveRes{} } func (m *ReceiveRes) String() string { return proto.CompactTextString(m) } func (*ReceiveRes) ProtoMessage() {} func (*ReceiveRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{10} + return fileDescriptor_pdu_83b7e2a28d820622, []int{10} } func (m *ReceiveRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReceiveRes.Unmarshal(m, b) @@ -601,7 +601,7 @@ func (m *DestroySnapshotsReq) Reset() { *m = DestroySnapshotsReq{} } func (m *DestroySnapshotsReq) String() string { return proto.CompactTextString(m) } func (*DestroySnapshotsReq) ProtoMessage() {} func (*DestroySnapshotsReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{11} + return fileDescriptor_pdu_83b7e2a28d820622, []int{11} } func (m *DestroySnapshotsReq) XXX_Unmarshal(b []byte) error { return 
xxx_messageInfo_DestroySnapshotsReq.Unmarshal(m, b) @@ -647,7 +647,7 @@ func (m *DestroySnapshotRes) Reset() { *m = DestroySnapshotRes{} } func (m *DestroySnapshotRes) String() string { return proto.CompactTextString(m) } func (*DestroySnapshotRes) ProtoMessage() {} func (*DestroySnapshotRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{12} + return fileDescriptor_pdu_83b7e2a28d820622, []int{12} } func (m *DestroySnapshotRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_DestroySnapshotRes.Unmarshal(m, b) @@ -692,7 +692,7 @@ func (m *DestroySnapshotsRes) Reset() { *m = DestroySnapshotsRes{} } func (m *DestroySnapshotsRes) String() string { return proto.CompactTextString(m) } func (*DestroySnapshotsRes) ProtoMessage() {} func (*DestroySnapshotsRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{13} + return fileDescriptor_pdu_83b7e2a28d820622, []int{13} } func (m *DestroySnapshotsRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_DestroySnapshotsRes.Unmarshal(m, b) @@ -734,7 +734,7 @@ func (m *ReplicationCursorReq) Reset() { *m = ReplicationCursorReq{} } func (m *ReplicationCursorReq) String() string { return proto.CompactTextString(m) } func (*ReplicationCursorReq) ProtoMessage() {} func (*ReplicationCursorReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{14} + return fileDescriptor_pdu_83b7e2a28d820622, []int{14} } func (m *ReplicationCursorReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReplicationCursorReq.Unmarshal(m, b) @@ -882,7 +882,7 @@ func (m *ReplicationCursorReq_GetOp) Reset() { *m = ReplicationCursorReq func (m *ReplicationCursorReq_GetOp) String() string { return proto.CompactTextString(m) } func (*ReplicationCursorReq_GetOp) ProtoMessage() {} func (*ReplicationCursorReq_GetOp) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{14, 0} + return fileDescriptor_pdu_83b7e2a28d820622, 
[]int{14, 0} } func (m *ReplicationCursorReq_GetOp) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReplicationCursorReq_GetOp.Unmarshal(m, b) @@ -913,7 +913,7 @@ func (m *ReplicationCursorReq_SetOp) Reset() { *m = ReplicationCursorReq func (m *ReplicationCursorReq_SetOp) String() string { return proto.CompactTextString(m) } func (*ReplicationCursorReq_SetOp) ProtoMessage() {} func (*ReplicationCursorReq_SetOp) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{14, 1} + return fileDescriptor_pdu_83b7e2a28d820622, []int{14, 1} } func (m *ReplicationCursorReq_SetOp) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReplicationCursorReq_SetOp.Unmarshal(m, b) @@ -954,7 +954,7 @@ func (m *ReplicationCursorRes) Reset() { *m = ReplicationCursorRes{} } func (m *ReplicationCursorRes) String() string { return proto.CompactTextString(m) } func (*ReplicationCursorRes) ProtoMessage() {} func (*ReplicationCursorRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{15} + return fileDescriptor_pdu_83b7e2a28d820622, []int{15} } func (m *ReplicationCursorRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReplicationCursorRes.Unmarshal(m, b) @@ -1079,6 +1079,83 @@ func _ReplicationCursorRes_OneofSizer(msg proto.Message) (n int) { return n } +type PingReq struct { + Message string `protobuf:"bytes,1,opt,name=Message,proto3" json:"Message,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PingReq) Reset() { *m = PingReq{} } +func (m *PingReq) String() string { return proto.CompactTextString(m) } +func (*PingReq) ProtoMessage() {} +func (*PingReq) Descriptor() ([]byte, []int) { + return fileDescriptor_pdu_83b7e2a28d820622, []int{16} +} +func (m *PingReq) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PingReq.Unmarshal(m, b) +} +func (m *PingReq) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 
+ return xxx_messageInfo_PingReq.Marshal(b, m, deterministic) +} +func (dst *PingReq) XXX_Merge(src proto.Message) { + xxx_messageInfo_PingReq.Merge(dst, src) +} +func (m *PingReq) XXX_Size() int { + return xxx_messageInfo_PingReq.Size(m) +} +func (m *PingReq) XXX_DiscardUnknown() { + xxx_messageInfo_PingReq.DiscardUnknown(m) +} + +var xxx_messageInfo_PingReq proto.InternalMessageInfo + +func (m *PingReq) GetMessage() string { + if m != nil { + return m.Message + } + return "" +} + +type PingRes struct { + // Echo must be PingReq.Message + Echo string `protobuf:"bytes,1,opt,name=Echo,proto3" json:"Echo,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PingRes) Reset() { *m = PingRes{} } +func (m *PingRes) String() string { return proto.CompactTextString(m) } +func (*PingRes) ProtoMessage() {} +func (*PingRes) Descriptor() ([]byte, []int) { + return fileDescriptor_pdu_83b7e2a28d820622, []int{17} +} +func (m *PingRes) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PingRes.Unmarshal(m, b) +} +func (m *PingRes) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PingRes.Marshal(b, m, deterministic) +} +func (dst *PingRes) XXX_Merge(src proto.Message) { + xxx_messageInfo_PingRes.Merge(dst, src) +} +func (m *PingRes) XXX_Size() int { + return xxx_messageInfo_PingRes.Size(m) +} +func (m *PingRes) XXX_DiscardUnknown() { + xxx_messageInfo_PingRes.DiscardUnknown(m) +} + +var xxx_messageInfo_PingRes proto.InternalMessageInfo + +func (m *PingRes) GetEcho() string { + if m != nil { + return m.Echo + } + return "" +} + func init() { proto.RegisterType((*ListFilesystemReq)(nil), "ListFilesystemReq") proto.RegisterType((*ListFilesystemRes)(nil), "ListFilesystemRes") @@ -1098,6 +1175,8 @@ func init() { proto.RegisterType((*ReplicationCursorReq_GetOp)(nil), "ReplicationCursorReq.GetOp") proto.RegisterType((*ReplicationCursorReq_SetOp)(nil), 
"ReplicationCursorReq.SetOp") proto.RegisterType((*ReplicationCursorRes)(nil), "ReplicationCursorRes") + proto.RegisterType((*PingReq)(nil), "PingReq") + proto.RegisterType((*PingRes)(nil), "PingRes") proto.RegisterEnum("FilesystemVersion_VersionType", FilesystemVersion_VersionType_name, FilesystemVersion_VersionType_value) } @@ -1113,6 +1192,7 @@ const _ = grpc.SupportPackageIsVersion4 // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. type ReplicationClient interface { + Ping(ctx context.Context, in *PingReq, opts ...grpc.CallOption) (*PingRes, error) ListFilesystems(ctx context.Context, in *ListFilesystemReq, opts ...grpc.CallOption) (*ListFilesystemRes, error) ListFilesystemVersions(ctx context.Context, in *ListFilesystemVersionsReq, opts ...grpc.CallOption) (*ListFilesystemVersionsRes, error) DestroySnapshots(ctx context.Context, in *DestroySnapshotsReq, opts ...grpc.CallOption) (*DestroySnapshotsRes, error) @@ -1127,6 +1207,15 @@ func NewReplicationClient(cc *grpc.ClientConn) ReplicationClient { return &replicationClient{cc} } +func (c *replicationClient) Ping(ctx context.Context, in *PingReq, opts ...grpc.CallOption) (*PingRes, error) { + out := new(PingRes) + err := c.cc.Invoke(ctx, "/Replication/Ping", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *replicationClient) ListFilesystems(ctx context.Context, in *ListFilesystemReq, opts ...grpc.CallOption) (*ListFilesystemRes, error) { out := new(ListFilesystemRes) err := c.cc.Invoke(ctx, "/Replication/ListFilesystems", in, out, opts...) @@ -1165,6 +1254,7 @@ func (c *replicationClient) ReplicationCursor(ctx context.Context, in *Replicati // ReplicationServer is the server API for Replication service. 
type ReplicationServer interface { + Ping(context.Context, *PingReq) (*PingRes, error) ListFilesystems(context.Context, *ListFilesystemReq) (*ListFilesystemRes, error) ListFilesystemVersions(context.Context, *ListFilesystemVersionsReq) (*ListFilesystemVersionsRes, error) DestroySnapshots(context.Context, *DestroySnapshotsReq) (*DestroySnapshotsRes, error) @@ -1175,6 +1265,24 @@ func RegisterReplicationServer(s *grpc.Server, srv ReplicationServer) { s.RegisterService(&_Replication_serviceDesc, srv) } +func _Replication_Ping_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PingReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ReplicationServer).Ping(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Replication/Ping", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ReplicationServer).Ping(ctx, req.(*PingReq)) + } + return interceptor(ctx, in, info, handler) +} + func _Replication_ListFilesystems_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(ListFilesystemReq) if err := dec(in); err != nil { @@ -1251,6 +1359,10 @@ var _Replication_serviceDesc = grpc.ServiceDesc{ ServiceName: "Replication", HandlerType: (*ReplicationServer)(nil), Methods: []grpc.MethodDesc{ + { + MethodName: "Ping", + Handler: _Replication_Ping_Handler, + }, { MethodName: "ListFilesystems", Handler: _Replication_ListFilesystems_Handler, @@ -1272,54 +1384,58 @@ var _Replication_serviceDesc = grpc.ServiceDesc{ Metadata: "pdu.proto", } -func init() { proto.RegisterFile("pdu.proto", fileDescriptor_pdu_89315d819a6e0938) } +func init() { proto.RegisterFile("pdu.proto", fileDescriptor_pdu_83b7e2a28d820622) } -var fileDescriptor_pdu_89315d819a6e0938 = []byte{ - // 735 bytes of a 
gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xdd, 0x6e, 0xda, 0x4a, - 0x10, 0xc6, 0x60, 0xc0, 0x0c, 0x51, 0x42, 0x36, 0x9c, 0xc8, 0xc7, 0xe7, 0x28, 0x42, 0xdb, 0x1b, - 0x52, 0xa9, 0x6e, 0x45, 0x7b, 0x53, 0x55, 0xaa, 0x54, 0x42, 0x7e, 0xa4, 0x56, 0x69, 0xb4, 0xd0, - 0x28, 0xca, 0x1d, 0x0d, 0xa3, 0xc4, 0x0a, 0xb0, 0xce, 0xee, 0xba, 0x0a, 0xbd, 0xec, 0x7b, 0xf4, - 0x41, 0xfa, 0x0e, 0xbd, 0xec, 0x03, 0x55, 0xbb, 0x60, 0xe3, 0x60, 0x23, 0x71, 0xe5, 0xfd, 0xbe, - 0x9d, 0x9d, 0x9d, 0xf9, 0x76, 0x66, 0x0c, 0xb5, 0x70, 0x14, 0xf9, 0xa1, 0xe0, 0x8a, 0xd3, 0x3d, - 0xd8, 0xfd, 0x14, 0x48, 0x75, 0x12, 0x8c, 0x51, 0xce, 0xa4, 0xc2, 0x09, 0xc3, 0x07, 0x7a, 0x95, - 0x25, 0x25, 0x79, 0x01, 0xf5, 0x25, 0x21, 0x5d, 0xab, 0x55, 0x6a, 0xd7, 0x3b, 0x75, 0x3f, 0x65, - 0x94, 0xde, 0x27, 0x4d, 0x28, 0x1f, 0x4f, 0x42, 0x35, 0x73, 0x8b, 0x2d, 0xab, 0xed, 0xb0, 0x39, - 0xa0, 0x5d, 0x80, 0xa5, 0x11, 0x21, 0x60, 0x5f, 0x0c, 0xd5, 0x9d, 0x6b, 0xb5, 0xac, 0x76, 0x8d, - 0x99, 0x35, 0x69, 0x41, 0x9d, 0xa1, 0x8c, 0x26, 0x38, 0xe0, 0xf7, 0x38, 0x35, 0xa7, 0x6b, 0x2c, - 0x4d, 0xd1, 0x77, 0xf0, 0xef, 0xd3, 0xe8, 0x2e, 0x51, 0xc8, 0x80, 0x4f, 0x25, 0xc3, 0x07, 0x72, - 0x90, 0xbe, 0x60, 0xe1, 0x38, 0xc5, 0xd0, 0x8f, 0xeb, 0x0f, 0x4b, 0xe2, 0x83, 0x13, 0xc3, 0x45, - 0x7e, 0xc4, 0xcf, 0x58, 0xb2, 0xc4, 0x86, 0xfe, 0xb1, 0x60, 0x37, 0xb3, 0x4f, 0x3a, 0x60, 0x0f, - 0x66, 0x21, 0x9a, 0xcb, 0xb7, 0x3b, 0x07, 0x59, 0x0f, 0xfe, 0xe2, 0xab, 0xad, 0x98, 0xb1, 0xd5, - 0x4a, 0x9c, 0x0f, 0x27, 0xb8, 0x48, 0xd7, 0xac, 0x35, 0x77, 0x1a, 0x05, 0x23, 0xb7, 0xd4, 0xb2, - 0xda, 0x36, 0x33, 0x6b, 0xf2, 0x3f, 0xd4, 0x8e, 0x04, 0x0e, 0x15, 0x0e, 0xae, 0x4e, 0x5d, 0xdb, - 0x6c, 0x2c, 0x09, 0xe2, 0x81, 0x63, 0x40, 0xc0, 0xa7, 0x6e, 0xd9, 0x78, 0x4a, 0x30, 0x3d, 0x84, - 0x7a, 0xea, 0x5a, 0xb2, 0x05, 0x4e, 0x7f, 0x3a, 0x0c, 0xe5, 0x1d, 0x57, 0x8d, 0x82, 0x46, 0x5d, - 0xce, 0xef, 0x27, 0x43, 0x71, 0xdf, 0xb0, 0xe8, 0x2f, 0x0b, 0xaa, 0x7d, 0x9c, 0x8e, 0x36, 0xd0, - 0x53, 
0x07, 0x79, 0x22, 0xf8, 0x24, 0x0e, 0x5c, 0xaf, 0xc9, 0x36, 0x14, 0x07, 0xdc, 0x84, 0x5d, - 0x63, 0xc5, 0x01, 0x5f, 0x7d, 0x52, 0x3b, 0xf3, 0xa4, 0x26, 0x70, 0x3e, 0x09, 0x05, 0x4a, 0x69, - 0x02, 0x77, 0x58, 0x82, 0x75, 0x21, 0xf5, 0x70, 0x14, 0x85, 0x6e, 0x65, 0x5e, 0x48, 0x06, 0x90, - 0x7d, 0xa8, 0xf4, 0xc4, 0x8c, 0x45, 0x53, 0xb7, 0x6a, 0xe8, 0x05, 0xa2, 0x6f, 0xc0, 0xb9, 0x10, - 0x3c, 0x44, 0xa1, 0x66, 0x89, 0xa8, 0x56, 0x4a, 0xd4, 0x26, 0x94, 0x2f, 0x87, 0xe3, 0x28, 0x56, - 0x7a, 0x0e, 0xe8, 0x8f, 0x24, 0x63, 0x49, 0xda, 0xb0, 0xf3, 0x45, 0xe2, 0x68, 0xb5, 0x08, 0x1d, - 0xb6, 0x4a, 0x13, 0x0a, 0x5b, 0xc7, 0x8f, 0x21, 0xde, 0x28, 0x1c, 0xf5, 0x83, 0xef, 0x68, 0x32, - 0x2e, 0xb1, 0x27, 0x1c, 0x39, 0x04, 0x58, 0xc4, 0x13, 0xa0, 0x74, 0x6d, 0x53, 0x54, 0x35, 0x3f, - 0x0e, 0x91, 0xa5, 0x36, 0xe9, 0x15, 0x00, 0xc3, 0x1b, 0x0c, 0xbe, 0xe1, 0x26, 0xc2, 0x3f, 0x87, - 0xc6, 0xd1, 0x18, 0x87, 0x22, 0x1b, 0x67, 0x86, 0xa7, 0x5b, 0x29, 0xcf, 0x92, 0xde, 0xc2, 0x5e, - 0x0f, 0xa5, 0x12, 0x7c, 0x16, 0x57, 0xc0, 0x26, 0x9d, 0x43, 0x5e, 0x41, 0x2d, 0xb1, 0x77, 0x8b, - 0x6b, 0xbb, 0x63, 0x69, 0x44, 0xaf, 0x81, 0xac, 0x5c, 0xb4, 0x68, 0xb2, 0x18, 0x9a, 0x5b, 0xd6, - 0x34, 0x59, 0x6c, 0x63, 0x06, 0x89, 0x10, 0x5c, 0xc4, 0x2f, 0x66, 0x00, 0xed, 0xe5, 0x25, 0xa1, - 0x87, 0x54, 0x55, 0x27, 0x3e, 0x56, 0x71, 0x03, 0xef, 0xf9, 0xd9, 0x10, 0x58, 0x6c, 0x43, 0x7f, - 0x5b, 0xd0, 0x64, 0x18, 0x8e, 0x83, 0x1b, 0xd3, 0x24, 0x47, 0x91, 0x90, 0x5c, 0x6c, 0x22, 0xc6, - 0x4b, 0x28, 0xdd, 0xa2, 0x32, 0x21, 0xd5, 0x3b, 0xff, 0xf9, 0x79, 0x3e, 0xfc, 0x53, 0x54, 0x9f, - 0xc3, 0xb3, 0x02, 0xd3, 0x96, 0xfa, 0x80, 0x44, 0x65, 0x4a, 0x64, 0xed, 0x81, 0x7e, 0x7c, 0x40, - 0xa2, 0xf2, 0xaa, 0x50, 0x36, 0x0e, 0xbc, 0x67, 0x50, 0x36, 0x1b, 0xba, 0x49, 0x12, 0xe1, 0xe6, - 0x5a, 0x24, 0xb8, 0x6b, 0x43, 0x91, 0x87, 0x74, 0x90, 0x9b, 0x8d, 0x6e, 0xa1, 0xf9, 0x24, 0xd1, - 0x79, 0xd8, 0x67, 0x85, 0x64, 0x96, 0x38, 0xe7, 0x5c, 0xe1, 0x63, 0x20, 0xe7, 0xfe, 0x9c, 0xb3, - 0x02, 0x4b, 0x98, 0xae, 0x03, 0x95, 0xb9, 
0x4a, 0x9d, 0x9f, 0x45, 0xdd, 0xbf, 0x89, 0x5b, 0xf2, - 0x16, 0x76, 0x9e, 0x8e, 0x50, 0x49, 0x88, 0x9f, 0xf9, 0x89, 0x78, 0x59, 0x4e, 0x92, 0x0b, 0xd8, - 0xcf, 0x9f, 0xbe, 0xc4, 0xf3, 0xd7, 0xce, 0x74, 0x6f, 0xfd, 0x9e, 0x24, 0xef, 0xa1, 0xb1, 0x5a, - 0x07, 0xa4, 0xe9, 0xe7, 0xd4, 0xb7, 0x97, 0xc7, 0x4a, 0xf2, 0x01, 0x76, 0x33, 0x92, 0x91, 0x7f, - 0x72, 0xdf, 0xc7, 0xcb, 0xa5, 0x65, 0xb7, 0x7c, 0x5d, 0x0a, 0x47, 0xd1, 0xd7, 0x8a, 0xf9, 0xa1, - 0xbe, 0xfe, 0x1b, 0x00, 0x00, 0xff, 0xff, 0xa3, 0xba, 0x8e, 0x63, 0x5d, 0x07, 0x00, 0x00, +var fileDescriptor_pdu_83b7e2a28d820622 = []byte{ + // 785 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xd1, 0x8e, 0xe3, 0x34, + 0x14, 0x9d, 0xb4, 0x69, 0x9b, 0xde, 0x0e, 0xbb, 0x1d, 0x4f, 0x59, 0x85, 0x00, 0xab, 0xca, 0xcb, + 0x43, 0x17, 0x89, 0x80, 0x0a, 0x2f, 0x08, 0x09, 0x89, 0x4e, 0x67, 0x67, 0x10, 0xb0, 0x54, 0x6e, + 0x59, 0xad, 0xf6, 0x2d, 0x34, 0x57, 0x6d, 0x34, 0x6d, 0x9d, 0xb5, 0x13, 0xb4, 0xe5, 0x91, 0xbf, + 0x9a, 0x7f, 0xe0, 0x91, 0x0f, 0x42, 0x76, 0xe3, 0x34, 0x6d, 0x52, 0xa9, 0x4f, 0xf1, 0x39, 0xf7, + 0xda, 0x3e, 0xf7, 0xd8, 0xd7, 0x81, 0x76, 0x1c, 0xa6, 0x7e, 0x2c, 0x78, 0xc2, 0xe9, 0x35, 0x5c, + 0xfd, 0x1a, 0xc9, 0xe4, 0x55, 0xb4, 0x42, 0xb9, 0x95, 0x09, 0xae, 0x19, 0xbe, 0xa7, 0xa3, 0x32, + 0x29, 0xc9, 0x57, 0xd0, 0xd9, 0x13, 0xd2, 0xb5, 0xfa, 0xf5, 0x41, 0x67, 0xd8, 0xf1, 0x0b, 0x49, + 0xc5, 0x38, 0x5d, 0x02, 0xec, 0x21, 0x21, 0x60, 0x4f, 0x82, 0x64, 0xe9, 0x5a, 0x7d, 0x6b, 0xd0, + 0x66, 0x7a, 0x4c, 0xfa, 0xd0, 0x61, 0x28, 0xd3, 0x35, 0xce, 0xf8, 0x03, 0x6e, 0xdc, 0x9a, 0x0e, + 0x15, 0x29, 0xf2, 0x05, 0x7c, 0xf4, 0xb3, 0x9c, 0xac, 0x82, 0x39, 0x2e, 0xf9, 0x2a, 0x44, 0xe1, + 0xd6, 0xfb, 0xd6, 0xc0, 0x61, 0x87, 0x24, 0xfd, 0x01, 0x3e, 0x39, 0x54, 0xfb, 0x06, 0x85, 0x8c, + 0xf8, 0x46, 0x32, 0x7c, 0x4f, 0x9e, 0x17, 0x65, 0x64, 0xdb, 0x17, 0x18, 0xfa, 0xcb, 0xe9, 0xc9, + 0x92, 0xf8, 0xe0, 0x18, 0x98, 0xd5, 0x4b, 0xfc, 0x52, 0x26, 0xcb, 0x73, 0xe8, 0x7f, 
0x16, 0x5c, + 0x95, 0xe2, 0x64, 0x08, 0xf6, 0x6c, 0x1b, 0xa3, 0xde, 0xfc, 0xc9, 0xf0, 0x79, 0x79, 0x05, 0x3f, + 0xfb, 0xaa, 0x2c, 0xa6, 0x73, 0x95, 0x5f, 0xaf, 0x83, 0x35, 0x66, 0xa6, 0xe8, 0xb1, 0xe2, 0xee, + 0xd2, 0x28, 0xd4, 0x26, 0xd8, 0x4c, 0x8f, 0xc9, 0x67, 0xd0, 0xbe, 0x11, 0x18, 0x24, 0x38, 0x7b, + 0x7b, 0xe7, 0xda, 0x3a, 0xb0, 0x27, 0x88, 0x07, 0x8e, 0x06, 0x11, 0xdf, 0xb8, 0x0d, 0xbd, 0x52, + 0x8e, 0xe9, 0x4b, 0xe8, 0x14, 0xb6, 0x25, 0x97, 0xe0, 0x4c, 0x37, 0x41, 0x2c, 0x97, 0x3c, 0xe9, + 0x5e, 0x28, 0x34, 0xe2, 0xfc, 0x61, 0x1d, 0x88, 0x87, 0xae, 0x45, 0x1f, 0x2d, 0x68, 0x4d, 0x71, + 0x13, 0x9e, 0xe1, 0xa7, 0x12, 0xf9, 0x4a, 0xf0, 0xb5, 0x11, 0xae, 0xc6, 0xe4, 0x09, 0xd4, 0x66, + 0x5c, 0xcb, 0x6e, 0xb3, 0xda, 0x8c, 0x1f, 0x1f, 0xbc, 0x5d, 0x3e, 0x78, 0x25, 0x9c, 0xaf, 0x63, + 0x81, 0x52, 0x6a, 0xe1, 0x0e, 0xcb, 0x31, 0xe9, 0x41, 0x63, 0x8c, 0x61, 0x1a, 0xbb, 0x4d, 0x1d, + 0xd8, 0x01, 0xf2, 0x0c, 0x9a, 0x63, 0xb1, 0x65, 0xe9, 0xc6, 0x6d, 0x69, 0x3a, 0x43, 0xf4, 0x3b, + 0x70, 0x26, 0x82, 0xc7, 0x28, 0x92, 0x6d, 0x6e, 0xaa, 0x55, 0x30, 0xb5, 0x07, 0x8d, 0x37, 0xc1, + 0x2a, 0x35, 0x4e, 0xef, 0x00, 0xfd, 0x27, 0xaf, 0x58, 0x92, 0x01, 0x3c, 0xfd, 0x43, 0x62, 0x78, + 0x7c, 0x55, 0x1d, 0x76, 0x4c, 0x13, 0x0a, 0x97, 0xb7, 0x1f, 0x62, 0x9c, 0x27, 0x18, 0x4e, 0xa3, + 0xbf, 0x51, 0x57, 0x5c, 0x67, 0x07, 0x1c, 0x79, 0x09, 0x90, 0xe9, 0x89, 0x50, 0xba, 0xb6, 0xbe, + 0x54, 0x6d, 0xdf, 0x48, 0x64, 0x85, 0x20, 0x7d, 0x0b, 0xc0, 0x70, 0x8e, 0xd1, 0x5f, 0x78, 0x8e, + 0xf1, 0x5f, 0x42, 0xf7, 0x66, 0x85, 0x81, 0x28, 0xeb, 0x2c, 0xf1, 0xf4, 0xb2, 0xb0, 0xb2, 0xa4, + 0x0b, 0xb8, 0x1e, 0xa3, 0x4c, 0x04, 0xdf, 0x9a, 0x1b, 0x70, 0x4e, 0xe7, 0x90, 0x6f, 0xa0, 0x9d, + 0xe7, 0xbb, 0xb5, 0x93, 0xdd, 0xb1, 0x4f, 0xa2, 0xef, 0x80, 0x1c, 0x6d, 0x94, 0x35, 0x99, 0x81, + 0x7a, 0x97, 0x13, 0x4d, 0x66, 0x72, 0xd4, 0x89, 0xdd, 0x0a, 0xc1, 0x85, 0x39, 0x31, 0x0d, 0xe8, + 0xb8, 0xaa, 0x08, 0xf5, 0x68, 0xb5, 0x54, 0xe1, 0xab, 0xc4, 0x34, 0xf0, 0xb5, 0x5f, 0x96, 0xc0, + 0x4c, 0x0e, 0xfd, 0xd7, 
0x82, 0x1e, 0xc3, 0x78, 0x15, 0xcd, 0x75, 0x93, 0xdc, 0xa4, 0x42, 0x72, + 0x71, 0x8e, 0x19, 0x5f, 0x43, 0x7d, 0x81, 0x89, 0x96, 0xd4, 0x19, 0x7e, 0xea, 0x57, 0xad, 0xe1, + 0xdf, 0x61, 0xf2, 0x7b, 0x7c, 0x7f, 0xc1, 0x54, 0xa6, 0x9a, 0x20, 0x31, 0xd1, 0x57, 0xe4, 0xe4, + 0x84, 0xa9, 0x99, 0x20, 0x31, 0xf1, 0x5a, 0xd0, 0xd0, 0x0b, 0x78, 0x2f, 0xa0, 0xa1, 0x03, 0xaa, + 0x49, 0x72, 0xe3, 0x76, 0x5e, 0xe4, 0x78, 0x64, 0x43, 0x8d, 0xc7, 0x74, 0x56, 0x59, 0x8d, 0x6a, + 0xa1, 0xdd, 0x4b, 0xa2, 0xea, 0xb0, 0xef, 0x2f, 0xf2, 0xb7, 0xc4, 0x79, 0xcd, 0x13, 0xfc, 0x10, + 0xc9, 0xdd, 0x7a, 0xce, 0xfd, 0x05, 0xcb, 0x99, 0x91, 0x03, 0xcd, 0x9d, 0x4b, 0xf4, 0x05, 0xb4, + 0x26, 0xd1, 0x66, 0xa1, 0x6c, 0x71, 0xa1, 0xf5, 0x1b, 0x4a, 0x19, 0x2c, 0x4c, 0x53, 0x19, 0x48, + 0x3f, 0x37, 0x49, 0x52, 0xb5, 0xdd, 0xed, 0x7c, 0xc9, 0x4d, 0xdb, 0xa9, 0xf1, 0xf0, 0xb1, 0xa6, + 0xde, 0x80, 0x5c, 0x1a, 0xf1, 0xc0, 0x56, 0xe9, 0xc4, 0xf1, 0xb3, 0xa5, 0x3d, 0x33, 0x92, 0xe4, + 0x7b, 0x78, 0x7a, 0xf8, 0x44, 0x4b, 0x42, 0xfc, 0xd2, 0x4f, 0xcb, 0x2b, 0x73, 0x92, 0x4c, 0xe0, + 0x59, 0xf5, 0xeb, 0x4e, 0x3c, 0xff, 0xe4, 0x3f, 0xc3, 0x3b, 0x1d, 0x93, 0xe4, 0x47, 0xe8, 0x1e, + 0xdf, 0x33, 0xd2, 0xf3, 0x2b, 0xfa, 0xc7, 0xab, 0x62, 0x25, 0xf9, 0x09, 0xae, 0x4a, 0x47, 0x42, + 0x3e, 0xae, 0x3c, 0x7f, 0xaf, 0x92, 0x96, 0xa3, 0xc6, 0xbb, 0x7a, 0x1c, 0xa6, 0x7f, 0x36, 0xf5, + 0x0f, 0xfc, 0xdb, 0xff, 0x03, 0x00, 0x00, 0xff, 0xff, 0x37, 0x0e, 0xf2, 0xe4, 0xcd, 0x07, 0x00, + 0x00, } diff --git a/replication/pdu/pdu.proto b/replication/logic/pdu/pdu.proto similarity index 94% rename from replication/pdu/pdu.proto rename to replication/logic/pdu/pdu.proto index 1b66916..2097af5 100644 --- a/replication/pdu/pdu.proto +++ b/replication/logic/pdu/pdu.proto @@ -2,6 +2,7 @@ syntax = "proto3"; option go_package = "pdu"; service Replication { + rpc Ping (PingReq) returns (PingRes); rpc ListFilesystems (ListFilesystemReq) returns (ListFilesystemRes); rpc ListFilesystemVersions (ListFilesystemVersionsReq) returns 
(ListFilesystemVersionsRes); rpc DestroySnapshots (DestroySnapshotsReq) returns (DestroySnapshotsRes); @@ -13,12 +14,12 @@ message ListFilesystemReq {} message ListFilesystemRes { repeated Filesystem Filesystems = 1; - bool Empty = 2; } message Filesystem { string Path = 1; string ResumeToken = 2; + bool IsPlaceholder = 3; } message ListFilesystemVersionsReq { @@ -120,3 +121,12 @@ message ReplicationCursorRes { bool Notexist = 2; } } + +message PingReq { + string Message = 1; +} + +message PingRes { + // Echo must be PingReq.Message + string Echo = 1; +} \ No newline at end of file diff --git a/replication/pdu/pdu_extras.go b/replication/logic/pdu/pdu_extras.go similarity index 100% rename from replication/pdu/pdu_extras.go rename to replication/logic/pdu/pdu_extras.go diff --git a/replication/pdu/pdu_test.go b/replication/logic/pdu/pdu_test.go similarity index 100% rename from replication/pdu/pdu_test.go rename to replication/logic/pdu/pdu_test.go diff --git a/replication/logic/replication_logic.go b/replication/logic/replication_logic.go new file mode 100644 index 0000000..906b610 --- /dev/null +++ b/replication/logic/replication_logic.go @@ -0,0 +1,495 @@ +package logic + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/zrepl/zrepl/replication/driver" + . "github.com/zrepl/zrepl/replication/logic/diff" + "github.com/zrepl/zrepl/replication/logic/pdu" + "github.com/zrepl/zrepl/replication/report" + "github.com/zrepl/zrepl/util/bytecounter" + "github.com/zrepl/zrepl/zfs" +) + +// Endpoint represents one side of the replication. +// +// An endpoint is either in Sender or Receiver mode, represented by the correspondingly +// named interfaces defined in this package. 
+type Endpoint interface { + // Does not include placeholder filesystems + ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) + ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error) + DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) + WaitForConnectivity(ctx context.Context) (error) +} + +type Sender interface { + Endpoint + // If a non-nil io.ReadCloser is returned, it is guaranteed to be closed before + // any next call to the parent github.com/zrepl/zrepl/replication.Endpoint. + // If the send request is for dry run the io.ReadCloser will be nil + Send(ctx context.Context, r *pdu.SendReq) (*pdu.SendRes, zfs.StreamCopier, error) + ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) +} + +type Receiver interface { + Endpoint + // Receive sends r and sendStream (the latter containing a ZFS send stream) + // to the parent github.com/zrepl/zrepl/replication.Endpoint. 
+ Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error) +} + +type Planner struct { + sender Sender + receiver Receiver + + promSecsPerState *prometheus.HistogramVec // labels: state + promBytesReplicated *prometheus.CounterVec // labels: filesystem +} + +func (p *Planner) Plan(ctx context.Context) ([]driver.FS, error) { + fss, err := p.doPlanning(ctx) + if err != nil { + return nil, err + } + dfss := make([]driver.FS, len(fss)) + for i := range dfss { + dfss[i] = fss[i] + } + return dfss, nil +} + +func (p *Planner) WaitForConnectivity(ctx context.Context) error { + var wg sync.WaitGroup + doPing := func(endpoint Endpoint, errOut *error) { + defer wg.Done() + err := endpoint.WaitForConnectivity(ctx) + if err != nil { + *errOut = err + } else { + *errOut = nil + } + } + wg.Add(2) + var senderErr, receiverErr error + go doPing(p.sender, &senderErr) + go doPing(p.receiver, &receiverErr) + wg.Wait() + if senderErr == nil && receiverErr == nil { + return nil + } else if senderErr != nil && receiverErr != nil { + if senderErr.Error() == receiverErr.Error() { + return fmt.Errorf("sender and receiver are not reachable: %s", senderErr.Error()) + } else { + return fmt.Errorf("sender and receiver are not reachable:\n sender: %s\n receiver: %s", senderErr, receiverErr) + } + } else { + var side string + var err *error + if senderErr != nil { + side = "sender" + err = &senderErr + } else { + side = "receiver" + err = &receiverErr + } + return fmt.Errorf("%s is not reachable: %s", side, *err) + } +} + +type Filesystem struct { + sender Sender + receiver Receiver + + Path string // compat + receiverFS *pdu.Filesystem + promBytesReplicated prometheus.Counter // compat +} + +func (f *Filesystem) EqualToPreviousAttempt(other driver.FS) bool { + g, ok := other.(*Filesystem) + if !ok { + return false + } + // TODO: use GUIDs (issued by zrepl, not those from ZFS) + return f.Path == g.Path +} + +func (f *Filesystem) PlanFS(ctx 
context.Context) ([]driver.Step, error) { + steps, err := f.doPlanning(ctx) + if err != nil { + return nil, err + } + dsteps := make([]driver.Step, len(steps)) + for i := range dsteps { + dsteps[i] = steps[i] + } + return dsteps, nil +} +func (f *Filesystem) ReportInfo() *report.FilesystemInfo { + return &report.FilesystemInfo{Name: f.Path} // FIXME compat name +} + +type Step struct { + sender Sender + receiver Receiver + + parent *Filesystem + from, to *pdu.FilesystemVersion // compat + + byteCounter bytecounter.StreamCopier + expectedSize int64 // 0 means no size estimate present / possible +} + +func (s *Step) TargetEquals(other driver.Step) bool { + t, ok := other.(*Step) + if !ok { + return false + } + if !s.parent.EqualToPreviousAttempt(t.parent) { + panic("Step interface promise broken: parent filesystems must be same") + } + return s.from.GetGuid() == t.from.GetGuid() && + s.to.GetGuid() == t.to.GetGuid() +} + +func (s *Step) TargetDate() time.Time { + return s.to.SnapshotTime() // FIXME compat name +} + +func (s *Step) Step(ctx context.Context) error { + return s.doReplication(ctx) +} + +func (s *Step) ReportInfo() *report.StepInfo { + var byteCounter int64 + if s.byteCounter != nil { + byteCounter = s.byteCounter.Count() + } + // FIXME stick to zfs convention of from and to + from := "" + if s.from != nil { + from = s.from.RelName() + } + return &report.StepInfo{ + From: from, + To: s.to.RelName(), + BytesExpected: s.expectedSize, + BytesReplicated: byteCounter, + } +} + +func NewPlanner(secsPerState *prometheus.HistogramVec, bytesReplicated *prometheus.CounterVec, sender Sender, receiver Receiver) *Planner { + return &Planner{ + sender: sender, + receiver: receiver, + promSecsPerState: secsPerState, + promBytesReplicated: bytesReplicated, + } +} +func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string) { + if noCommonAncestor, ok := conflict.(*ConflictNoCommonAncestor); ok { + if len(noCommonAncestor.SortedReceiverVersions) == 0 
{ + // TODO this is hard-coded replication policy: most recent snapshot as source + var mostRecentSnap *pdu.FilesystemVersion + for n := len(noCommonAncestor.SortedSenderVersions) - 1; n >= 0; n-- { + if noCommonAncestor.SortedSenderVersions[n].Type == pdu.FilesystemVersion_Snapshot { + mostRecentSnap = noCommonAncestor.SortedSenderVersions[n] + break + } + } + if mostRecentSnap == nil { + return nil, "no snapshots available on sender side" + } + return []*pdu.FilesystemVersion{mostRecentSnap}, fmt.Sprintf("start replication at most recent snapshot %s", mostRecentSnap.RelName()) + } + } + return nil, "no automated way to handle conflict type" +} + +func (p *Planner) doPlanning(ctx context.Context) ([]*Filesystem, error) { + + log := getLogger(ctx) + + log.Info("start planning") + + slfssres, err := p.sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) + if err != nil { + log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing sender filesystems") + return nil, err + } + sfss := slfssres.GetFilesystems() + // no progress here since we could run in a live-lock on connectivity issues + + rlfssres, err := p.receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) + if err != nil { + log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing receiver filesystems") + return nil, err + } + rfss := rlfssres.GetFilesystems() + + q := make([]*Filesystem, 0, len(sfss)) + for _, fs := range sfss { + + var receiverFS *pdu.Filesystem + for _, rfs := range rfss { + if rfs.Path == fs.Path { + receiverFS = rfs + } + } + + ctr := p.promBytesReplicated.WithLabelValues(fs.Path) + + q = append(q, &Filesystem{ + sender: p.sender, + receiver: p.receiver, + Path: fs.Path, + receiverFS: receiverFS, + promBytesReplicated: ctr, + }) + } + + return q, nil +} + +func (fs *Filesystem) doPlanning(ctx context.Context) ([]*Step, error) { + + log := getLogger(ctx).WithField("filesystem", fs.Path) + + log.Debug("assessing filesystem") + + 
sfsvsres, err := fs.sender.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path}) + if err != nil { + log.WithError(err).Error("cannot get remote filesystem versions") + return nil, err + } + sfsvs := sfsvsres.GetVersions() + + if len(sfsvs) < 1 { + err := errors.New("sender does not have any versions") + log.Error(err.Error()) + return nil, err + } + + var rfsvs []*pdu.FilesystemVersion + if fs.receiverFS != nil && !fs.receiverFS.GetIsPlaceholder() { + rfsvsres, err := fs.receiver.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path}) + if err != nil { + log.WithError(err).Error("receiver error") + return nil, err + } + rfsvs = rfsvsres.GetVersions() + } else { + rfsvs = []*pdu.FilesystemVersion{} + } + + path, conflict := IncrementalPath(rfsvs, sfsvs) + if conflict != nil { + var msg string + path, msg = resolveConflict(conflict) // no shadowing allowed! + if path != nil { + log.WithField("conflict", conflict).Info("conflict") + log.WithField("resolution", msg).Info("automatically resolved") + } else { + log.WithField("conflict", conflict).Error("conflict") + log.WithField("problem", msg).Error("cannot resolve conflict") + } + } + if len(path) == 0 { + return nil, conflict + } + + steps := make([]*Step, 0, len(path)) + // FIXME unify struct declarations => initializer? 
+ if len(path) == 1 { + steps = append(steps, &Step{ + parent: fs, + sender: fs.sender, + receiver: fs.receiver, + from: nil, + to: path[0], + }) + } else { + for i := 0; i < len(path)-1; i++ { + steps = append(steps, &Step{ + parent: fs, + sender: fs.sender, + receiver: fs.receiver, + from: path[i], + to: path[i+1], + }) + } + } + + log.Debug("compute send size estimate") + errs := make(chan error, len(steps)) + var wg sync.WaitGroup + fanOutCtx, fanOutCancel := context.WithCancel(ctx) + defer fanOutCancel() + for _, step := range steps { + wg.Add(1) + go func(step *Step) { + defer wg.Done() + err := step.updateSizeEstimate(fanOutCtx) + if err != nil { + log.WithError(err).WithField("step", step).Error("error computing size estimate") + fanOutCancel() + } + errs <- err + }(step) + } + wg.Wait() + close(errs) + var significantErr error = nil + for err := range errs { + if err != nil { + if significantErr == nil || significantErr == context.Canceled { + significantErr = err + } + } + } + if significantErr != nil { + return nil, significantErr + } + + log.Debug("filesystem planning finished") + return steps, nil +} + +// type FilesystemsReplicationFailedError struct { +// FilesystemsWithError []*fsrep.Replication +// } + +// func (e FilesystemsReplicationFailedError) Error() string { +// allSame := true +// lastErr := e.FilesystemsWithError[0].Err().Error() +// for _, fs := range e.FilesystemsWithError { +// fsErr := fs.Err().Error() +// allSame = allSame && lastErr == fsErr +// } + +// fsstr := "multiple filesystems" +// if len(e.FilesystemsWithError) == 1 { +// fsstr = fmt.Sprintf("filesystem %s", e.FilesystemsWithError[0].FS()) +// } +// errorStr := lastErr +// if !allSame { +// errorStr = "multiple different errors" +// } +// return fmt.Sprintf("%s could not be replicated: %s", fsstr, errorStr) +// } + +func (s *Step) updateSizeEstimate(ctx context.Context) error { + + log := getLogger(ctx) + + sr := s.buildSendRequest(true) + + log.Debug("initiate dry run send 
request") + sres, _, err := s.sender.Send(ctx, sr) + if err != nil { + log.WithError(err).Error("dry run send request failed") + return err + } + s.expectedSize = sres.ExpectedSize + return nil +} + +func (s *Step) buildSendRequest(dryRun bool) (sr *pdu.SendReq) { + fs := s.parent.Path + if s.from == nil { + sr = &pdu.SendReq{ + Filesystem: fs, + To: s.to.RelName(), + DryRun: dryRun, + } + } else { + sr = &pdu.SendReq{ + Filesystem: fs, + From: s.from.RelName(), + To: s.to.RelName(), + DryRun: dryRun, + } + } + return sr +} + +func (s *Step) doReplication(ctx context.Context) error { + + fs := s.parent.Path + + log := getLogger(ctx) + sr := s.buildSendRequest(false) + + log.Debug("initiate send request") + sres, sstreamCopier, err := s.sender.Send(ctx, sr) + if err != nil { + log.WithError(err).Error("send request failed") + return err + } + if sstreamCopier == nil { + err := errors.New("send request did not return a stream, broken endpoint implementation") + return err + } + defer sstreamCopier.Close() + + // Install a byte counter to track progress + for status report + s.byteCounter = bytecounter.NewStreamCopier(sstreamCopier) + defer func() { + s.parent.promBytesReplicated.Add(float64(s.byteCounter.Count())) + }() + + rr := &pdu.ReceiveReq{ + Filesystem: fs, + ClearResumeToken: !sres.UsedResumeToken, + } + log.Debug("initiate receive request") + _, err = s.receiver.Receive(ctx, rr, s.byteCounter) + if err != nil { + log. + WithError(err). + WithField("errType", fmt.Sprintf("%T", err)). 
+ Error("receive request failed (might also be error on sender)") + // This failure could be due to + // - an unexpected exit of ZFS on the sending side + // - an unexpected exit of ZFS on the receiving side + // - a connectivity issue + return err + } + log.Debug("receive finished") + + log.Debug("advance replication cursor") + req := &pdu.ReplicationCursorReq{ + Filesystem: fs, + Op: &pdu.ReplicationCursorReq_Set{ + Set: &pdu.ReplicationCursorReq_SetOp{ + Snapshot: s.to.GetName(), + }, + }, + } + _, err = s.sender.ReplicationCursor(ctx, req) + if err != nil { + log.WithError(err).Error("error advancing replication cursor") + // If this fails and replication planning restarts, the diff algorithm will find + // that cursor out of place. This is not a problem because then, it would just use another FS + // However, we FIXME have no means to just update the cursor in a + // second replication attempt right after this one where we don't have new snaps yet + return err + } + + return err +} + +func (s *Step) String() string { + if s.from == nil { // FIXME: ZFS semantics are that to is nil on non-incremental send + return fmt.Sprintf("%s%s (full)", s.parent.Path, s.to.RelName()) + } else { + return fmt.Sprintf("%s(%s => %s)", s.parent.Path, s.from.RelName(), s.to.RelName()) + } +} diff --git a/replication/context.go b/replication/logic/replication_logic_context.go similarity index 81% rename from replication/context.go rename to replication/logic/replication_logic_context.go index 7e43981..8102c9c 100644 --- a/replication/context.go +++ b/replication/logic/replication_logic_context.go @@ -1,9 +1,9 @@ -package replication +package logic import ( "context" + "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication/fsrep" ) type contextKey int @@ -16,7 +16,6 @@ type Logger = logger.Logger func WithLogger(ctx context.Context, l Logger) context.Context { ctx = context.WithValue(ctx, contextKeyLog, l) - ctx = fsrep.WithLogger(ctx, l) return ctx } diff --git 
a/replication/mainfsm.go b/replication/mainfsm.go deleted file mode 100644 index 5cf1d7b..0000000 --- a/replication/mainfsm.go +++ /dev/null @@ -1,560 +0,0 @@ -// Package replication implements replication of filesystems with existing -// versions (snapshots) from a sender to a receiver. -package replication - -import ( - "context" - "errors" - "fmt" - "github.com/prometheus/client_golang/prometheus" - "github.com/zrepl/zrepl/daemon/job/wakeup" - "github.com/zrepl/zrepl/util/envconst" - "github.com/zrepl/zrepl/util/watchdog" - "math/bits" - "net" - "sort" - "sync" - "time" - - "github.com/zrepl/zrepl/replication/fsrep" - . "github.com/zrepl/zrepl/replication/internal/diff" - "github.com/zrepl/zrepl/replication/pdu" -) - -//go:generate enumer -type=State -type State uint - -const ( - Planning State = 1 << iota - PlanningError - Working - WorkingWait - Completed - PermanentError -) - -func (s State) rsf() state { - idx := bits.TrailingZeros(uint(s)) - if idx == bits.UintSize { - panic(s) // invalid value - } - m := []state{ - statePlanning, - statePlanningError, - stateWorking, - stateWorkingWait, - nil, - nil, - } - return m[idx] -} - -func (s State) IsTerminal() bool { - return s.rsf() == nil -} - -// Replication implements the replication of multiple file systems from a Sender to a Receiver. -// -// It is a state machine that is driven by the Drive method -// and provides asynchronous reporting via the Report method (i.e. from another goroutine). -type Replication struct { - // not protected by lock - promSecsPerState *prometheus.HistogramVec // labels: state - promBytesReplicated *prometheus.CounterVec // labels: filesystem - - Progress watchdog.KeepAlive - - // lock protects all fields of this struct (but not the fields behind pointers!) 
- lock sync.Mutex - - state State - - // Working, WorkingWait, Completed, ContextDone - queue []*fsrep.Replication - completed []*fsrep.Replication - active *fsrep.Replication // == queue[0] or nil, unlike in Report - - // for PlanningError, WorkingWait and ContextError and Completed - err error - - // PlanningError, WorkingWait - sleepUntil time.Time -} - -type Report struct { - Status string - Problem string - SleepUntil time.Time - Completed []*fsrep.Report - Pending []*fsrep.Report - Active *fsrep.Report // not contained in Pending, unlike in struct Replication -} - -func NewReplication(secsPerState *prometheus.HistogramVec, bytesReplicated *prometheus.CounterVec) *Replication { - r := Replication{ - promSecsPerState: secsPerState, - promBytesReplicated: bytesReplicated, - state: Planning, - } - return &r -} - -// Endpoint represents one side of the replication. -// -// An endpoint is either in Sender or Receiver mode, represented by the correspondingly -// named interfaces defined in this package. 
-type Endpoint interface { - // Does not include placeholder filesystems - ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) - ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error) - DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) -} - -type Sender interface { - Endpoint - fsrep.Sender -} - -type Receiver interface { - Endpoint - fsrep.Receiver -} - -type FilteredError struct{ fs string } - -func NewFilteredError(fs string) *FilteredError { - return &FilteredError{fs} -} - -func (f FilteredError) Error() string { return "endpoint does not allow access to filesystem " + f.fs } - -type updater func(func(*Replication)) (newState State) -type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state - -// Drive starts the state machine and returns only after replication has finished (with or without errors). -// The Logger in ctx is used for both debug and error logging, but is not guaranteed to be stable -// or end-user friendly. -// User-facing replication progress reports and can be obtained using the Report method, -// whose output will not change after Drive returns. -// -// FIXME: Drive may be only called once per instance of Replication -func (r *Replication) Drive(ctx context.Context, sender Sender, receiver Receiver) { - - var u updater = func(f func(*Replication)) State { - r.lock.Lock() - defer r.lock.Unlock() - if f != nil { - f(r) - } - return r.state - } - - var s state = statePlanning - var pre, post State - for s != nil { - preTime := time.Now() - pre = u(nil) - s = s(ctx, &r.Progress, sender, receiver, u) - delta := time.Now().Sub(preTime) - r.promSecsPerState.WithLabelValues(pre.String()).Observe(delta.Seconds()) - post = u(nil) - getLogger(ctx). - WithField("transition", fmt.Sprintf("%s => %s", pre, post)). - WithField("duration", delta). 
- Debug("main state transition") - if post == Working && pre != post { - getLogger(ctx).Info("start working") - } - } - - getLogger(ctx). - WithField("final_state", post). - Debug("main final state") -} - -func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string) { - if noCommonAncestor, ok := conflict.(*ConflictNoCommonAncestor); ok { - if len(noCommonAncestor.SortedReceiverVersions) == 0 { - // TODO this is hard-coded replication policy: most recent snapshot as source - var mostRecentSnap *pdu.FilesystemVersion - for n := len(noCommonAncestor.SortedSenderVersions) - 1; n >= 0; n-- { - if noCommonAncestor.SortedSenderVersions[n].Type == pdu.FilesystemVersion_Snapshot { - mostRecentSnap = noCommonAncestor.SortedSenderVersions[n] - break - } - } - if mostRecentSnap == nil { - return nil, "no snapshots available on sender side" - } - return []*pdu.FilesystemVersion{mostRecentSnap}, fmt.Sprintf("start replication at most recent snapshot %s", mostRecentSnap.RelName()) - } - } - return nil, "no automated way to handle conflict type" -} - -var RetryInterval = envconst.Duration("ZREPL_REPLICATION_RETRY_INTERVAL", 10 * time.Second) - -type Error interface { - error - Temporary() bool -} - -var _ Error = fsrep.Error(nil) -var _ Error = net.Error(nil) - -func isPermanent(err error) bool { - if e, ok := err.(Error); ok { - return !e.Temporary() - } - return true -} - -func statePlanning(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state { - - log := getLogger(ctx) - - log.Info("start planning") - - handlePlanningError := func(err error) state { - return u(func(r *Replication) { - ge := GlobalError{Err: err, Temporary: !isPermanent(err)} - log.WithError(ge).Error("encountered global error while planning replication") - r.err = ge - if !ge.Temporary { - r.state = PermanentError - } else { - r.sleepUntil = time.Now().Add(RetryInterval) - r.state = PlanningError - } - }).rsf() - } - - slfssres, err := 
sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) - if err != nil { - log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing sender filesystems") - return handlePlanningError(err) - } - sfss := slfssres.GetFilesystems() - // no progress here since we could run in a live-lock on connectivity issues - - rlfssres, err := receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) - if err != nil { - log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing receiver filesystems") - return handlePlanningError(err) - } - rfss := rlfssres.GetFilesystems() - ka.MadeProgress() // for both sender and receiver - - q := make([]*fsrep.Replication, 0, len(sfss)) - mainlog := log - for _, fs := range sfss { - - log := mainlog.WithField("filesystem", fs.Path) - - log.Debug("assessing filesystem") - - sfsvsres, err := sender.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path}) - if err != nil { - log.WithError(err).Error("cannot get remote filesystem versions") - return handlePlanningError(err) - } - sfsvs := sfsvsres.GetVersions() - ka.MadeProgress() - - if len(sfsvs) < 1 { - err := errors.New("sender does not have any versions") - log.Error(err.Error()) - q = append(q, fsrep.NewReplicationConflictError(fs.Path, err)) - continue - } - - receiverFSExists := false - for _, rfs := range rfss { - if rfs.Path == fs.Path { - receiverFSExists = true - } - } - - var rfsvs []*pdu.FilesystemVersion - if receiverFSExists { - rfsvsres, err := receiver.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path}) - if err != nil { - if _, ok := err.(*FilteredError); ok { - log.Info("receiver ignores filesystem") - continue - } - log.WithError(err).Error("receiver error") - return handlePlanningError(err) - } - rfsvs = rfsvsres.GetVersions() - } else { - rfsvs = []*pdu.FilesystemVersion{} - } - ka.MadeProgress() - - path, conflict := IncrementalPath(rfsvs, sfsvs) - if conflict != nil { - var 
msg string - path, msg = resolveConflict(conflict) // no shadowing allowed! - if path != nil { - log.WithField("conflict", conflict).Info("conflict") - log.WithField("resolution", msg).Info("automatically resolved") - } else { - log.WithField("conflict", conflict).Error("conflict") - log.WithField("problem", msg).Error("cannot resolve conflict") - } - } - ka.MadeProgress() - if path == nil { - q = append(q, fsrep.NewReplicationConflictError(fs.Path, conflict)) - continue - } - - var promBytesReplicated *prometheus.CounterVec - u(func(replication *Replication) { // FIXME args struct like in pruner (also use for sender and receiver) - promBytesReplicated = replication.promBytesReplicated - }) - fsrfsm := fsrep.BuildReplication(fs.Path, promBytesReplicated.WithLabelValues(fs.Path)) - if len(path) == 1 { - fsrfsm.AddStep(nil, path[0]) - } else { - for i := 0; i < len(path)-1; i++ { - fsrfsm.AddStep(path[i], path[i+1]) - } - } - qitem := fsrfsm.Done() - ka.MadeProgress() - - log.Debug("compute send size estimate") - if err = qitem.UpdateSizeEsitmate(ctx, sender); err != nil { - log.WithError(err).Error("error computing size estimate") - return handlePlanningError(err) - } - ka.MadeProgress() - - q = append(q, qitem) - } - - ka.MadeProgress() - - return u(func(r *Replication) { - r.completed = nil - r.queue = q - r.err = nil - r.state = Working - }).rsf() -} - -func statePlanningError(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state { - var sleepUntil time.Time - u(func(r *Replication) { - sleepUntil = r.sleepUntil - }) - t := time.NewTimer(sleepUntil.Sub(time.Now())) - getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after planning error") - defer t.Stop() - select { - case <-ctx.Done(): - return u(func(r *Replication) { - r.state = PermanentError - r.err = ctx.Err() - }).rsf() - case <-t.C: - case <-wakeup.Wait(ctx): - } - return u(func(r *Replication) { - r.state = Planning - }).rsf() -} - -type GlobalError 
struct { - Err error - Temporary bool -} - -func (e GlobalError) Error() string { - errClass := "temporary" - if !e.Temporary { - errClass = "permanent" - } - return fmt.Sprintf("%s global error: %s", errClass, e.Err) -} - -type FilesystemsReplicationFailedError struct { - FilesystemsWithError []*fsrep.Replication -} - -func (e FilesystemsReplicationFailedError) Error() string { - allSame := true - lastErr := e.FilesystemsWithError[0].Err().Error() - for _, fs := range e.FilesystemsWithError { - fsErr := fs.Err().Error() - allSame = allSame && lastErr == fsErr - } - - fsstr := "multiple filesystems" - if len(e.FilesystemsWithError) == 1 { - fsstr = fmt.Sprintf("filesystem %s", e.FilesystemsWithError[0].FS()) - } - errorStr := lastErr - if !allSame { - errorStr = "multiple different errors" - } - return fmt.Sprintf("%s could not be replicated: %s", fsstr, errorStr) -} - -func stateWorking(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state { - - var active *fsrep.Replication - rsfNext := u(func(r *Replication) { - - r.err = nil - - newq := make([]*fsrep.Replication, 0, len(r.queue)) - for i := range r.queue { - if r.queue[i].CanRetry() { - newq = append(newq, r.queue[i]) - } else { - r.completed = append(r.completed, r.queue[i]) - } - } - sort.SliceStable(newq, func(i, j int) bool { - return newq[i].NextStepDate().Before(newq[j].NextStepDate()) - }) - r.queue = newq - - if len(r.queue) == 0 { - r.state = Completed - fsWithErr := FilesystemsReplicationFailedError{ // prepare it - FilesystemsWithError: make([]*fsrep.Replication, 0, len(r.completed)), - } - for _, fs := range r.completed { - if fs.CanRetry() { - panic(fmt.Sprintf("implementation error: completed contains retryable FS %s %#v", - fs.FS(), fs.Err())) - } - if fs.Err() != nil { - fsWithErr.FilesystemsWithError = append(fsWithErr.FilesystemsWithError, fs) - } - } - if len(fsWithErr.FilesystemsWithError) > 0 { - r.err = fsWithErr - r.state = PermanentError - } - 
return - } - - active = r.queue[0] // do not dequeue: if it's done, it will be sorted the next time we check for more work - r.active = active - }).rsf() - - if active == nil { - return rsfNext - } - - activeCtx := fsrep.WithLogger(ctx, getLogger(ctx).WithField("fs", active.FS())) - err := active.Retry(activeCtx, ka, sender, receiver) - u(func(r *Replication) { - r.active = nil - }).rsf() - - if err != nil { - if err.ContextErr() && ctx.Err() != nil { - getLogger(ctx).WithError(err). - Info("filesystem replication was cancelled") - u(func(r*Replication) { - r.err = GlobalError{Err: err, Temporary: false} - r.state = PermanentError - }) - } else if err.LocalToFS() { - getLogger(ctx).WithError(err). - Error("filesystem replication encountered a filesystem-specific error") - // we stay in this state and let the queuing logic above de-prioritize this failing FS - } else if err.Temporary() { - getLogger(ctx).WithError(err). - Error("filesystem encountered a non-filesystem-specific temporary error, enter retry-wait") - u(func(r *Replication) { - r.err = GlobalError{Err: err, Temporary: true} - r.sleepUntil = time.Now().Add(RetryInterval) - r.state = WorkingWait - }).rsf() - } else { - getLogger(ctx).WithError(err). 
- Error("encountered a permanent non-filesystem-specific error") - u(func(r *Replication) { - r.err = GlobalError{Err: err, Temporary: false} - r.state = PermanentError - }).rsf() - } - } - - return u(nil).rsf() -} - -func stateWorkingWait(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state { - var sleepUntil time.Time - u(func(r *Replication) { - sleepUntil = r.sleepUntil - }) - t := time.NewTimer(RetryInterval) - getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after error") - defer t.Stop() - select { - case <-ctx.Done(): - return u(func(r *Replication) { - r.state = PermanentError - r.err = ctx.Err() - }).rsf() - - case <-t.C: - case <-wakeup.Wait(ctx): - } - return u(func(r *Replication) { - r.state = Working - }).rsf() -} - -// Report provides a summary of the progress of the Replication, -// i.e., a condensed dump of the internal state machine. -// Report is safe to be called asynchronously while Drive is running. -func (r *Replication) Report() *Report { - r.lock.Lock() - defer r.lock.Unlock() - - rep := Report{ - Status: r.state.String(), - SleepUntil: r.sleepUntil, - } - - if r.err != nil { - rep.Problem = r.err.Error() - } - - if r.state&(Planning|PlanningError) != 0 { - return &rep - } - - rep.Pending = make([]*fsrep.Report, 0, len(r.queue)) - rep.Completed = make([]*fsrep.Report, 0, len(r.completed)) // room for active (potentially) - - // since r.active == r.queue[0], do not contain it in pending output - pending := r.queue - if r.active != nil { - rep.Active = r.active.Report() - pending = r.queue[1:] - } - for _, fsr := range pending { - rep.Pending= append(rep.Pending, fsr.Report()) - } - for _, fsr := range r.completed { - rep.Completed = append(rep.Completed, fsr.Report()) - } - - return &rep -} - -func (r *Replication) State() State { - r.lock.Lock() - defer r.lock.Unlock() - return r.state -} diff --git a/replication/replication.go b/replication/replication.go new file mode 100644 index 
0000000..7f9e35b --- /dev/null +++ b/replication/replication.go @@ -0,0 +1,13 @@ +// Package replication implements replication of filesystems with existing +// versions (snapshots) from a sender to a receiver. +package replication + +import ( + "context" + + "github.com/zrepl/zrepl/replication/driver" +) + +func Do(ctx context.Context, planner driver.Planner) (driver.ReportFunc, driver.WaitFunc) { + return driver.Do(ctx, planner) +} diff --git a/replication/report/replication_report.go b/replication/report/replication_report.go new file mode 100644 index 0000000..7f8f605 --- /dev/null +++ b/replication/report/replication_report.go @@ -0,0 +1,152 @@ +package report + +import ( + "encoding/json" + "time" +) + +type Report struct { + StartAt, FinishAt time.Time + WaitReconnectSince, WaitReconnectUntil time.Time + WaitReconnectError *TimedError + Attempts []*AttemptReport +} + +var _, _ = json.Marshal(&Report{}) + +type TimedError struct { + Err string + Time time.Time +} + +func NewTimedError(err string, t time.Time) *TimedError { + if err == "" { + panic("error must be empty") + } + if t.IsZero() { + panic("t must be non-zero") + } + return &TimedError{err, t} +} + +func (s *TimedError) Error() string { + return s.Err +} + +var _, _ = json.Marshal(&TimedError{}) + +type AttemptReport struct { + State AttemptState + StartAt, FinishAt time.Time + PlanError *TimedError + Filesystems []*FilesystemReport +} + +type AttemptState string + +const ( + AttemptPlanning AttemptState = "planning" + AttemptPlanningError AttemptState = "planning-error" + AttemptFanOutFSs AttemptState = "fan-out-filesystems" + AttemptFanOutError AttemptState = "filesystem-error" + AttemptDone AttemptState = "done" +) + +type FilesystemState string + +const ( + FilesystemPlanning FilesystemState = "planning" + FilesystemPlanningErrored FilesystemState = "planning-error" + FilesystemStepping FilesystemState = "stepping" + FilesystemSteppingErrored FilesystemState = "step-error" + FilesystemDone 
FilesystemState = "done" +) + +type FilesystemReport struct { + Info *FilesystemInfo + + State FilesystemState + + // Valid in State = FilesystemPlanningErrored + PlanError *TimedError + // Valid in State = FilesystemSteppingErrored + StepError *TimedError + + // Valid in State = FilesystemStepping + CurrentStep int + Steps []*StepReport +} + +type FilesystemInfo struct { + Name string +} + +type StepReport struct { + Info *StepInfo +} + +type StepInfo struct { + From, To string + BytesExpected int64 + BytesReplicated int64 +} + +func (a *AttemptReport) BytesSum() (expected, replicated int64) { + for _, fs := range a.Filesystems { + e, r := fs.BytesSum() + expected += e + replicated += r + } + return expected, replicated +} + +func (f *FilesystemReport) BytesSum() (expected, replicated int64) { + for _, step := range f.Steps { + expected += step.Info.BytesExpected + replicated += step.Info.BytesReplicated + } + return +} + +func (f *AttemptReport) FilesystemsByState() map[FilesystemState][]*FilesystemReport { + r := make(map[FilesystemState][]*FilesystemReport, 4) + for _, fs := range f.Filesystems { + l := r[fs.State] + l = append(l, fs) + r[fs.State] = l + } + return r +} + +func (f *FilesystemReport) Error() *TimedError { + switch f.State { + case FilesystemPlanningErrored: + return f.PlanError + case FilesystemSteppingErrored: + return f.StepError + } + return nil +} + +// may return nil +func (f *FilesystemReport) NextStep() *StepReport { + switch f.State { + case FilesystemDone: + return nil + case FilesystemPlanningErrored: + return nil + case FilesystemSteppingErrored: + return nil + case FilesystemPlanning: + return nil + case FilesystemStepping: + // invariant is that this is always correct + // TODO what about 0-length Steps but short intermediary state? 
+ return f.Steps[f.CurrentStep] + } + panic("unreachable") +} + +func (f *StepReport) IsIncremental() bool { + return f.Info.From != "" // FIXME change to ZFS semantics (To != "") +} diff --git a/replication/state_enumer.go b/replication/state_enumer.go deleted file mode 100644 index 9708fff..0000000 --- a/replication/state_enumer.go +++ /dev/null @@ -1,76 +0,0 @@ -// Code generated by "enumer -type=State"; DO NOT EDIT. - -package replication - -import ( - "fmt" -) - -const ( - _StateName_0 = "PlanningPlanningError" - _StateName_1 = "Working" - _StateName_2 = "WorkingWait" - _StateName_3 = "Completed" - _StateName_4 = "PermanentError" -) - -var ( - _StateIndex_0 = [...]uint8{0, 8, 21} - _StateIndex_1 = [...]uint8{0, 7} - _StateIndex_2 = [...]uint8{0, 11} - _StateIndex_3 = [...]uint8{0, 9} - _StateIndex_4 = [...]uint8{0, 14} -) - -func (i State) String() string { - switch { - case 1 <= i && i <= 2: - i -= 1 - return _StateName_0[_StateIndex_0[i]:_StateIndex_0[i+1]] - case i == 4: - return _StateName_1 - case i == 8: - return _StateName_2 - case i == 16: - return _StateName_3 - case i == 32: - return _StateName_4 - default: - return fmt.Sprintf("State(%d)", i) - } -} - -var _StateValues = []State{1, 2, 4, 8, 16, 32} - -var _StateNameToValueMap = map[string]State{ - _StateName_0[0:8]: 1, - _StateName_0[8:21]: 2, - _StateName_1[0:7]: 4, - _StateName_2[0:11]: 8, - _StateName_3[0:9]: 16, - _StateName_4[0:14]: 32, -} - -// StateString retrieves an enum value from the enum constants string name. -// Throws an error if the param is not part of the enum. -func StateString(s string) (State, error) { - if val, ok := _StateNameToValueMap[s]; ok { - return val, nil - } - return 0, fmt.Errorf("%s does not belong to State values", s) -} - -// StateValues returns all values of the enum -func StateValues() []State { - return _StateValues -} - -// IsAState returns "true" if the value is listed in the enum definition. 
"false" otherwise -func (i State) IsAState() bool { - for _, v := range _StateValues { - if i == v { - return true - } - } - return false -} diff --git a/rpc/dataconn/dataconn_client.go b/rpc/dataconn/dataconn_client.go index a12292b..8473d97 100644 --- a/rpc/dataconn/dataconn_client.go +++ b/rpc/dataconn/dataconn_client.go @@ -7,7 +7,7 @@ import ( "strings" "github.com/golang/protobuf/proto" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn/stream" "github.com/zrepl/zrepl/transport" "github.com/zrepl/zrepl/zfs" @@ -213,3 +213,23 @@ func (c *Client) ReqRecv(ctx context.Context, req *pdu.ReceiveReq, streamCopier return res.res, cause } + + +func (c *Client) ReqPing(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + conn, err := c.getWire(ctx) + if err != nil { + return nil, err + } + defer c.putWire(conn) + + if err := c.send(ctx, conn, EndpointPing, req, nil); err != nil { + return nil, err + } + + var res pdu.PingRes + if err := c.recv(ctx, conn, &res); err != nil { + return nil, err + } + + return &res, nil +} \ No newline at end of file diff --git a/rpc/dataconn/dataconn_server.go b/rpc/dataconn/dataconn_server.go index 41f5781..ea6700b 100644 --- a/rpc/dataconn/dataconn_server.go +++ b/rpc/dataconn/dataconn_server.go @@ -7,7 +7,7 @@ import ( "github.com/golang/protobuf/proto" "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn/stream" "github.com/zrepl/zrepl/transport" "github.com/zrepl/zrepl/zfs" @@ -25,6 +25,8 @@ type Handler interface { // It is guaranteed that Server calls Receive with a stream that holds the IdleConnTimeout // configured in ServerConfig.Shared.IdleConnTimeout. 
Receive(ctx context.Context, r *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error) + // PingDataconn handles a PingReq + PingDataconn(ctx context.Context, r *pdu.PingReq) (*pdu.PingRes, error) } type Logger = logger.Logger @@ -125,6 +127,13 @@ func (s *Server) serveConn(nc *transport.AuthConn) { return } res, handlerErr = s.h.Receive(ctx, &req, &streamCopier{streamConn: c, closeStreamOnClose: false}) // SHADOWING + case EndpointPing: + var req pdu.PingReq + if err := proto.Unmarshal(reqStructured, &req); err != nil { + s.log.WithError(err).Error("cannot unmarshal ping request") + return + } + res, handlerErr = s.h.PingDataconn(ctx, &req) // SHADOWING default: s.log.WithField("endpoint", endpoint).Error("unknown endpoint") handlerErr = fmt.Errorf("requested endpoint does not exist") @@ -137,12 +146,17 @@ func (s *Server) serveConn(nc *transport.AuthConn) { // if marshaling fails. We consider failed marshaling a handler error var protobuf *bytes.Buffer if handlerErr == nil { - protobufBytes, err := proto.Marshal(res) - if err != nil { - s.log.WithError(err).Error("cannot marshal handler protobuf") - handlerErr = err + if res == nil { + handlerErr = fmt.Errorf("implementation error: handler for endpoint %q returns nil error and nil result", endpoint) + s.log.WithError(err).Error("handle implementation error") + } else { + protobufBytes, err := proto.Marshal(res) + if err != nil { + s.log.WithError(err).Error("cannot marshal handler protobuf") + handlerErr = err + } + protobuf = bytes.NewBuffer(protobufBytes) // SHADOWING } - protobuf = bytes.NewBuffer(protobufBytes) // SHADOWING } var resHeaderBuf bytes.Buffer diff --git a/rpc/dataconn/dataconn_shared.go b/rpc/dataconn/dataconn_shared.go index 0ea5a34..43ccf92 100644 --- a/rpc/dataconn/dataconn_shared.go +++ b/rpc/dataconn/dataconn_shared.go @@ -10,6 +10,7 @@ import ( ) const ( + EndpointPing string = "/v1/ping" EndpointSend string = "/v1/send" EndpointRecv string = "/v1/recv" ) diff --git 
a/rpc/dataconn/microbenchmark/microbenchmark.go b/rpc/dataconn/microbenchmark/microbenchmark.go index 287f7e1..0e11a8e 100644 --- a/rpc/dataconn/microbenchmark/microbenchmark.go +++ b/rpc/dataconn/microbenchmark/microbenchmark.go @@ -24,7 +24,7 @@ import ( "github.com/pkg/profile" "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn" "github.com/zrepl/zrepl/rpc/dataconn/timeoutconn" "github.com/zrepl/zrepl/transport" @@ -77,6 +77,12 @@ func (devNullHandler) Receive(ctx context.Context, r *pdu.ReceiveReq, stream zfs return &res, err } +func (devNullHandler) PingDataconn(ctx context.Context, r *pdu.PingReq) (*pdu.PingRes, error) { + return &pdu.PingRes{ + Echo: r.GetMessage(), + }, nil +} + type tcpConnecter struct { addr string } diff --git a/rpc/grpcclientidentity/authlistener_grpc_adaptor.go b/rpc/grpcclientidentity/authlistener_grpc_adaptor.go index accd124..3c62cf4 100644 --- a/rpc/grpcclientidentity/authlistener_grpc_adaptor.go +++ b/rpc/grpcclientidentity/authlistener_grpc_adaptor.go @@ -105,11 +105,12 @@ func NewInterceptors(logger Logger, clientIdentityKey interface{}) (unary grpc.U if !ok { panic("peer.FromContext expected to return a peer in grpc.UnaryServerInterceptor") } - logger.WithField("peer", fmt.Sprintf("%v", p)).Debug("peer") + logger.WithField("peer_addr", fmt.Sprintf("%s", p.Addr)).Debug("peer addr") a, ok := p.AuthInfo.(*authConnAuthType) if !ok { panic(fmt.Sprintf("NewInterceptors must be used in combination with grpc.NewTransportCredentials, but got auth type %T", p.AuthInfo)) } + logger.WithField("peer_client_identity", a.clientIdentity).Debug("peer client identity") ctx = context.WithValue(ctx, clientIdentityKey, a.clientIdentity) return handler(ctx, req) } diff --git a/rpc/grpcclientidentity/grpchelper/authlistener_grpc_adaptor_wrapper.go b/rpc/grpcclientidentity/grpchelper/authlistener_grpc_adaptor_wrapper.go index 7e46681..1b9f3a2 
100644 --- a/rpc/grpcclientidentity/grpchelper/authlistener_grpc_adaptor_wrapper.go +++ b/rpc/grpcclientidentity/grpchelper/authlistener_grpc_adaptor_wrapper.go @@ -36,7 +36,7 @@ func ClientConn(cn transport.Connecter, log Logger) *grpc.ClientConn { }) dialerOption := grpc.WithDialer(grpcclientidentity.NewDialer(log, cn)) cred := grpc.WithTransportCredentials(grpcclientidentity.NewTransportCredentials(log)) - ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second) + ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second) // FIXME constant defer cancel() cc, err := grpc.DialContext(ctx, "doesn't matter done by dialer", dialerOption, cred, ka) if err != nil { diff --git a/rpc/rpc_client.go b/rpc/rpc_client.go index efcaf98..818cd44 100644 --- a/rpc/rpc_client.go +++ b/rpc/rpc_client.go @@ -2,13 +2,18 @@ package rpc import ( "context" + "errors" + "fmt" "net" + "sync" + "sync/atomic" "time" "google.golang.org/grpc" - "github.com/zrepl/zrepl/replication" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/google/uuid" + "github.com/zrepl/zrepl/replication/logic" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn" "github.com/zrepl/zrepl/rpc/grpcclientidentity/grpchelper" "github.com/zrepl/zrepl/rpc/versionhandshake" @@ -24,11 +29,12 @@ type Client struct { controlClient pdu.ReplicationClient // this the grpc client instance, see constructor controlConn *grpc.ClientConn loggers Loggers + closed chan struct{} } -var _ replication.Endpoint = &Client{} -var _ replication.Sender = &Client{} -var _ replication.Receiver = &Client{} +var _ logic.Endpoint = &Client{} +var _ logic.Sender = &Client{} +var _ logic.Receiver = &Client{} type DialContextFunc = func(ctx context.Context, network string, addr string) (net.Conn, error) @@ -41,14 +47,21 @@ func NewClient(cn transport.Connecter, loggers Loggers) *Client { c := &Client{ loggers: loggers, + closed: make(chan struct{}), } grpcConn := 
grpchelper.ClientConn(muxedConnecter.control, loggers.Control) go func() { - for { + ctx, cancel := context.WithCancel(context.Background()) + go func() { + <-c.closed + cancel() + }() + defer cancel() + for ctx.Err() == nil { state := grpcConn.GetState() loggers.General.WithField("grpc_state", state.String()).Debug("grpc state change") - grpcConn.WaitForStateChange(context.TODO(), state) + grpcConn.WaitForStateChange(ctx, state) } }() c.controlClient = pdu.NewReplicationClient(grpcConn) @@ -59,8 +72,9 @@ func NewClient(cn transport.Connecter, loggers Loggers) *Client { } func (c *Client) Close() { + close(c.closed) if err := c.controlConn.Close(); err != nil { - c.loggers.General.WithError(err).Error("cannot cloe control connection") + c.loggers.General.WithError(err).Error("cannot close control connection") } // TODO c.dataClient should have Close() } @@ -101,6 +115,72 @@ func (c *Client) ReplicationCursor(ctx context.Context, in *pdu.ReplicationCurso return c.controlClient.ReplicationCursor(ctx, in) } +func (c *Client) WaitForConnectivity(ctx context.Context) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + msg := uuid.New().String() + req := pdu.PingReq{Message: msg} + var ctrlOk, dataOk int32 + loggers := GetLoggersOrPanic(ctx) + var wg sync.WaitGroup + wg.Add(2) + checkRes := func(res *pdu.PingRes, err error, logger Logger, okVar *int32) { + if err == nil && res.GetEcho() != req.GetMessage() { + err = errors.New("pilot message not echoed correctly") + } + if err == context.Canceled { + err = nil + } + if err != nil { + logger.WithError(err).Error("ping failed") + atomic.StoreInt32(okVar, 0) + cancel() + } else { + atomic.StoreInt32(okVar, 1) + } + } + go func() { + defer wg.Done() + ctrl, ctrlErr := c.controlClient.Ping(ctx, &req, grpc.FailFast(false)) + checkRes(ctrl, ctrlErr, loggers.Control, &ctrlOk) + }() + go func() { + defer wg.Done() + for ctx.Err() == nil { + data, dataErr := c.dataClient.ReqPing(ctx, &req) + // dataClient uses 
transport.Connecter, which doesn't expose FailFast(false) + // => we need to mask dial timeouts + if err, ok := dataErr.(interface{ Temporary() bool }); ok && err.Temporary() { + // Rate-limit pings here in case Temporary() is a mis-classification + // or returns immediately (this is a tight loop in that case) + // TODO keep this in lockstep with controlClient + // => don't use FailFast for control, but check that both control and data worked + time.Sleep(envconst.Duration("ZREPL_RPC_DATACONN_PING_SLEEP", 1*time.Second)) + continue + } + // it's not a dial timeout, + checkRes(data, dataErr, loggers.Data, &dataOk) + return + } + }() + wg.Wait() + var what string + if ctrlOk == 1 && dataOk == 1 { + return nil + } + if ctrlOk == 0 { + what += "control" + } + if dataOk == 0 { + if len(what) > 0 { + what += " and data" + } else { + what += "data" + } + } + return fmt.Errorf("%s rpc failed to respond to ping rpcs", what) +} + func (c *Client) ResetConnectBackoff() { c.controlConn.ResetConnectBackoff() } diff --git a/rpc/rpc_server.go b/rpc/rpc_server.go index 3abbc18..f0f0f6b 100644 --- a/rpc/rpc_server.go +++ b/rpc/rpc_server.go @@ -7,7 +7,7 @@ import ( "google.golang.org/grpc" "github.com/zrepl/zrepl/endpoint" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn" "github.com/zrepl/zrepl/rpc/grpcclientidentity" "github.com/zrepl/zrepl/rpc/netadaptor" diff --git a/rpc/transportmux/transportmux.go b/rpc/transportmux/transportmux.go index cb6f7ca..f78c1e3 100644 --- a/rpc/transportmux/transportmux.go +++ b/rpc/transportmux/transportmux.go @@ -7,10 +7,10 @@ package transportmux import ( "context" + "fmt" "io" "net" "time" - "fmt" "github.com/zrepl/zrepl/logger" "github.com/zrepl/zrepl/transport" @@ -111,7 +111,7 @@ func Demux(ctx context.Context, rawListener transport.AuthenticatedListener, lab if ctx.Err() != nil { return } - getLog(ctx).WithError(err).Error("accept error") + 
getLog(ctx).WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("accept error") continue } closeConn := func() { diff --git a/rpc/versionhandshake/versionhandshake.go b/rpc/versionhandshake/versionhandshake.go index 3864868..03835ee 100644 --- a/rpc/versionhandshake/versionhandshake.go +++ b/rpc/versionhandshake/versionhandshake.go @@ -26,14 +26,22 @@ type HandshakeError struct { msg string // If not nil, the underlying IO error that caused the handshake to fail. IOError error + isAcceptError bool } var _ net.Error = &HandshakeError{} func (e HandshakeError) Error() string { return e.msg } -// Always true to enable usage in a net.Listener. -func (e HandshakeError) Temporary() bool { return true } +// Like with net.OpError (Go issue 6163), a client failing to handshake +// should be a temporary Accept error toward the Listener. +func (e HandshakeError) Temporary() bool { + if e.isAcceptError { + return true + } + te, ok := e.IOError.(interface{ Temporary() bool }); + return ok && te.Temporary() +} // If the underlying IOError was net.Error.Timeout(), Timeout() returns that value. // Otherwise false. 
@@ -142,14 +150,14 @@ func (m *HandshakeMessage) DecodeReader(r io.Reader, maxLen int) error { return nil } -func DoHandshakeCurrentVersion(conn net.Conn, deadline time.Time) error { +func DoHandshakeCurrentVersion(conn net.Conn, deadline time.Time) *HandshakeError { // current protocol version is hardcoded here return DoHandshakeVersion(conn, deadline, 1) } const HandshakeMessageMaxLen = 16 * 4096 -func DoHandshakeVersion(conn net.Conn, deadline time.Time, version int) error { +func DoHandshakeVersion(conn net.Conn, deadline time.Time, version int) *HandshakeError { ours := HandshakeMessage{ ProtocolVersion: version, Extensions: nil, diff --git a/rpc/versionhandshake/versionhandshake_transport_wrappers.go b/rpc/versionhandshake/versionhandshake_transport_wrappers.go index 660215e..09ead7a 100644 --- a/rpc/versionhandshake/versionhandshake_transport_wrappers.go +++ b/rpc/versionhandshake/versionhandshake_transport_wrappers.go @@ -55,6 +55,7 @@ func (l HandshakeListener) Accept(ctx context.Context) (*transport.AuthConn, err dl = time.Now().Add(l.timeout) // shadowing } if err := DoHandshakeCurrentVersion(conn, dl); err != nil { + err.isAcceptError = true conn.Close() return nil, err } diff --git a/util/chainlock/chainlock.go b/util/chainlock/chainlock.go new file mode 100644 index 0000000..6e7b5e5 --- /dev/null +++ b/util/chainlock/chainlock.go @@ -0,0 +1,42 @@ +// package chainlock implements a mutex whose Lock and Unlock +// methods return the lock itself, to enable chaining. 
+// +// Intended Usage +// +// defer s.lock().unlock() +// // drop lock while waiting for wait group +// func() { +// defer a.l.Unlock().Lock() +// fssesDone.Wait() +// }() +// +package chainlock + +import "sync" + +type L struct { + mtx sync.Mutex +} + +func New() *L { + return &L{} +} + +func (l *L) Lock() *L { + l.mtx.Lock() + return l +} + +func (l *L) Unlock() *L { + l.mtx.Unlock() + return l +} + +func (l *L) NewCond() *sync.Cond { + return sync.NewCond(&l.mtx) +} + +func (l *L) DropWhile(f func()) { + defer l.Unlock().Lock() + f() +} \ No newline at end of file diff --git a/util/envconst/envconst.go b/util/envconst/envconst.go index 8c13190..44bc9b8 100644 --- a/util/envconst/envconst.go +++ b/util/envconst/envconst.go @@ -40,3 +40,19 @@ func Int64(varname string, def int64) int64 { cache.Store(varname, d) return d } + +func Bool(varname string, def bool) bool { + if v, ok := cache.Load(varname); ok { + return v.(bool) + } + e := os.Getenv(varname) + if e == "" { + return def + } + d, err := strconv.ParseBool(e) + if err != nil { + panic(err) + } + cache.Store(varname, d) + return d +} diff --git a/util/watchdog/watchdog.go b/util/watchdog/watchdog.go deleted file mode 100644 index f7f98ee..0000000 --- a/util/watchdog/watchdog.go +++ /dev/null @@ -1,31 +0,0 @@ -package watchdog - -import ( - "fmt" - "sync" - "time" -) - -type KeepAlive struct { - mtx sync.Mutex - lastUpd time.Time -} - -func (p *KeepAlive) String() string { - if p.lastUpd.IsZero() { - return fmt.Sprintf("never updated") - } - return fmt.Sprintf("last update at %s", p.lastUpd) -} - -func (k *KeepAlive) MadeProgress() { - k.mtx.Lock() - defer k.mtx.Unlock() - k.lastUpd = time.Now() -} - -func (k *KeepAlive) CheckTimeout(timeout time.Duration, jitter time.Duration) (didTimeOut bool) { - k.mtx.Lock() - defer k.mtx.Unlock() - return k.lastUpd.Add(timeout - jitter).Before(time.Now()) -} diff --git a/zfs/conflict_string.go b/zfs/conflict_string.go deleted file mode 100644 index fa3452c..0000000 --- 
a/zfs/conflict_string.go +++ /dev/null @@ -1,16 +0,0 @@ -// Code generated by "stringer -type=Conflict"; DO NOT EDIT. - -package zfs - -import "strconv" - -const _Conflict_name = "ConflictIncrementalConflictAllRightConflictNoCommonAncestorConflictDiverged" - -var _Conflict_index = [...]uint8{0, 19, 35, 59, 75} - -func (i Conflict) String() string { - if i < 0 || i >= Conflict(len(_Conflict_index)-1) { - return "Conflict(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _Conflict_name[_Conflict_index[i]:_Conflict_index[i+1]] -} diff --git a/zfs/diff.go b/zfs/diff.go deleted file mode 100644 index 52eb84f..0000000 --- a/zfs/diff.go +++ /dev/null @@ -1,284 +0,0 @@ -package zfs - -import ( - "bytes" - "crypto/sha512" - "encoding/hex" - "fmt" - "io" - "os/exec" - "sort" -) - -type fsbyCreateTXG []FilesystemVersion - -func (l fsbyCreateTXG) Len() int { return len(l) } -func (l fsbyCreateTXG) Swap(i, j int) { l[i], l[j] = l[j], l[i] } -func (l fsbyCreateTXG) Less(i, j int) bool { - return l[i].CreateTXG < l[j].CreateTXG -} - -//go:generate stringer -type=Conflict -type Conflict int - -const ( - ConflictIncremental Conflict = iota // no conflict, incremental repl possible - ConflictAllRight // no conflict, initial repl possible - ConflictNoCommonAncestor - ConflictDiverged -) - -/* The receiver (left) wants to know if the sender (right) has more recent versions - - Left : | C | - Right: | A | B | C | D | E | - => : | C | D | E | - - Left: | C | - Right: | D | E | - => : , no common ancestor - - Left : | C | D | E | - Right: | A | B | C | - => : , the left has newer versions - - Left : | A | B | C | | F | - Right: | C | D | E | - => : | C | | F | => diverged => - -IMPORTANT: since ZFS currently does not export dataset UUIDs, the best heuristic to - identify a filesystem version is the tuple (name,creation) -*/ -type FilesystemDiff struct { - - // Which kind of conflict / "way forward" is possible. 
- // Check this first to determine the semantics of this struct's remaining members - Conflict Conflict - - // Conflict = Incremental | AllRight - // The incremental steps required to get left up to right's most recent version - // 0th element is the common ancestor, ordered by birthtime, oldest first - // If len() < 2, left and right are at same most recent version - // Conflict = otherwise - // nil; there is no incremental path for left to get to right's most recent version - IncrementalPath []FilesystemVersion - - // Conflict = Incremental | AllRight: nil - // Conflict = NoCommonAncestor: left as passed as input - // Conflict = Diverged: contains path from left most recent common ancestor (mrca) to most - // recent version on left - MRCAPathLeft []FilesystemVersion - // Conflict = Incremental | AllRight: nil - // Conflict = NoCommonAncestor: right as passed as input - // Conflict = Diverged: contains path from right most recent common ancestor (mrca) - // to most recent version on right - MRCAPathRight []FilesystemVersion -} - -func (f FilesystemDiff) String() (str string) { - var b bytes.Buffer - - fmt.Fprintf(&b, "%s, ", f.Conflict) - - switch f.Conflict { - case ConflictIncremental: - fmt.Fprintf(&b, "incremental path length %v, common ancestor at %s", len(f.IncrementalPath)-1, f.IncrementalPath[0]) - case ConflictAllRight: - fmt.Fprintf(&b, "%v versions, most recent is %s", len(f.MRCAPathRight)-1, f.MRCAPathRight[len(f.MRCAPathRight)-1]) - case ConflictDiverged: - fmt.Fprintf(&b, "diverged at %s", f.MRCAPathRight[0]) // right always has at least one snap...? 
- case ConflictNoCommonAncestor: - fmt.Fprintf(&b, "no diff to show") - default: - fmt.Fprintf(&b, "unknown conflict type, likely a bug") - } - - return b.String() -} - -// we must assume left and right are ordered ascendingly by ZFS_PROP_CREATETXG and that -// names are unique (bas ZFS_PROP_GUID replacement) -func MakeFilesystemDiff(left, right []FilesystemVersion) (diff FilesystemDiff) { - - if right == nil { - panic("right must not be nil") - } - if left == nil { - diff = FilesystemDiff{ - IncrementalPath: nil, - Conflict: ConflictAllRight, - MRCAPathLeft: left, - MRCAPathRight: right, - } - return - } - - // Assert both left and right are sorted by createtxg - { - var leftSorted, rightSorted fsbyCreateTXG - leftSorted = left - rightSorted = right - if !sort.IsSorted(leftSorted) { - panic("cannot make filesystem diff: unsorted left") - } - if !sort.IsSorted(rightSorted) { - panic("cannot make filesystem diff: unsorted right") - } - } - - // Find most recent common ancestor by name, preferring snapshots over bookmarks - mrcaLeft := len(left) - 1 - var mrcaRight int -outer: - for ; mrcaLeft >= 0; mrcaLeft-- { - for i := len(right) - 1; i >= 0; i-- { - if left[mrcaLeft].Guid == right[i].Guid { - mrcaRight = i - if i-1 >= 0 && right[i-1].Guid == right[i].Guid && right[i-1].Type == Snapshot { - // prefer snapshots over bookmarks - mrcaRight = i - 1 - } - break outer - } - } - } - - // no common ancestor? - if mrcaLeft == -1 { - diff = FilesystemDiff{ - IncrementalPath: nil, - Conflict: ConflictNoCommonAncestor, - MRCAPathLeft: left, - MRCAPathRight: right, - } - return - } - - // diverged? 
- if mrcaLeft != len(left)-1 { - diff = FilesystemDiff{ - IncrementalPath: nil, - Conflict: ConflictDiverged, - MRCAPathLeft: left[mrcaLeft:], - MRCAPathRight: right[mrcaRight:], - } - return - } - - if mrcaLeft != len(left)-1 { - panic("invariant violated: mrca on left must be the last item in the left list") - } - - // incPath must not contain bookmarks except initial one, - // and only if that initial bookmark's snapshot is gone - incPath := make([]FilesystemVersion, 0, len(right)) - incPath = append(incPath, right[mrcaRight]) - // right[mrcaRight] may be a bookmark if there's no equally named snapshot - for i := mrcaRight + 1; i < len(right); i++ { - if right[i].Type != Bookmark { - incPath = append(incPath, right[i]) - } - } - - diff = FilesystemDiff{ - IncrementalPath: incPath, - } - return -} - -const ZREPL_PLACEHOLDER_PROPERTY_NAME string = "zrepl:placeholder" - -type FilesystemState struct { - Placeholder bool - // TODO extend with resume token when that feature is finally added -} - -// A somewhat efficient way to determine if a filesystem exists on this host. 
-// Particularly useful if exists is called more than once (will only fork exec once and cache the result) -func ZFSListFilesystemState() (localState map[string]FilesystemState, err error) { - - var actual [][]string - if actual, err = ZFSList([]string{"name", ZREPL_PLACEHOLDER_PROPERTY_NAME}, "-t", "filesystem,volume"); err != nil { - return - } - - localState = make(map[string]FilesystemState, len(actual)) - for _, e := range actual { - dp, err := NewDatasetPath(e[0]) - if err != nil { - return nil, fmt.Errorf("ZFS does not return parseable dataset path: %s", e[0]) - } - placeholder, _ := IsPlaceholder(dp, e[1]) - localState[e[0]] = FilesystemState{ - placeholder, - } - } - return - -} - -// Computes the value for the ZREPL_PLACEHOLDER_PROPERTY_NAME ZFS user property -// to mark the given DatasetPath p as a placeholder -// -// We cannot simply use booleans here since user properties are always -// inherited. -// -// We hash the DatasetPath and use it to check for a given path if it is the -// one originally marked as placeholder. -// -// However, this prohibits moving datasets around via `zfs rename`. The -// placeholder attribute must be re-computed for the dataset path after the -// move. -// -// TODO better solution available? 
-func PlaceholderPropertyValue(p *DatasetPath) string { - ps := []byte(p.ToString()) - sum := sha512.Sum512_256(ps) - return hex.EncodeToString(sum[:]) -} - -func IsPlaceholder(p *DatasetPath, placeholderPropertyValue string) (isPlaceholder bool, err error) { - expected := PlaceholderPropertyValue(p) - isPlaceholder = expected == placeholderPropertyValue - if !isPlaceholder { - err = fmt.Errorf("expected %s, has %s", expected, placeholderPropertyValue) - } - return -} - -// for nonexistent FS, isPlaceholder == false && err == nil -func ZFSIsPlaceholderFilesystem(p *DatasetPath) (isPlaceholder bool, err error) { - props, err := zfsGet(p.ToString(), []string{ZREPL_PLACEHOLDER_PROPERTY_NAME}, sourceAny) - if err == io.ErrUnexpectedEOF { - // interpret this as an early exit of the zfs binary due to the fs not existing - return false, nil - } else if err != nil { - return false, err - } - isPlaceholder, _ = IsPlaceholder(p, props.Get(ZREPL_PLACEHOLDER_PROPERTY_NAME)) - return -} - -func ZFSCreatePlaceholderFilesystem(p *DatasetPath) (err error) { - v := PlaceholderPropertyValue(p) - cmd := exec.Command(ZFS_BINARY, "create", - "-o", fmt.Sprintf("%s=%s", ZREPL_PLACEHOLDER_PROPERTY_NAME, v), - "-o", "mountpoint=none", - p.ToString()) - - stderr := bytes.NewBuffer(make([]byte, 0, 1024)) - cmd.Stderr = stderr - - if err = cmd.Start(); err != nil { - return err - } - - if err = cmd.Wait(); err != nil { - err = &ZFSError{ - Stderr: stderr.Bytes(), - WaitErr: err, - } - } - - return -} diff --git a/zfs/diff_test.go b/zfs/diff_test.go deleted file mode 100644 index 0dce4a1..0000000 --- a/zfs/diff_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package zfs - -import ( - "github.com/stretchr/testify/assert" - "strconv" - "strings" - "testing" - "time" -) - -func fsvlist(fsv ...string) (r []FilesystemVersion) { - - r = make([]FilesystemVersion, len(fsv)) - for i, f := range fsv { - - // parse the id from fsvlist. 
it is used to derivce Guid,CreateTXG and Creation attrs - split := strings.Split(f, ",") - if len(split) != 2 { - panic("invalid fsv spec") - } - id, err := strconv.Atoi(split[1]) - if err != nil { - panic(err) - } - - if strings.HasPrefix(f, "#") { - r[i] = FilesystemVersion{ - Name: strings.TrimPrefix(f, "#"), - Type: Bookmark, - Guid: uint64(id), - CreateTXG: uint64(id), - Creation: time.Unix(0, 0).Add(time.Duration(id) * time.Second), - } - } else if strings.HasPrefix(f, "@") { - r[i] = FilesystemVersion{ - Name: strings.TrimPrefix(f, "@"), - Type: Snapshot, - Guid: uint64(id), - CreateTXG: uint64(id), - Creation: time.Unix(0, 0).Add(time.Duration(id) * time.Second), - } - } else { - panic("invalid character") - } - } - return -} - -func doTest(left, right []FilesystemVersion, validate func(d FilesystemDiff)) { - var d FilesystemDiff - d = MakeFilesystemDiff(left, right) - validate(d) -} - -func TestMakeFilesystemDiff_IncrementalSnapshots(t *testing.T) { - - l := fsvlist - - // basic functionality - doTest(l("@a,1", "@b,2"), l("@a,1", "@b,2", "@c,3", "@d,4"), func(d FilesystemDiff) { - assert.Equal(t, l("@b,2", "@c,3", "@d,4"), d.IncrementalPath) - }) - - // no common ancestor - doTest(l(), l("@a,1"), func(d FilesystemDiff) { - assert.Nil(t, d.IncrementalPath) - assert.EqualValues(t, d.Conflict, ConflictNoCommonAncestor) - assert.Equal(t, l("@a,1"), d.MRCAPathRight) - }) - doTest(l("@a,1", "@b,2"), l("@c,3", "@d,4"), func(d FilesystemDiff) { - assert.Nil(t, d.IncrementalPath) - assert.EqualValues(t, d.Conflict, ConflictNoCommonAncestor) - assert.Equal(t, l("@c,3", "@d,4"), d.MRCAPathRight) - }) - - // divergence is detected - doTest(l("@a,1", "@b1,2"), l("@a,1", "@b2,3"), func(d FilesystemDiff) { - assert.Nil(t, d.IncrementalPath) - assert.EqualValues(t, d.Conflict, ConflictDiverged) - assert.Equal(t, l("@a,1", "@b1,2"), d.MRCAPathLeft) - assert.Equal(t, l("@a,1", "@b2,3"), d.MRCAPathRight) - }) - - // gaps before most recent common ancestor do not matter - 
doTest(l("@a,1", "@b,2", "@c,3"), l("@a,1", "@c,3", "@d,4"), func(d FilesystemDiff) { - assert.Equal(t, l("@c,3", "@d,4"), d.IncrementalPath) - }) - -} - -func TestMakeFilesystemDiff_BookmarksSupport(t *testing.T) { - l := fsvlist - - // bookmarks are used - doTest(l("@a,1"), l("#a,1", "@b,2"), func(d FilesystemDiff) { - assert.Equal(t, l("#a,1", "@b,2"), d.IncrementalPath) - }) - - // boomarks are stripped from IncrementalPath (cannot send incrementally) - doTest(l("@a,1"), l("#a,1", "#b,2", "@c,3"), func(d FilesystemDiff) { - assert.Equal(t, l("#a,1", "@c,3"), d.IncrementalPath) - }) - - // test that snapshots are preferred over bookmarks in IncrementalPath - doTest(l("@a,1"), l("#a,1", "@a,1", "@b,2"), func(d FilesystemDiff) { - assert.Equal(t, l("@a,1", "@b,2"), d.IncrementalPath) - }) - doTest(l("@a,1"), l("@a,1", "#a,1", "@b,2"), func(d FilesystemDiff) { - assert.Equal(t, l("@a,1", "@b,2"), d.IncrementalPath) - }) - -} diff --git a/zfs/placeholder.go b/zfs/placeholder.go new file mode 100644 index 0000000..50ddd7f --- /dev/null +++ b/zfs/placeholder.go @@ -0,0 +1,113 @@ +package zfs + +import ( + "bytes" + "crypto/sha512" + "encoding/hex" + "fmt" + "io" + "os/exec" +) + +const ZREPL_PLACEHOLDER_PROPERTY_NAME string = "zrepl:placeholder" + +type FilesystemState struct { + Placeholder bool + // TODO extend with resume token when that feature is finally added +} + +// A somewhat efficient way to determine if a filesystem exists on this host. 
+// Particularly useful if exists is called more than once (will only fork exec once and cache the result) +func ZFSListFilesystemState() (localState map[string]FilesystemState, err error) { + + var actual [][]string + if actual, err = ZFSList([]string{"name", ZREPL_PLACEHOLDER_PROPERTY_NAME}, "-t", "filesystem,volume"); err != nil { + return + } + + localState = make(map[string]FilesystemState, len(actual)) + for _, e := range actual { + dp, err := NewDatasetPath(e[0]) + if err != nil { + return nil, fmt.Errorf("ZFS does not return parseable dataset path: %s", e[0]) + } + placeholder, _ := IsPlaceholder(dp, e[1]) + localState[e[0]] = FilesystemState{ + placeholder, + } + } + return + +} + +// Computes the value for the ZREPL_PLACEHOLDER_PROPERTY_NAME ZFS user property +// to mark the given DatasetPath p as a placeholder +// +// We cannot simply use booleans here since user properties are always +// inherited. +// +// We hash the DatasetPath and use it to check for a given path if it is the +// one originally marked as placeholder. +// +// However, this prohibits moving datasets around via `zfs rename`. The +// placeholder attribute must be re-computed for the dataset path after the +// move. +// +// TODO better solution available? 
+func PlaceholderPropertyValue(p *DatasetPath) string { + ps := []byte(p.ToString()) + sum := sha512.Sum512_256(ps) + return hex.EncodeToString(sum[:]) +} + +func IsPlaceholder(p *DatasetPath, placeholderPropertyValue string) (isPlaceholder bool, err error) { + expected := PlaceholderPropertyValue(p) + isPlaceholder = expected == placeholderPropertyValue + if !isPlaceholder { + err = fmt.Errorf("expected %s, has %s", expected, placeholderPropertyValue) + } + return +} + +// for nonexistent FS, isPlaceholder == false && err == nil +func ZFSIsPlaceholderFilesystem(p *DatasetPath) (isPlaceholder bool, err error) { + props, err := zfsGet(p.ToString(), []string{ZREPL_PLACEHOLDER_PROPERTY_NAME}, sourceAny) + if err == io.ErrUnexpectedEOF { + // interpret this as an early exit of the zfs binary due to the fs not existing + return false, nil + } else if err != nil { + return false, err + } + isPlaceholder, _ = IsPlaceholder(p, props.Get(ZREPL_PLACEHOLDER_PROPERTY_NAME)) + return +} + +func ZFSCreatePlaceholderFilesystem(p *DatasetPath) (err error) { + v := PlaceholderPropertyValue(p) + cmd := exec.Command(ZFS_BINARY, "create", + "-o", fmt.Sprintf("%s=%s", ZREPL_PLACEHOLDER_PROPERTY_NAME, v), + "-o", "mountpoint=none", + p.ToString()) + + stderr := bytes.NewBuffer(make([]byte, 0, 1024)) + cmd.Stderr = stderr + + if err = cmd.Start(); err != nil { + return err + } + + if err = cmd.Wait(); err != nil { + err = &ZFSError{ + Stderr: stderr.Bytes(), + WaitErr: err, + } + } + + return +} + +func ZFSSetNoPlaceholder(p *DatasetPath) error { + props := NewZFSProperties() + props.Set(ZREPL_PLACEHOLDER_PROPERTY_NAME, "off") + return zfsSet(p.ToString(), props) +} \ No newline at end of file diff --git a/zfs/zfs.go b/zfs/zfs.go index 8f08a66..652a494 100644 --- a/zfs/zfs.go +++ b/zfs/zfs.go @@ -9,6 +9,7 @@ import ( "io" "os" "os/exec" + "sort" "strings" "sync" "time" @@ -691,17 +692,62 @@ type StreamCopier interface { Close() error } +type RecvOptions struct { + // Rollback to the 
oldest snapshot, destroy it, then perform `recv -F`. + // Note that this doesn't change property values, i.e. an existing local property value will be kept. + RollbackAndForceRecv bool +} -func ZFSRecv(ctx context.Context, fs string, streamCopier StreamCopier, additionalArgs ...string) (err error) { +func ZFSRecv(ctx context.Context, fs string, streamCopier StreamCopier, opts RecvOptions) (err error) { if err := validateZFSFilesystem(fs); err != nil { return err } + fsdp, err := NewDatasetPath(fs) + if err != nil { + return err + } + + if opts.RollbackAndForceRecv { + // destroy all snapshots before `recv -F` because `recv -F` + // does not perform a rollback unless `send -R` was used (which we assume hasn't been the case) + var snaps []FilesystemVersion + { + vs, err := ZFSListFilesystemVersions(fsdp, nil) + if err != nil { + err = fmt.Errorf("cannot list versions to rollback is required: %s", err) + } + for _, v := range vs { + if v.Type == Snapshot { + snaps = append(snaps, v) + } + } + sort.Slice(snaps, func(i, j int) bool { + return snaps[i].CreateTXG < snaps[j].CreateTXG + }) + } + // bookmarks are rolled back automatically + if len(snaps) > 0 { + // use rollback to efficiently destroy all but the earliest snapshot + // then destroy that earliest snapshot + // afterwards, `recv -F` will work + rollbackTarget := snaps[0] + rollbackTargetAbs := rollbackTarget.ToAbsPath(fsdp) + debug("recv: rollback to %q", rollbackTargetAbs) + if err := ZFSRollback(fsdp, rollbackTarget, "-r"); err != nil { + return fmt.Errorf("cannot rollback %s to %s for forced receive: %s", fsdp.ToString(), rollbackTarget, err) + } + debug("recv: destroy %q", rollbackTargetAbs) + if err := ZFSDestroy(rollbackTargetAbs); err != nil { + return fmt.Errorf("cannot destroy %s for forced receive: %s", rollbackTargetAbs, err) + } + } + } args := make([]string, 0) args = append(args, "recv") - if len(args) > 0 { - args = append(args, additionalArgs...) 
+ if opts.RollbackAndForceRecv { + args = append(args, "-F") } args = append(args, fs) @@ -1038,3 +1084,33 @@ func ZFSBookmark(fs *DatasetPath, snapshot, bookmark string) (err error) { return } + +func ZFSRollback(fs *DatasetPath, snapshot FilesystemVersion, rollbackArgs ...string) (err error) { + + snapabs := snapshot.ToAbsPath(fs) + if snapshot.Type != Snapshot { + return fmt.Errorf("can only rollback to snapshots, got %s", snapabs) + } + + args := []string{"rollback"} + args = append(args, rollbackArgs...) + args = append(args, snapabs) + + cmd := exec.Command(ZFS_BINARY, args...) + + stderr := bytes.NewBuffer(make([]byte, 0, 1024)) + cmd.Stderr = stderr + + if err = cmd.Start(); err != nil { + return err + } + + if err = cmd.Wait(); err != nil { + err = &ZFSError{ + Stderr: stderr.Bytes(), + WaitErr: err, + } + } + + return err +}