diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..f70a239 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,79 @@ +version: 2.0 +workflows: + version: 2 + build: + jobs: + - build-1.11 + - build-1.12 + - build-latest +jobs: + # build-latest serves as the template + # we use YAML anchors & aliases to exchange the docker image (and hence Go version used for the build) + build-latest: &build-latest + description: Builds zrepl + parameters: + image: + description: "the docker image that the job should use" + type: string + docker: + - image: circleci/golang:latest + environment: + # required by lazy.sh + TERM: xterm + working_directory: /go/src/github.com/zrepl/zrepl + steps: + - run: + name: Setup environment variables + command: | + # used by pip (for docs) + echo 'export PATH="$HOME/.local/bin:$PATH"' >> $BASH_ENV + + - restore_cache: + keys: + - source + - vendor + - protobuf + + - checkout + + - save_cache: + key: source + paths: + - ".git" + + # install deps + - run: wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip + - run: echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c + - run: sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip + - save_cache: + key: protobuf + paths: + - "/usr/include/google/protobuf" + + - run: sudo apt install python3 python3-pip libgirepository1.0-dev + - run: ./lazy.sh devsetup + + - run: make vendordeps + - save_cache: + key: vendor + paths: + - "./vendor" + + - run: make + - run: make vet + - run: make test + - run: make release + + - store_artifacts: + path: ./artifacts/release + when: always + + + build-1.11: + <<: *build-latest + docker: + - image: circleci/golang:1.11 + build-1.12: + <<: *build-latest + docker: + - image: circleci/golang:1.12 diff --git a/.travis.yml b/.travis.yml index 8ced506..aec423b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,7 @@ dist: xenial services: - docker +env: # for allow_failures: https://docs.travis-ci.com/user/customizing-the-build/ matrix: include: @@ -15,45 +16,35 @@ matrix: --user "$(id -u):$(id -g)" \ zrepl_build make vendordeps release - # all go entries vary only by go version - - language: go + - &zrepl_build_template + language: go + go_import_path: github.com/zrepl/zrepl + before_install: + - wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip + - echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c + - sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip + - ./lazy.sh godep + - make vendordeps + script: + - make + - make vet + - make test + - make artifacts/zrepl-freebsd-amd64 + - make artifacts/zrepl-linux-amd64 + - make artifacts/zrepl-darwin-amd64 go: - "1.11" - go_import_path: github.com/zrepl/zrepl - before_install: - - wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip - - echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c - - sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip - - ./lazy.sh godep - - make vendordeps - script: - - make - - make vet - - make test - - make artifacts/zrepl-freebsd-amd64 - - make artifacts/zrepl-linux-amd64 - - make artifacts/zrepl-darwin-amd64 - - language: go + - <<: *zrepl_build_template + go: + - "1.12" + + - <<: *zrepl_build_template go: - "master" - go_import_path: 
github.com/zrepl/zrepl - before_install: - - wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip - - echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c - - sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip - - ./lazy.sh godep - - make vendordeps - script: - - make - - make vet - - make test - - make artifacts/zrepl-freebsd-amd64 - - make artifacts/zrepl-linux-amd64 - - make artifacts/zrepl-darwin-amd64 - # all python entries vary only by python version - - language: python + - &zrepl_docs_template + language: python python: - "3.4" install: @@ -61,29 +52,18 @@ matrix: - pip install -r docs/requirements.txt script: - make docs - - language: python + - <<: *zrepl_docs_template python: - "3.5" - install: - - sudo apt-get install libgirepository1.0-dev - - pip install -r docs/requirements.txt - script: - - make docs - - language: python + - <<: *zrepl_docs_template python: - "3.6" - install: - - sudo apt-get install libgirepository1.0-dev - - pip install -r docs/requirements.txt - script: - - make docs - - language: python + - <<: *zrepl_docs_template python: - "3.7" - install: - - sudo apt-get install libgirepository1.0-dev - - pip install -r docs/requirements.txt - script: - - make docs - + + allow_failures: + - <<: *zrepl_build_template + go: + - "master" diff --git a/Gopkg.lock b/Gopkg.lock index 4674a95..7df6bf1 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -89,6 +89,14 @@ revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5" version = "v1.2.0" +[[projects]] + digest = "1:ad92aa49f34cbc3546063c7eb2cabb55ee2278b72842eda80e2a20a8a06a8d73" + name = "github.com/google/uuid" + packages = ["."] + pruneopts = "" + revision = "0cd6bf5da1e1c83f8b45653022c74f71af0538a4" + version = "v1.1.1" + [[projects]] branch = "master" digest = "1:cb09475f771b9167fb9333629f5d6a7161572602ea040f1094602b0dc8709878" @@ -161,6 +169,14 @@ revision = "3247c84500bff8d9fb6d579d800f20b3e091582c" version = "v1.0.0" +[[projects]] + digest = "1:4ff67dde814694496d7aa31be44b900f9717a10c8bc9136b13f49c8ef97f439a" + name = "github.com/montanaflynn/stats" + packages = ["."] + pruneopts = "" + revision = "63fbb2597b7a13043b453a4b819945badb8f8926" + version = "v0.5.0" + [[projects]] branch = "master" digest = "1:f60ff065b58bd53e641112b38bbda9d2684deb828393c7ffb89c69a1ee301d17" @@ -245,6 +261,14 @@ pruneopts = "" revision = "8b1c2da0d56deffdbb9e48d4414b4e674bd8083e" +[[projects]] + digest = "1:3962f553b77bf6c03fc07cd687a22dd3b00fe11aa14d31194f5505f5bb65cdc8" + name = "github.com/sergi/go-diff" + packages = ["diffmatchpatch"] + pruneopts = "" + revision = "1744e2970ca51c86172c8190fadad617561ed6e7" + version = "v1.0.0" + [[projects]] branch = "master" digest = "1:146327ce93be37e68bd3ff8541090d96da8cb3adc9e35d57570e9170a29f6bf6" @@ -280,6 +304,25 @@ revision = "93babf24513d0e8277635da8169fcc5a46ae3f6a" version = "v1.11.0" +[[projects]] + digest = "1:529ed3f98838f69e13761788d0cc71b44e130058fab13bae2ce09f7a176bced4" + name = "github.com/yudai/gojsondiff" + packages = [ + ".", + "formatter", + ] + pruneopts = "" + revision = "7b1b7adf999dab73a6eb02669c3d82dbb27a3dd6" + version = "1.0.0" + +[[projects]] + branch = "master" + digest = "1:9857bb2293f372b2181004d8b62179bbdb4ab0982ec6f762abe6cf2bfedaff85" + name = "github.com/yudai/golcs" + packages = ["."] + pruneopts = "" + revision = "ecda9a501e8220fae3b4b600c3db4b0ba22cfc68" + [[projects]] branch = "v2" digest = 
"1:6b8a6afafde7ed31cd0c577ba40d88ce39e8f1c5eb76d7836be7d5b74f1c534a" @@ -403,9 +446,11 @@ "github.com/go-logfmt/logfmt", "github.com/golang/protobuf/proto", "github.com/golang/protobuf/protoc-gen-go", + "github.com/google/uuid", "github.com/jinzhu/copier", "github.com/kr/pretty", "github.com/mattn/go-isatty", + "github.com/montanaflynn/stats", "github.com/pkg/errors", "github.com/pkg/profile", "github.com/problame/go-netssh", @@ -415,14 +460,18 @@ "github.com/spf13/pflag", "github.com/stretchr/testify/assert", "github.com/stretchr/testify/require", + "github.com/yudai/gojsondiff", + "github.com/yudai/gojsondiff/formatter", "github.com/zrepl/yaml-config", "golang.org/x/net/context", "golang.org/x/sys/unix", "golang.org/x/tools/cmd/stringer", "google.golang.org/grpc", + "google.golang.org/grpc/codes", "google.golang.org/grpc/credentials", "google.golang.org/grpc/keepalive", "google.golang.org/grpc/peer", + "google.golang.org/grpc/status", ] solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index 01e2aae..55c0a9b 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -59,3 +59,7 @@ required = [ [[constraint]] name = "google.golang.org/grpc" version = "1" + +[[constraint]] + version = "1.1.0" + name = "github.com/google/uuid" diff --git a/Makefile b/Makefile index 120eb28..42b6990 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ vendordeps: dep ensure -v -vendor-only generate: #not part of the build, must do that manually - protoc -I=replication/pdu --go_out=plugins=grpc:replication/pdu replication/pdu/pdu.proto + protoc -I=replication/logic/pdu --go_out=plugins=grpc:replication/logic/pdu replication/logic/pdu/pdu.proto go generate -x ./... build: diff --git a/client/status.go b/client/status.go index f5e886e..9abc07a 100644 --- a/client/status.go +++ b/client/status.go @@ -10,8 +10,7 @@ import ( "github.com/zrepl/zrepl/daemon" "github.com/zrepl/zrepl/daemon/job" "github.com/zrepl/zrepl/daemon/pruner" - "github.com/zrepl/zrepl/replication" - "github.com/zrepl/zrepl/replication/fsrep" + "github.com/zrepl/zrepl/replication/report" "io" "math" "net/http" @@ -122,7 +121,7 @@ func wrap(s string, width int) string { if idx := strings.IndexAny(s, "\n\r"); idx != -1 && idx < rem { rem = idx+1 } - untilNewline := strings.TrimSpace(s[:rem]) + untilNewline := strings.TrimRight(s[:rem], "\n\r") s = s[rem:] if len(untilNewline) == 0 { continue @@ -130,7 +129,7 @@ func wrap(s string, width int) string { b.WriteString(untilNewline) b.WriteString("\n") } - return strings.TrimSpace(b.String()) + return strings.TrimRight(b.String(), "\n\r") } func (t *tui) printfDrawIndentedAndWrappedIfMultiline(format string, a ...interface{}) { @@ -353,74 +352,91 @@ func (t *tui) draw() { termbox.Flush() } -func (t *tui) renderReplicationReport(rep *replication.Report, history *bytesProgressHistory) { +func (t *tui) renderReplicationReport(rep *report.Report, history *bytesProgressHistory) { if rep == nil { t.printf("...\n") return } - all := make([]*fsrep.Report, 0, len(rep.Completed)+len(rep.Pending) + 1) - all = append(all, rep.Completed...) - all = append(all, rep.Pending...) 
- if rep.Active != nil { - all = append(all, rep.Active) + if rep.WaitReconnectError != nil { + t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: %s", rep.WaitReconnectError) + t.newline() } - sort.Slice(all, func(i, j int) bool { - return all[i].Filesystem < all[j].Filesystem + if !rep.WaitReconnectSince.IsZero() { + delta := rep.WaitReconnectUntil.Sub(time.Now()).Round(time.Second) + if rep.WaitReconnectUntil.IsZero() || delta > 0 { + var until string + if rep.WaitReconnectUntil.IsZero() { + until = "waiting indefinitely" + } else { + until = fmt.Sprintf("hard fail in %s @ %s", delta, rep.WaitReconnectUntil) + } + t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: reconnecting with exponential backoff (since %s) (%s)", + rep.WaitReconnectSince, until) + } else { + t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: reconnects reached hard-fail timeout @ %s", rep.WaitReconnectUntil) + } + t.newline() + } + + // TODO visualize more than the latest attempt by folding all attempts into one + if len(rep.Attempts) == 0 { + t.printf("no attempts made yet") + return + } else { + t.printf("Attempt #%d", len(rep.Attempts)) + if len(rep.Attempts) > 1 { + t.printf(". Previous attempts failed with the following statuses:") + t.newline() + t.addIndent(1) + for i, a := range rep.Attempts[:len(rep.Attempts)-1] { + t.printfDrawIndentedAndWrappedIfMultiline("#%d: %s (failed at %s) (ran %s)", i + 1, a.State, a.FinishAt, a.FinishAt.Sub(a.StartAt)) + t.newline() + } + t.addIndent(-1) + } else { + t.newline() + } + } + + latest := rep.Attempts[len(rep.Attempts)-1] + sort.Slice(latest.Filesystems, func(i, j int) bool { + return latest.Filesystems[i].Info.Name < latest.Filesystems[j].Info.Name }) - state, err := replication.StateString(rep.Status) - if err != nil { - t.printf("Status: %q (parse error: %q)\n", rep.Status, err) - return - } - - t.printf("Status: %s", state) + t.printf("Status: %s", latest.State) t.newline() - if rep.Problem != "" { + if latest.State == report.AttemptPlanningError { t.printf("Problem: ") - t.printfDrawIndentedAndWrappedIfMultiline("%s", rep.Problem) + t.printfDrawIndentedAndWrappedIfMultiline("%s", latest.PlanError) + t.newline() + } else if latest.State == report.AttemptFanOutError { + t.printf("Problem: one or more of the filesystems encountered errors") t.newline() } - if rep.SleepUntil.After(time.Now()) && !state.IsTerminal() { - t.printf("Sleeping until %s (%s left)\n", rep.SleepUntil, rep.SleepUntil.Sub(time.Now())) - } - if state != replication.Planning && state != replication.PlanningError { + if latest.State != report.AttemptPlanning && latest.State != report.AttemptPlanningError { + // Draw global progress bar // Progress: [---------------] - sumUpFSRep := func(rep *fsrep.Report) (transferred, total int64) { - for _, s := range rep.Pending { - transferred += s.Bytes - total += s.ExpectedBytes - } - for _, s := range rep.Completed { - transferred += s.Bytes - total += s.ExpectedBytes - } - return - } - var transferred, total int64 - for _, fs := range all { - fstx, fstotal := sumUpFSRep(fs) - transferred += fstx - total += fstotal - } - rate, changeCount := history.Update(transferred) + expected, replicated := latest.BytesSum() + rate, changeCount := history.Update(replicated) t.write("Progress: ") - t.drawBar(50, transferred, total, changeCount) - t.write(fmt.Sprintf(" %s / %s @ %s/s", ByteCountBinary(transferred), ByteCountBinary(total), ByteCountBinary(rate))) + t.drawBar(50, replicated, expected, changeCount) + t.write(fmt.Sprintf(" %s / %s @ %s/s", 
ByteCountBinary(replicated), ByteCountBinary(expected), ByteCountBinary(rate))) t.newline() + + var maxFSLen int + for _, fs := range latest.Filesystems { + if len(fs.Info.Name) > maxFSLen { + maxFSLen = len(fs.Info.Name) + } + } + for _, fs := range latest.Filesystems { + t.printFilesystemStatus(fs, false, maxFSLen) // FIXME bring 'active' flag back + } + } - var maxFSLen int - for _, fs := range all { - if len(fs.Filesystem) > maxFSLen { - maxFSLen = len(fs.Filesystem) - } - } - for _, fs := range all { - t.printFilesystemStatus(fs, fs == rep.Active, maxFSLen) - } } func (t *tui) renderPrunerReport(r *pruner.Report) { @@ -441,9 +457,6 @@ func (t *tui) renderPrunerReport(r *pruner.Report) { if r.Error != "" { t.printf("Error: %s\n", r.Error) } - if r.SleepUntil.After(time.Now()) { - t.printf("Sleeping until %s (%s left)\n", r.SleepUntil, r.SleepUntil.Sub(time.Now())) - } type commonFS struct { *pruner.FSReport @@ -459,8 +472,7 @@ func (t *tui) renderPrunerReport(r *pruner.Report) { switch state { case pruner.Plan: fallthrough - case pruner.PlanWait: fallthrough - case pruner.ErrPerm: + case pruner.PlanErr: return } @@ -500,8 +512,18 @@ func (t *tui) renderPrunerReport(r *pruner.Report) { for _, fs := range all { t.write(rightPad(fs.Filesystem, maxFSname, " ")) t.write(" ") + if !fs.SkipReason.NotSkipped() { + t.printf("skipped: %s\n", fs.SkipReason) + continue + } if fs.LastError != "" { - t.printf("ERROR (%d): %s\n", fs.ErrorCount, fs.LastError) // whitespace is padding + if strings.ContainsAny(fs.LastError, "\r\n") { + t.printf("ERROR:") + t.printfDrawIndentedAndWrappedIfMultiline("%s\n", fs.LastError) + } else { + t.printfDrawIndentedAndWrappedIfMultiline("ERROR: %s\n", fs.LastError) + } + t.newline() continue } @@ -524,25 +546,6 @@ func (t *tui) renderPrunerReport(r *pruner.Report) { } -const snapshotIndent = 1 -func calculateMaxFSLength(all []*fsrep.Report) (maxFS, maxStatus int) { - for _, e := range all { - if len(e.Filesystem) > maxFS { - maxFS = len(e.Filesystem) - } - all2 := make([]*fsrep.StepReport, 0, len(e.Pending) + len(e.Completed)) - all2 = append(all2, e.Pending...) - all2 = append(all2, e.Completed...) 
- for _, e2 := range all2 { - elen := len(e2.Problem) + len(e2.From) + len(e2.To) + 60 // random spacing, units, labels, etc - if elen > maxStatus { - maxStatus = elen - } - } - } - return -} - func times(str string, n int) (out string) { for i := 0; i < n; i++ { out += str @@ -586,35 +589,13 @@ func (t *tui) drawBar(length int, bytes, totalBytes int64, changeCount int) { t.write("]") } -func StringStepState(s fsrep.StepState) string { - switch s { - case fsrep.StepReplicationReady: return "Ready" - case fsrep.StepMarkReplicatedReady: return "MarkReady" - case fsrep.StepCompleted: return "Completed" - default: - return fmt.Sprintf("UNKNOWN %d", s) - } -} - -func (t *tui) printFilesystemStatus(rep *fsrep.Report, active bool, maxFS int) { - - bytes := int64(0) - totalBytes := int64(0) - for _, s := range rep.Pending { - bytes += s.Bytes - totalBytes += s.ExpectedBytes - } - for _, s := range rep.Completed { - bytes += s.Bytes - totalBytes += s.ExpectedBytes - } - +func (t *tui) printFilesystemStatus(rep *report.FilesystemReport, active bool, maxFS int) { + expected, replicated := rep.BytesSum() status := fmt.Sprintf("%s (step %d/%d, %s/%s)", - rep.Status, - len(rep.Completed), len(rep.Pending) + len(rep.Completed), - ByteCountBinary(bytes), ByteCountBinary(totalBytes), - + strings.ToUpper(string(rep.State)), + rep.CurrentStep, len(rep.Steps), + ByteCountBinary(replicated), ByteCountBinary(expected), ) activeIndicator := " " @@ -623,18 +604,23 @@ func (t *tui) printFilesystemStatus(rep *fsrep.Report, active bool, maxFS int) { } t.printf("%s %s %s ", activeIndicator, - rightPad(rep.Filesystem, maxFS, " "), + rightPad(rep.Info.Name, maxFS, " "), status) next := "" - if rep.Problem != "" { - next = rep.Problem - } else if len(rep.Pending) > 0 { - if rep.Pending[0].From != "" { - next = fmt.Sprintf("next: %s => %s", rep.Pending[0].From, rep.Pending[0].To) + if err := rep.Error(); err != nil { + next = err.Err + } else if rep.State != report.FilesystemDone { + if nextStep := rep.NextStep(); nextStep != nil { + if nextStep.IsIncremental() { + next = fmt.Sprintf("next: %s => %s", nextStep.Info.From, nextStep.Info.To) + } else { + next = fmt.Sprintf("next: %s (full)", nextStep.Info.To) + } } else { - next = fmt.Sprintf("next: %s (full)", rep.Pending[0].To) + next = "" // individual FSes may still be in planning state } + } t.printfDrawIndentedAndWrappedIfMultiline("%s", next) diff --git a/config/config.go b/config/config.go index 334d5cb..64a8179 100644 --- a/config/config.go +++ b/config/config.go @@ -78,7 +78,38 @@ type PushJob struct { type PullJob struct { ActiveJob `yaml:",inline"` RootFS string `yaml:"root_fs"` - Interval time.Duration `yaml:"interval,positive"` + Interval PositiveDurationOrManual `yaml:"interval"` +} + +type PositiveDurationOrManual struct { + Interval time.Duration + Manual bool +} + +var _ yaml.Unmarshaler = (*PositiveDurationOrManual)(nil) + +func (i *PositiveDurationOrManual) UnmarshalYAML(u func(interface{}, bool) error) (err error) { + var s string + if err := u(&s, true); err != nil { + return err + } + switch s { + case "manual": + i.Manual = true + i.Interval = 0 + case "": + return fmt.Errorf("value must not be empty") + default: + i.Manual = false + i.Interval, err = time.ParseDuration(s) + if err != nil { + return err + } + if i.Interval <= 0 { + return fmt.Errorf("value must be a positive duration, got %q", s) + } + } + return nil } type SinkJob struct { diff --git a/config/config_positiveintervalormanual_test.go b/config/config_positiveintervalormanual_test.go 
new file mode 100644 index 0000000..237813e --- /dev/null +++ b/config/config_positiveintervalormanual_test.go @@ -0,0 +1,41 @@ +package config + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/zrepl/yaml-config" +) + +func TestPositiveDurationOrManual(t *testing.T) { + cases := []struct { + Comment, Input string + Result *PositiveDurationOrManual + }{ + {"empty is error", "", nil}, + {"negative is error", "-1s", nil}, + {"zero seconds is error", "0s", nil}, + {"zero is error", "0", nil}, + {"non-manual is error", "something", nil}, + {"positive seconds works", "1s", &PositiveDurationOrManual{Manual: false, Interval: 1 * time.Second}}, + {"manual works", "manual", &PositiveDurationOrManual{Manual: true, Interval: 0}}, + } + for _, tc := range cases { + t.Run(tc.Comment, func(t *testing.T) { + var out struct { + FieldName PositiveDurationOrManual `yaml:"fieldname"` + } + input := fmt.Sprintf("\nfieldname: %s\n", tc.Input) + err := yaml.UnmarshalStrict([]byte(input), &out) + if tc.Result == nil { + assert.Error(t, err) + t.Logf("%#v", out) + } else { + assert.Equal(t, *tc.Result, out.FieldName) + } + }) + } + +} diff --git a/daemon/job/active.go b/daemon/job/active.go index 907f4be..d70184f 100644 --- a/daemon/job/active.go +++ b/daemon/job/active.go @@ -17,10 +17,12 @@ import ( "github.com/zrepl/zrepl/daemon/snapper" "github.com/zrepl/zrepl/endpoint" "github.com/zrepl/zrepl/replication" + "github.com/zrepl/zrepl/replication/driver" + "github.com/zrepl/zrepl/replication/logic" + "github.com/zrepl/zrepl/replication/report" "github.com/zrepl/zrepl/rpc" "github.com/zrepl/zrepl/transport" "github.com/zrepl/zrepl/transport/fromconfig" - "github.com/zrepl/zrepl/util/envconst" "github.com/zrepl/zrepl/zfs" ) @@ -53,7 +55,7 @@ type activeSideTasks struct { state ActiveSideState // valid for state ActiveSideReplicating, ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone - replication *replication.Replication + replicationReport driver.ReportFunc replicationCancel context.CancelFunc // valid for state ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone @@ -79,7 +81,7 @@ func (a *ActiveSide) updateTasks(u func(*activeSideTasks)) activeSideTasks { type activeMode interface { ConnectEndpoints(rpcLoggers rpc.Loggers, connecter transport.Connecter) DisconnectEndpoints() - SenderReceiver() (replication.Sender, replication.Receiver) + SenderReceiver() (logic.Sender, logic.Receiver) Type() Type RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{}) ResetConnectBackoff() @@ -111,7 +113,7 @@ func (m *modePush) DisconnectEndpoints() { m.receiver = nil } -func (m *modePush) SenderReceiver() (replication.Sender, replication.Receiver) { +func (m *modePush) SenderReceiver() (logic.Sender, logic.Receiver) { m.setupMtx.Lock() defer m.setupMtx.Unlock() return m.sender, m.receiver @@ -151,7 +153,7 @@ type modePull struct { receiver *endpoint.Receiver sender *rpc.Client rootFS *zfs.DatasetPath - interval time.Duration + interval config.PositiveDurationOrManual } func (m *modePull) ConnectEndpoints(loggers rpc.Loggers, connecter transport.Connecter) { @@ -172,7 +174,7 @@ func (m *modePull) DisconnectEndpoints() { m.receiver = nil } -func (m *modePull) SenderReceiver() (replication.Sender, replication.Receiver) { +func (m *modePull) SenderReceiver() (logic.Sender, logic.Receiver) { m.setupMtx.Lock() defer m.setupMtx.Unlock() return m.sender, m.receiver @@ -181,7 +183,12 @@ func (m *modePull) SenderReceiver() (replication.Sender, 
replication.Receiver) { func (*modePull) Type() Type { return TypePull } func (m *modePull) RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{}) { - t := time.NewTicker(m.interval) + if m.interval.Manual { + GetLogger(ctx).Info("manual pull configured, periodic pull disabled") + // "waiting for wakeups" is printed in common ActiveSide.do + return + } + t := time.NewTicker(m.interval.Interval) defer t.Stop() for { select { @@ -210,9 +217,6 @@ func (m *modePull) ResetConnectBackoff() { func modePullFromConfig(g *config.Global, in *config.PullJob) (m *modePull, err error) { m = &modePull{} - if in.Interval <= 0 { - return nil, errors.New("interval must be positive") - } m.interval = in.Interval m.rootFS, err = zfs.NewDatasetPath(in.RootFS) @@ -274,7 +278,7 @@ func (j *ActiveSide) RegisterMetrics(registerer prometheus.Registerer) { func (j *ActiveSide) Name() string { return j.name } type ActiveSideStatus struct { - Replication *replication.Report + Replication *report.Report PruningSender, PruningReceiver *pruner.Report } @@ -283,8 +287,8 @@ func (j *ActiveSide) Status() *Status { s := &ActiveSideStatus{} t := j.mode.Type() - if tasks.replication != nil { - s.Replication = tasks.replication.Report() + if tasks.replicationReport != nil { + s.Replication = tasks.replicationReport() } if tasks.prunerSender != nil { s.PruningSender = tasks.prunerSender.Report() @@ -345,78 +349,6 @@ func (j *ActiveSide) do(ctx context.Context) { } }() - // The code after this watchdog goroutine is sequential and transitions the state from - // ActiveSideReplicating -> ActiveSidePruneSender -> ActiveSidePruneReceiver -> ActiveSideDone - // If any of those sequential tasks 'gets stuck' (livelock, no progress), the watchdog will eventually - // cancel its context. - // If the task is written to support context cancellation, it will return immediately (in permanent error state), - // and the sequential code above transitions to the next state. - go func() { - - wdto := envconst.Duration("ZREPL_JOB_WATCHDOG_TIMEOUT", 10*time.Minute) - jitter := envconst.Duration("ZREPL_JOB_WATCHDOG_JITTER", 1*time.Second) - // shadowing! - log := log.WithField("watchdog_timeout", wdto.String()) - - log.Debug("starting watchdog") - defer log.Debug("watchdog stopped") - - t := time.NewTicker(wdto) - defer t.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-t.C: // fall - } - - j.updateTasks(func(tasks *activeSideTasks) { - // Since cancelling a task will cause the sequential code to transition to the next state immediately, - // we cannot check for its progress right then (no fallthrough). - // Instead, we return (not continue because we are in a closure) and give the new state another - // ZREPL_JOB_WATCHDOG_TIMEOUT interval to try make some progress. - - log.WithField("state", tasks.state).Debug("watchdog firing") - - const WATCHDOG_ENVCONST_NOTICE = " (adjust ZREPL_JOB_WATCHDOG_TIMEOUT env variable if inappropriate)" - - switch tasks.state { - case ActiveSideReplicating: - log.WithField("replication_progress", tasks.replication.Progress.String()). - Debug("check replication progress") - if tasks.replication.Progress.CheckTimeout(wdto, jitter) { - log.Error("replication did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE) - tasks.replicationCancel() - return - } - case ActiveSidePruneSender: - log.WithField("prune_sender_progress", tasks.replication.Progress.String()). 
- Debug("check pruner_sender progress") - if tasks.prunerSender.Progress.CheckTimeout(wdto, jitter) { - log.Error("pruner_sender did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE) - tasks.prunerSenderCancel() - return - } - case ActiveSidePruneReceiver: - log.WithField("prune_receiver_progress", tasks.replication.Progress.String()). - Debug("check pruner_receiver progress") - if tasks.prunerReceiver.Progress.CheckTimeout(wdto, jitter) { - log.Error("pruner_receiver did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE) - tasks.prunerReceiverCancel() - return - } - case ActiveSideDone: - // ignore, ctx will be Done() in a few milliseconds and the watchdog will exit - default: - log.WithField("state", tasks.state). - Error("watchdog implementation error: unknown active side state") - } - }) - - } - }() - sender, receiver := j.mode.SenderReceiver() { @@ -426,16 +358,19 @@ func (j *ActiveSide) do(ctx context.Context) { default: } ctx, repCancel := context.WithCancel(ctx) - tasks := j.updateTasks(func(tasks *activeSideTasks) { + var repWait driver.WaitFunc + j.updateTasks(func(tasks *activeSideTasks) { // reset it *tasks = activeSideTasks{} tasks.replicationCancel = repCancel - tasks.replication = replication.NewReplication(j.promRepStateSecs, j.promBytesReplicated) + tasks.replicationReport, repWait = replication.Do( + ctx, logic.NewPlanner(j.promRepStateSecs, j.promBytesReplicated, sender, receiver), + ) tasks.state = ActiveSideReplicating }) log.Info("start replication") - tasks.replication.Drive(ctx, sender, receiver) - repCancel() // always cancel to free up context resources + repWait(true) // wait blocking + repCancel() // always cancel to free up context resources } { diff --git a/daemon/logging/build_logging.go b/daemon/logging/build_logging.go index ce90d3c..52b7e15 100644 --- a/daemon/logging/build_logging.go +++ b/daemon/logging/build_logging.go @@ -15,7 +15,7 @@ import ( "github.com/zrepl/zrepl/daemon/snapper" "github.com/zrepl/zrepl/endpoint" "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication" + "github.com/zrepl/zrepl/replication/driver" "github.com/zrepl/zrepl/rpc" "github.com/zrepl/zrepl/rpc/transportmux" "github.com/zrepl/zrepl/tlsconf" @@ -79,7 +79,7 @@ const ( ) func WithSubsystemLoggers(ctx context.Context, log logger.Logger) context.Context { - ctx = replication.WithLogger(ctx, log.WithField(SubsysField, SubsysReplication)) + ctx = driver.WithLogger(ctx, log.WithField(SubsysField, SubsysReplication)) ctx = endpoint.WithLogger(ctx, log.WithField(SubsysField, SubsyEndpoint)) ctx = pruner.WithLogger(ctx, log.WithField(SubsysField, SubsysPruning)) ctx = snapper.WithLogger(ctx, log.WithField(SubsysField, SubsysSnapshot)) diff --git a/daemon/pruner/pruner.go b/daemon/pruner/pruner.go index ece616b..f5de9bf 100644 --- a/daemon/pruner/pruner.go +++ b/daemon/pruner/pruner.go @@ -8,10 +8,9 @@ import ( "github.com/zrepl/zrepl/config" "github.com/zrepl/zrepl/logger" "github.com/zrepl/zrepl/pruning" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/util/envconst" "github.com/zrepl/zrepl/util/watchdog" - "net" "sort" "strings" "sync" @@ -21,6 +20,7 @@ import ( // Try to keep it compatible with gitub.com/zrepl/zrepl/endpoint.Endpoint type History interface { ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) + ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) } // Try to keep it 
compatible with gitub.com/zrepl/zrepl/endpoint.Endpoint @@ -66,8 +66,7 @@ type Pruner struct { state State - // State ErrWait|ErrPerm - sleepUntil time.Time + // State PlanErr err error // State Exec @@ -206,71 +205,43 @@ type State int const ( Plan State = 1 << iota - PlanWait + PlanErr Exec - ExecWait - ErrPerm + ExecErr Done ) -func (s State) statefunc() state { - var statemap = map[State]state{ - Plan: statePlan, - PlanWait: statePlanWait, - Exec: stateExec, - ExecWait: stateExecWait, - ErrPerm: nil, - Done: nil, - } - return statemap[s] -} - -func (s State) IsTerminal() bool { - return s.statefunc() == nil -} - -type updater func(func(*Pruner)) State -type state func(args *args, u updater) state +type updater func(func(*Pruner)) func (p *Pruner) Prune() { p.prune(p.args) } func (p *Pruner) prune(args args) { - s := p.state.statefunc() - for s != nil { - pre := p.state - s = s(&args, func(f func(*Pruner)) State { + u := func(f func(*Pruner)) { p.mtx.Lock() defer p.mtx.Unlock() f(p) - return p.state - }) - post := p.state - GetLogger(args.ctx). - WithField("transition", fmt.Sprintf("%s=>%s", pre, post)). - Debug("state transition") - if err := p.Error(); err != nil { - GetLogger(args.ctx). - WithError(p.err). - WithField("state", post.String()). - Error("entering error state after error") } + // TODO support automatic retries + // It is advisable to merge this code with package replication/driver before + // That will likely require re-modelling struct fs like replication/driver.attempt, + // including figuring out how to resume a plan after being interrupted by network errors + // The non-retrying code in this package should move straight to replication/logic. + doOneAttempt(&args, u) } -} type Report struct { - State string - SleepUntil time.Time - Error string + State string + Error string Pending, Completed []FSReport } type FSReport struct { - Filesystem string + Filesystem string SnapshotList, DestroyList []SnapshotReport - ErrorCount int - LastError string + SkipReason FSSkipReason + LastError string } type SnapshotReport struct { @@ -285,14 +256,9 @@ func (p *Pruner) Report() *Report { r := Report{State: p.state.String()} - if p.state & (PlanWait|ExecWait) != 0 { - r.SleepUntil = p.sleepUntil - } - if p.state & (PlanWait|ExecWait|ErrPerm) != 0 { if p.err != nil { r.Error = p.err.Error() } - } if p.execQueue != nil { r.Pending, r.Completed = p.execQueue.Report() @@ -307,20 +273,16 @@ func (p *Pruner) State() State { return p.state } -func (p *Pruner) Error() error { - p.mtx.Lock() - defer p.mtx.Unlock() - if p.state & (PlanWait|ExecWait|ErrPerm) != 0 { - return p.err - } - return nil -} - type fs struct { path string // permanent error during planning - planErr error + planErr error + planErrContext string + + // if != "", the fs was skipped for planning and the field + // contains the reason + skipReason FSSkipReason // snapshots presented by target // (type snapshot) @@ -333,8 +295,18 @@ type fs struct { // only during Exec state, also used by execQueue execErrLast error - execErrCount int +} +type FSSkipReason string + +const ( + NotSkipped = "" + SkipPlaceholder = "filesystem is placeholder" + SkipNoCorrespondenceOnSender = "filesystem has no correspondence on sender" +) + +func (r FSSkipReason) NotSkipped() bool { + return r == NotSkipped } func (f *fs) Report() FSReport { @@ -343,7 +315,11 @@ func (f *fs) Report() FSReport { r := FSReport{} r.Filesystem = f.path - r.ErrorCount = f.execErrCount + r.SkipReason = f.skipReason + if !r.SkipReason.NotSkipped() { + return r + } + 
if f.planErr != nil { r.LastError = f.planErr.Error() } else if f.execErrLast != nil { @@ -385,39 +361,7 @@ func (s snapshot) Replicated() bool { return s.replicated } func (s snapshot) Date() time.Time { return s.date } -type Error interface { - error - Temporary() bool -} - -var _ Error = net.Error(nil) - -func shouldRetry(e error) bool { - if neterr, ok := e.(net.Error); ok { - return neterr.Temporary() - } - return false -} - -func onErr(u updater, e error) state { - return u(func(p *Pruner) { - p.err = e - if !shouldRetry(e) { - p.state = ErrPerm - return - } - switch p.state { - case Plan: - p.state = PlanWait - case Exec: - p.state = ExecWait - default: - panic(p.state) - } - }).statefunc() -} - -func statePlan(a *args, u updater) state { +func doOneAttempt(a *args, u updater) { ctx, target, receiver := a.ctx, a.target, a.receiver var ka *watchdog.KeepAlive @@ -425,28 +369,62 @@ func statePlan(a *args, u updater) state { ka = &pruner.Progress }) + sfssres, err := receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) + if err != nil { + u(func(p *Pruner) { + p.state = PlanErr + p.err = err + }) + return + } + sfss := make(map[string]*pdu.Filesystem) + for _, sfs := range sfssres.GetFilesystems() { + sfss[sfs.GetPath()] = sfs + } + tfssres, err := target.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) if err != nil { - return onErr(u, err) + u(func(p *Pruner) { + p.state = PlanErr + p.err = err + }) + return } tfss := tfssres.GetFilesystems() pfss := make([]*fs, len(tfss)) +tfss_loop: for i, tfs := range tfss { l := GetLogger(ctx).WithField("fs", tfs.Path) l.Debug("plan filesystem") - pfs := &fs{ - path: tfs.Path, + path: tfs.Path, } pfss[i] = pfs + if tfs.GetIsPlaceholder() { + pfs.skipReason = SkipPlaceholder + l.WithField("skip_reason", pfs.skipReason).Debug("skipping filesystem") + continue + } else if sfs := sfss[tfs.GetPath()]; sfs == nil { + pfs.skipReason = SkipNoCorrespondenceOnSender + l.WithField("skip_reason", pfs.skipReason).WithField("sfs", sfs.GetPath()).Debug("skipping filesystem") + continue + } + + pfsPlanErrAndLog := func(err error, message string) { + t := fmt.Sprintf("%T", err) + pfs.planErr = err + pfs.planErrContext = message + l.WithField("orig_err_type", t).WithError(err).Error(fmt.Sprintf("%s: plan error, skipping filesystem", message)) + } + tfsvsres, err := target.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: tfs.Path}) if err != nil { - l.WithError(err).Error("cannot list filesystem versions") - return onErr(u, err) + pfsPlanErrAndLog(err, "cannot list filesystem versions") + continue tfss_loop } tfsvs := tfsvsres.GetVersions() // no progress here since we could run in a live-lock (must have used target AND receiver before progress) @@ -455,24 +433,22 @@ func statePlan(a *args, u updater) state { rcReq := &pdu.ReplicationCursorReq{ Filesystem: tfs.Path, - Op: &pdu.ReplicationCursorReq_Get{ + Op: &pdu.ReplicationCursorReq_Get{ Get: &pdu.ReplicationCursorReq_GetOp{}, }, } rc, err := receiver.ReplicationCursor(ctx, rcReq) if err != nil { - l.WithError(err).Error("cannot get replication cursor") - return onErr(u, err) + pfsPlanErrAndLog(err, "cannot get replication cursor bookmark") + continue tfss_loop } ka.MadeProgress() - if rc.GetNotexist() { - l.Error("replication cursor does not exist, skipping") - pfs.destroyList = []pruning.Snapshot{} - pfs.planErr = fmt.Errorf("replication cursor bookmark does not exist (one successful replication is required before pruning works)") - continue + if rc.GetNotexist() { + err := 
errors.New("replication cursor bookmark does not exist (one successful replication is required before pruning works)") + pfsPlanErrAndLog(err, "") + continue tfss_loop } - // scan from older to newer, all snapshots older than cursor are interpreted as replicated sort.Slice(tfsvs, func(i, j int) bool { return tfsvs[i].CreateTXG < tfsvs[j].CreateTXG @@ -494,11 +470,9 @@ func statePlan(a *args, u updater) state { } creation, err := tfsv.CreationAsTime() if err != nil { - err := fmt.Errorf("%s%s has invalid creation date: %s", tfs, tfsv.RelName(), err) - l.WithError(err). - WithField("tfsv", tfsv.RelName()). - Error("error with fileesystem version") - return onErr(u, err) + err := fmt.Errorf("%s: %s", tfsv.RelName(), err) + pfsPlanErrAndLog(err, "fs version with invalid creation date") + continue tfss_loop } // note that we cannot use CreateTXG because target and receiver could be on different pools atCursor := tfsv.Guid == rc.GetGuid() @@ -510,9 +484,8 @@ func statePlan(a *args, u updater) state { }) } if preCursor { - err := fmt.Errorf("replication cursor not found in prune target filesystem versions") - l.Error(err.Error()) - return onErr(u, err) + pfsPlanErrAndLog(fmt.Errorf("replication cursor not found in prune target filesystem versions"), "") + continue tfss_loop } // Apply prune rules @@ -520,34 +493,56 @@ func statePlan(a *args, u updater) state { ka.MadeProgress() } - return u(func(pruner *Pruner) { + u(func(pruner *Pruner) { pruner.Progress.MadeProgress() pruner.execQueue = newExecQueue(len(pfss)) for _, pfs := range pfss { pruner.execQueue.Put(pfs, nil, false) } pruner.state = Exec - }).statefunc() -} - -func stateExec(a *args, u updater) state { + }) + for { var pfs *fs - state := u(func(pruner *Pruner) { + u(func(pruner *Pruner) { pfs = pruner.execQueue.Pop() + }) if pfs == nil { - nextState := Done - if pruner.execQueue.HasCompletedFSWithErrors() { - nextState = ErrPerm + break + } + doOneAttemptExec(a, u, pfs) + } + + var rep *Report + { + // must not hold lock for report + var pruner *Pruner + u(func(p *Pruner) { + pruner = p + }) + rep = pruner.Report() + } + u(func(p *Pruner) { + if len(rep.Pending) > 0 { + panic("queue should not have pending items at this point") + } + hadErr := false + for _, fsr := range rep.Completed { + hadErr = hadErr || fsr.SkipReason.NotSkipped() && fsr.LastError != "" } - pruner.state = nextState - return + if hadErr { + p.state = ExecErr + } else { + p.state = Done } }) - if state != Exec { - return state.statefunc() + + } +// attempts to exec pfs, puts it back into the queue with the result +func doOneAttemptExec(a *args, u updater, pfs *fs) { + destroyList := make([]*pdu.FilesystemVersion, len(pfs.destroyList)) for i := range destroyList { destroyList[i] = pfs.destroyList[i].(snapshot).fsv @@ -566,7 +561,7 @@ func stateExec(a *args, u updater) state { u(func(pruner *Pruner) { pruner.execQueue.Put(pfs, err, false) }) - return onErr(u, err) + return } // check if all snapshots were destroyed destroyResults := make(map[string]*pdu.DestroySnapshotRes) @@ -607,31 +602,6 @@ func stateExec(a *args, u updater) state { }) if err != nil { GetLogger(a.ctx).WithError(err).Error("target could not destroy snapshots") - return onErr(u, err) - } - - return u(func(pruner *Pruner) { - pruner.Progress.MadeProgress() - }).statefunc() -} - -func stateExecWait(a *args, u updater) state { - return doWait(Exec, a, u) -} - -func statePlanWait(a *args, u updater) state { - return doWait(Plan, a, u) -} - -func doWait(goback State, a *args, u updater) state { - timer := 
time.NewTimer(a.retryWait) - defer timer.Stop() - select { - case <-timer.C: - return u(func(pruner *Pruner) { - pruner.state = goback - }).statefunc() - case <-a.ctx.Done(): - return onErr(u, a.ctx.Err()) + return } } diff --git a/daemon/pruner/pruner_queue.go b/daemon/pruner/pruner_queue.go index 063bcf7..840e93b 100644 --- a/daemon/pruner/pruner_queue.go +++ b/daemon/pruner/pruner_queue.go @@ -58,10 +58,7 @@ func (q *execQueue) Pop() *fs { func(q *execQueue) Put(fs *fs, err error, done bool) { fs.mtx.Lock() fs.execErrLast = err - if err != nil { - fs.execErrCount++ - } - if done || (err != nil && !shouldRetry(fs.execErrLast)) { + if done || err != nil { fs.mtx.Unlock() q.mtx.Lock() q.completed = append(q.completed, fs) @@ -78,9 +75,6 @@ func(q *execQueue) Put(fs *fs, err error, done bool) { defer q.pending[i].mtx.Unlock() q.pending[j].mtx.Lock() defer q.pending[j].mtx.Unlock() - if q.pending[i].execErrCount != q.pending[j].execErrCount { - return q.pending[i].execErrCount < q.pending[j].execErrCount - } return strings.Compare(q.pending[i].path, q.pending[j].path) == -1 }) q.mtx.Unlock() diff --git a/daemon/pruner/pruner_test.go b/daemon/pruner/pruner_test.go deleted file mode 100644 index 23a10e8..0000000 --- a/daemon/pruner/pruner_test.go +++ /dev/null @@ -1,206 +0,0 @@ -package pruner - -import ( - "context" - "fmt" - "github.com/stretchr/testify/assert" - "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/pruning" - "github.com/zrepl/zrepl/replication/pdu" - "net" - "testing" - "time" -) - -type mockFS struct { - path string - snaps []string -} - -func (m *mockFS) Filesystem() *pdu.Filesystem { - return &pdu.Filesystem{ - Path: m.path, - } -} - -func (m *mockFS) FilesystemVersions() []*pdu.FilesystemVersion { - versions := make([]*pdu.FilesystemVersion, len(m.snaps)) - for i, v := range m.snaps { - versions[i] = &pdu.FilesystemVersion{ - Type: pdu.FilesystemVersion_Snapshot, - Name: v, - Creation: pdu.FilesystemVersionCreation(time.Unix(0, 0)), - Guid: uint64(i), - } - } - return versions -} - -type mockTarget struct { - fss []mockFS - destroyed map[string][]string - listVersionsErrs map[string][]error - listFilesystemsErr []error - destroyErrs map[string][]error -} - -func (t *mockTarget) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) { - if len(t.listFilesystemsErr) > 0 { - e := t.listFilesystemsErr[0] - t.listFilesystemsErr = t.listFilesystemsErr[1:] - return nil, e - } - fss := make([]*pdu.Filesystem, len(t.fss)) - for i := range fss { - fss[i] = t.fss[i].Filesystem() - } - return &pdu.ListFilesystemRes{Filesystems: fss}, nil -} - -func (t *mockTarget) ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error) { - fs := req.Filesystem - if len(t.listVersionsErrs[fs]) != 0 { - e := t.listVersionsErrs[fs][0] - t.listVersionsErrs[fs] = t.listVersionsErrs[fs][1:] - return nil, e - } - - for _, mfs := range t.fss { - if mfs.path != fs { - continue - } - return &pdu.ListFilesystemVersionsRes{Versions: mfs.FilesystemVersions()}, nil - } - return nil, fmt.Errorf("filesystem %s does not exist", fs) -} - -func (t *mockTarget) DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) { - fs, snaps := req.Filesystem, req.Snapshots - if len(t.destroyErrs[fs]) != 0 { - e := t.destroyErrs[fs][0] - t.destroyErrs[fs] = t.destroyErrs[fs][1:] - return nil, e - } - destroyed := t.destroyed[fs] - res := make([]*pdu.DestroySnapshotRes, 
len(snaps)) - for i, s := range snaps { - destroyed = append(destroyed, s.Name) - res[i] = &pdu.DestroySnapshotRes{Error: "", Snapshot: s} - } - t.destroyed[fs] = destroyed - return &pdu.DestroySnapshotsRes{Results: res}, nil -} - -type mockCursor struct { - snapname string - guid uint64 -} -type mockHistory struct { - errs map[string][]error - cursors map[string]*mockCursor -} - -func (r *mockHistory) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) { - fs := req.Filesystem - if len(r.errs[fs]) > 0 { - e := r.errs[fs][0] - r.errs[fs] = r.errs[fs][1:] - return nil, e - } - return &pdu.ReplicationCursorRes{Result: &pdu.ReplicationCursorRes_Guid{Guid: 0}}, nil -} - -type stubNetErr struct { - msg string - temporary, timeout bool -} - -var _ net.Error = stubNetErr{} - -func (e stubNetErr) Error() string { - return e.msg -} - -func (e stubNetErr) Temporary() bool { return e.temporary } - -func (e stubNetErr) Timeout() bool { return e.timeout } - -func TestPruner_Prune(t *testing.T) { - - var _ net.Error = &net.OpError{} // we use it below - target := &mockTarget{ - listFilesystemsErr: []error{ - stubNetErr{msg: "fakerror0", temporary: true}, - }, - listVersionsErrs: map[string][]error{ - "zroot/foo": { - stubNetErr{msg: "fakeerror1", temporary: true}, - stubNetErr{msg: "fakeerror2", temporary: true,}, - }, - }, - destroyErrs: map[string][]error{ - "zroot/baz": { - stubNetErr{msg: "fakeerror3", temporary: true}, // first error puts it back in the queue - stubNetErr{msg:"permanent error"}, // so it will be last when pruner gives up due to permanent err - }, - }, - destroyed: make(map[string][]string), - fss: []mockFS{ - { - path: "zroot/foo", - snaps: []string{ - "keep_a", - "keep_b", - "drop_c", - "keep_d", - }, - }, - { - path: "zroot/bar", - snaps: []string{ - "keep_e", - "keep_f", - "drop_g", - }, - }, - { - path: "zroot/baz", - snaps: []string{ - "keep_h", - "drop_i", - }, - }, - }, - } - history := &mockHistory{ - errs: map[string][]error{ - "zroot/foo": { - stubNetErr{msg: "fakeerror4", temporary: true}, - }, - }, - } - - keepRules := []pruning.KeepRule{pruning.MustKeepRegex("^keep", false)} - - p := Pruner{ - args: args{ - ctx: WithLogger(context.Background(), logger.NewTestLogger(t)), - target: target, - receiver: history, - rules: keepRules, - retryWait: 10*time.Millisecond, - }, - state: Plan, - } - p.Prune() - - exp := map[string][]string{ - "zroot/foo": {"drop_c"}, - "zroot/bar": {"drop_g"}, - } - - assert.Equal(t, exp, target.destroyed) - - //assert.Equal(t, map[string][]error{}, target.listVersionsErrs, "retried") - -} diff --git a/daemon/pruner/state_enumer.go b/daemon/pruner/state_enumer.go index 8c396ab..0a616ea 100644 --- a/daemon/pruner/state_enumer.go +++ b/daemon/pruner/state_enumer.go @@ -7,19 +7,17 @@ import ( ) const ( - _StateName_0 = "PlanPlanWait" + _StateName_0 = "PlanPlanErr" _StateName_1 = "Exec" - _StateName_2 = "ExecWait" - _StateName_3 = "ErrPerm" - _StateName_4 = "Done" + _StateName_2 = "ExecErr" + _StateName_3 = "Done" ) var ( - _StateIndex_0 = [...]uint8{0, 4, 12} + _StateIndex_0 = [...]uint8{0, 4, 11} _StateIndex_1 = [...]uint8{0, 4} - _StateIndex_2 = [...]uint8{0, 8} - _StateIndex_3 = [...]uint8{0, 7} - _StateIndex_4 = [...]uint8{0, 4} + _StateIndex_2 = [...]uint8{0, 7} + _StateIndex_3 = [...]uint8{0, 4} ) func (i State) String() string { @@ -33,22 +31,19 @@ func (i State) String() string { return _StateName_2 case i == 16: return _StateName_3 - case i == 32: - return _StateName_4 default: return 
fmt.Sprintf("State(%d)", i) } } -var _StateValues = []State{1, 2, 4, 8, 16, 32} +var _StateValues = []State{1, 2, 4, 8, 16} var _StateNameToValueMap = map[string]State{ _StateName_0[0:4]: 1, - _StateName_0[4:12]: 2, + _StateName_0[4:11]: 2, _StateName_1[0:4]: 4, - _StateName_2[0:8]: 8, - _StateName_3[0:7]: 16, - _StateName_4[0:4]: 32, + _StateName_2[0:7]: 8, + _StateName_3[0:4]: 16, } // StateString retrieves an enum value from the enum constants string name. diff --git a/docs/configuration/jobs.rst b/docs/configuration/jobs.rst index 3468c98..cadf392 100644 --- a/docs/configuration/jobs.rst +++ b/docs/configuration/jobs.rst @@ -232,7 +232,8 @@ Job Type ``pull`` - ZFS dataset path are received to ``$root_fs/$client_identity`` * - ``interval`` - - Interval at which to pull from the source job + - | Interval at which to pull from the source job (e.g. ``10m``). + | ``manual`` disables periodic pulling, replication then only happens on :ref:`wakeup `. * - ``pruning`` - |pruning-spec| diff --git a/docs/usage.rst b/docs/usage.rst index 9d086a0..4bb2f76 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -13,6 +13,8 @@ CLI Overview The zrepl binary is self-documenting: run ``zrepl help`` for an overview of the available subcommands or ``zrepl SUBCOMMAND --help`` for information on available flags, etc. +.. _cli-signal-wakeup: + .. list-table:: :widths: 30 70 :header-rows: 1 diff --git a/endpoint/endpoint.go b/endpoint/endpoint.go index b90f3f7..d64ca88 100644 --- a/endpoint/endpoint.go +++ b/endpoint/endpoint.go @@ -7,8 +7,7 @@ import ( "path" "github.com/pkg/errors" - "github.com/zrepl/zrepl/replication" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/zfs" ) @@ -34,7 +33,7 @@ func (s *Sender) filterCheckFS(fs string) (*zfs.DatasetPath, error) { return nil, err } if !pass { - return nil, replication.NewFilteredError(fs) + return nil, fmt.Errorf("endpoint does not allow access to filesystem %s", fs) } return dp, nil } @@ -49,9 +48,10 @@ func (s *Sender) ListFilesystems(ctx context.Context, r *pdu.ListFilesystemReq) rfss[i] = &pdu.Filesystem{ Path: fss[i].ToString(), // FIXME: not supporting ResumeToken yet + IsPlaceholder: false, // sender FSs are never placeholders } } - res := &pdu.ListFilesystemRes{Filesystems: rfss, Empty: len(rfss) == 0} + res := &pdu.ListFilesystemRes{Filesystems: rfss} return res, nil } @@ -108,6 +108,21 @@ func (p *Sender) DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshots return doDestroySnapshots(ctx, dp, req.Snapshots) } +func (p *Sender) Ping(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + res := pdu.PingRes{ + Echo: req.GetMessage(), + } + return &res, nil +} + +func (p *Sender) PingDataconn(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + return p.Ping(ctx, req) +} + +func (p *Sender) WaitForConnectivity(ctx context.Context) error { + return nil +} + func (p *Sender) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) { dp, err := p.filterCheckFS(req.Filesystem) if err != nil { @@ -229,7 +244,7 @@ func (s *Receiver) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemR if err != nil { return nil, err } - // present without prefix, and only those that are not placeholders + // present filesystem without the root_fs prefix fss := make([]*pdu.Filesystem, 0, len(filtered)) for _, a := range filtered { ph, err := zfs.ZFSIsPlaceholderFilesystem(a) @@ -240,21 +255,16 @@ func (s *Receiver) 
ListFilesystems(ctx context.Context, req *pdu.ListFilesystemR Error("inconsistent placeholder property") return nil, errors.New("server error: inconsistent placeholder property") // don't leak path } - if ph { - getLogger(ctx). - WithField("fs", a.ToString()). - Debug("ignoring placeholder filesystem") - continue - } getLogger(ctx). WithField("fs", a.ToString()). - Debug("non-placeholder filesystem") + WithField("is_placeholder", ph). + Debug("filesystem") a.TrimPrefix(root) - fss = append(fss, &pdu.Filesystem{Path: a.ToString()}) + fss = append(fss, &pdu.Filesystem{Path: a.ToString(), IsPlaceholder: ph}) } if len(fss) == 0 { - getLogger(ctx).Debug("no non-placeholder filesystems") - return &pdu.ListFilesystemRes{Empty: true}, nil + getLogger(ctx).Debug("no filesystems found") + return &pdu.ListFilesystemRes{}, nil } return &pdu.ListFilesystemRes{Filesystems: fss}, nil } @@ -279,6 +289,21 @@ func (s *Receiver) ListFilesystemVersions(ctx context.Context, req *pdu.ListFile return &pdu.ListFilesystemVersionsRes{Versions: rfsvs}, nil } +func (s *Receiver) Ping(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + res := pdu.PingRes{ + Echo: req.GetMessage(), + } + return &res, nil +} + +func (s *Receiver) PingDataconn(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + return s.Ping(ctx, req) +} + +func (s *Receiver) WaitForConnectivity(ctx context.Context) error { + return nil +} + func (s *Receiver) ReplicationCursor(context.Context, *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) { return nil, fmt.Errorf("ReplicationCursor not implemented for Receiver") } @@ -324,28 +349,30 @@ func (s *Receiver) Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs getLogger(ctx).WithField("visitErr", visitErr).Debug("complete tree-walk") if visitErr != nil { - return nil, err + return nil, visitErr } - needForceRecv := false + var clearPlaceholderProperty bool + var recvOpts zfs.RecvOptions props, err := zfs.ZFSGet(lp, []string{zfs.ZREPL_PLACEHOLDER_PROPERTY_NAME}) if err == nil { if isPlaceholder, _ := zfs.IsPlaceholder(lp, props.Get(zfs.ZREPL_PLACEHOLDER_PROPERTY_NAME)); isPlaceholder { - needForceRecv = true + recvOpts.RollbackAndForceRecv = true + clearPlaceholderProperty = true + } + } + if clearPlaceholderProperty { + if err := zfs.ZFSSetNoPlaceholder(lp); err != nil { + return nil, fmt.Errorf("cannot clear placeholder property for forced receive: %s", err) } } - args := make([]string, 0, 1) - if needForceRecv { - args = append(args, "-F") - } + getLogger(ctx).WithField("opts", fmt.Sprintf("%#v", recvOpts)).Debug("start receive command") - getLogger(ctx).Debug("start receive command") - - if err := zfs.ZFSRecv(ctx, lp.ToString(), receive, args...); err != nil { + if err := zfs.ZFSRecv(ctx, lp.ToString(), receive, recvOpts); err != nil { getLogger(ctx). WithError(err). - WithField("args", args). + WithField("opts", recvOpts). Error("zfs receive failed") return nil, err } diff --git a/replication/driver/errorclass_enumer.go b/replication/driver/errorclass_enumer.go new file mode 100644 index 0000000..0a56c0e --- /dev/null +++ b/replication/driver/errorclass_enumer.go @@ -0,0 +1,50 @@ +// Code generated by "enumer -type=errorClass"; DO NOT EDIT. 
+ +package driver + +import ( + "fmt" +) + +const _errorClassName = "errorClassUnknownerrorClassPermanenterrorClassTemporaryConnectivityRelated" + +var _errorClassIndex = [...]uint8{0, 17, 36, 74} + +func (i errorClass) String() string { + if i < 0 || i >= errorClass(len(_errorClassIndex)-1) { + return fmt.Sprintf("errorClass(%d)", i) + } + return _errorClassName[_errorClassIndex[i]:_errorClassIndex[i+1]] +} + +var _errorClassValues = []errorClass{0, 1, 2} + +var _errorClassNameToValueMap = map[string]errorClass{ + _errorClassName[0:17]: 0, + _errorClassName[17:36]: 1, + _errorClassName[36:74]: 2, +} + +// errorClassString retrieves an enum value from the enum constants string name. +// Throws an error if the param is not part of the enum. +func errorClassString(s string) (errorClass, error) { + if val, ok := _errorClassNameToValueMap[s]; ok { + return val, nil + } + return 0, fmt.Errorf("%s does not belong to errorClass values", s) +} + +// errorClassValues returns all values of the enum +func errorClassValues() []errorClass { + return _errorClassValues +} + +// IsAerrorClass returns "true" if the value is listed in the enum definition. "false" otherwise +func (i errorClass) IsAerrorClass() bool { + for _, v := range _errorClassValues { + if i == v { + return true + } + } + return false +} diff --git a/replication/driver/replication_driver.go b/replication/driver/replication_driver.go new file mode 100644 index 0000000..6a5a9ea --- /dev/null +++ b/replication/driver/replication_driver.go @@ -0,0 +1,638 @@ +package driver + +import ( + "context" + "errors" + "fmt" + "net" + "sort" + "strings" + "sync" + "time" + + "github.com/zrepl/zrepl/replication/report" + "github.com/zrepl/zrepl/util/chainlock" + "github.com/zrepl/zrepl/util/envconst" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type interval struct { + begin time.Time + end time.Time +} + +func (w *interval) SetZero() { + w.begin = time.Time{} + w.end = time.Time{} +} + +// Duration of 0 means indefinite length +func (w *interval) Set(begin time.Time, duration time.Duration) { + if begin.IsZero() { + panic("zero begin time not allowed") + } + w.begin = begin + w.end = begin.Add(duration) +} + +// Returns the End of the interval if it has a defined length. +// For indefinite lengths, returns the zero value. +func (w *interval) End() time.Time { + return w.end +} + +// Return a context with a deadline at the interval's end. +// If the interval has indefinite length (duration 0 on Set), return ctx as is. +// The returned context.CancelFunc can be called either way. +func (w *interval) ContextWithDeadlineAtEnd(ctx context.Context) (context.Context, context.CancelFunc) { + if w.begin.IsZero() { + panic("must call Set before ContextWithDeadlineAtEnd") + } + if w.end.IsZero() { + // indefinite length, just return context as is + return ctx, func() {} + } else { + return context.WithDeadline(ctx, w.end) + } +} + +type run struct { + l *chainlock.L + + startedAt, finishedAt time.Time + + waitReconnect interval + waitReconnectError *timedError + + // the attempts attempted so far: + // All but the last in this slice must have finished with some errors. + // The last attempt may not be finished and may not have errors. 
+ attempts []*attempt +} + +type Planner interface { + Plan(context.Context) ([]FS, error) + WaitForConnectivity(context.Context) error +} + +// an attempt represents a single planning & execution of fs replications +type attempt struct { + planner Planner + + l *chainlock.L + + startedAt, finishedAt time.Time + + // after Planner.Plan was called, planErr and fss are mutually exclusive with regards to nil-ness + // if both are nil, it must be assumed that Planner.Plan is active + planErr *timedError + fss []*fs +} + +type timedError struct { + Err error + Time time.Time +} + +func newTimedError(err error, t time.Time) *timedError { + if err == nil { + panic("error must be non-nil") + } + if t.IsZero() { + panic("t must be non-zero") + } + return &timedError{err, t} +} + +func (e *timedError) IntoReportError() *report.TimedError { + if e == nil { + return nil + } + return report.NewTimedError(e.Err.Error(), e.Time) +} + +type FS interface { + // Returns true if this FS and fs refer to the same filesystem returned + // by Planner.Plan in a previous attempt. + EqualToPreviousAttempt(fs FS) bool + // The returned steps are assumed to be dependent on exactly + // their direct predecessors in the returned list. + PlanFS(context.Context) ([]Step, error) + ReportInfo() *report.FilesystemInfo +} + +type Step interface { + // Returns true iff the target snapshot is the same for this Step and other. + // We do not use TargetDate to avoid problems with wrong system time on + // snapshot creation. + // + // Implementations can assume that `other` is a step of the same filesystem, + // although maybe from a previous attempt. + // (`same` as defined by FS.EqualToPreviousAttempt) + // + // Note that TargetEquals should return true in a situation with one + // originally sent snapshot and a subsequent attempt's step that uses + // resumable send & recv. 
+ TargetEquals(other Step) bool + TargetDate() time.Time + Step(context.Context) error + ReportInfo() *report.StepInfo +} + +type fs struct { + fs FS + + l *chainlock.L + + planning struct { + done bool + err *timedError + } + + // valid iff planning.done && planning.err == nil + planned struct { + // valid iff planning.done && planning.err == nil + stepErr *timedError + // all steps, in the order in which they must be completed + steps []*step + // index into steps, pointing at the step that is currently executing + // if step >= len(steps), no more work needs to be done + step int + } +} + +type step struct { + l *chainlock.L + step Step +} + +type ReportFunc func() *report.Report +type WaitFunc func(block bool) (done bool) + +var maxAttempts = envconst.Int64("ZREPL_REPLICATION_MAX_ATTEMPTS", 3) +var reconnectHardFailTimeout = envconst.Duration("ZREPL_REPLICATION_RECONNECT_HARD_FAIL_TIMEOUT", 10*time.Minute) + +func Do(ctx context.Context, planner Planner) (ReportFunc, WaitFunc) { + log := getLog(ctx) + l := chainlock.New() + run := &run{ + l: l, + startedAt: time.Now(), + } + + done := make(chan struct{}) + go func() { + defer close(done) + + defer run.l.Lock().Unlock() + log.Debug("begin run") + defer log.Debug("run ended") + var prev *attempt + mainLog := log + for ano := 0; ano < int(maxAttempts) || maxAttempts == 0; ano++ { + log := mainLog.WithField("attempt_number", ano) + log.Debug("start attempt") + + run.waitReconnect.SetZero() + run.waitReconnectError = nil + + // do current attempt + cur := &attempt{ + l: l, + startedAt: time.Now(), + planner: planner, + } + run.attempts = append(run.attempts, cur) + run.l.DropWhile(func() { + cur.do(ctx, prev) + }) + prev = cur + if ctx.Err() != nil { + log.WithError(ctx.Err()).Info("context error") + return + } + + // error classification, bail out if done / permanent error + rep := cur.report() + log.WithField("attempt_state", rep.State).Debug("attempt state") + errRep := cur.errorReport() + + if rep.State == report.AttemptDone { + log.Debug("attempt completed successfully") + break + } + + mostRecentErr, mostRecentErrClass := errRep.MostRecent() + log.WithField("most_recent_err", mostRecentErr).WithField("most_recent_err_class", mostRecentErrClass).Debug("most recent error used for re-connect decision") + if mostRecentErr == nil { + // inconsistent reporting, let's bail out + log.Warn("attempt does not report done but error report does not report errors, aborting run") + break + } + log.WithError(mostRecentErr.Err).Error("most recent error in this attempt") + shouldReconnect := mostRecentErrClass == errorClassTemporaryConnectivityRelated + log.WithField("reconnect_decision", shouldReconnect).Debug("reconnect decision made") + if shouldReconnect { + run.waitReconnect.Set(time.Now(), reconnectHardFailTimeout) + log.WithField("deadline", run.waitReconnect.End()).Error("temporary connectivity-related error identified, start waiting for reconnect") + var connectErr error + var connectErrTime time.Time + run.l.DropWhile(func() { + ctx, cancel := run.waitReconnect.ContextWithDeadlineAtEnd(ctx) + defer cancel() + connectErr = planner.WaitForConnectivity(ctx) + connectErrTime = time.Now() + }) + if connectErr == nil { + log.Error("reconnect successful") // same level as 'begin with reconnect' message above + continue + } else { + run.waitReconnectError = newTimedError(connectErr, connectErrTime) + log.WithError(connectErr).Error("reconnecting failed, aborting run") + break + } + } else { + log.Error("most recent error cannot be solved by 
reconnecting, aborting run") + return + } + + } + + }() + + wait := func(block bool) bool { + if block { + <-done + } + select { + case <-done: + return true + default: + return false + } + } + report := func() *report.Report { + defer run.l.Lock().Unlock() + return run.report() + } + return report, wait +} + +func (a *attempt) do(ctx context.Context, prev *attempt) { + pfss, err := a.planner.Plan(ctx) + errTime := time.Now() + defer a.l.Lock().Unlock() + if err != nil { + a.planErr = newTimedError(err, errTime) + a.fss = nil + a.finishedAt = time.Now() + return + } + + for _, pfs := range pfss { + fs := &fs{ + fs: pfs, + l: a.l, + } + a.fss = append(a.fss, fs) + } + + prevs := make(map[*fs]*fs) + { + prevFSs := make(map[*fs][]*fs, len(pfss)) + if prev != nil { + debug("previous attempt has %d fss", len(a.fss)) + for _, fs := range a.fss { + for _, prevFS := range prev.fss { + if fs.fs.EqualToPreviousAttempt(prevFS.fs) { + l := prevFSs[fs] + l = append(l, prevFS) + prevFSs[fs] = l + } + } + } + } + type inconsistency struct { + cur *fs + prevs []*fs + } + var inconsistencies []inconsistency + for cur, fss := range prevFSs { + if len(fss) > 1 { + inconsistencies = append(inconsistencies, inconsistency{cur, fss}) + } + } + sort.SliceStable(inconsistencies, func(i, j int) bool { + return inconsistencies[i].cur.fs.ReportInfo().Name < inconsistencies[j].cur.fs.ReportInfo().Name + }) + if len(inconsistencies) > 0 { + var msg strings.Builder + msg.WriteString("cannot determine filesystem correspondences between different attempts:\n") + var inconsistencyLines []string + for _, i := range inconsistencies { + var prevNames []string + for _, prev := range i.prevs { + prevNames = append(prevNames, prev.fs.ReportInfo().Name) + } + l := fmt.Sprintf(" %s => %v", i.cur.fs.ReportInfo().Name, prevNames) + inconsistencyLines = append(inconsistencyLines, l) + } + fmt.Fprintf(&msg, strings.Join(inconsistencyLines, "\n")) + now := time.Now() + a.planErr = newTimedError(errors.New(msg.String()), now) + a.fss = nil + a.finishedAt = now + return + } + for cur, fss := range prevFSs { + if len(fss) > 0 { + prevs[cur] = fss[0] + } + } + } + // invariant: prevs contains an entry for each unambigious correspondence + + stepQueue := newStepQueue() + defer stepQueue.Start(1)() // TODO parallel replication + var fssesDone sync.WaitGroup + for _, f := range a.fss { + fssesDone.Add(1) + go func(f *fs) { + defer fssesDone.Done() + f.do(ctx, stepQueue, prevs[f]) + }(f) + } + a.l.DropWhile(func() { + fssesDone.Wait() + }) + a.finishedAt = time.Now() +} + +func (fs *fs) do(ctx context.Context, pq *stepQueue, prev *fs) { + psteps, err := fs.fs.PlanFS(ctx) + errTime := time.Now() + defer fs.l.Lock().Unlock() + debug := debugPrefix("fs=%s", fs.fs.ReportInfo().Name) + fs.planning.done = true + if err != nil { + fs.planning.err = newTimedError(err, errTime) + return + } + for _, pstep := range psteps { + step := &step{ + l: fs.l, + step: pstep, + } + fs.planned.steps = append(fs.planned.steps, step) + } + debug("iniital len(fs.planned.steps) = %d", len(fs.planned.steps)) + + // for not-first attempts, only allow fs.planned.steps + // up to including the originally planned target snapshot + if prev != nil && prev.planning.done && prev.planning.err == nil { + prevUncompleted := prev.planned.steps[prev.planned.step:] + if len(prevUncompleted) == 0 { + debug("prevUncompleted is empty") + return + } + if len(fs.planned.steps) == 0 { + debug("fs.planned.steps is empty") + return + } + prevFailed := prevUncompleted[0] + curFirst := 
fs.planned.steps[0] + // we assume that PlanFS retries prevFailed (using curFirst) + if !prevFailed.step.TargetEquals(curFirst.step) { + debug("Targets don't match") + // Two options: + // A: planning algorithm is broken + // B: manual user intervention inbetween + // Neither way will we make progress, so let's error out + stepFmt := func(step *step) string { + r := step.report() + s := r.Info + if r.IsIncremental() { + return fmt.Sprintf("%s=>%s", s.From, s.To) + } else { + return fmt.Sprintf("full=>%s", s.To) + } + } + msg := fmt.Sprintf("last attempt's uncompleted step %s does not correspond to this attempt's first planned step %s", + stepFmt(prevFailed), stepFmt(curFirst)) + fs.planned.stepErr = newTimedError(errors.New(msg), time.Now()) + return + } + // only allow until step targets diverge + min := len(prevUncompleted) + if min > len(fs.planned.steps) { + min = len(fs.planned.steps) + } + diverge := 0 + for ; diverge < min; diverge++ { + debug("diverge compare iteration %d", diverge) + if !fs.planned.steps[diverge].step.TargetEquals(prevUncompleted[diverge].step) { + break + } + } + debug("diverge is %d", diverge) + fs.planned.steps = fs.planned.steps[0:diverge] + } + debug("post-prev-merge len(fs.planned.steps) = %d", len(fs.planned.steps)) + + for i, s := range fs.planned.steps { + var ( + err error + errTime time.Time + ) + // lock must not be held while executing step in order for reporting to work + fs.l.DropWhile(func() { + targetDate := s.step.TargetDate() + defer pq.WaitReady(fs, targetDate)() + err = s.step.Step(ctx) // no shadow + errTime = time.Now() // no shadow + }) + if err != nil { + fs.planned.stepErr = newTimedError(err, errTime) + break + } + fs.planned.step = i + 1 // fs.planned.step must be == len(fs.planned.steps) if all went OK + } +} + +// caller must hold lock l +func (r *run) report() *report.Report { + report := &report.Report{ + Attempts: make([]*report.AttemptReport, len(r.attempts)), + StartAt: r.startedAt, + FinishAt: r.finishedAt, + WaitReconnectSince: r.waitReconnect.begin, + WaitReconnectUntil: r.waitReconnect.end, + WaitReconnectError: r.waitReconnectError.IntoReportError(), + } + for i := range report.Attempts { + report.Attempts[i] = r.attempts[i].report() + } + return report +} + +// caller must hold lock l +func (a *attempt) report() *report.AttemptReport { + + r := &report.AttemptReport{ + // State is set below + Filesystems: make([]*report.FilesystemReport, len(a.fss)), + StartAt: a.startedAt, + FinishAt: a.finishedAt, + PlanError: a.planErr.IntoReportError(), + } + + for i := range r.Filesystems { + r.Filesystems[i] = a.fss[i].report() + } + + state := report.AttemptPlanning + if a.planErr != nil { + state = report.AttemptPlanningError + } else if a.fss != nil { + if a.finishedAt.IsZero() { + state = report.AttemptFanOutFSs + } else { + fsWithError := false + for _, s := range r.Filesystems { + fsWithError = fsWithError || s.Error() != nil + } + state = report.AttemptDone + if fsWithError { + state = report.AttemptFanOutError + } + } + } + r.State = state + + return r +} + +// caller must hold lock l +func (f *fs) report() *report.FilesystemReport { + state := report.FilesystemPlanningErrored + if f.planning.err == nil { + if f.planning.done { + if f.planned.stepErr != nil { + state = report.FilesystemSteppingErrored + } else if f.planned.step < len(f.planned.steps) { + state = report.FilesystemStepping + } else { + state = report.FilesystemDone + } + } else { + state = report.FilesystemPlanning + } + } + r := &report.FilesystemReport{ + 
Info: f.fs.ReportInfo(), + State: state, + PlanError: f.planning.err.IntoReportError(), + StepError: f.planned.stepErr.IntoReportError(), + Steps: make([]*report.StepReport, len(f.planned.steps)), + CurrentStep: f.planned.step, + } + for i := range r.Steps { + r.Steps[i] = f.planned.steps[i].report() + } + return r +} + +// caller must hold lock l +func (s *step) report() *report.StepReport { + r := &report.StepReport{ + Info: s.step.ReportInfo(), + } + return r +} + +type stepErrorReport struct { + err *timedError + step int +} + +//go:generate enumer -type=errorClass +type errorClass int + +const ( + errorClassUnknown errorClass = iota + errorClassPermanent + errorClassTemporaryConnectivityRelated +) + +type errorReport struct { + flattened []*timedError + // sorted DESCending by err time + byClass map[errorClass][]*timedError +} + +// caller must hold lock l +func (a *attempt) errorReport() *errorReport { + r := &errorReport{} + if a.planErr != nil { + r.flattened = append(r.flattened, a.planErr) + } + for _, fs := range a.fss { + if fs.planning.done && fs.planning.err != nil { + r.flattened = append(r.flattened, fs.planning.err) + } else if fs.planning.done && fs.planned.stepErr != nil { + r.flattened = append(r.flattened, fs.planned.stepErr) + } + } + + // build byClass + { + r.byClass = make(map[errorClass][]*timedError) + putClass := func(err *timedError, class errorClass) { + errs := r.byClass[class] + errs = append(errs, err) + r.byClass[class] = errs + } + for _, err := range r.flattened { + if neterr, ok := err.Err.(net.Error); ok && neterr.Temporary() { + putClass(err, errorClassTemporaryConnectivityRelated) + continue + } + if st, ok := status.FromError(err.Err); ok && st.Code() == codes.Unavailable { + // technically, codes.Unavailable could be returned by the gRPC endpoint, indicating overload, etc. + // for now, let's assume it only happens for connectivity issues, as specified in + // https://grpc.io/grpc/core/md_doc_statuscodes.html + putClass(err, errorClassTemporaryConnectivityRelated) + continue + } + putClass(err, errorClassPermanent) + } + for _, errs := range r.byClass { + sort.Slice(errs, func(i, j int) bool { + return errs[i].Time.After(errs[j].Time) // sort descendingly + }) + } + } + + return r +} + +func (r *errorReport) AnyError() *timedError { + for _, err := range r.flattened { + if err != nil { + return err + } + } + return nil +} + +func (r *errorReport) MostRecent() (err *timedError, errClass errorClass) { + for class, errs := range r.byClass { + // errs are sorted descendingly during construction + if len(errs) > 0 && (err == nil || errs[0].Time.After(err.Time)) { + err = errs[0] + errClass = class + } + } + return +} diff --git a/replication/driver/replication_driver_debug.go b/replication/driver/replication_driver_debug.go new file mode 100644 index 0000000..23f7ae7 --- /dev/null +++ b/replication/driver/replication_driver_debug.go @@ -0,0 +1,29 @@ +package driver + +import ( + "fmt" + "os" +) + +var debugEnabled bool = false + +func init() { + if os.Getenv("ZREPL_REPLICATION_DRIVER_DEBUG") != "" { + debugEnabled = true + } +} + +func debug(format string, args ...interface{}) { + if debugEnabled { + fmt.Fprintf(os.Stderr, "repl: driver: %s\n", fmt.Sprintf(format, args...)) + } +} + +type debugFunc func(format string, args ...interface{}) + +func debugPrefix(prefixFormat string, prefixFormatArgs ...interface{}) debugFunc { + prefix := fmt.Sprintf(prefixFormat, prefixFormatArgs...) 
+	return func(format string, args ...interface{}) {
+		debug("%s: %s", prefix, fmt.Sprintf(format, args...))
+	}
+}
\ No newline at end of file
diff --git a/replication/driver/replication_driver_logging.go b/replication/driver/replication_driver_logging.go
new file mode 100644
index 0000000..ccaf7c8
--- /dev/null
+++ b/replication/driver/replication_driver_logging.go
@@ -0,0 +1,25 @@
+package driver
+
+import (
+	"context"
+
+	"github.com/zrepl/zrepl/logger"
+)
+
+type Logger = logger.Logger
+
+type contexKey int
+
+const contexKeyLogger contexKey = iota + 1
+
+func getLog(ctx context.Context) Logger {
+	l, ok := ctx.Value(contexKeyLogger).(Logger)
+	if !ok {
+		l = logger.NewNullLogger()
+	}
+	return l
+}
+
+func WithLogger(ctx context.Context, log Logger) context.Context {
+	return context.WithValue(ctx, contexKeyLogger, log)
+}
diff --git a/replication/driver/replication_driver_test.go b/replication/driver/replication_driver_test.go
new file mode 100644
index 0000000..650434f
--- /dev/null
+++ b/replication/driver/replication_driver_test.go
@@ -0,0 +1,215 @@
+package driver
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"sort"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	"github.com/zrepl/zrepl/replication/report"
+
+	"github.com/stretchr/testify/assert"
+
+	jsondiff "github.com/yudai/gojsondiff"
+	jsondiffformatter "github.com/yudai/gojsondiff/formatter"
+)
+
+type mockPlanner struct {
+	stepCounter uint32
+	fss         []FS // *mockFS
+}
+
+func (p *mockPlanner) Plan(ctx context.Context) ([]FS, error) {
+	time.Sleep(1 * time.Second)
+	p.fss = []FS{
+		&mockFS{
+			&p.stepCounter,
+			"zroot/one",
+			nil,
+		},
+		&mockFS{
+			&p.stepCounter,
+			"zroot/two",
+			nil,
+		},
+	}
+	return p.fss, nil
+}
+
+func (p *mockPlanner) WaitForConnectivity(context.Context) error {
+	return nil
+}
+
+type mockFS struct {
+	globalStepCounter *uint32
+	name              string
+	steps             []Step
+}
+
+func (f *mockFS) EqualToPreviousAttempt(other FS) bool {
+	return f.name == other.(*mockFS).name
+}
+
+func (f *mockFS) PlanFS(ctx context.Context) ([]Step, error) {
+	if f.steps != nil {
+		panic("PlanFS used twice")
+	}
+	switch f.name {
+	case "zroot/one":
+		f.steps = []Step{
+			&mockStep{
+				fs:         f,
+				ident:      "a",
+				duration:   1 * time.Second,
+				targetDate: time.Unix(2, 0),
+			},
+			&mockStep{
+				fs:         f,
+				ident:      "b",
+				duration:   1 * time.Second,
+				targetDate: time.Unix(10, 0),
+			},
+			&mockStep{
+				fs:         f,
+				ident:      "c",
+				duration:   1 * time.Second,
+				targetDate: time.Unix(20, 0),
+			},
+		}
+	case "zroot/two":
+		f.steps = []Step{
+			&mockStep{
+				fs:         f,
+				ident:      "u",
+				duration:   500 * time.Millisecond,
+				targetDate: time.Unix(15, 0),
+			},
+			&mockStep{
+				fs:         f,
+				duration:   500 * time.Millisecond,
+				ident:      "v",
+				targetDate: time.Unix(30, 0),
+			},
+		}
+	default:
+		panic("unimplemented")
+	}
+
+	return f.steps, nil
+}
+
+func (f *mockFS) ReportInfo() *report.FilesystemInfo {
+	return &report.FilesystemInfo{Name: f.name}
+}
+
+type mockStep struct {
+	fs         *mockFS
+	ident      string
+	duration   time.Duration
+	targetDate time.Time
+
+	// filled by method Step
+	globalCtr uint32
+}
+
+func (f *mockStep) String() string {
+	return fmt.Sprintf("%s{%s} targetDate=%s globalCtr=%v", f.fs.name, f.ident, f.targetDate, f.globalCtr)
+}
+
+func (f *mockStep) Step(ctx context.Context) error {
+	f.globalCtr = atomic.AddUint32(f.fs.globalStepCounter, 1)
+	time.Sleep(f.duration)
+	return nil
+}
+
+func (f *mockStep) TargetEquals(s Step) bool {
+	return f.ident == s.(*mockStep).ident
+}
+
+func (f *mockStep) TargetDate() time.Time {
+	return
f.targetDate +} + +func (f *mockStep) ReportInfo() *report.StepInfo { + return &report.StepInfo{From: f.ident, To: f.ident, BytesExpected: 100, BytesReplicated: 25} +} + +// TODO: add meaningful validation (i.e. actual checks) +// Since the stepqueue is not deterministic due to scheduler jitter, +// we cannot test for any definitive sequence of steps here. +// Such checks would further only be sensible for a non-concurrent step-queue, +// but we're going to have concurrent replication in the future. +// +// For the time being, let's just exercise the code a bit. +func TestReplication(t *testing.T) { + + ctx := context.Background() + + mp := &mockPlanner{} + getReport, wait := Do(ctx, mp) + begin := time.Now() + fireAt := []time.Duration{ + // the following values are relative to the start + 500 * time.Millisecond, // planning + 1500 * time.Millisecond, // nothing is done, a is running + 2500 * time.Millisecond, // a done, b running + 3250 * time.Millisecond, // a,b done, u running + 3750 * time.Millisecond, // a,b,u done, c running + 4750 * time.Millisecond, // a,b,u,c done, v running + 5250 * time.Millisecond, // a,b,u,c,v done + } + reports := make([]*report.Report, len(fireAt)) + for i := range fireAt { + sleepUntil := begin.Add(fireAt[i]) + time.Sleep(sleepUntil.Sub(time.Now())) + reports[i] = getReport() + // uncomment for viewing non-diffed results + // t.Logf("report @ %6.4f:\n%s", fireAt[i].Seconds(), pretty.Sprint(reports[i])) + } + waitBegin := time.Now() + wait(true) + waitDuration := time.Now().Sub(waitBegin) + assert.True(t, waitDuration < 10*time.Millisecond, "%v", waitDuration) // and that's gratious + + prev, err := json.Marshal(reports[0]) + require.NoError(t, err) + for _, r := range reports[1:] { + this, err := json.Marshal(r) + require.NoError(t, err) + differ := jsondiff.New() + diff, err := differ.Compare(prev, this) + require.NoError(t, err) + df := jsondiffformatter.NewDeltaFormatter() + _, err = df.Format(diff) + require.NoError(t, err) + // uncomment the following line to get json diffs between each captured step + // t.Logf("%s", res) + prev, err = json.Marshal(r) + require.NoError(t, err) + } + + steps := make([]*mockStep, 0) + for _, fs := range mp.fss { + for _, step := range fs.(*mockFS).steps { + steps = append(steps, step.(*mockStep)) + } + } + + // sort steps in pq order (although, remember, pq is not deterministic) + sort.Slice(steps, func(i, j int) bool { + return steps[i].targetDate.Before(steps[j].targetDate) + }) + + // manual inspection of the globalCtr value should show that, despite + // scheduler-dependent behavior of pq, steps should generally be taken + // from oldest to newest target date (globally, not per FS). 
+ t.Logf("steps sorted by target date:") + for _, step := range steps { + t.Logf("\t%s", step) + } + +} diff --git a/replication/driver/replication_stepqueue.go b/replication/driver/replication_stepqueue.go new file mode 100644 index 0000000..a6486c0 --- /dev/null +++ b/replication/driver/replication_stepqueue.go @@ -0,0 +1,163 @@ +package driver + +import ( + "container/heap" + "time" + + "github.com/zrepl/zrepl/util/chainlock" +) + +type stepQueueRec struct { + ident interface{} + targetDate time.Time + wakeup chan StepCompletedFunc +} + +type stepQueue struct { + stop chan struct{} + reqs chan stepQueueRec +} + +type stepQueueHeapItem struct { + idx int + req stepQueueRec +} +type stepQueueHeap []*stepQueueHeapItem + +func (h stepQueueHeap) Less(i, j int) bool { + return h[i].req.targetDate.Before(h[j].req.targetDate) +} + +func (h stepQueueHeap) Swap(i, j int) { + h[i], h[j] = h[j], h[i] + h[i].idx = i + h[j].idx = j +} + +func (h stepQueueHeap) Len() int { + return len(h) +} + +func (h *stepQueueHeap) Push(elem interface{}) { + hitem := elem.(*stepQueueHeapItem) + hitem.idx = h.Len() + *h = append(*h, hitem) +} + +func (h *stepQueueHeap) Pop() interface{} { + elem := (*h)[h.Len()-1] + elem.idx = -1 + *h = (*h)[:h.Len()-1] + return elem +} + +// returned stepQueue must be closed with method Close +func newStepQueue() *stepQueue { + q := &stepQueue{ + stop: make(chan struct{}), + reqs: make(chan stepQueueRec), + } + return q +} + +// the returned done function must be called to free resources +// allocated by the call to Start +// +// No WaitReady calls must be active at the time done is called +// The behavior of calling WaitReady after done was called is undefined +func (q *stepQueue) Start(concurrency int) (done func()) { + if concurrency < 1 { + panic("concurrency must be >= 1") + } + // l protects pending and queueItems + l := chainlock.New() + pendingCond := l.NewCond() + // priority queue + pending := &stepQueueHeap{} + // ident => queueItem + queueItems := make(map[interface{}]*stepQueueHeapItem) + // stopped is used for cancellation of "wake" goroutine + stopped := false + active := 0 + go func() { // "stopper" goroutine + <-q.stop + defer l.Lock().Unlock() + stopped = true + pendingCond.Broadcast() + }() + go func() { // "reqs" goroutine + for { + select { + case <-q.stop: + select { + case <-q.reqs: + panic("WaitReady call active while calling Close") + default: + return + } + case req := <-q.reqs: + func() { + defer l.Lock().Unlock() + if _, ok := queueItems[req.ident]; ok { + panic("WaitReady must not be called twice for the same ident") + } + qitem := &stepQueueHeapItem{ + req: req, + } + queueItems[req.ident] = qitem + heap.Push(pending, qitem) + pendingCond.Broadcast() + }() + } + } + }() + go func() { // "wake" goroutine + defer l.Lock().Unlock() + for { + + for !stopped && (active >= concurrency || pending.Len() == 0) { + pendingCond.Wait() + } + if stopped { + return + } + if pending.Len() <= 0 { + return + } + active++ + next := heap.Pop(pending).(*stepQueueHeapItem).req + delete(queueItems, next.ident) + + next.wakeup <- func() { + defer l.Lock().Unlock() + active-- + pendingCond.Broadcast() + } + } + }() + + done = func() { + close(q.stop) + } + return done +} + +type StepCompletedFunc func() + +func (q *stepQueue) sendAndWaitForWakeup(ident interface{}, targetDate time.Time) StepCompletedFunc { + req := stepQueueRec{ + ident, + targetDate, + make(chan StepCompletedFunc), + } + q.reqs <- req + return <-req.wakeup +} + +// Wait for the ident with targetDate to be 
selected to run. +func (q *stepQueue) WaitReady(ident interface{}, targetDate time.Time) StepCompletedFunc { + if targetDate.IsZero() { + panic("targetDate of zero is reserved for marking Done") + } + return q.sendAndWaitForWakeup(ident, targetDate) +} diff --git a/replication/driver/replication_stepqueue_test.go b/replication/driver/replication_stepqueue_test.go new file mode 100644 index 0000000..1c6740a --- /dev/null +++ b/replication/driver/replication_stepqueue_test.go @@ -0,0 +1,166 @@ +package driver + +import ( + "fmt" + "math" + "sort" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/montanaflynn/stats" + "github.com/stretchr/testify/assert" +) + +func TestPqNotconcurrent(t *testing.T) { + + var ctr uint32 + q := newStepQueue() + var wg sync.WaitGroup + wg.Add(3) + go func() { + defer wg.Done() + defer q.WaitReady("1", time.Unix(1, 0))() + ret := atomic.AddUint32(&ctr, 1) + assert.Equal(t, uint32(1), ret) + }() + go func() { + defer wg.Done() + defer q.WaitReady("2", time.Unix(2, 0))() + ret := atomic.AddUint32(&ctr, 1) + assert.Equal(t, uint32(2), ret) + }() + go func() { + defer wg.Done() + defer q.WaitReady("3", time.Unix(3, 0))() + ret := atomic.AddUint32(&ctr, 1) + assert.Equal(t, uint32(3), ret) + }() + + time.Sleep(1 * time.Second) + defer q.Start(1)() + wg.Wait() + +} + +type record struct { + fs int + step int + globalCtr uint32 + wakeAt time.Duration // relative to begin +} + +func (r record) String() string { + return fmt.Sprintf("fs %08d step %08d globalCtr %08d wakeAt %2.8f", r.fs, r.step, r.globalCtr, r.wakeAt.Seconds()) +} + +// This tests uses stepPq concurrently, simulating the following scenario: +// Given a number of filesystems F, each filesystem has N steps to take. +// The number of concurrent steps is limited to C. +// The target date for each step is the step number N. +// Hence, there are always F filesystems runnable (calling WaitReady) +// The priority queue prioritizes steps with lower target data (= lower step number). +// Hence, all steps with lower numbers should be woken up before steps with higher numbers. +// However, scheduling is not 100% deterministic (runtime, OS scheduler, etc). +// Hence, perform some statistics on the wakeup times and assert that the mean wakeup +// times for each step are close together. +func TestPqConcurrent(t *testing.T) { + + q := newStepQueue() + var wg sync.WaitGroup + filesystems := 100 + stepsPerFS := 20 + sleepTimePerStep := 50 * time.Millisecond + wg.Add(filesystems) + var globalCtr uint32 + + begin := time.Now() + records := make(chan []record, filesystems) + for fs := 0; fs < filesystems; fs++ { + go func(fs int) { + defer wg.Done() + recs := make([]record, 0) + for step := 0; step < stepsPerFS; step++ { + pos := atomic.AddUint32(&globalCtr, 1) + t := time.Unix(int64(step), 0) + done := q.WaitReady(fs, t) + wakeAt := time.Now().Sub(begin) + time.Sleep(sleepTimePerStep) + done() + recs = append(recs, record{fs, step, pos, wakeAt}) + } + records <- recs + }(fs) + } + concurrency := 5 + defer q.Start(concurrency)() + wg.Wait() + close(records) + t.Logf("loop done") + + flattenedRecs := make([]record, 0) + for recs := range records { + flattenedRecs = append(flattenedRecs, recs...) 
+ } + + sort.Slice(flattenedRecs, func(i, j int) bool { + return flattenedRecs[i].globalCtr < flattenedRecs[j].globalCtr + }) + + wakeTimesByStep := map[int][]float64{} + for _, rec := range flattenedRecs { + wakeTimes, ok := wakeTimesByStep[rec.step] + if !ok { + wakeTimes = []float64{} + } + wakeTimes = append(wakeTimes, rec.wakeAt.Seconds()) + wakeTimesByStep[rec.step] = wakeTimes + } + + meansByStepId := make([]float64, stepsPerFS) + interQuartileRangesByStepIdx := make([]float64, stepsPerFS) + for step := 0; step < stepsPerFS; step++ { + t.Logf("step %d", step) + mean, _ := stats.Mean(wakeTimesByStep[step]) + meansByStepId[step] = mean + t.Logf("\tmean: %v", mean) + median, _ := stats.Median(wakeTimesByStep[step]) + t.Logf("\tmedian: %v", median) + midhinge, _ := stats.Midhinge(wakeTimesByStep[step]) + t.Logf("\tmidhinge: %v", midhinge) + min, _ := stats.Min(wakeTimesByStep[step]) + t.Logf("\tmin: %v", min) + max, _ := stats.Max(wakeTimesByStep[step]) + t.Logf("\tmax: %v", max) + quartiles, _ := stats.Quartile(wakeTimesByStep[step]) + t.Logf("\t%#v", quartiles) + interQuartileRange, _ := stats.InterQuartileRange(wakeTimesByStep[step]) + t.Logf("\tinter-quartile range: %v", interQuartileRange) + interQuartileRangesByStepIdx[step] = interQuartileRange + } + + iqrMean, _ := stats.Mean(interQuartileRangesByStepIdx) + t.Logf("inter-quartile-range mean: %v", iqrMean) + iqrDev, _ := stats.StandardDeviation(interQuartileRangesByStepIdx) + t.Logf("inter-quartile-range deviation: %v", iqrDev) + + // each step should have the same "distribution" (=~ "spread") + assert.True(t, iqrDev < 0.01) + + minTimeForAllStepsWithIdxI := sleepTimePerStep.Seconds() * float64(filesystems) / float64(concurrency) + t.Logf("minTimeForAllStepsWithIdxI = %11.8f", minTimeForAllStepsWithIdxI) + for i, mean := range meansByStepId { + // we can't just do (i + 0.5) * minTimeforAllStepsWithIdxI + // because this doesn't account for drift + idealMean := 0.5 * minTimeForAllStepsWithIdxI + if i > 0 { + previousMean := meansByStepId[i-1] + idealMean = previousMean + minTimeForAllStepsWithIdxI + } + deltaFromIdeal := idealMean - mean + t.Logf("step %02d delta from ideal mean wake time: %11.8f - %11.8f = %11.8f", i, idealMean, mean, deltaFromIdeal) + assert.True(t, math.Abs(deltaFromIdeal) < 0.05) + } + +} diff --git a/replication/fsrep/fsfsm.go b/replication/fsrep/fsfsm.go deleted file mode 100644 index cfc8a5e..0000000 --- a/replication/fsrep/fsfsm.go +++ /dev/null @@ -1,557 +0,0 @@ -// Package fsrep implements replication of a single file system with existing versions -// from a sender to a receiver. -package fsrep - -import ( - "context" - "errors" - "fmt" - "net" - "sync" - "time" - - "github.com/prometheus/client_golang/prometheus" - "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication/pdu" - "github.com/zrepl/zrepl/util/bytecounter" - "github.com/zrepl/zrepl/util/watchdog" - "github.com/zrepl/zrepl/zfs" -) - -type contextKey int - -const ( - contextKeyLogger contextKey = iota -) - -type Logger = logger.Logger - -func WithLogger(ctx context.Context, log Logger) context.Context { - return context.WithValue(ctx, contextKeyLogger, log) -} - -func getLogger(ctx context.Context) Logger { - l, ok := ctx.Value(contextKeyLogger).(Logger) - if !ok { - l = logger.NewNullLogger() - } - return l -} - -// A Sender is usually part of a github.com/zrepl/zrepl/replication.Endpoint. 
-type Sender interface { - // If a non-nil io.ReadCloser is returned, it is guaranteed to be closed before - // any next call to the parent github.com/zrepl/zrepl/replication.Endpoint. - // If the send request is for dry run the io.ReadCloser will be nil - Send(ctx context.Context, r *pdu.SendReq) (*pdu.SendRes, zfs.StreamCopier, error) - ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) -} - -// A Sender is usually part of a github.com/zrepl/zrepl/replication.Endpoint. -type Receiver interface { - // Receive sends r and sendStream (the latter containing a ZFS send stream) - // to the parent github.com/zrepl/zrepl/replication.Endpoint. - Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error) -} - -type StepReport struct { - From, To string - Status StepState - Problem string - Bytes int64 - ExpectedBytes int64 // 0 means no size estimate possible -} - -type Report struct { - Filesystem string - Status string - Problem string - Completed, Pending []*StepReport -} - -//go:generate enumer -type=State -type State uint - -const ( - Ready State = 1 << iota - Completed -) - -type Error interface { - error - Temporary() bool - ContextErr() bool - LocalToFS() bool -} - -type Replication struct { - promBytesReplicated prometheus.Counter - - fs string - - // lock protects all fields below it in this struct, but not the data behind pointers - lock sync.Mutex - state State - err Error - completed, pending []*ReplicationStep -} - -func (f *Replication) State() State { - f.lock.Lock() - defer f.lock.Unlock() - return f.state -} - -func (f *Replication) FS() string { return f.fs } - -// returns zero value time.Time{} if no more pending steps -func (f *Replication) NextStepDate() time.Time { - if len(f.pending) == 0 { - return time.Time{} - } - return f.pending[0].to.SnapshotTime() -} - -func (f *Replication) Err() Error { - f.lock.Lock() - defer f.lock.Unlock() - return f.err -} - -func (f *Replication) CanRetry() bool { - f.lock.Lock() - defer f.lock.Unlock() - if f.state == Completed { - return false - } - if f.state != Ready { - panic(fmt.Sprintf("implementation error: %v", f.state)) - } - if f.err == nil { - return true - } - return f.err.Temporary() -} - -func (f *Replication) UpdateSizeEsitmate(ctx context.Context, sender Sender) error { - f.lock.Lock() - defer f.lock.Unlock() - for _, e := range f.pending { - if err := e.updateSizeEstimate(ctx, sender); err != nil { - return err - } - } - return nil -} - -type ReplicationBuilder struct { - r *Replication -} - -func BuildReplication(fs string, promBytesReplicated prometheus.Counter) *ReplicationBuilder { - return &ReplicationBuilder{&Replication{fs: fs, promBytesReplicated: promBytesReplicated}} -} - -func (b *ReplicationBuilder) AddStep(from, to FilesystemVersion) *ReplicationBuilder { - step := &ReplicationStep{ - state: StepReplicationReady, - parent: b.r, - from: from, - to: to, - } - b.r.pending = append(b.r.pending, step) - return b -} - -func (b *ReplicationBuilder) Done() (r *Replication) { - if len(b.r.pending) > 0 { - b.r.state = Ready - } else { - b.r.state = Completed - } - r = b.r - b.r = nil - return r -} - -type ReplicationConflictError struct { - Err error -} - -func (e *ReplicationConflictError) Timeout() bool { return false } - -func (e *ReplicationConflictError) Temporary() bool { return false } - -func (e *ReplicationConflictError) Error() string { return fmt.Sprintf("permanent error: %s", e.Err.Error()) } - -func (e 
*ReplicationConflictError) LocalToFS() bool { return true } - -func (e *ReplicationConflictError) ContextErr() bool { return false } - -func NewReplicationConflictError(fs string, err error) *Replication { - return &Replication{ - state: Completed, - fs: fs, - err: &ReplicationConflictError{Err: err}, - } -} - -//go:generate enumer -type=StepState -type StepState uint - -const ( - StepReplicationReady StepState = 1 << iota - StepMarkReplicatedReady - StepCompleted -) - -func (s StepState) IsTerminal() bool { return s == StepCompleted } - -type FilesystemVersion interface { - SnapshotTime() time.Time - GetName() string // name without @ or # - RelName() string // name with @ or # -} - -type ReplicationStep struct { - // only protects state, err - // from, to and parent are assumed to be immutable - lock sync.Mutex - - state StepState - from, to FilesystemVersion - parent *Replication - - // both retry and permanent error - err error - - byteCounter bytecounter.StreamCopier - expectedSize int64 // 0 means no size estimate present / possible -} - -func (f *Replication) Retry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) Error { - - var u updater = func(fu func(*Replication)) State { - f.lock.Lock() - defer f.lock.Unlock() - if fu != nil { - fu(f) - } - return f.state - } - - - var current *ReplicationStep - pre := u(nil) - getLogger(ctx).WithField("fsrep_state", pre).Debug("begin fsrep.Retry") - defer func() { - post := u(nil) - getLogger(ctx).WithField("fsrep_transition", post).Debug("end fsrep.Retry") - }() - - st := u(func(f *Replication) { - if len(f.pending) == 0 { - f.state = Completed - return - } - current = f.pending[0] - }) - if st == Completed { - return nil - } - if st != Ready { - panic(fmt.Sprintf("implementation error: %v", st)) - } - - stepCtx := WithLogger(ctx, getLogger(ctx).WithField("step", current)) - getLogger(stepCtx).Debug("take step") - err := current.Retry(stepCtx, ka, sender, receiver) - if err != nil { - getLogger(stepCtx).WithError(err).Error("step could not be completed") - } - - u(func(fsr *Replication) { - if err != nil { - f.err = &StepError{stepStr: current.String(), err: err} - return - } - if err == nil && current.state != StepCompleted { - panic(fmt.Sprintf("implementation error: %v", current.state)) - } - f.err = nil - f.completed = append(f.completed, current) - f.pending = f.pending[1:] - if len(f.pending) > 0 { - f.state = Ready - } else { - f.state = Completed - } - }) - var retErr Error = nil - u(func(fsr *Replication) { - retErr = fsr.err - }) - return retErr -} - -type updater func(func(fsr *Replication)) State - -type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state - -type StepError struct { - stepStr string - err error -} - -var _ Error = &StepError{} - -func (e StepError) Error() string { - if e.LocalToFS() { - return fmt.Sprintf("step %s failed: %s", e.stepStr, e.err) - } - return e.err.Error() -} - -func (e StepError) Timeout() bool { - if neterr, ok := e.err.(net.Error); ok { - return neterr.Timeout() - } - return false -} - -func (e StepError) Temporary() bool { - if neterr, ok := e.err.(net.Error); ok { - return neterr.Temporary() - } - return false -} - -func (e StepError) LocalToFS() bool { - if _, ok := e.err.(net.Error); ok { - return false - } - return true // conservative approximation: we'd like to check for specific errors returned over RPC here... 
-} - -func (e StepError) ContextErr() bool { - switch e.err { - case context.Canceled: - return true - case context.DeadlineExceeded: - return true - } - return false -} - -func (fsr *Replication) Report() *Report { - fsr.lock.Lock() - defer fsr.lock.Unlock() - - rep := Report{ - Filesystem: fsr.fs, - Status: fsr.state.String(), - } - - if fsr.err != nil && fsr.err.LocalToFS() { - rep.Problem = fsr.err.Error() - } - - rep.Completed = make([]*StepReport, len(fsr.completed)) - for i := range fsr.completed { - rep.Completed[i] = fsr.completed[i].Report() - } - rep.Pending = make([]*StepReport, len(fsr.pending)) - for i := range fsr.pending { - rep.Pending[i] = fsr.pending[i].Report() - } - - return &rep -} - -func (s *ReplicationStep) Retry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) error { - switch s.state { - case StepReplicationReady: - return s.doReplication(ctx, ka, sender, receiver) - case StepMarkReplicatedReady: - return s.doMarkReplicated(ctx, ka, sender) - case StepCompleted: - return nil - } - panic(fmt.Sprintf("implementation error: %v", s.state)) -} - -func (s *ReplicationStep) Error() error { - if s.state & (StepReplicationReady|StepMarkReplicatedReady) != 0 { - return s.err - } - return nil -} - -func (s *ReplicationStep) doReplication(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) error { - - if s.state != StepReplicationReady { - panic(fmt.Sprintf("implementation error: %v", s.state)) - } - - fs := s.parent.fs - - log := getLogger(ctx) - sr := s.buildSendRequest(false) - - log.Debug("initiate send request") - sres, sstreamCopier, err := sender.Send(ctx, sr) - if err != nil { - log.WithError(err).Error("send request failed") - return err - } - if sstreamCopier == nil { - err := errors.New("send request did not return a stream, broken endpoint implementation") - return err - } - defer sstreamCopier.Close() - - // Install a byte counter to track progress + for status report - s.byteCounter = bytecounter.NewStreamCopier(sstreamCopier) - byteCounterStopProgress := make(chan struct{}) - defer close(byteCounterStopProgress) - go func() { - var lastCount int64 - t := time.NewTicker(1 * time.Second) - defer t.Stop() - for { - select { - case <-byteCounterStopProgress: - return - case <-t.C: - newCount := s.byteCounter.Count() - if lastCount != newCount { - ka.MadeProgress() - } else { - lastCount = newCount - } - } - } - }() - defer func() { - s.parent.promBytesReplicated.Add(float64(s.byteCounter.Count())) - }() - - rr := &pdu.ReceiveReq{ - Filesystem: fs, - ClearResumeToken: !sres.UsedResumeToken, - } - log.Debug("initiate receive request") - _, err = receiver.Receive(ctx, rr, s.byteCounter) - if err != nil { - log. - WithError(err). - WithField("errType", fmt.Sprintf("%T", err)). 
- Error("receive request failed (might also be error on sender)") - // This failure could be due to - // - an unexpected exit of ZFS on the sending side - // - an unexpected exit of ZFS on the receiving side - // - a connectivity issue - return err - } - log.Debug("receive finished") - ka.MadeProgress() - - s.state = StepMarkReplicatedReady - return s.doMarkReplicated(ctx, ka, sender) - -} - -func (s *ReplicationStep) doMarkReplicated(ctx context.Context, ka *watchdog.KeepAlive, sender Sender) error { - - if s.state != StepMarkReplicatedReady { - panic(fmt.Sprintf("implementation error: %v", s.state)) - } - - log := getLogger(ctx) - - log.Debug("advance replication cursor") - req := &pdu.ReplicationCursorReq{ - Filesystem: s.parent.fs, - Op: &pdu.ReplicationCursorReq_Set{ - Set: &pdu.ReplicationCursorReq_SetOp{ - Snapshot: s.to.GetName(), - }, - }, - } - _, err := sender.ReplicationCursor(ctx, req) - if err != nil { - log.WithError(err).Error("error advancing replication cursor") - return err - } - ka.MadeProgress() - - s.state = StepCompleted - return err -} - -func (s *ReplicationStep) updateSizeEstimate(ctx context.Context, sender Sender) error { - - log := getLogger(ctx) - - sr := s.buildSendRequest(true) - - log.Debug("initiate dry run send request") - sres, _, err := sender.Send(ctx, sr) - if err != nil { - log.WithError(err).Error("dry run send request failed") - return err - } - s.expectedSize = sres.ExpectedSize - return nil -} - -func (s *ReplicationStep) buildSendRequest(dryRun bool) (sr *pdu.SendReq) { - fs := s.parent.fs - if s.from == nil { - sr = &pdu.SendReq{ - Filesystem: fs, - To: s.to.RelName(), - DryRun: dryRun, - } - } else { - sr = &pdu.SendReq{ - Filesystem: fs, - From: s.from.RelName(), - To: s.to.RelName(), - DryRun: dryRun, - } - } - return sr -} - -func (s *ReplicationStep) String() string { - if s.from == nil { // FIXME: ZFS semantics are that to is nil on non-incremental send - return fmt.Sprintf("%s%s (full)", s.parent.fs, s.to.RelName()) - } else { - return fmt.Sprintf("%s(%s => %s)", s.parent.fs, s.from.RelName(), s.to.RelName()) - } -} - -func (s *ReplicationStep) Report() *StepReport { - var from string // FIXME follow same convention as ZFS: to should be nil on full send - if s.from != nil { - from = s.from.RelName() - } - bytes := int64(0) - if s.byteCounter != nil { - bytes = s.byteCounter.Count() - } - problem := "" - if s.err != nil { - problem = s.err.Error() - } - rep := StepReport{ - From: from, - To: s.to.RelName(), - Status: s.state, - Problem: problem, - Bytes: bytes, - ExpectedBytes: s.expectedSize, - } - return &rep -} diff --git a/replication/fsrep/state_enumer.go b/replication/fsrep/state_enumer.go deleted file mode 100644 index 6e38ece..0000000 --- a/replication/fsrep/state_enumer.go +++ /dev/null @@ -1,50 +0,0 @@ -// Code generated by "enumer -type=State"; DO NOT EDIT. - -package fsrep - -import ( - "fmt" -) - -const _StateName = "ReadyCompleted" - -var _StateIndex = [...]uint8{0, 5, 14} - -func (i State) String() string { - i -= 1 - if i >= State(len(_StateIndex)-1) { - return fmt.Sprintf("State(%d)", i+1) - } - return _StateName[_StateIndex[i]:_StateIndex[i+1]] -} - -var _StateValues = []State{1, 2} - -var _StateNameToValueMap = map[string]State{ - _StateName[0:5]: 1, - _StateName[5:14]: 2, -} - -// StateString retrieves an enum value from the enum constants string name. -// Throws an error if the param is not part of the enum. 
-func StateString(s string) (State, error) { - if val, ok := _StateNameToValueMap[s]; ok { - return val, nil - } - return 0, fmt.Errorf("%s does not belong to State values", s) -} - -// StateValues returns all values of the enum -func StateValues() []State { - return _StateValues -} - -// IsAState returns "true" if the value is listed in the enum definition. "false" otherwise -func (i State) IsAState() bool { - for _, v := range _StateValues { - if i == v { - return true - } - } - return false -} diff --git a/replication/fsrep/stepstate_enumer.go b/replication/fsrep/stepstate_enumer.go deleted file mode 100644 index 287515c..0000000 --- a/replication/fsrep/stepstate_enumer.go +++ /dev/null @@ -1,61 +0,0 @@ -// Code generated by "enumer -type=StepState"; DO NOT EDIT. - -package fsrep - -import ( - "fmt" -) - -const ( - _StepStateName_0 = "StepReplicationReadyStepMarkReplicatedReady" - _StepStateName_1 = "StepCompleted" -) - -var ( - _StepStateIndex_0 = [...]uint8{0, 20, 43} - _StepStateIndex_1 = [...]uint8{0, 13} -) - -func (i StepState) String() string { - switch { - case 1 <= i && i <= 2: - i -= 1 - return _StepStateName_0[_StepStateIndex_0[i]:_StepStateIndex_0[i+1]] - case i == 4: - return _StepStateName_1 - default: - return fmt.Sprintf("StepState(%d)", i) - } -} - -var _StepStateValues = []StepState{1, 2, 4} - -var _StepStateNameToValueMap = map[string]StepState{ - _StepStateName_0[0:20]: 1, - _StepStateName_0[20:43]: 2, - _StepStateName_1[0:13]: 4, -} - -// StepStateString retrieves an enum value from the enum constants string name. -// Throws an error if the param is not part of the enum. -func StepStateString(s string) (StepState, error) { - if val, ok := _StepStateNameToValueMap[s]; ok { - return val, nil - } - return 0, fmt.Errorf("%s does not belong to StepState values", s) -} - -// StepStateValues returns all values of the enum -func StepStateValues() []StepState { - return _StepStateValues -} - -// IsAStepState returns "true" if the value is listed in the enum definition. "false" otherwise -func (i StepState) IsAStepState() bool { - for _, v := range _StepStateValues { - if i == v { - return true - } - } - return false -} diff --git a/replication/internal/diff/diff.go b/replication/logic/diff/diff.go similarity index 75% rename from replication/internal/diff/diff.go rename to replication/logic/diff/diff.go index 6af5246..f9ad46b 100644 --- a/replication/internal/diff/diff.go +++ b/replication/logic/diff/diff.go @@ -1,9 +1,11 @@ -package mainfsm +package diff import ( + "fmt" "sort" + "strings" - . "github.com/zrepl/zrepl/replication/pdu" + . 
"github.com/zrepl/zrepl/replication/logic/pdu" ) type ConflictNoCommonAncestor struct { @@ -11,7 +13,19 @@ type ConflictNoCommonAncestor struct { } func (c *ConflictNoCommonAncestor) Error() string { - return "no common snapshot or suitable bookmark between sender and receiver" + var buf strings.Builder + buf.WriteString("no common snapshot or suitable bookmark between sender and receiver") + if len(c.SortedReceiverVersions) > 0 || len(c.SortedSenderVersions) > 0 { + buf.WriteString(":\n sorted sender versions:\n") + for _, v := range c.SortedSenderVersions { + fmt.Fprintf(&buf, " %s\n", v.RelName()) + } + buf.WriteString(" sorted receiver versions:\n") + for _, v := range c.SortedReceiverVersions { + fmt.Fprintf(&buf, " %s\n", v.RelName()) + } + } + return buf.String() } type ConflictDiverged struct { @@ -21,7 +35,18 @@ type ConflictDiverged struct { } func (c *ConflictDiverged) Error() string { - return "the receiver's latest snapshot is not present on sender" + var buf strings.Builder + buf.WriteString("the receiver's latest snapshot is not present on sender:\n") + fmt.Fprintf(&buf, " last common: %s\n", c.CommonAncestor.RelName()) + fmt.Fprintf(&buf, " sender-only:\n") + for _, v := range c.SenderOnly { + fmt.Fprintf(&buf, " %s\n", v.RelName()) + } + fmt.Fprintf(&buf, " receiver-only:\n") + for _, v := range c.ReceiverOnly { + fmt.Fprintf(&buf, " %s\n", v.RelName()) + } + return buf.String() } func SortVersionListByCreateTXGThenBookmarkLTSnapshot(fsvslice []*FilesystemVersion) []*FilesystemVersion { diff --git a/replication/logic/diff/diff_test.go b/replication/logic/diff/diff_test.go new file mode 100644 index 0000000..46200b3 --- /dev/null +++ b/replication/logic/diff/diff_test.go @@ -0,0 +1,130 @@ +package diff + +import ( + "strconv" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "github.com/zrepl/zrepl/replication/logic/pdu" +) + +func fsvlist(fsv ...string) (r []*FilesystemVersion) { + + r = make([]*FilesystemVersion, len(fsv)) + for i, f := range fsv { + + // parse the id from fsvlist. 
it is used to derivce Guid,CreateTXG and Creation attrs + split := strings.Split(f, ",") + if len(split) != 2 { + panic("invalid fsv spec") + } + id, err := strconv.Atoi(split[1]) + if err != nil { + panic(err) + } + creation := func(id int) string { + return FilesystemVersionCreation(time.Unix(0, 0).Add(time.Duration(id) * time.Second)) + } + if strings.HasPrefix(f, "#") { + r[i] = &FilesystemVersion{ + Name: strings.TrimPrefix(f, "#"), + Type: FilesystemVersion_Bookmark, + Guid: uint64(id), + CreateTXG: uint64(id), + Creation: creation(id), + } + } else if strings.HasPrefix(f, "@") { + r[i] = &FilesystemVersion{ + Name: strings.TrimPrefix(f, "@"), + Type: FilesystemVersion_Snapshot, + Guid: uint64(id), + CreateTXG: uint64(id), + Creation: creation(id), + } + } else { + panic("invalid character") + } + } + return +} + +func doTest(receiver, sender []*FilesystemVersion, validate func(incpath []*FilesystemVersion, conflict error)) { + p, err := IncrementalPath(receiver, sender) + validate(p, err) +} + +func TestIncrementalPath_SnapshotsOnly(t *testing.T) { + + l := fsvlist + + // basic functionality + doTest(l("@a,1", "@b,2"), l("@a,1", "@b,2", "@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("@b,2", "@c,3", "@d,4"), path) + }) + + // no common ancestor + doTest(l(), l("@a,1"), func(path []*FilesystemVersion, conflict error) { + assert.Nil(t, path) + ca, ok := conflict.(*ConflictNoCommonAncestor) + require.True(t, ok) + assert.Equal(t, l("@a,1"), ca.SortedSenderVersions) + }) + doTest(l("@a,1", "@b,2"), l("@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) { + assert.Nil(t, path) + ca, ok := conflict.(*ConflictNoCommonAncestor) + require.True(t, ok) + assert.Equal(t, l("@a,1", "@b,2"), ca.SortedReceiverVersions) + assert.Equal(t, l("@c,3", "@d,4"), ca.SortedSenderVersions) + }) + + // divergence is detected + doTest(l("@a,1", "@b1,2"), l("@a,1", "@b2,3"), func(path []*FilesystemVersion, conflict error) { + assert.Nil(t, path) + cd, ok := conflict.(*ConflictDiverged) + require.True(t, ok) + assert.Equal(t, l("@a,1")[0], cd.CommonAncestor) + assert.Equal(t, l("@b1,2"), cd.ReceiverOnly) + assert.Equal(t, l("@b2,3"), cd.SenderOnly) + }) + + // gaps before most recent common ancestor do not matter + doTest(l("@a,1", "@b,2", "@c,3"), l("@a,1", "@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("@c,3", "@d,4"), path) + }) + + // sender with earlier but also current version as sender is not a conflict + doTest(l("@c,3"), l("@a,1", "@b,2", "@c,3") , func(path []*FilesystemVersion, conflict error) { + t.Logf("path: %#v", path) + t.Logf("conflict: %#v", conflict) + assert.Empty(t, path) + assert.Nil(t, conflict) + }) + +} + +func TestIncrementalPath_BookmarkSupport(t *testing.T) { + l := fsvlist + + // bookmarks are used + doTest(l("@a,1"), l("#a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("#a,1", "@b,2"), path) + }) + + // boomarks are stripped from IncrementalPath (cannot send incrementally) + doTest(l("@a,1"), l("#a,1", "#b,2", "@c,3"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("#a,1", "@c,3"), path) + }) + + // test that snapshots are preferred over bookmarks in IncrementalPath + doTest(l("@a,1"), l("#a,1", "@a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("@a,1", "@b,2"), path) + }) + doTest(l("@a,1"), l("@a,1", "#a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) { + assert.Equal(t, l("@a,1", 
"@b,2"), path) + }) + +} diff --git a/replication/pdu/pdu.pb.go b/replication/logic/pdu/pdu.pb.go similarity index 81% rename from replication/pdu/pdu.pb.go rename to replication/logic/pdu/pdu.pb.go index 6b7fd86..b783f3d 100644 --- a/replication/pdu/pdu.pb.go +++ b/replication/logic/pdu/pdu.pb.go @@ -43,7 +43,7 @@ func (x FilesystemVersion_VersionType) String() string { return proto.EnumName(FilesystemVersion_VersionType_name, int32(x)) } func (FilesystemVersion_VersionType) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{5, 0} + return fileDescriptor_pdu_83b7e2a28d820622, []int{5, 0} } type ListFilesystemReq struct { @@ -56,7 +56,7 @@ func (m *ListFilesystemReq) Reset() { *m = ListFilesystemReq{} } func (m *ListFilesystemReq) String() string { return proto.CompactTextString(m) } func (*ListFilesystemReq) ProtoMessage() {} func (*ListFilesystemReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{0} + return fileDescriptor_pdu_83b7e2a28d820622, []int{0} } func (m *ListFilesystemReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ListFilesystemReq.Unmarshal(m, b) @@ -78,7 +78,6 @@ var xxx_messageInfo_ListFilesystemReq proto.InternalMessageInfo type ListFilesystemRes struct { Filesystems []*Filesystem `protobuf:"bytes,1,rep,name=Filesystems,proto3" json:"Filesystems,omitempty"` - Empty bool `protobuf:"varint,2,opt,name=Empty,proto3" json:"Empty,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` @@ -88,7 +87,7 @@ func (m *ListFilesystemRes) Reset() { *m = ListFilesystemRes{} } func (m *ListFilesystemRes) String() string { return proto.CompactTextString(m) } func (*ListFilesystemRes) ProtoMessage() {} func (*ListFilesystemRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{1} + return fileDescriptor_pdu_83b7e2a28d820622, []int{1} } func (m *ListFilesystemRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ListFilesystemRes.Unmarshal(m, b) @@ -115,16 +114,10 @@ func (m *ListFilesystemRes) GetFilesystems() []*Filesystem { return nil } -func (m *ListFilesystemRes) GetEmpty() bool { - if m != nil { - return m.Empty - } - return false -} - type Filesystem struct { Path string `protobuf:"bytes,1,opt,name=Path,proto3" json:"Path,omitempty"` ResumeToken string `protobuf:"bytes,2,opt,name=ResumeToken,proto3" json:"ResumeToken,omitempty"` + IsPlaceholder bool `protobuf:"varint,3,opt,name=IsPlaceholder,proto3" json:"IsPlaceholder,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` @@ -134,7 +127,7 @@ func (m *Filesystem) Reset() { *m = Filesystem{} } func (m *Filesystem) String() string { return proto.CompactTextString(m) } func (*Filesystem) ProtoMessage() {} func (*Filesystem) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{2} + return fileDescriptor_pdu_83b7e2a28d820622, []int{2} } func (m *Filesystem) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_Filesystem.Unmarshal(m, b) @@ -168,6 +161,13 @@ func (m *Filesystem) GetResumeToken() string { return "" } +func (m *Filesystem) GetIsPlaceholder() bool { + if m != nil { + return m.IsPlaceholder + } + return false +} + type ListFilesystemVersionsReq struct { Filesystem string `protobuf:"bytes,1,opt,name=Filesystem,proto3" json:"Filesystem,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` @@ -179,7 +179,7 @@ func (m *ListFilesystemVersionsReq) Reset() { *m 
= ListFilesystemVersion func (m *ListFilesystemVersionsReq) String() string { return proto.CompactTextString(m) } func (*ListFilesystemVersionsReq) ProtoMessage() {} func (*ListFilesystemVersionsReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{3} + return fileDescriptor_pdu_83b7e2a28d820622, []int{3} } func (m *ListFilesystemVersionsReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ListFilesystemVersionsReq.Unmarshal(m, b) @@ -217,7 +217,7 @@ func (m *ListFilesystemVersionsRes) Reset() { *m = ListFilesystemVersion func (m *ListFilesystemVersionsRes) String() string { return proto.CompactTextString(m) } func (*ListFilesystemVersionsRes) ProtoMessage() {} func (*ListFilesystemVersionsRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{4} + return fileDescriptor_pdu_83b7e2a28d820622, []int{4} } func (m *ListFilesystemVersionsRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ListFilesystemVersionsRes.Unmarshal(m, b) @@ -259,7 +259,7 @@ func (m *FilesystemVersion) Reset() { *m = FilesystemVersion{} } func (m *FilesystemVersion) String() string { return proto.CompactTextString(m) } func (*FilesystemVersion) ProtoMessage() {} func (*FilesystemVersion) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{5} + return fileDescriptor_pdu_83b7e2a28d820622, []int{5} } func (m *FilesystemVersion) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_FilesystemVersion.Unmarshal(m, b) @@ -339,7 +339,7 @@ func (m *SendReq) Reset() { *m = SendReq{} } func (m *SendReq) String() string { return proto.CompactTextString(m) } func (*SendReq) ProtoMessage() {} func (*SendReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{6} + return fileDescriptor_pdu_83b7e2a28d820622, []int{6} } func (m *SendReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_SendReq.Unmarshal(m, b) @@ -420,7 +420,7 @@ func (m *Property) Reset() { *m = Property{} } func (m *Property) String() string { return proto.CompactTextString(m) } func (*Property) ProtoMessage() {} func (*Property) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{7} + return fileDescriptor_pdu_83b7e2a28d820622, []int{7} } func (m *Property) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_Property.Unmarshal(m, b) @@ -470,7 +470,7 @@ func (m *SendRes) Reset() { *m = SendRes{} } func (m *SendRes) String() string { return proto.CompactTextString(m) } func (*SendRes) ProtoMessage() {} func (*SendRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{8} + return fileDescriptor_pdu_83b7e2a28d820622, []int{8} } func (m *SendRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_SendRes.Unmarshal(m, b) @@ -524,7 +524,7 @@ func (m *ReceiveReq) Reset() { *m = ReceiveReq{} } func (m *ReceiveReq) String() string { return proto.CompactTextString(m) } func (*ReceiveReq) ProtoMessage() {} func (*ReceiveReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{9} + return fileDescriptor_pdu_83b7e2a28d820622, []int{9} } func (m *ReceiveReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReceiveReq.Unmarshal(m, b) @@ -568,7 +568,7 @@ func (m *ReceiveRes) Reset() { *m = ReceiveRes{} } func (m *ReceiveRes) String() string { return proto.CompactTextString(m) } func (*ReceiveRes) ProtoMessage() {} func (*ReceiveRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{10} + return 
fileDescriptor_pdu_83b7e2a28d820622, []int{10} } func (m *ReceiveRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReceiveRes.Unmarshal(m, b) @@ -601,7 +601,7 @@ func (m *DestroySnapshotsReq) Reset() { *m = DestroySnapshotsReq{} } func (m *DestroySnapshotsReq) String() string { return proto.CompactTextString(m) } func (*DestroySnapshotsReq) ProtoMessage() {} func (*DestroySnapshotsReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{11} + return fileDescriptor_pdu_83b7e2a28d820622, []int{11} } func (m *DestroySnapshotsReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_DestroySnapshotsReq.Unmarshal(m, b) @@ -647,7 +647,7 @@ func (m *DestroySnapshotRes) Reset() { *m = DestroySnapshotRes{} } func (m *DestroySnapshotRes) String() string { return proto.CompactTextString(m) } func (*DestroySnapshotRes) ProtoMessage() {} func (*DestroySnapshotRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{12} + return fileDescriptor_pdu_83b7e2a28d820622, []int{12} } func (m *DestroySnapshotRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_DestroySnapshotRes.Unmarshal(m, b) @@ -692,7 +692,7 @@ func (m *DestroySnapshotsRes) Reset() { *m = DestroySnapshotsRes{} } func (m *DestroySnapshotsRes) String() string { return proto.CompactTextString(m) } func (*DestroySnapshotsRes) ProtoMessage() {} func (*DestroySnapshotsRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{13} + return fileDescriptor_pdu_83b7e2a28d820622, []int{13} } func (m *DestroySnapshotsRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_DestroySnapshotsRes.Unmarshal(m, b) @@ -734,7 +734,7 @@ func (m *ReplicationCursorReq) Reset() { *m = ReplicationCursorReq{} } func (m *ReplicationCursorReq) String() string { return proto.CompactTextString(m) } func (*ReplicationCursorReq) ProtoMessage() {} func (*ReplicationCursorReq) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{14} + return fileDescriptor_pdu_83b7e2a28d820622, []int{14} } func (m *ReplicationCursorReq) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReplicationCursorReq.Unmarshal(m, b) @@ -882,7 +882,7 @@ func (m *ReplicationCursorReq_GetOp) Reset() { *m = ReplicationCursorReq func (m *ReplicationCursorReq_GetOp) String() string { return proto.CompactTextString(m) } func (*ReplicationCursorReq_GetOp) ProtoMessage() {} func (*ReplicationCursorReq_GetOp) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{14, 0} + return fileDescriptor_pdu_83b7e2a28d820622, []int{14, 0} } func (m *ReplicationCursorReq_GetOp) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReplicationCursorReq_GetOp.Unmarshal(m, b) @@ -913,7 +913,7 @@ func (m *ReplicationCursorReq_SetOp) Reset() { *m = ReplicationCursorReq func (m *ReplicationCursorReq_SetOp) String() string { return proto.CompactTextString(m) } func (*ReplicationCursorReq_SetOp) ProtoMessage() {} func (*ReplicationCursorReq_SetOp) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{14, 1} + return fileDescriptor_pdu_83b7e2a28d820622, []int{14, 1} } func (m *ReplicationCursorReq_SetOp) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReplicationCursorReq_SetOp.Unmarshal(m, b) @@ -954,7 +954,7 @@ func (m *ReplicationCursorRes) Reset() { *m = ReplicationCursorRes{} } func (m *ReplicationCursorRes) String() string { return proto.CompactTextString(m) } func (*ReplicationCursorRes) ProtoMessage() {} func 
(*ReplicationCursorRes) Descriptor() ([]byte, []int) { - return fileDescriptor_pdu_89315d819a6e0938, []int{15} + return fileDescriptor_pdu_83b7e2a28d820622, []int{15} } func (m *ReplicationCursorRes) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_ReplicationCursorRes.Unmarshal(m, b) @@ -1079,6 +1079,83 @@ func _ReplicationCursorRes_OneofSizer(msg proto.Message) (n int) { return n } +type PingReq struct { + Message string `protobuf:"bytes,1,opt,name=Message,proto3" json:"Message,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PingReq) Reset() { *m = PingReq{} } +func (m *PingReq) String() string { return proto.CompactTextString(m) } +func (*PingReq) ProtoMessage() {} +func (*PingReq) Descriptor() ([]byte, []int) { + return fileDescriptor_pdu_83b7e2a28d820622, []int{16} +} +func (m *PingReq) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PingReq.Unmarshal(m, b) +} +func (m *PingReq) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PingReq.Marshal(b, m, deterministic) +} +func (dst *PingReq) XXX_Merge(src proto.Message) { + xxx_messageInfo_PingReq.Merge(dst, src) +} +func (m *PingReq) XXX_Size() int { + return xxx_messageInfo_PingReq.Size(m) +} +func (m *PingReq) XXX_DiscardUnknown() { + xxx_messageInfo_PingReq.DiscardUnknown(m) +} + +var xxx_messageInfo_PingReq proto.InternalMessageInfo + +func (m *PingReq) GetMessage() string { + if m != nil { + return m.Message + } + return "" +} + +type PingRes struct { + // Echo must be PingReq.Message + Echo string `protobuf:"bytes,1,opt,name=Echo,proto3" json:"Echo,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PingRes) Reset() { *m = PingRes{} } +func (m *PingRes) String() string { return proto.CompactTextString(m) } +func (*PingRes) ProtoMessage() {} +func (*PingRes) Descriptor() ([]byte, []int) { + return fileDescriptor_pdu_83b7e2a28d820622, []int{17} +} +func (m *PingRes) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PingRes.Unmarshal(m, b) +} +func (m *PingRes) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PingRes.Marshal(b, m, deterministic) +} +func (dst *PingRes) XXX_Merge(src proto.Message) { + xxx_messageInfo_PingRes.Merge(dst, src) +} +func (m *PingRes) XXX_Size() int { + return xxx_messageInfo_PingRes.Size(m) +} +func (m *PingRes) XXX_DiscardUnknown() { + xxx_messageInfo_PingRes.DiscardUnknown(m) +} + +var xxx_messageInfo_PingRes proto.InternalMessageInfo + +func (m *PingRes) GetEcho() string { + if m != nil { + return m.Echo + } + return "" +} + func init() { proto.RegisterType((*ListFilesystemReq)(nil), "ListFilesystemReq") proto.RegisterType((*ListFilesystemRes)(nil), "ListFilesystemRes") @@ -1098,6 +1175,8 @@ func init() { proto.RegisterType((*ReplicationCursorReq_GetOp)(nil), "ReplicationCursorReq.GetOp") proto.RegisterType((*ReplicationCursorReq_SetOp)(nil), "ReplicationCursorReq.SetOp") proto.RegisterType((*ReplicationCursorRes)(nil), "ReplicationCursorRes") + proto.RegisterType((*PingReq)(nil), "PingReq") + proto.RegisterType((*PingRes)(nil), "PingRes") proto.RegisterEnum("FilesystemVersion_VersionType", FilesystemVersion_VersionType_name, FilesystemVersion_VersionType_value) } @@ -1113,6 +1192,7 @@ const _ = grpc.SupportPackageIsVersion4 // // For semantics around ctx use and closing/ending streaming RPCs, please refer to 
https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. type ReplicationClient interface { + Ping(ctx context.Context, in *PingReq, opts ...grpc.CallOption) (*PingRes, error) ListFilesystems(ctx context.Context, in *ListFilesystemReq, opts ...grpc.CallOption) (*ListFilesystemRes, error) ListFilesystemVersions(ctx context.Context, in *ListFilesystemVersionsReq, opts ...grpc.CallOption) (*ListFilesystemVersionsRes, error) DestroySnapshots(ctx context.Context, in *DestroySnapshotsReq, opts ...grpc.CallOption) (*DestroySnapshotsRes, error) @@ -1127,6 +1207,15 @@ func NewReplicationClient(cc *grpc.ClientConn) ReplicationClient { return &replicationClient{cc} } +func (c *replicationClient) Ping(ctx context.Context, in *PingReq, opts ...grpc.CallOption) (*PingRes, error) { + out := new(PingRes) + err := c.cc.Invoke(ctx, "/Replication/Ping", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *replicationClient) ListFilesystems(ctx context.Context, in *ListFilesystemReq, opts ...grpc.CallOption) (*ListFilesystemRes, error) { out := new(ListFilesystemRes) err := c.cc.Invoke(ctx, "/Replication/ListFilesystems", in, out, opts...) @@ -1165,6 +1254,7 @@ func (c *replicationClient) ReplicationCursor(ctx context.Context, in *Replicati // ReplicationServer is the server API for Replication service. type ReplicationServer interface { + Ping(context.Context, *PingReq) (*PingRes, error) ListFilesystems(context.Context, *ListFilesystemReq) (*ListFilesystemRes, error) ListFilesystemVersions(context.Context, *ListFilesystemVersionsReq) (*ListFilesystemVersionsRes, error) DestroySnapshots(context.Context, *DestroySnapshotsReq) (*DestroySnapshotsRes, error) @@ -1175,6 +1265,24 @@ func RegisterReplicationServer(s *grpc.Server, srv ReplicationServer) { s.RegisterService(&_Replication_serviceDesc, srv) } +func _Replication_Ping_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PingReq) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ReplicationServer).Ping(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Replication/Ping", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ReplicationServer).Ping(ctx, req.(*PingReq)) + } + return interceptor(ctx, in, info, handler) +} + func _Replication_ListFilesystems_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(ListFilesystemReq) if err := dec(in); err != nil { @@ -1251,6 +1359,10 @@ var _Replication_serviceDesc = grpc.ServiceDesc{ ServiceName: "Replication", HandlerType: (*ReplicationServer)(nil), Methods: []grpc.MethodDesc{ + { + MethodName: "Ping", + Handler: _Replication_Ping_Handler, + }, { MethodName: "ListFilesystems", Handler: _Replication_ListFilesystems_Handler, @@ -1272,54 +1384,58 @@ var _Replication_serviceDesc = grpc.ServiceDesc{ Metadata: "pdu.proto", } -func init() { proto.RegisterFile("pdu.proto", fileDescriptor_pdu_89315d819a6e0938) } +func init() { proto.RegisterFile("pdu.proto", fileDescriptor_pdu_83b7e2a28d820622) } -var fileDescriptor_pdu_89315d819a6e0938 = []byte{ - // 735 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xdd, 0x6e, 0xda, 0x4a, - 0x10, 0xc6, 0x60, 0xc0, 0x0c, 0x51, 0x42, 0x36, 0x9c, 0xc8, 0xc7, 0xe7, 0x28, 0x42, 
0xdb, 0x1b, - 0x52, 0xa9, 0x6e, 0x45, 0x7b, 0x53, 0x55, 0xaa, 0x54, 0x42, 0x7e, 0xa4, 0x56, 0x69, 0xb4, 0xd0, - 0x28, 0xca, 0x1d, 0x0d, 0xa3, 0xc4, 0x0a, 0xb0, 0xce, 0xee, 0xba, 0x0a, 0xbd, 0xec, 0x7b, 0xf4, - 0x41, 0xfa, 0x0e, 0xbd, 0xec, 0x03, 0x55, 0xbb, 0x60, 0xe3, 0x60, 0x23, 0x71, 0xe5, 0xfd, 0xbe, - 0x9d, 0x9d, 0x9d, 0xf9, 0x76, 0x66, 0x0c, 0xb5, 0x70, 0x14, 0xf9, 0xa1, 0xe0, 0x8a, 0xd3, 0x3d, - 0xd8, 0xfd, 0x14, 0x48, 0x75, 0x12, 0x8c, 0x51, 0xce, 0xa4, 0xc2, 0x09, 0xc3, 0x07, 0x7a, 0x95, - 0x25, 0x25, 0x79, 0x01, 0xf5, 0x25, 0x21, 0x5d, 0xab, 0x55, 0x6a, 0xd7, 0x3b, 0x75, 0x3f, 0x65, - 0x94, 0xde, 0x27, 0x4d, 0x28, 0x1f, 0x4f, 0x42, 0x35, 0x73, 0x8b, 0x2d, 0xab, 0xed, 0xb0, 0x39, - 0xa0, 0x5d, 0x80, 0xa5, 0x11, 0x21, 0x60, 0x5f, 0x0c, 0xd5, 0x9d, 0x6b, 0xb5, 0xac, 0x76, 0x8d, - 0x99, 0x35, 0x69, 0x41, 0x9d, 0xa1, 0x8c, 0x26, 0x38, 0xe0, 0xf7, 0x38, 0x35, 0xa7, 0x6b, 0x2c, - 0x4d, 0xd1, 0x77, 0xf0, 0xef, 0xd3, 0xe8, 0x2e, 0x51, 0xc8, 0x80, 0x4f, 0x25, 0xc3, 0x07, 0x72, - 0x90, 0xbe, 0x60, 0xe1, 0x38, 0xc5, 0xd0, 0x8f, 0xeb, 0x0f, 0x4b, 0xe2, 0x83, 0x13, 0xc3, 0x45, - 0x7e, 0xc4, 0xcf, 0x58, 0xb2, 0xc4, 0x86, 0xfe, 0xb1, 0x60, 0x37, 0xb3, 0x4f, 0x3a, 0x60, 0x0f, - 0x66, 0x21, 0x9a, 0xcb, 0xb7, 0x3b, 0x07, 0x59, 0x0f, 0xfe, 0xe2, 0xab, 0xad, 0x98, 0xb1, 0xd5, - 0x4a, 0x9c, 0x0f, 0x27, 0xb8, 0x48, 0xd7, 0xac, 0x35, 0x77, 0x1a, 0x05, 0x23, 0xb7, 0xd4, 0xb2, - 0xda, 0x36, 0x33, 0x6b, 0xf2, 0x3f, 0xd4, 0x8e, 0x04, 0x0e, 0x15, 0x0e, 0xae, 0x4e, 0x5d, 0xdb, - 0x6c, 0x2c, 0x09, 0xe2, 0x81, 0x63, 0x40, 0xc0, 0xa7, 0x6e, 0xd9, 0x78, 0x4a, 0x30, 0x3d, 0x84, - 0x7a, 0xea, 0x5a, 0xb2, 0x05, 0x4e, 0x7f, 0x3a, 0x0c, 0xe5, 0x1d, 0x57, 0x8d, 0x82, 0x46, 0x5d, - 0xce, 0xef, 0x27, 0x43, 0x71, 0xdf, 0xb0, 0xe8, 0x2f, 0x0b, 0xaa, 0x7d, 0x9c, 0x8e, 0x36, 0xd0, - 0x53, 0x07, 0x79, 0x22, 0xf8, 0x24, 0x0e, 0x5c, 0xaf, 0xc9, 0x36, 0x14, 0x07, 0xdc, 0x84, 0x5d, - 0x63, 0xc5, 0x01, 0x5f, 0x7d, 0x52, 0x3b, 0xf3, 0xa4, 0x26, 0x70, 0x3e, 0x09, 0x05, 0x4a, 0x69, - 0x02, 0x77, 0x58, 0x82, 0x75, 0x21, 0xf5, 0x70, 0x14, 0x85, 0x6e, 0x65, 0x5e, 0x48, 0x06, 0x90, - 0x7d, 0xa8, 0xf4, 0xc4, 0x8c, 0x45, 0x53, 0xb7, 0x6a, 0xe8, 0x05, 0xa2, 0x6f, 0xc0, 0xb9, 0x10, - 0x3c, 0x44, 0xa1, 0x66, 0x89, 0xa8, 0x56, 0x4a, 0xd4, 0x26, 0x94, 0x2f, 0x87, 0xe3, 0x28, 0x56, - 0x7a, 0x0e, 0xe8, 0x8f, 0x24, 0x63, 0x49, 0xda, 0xb0, 0xf3, 0x45, 0xe2, 0x68, 0xb5, 0x08, 0x1d, - 0xb6, 0x4a, 0x13, 0x0a, 0x5b, 0xc7, 0x8f, 0x21, 0xde, 0x28, 0x1c, 0xf5, 0x83, 0xef, 0x68, 0x32, - 0x2e, 0xb1, 0x27, 0x1c, 0x39, 0x04, 0x58, 0xc4, 0x13, 0xa0, 0x74, 0x6d, 0x53, 0x54, 0x35, 0x3f, - 0x0e, 0x91, 0xa5, 0x36, 0xe9, 0x15, 0x00, 0xc3, 0x1b, 0x0c, 0xbe, 0xe1, 0x26, 0xc2, 0x3f, 0x87, - 0xc6, 0xd1, 0x18, 0x87, 0x22, 0x1b, 0x67, 0x86, 0xa7, 0x5b, 0x29, 0xcf, 0x92, 0xde, 0xc2, 0x5e, - 0x0f, 0xa5, 0x12, 0x7c, 0x16, 0x57, 0xc0, 0x26, 0x9d, 0x43, 0x5e, 0x41, 0x2d, 0xb1, 0x77, 0x8b, - 0x6b, 0xbb, 0x63, 0x69, 0x44, 0xaf, 0x81, 0xac, 0x5c, 0xb4, 0x68, 0xb2, 0x18, 0x9a, 0x5b, 0xd6, - 0x34, 0x59, 0x6c, 0x63, 0x06, 0x89, 0x10, 0x5c, 0xc4, 0x2f, 0x66, 0x00, 0xed, 0xe5, 0x25, 0xa1, - 0x87, 0x54, 0x55, 0x27, 0x3e, 0x56, 0x71, 0x03, 0xef, 0xf9, 0xd9, 0x10, 0x58, 0x6c, 0x43, 0x7f, - 0x5b, 0xd0, 0x64, 0x18, 0x8e, 0x83, 0x1b, 0xd3, 0x24, 0x47, 0x91, 0x90, 0x5c, 0x6c, 0x22, 0xc6, - 0x4b, 0x28, 0xdd, 0xa2, 0x32, 0x21, 0xd5, 0x3b, 0xff, 0xf9, 0x79, 0x3e, 0xfc, 0x53, 0x54, 0x9f, - 0xc3, 0xb3, 0x02, 0xd3, 0x96, 0xfa, 0x80, 0x44, 0x65, 0x4a, 0x64, 0xed, 0x81, 0x7e, 0x7c, 0x40, - 0xa2, 0xf2, 0xaa, 0x50, 0x36, 0x0e, 0xbc, 0x67, 0x50, 0x36, 0x1b, 0xba, 0x49, 0x12, 0xe1, 0xe6, - 0x5a, 0x24, 
0xb8, 0x6b, 0x43, 0x91, 0x87, 0x74, 0x90, 0x9b, 0x8d, 0x6e, 0xa1, 0xf9, 0x24, 0xd1, - 0x79, 0xd8, 0x67, 0x85, 0x64, 0x96, 0x38, 0xe7, 0x5c, 0xe1, 0x63, 0x20, 0xe7, 0xfe, 0x9c, 0xb3, - 0x02, 0x4b, 0x98, 0xae, 0x03, 0x95, 0xb9, 0x4a, 0x9d, 0x9f, 0x45, 0xdd, 0xbf, 0x89, 0x5b, 0xf2, - 0x16, 0x76, 0x9e, 0x8e, 0x50, 0x49, 0x88, 0x9f, 0xf9, 0x89, 0x78, 0x59, 0x4e, 0x92, 0x0b, 0xd8, - 0xcf, 0x9f, 0xbe, 0xc4, 0xf3, 0xd7, 0xce, 0x74, 0x6f, 0xfd, 0x9e, 0x24, 0xef, 0xa1, 0xb1, 0x5a, - 0x07, 0xa4, 0xe9, 0xe7, 0xd4, 0xb7, 0x97, 0xc7, 0x4a, 0xf2, 0x01, 0x76, 0x33, 0x92, 0x91, 0x7f, - 0x72, 0xdf, 0xc7, 0xcb, 0xa5, 0x65, 0xb7, 0x7c, 0x5d, 0x0a, 0x47, 0xd1, 0xd7, 0x8a, 0xf9, 0xa1, - 0xbe, 0xfe, 0x1b, 0x00, 0x00, 0xff, 0xff, 0xa3, 0xba, 0x8e, 0x63, 0x5d, 0x07, 0x00, 0x00, +var fileDescriptor_pdu_83b7e2a28d820622 = []byte{ + // 785 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xd1, 0x8e, 0xe3, 0x34, + 0x14, 0x9d, 0xb4, 0x69, 0x9b, 0xde, 0x0e, 0xbb, 0x1d, 0x4f, 0x59, 0x85, 0x00, 0xab, 0xca, 0xcb, + 0x43, 0x17, 0x89, 0x80, 0x0a, 0x2f, 0x08, 0x09, 0x89, 0x4e, 0x67, 0x67, 0x10, 0xb0, 0x54, 0x6e, + 0x59, 0xad, 0xf6, 0x2d, 0x34, 0x57, 0x6d, 0x34, 0x6d, 0x9d, 0xb5, 0x13, 0xb4, 0xe5, 0x91, 0xbf, + 0x9a, 0x7f, 0xe0, 0x91, 0x0f, 0x42, 0x76, 0xe3, 0x34, 0x6d, 0x52, 0xa9, 0x4f, 0xf1, 0x39, 0xf7, + 0xda, 0x3e, 0xf7, 0xd8, 0xd7, 0x81, 0x76, 0x1c, 0xa6, 0x7e, 0x2c, 0x78, 0xc2, 0xe9, 0x35, 0x5c, + 0xfd, 0x1a, 0xc9, 0xe4, 0x55, 0xb4, 0x42, 0xb9, 0x95, 0x09, 0xae, 0x19, 0xbe, 0xa7, 0xa3, 0x32, + 0x29, 0xc9, 0x57, 0xd0, 0xd9, 0x13, 0xd2, 0xb5, 0xfa, 0xf5, 0x41, 0x67, 0xd8, 0xf1, 0x0b, 0x49, + 0xc5, 0x38, 0x5d, 0x02, 0xec, 0x21, 0x21, 0x60, 0x4f, 0x82, 0x64, 0xe9, 0x5a, 0x7d, 0x6b, 0xd0, + 0x66, 0x7a, 0x4c, 0xfa, 0xd0, 0x61, 0x28, 0xd3, 0x35, 0xce, 0xf8, 0x03, 0x6e, 0xdc, 0x9a, 0x0e, + 0x15, 0x29, 0xf2, 0x05, 0x7c, 0xf4, 0xb3, 0x9c, 0xac, 0x82, 0x39, 0x2e, 0xf9, 0x2a, 0x44, 0xe1, + 0xd6, 0xfb, 0xd6, 0xc0, 0x61, 0x87, 0x24, 0xfd, 0x01, 0x3e, 0x39, 0x54, 0xfb, 0x06, 0x85, 0x8c, + 0xf8, 0x46, 0x32, 0x7c, 0x4f, 0x9e, 0x17, 0x65, 0x64, 0xdb, 0x17, 0x18, 0xfa, 0xcb, 0xe9, 0xc9, + 0x92, 0xf8, 0xe0, 0x18, 0x98, 0xd5, 0x4b, 0xfc, 0x52, 0x26, 0xcb, 0x73, 0xe8, 0x7f, 0x16, 0x5c, + 0x95, 0xe2, 0x64, 0x08, 0xf6, 0x6c, 0x1b, 0xa3, 0xde, 0xfc, 0xc9, 0xf0, 0x79, 0x79, 0x05, 0x3f, + 0xfb, 0xaa, 0x2c, 0xa6, 0x73, 0x95, 0x5f, 0xaf, 0x83, 0x35, 0x66, 0xa6, 0xe8, 0xb1, 0xe2, 0xee, + 0xd2, 0x28, 0xd4, 0x26, 0xd8, 0x4c, 0x8f, 0xc9, 0x67, 0xd0, 0xbe, 0x11, 0x18, 0x24, 0x38, 0x7b, + 0x7b, 0xe7, 0xda, 0x3a, 0xb0, 0x27, 0x88, 0x07, 0x8e, 0x06, 0x11, 0xdf, 0xb8, 0x0d, 0xbd, 0x52, + 0x8e, 0xe9, 0x4b, 0xe8, 0x14, 0xb6, 0x25, 0x97, 0xe0, 0x4c, 0x37, 0x41, 0x2c, 0x97, 0x3c, 0xe9, + 0x5e, 0x28, 0x34, 0xe2, 0xfc, 0x61, 0x1d, 0x88, 0x87, 0xae, 0x45, 0x1f, 0x2d, 0x68, 0x4d, 0x71, + 0x13, 0x9e, 0xe1, 0xa7, 0x12, 0xf9, 0x4a, 0xf0, 0xb5, 0x11, 0xae, 0xc6, 0xe4, 0x09, 0xd4, 0x66, + 0x5c, 0xcb, 0x6e, 0xb3, 0xda, 0x8c, 0x1f, 0x1f, 0xbc, 0x5d, 0x3e, 0x78, 0x25, 0x9c, 0xaf, 0x63, + 0x81, 0x52, 0x6a, 0xe1, 0x0e, 0xcb, 0x31, 0xe9, 0x41, 0x63, 0x8c, 0x61, 0x1a, 0xbb, 0x4d, 0x1d, + 0xd8, 0x01, 0xf2, 0x0c, 0x9a, 0x63, 0xb1, 0x65, 0xe9, 0xc6, 0x6d, 0x69, 0x3a, 0x43, 0xf4, 0x3b, + 0x70, 0x26, 0x82, 0xc7, 0x28, 0x92, 0x6d, 0x6e, 0xaa, 0x55, 0x30, 0xb5, 0x07, 0x8d, 0x37, 0xc1, + 0x2a, 0x35, 0x4e, 0xef, 0x00, 0xfd, 0x27, 0xaf, 0x58, 0x92, 0x01, 0x3c, 0xfd, 0x43, 0x62, 0x78, + 0x7c, 0x55, 0x1d, 0x76, 0x4c, 0x13, 0x0a, 0x97, 0xb7, 0x1f, 0x62, 0x9c, 0x27, 0x18, 0x4e, 0xa3, + 0xbf, 0x51, 0x57, 0x5c, 0x67, 0x07, 0x1c, 
0x79, 0x09, 0x90, 0xe9, 0x89, 0x50, 0xba, 0xb6, 0xbe, + 0x54, 0x6d, 0xdf, 0x48, 0x64, 0x85, 0x20, 0x7d, 0x0b, 0xc0, 0x70, 0x8e, 0xd1, 0x5f, 0x78, 0x8e, + 0xf1, 0x5f, 0x42, 0xf7, 0x66, 0x85, 0x81, 0x28, 0xeb, 0x2c, 0xf1, 0xf4, 0xb2, 0xb0, 0xb2, 0xa4, + 0x0b, 0xb8, 0x1e, 0xa3, 0x4c, 0x04, 0xdf, 0x9a, 0x1b, 0x70, 0x4e, 0xe7, 0x90, 0x6f, 0xa0, 0x9d, + 0xe7, 0xbb, 0xb5, 0x93, 0xdd, 0xb1, 0x4f, 0xa2, 0xef, 0x80, 0x1c, 0x6d, 0x94, 0x35, 0x99, 0x81, + 0x7a, 0x97, 0x13, 0x4d, 0x66, 0x72, 0xd4, 0x89, 0xdd, 0x0a, 0xc1, 0x85, 0x39, 0x31, 0x0d, 0xe8, + 0xb8, 0xaa, 0x08, 0xf5, 0x68, 0xb5, 0x54, 0xe1, 0xab, 0xc4, 0x34, 0xf0, 0xb5, 0x5f, 0x96, 0xc0, + 0x4c, 0x0e, 0xfd, 0xd7, 0x82, 0x1e, 0xc3, 0x78, 0x15, 0xcd, 0x75, 0x93, 0xdc, 0xa4, 0x42, 0x72, + 0x71, 0x8e, 0x19, 0x5f, 0x43, 0x7d, 0x81, 0x89, 0x96, 0xd4, 0x19, 0x7e, 0xea, 0x57, 0xad, 0xe1, + 0xdf, 0x61, 0xf2, 0x7b, 0x7c, 0x7f, 0xc1, 0x54, 0xa6, 0x9a, 0x20, 0x31, 0xd1, 0x57, 0xe4, 0xe4, + 0x84, 0xa9, 0x99, 0x20, 0x31, 0xf1, 0x5a, 0xd0, 0xd0, 0x0b, 0x78, 0x2f, 0xa0, 0xa1, 0x03, 0xaa, + 0x49, 0x72, 0xe3, 0x76, 0x5e, 0xe4, 0x78, 0x64, 0x43, 0x8d, 0xc7, 0x74, 0x56, 0x59, 0x8d, 0x6a, + 0xa1, 0xdd, 0x4b, 0xa2, 0xea, 0xb0, 0xef, 0x2f, 0xf2, 0xb7, 0xc4, 0x79, 0xcd, 0x13, 0xfc, 0x10, + 0xc9, 0xdd, 0x7a, 0xce, 0xfd, 0x05, 0xcb, 0x99, 0x91, 0x03, 0xcd, 0x9d, 0x4b, 0xf4, 0x05, 0xb4, + 0x26, 0xd1, 0x66, 0xa1, 0x6c, 0x71, 0xa1, 0xf5, 0x1b, 0x4a, 0x19, 0x2c, 0x4c, 0x53, 0x19, 0x48, + 0x3f, 0x37, 0x49, 0x52, 0xb5, 0xdd, 0xed, 0x7c, 0xc9, 0x4d, 0xdb, 0xa9, 0xf1, 0xf0, 0xb1, 0xa6, + 0xde, 0x80, 0x5c, 0x1a, 0xf1, 0xc0, 0x56, 0xe9, 0xc4, 0xf1, 0xb3, 0xa5, 0x3d, 0x33, 0x92, 0xe4, + 0x7b, 0x78, 0x7a, 0xf8, 0x44, 0x4b, 0x42, 0xfc, 0xd2, 0x4f, 0xcb, 0x2b, 0x73, 0x92, 0x4c, 0xe0, + 0x59, 0xf5, 0xeb, 0x4e, 0x3c, 0xff, 0xe4, 0x3f, 0xc3, 0x3b, 0x1d, 0x93, 0xe4, 0x47, 0xe8, 0x1e, + 0xdf, 0x33, 0xd2, 0xf3, 0x2b, 0xfa, 0xc7, 0xab, 0x62, 0x25, 0xf9, 0x09, 0xae, 0x4a, 0x47, 0x42, + 0x3e, 0xae, 0x3c, 0x7f, 0xaf, 0x92, 0x96, 0xa3, 0xc6, 0xbb, 0x7a, 0x1c, 0xa6, 0x7f, 0x36, 0xf5, + 0x0f, 0xfc, 0xdb, 0xff, 0x03, 0x00, 0x00, 0xff, 0xff, 0x37, 0x0e, 0xf2, 0xe4, 0xcd, 0x07, 0x00, + 0x00, } diff --git a/replication/pdu/pdu.proto b/replication/logic/pdu/pdu.proto similarity index 94% rename from replication/pdu/pdu.proto rename to replication/logic/pdu/pdu.proto index 1b66916..2097af5 100644 --- a/replication/pdu/pdu.proto +++ b/replication/logic/pdu/pdu.proto @@ -2,6 +2,7 @@ syntax = "proto3"; option go_package = "pdu"; service Replication { + rpc Ping (PingReq) returns (PingRes); rpc ListFilesystems (ListFilesystemReq) returns (ListFilesystemRes); rpc ListFilesystemVersions (ListFilesystemVersionsReq) returns (ListFilesystemVersionsRes); rpc DestroySnapshots (DestroySnapshotsReq) returns (DestroySnapshotsRes); @@ -13,12 +14,12 @@ message ListFilesystemReq {} message ListFilesystemRes { repeated Filesystem Filesystems = 1; - bool Empty = 2; } message Filesystem { string Path = 1; string ResumeToken = 2; + bool IsPlaceholder = 3; } message ListFilesystemVersionsReq { @@ -120,3 +121,12 @@ message ReplicationCursorRes { bool Notexist = 2; } } + +message PingReq { + string Message = 1; +} + +message PingRes { + // Echo must be PingReq.Message + string Echo = 1; +} \ No newline at end of file diff --git a/replication/pdu/pdu_extras.go b/replication/logic/pdu/pdu_extras.go similarity index 100% rename from replication/pdu/pdu_extras.go rename to replication/logic/pdu/pdu_extras.go diff --git a/replication/pdu/pdu_test.go b/replication/logic/pdu/pdu_test.go similarity index 100% rename from 
replication/pdu/pdu_test.go rename to replication/logic/pdu/pdu_test.go diff --git a/replication/logic/replication_logic.go b/replication/logic/replication_logic.go new file mode 100644 index 0000000..906b610 --- /dev/null +++ b/replication/logic/replication_logic.go @@ -0,0 +1,495 @@ +package logic + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/zrepl/zrepl/replication/driver" + . "github.com/zrepl/zrepl/replication/logic/diff" + "github.com/zrepl/zrepl/replication/logic/pdu" + "github.com/zrepl/zrepl/replication/report" + "github.com/zrepl/zrepl/util/bytecounter" + "github.com/zrepl/zrepl/zfs" +) + +// Endpoint represents one side of the replication. +// +// An endpoint is either in Sender or Receiver mode, represented by the correspondingly +// named interfaces defined in this package. +type Endpoint interface { + // Does not include placeholder filesystems + ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) + ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error) + DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) + WaitForConnectivity(ctx context.Context) (error) +} + +type Sender interface { + Endpoint + // If a non-nil io.ReadCloser is returned, it is guaranteed to be closed before + // any next call to the parent github.com/zrepl/zrepl/replication.Endpoint. + // If the send request is for dry run the io.ReadCloser will be nil + Send(ctx context.Context, r *pdu.SendReq) (*pdu.SendRes, zfs.StreamCopier, error) + ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) +} + +type Receiver interface { + Endpoint + // Receive sends r and sendStream (the latter containing a ZFS send stream) + // to the parent github.com/zrepl/zrepl/replication.Endpoint. 
+ Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error) +} + +type Planner struct { + sender Sender + receiver Receiver + + promSecsPerState *prometheus.HistogramVec // labels: state + promBytesReplicated *prometheus.CounterVec // labels: filesystem +} + +func (p *Planner) Plan(ctx context.Context) ([]driver.FS, error) { + fss, err := p.doPlanning(ctx) + if err != nil { + return nil, err + } + dfss := make([]driver.FS, len(fss)) + for i := range dfss { + dfss[i] = fss[i] + } + return dfss, nil +} + +func (p *Planner) WaitForConnectivity(ctx context.Context) error { + var wg sync.WaitGroup + doPing := func(endpoint Endpoint, errOut *error) { + defer wg.Done() + err := endpoint.WaitForConnectivity(ctx) + if err != nil { + *errOut = err + } else { + *errOut = nil + } + } + wg.Add(2) + var senderErr, receiverErr error + go doPing(p.sender, &senderErr) + go doPing(p.receiver, &receiverErr) + wg.Wait() + if senderErr == nil && receiverErr == nil { + return nil + } else if senderErr != nil && receiverErr != nil { + if senderErr.Error() == receiverErr.Error() { + return fmt.Errorf("sender and receiver are not reachable: %s", senderErr.Error()) + } else { + return fmt.Errorf("sender and receiver are not reachable:\n sender: %s\n receiver: %s", senderErr, receiverErr) + } + } else { + var side string + var err *error + if senderErr != nil { + side = "sender" + err = &senderErr + } else { + side = "receiver" + err = &receiverErr + } + return fmt.Errorf("%s is not reachable: %s", side, *err) + } +} + +type Filesystem struct { + sender Sender + receiver Receiver + + Path string // compat + receiverFS *pdu.Filesystem + promBytesReplicated prometheus.Counter // compat +} + +func (f *Filesystem) EqualToPreviousAttempt(other driver.FS) bool { + g, ok := other.(*Filesystem) + if !ok { + return false + } + // TODO: use GUIDs (issued by zrepl, not those from ZFS) + return f.Path == g.Path +} + +func (f *Filesystem) PlanFS(ctx context.Context) ([]driver.Step, error) { + steps, err := f.doPlanning(ctx) + if err != nil { + return nil, err + } + dsteps := make([]driver.Step, len(steps)) + for i := range dsteps { + dsteps[i] = steps[i] + } + return dsteps, nil +} +func (f *Filesystem) ReportInfo() *report.FilesystemInfo { + return &report.FilesystemInfo{Name: f.Path} // FIXME compat name +} + +type Step struct { + sender Sender + receiver Receiver + + parent *Filesystem + from, to *pdu.FilesystemVersion // compat + + byteCounter bytecounter.StreamCopier + expectedSize int64 // 0 means no size estimate present / possible +} + +func (s *Step) TargetEquals(other driver.Step) bool { + t, ok := other.(*Step) + if !ok { + return false + } + if !s.parent.EqualToPreviousAttempt(t.parent) { + panic("Step interface promise broken: parent filesystems must be same") + } + return s.from.GetGuid() == t.from.GetGuid() && + s.to.GetGuid() == t.to.GetGuid() +} + +func (s *Step) TargetDate() time.Time { + return s.to.SnapshotTime() // FIXME compat name +} + +func (s *Step) Step(ctx context.Context) error { + return s.doReplication(ctx) +} + +func (s *Step) ReportInfo() *report.StepInfo { + var byteCounter int64 + if s.byteCounter != nil { + byteCounter = s.byteCounter.Count() + } + // FIXME stick to zfs convention of from and to + from := "" + if s.from != nil { + from = s.from.RelName() + } + return &report.StepInfo{ + From: from, + To: s.to.RelName(), + BytesExpected: s.expectedSize, + BytesReplicated: byteCounter, + } +} + +func NewPlanner(secsPerState *prometheus.HistogramVec, 
bytesReplicated *prometheus.CounterVec, sender Sender, receiver Receiver) *Planner { + return &Planner{ + sender: sender, + receiver: receiver, + promSecsPerState: secsPerState, + promBytesReplicated: bytesReplicated, + } +} +func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string) { + if noCommonAncestor, ok := conflict.(*ConflictNoCommonAncestor); ok { + if len(noCommonAncestor.SortedReceiverVersions) == 0 { + // TODO this is hard-coded replication policy: most recent snapshot as source + var mostRecentSnap *pdu.FilesystemVersion + for n := len(noCommonAncestor.SortedSenderVersions) - 1; n >= 0; n-- { + if noCommonAncestor.SortedSenderVersions[n].Type == pdu.FilesystemVersion_Snapshot { + mostRecentSnap = noCommonAncestor.SortedSenderVersions[n] + break + } + } + if mostRecentSnap == nil { + return nil, "no snapshots available on sender side" + } + return []*pdu.FilesystemVersion{mostRecentSnap}, fmt.Sprintf("start replication at most recent snapshot %s", mostRecentSnap.RelName()) + } + } + return nil, "no automated way to handle conflict type" +} + +func (p *Planner) doPlanning(ctx context.Context) ([]*Filesystem, error) { + + log := getLogger(ctx) + + log.Info("start planning") + + slfssres, err := p.sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) + if err != nil { + log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing sender filesystems") + return nil, err + } + sfss := slfssres.GetFilesystems() + // no progress here since we could run in a live-lock on connectivity issues + + rlfssres, err := p.receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) + if err != nil { + log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing receiver filesystems") + return nil, err + } + rfss := rlfssres.GetFilesystems() + + q := make([]*Filesystem, 0, len(sfss)) + for _, fs := range sfss { + + var receiverFS *pdu.Filesystem + for _, rfs := range rfss { + if rfs.Path == fs.Path { + receiverFS = rfs + } + } + + ctr := p.promBytesReplicated.WithLabelValues(fs.Path) + + q = append(q, &Filesystem{ + sender: p.sender, + receiver: p.receiver, + Path: fs.Path, + receiverFS: receiverFS, + promBytesReplicated: ctr, + }) + } + + return q, nil +} + +func (fs *Filesystem) doPlanning(ctx context.Context) ([]*Step, error) { + + log := getLogger(ctx).WithField("filesystem", fs.Path) + + log.Debug("assessing filesystem") + + sfsvsres, err := fs.sender.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path}) + if err != nil { + log.WithError(err).Error("cannot get remote filesystem versions") + return nil, err + } + sfsvs := sfsvsres.GetVersions() + + if len(sfsvs) < 1 { + err := errors.New("sender does not have any versions") + log.Error(err.Error()) + return nil, err + } + + var rfsvs []*pdu.FilesystemVersion + if fs.receiverFS != nil && !fs.receiverFS.GetIsPlaceholder() { + rfsvsres, err := fs.receiver.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path}) + if err != nil { + log.WithError(err).Error("receiver error") + return nil, err + } + rfsvs = rfsvsres.GetVersions() + } else { + rfsvs = []*pdu.FilesystemVersion{} + } + + path, conflict := IncrementalPath(rfsvs, sfsvs) + if conflict != nil { + var msg string + path, msg = resolveConflict(conflict) // no shadowing allowed! 
+ if path != nil { + log.WithField("conflict", conflict).Info("conflict") + log.WithField("resolution", msg).Info("automatically resolved") + } else { + log.WithField("conflict", conflict).Error("conflict") + log.WithField("problem", msg).Error("cannot resolve conflict") + } + } + if len(path) == 0 { + return nil, conflict + } + + steps := make([]*Step, 0, len(path)) + // FIXME unify struct declarations => initializer? + if len(path) == 1 { + steps = append(steps, &Step{ + parent: fs, + sender: fs.sender, + receiver: fs.receiver, + from: nil, + to: path[0], + }) + } else { + for i := 0; i < len(path)-1; i++ { + steps = append(steps, &Step{ + parent: fs, + sender: fs.sender, + receiver: fs.receiver, + from: path[i], + to: path[i+1], + }) + } + } + + log.Debug("compute send size estimate") + errs := make(chan error, len(steps)) + var wg sync.WaitGroup + fanOutCtx, fanOutCancel := context.WithCancel(ctx) + defer fanOutCancel() + for _, step := range steps { + wg.Add(1) + go func(step *Step) { + defer wg.Done() + err := step.updateSizeEstimate(fanOutCtx) + if err != nil { + log.WithError(err).WithField("step", step).Error("error computing size estimate") + fanOutCancel() + } + errs <- err + }(step) + } + wg.Wait() + close(errs) + var significantErr error = nil + for err := range errs { + if err != nil { + if significantErr == nil || significantErr == context.Canceled { + significantErr = err + } + } + } + if significantErr != nil { + return nil, significantErr + } + + log.Debug("filesystem planning finished") + return steps, nil +} + +// type FilesystemsReplicationFailedError struct { +// FilesystemsWithError []*fsrep.Replication +// } + +// func (e FilesystemsReplicationFailedError) Error() string { +// allSame := true +// lastErr := e.FilesystemsWithError[0].Err().Error() +// for _, fs := range e.FilesystemsWithError { +// fsErr := fs.Err().Error() +// allSame = allSame && lastErr == fsErr +// } + +// fsstr := "multiple filesystems" +// if len(e.FilesystemsWithError) == 1 { +// fsstr = fmt.Sprintf("filesystem %s", e.FilesystemsWithError[0].FS()) +// } +// errorStr := lastErr +// if !allSame { +// errorStr = "multiple different errors" +// } +// return fmt.Sprintf("%s could not be replicated: %s", fsstr, errorStr) +// } + +func (s *Step) updateSizeEstimate(ctx context.Context) error { + + log := getLogger(ctx) + + sr := s.buildSendRequest(true) + + log.Debug("initiate dry run send request") + sres, _, err := s.sender.Send(ctx, sr) + if err != nil { + log.WithError(err).Error("dry run send request failed") + return err + } + s.expectedSize = sres.ExpectedSize + return nil +} + +func (s *Step) buildSendRequest(dryRun bool) (sr *pdu.SendReq) { + fs := s.parent.Path + if s.from == nil { + sr = &pdu.SendReq{ + Filesystem: fs, + To: s.to.RelName(), + DryRun: dryRun, + } + } else { + sr = &pdu.SendReq{ + Filesystem: fs, + From: s.from.RelName(), + To: s.to.RelName(), + DryRun: dryRun, + } + } + return sr +} + +func (s *Step) doReplication(ctx context.Context) error { + + fs := s.parent.Path + + log := getLogger(ctx) + sr := s.buildSendRequest(false) + + log.Debug("initiate send request") + sres, sstreamCopier, err := s.sender.Send(ctx, sr) + if err != nil { + log.WithError(err).Error("send request failed") + return err + } + if sstreamCopier == nil { + err := errors.New("send request did not return a stream, broken endpoint implementation") + return err + } + defer sstreamCopier.Close() + + // Install a byte counter to track progress + for status report + s.byteCounter = 
bytecounter.NewStreamCopier(sstreamCopier) + defer func() { + s.parent.promBytesReplicated.Add(float64(s.byteCounter.Count())) + }() + + rr := &pdu.ReceiveReq{ + Filesystem: fs, + ClearResumeToken: !sres.UsedResumeToken, + } + log.Debug("initiate receive request") + _, err = s.receiver.Receive(ctx, rr, s.byteCounter) + if err != nil { + log. + WithError(err). + WithField("errType", fmt.Sprintf("%T", err)). + Error("receive request failed (might also be error on sender)") + // This failure could be due to + // - an unexpected exit of ZFS on the sending side + // - an unexpected exit of ZFS on the receiving side + // - a connectivity issue + return err + } + log.Debug("receive finished") + + log.Debug("advance replication cursor") + req := &pdu.ReplicationCursorReq{ + Filesystem: fs, + Op: &pdu.ReplicationCursorReq_Set{ + Set: &pdu.ReplicationCursorReq_SetOp{ + Snapshot: s.to.GetName(), + }, + }, + } + _, err = s.sender.ReplicationCursor(ctx, req) + if err != nil { + log.WithError(err).Error("error advancing replication cursor") + // If this fails and replication planning restarts, the diff algorithm will find + // that cursor out of place. This is not a problem because then, it would just use another FS + // However, we FIXME have no means to just update the cursor in a + // second replication attempt right after this one where we don't have new snaps yet + return err + } + + return err +} + +func (s *Step) String() string { + if s.from == nil { // FIXME: ZFS semantics are that to is nil on non-incremental send + return fmt.Sprintf("%s%s (full)", s.parent.Path, s.to.RelName()) + } else { + return fmt.Sprintf("%s(%s => %s)", s.parent.Path, s.from.RelName(), s.to.RelName()) + } +} diff --git a/replication/context.go b/replication/logic/replication_logic_context.go similarity index 81% rename from replication/context.go rename to replication/logic/replication_logic_context.go index 7e43981..8102c9c 100644 --- a/replication/context.go +++ b/replication/logic/replication_logic_context.go @@ -1,9 +1,9 @@ -package replication +package logic import ( "context" + "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication/fsrep" ) type contextKey int @@ -16,7 +16,6 @@ type Logger = logger.Logger func WithLogger(ctx context.Context, l Logger) context.Context { ctx = context.WithValue(ctx, contextKeyLog, l) - ctx = fsrep.WithLogger(ctx, l) return ctx } diff --git a/replication/mainfsm.go b/replication/mainfsm.go deleted file mode 100644 index 5cf1d7b..0000000 --- a/replication/mainfsm.go +++ /dev/null @@ -1,560 +0,0 @@ -// Package replication implements replication of filesystems with existing -// versions (snapshots) from a sender to a receiver. -package replication - -import ( - "context" - "errors" - "fmt" - "github.com/prometheus/client_golang/prometheus" - "github.com/zrepl/zrepl/daemon/job/wakeup" - "github.com/zrepl/zrepl/util/envconst" - "github.com/zrepl/zrepl/util/watchdog" - "math/bits" - "net" - "sort" - "sync" - "time" - - "github.com/zrepl/zrepl/replication/fsrep" - . 
"github.com/zrepl/zrepl/replication/internal/diff" - "github.com/zrepl/zrepl/replication/pdu" -) - -//go:generate enumer -type=State -type State uint - -const ( - Planning State = 1 << iota - PlanningError - Working - WorkingWait - Completed - PermanentError -) - -func (s State) rsf() state { - idx := bits.TrailingZeros(uint(s)) - if idx == bits.UintSize { - panic(s) // invalid value - } - m := []state{ - statePlanning, - statePlanningError, - stateWorking, - stateWorkingWait, - nil, - nil, - } - return m[idx] -} - -func (s State) IsTerminal() bool { - return s.rsf() == nil -} - -// Replication implements the replication of multiple file systems from a Sender to a Receiver. -// -// It is a state machine that is driven by the Drive method -// and provides asynchronous reporting via the Report method (i.e. from another goroutine). -type Replication struct { - // not protected by lock - promSecsPerState *prometheus.HistogramVec // labels: state - promBytesReplicated *prometheus.CounterVec // labels: filesystem - - Progress watchdog.KeepAlive - - // lock protects all fields of this struct (but not the fields behind pointers!) - lock sync.Mutex - - state State - - // Working, WorkingWait, Completed, ContextDone - queue []*fsrep.Replication - completed []*fsrep.Replication - active *fsrep.Replication // == queue[0] or nil, unlike in Report - - // for PlanningError, WorkingWait and ContextError and Completed - err error - - // PlanningError, WorkingWait - sleepUntil time.Time -} - -type Report struct { - Status string - Problem string - SleepUntil time.Time - Completed []*fsrep.Report - Pending []*fsrep.Report - Active *fsrep.Report // not contained in Pending, unlike in struct Replication -} - -func NewReplication(secsPerState *prometheus.HistogramVec, bytesReplicated *prometheus.CounterVec) *Replication { - r := Replication{ - promSecsPerState: secsPerState, - promBytesReplicated: bytesReplicated, - state: Planning, - } - return &r -} - -// Endpoint represents one side of the replication. -// -// An endpoint is either in Sender or Receiver mode, represented by the correspondingly -// named interfaces defined in this package. -type Endpoint interface { - // Does not include placeholder filesystems - ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) - ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error) - DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) -} - -type Sender interface { - Endpoint - fsrep.Sender -} - -type Receiver interface { - Endpoint - fsrep.Receiver -} - -type FilteredError struct{ fs string } - -func NewFilteredError(fs string) *FilteredError { - return &FilteredError{fs} -} - -func (f FilteredError) Error() string { return "endpoint does not allow access to filesystem " + f.fs } - -type updater func(func(*Replication)) (newState State) -type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state - -// Drive starts the state machine and returns only after replication has finished (with or without errors). -// The Logger in ctx is used for both debug and error logging, but is not guaranteed to be stable -// or end-user friendly. -// User-facing replication progress reports and can be obtained using the Report method, -// whose output will not change after Drive returns. 
-// -// FIXME: Drive may be only called once per instance of Replication -func (r *Replication) Drive(ctx context.Context, sender Sender, receiver Receiver) { - - var u updater = func(f func(*Replication)) State { - r.lock.Lock() - defer r.lock.Unlock() - if f != nil { - f(r) - } - return r.state - } - - var s state = statePlanning - var pre, post State - for s != nil { - preTime := time.Now() - pre = u(nil) - s = s(ctx, &r.Progress, sender, receiver, u) - delta := time.Now().Sub(preTime) - r.promSecsPerState.WithLabelValues(pre.String()).Observe(delta.Seconds()) - post = u(nil) - getLogger(ctx). - WithField("transition", fmt.Sprintf("%s => %s", pre, post)). - WithField("duration", delta). - Debug("main state transition") - if post == Working && pre != post { - getLogger(ctx).Info("start working") - } - } - - getLogger(ctx). - WithField("final_state", post). - Debug("main final state") -} - -func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string) { - if noCommonAncestor, ok := conflict.(*ConflictNoCommonAncestor); ok { - if len(noCommonAncestor.SortedReceiverVersions) == 0 { - // TODO this is hard-coded replication policy: most recent snapshot as source - var mostRecentSnap *pdu.FilesystemVersion - for n := len(noCommonAncestor.SortedSenderVersions) - 1; n >= 0; n-- { - if noCommonAncestor.SortedSenderVersions[n].Type == pdu.FilesystemVersion_Snapshot { - mostRecentSnap = noCommonAncestor.SortedSenderVersions[n] - break - } - } - if mostRecentSnap == nil { - return nil, "no snapshots available on sender side" - } - return []*pdu.FilesystemVersion{mostRecentSnap}, fmt.Sprintf("start replication at most recent snapshot %s", mostRecentSnap.RelName()) - } - } - return nil, "no automated way to handle conflict type" -} - -var RetryInterval = envconst.Duration("ZREPL_REPLICATION_RETRY_INTERVAL", 10 * time.Second) - -type Error interface { - error - Temporary() bool -} - -var _ Error = fsrep.Error(nil) -var _ Error = net.Error(nil) - -func isPermanent(err error) bool { - if e, ok := err.(Error); ok { - return !e.Temporary() - } - return true -} - -func statePlanning(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state { - - log := getLogger(ctx) - - log.Info("start planning") - - handlePlanningError := func(err error) state { - return u(func(r *Replication) { - ge := GlobalError{Err: err, Temporary: !isPermanent(err)} - log.WithError(ge).Error("encountered global error while planning replication") - r.err = ge - if !ge.Temporary { - r.state = PermanentError - } else { - r.sleepUntil = time.Now().Add(RetryInterval) - r.state = PlanningError - } - }).rsf() - } - - slfssres, err := sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) - if err != nil { - log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing sender filesystems") - return handlePlanningError(err) - } - sfss := slfssres.GetFilesystems() - // no progress here since we could run in a live-lock on connectivity issues - - rlfssres, err := receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{}) - if err != nil { - log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing receiver filesystems") - return handlePlanningError(err) - } - rfss := rlfssres.GetFilesystems() - ka.MadeProgress() // for both sender and receiver - - q := make([]*fsrep.Replication, 0, len(sfss)) - mainlog := log - for _, fs := range sfss { - - log := mainlog.WithField("filesystem", fs.Path) - - log.Debug("assessing filesystem") - - sfsvsres, err 
:= sender.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path}) - if err != nil { - log.WithError(err).Error("cannot get remote filesystem versions") - return handlePlanningError(err) - } - sfsvs := sfsvsres.GetVersions() - ka.MadeProgress() - - if len(sfsvs) < 1 { - err := errors.New("sender does not have any versions") - log.Error(err.Error()) - q = append(q, fsrep.NewReplicationConflictError(fs.Path, err)) - continue - } - - receiverFSExists := false - for _, rfs := range rfss { - if rfs.Path == fs.Path { - receiverFSExists = true - } - } - - var rfsvs []*pdu.FilesystemVersion - if receiverFSExists { - rfsvsres, err := receiver.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path}) - if err != nil { - if _, ok := err.(*FilteredError); ok { - log.Info("receiver ignores filesystem") - continue - } - log.WithError(err).Error("receiver error") - return handlePlanningError(err) - } - rfsvs = rfsvsres.GetVersions() - } else { - rfsvs = []*pdu.FilesystemVersion{} - } - ka.MadeProgress() - - path, conflict := IncrementalPath(rfsvs, sfsvs) - if conflict != nil { - var msg string - path, msg = resolveConflict(conflict) // no shadowing allowed! - if path != nil { - log.WithField("conflict", conflict).Info("conflict") - log.WithField("resolution", msg).Info("automatically resolved") - } else { - log.WithField("conflict", conflict).Error("conflict") - log.WithField("problem", msg).Error("cannot resolve conflict") - } - } - ka.MadeProgress() - if path == nil { - q = append(q, fsrep.NewReplicationConflictError(fs.Path, conflict)) - continue - } - - var promBytesReplicated *prometheus.CounterVec - u(func(replication *Replication) { // FIXME args struct like in pruner (also use for sender and receiver) - promBytesReplicated = replication.promBytesReplicated - }) - fsrfsm := fsrep.BuildReplication(fs.Path, promBytesReplicated.WithLabelValues(fs.Path)) - if len(path) == 1 { - fsrfsm.AddStep(nil, path[0]) - } else { - for i := 0; i < len(path)-1; i++ { - fsrfsm.AddStep(path[i], path[i+1]) - } - } - qitem := fsrfsm.Done() - ka.MadeProgress() - - log.Debug("compute send size estimate") - if err = qitem.UpdateSizeEsitmate(ctx, sender); err != nil { - log.WithError(err).Error("error computing size estimate") - return handlePlanningError(err) - } - ka.MadeProgress() - - q = append(q, qitem) - } - - ka.MadeProgress() - - return u(func(r *Replication) { - r.completed = nil - r.queue = q - r.err = nil - r.state = Working - }).rsf() -} - -func statePlanningError(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state { - var sleepUntil time.Time - u(func(r *Replication) { - sleepUntil = r.sleepUntil - }) - t := time.NewTimer(sleepUntil.Sub(time.Now())) - getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after planning error") - defer t.Stop() - select { - case <-ctx.Done(): - return u(func(r *Replication) { - r.state = PermanentError - r.err = ctx.Err() - }).rsf() - case <-t.C: - case <-wakeup.Wait(ctx): - } - return u(func(r *Replication) { - r.state = Planning - }).rsf() -} - -type GlobalError struct { - Err error - Temporary bool -} - -func (e GlobalError) Error() string { - errClass := "temporary" - if !e.Temporary { - errClass = "permanent" - } - return fmt.Sprintf("%s global error: %s", errClass, e.Err) -} - -type FilesystemsReplicationFailedError struct { - FilesystemsWithError []*fsrep.Replication -} - -func (e FilesystemsReplicationFailedError) Error() string { - allSame := true - lastErr := 
e.FilesystemsWithError[0].Err().Error() - for _, fs := range e.FilesystemsWithError { - fsErr := fs.Err().Error() - allSame = allSame && lastErr == fsErr - } - - fsstr := "multiple filesystems" - if len(e.FilesystemsWithError) == 1 { - fsstr = fmt.Sprintf("filesystem %s", e.FilesystemsWithError[0].FS()) - } - errorStr := lastErr - if !allSame { - errorStr = "multiple different errors" - } - return fmt.Sprintf("%s could not be replicated: %s", fsstr, errorStr) -} - -func stateWorking(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state { - - var active *fsrep.Replication - rsfNext := u(func(r *Replication) { - - r.err = nil - - newq := make([]*fsrep.Replication, 0, len(r.queue)) - for i := range r.queue { - if r.queue[i].CanRetry() { - newq = append(newq, r.queue[i]) - } else { - r.completed = append(r.completed, r.queue[i]) - } - } - sort.SliceStable(newq, func(i, j int) bool { - return newq[i].NextStepDate().Before(newq[j].NextStepDate()) - }) - r.queue = newq - - if len(r.queue) == 0 { - r.state = Completed - fsWithErr := FilesystemsReplicationFailedError{ // prepare it - FilesystemsWithError: make([]*fsrep.Replication, 0, len(r.completed)), - } - for _, fs := range r.completed { - if fs.CanRetry() { - panic(fmt.Sprintf("implementation error: completed contains retryable FS %s %#v", - fs.FS(), fs.Err())) - } - if fs.Err() != nil { - fsWithErr.FilesystemsWithError = append(fsWithErr.FilesystemsWithError, fs) - } - } - if len(fsWithErr.FilesystemsWithError) > 0 { - r.err = fsWithErr - r.state = PermanentError - } - return - } - - active = r.queue[0] // do not dequeue: if it's done, it will be sorted the next time we check for more work - r.active = active - }).rsf() - - if active == nil { - return rsfNext - } - - activeCtx := fsrep.WithLogger(ctx, getLogger(ctx).WithField("fs", active.FS())) - err := active.Retry(activeCtx, ka, sender, receiver) - u(func(r *Replication) { - r.active = nil - }).rsf() - - if err != nil { - if err.ContextErr() && ctx.Err() != nil { - getLogger(ctx).WithError(err). - Info("filesystem replication was cancelled") - u(func(r*Replication) { - r.err = GlobalError{Err: err, Temporary: false} - r.state = PermanentError - }) - } else if err.LocalToFS() { - getLogger(ctx).WithError(err). - Error("filesystem replication encountered a filesystem-specific error") - // we stay in this state and let the queuing logic above de-prioritize this failing FS - } else if err.Temporary() { - getLogger(ctx).WithError(err). - Error("filesystem encountered a non-filesystem-specific temporary error, enter retry-wait") - u(func(r *Replication) { - r.err = GlobalError{Err: err, Temporary: true} - r.sleepUntil = time.Now().Add(RetryInterval) - r.state = WorkingWait - }).rsf() - } else { - getLogger(ctx).WithError(err). 
- Error("encountered a permanent non-filesystem-specific error") - u(func(r *Replication) { - r.err = GlobalError{Err: err, Temporary: false} - r.state = PermanentError - }).rsf() - } - } - - return u(nil).rsf() -} - -func stateWorkingWait(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state { - var sleepUntil time.Time - u(func(r *Replication) { - sleepUntil = r.sleepUntil - }) - t := time.NewTimer(RetryInterval) - getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after error") - defer t.Stop() - select { - case <-ctx.Done(): - return u(func(r *Replication) { - r.state = PermanentError - r.err = ctx.Err() - }).rsf() - - case <-t.C: - case <-wakeup.Wait(ctx): - } - return u(func(r *Replication) { - r.state = Working - }).rsf() -} - -// Report provides a summary of the progress of the Replication, -// i.e., a condensed dump of the internal state machine. -// Report is safe to be called asynchronously while Drive is running. -func (r *Replication) Report() *Report { - r.lock.Lock() - defer r.lock.Unlock() - - rep := Report{ - Status: r.state.String(), - SleepUntil: r.sleepUntil, - } - - if r.err != nil { - rep.Problem = r.err.Error() - } - - if r.state&(Planning|PlanningError) != 0 { - return &rep - } - - rep.Pending = make([]*fsrep.Report, 0, len(r.queue)) - rep.Completed = make([]*fsrep.Report, 0, len(r.completed)) // room for active (potentially) - - // since r.active == r.queue[0], do not contain it in pending output - pending := r.queue - if r.active != nil { - rep.Active = r.active.Report() - pending = r.queue[1:] - } - for _, fsr := range pending { - rep.Pending= append(rep.Pending, fsr.Report()) - } - for _, fsr := range r.completed { - rep.Completed = append(rep.Completed, fsr.Report()) - } - - return &rep -} - -func (r *Replication) State() State { - r.lock.Lock() - defer r.lock.Unlock() - return r.state -} diff --git a/replication/replication.go b/replication/replication.go new file mode 100644 index 0000000..7f9e35b --- /dev/null +++ b/replication/replication.go @@ -0,0 +1,13 @@ +// Package replication implements replication of filesystems with existing +// versions (snapshots) from a sender to a receiver. 
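The split into replication (the public entry point), driver (which drives attempts and steps, defined elsewhere in this changeset) and logic (planning against the pdu endpoints) is easiest to see from the caller's side. The following is a minimal sketch of that wiring, not taken from this changeset: it assumes the concrete Sender/Receiver endpoint implementations and the two Prometheus metric vectors are constructed elsewhere, and it only passes through driver.ReportFunc and driver.WaitFunc, whose definitions live in the driver package rather than in this hunk.

package example // hypothetical caller, for illustration only

import (
	"context"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/zrepl/zrepl/replication"
	"github.com/zrepl/zrepl/replication/driver"
	"github.com/zrepl/zrepl/replication/logic"
)

// runReplication wires a Sender/Receiver pair into the new package layout:
// *logic.Planner is used as the driver.Planner, and replication.Do delegates
// to the driver's Do entry point.
func runReplication(
	ctx context.Context,
	sender logic.Sender, receiver logic.Receiver,
	secsPerState *prometheus.HistogramVec, bytesReplicated *prometheus.CounterVec,
) (driver.ReportFunc, driver.WaitFunc) {
	planner := logic.NewPlanner(secsPerState, bytesReplicated, sender, receiver)
	return replication.Do(ctx, planner)
}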
+package replication + +import ( + "context" + + "github.com/zrepl/zrepl/replication/driver" +) + +func Do(ctx context.Context, planner driver.Planner) (driver.ReportFunc, driver.WaitFunc) { + return driver.Do(ctx, planner) +} diff --git a/replication/report/replication_report.go b/replication/report/replication_report.go new file mode 100644 index 0000000..7f8f605 --- /dev/null +++ b/replication/report/replication_report.go @@ -0,0 +1,152 @@ +package report + +import ( + "encoding/json" + "time" +) + +type Report struct { + StartAt, FinishAt time.Time + WaitReconnectSince, WaitReconnectUntil time.Time + WaitReconnectError *TimedError + Attempts []*AttemptReport +} + +var _, _ = json.Marshal(&Report{}) + +type TimedError struct { + Err string + Time time.Time +} + +func NewTimedError(err string, t time.Time) *TimedError { + if err == "" { + panic("error must be empty") + } + if t.IsZero() { + panic("t must be non-zero") + } + return &TimedError{err, t} +} + +func (s *TimedError) Error() string { + return s.Err +} + +var _, _ = json.Marshal(&TimedError{}) + +type AttemptReport struct { + State AttemptState + StartAt, FinishAt time.Time + PlanError *TimedError + Filesystems []*FilesystemReport +} + +type AttemptState string + +const ( + AttemptPlanning AttemptState = "planning" + AttemptPlanningError AttemptState = "planning-error" + AttemptFanOutFSs AttemptState = "fan-out-filesystems" + AttemptFanOutError AttemptState = "filesystem-error" + AttemptDone AttemptState = "done" +) + +type FilesystemState string + +const ( + FilesystemPlanning FilesystemState = "planning" + FilesystemPlanningErrored FilesystemState = "planning-error" + FilesystemStepping FilesystemState = "stepping" + FilesystemSteppingErrored FilesystemState = "step-error" + FilesystemDone FilesystemState = "done" +) + +type FilesystemReport struct { + Info *FilesystemInfo + + State FilesystemState + + // Valid in State = FilesystemPlanningErrored + PlanError *TimedError + // Valid in State = FilesystemSteppingErrored + StepError *TimedError + + // Valid in State = FilesystemStepping + CurrentStep int + Steps []*StepReport +} + +type FilesystemInfo struct { + Name string +} + +type StepReport struct { + Info *StepInfo +} + +type StepInfo struct { + From, To string + BytesExpected int64 + BytesReplicated int64 +} + +func (a *AttemptReport) BytesSum() (expected, replicated int64) { + for _, fs := range a.Filesystems { + e, r := fs.BytesSum() + expected += e + replicated += r + } + return expected, replicated +} + +func (f *FilesystemReport) BytesSum() (expected, replicated int64) { + for _, step := range f.Steps { + expected += step.Info.BytesExpected + replicated += step.Info.BytesReplicated + } + return +} + +func (f *AttemptReport) FilesystemsByState() map[FilesystemState][]*FilesystemReport { + r := make(map[FilesystemState][]*FilesystemReport, 4) + for _, fs := range f.Filesystems { + l := r[fs.State] + l = append(l, fs) + r[fs.State] = l + } + return r +} + +func (f *FilesystemReport) Error() *TimedError { + switch f.State { + case FilesystemPlanningErrored: + return f.PlanError + case FilesystemSteppingErrored: + return f.StepError + } + return nil +} + +// may return nil +func (f *FilesystemReport) NextStep() *StepReport { + switch f.State { + case FilesystemDone: + return nil + case FilesystemPlanningErrored: + return nil + case FilesystemSteppingErrored: + return nil + case FilesystemPlanning: + return nil + case FilesystemStepping: + // invariant is that this is always correct + // TODO what about 0-length Steps but 
short intermediary state? + return f.Steps[f.CurrentStep] + } + panic("unreachable") +} + +func (f *StepReport) IsIncremental() bool { + return f.Info.From != "" // FIXME change to ZFS semantics (To != "") +} diff --git a/replication/state_enumer.go b/replication/state_enumer.go deleted file mode 100644 index 9708fff..0000000 --- a/replication/state_enumer.go +++ /dev/null @@ -1,76 +0,0 @@ -// Code generated by "enumer -type=State"; DO NOT EDIT. - -package replication - -import ( - "fmt" -) - -const ( - _StateName_0 = "PlanningPlanningError" - _StateName_1 = "Working" - _StateName_2 = "WorkingWait" - _StateName_3 = "Completed" - _StateName_4 = "PermanentError" -) - -var ( - _StateIndex_0 = [...]uint8{0, 8, 21} - _StateIndex_1 = [...]uint8{0, 7} - _StateIndex_2 = [...]uint8{0, 11} - _StateIndex_3 = [...]uint8{0, 9} - _StateIndex_4 = [...]uint8{0, 14} -) - -func (i State) String() string { - switch { - case 1 <= i && i <= 2: - i -= 1 - return _StateName_0[_StateIndex_0[i]:_StateIndex_0[i+1]] - case i == 4: - return _StateName_1 - case i == 8: - return _StateName_2 - case i == 16: - return _StateName_3 - case i == 32: - return _StateName_4 - default: - return fmt.Sprintf("State(%d)", i) - } -} - -var _StateValues = []State{1, 2, 4, 8, 16, 32} - -var _StateNameToValueMap = map[string]State{ - _StateName_0[0:8]: 1, - _StateName_0[8:21]: 2, - _StateName_1[0:7]: 4, - _StateName_2[0:11]: 8, - _StateName_3[0:9]: 16, - _StateName_4[0:14]: 32, -} - -// StateString retrieves an enum value from the enum constants string name. -// Throws an error if the param is not part of the enum. -func StateString(s string) (State, error) { - if val, ok := _StateNameToValueMap[s]; ok { - return val, nil - } - return 0, fmt.Errorf("%s does not belong to State values", s) -} - -// StateValues returns all values of the enum -func StateValues() []State { - return _StateValues -} - -// IsAState returns "true" if the value is listed in the enum definition. 
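To make the intended consumption of the new report types concrete, here is a small sketch that condenses the most recent attempt of a report.Report into a one-line status string; it only uses the types and methods introduced above, but the summarize function and its package are illustrative, not part of the patch.

    package statusexample

    import (
        "fmt"

        "github.com/zrepl/zrepl/replication/report"
    )

    // summarize condenses the most recent attempt into a single line.
    func summarize(r *report.Report) string {
        if len(r.Attempts) == 0 {
            return "no replication attempts yet"
        }
        a := r.Attempts[len(r.Attempts)-1]
        expected, replicated := a.BytesSum()
        byState := a.FilesystemsByState()
        return fmt.Sprintf("attempt %d: state=%s, %d/%d bytes, %d filesystems done, %d with step errors",
            len(r.Attempts), a.State, replicated, expected,
            len(byState[report.FilesystemDone]),
            len(byState[report.FilesystemSteppingErrored]))
    }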
"false" otherwise -func (i State) IsAState() bool { - for _, v := range _StateValues { - if i == v { - return true - } - } - return false -} diff --git a/rpc/dataconn/dataconn_client.go b/rpc/dataconn/dataconn_client.go index a12292b..8473d97 100644 --- a/rpc/dataconn/dataconn_client.go +++ b/rpc/dataconn/dataconn_client.go @@ -7,7 +7,7 @@ import ( "strings" "github.com/golang/protobuf/proto" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn/stream" "github.com/zrepl/zrepl/transport" "github.com/zrepl/zrepl/zfs" @@ -213,3 +213,23 @@ func (c *Client) ReqRecv(ctx context.Context, req *pdu.ReceiveReq, streamCopier return res.res, cause } + + +func (c *Client) ReqPing(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) { + conn, err := c.getWire(ctx) + if err != nil { + return nil, err + } + defer c.putWire(conn) + + if err := c.send(ctx, conn, EndpointPing, req, nil); err != nil { + return nil, err + } + + var res pdu.PingRes + if err := c.recv(ctx, conn, &res); err != nil { + return nil, err + } + + return &res, nil +} \ No newline at end of file diff --git a/rpc/dataconn/dataconn_server.go b/rpc/dataconn/dataconn_server.go index 41f5781..ea6700b 100644 --- a/rpc/dataconn/dataconn_server.go +++ b/rpc/dataconn/dataconn_server.go @@ -7,7 +7,7 @@ import ( "github.com/golang/protobuf/proto" "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn/stream" "github.com/zrepl/zrepl/transport" "github.com/zrepl/zrepl/zfs" @@ -25,6 +25,8 @@ type Handler interface { // It is guaranteed that Server calls Receive with a stream that holds the IdleConnTimeout // configured in ServerConfig.Shared.IdleConnTimeout. Receive(ctx context.Context, r *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error) + // PingDataconn handles a PingReq + PingDataconn(ctx context.Context, r *pdu.PingReq) (*pdu.PingRes, error) } type Logger = logger.Logger @@ -125,6 +127,13 @@ func (s *Server) serveConn(nc *transport.AuthConn) { return } res, handlerErr = s.h.Receive(ctx, &req, &streamCopier{streamConn: c, closeStreamOnClose: false}) // SHADOWING + case EndpointPing: + var req pdu.PingReq + if err := proto.Unmarshal(reqStructured, &req); err != nil { + s.log.WithError(err).Error("cannot unmarshal ping request") + return + } + res, handlerErr = s.h.PingDataconn(ctx, &req) // SHADOWING default: s.log.WithField("endpoint", endpoint).Error("unknown endpoint") handlerErr = fmt.Errorf("requested endpoint does not exist") @@ -137,12 +146,17 @@ func (s *Server) serveConn(nc *transport.AuthConn) { // if marshaling fails. 
We consider failed marshaling a handler error var protobuf *bytes.Buffer if handlerErr == nil { - protobufBytes, err := proto.Marshal(res) - if err != nil { - s.log.WithError(err).Error("cannot marshal handler protobuf") - handlerErr = err + if res == nil { + handlerErr = fmt.Errorf("implementation error: handler for endpoint %q returns nil error and nil result", endpoint) + s.log.WithError(err).Error("handle implementation error") + } else { + protobufBytes, err := proto.Marshal(res) + if err != nil { + s.log.WithError(err).Error("cannot marshal handler protobuf") + handlerErr = err + } + protobuf = bytes.NewBuffer(protobufBytes) // SHADOWING } - protobuf = bytes.NewBuffer(protobufBytes) // SHADOWING } var resHeaderBuf bytes.Buffer diff --git a/rpc/dataconn/dataconn_shared.go b/rpc/dataconn/dataconn_shared.go index 0ea5a34..43ccf92 100644 --- a/rpc/dataconn/dataconn_shared.go +++ b/rpc/dataconn/dataconn_shared.go @@ -10,6 +10,7 @@ import ( ) const ( + EndpointPing string = "/v1/ping" EndpointSend string = "/v1/send" EndpointRecv string = "/v1/recv" ) diff --git a/rpc/dataconn/microbenchmark/microbenchmark.go b/rpc/dataconn/microbenchmark/microbenchmark.go index 287f7e1..0e11a8e 100644 --- a/rpc/dataconn/microbenchmark/microbenchmark.go +++ b/rpc/dataconn/microbenchmark/microbenchmark.go @@ -24,7 +24,7 @@ import ( "github.com/pkg/profile" "github.com/zrepl/zrepl/logger" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn" "github.com/zrepl/zrepl/rpc/dataconn/timeoutconn" "github.com/zrepl/zrepl/transport" @@ -77,6 +77,12 @@ func (devNullHandler) Receive(ctx context.Context, r *pdu.ReceiveReq, stream zfs return &res, err } +func (devNullHandler) PingDataconn(ctx context.Context, r *pdu.PingReq) (*pdu.PingRes, error) { + return &pdu.PingRes{ + Echo: r.GetMessage(), + }, nil +} + type tcpConnecter struct { addr string } diff --git a/rpc/grpcclientidentity/authlistener_grpc_adaptor.go b/rpc/grpcclientidentity/authlistener_grpc_adaptor.go index accd124..3c62cf4 100644 --- a/rpc/grpcclientidentity/authlistener_grpc_adaptor.go +++ b/rpc/grpcclientidentity/authlistener_grpc_adaptor.go @@ -105,11 +105,12 @@ func NewInterceptors(logger Logger, clientIdentityKey interface{}) (unary grpc.U if !ok { panic("peer.FromContext expected to return a peer in grpc.UnaryServerInterceptor") } - logger.WithField("peer", fmt.Sprintf("%v", p)).Debug("peer") + logger.WithField("peer_addr", fmt.Sprintf("%s", p.Addr)).Debug("peer addr") a, ok := p.AuthInfo.(*authConnAuthType) if !ok { panic(fmt.Sprintf("NewInterceptors must be used in combination with grpc.NewTransportCredentials, but got auth type %T", p.AuthInfo)) } + logger.WithField("peer_client_identity", a.clientIdentity).Debug("peer client identity") ctx = context.WithValue(ctx, clientIdentityKey, a.clientIdentity) return handler(ctx, req) } diff --git a/rpc/grpcclientidentity/grpchelper/authlistener_grpc_adaptor_wrapper.go b/rpc/grpcclientidentity/grpchelper/authlistener_grpc_adaptor_wrapper.go index 7e46681..1b9f3a2 100644 --- a/rpc/grpcclientidentity/grpchelper/authlistener_grpc_adaptor_wrapper.go +++ b/rpc/grpcclientidentity/grpchelper/authlistener_grpc_adaptor_wrapper.go @@ -36,7 +36,7 @@ func ClientConn(cn transport.Connecter, log Logger) *grpc.ClientConn { }) dialerOption := grpc.WithDialer(grpcclientidentity.NewDialer(log, cn)) cred := grpc.WithTransportCredentials(grpcclientidentity.NewTransportCredentials(log)) - ctx, cancel := context.WithTimeout(context.TODO(), 
5*time.Second) + ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second) // FIXME constant defer cancel() cc, err := grpc.DialContext(ctx, "doesn't matter done by dialer", dialerOption, cred, ka) if err != nil { diff --git a/rpc/rpc_client.go b/rpc/rpc_client.go index efcaf98..818cd44 100644 --- a/rpc/rpc_client.go +++ b/rpc/rpc_client.go @@ -2,13 +2,18 @@ package rpc import ( "context" + "errors" + "fmt" "net" + "sync" + "sync/atomic" "time" "google.golang.org/grpc" - "github.com/zrepl/zrepl/replication" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/google/uuid" + "github.com/zrepl/zrepl/replication/logic" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn" "github.com/zrepl/zrepl/rpc/grpcclientidentity/grpchelper" "github.com/zrepl/zrepl/rpc/versionhandshake" @@ -24,11 +29,12 @@ type Client struct { controlClient pdu.ReplicationClient // this the grpc client instance, see constructor controlConn *grpc.ClientConn loggers Loggers + closed chan struct{} } -var _ replication.Endpoint = &Client{} -var _ replication.Sender = &Client{} -var _ replication.Receiver = &Client{} +var _ logic.Endpoint = &Client{} +var _ logic.Sender = &Client{} +var _ logic.Receiver = &Client{} type DialContextFunc = func(ctx context.Context, network string, addr string) (net.Conn, error) @@ -41,14 +47,21 @@ func NewClient(cn transport.Connecter, loggers Loggers) *Client { c := &Client{ loggers: loggers, + closed: make(chan struct{}), } grpcConn := grpchelper.ClientConn(muxedConnecter.control, loggers.Control) go func() { - for { + ctx, cancel := context.WithCancel(context.Background()) + go func() { + <-c.closed + cancel() + }() + defer cancel() + for ctx.Err() == nil { state := grpcConn.GetState() loggers.General.WithField("grpc_state", state.String()).Debug("grpc state change") - grpcConn.WaitForStateChange(context.TODO(), state) + grpcConn.WaitForStateChange(ctx, state) } }() c.controlClient = pdu.NewReplicationClient(grpcConn) @@ -59,8 +72,9 @@ func NewClient(cn transport.Connecter, loggers Loggers) *Client { } func (c *Client) Close() { + close(c.closed) if err := c.controlConn.Close(); err != nil { - c.loggers.General.WithError(err).Error("cannot cloe control connection") + c.loggers.General.WithError(err).Error("cannot close control connection") } // TODO c.dataClient should have Close() } @@ -101,6 +115,72 @@ func (c *Client) ReplicationCursor(ctx context.Context, in *pdu.ReplicationCurso return c.controlClient.ReplicationCursor(ctx, in) } +func (c *Client) WaitForConnectivity(ctx context.Context) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + msg := uuid.New().String() + req := pdu.PingReq{Message: msg} + var ctrlOk, dataOk int32 + loggers := GetLoggersOrPanic(ctx) + var wg sync.WaitGroup + wg.Add(2) + checkRes := func(res *pdu.PingRes, err error, logger Logger, okVar *int32) { + if err == nil && res.GetEcho() != req.GetMessage() { + err = errors.New("pilot message not echoed correctly") + } + if err == context.Canceled { + err = nil + } + if err != nil { + logger.WithError(err).Error("ping failed") + atomic.StoreInt32(okVar, 0) + cancel() + } else { + atomic.StoreInt32(okVar, 1) + } + } + go func() { + defer wg.Done() + ctrl, ctrlErr := c.controlClient.Ping(ctx, &req, grpc.FailFast(false)) + checkRes(ctrl, ctrlErr, loggers.Control, &ctrlOk) + }() + go func() { + defer wg.Done() + for ctx.Err() == nil { + data, dataErr := c.dataClient.ReqPing(ctx, &req) + // dataClient uses transport.Connecter, which doesn't expose FailFast(false) 
+ // => we need to mask dial timeouts + if err, ok := dataErr.(interface{ Temporary() bool }); ok && err.Temporary() { + // Rate-limit pings here in case Temporary() is a mis-classification + // or returns immediately (this is a tight loop in that case) + // TODO keep this in lockstep with controlClient + // => don't use FailFast for control, but check that both control and data worked + time.Sleep(envconst.Duration("ZREPL_RPC_DATACONN_PING_SLEEP", 1*time.Second)) + continue + } + // it's not a dial timeout, + checkRes(data, dataErr, loggers.Data, &dataOk) + return + } + }() + wg.Wait() + var what string + if ctrlOk == 1 && dataOk == 1 { + return nil + } + if ctrlOk == 0 { + what += "control" + } + if dataOk == 0 { + if len(what) > 0 { + what += " and data" + } else { + what += "data" + } + } + return fmt.Errorf("%s rpc failed to respond to ping rpcs", what) +} + func (c *Client) ResetConnectBackoff() { c.controlConn.ResetConnectBackoff() } diff --git a/rpc/rpc_server.go b/rpc/rpc_server.go index 3abbc18..f0f0f6b 100644 --- a/rpc/rpc_server.go +++ b/rpc/rpc_server.go @@ -7,7 +7,7 @@ import ( "google.golang.org/grpc" "github.com/zrepl/zrepl/endpoint" - "github.com/zrepl/zrepl/replication/pdu" + "github.com/zrepl/zrepl/replication/logic/pdu" "github.com/zrepl/zrepl/rpc/dataconn" "github.com/zrepl/zrepl/rpc/grpcclientidentity" "github.com/zrepl/zrepl/rpc/netadaptor" diff --git a/rpc/transportmux/transportmux.go b/rpc/transportmux/transportmux.go index cb6f7ca..f78c1e3 100644 --- a/rpc/transportmux/transportmux.go +++ b/rpc/transportmux/transportmux.go @@ -7,10 +7,10 @@ package transportmux import ( "context" + "fmt" "io" "net" "time" - "fmt" "github.com/zrepl/zrepl/logger" "github.com/zrepl/zrepl/transport" @@ -111,7 +111,7 @@ func Demux(ctx context.Context, rawListener transport.AuthenticatedListener, lab if ctx.Err() != nil { return } - getLog(ctx).WithError(err).Error("accept error") + getLog(ctx).WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("accept error") continue } closeConn := func() { diff --git a/rpc/versionhandshake/versionhandshake.go b/rpc/versionhandshake/versionhandshake.go index 3864868..03835ee 100644 --- a/rpc/versionhandshake/versionhandshake.go +++ b/rpc/versionhandshake/versionhandshake.go @@ -26,14 +26,22 @@ type HandshakeError struct { msg string // If not nil, the underlying IO error that caused the handshake to fail. IOError error + isAcceptError bool } var _ net.Error = &HandshakeError{} func (e HandshakeError) Error() string { return e.msg } -// Always true to enable usage in a net.Listener. -func (e HandshakeError) Temporary() bool { return true } +// Like with net.OpErr (Go issue 6163), a client failing to handshake +// should be a temporary Accept error toward the Listener . +func (e HandshakeError) Temporary() bool { + if e.isAcceptError { + return true + } + te, ok := e.IOError.(interface{ Temporary() bool }); + return ok && te.Temporary() +} // If the underlying IOError was net.Error.Timeout(), Timeout() returns that value. // Otherwise false. 
@@ -142,14 +150,14 @@ func (m *HandshakeMessage) DecodeReader(r io.Reader, maxLen int) error { return nil } -func DoHandshakeCurrentVersion(conn net.Conn, deadline time.Time) error { +func DoHandshakeCurrentVersion(conn net.Conn, deadline time.Time) *HandshakeError { // current protocol version is hardcoded here return DoHandshakeVersion(conn, deadline, 1) } const HandshakeMessageMaxLen = 16 * 4096 -func DoHandshakeVersion(conn net.Conn, deadline time.Time, version int) error { +func DoHandshakeVersion(conn net.Conn, deadline time.Time, version int) *HandshakeError { ours := HandshakeMessage{ ProtocolVersion: version, Extensions: nil, diff --git a/rpc/versionhandshake/versionhandshake_transport_wrappers.go b/rpc/versionhandshake/versionhandshake_transport_wrappers.go index 660215e..09ead7a 100644 --- a/rpc/versionhandshake/versionhandshake_transport_wrappers.go +++ b/rpc/versionhandshake/versionhandshake_transport_wrappers.go @@ -55,6 +55,7 @@ func (l HandshakeListener) Accept(ctx context.Context) (*transport.AuthConn, err dl = time.Now().Add(l.timeout) // shadowing } if err := DoHandshakeCurrentVersion(conn, dl); err != nil { + err.isAcceptError = true conn.Close() return nil, err } diff --git a/util/chainlock/chainlock.go b/util/chainlock/chainlock.go new file mode 100644 index 0000000..6e7b5e5 --- /dev/null +++ b/util/chainlock/chainlock.go @@ -0,0 +1,42 @@ +// package chainlock implements a mutex whose Lock and Unlock +// methods return the lock itself, to enable chaining. +// +// Intended Usage +// +// defer s.lock().unlock() +// // drop lock while waiting for wait group +// func() { +// defer a.l.Unlock().Lock() +// fssesDone.Wait() +// }() +// +package chainlock + +import "sync" + +type L struct { + mtx sync.Mutex +} + +func New() *L { + return &L{} +} + +func (l *L) Lock() *L { + l.mtx.Lock() + return l +} + +func (l *L) Unlock() *L { + l.mtx.Unlock() + return l +} + +func (l *L) NewCond() *sync.Cond { + return sync.NewCond(&l.mtx) +} + +func (l *L) DropWhile(f func()) { + defer l.Unlock().Lock() + f() +} \ No newline at end of file diff --git a/util/envconst/envconst.go b/util/envconst/envconst.go index 8c13190..44bc9b8 100644 --- a/util/envconst/envconst.go +++ b/util/envconst/envconst.go @@ -40,3 +40,19 @@ func Int64(varname string, def int64) int64 { cache.Store(varname, d) return d } + +func Bool(varname string, def bool) bool { + if v, ok := cache.Load(varname); ok { + return v.(bool) + } + e := os.Getenv(varname) + if e == "" { + return def + } + d, err := strconv.ParseBool(e) + if err != nil { + panic(err) + } + cache.Store(varname, d) + return d +} diff --git a/zfs/conflict_string.go b/zfs/conflict_string.go deleted file mode 100644 index fa3452c..0000000 --- a/zfs/conflict_string.go +++ /dev/null @@ -1,16 +0,0 @@ -// Code generated by "stringer -type=Conflict"; DO NOT EDIT. 
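Since the chainlock package's doc comment refers to identifiers from its call sites, here is a self-contained sketch of the two idioms it enables, the chained defer Lock().Unlock() and DropWhile; the counter type and its methods are invented for illustration.

    package chainlockexample

    import (
        "sync"

        "github.com/zrepl/zrepl/util/chainlock"
    )

    type counter struct {
        mtx *chainlock.L
        n   int
    }

    func newCounter() *counter { return &counter{mtx: chainlock.New()} }

    func (c *counter) incr() {
        defer c.mtx.Lock().Unlock() // lock now, unlock when incr returns
        c.n++
    }

    // waitDone holds the lock around its own bookkeeping but drops it while
    // waiting on other goroutines, which must not happen under the lock.
    func (c *counter) waitDone(wg *sync.WaitGroup) {
        defer c.mtx.Lock().Unlock()
        c.mtx.DropWhile(func() {
            wg.Wait()
        })
    }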
- -package zfs - -import "strconv" - -const _Conflict_name = "ConflictIncrementalConflictAllRightConflictNoCommonAncestorConflictDiverged" - -var _Conflict_index = [...]uint8{0, 19, 35, 59, 75} - -func (i Conflict) String() string { - if i < 0 || i >= Conflict(len(_Conflict_index)-1) { - return "Conflict(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _Conflict_name[_Conflict_index[i]:_Conflict_index[i+1]] -} diff --git a/zfs/diff.go b/zfs/diff.go deleted file mode 100644 index 52eb84f..0000000 --- a/zfs/diff.go +++ /dev/null @@ -1,284 +0,0 @@ -package zfs - -import ( - "bytes" - "crypto/sha512" - "encoding/hex" - "fmt" - "io" - "os/exec" - "sort" -) - -type fsbyCreateTXG []FilesystemVersion - -func (l fsbyCreateTXG) Len() int { return len(l) } -func (l fsbyCreateTXG) Swap(i, j int) { l[i], l[j] = l[j], l[i] } -func (l fsbyCreateTXG) Less(i, j int) bool { - return l[i].CreateTXG < l[j].CreateTXG -} - -//go:generate stringer -type=Conflict -type Conflict int - -const ( - ConflictIncremental Conflict = iota // no conflict, incremental repl possible - ConflictAllRight // no conflict, initial repl possible - ConflictNoCommonAncestor - ConflictDiverged -) - -/* The receiver (left) wants to know if the sender (right) has more recent versions - - Left : | C | - Right: | A | B | C | D | E | - => : | C | D | E | - - Left: | C | - Right: | D | E | - => : , no common ancestor - - Left : | C | D | E | - Right: | A | B | C | - => : , the left has newer versions - - Left : | A | B | C | | F | - Right: | C | D | E | - => : | C | | F | => diverged => - -IMPORTANT: since ZFS currently does not export dataset UUIDs, the best heuristic to - identify a filesystem version is the tuple (name,creation) -*/ -type FilesystemDiff struct { - - // Which kind of conflict / "way forward" is possible. - // Check this first to determine the semantics of this struct's remaining members - Conflict Conflict - - // Conflict = Incremental | AllRight - // The incremental steps required to get left up to right's most recent version - // 0th element is the common ancestor, ordered by birthtime, oldest first - // If len() < 2, left and right are at same most recent version - // Conflict = otherwise - // nil; there is no incremental path for left to get to right's most recent version - IncrementalPath []FilesystemVersion - - // Conflict = Incremental | AllRight: nil - // Conflict = NoCommonAncestor: left as passed as input - // Conflict = Diverged: contains path from left most recent common ancestor (mrca) to most - // recent version on left - MRCAPathLeft []FilesystemVersion - // Conflict = Incremental | AllRight: nil - // Conflict = NoCommonAncestor: right as passed as input - // Conflict = Diverged: contains path from right most recent common ancestor (mrca) - // to most recent version on right - MRCAPathRight []FilesystemVersion -} - -func (f FilesystemDiff) String() (str string) { - var b bytes.Buffer - - fmt.Fprintf(&b, "%s, ", f.Conflict) - - switch f.Conflict { - case ConflictIncremental: - fmt.Fprintf(&b, "incremental path length %v, common ancestor at %s", len(f.IncrementalPath)-1, f.IncrementalPath[0]) - case ConflictAllRight: - fmt.Fprintf(&b, "%v versions, most recent is %s", len(f.MRCAPathRight)-1, f.MRCAPathRight[len(f.MRCAPathRight)-1]) - case ConflictDiverged: - fmt.Fprintf(&b, "diverged at %s", f.MRCAPathRight[0]) // right always has at least one snap...? 
- case ConflictNoCommonAncestor: - fmt.Fprintf(&b, "no diff to show") - default: - fmt.Fprintf(&b, "unknown conflict type, likely a bug") - } - - return b.String() -} - -// we must assume left and right are ordered ascendingly by ZFS_PROP_CREATETXG and that -// names are unique (bas ZFS_PROP_GUID replacement) -func MakeFilesystemDiff(left, right []FilesystemVersion) (diff FilesystemDiff) { - - if right == nil { - panic("right must not be nil") - } - if left == nil { - diff = FilesystemDiff{ - IncrementalPath: nil, - Conflict: ConflictAllRight, - MRCAPathLeft: left, - MRCAPathRight: right, - } - return - } - - // Assert both left and right are sorted by createtxg - { - var leftSorted, rightSorted fsbyCreateTXG - leftSorted = left - rightSorted = right - if !sort.IsSorted(leftSorted) { - panic("cannot make filesystem diff: unsorted left") - } - if !sort.IsSorted(rightSorted) { - panic("cannot make filesystem diff: unsorted right") - } - } - - // Find most recent common ancestor by name, preferring snapshots over bookmarks - mrcaLeft := len(left) - 1 - var mrcaRight int -outer: - for ; mrcaLeft >= 0; mrcaLeft-- { - for i := len(right) - 1; i >= 0; i-- { - if left[mrcaLeft].Guid == right[i].Guid { - mrcaRight = i - if i-1 >= 0 && right[i-1].Guid == right[i].Guid && right[i-1].Type == Snapshot { - // prefer snapshots over bookmarks - mrcaRight = i - 1 - } - break outer - } - } - } - - // no common ancestor? - if mrcaLeft == -1 { - diff = FilesystemDiff{ - IncrementalPath: nil, - Conflict: ConflictNoCommonAncestor, - MRCAPathLeft: left, - MRCAPathRight: right, - } - return - } - - // diverged? - if mrcaLeft != len(left)-1 { - diff = FilesystemDiff{ - IncrementalPath: nil, - Conflict: ConflictDiverged, - MRCAPathLeft: left[mrcaLeft:], - MRCAPathRight: right[mrcaRight:], - } - return - } - - if mrcaLeft != len(left)-1 { - panic("invariant violated: mrca on left must be the last item in the left list") - } - - // incPath must not contain bookmarks except initial one, - // and only if that initial bookmark's snapshot is gone - incPath := make([]FilesystemVersion, 0, len(right)) - incPath = append(incPath, right[mrcaRight]) - // right[mrcaRight] may be a bookmark if there's no equally named snapshot - for i := mrcaRight + 1; i < len(right); i++ { - if right[i].Type != Bookmark { - incPath = append(incPath, right[i]) - } - } - - diff = FilesystemDiff{ - IncrementalPath: incPath, - } - return -} - -const ZREPL_PLACEHOLDER_PROPERTY_NAME string = "zrepl:placeholder" - -type FilesystemState struct { - Placeholder bool - // TODO extend with resume token when that feature is finally added -} - -// A somewhat efficient way to determine if a filesystem exists on this host. 
-// Particularly useful if exists is called more than once (will only fork exec once and cache the result) -func ZFSListFilesystemState() (localState map[string]FilesystemState, err error) { - - var actual [][]string - if actual, err = ZFSList([]string{"name", ZREPL_PLACEHOLDER_PROPERTY_NAME}, "-t", "filesystem,volume"); err != nil { - return - } - - localState = make(map[string]FilesystemState, len(actual)) - for _, e := range actual { - dp, err := NewDatasetPath(e[0]) - if err != nil { - return nil, fmt.Errorf("ZFS does not return parseable dataset path: %s", e[0]) - } - placeholder, _ := IsPlaceholder(dp, e[1]) - localState[e[0]] = FilesystemState{ - placeholder, - } - } - return - -} - -// Computes the value for the ZREPL_PLACEHOLDER_PROPERTY_NAME ZFS user property -// to mark the given DatasetPath p as a placeholder -// -// We cannot simply use booleans here since user properties are always -// inherited. -// -// We hash the DatasetPath and use it to check for a given path if it is the -// one originally marked as placeholder. -// -// However, this prohibits moving datasets around via `zfs rename`. The -// placeholder attribute must be re-computed for the dataset path after the -// move. -// -// TODO better solution available? -func PlaceholderPropertyValue(p *DatasetPath) string { - ps := []byte(p.ToString()) - sum := sha512.Sum512_256(ps) - return hex.EncodeToString(sum[:]) -} - -func IsPlaceholder(p *DatasetPath, placeholderPropertyValue string) (isPlaceholder bool, err error) { - expected := PlaceholderPropertyValue(p) - isPlaceholder = expected == placeholderPropertyValue - if !isPlaceholder { - err = fmt.Errorf("expected %s, has %s", expected, placeholderPropertyValue) - } - return -} - -// for nonexistent FS, isPlaceholder == false && err == nil -func ZFSIsPlaceholderFilesystem(p *DatasetPath) (isPlaceholder bool, err error) { - props, err := zfsGet(p.ToString(), []string{ZREPL_PLACEHOLDER_PROPERTY_NAME}, sourceAny) - if err == io.ErrUnexpectedEOF { - // interpret this as an early exit of the zfs binary due to the fs not existing - return false, nil - } else if err != nil { - return false, err - } - isPlaceholder, _ = IsPlaceholder(p, props.Get(ZREPL_PLACEHOLDER_PROPERTY_NAME)) - return -} - -func ZFSCreatePlaceholderFilesystem(p *DatasetPath) (err error) { - v := PlaceholderPropertyValue(p) - cmd := exec.Command(ZFS_BINARY, "create", - "-o", fmt.Sprintf("%s=%s", ZREPL_PLACEHOLDER_PROPERTY_NAME, v), - "-o", "mountpoint=none", - p.ToString()) - - stderr := bytes.NewBuffer(make([]byte, 0, 1024)) - cmd.Stderr = stderr - - if err = cmd.Start(); err != nil { - return err - } - - if err = cmd.Wait(); err != nil { - err = &ZFSError{ - Stderr: stderr.Bytes(), - WaitErr: err, - } - } - - return -} diff --git a/zfs/diff_test.go b/zfs/diff_test.go deleted file mode 100644 index 0dce4a1..0000000 --- a/zfs/diff_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package zfs - -import ( - "github.com/stretchr/testify/assert" - "strconv" - "strings" - "testing" - "time" -) - -func fsvlist(fsv ...string) (r []FilesystemVersion) { - - r = make([]FilesystemVersion, len(fsv)) - for i, f := range fsv { - - // parse the id from fsvlist. 
it is used to derivce Guid,CreateTXG and Creation attrs - split := strings.Split(f, ",") - if len(split) != 2 { - panic("invalid fsv spec") - } - id, err := strconv.Atoi(split[1]) - if err != nil { - panic(err) - } - - if strings.HasPrefix(f, "#") { - r[i] = FilesystemVersion{ - Name: strings.TrimPrefix(f, "#"), - Type: Bookmark, - Guid: uint64(id), - CreateTXG: uint64(id), - Creation: time.Unix(0, 0).Add(time.Duration(id) * time.Second), - } - } else if strings.HasPrefix(f, "@") { - r[i] = FilesystemVersion{ - Name: strings.TrimPrefix(f, "@"), - Type: Snapshot, - Guid: uint64(id), - CreateTXG: uint64(id), - Creation: time.Unix(0, 0).Add(time.Duration(id) * time.Second), - } - } else { - panic("invalid character") - } - } - return -} - -func doTest(left, right []FilesystemVersion, validate func(d FilesystemDiff)) { - var d FilesystemDiff - d = MakeFilesystemDiff(left, right) - validate(d) -} - -func TestMakeFilesystemDiff_IncrementalSnapshots(t *testing.T) { - - l := fsvlist - - // basic functionality - doTest(l("@a,1", "@b,2"), l("@a,1", "@b,2", "@c,3", "@d,4"), func(d FilesystemDiff) { - assert.Equal(t, l("@b,2", "@c,3", "@d,4"), d.IncrementalPath) - }) - - // no common ancestor - doTest(l(), l("@a,1"), func(d FilesystemDiff) { - assert.Nil(t, d.IncrementalPath) - assert.EqualValues(t, d.Conflict, ConflictNoCommonAncestor) - assert.Equal(t, l("@a,1"), d.MRCAPathRight) - }) - doTest(l("@a,1", "@b,2"), l("@c,3", "@d,4"), func(d FilesystemDiff) { - assert.Nil(t, d.IncrementalPath) - assert.EqualValues(t, d.Conflict, ConflictNoCommonAncestor) - assert.Equal(t, l("@c,3", "@d,4"), d.MRCAPathRight) - }) - - // divergence is detected - doTest(l("@a,1", "@b1,2"), l("@a,1", "@b2,3"), func(d FilesystemDiff) { - assert.Nil(t, d.IncrementalPath) - assert.EqualValues(t, d.Conflict, ConflictDiverged) - assert.Equal(t, l("@a,1", "@b1,2"), d.MRCAPathLeft) - assert.Equal(t, l("@a,1", "@b2,3"), d.MRCAPathRight) - }) - - // gaps before most recent common ancestor do not matter - doTest(l("@a,1", "@b,2", "@c,3"), l("@a,1", "@c,3", "@d,4"), func(d FilesystemDiff) { - assert.Equal(t, l("@c,3", "@d,4"), d.IncrementalPath) - }) - -} - -func TestMakeFilesystemDiff_BookmarksSupport(t *testing.T) { - l := fsvlist - - // bookmarks are used - doTest(l("@a,1"), l("#a,1", "@b,2"), func(d FilesystemDiff) { - assert.Equal(t, l("#a,1", "@b,2"), d.IncrementalPath) - }) - - // boomarks are stripped from IncrementalPath (cannot send incrementally) - doTest(l("@a,1"), l("#a,1", "#b,2", "@c,3"), func(d FilesystemDiff) { - assert.Equal(t, l("#a,1", "@c,3"), d.IncrementalPath) - }) - - // test that snapshots are preferred over bookmarks in IncrementalPath - doTest(l("@a,1"), l("#a,1", "@a,1", "@b,2"), func(d FilesystemDiff) { - assert.Equal(t, l("@a,1", "@b,2"), d.IncrementalPath) - }) - doTest(l("@a,1"), l("@a,1", "#a,1", "@b,2"), func(d FilesystemDiff) { - assert.Equal(t, l("@a,1", "@b,2"), d.IncrementalPath) - }) - -} diff --git a/zfs/placeholder.go b/zfs/placeholder.go new file mode 100644 index 0000000..50ddd7f --- /dev/null +++ b/zfs/placeholder.go @@ -0,0 +1,113 @@ +package zfs + +import ( + "bytes" + "crypto/sha512" + "encoding/hex" + "fmt" + "io" + "os/exec" +) + +const ZREPL_PLACEHOLDER_PROPERTY_NAME string = "zrepl:placeholder" + +type FilesystemState struct { + Placeholder bool + // TODO extend with resume token when that feature is finally added +} + +// A somewhat efficient way to determine if a filesystem exists on this host. 
+// Particularly useful if exists is called more than once (will only fork exec once and cache the result) +func ZFSListFilesystemState() (localState map[string]FilesystemState, err error) { + + var actual [][]string + if actual, err = ZFSList([]string{"name", ZREPL_PLACEHOLDER_PROPERTY_NAME}, "-t", "filesystem,volume"); err != nil { + return + } + + localState = make(map[string]FilesystemState, len(actual)) + for _, e := range actual { + dp, err := NewDatasetPath(e[0]) + if err != nil { + return nil, fmt.Errorf("ZFS does not return parseable dataset path: %s", e[0]) + } + placeholder, _ := IsPlaceholder(dp, e[1]) + localState[e[0]] = FilesystemState{ + placeholder, + } + } + return + +} + +// Computes the value for the ZREPL_PLACEHOLDER_PROPERTY_NAME ZFS user property +// to mark the given DatasetPath p as a placeholder +// +// We cannot simply use booleans here since user properties are always +// inherited. +// +// We hash the DatasetPath and use it to check for a given path if it is the +// one originally marked as placeholder. +// +// However, this prohibits moving datasets around via `zfs rename`. The +// placeholder attribute must be re-computed for the dataset path after the +// move. +// +// TODO better solution available? +func PlaceholderPropertyValue(p *DatasetPath) string { + ps := []byte(p.ToString()) + sum := sha512.Sum512_256(ps) + return hex.EncodeToString(sum[:]) +} + +func IsPlaceholder(p *DatasetPath, placeholderPropertyValue string) (isPlaceholder bool, err error) { + expected := PlaceholderPropertyValue(p) + isPlaceholder = expected == placeholderPropertyValue + if !isPlaceholder { + err = fmt.Errorf("expected %s, has %s", expected, placeholderPropertyValue) + } + return +} + +// for nonexistent FS, isPlaceholder == false && err == nil +func ZFSIsPlaceholderFilesystem(p *DatasetPath) (isPlaceholder bool, err error) { + props, err := zfsGet(p.ToString(), []string{ZREPL_PLACEHOLDER_PROPERTY_NAME}, sourceAny) + if err == io.ErrUnexpectedEOF { + // interpret this as an early exit of the zfs binary due to the fs not existing + return false, nil + } else if err != nil { + return false, err + } + isPlaceholder, _ = IsPlaceholder(p, props.Get(ZREPL_PLACEHOLDER_PROPERTY_NAME)) + return +} + +func ZFSCreatePlaceholderFilesystem(p *DatasetPath) (err error) { + v := PlaceholderPropertyValue(p) + cmd := exec.Command(ZFS_BINARY, "create", + "-o", fmt.Sprintf("%s=%s", ZREPL_PLACEHOLDER_PROPERTY_NAME, v), + "-o", "mountpoint=none", + p.ToString()) + + stderr := bytes.NewBuffer(make([]byte, 0, 1024)) + cmd.Stderr = stderr + + if err = cmd.Start(); err != nil { + return err + } + + if err = cmd.Wait(); err != nil { + err = &ZFSError{ + Stderr: stderr.Bytes(), + WaitErr: err, + } + } + + return +} + +func ZFSSetNoPlaceholder(p *DatasetPath) error { + props := NewZFSProperties() + props.Set(ZREPL_PLACEHOLDER_PROPERTY_NAME, "off") + return zfsSet(p.ToString(), props) +} \ No newline at end of file diff --git a/zfs/zfs.go b/zfs/zfs.go index 8f08a66..652a494 100644 --- a/zfs/zfs.go +++ b/zfs/zfs.go @@ -9,6 +9,7 @@ import ( "io" "os" "os/exec" + "sort" "strings" "sync" "time" @@ -691,17 +692,62 @@ type StreamCopier interface { Close() error } +type RecvOptions struct { + // Rollback to the oldest snapshot, destroy it, then perform `recv -F`. + // Note that this doesn't change property values, i.e. an existing local property value will be kept. 
+ RollbackAndForceRecv bool +} -func ZFSRecv(ctx context.Context, fs string, streamCopier StreamCopier, additionalArgs ...string) (err error) { +func ZFSRecv(ctx context.Context, fs string, streamCopier StreamCopier, opts RecvOptions) (err error) { if err := validateZFSFilesystem(fs); err != nil { return err } + fsdp, err := NewDatasetPath(fs) + if err != nil { + return err + } + + if opts.RollbackAndForceRecv { + // destroy all snapshots before `recv -F` because `recv -F` + // does not perform a rollback unless `send -R` was used (which we assume hasn't been the case) + var snaps []FilesystemVersion + { + vs, err := ZFSListFilesystemVersions(fsdp, nil) + if err != nil { + err = fmt.Errorf("cannot list versions to rollback is required: %s", err) + } + for _, v := range vs { + if v.Type == Snapshot { + snaps = append(snaps, v) + } + } + sort.Slice(snaps, func(i, j int) bool { + return snaps[i].CreateTXG < snaps[j].CreateTXG + }) + } + // bookmarks are rolled back automatically + if len(snaps) > 0 { + // use rollback to efficiently destroy all but the earliest snapshot + // then destroy that earliest snapshot + // afterwards, `recv -F` will work + rollbackTarget := snaps[0] + rollbackTargetAbs := rollbackTarget.ToAbsPath(fsdp) + debug("recv: rollback to %q", rollbackTargetAbs) + if err := ZFSRollback(fsdp, rollbackTarget, "-r"); err != nil { + return fmt.Errorf("cannot rollback %s to %s for forced receive: %s", fsdp.ToString(), rollbackTarget, err) + } + debug("recv: destroy %q", rollbackTargetAbs) + if err := ZFSDestroy(rollbackTargetAbs); err != nil { + return fmt.Errorf("cannot destroy %s for forced receive: %s", rollbackTargetAbs, err) + } + } + } args := make([]string, 0) args = append(args, "recv") - if len(args) > 0 { - args = append(args, additionalArgs...) + if opts.RollbackAndForceRecv { + args = append(args, "-F") } args = append(args, fs) @@ -1038,3 +1084,33 @@ func ZFSBookmark(fs *DatasetPath, snapshot, bookmark string) (err error) { return } + +func ZFSRollback(fs *DatasetPath, snapshot FilesystemVersion, rollbackArgs ...string) (err error) { + + snapabs := snapshot.ToAbsPath(fs) + if snapshot.Type != Snapshot { + return fmt.Errorf("can only rollback to snapshots, got %s", snapabs) + } + + args := []string{"rollback"} + args = append(args, rollbackArgs...) + args = append(args, snapabs) + + cmd := exec.Command(ZFS_BINARY, args...) + + stderr := bytes.NewBuffer(make([]byte, 0, 1024)) + cmd.Stderr = stderr + + if err = cmd.Start(); err != nil { + return err + } + + if err = cmd.Wait(); err != nil { + err = &ZFSError{ + Stderr: stderr.Bytes(), + WaitErr: err, + } + } + + return err +}
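As a usage note for the new ZFSRecv signature: with RollbackAndForceRecv set, the function rolls the filesystem back to its oldest snapshot, destroys that snapshot, and only then runs zfs recv -F. A hedged sketch of a caller follows; the forcedRecv helper, the filesystem name, and the stream copier are placeholders, not code from this patch.

    package recvexample

    import (
        "context"

        "github.com/zrepl/zrepl/zfs"
    )

    // forcedRecv receives a replication stream into fs, discarding any
    // snapshots that only exist locally (roughly: `zfs rollback -r` to the
    // oldest snapshot, `zfs destroy` that snapshot, then `zfs recv -F`).
    func forcedRecv(ctx context.Context, fs string, sc zfs.StreamCopier) error {
        opts := zfs.RecvOptions{RollbackAndForceRecv: true}
        return zfs.ZFSRecv(ctx, fs, sc, opts)
    }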