mirror of
https://github.com/zrepl/zrepl.git
synced 2024-11-25 01:44:43 +01:00
Merge branch 'problame/replication_refactor' into InsanePrawn-master
This commit is contained in:
commit
17818439a0
79
.circleci/config.yml
Normal file
79
.circleci/config.yml
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
version: 2.0
|
||||||
|
workflows:
|
||||||
|
version: 2
|
||||||
|
build:
|
||||||
|
jobs:
|
||||||
|
- build-1.11
|
||||||
|
- build-1.12
|
||||||
|
- build-latest
|
||||||
|
jobs:
|
||||||
|
# build-latest serves as the template
|
||||||
|
# we use YAML anchors & aliases to exchange the docker image (and hence Go version used for the build)
|
||||||
|
build-latest: &build-latest
|
||||||
|
description: Builds zrepl
|
||||||
|
parameters:
|
||||||
|
image:
|
||||||
|
description: "the docker image that the job should use"
|
||||||
|
type: string
|
||||||
|
docker:
|
||||||
|
- image: circleci/golang:latest
|
||||||
|
environment:
|
||||||
|
# required by lazy.sh
|
||||||
|
TERM: xterm
|
||||||
|
working_directory: /go/src/github.com/zrepl/zrepl
|
||||||
|
steps:
|
||||||
|
- run:
|
||||||
|
name: Setup environment variables
|
||||||
|
command: |
|
||||||
|
# used by pip (for docs)
|
||||||
|
echo 'export PATH="$HOME/.local/bin:$PATH"' >> $BASH_ENV
|
||||||
|
|
||||||
|
- restore_cache:
|
||||||
|
keys:
|
||||||
|
- source
|
||||||
|
- vendor
|
||||||
|
- protobuf
|
||||||
|
|
||||||
|
- checkout
|
||||||
|
|
||||||
|
- save_cache:
|
||||||
|
key: source
|
||||||
|
paths:
|
||||||
|
- ".git"
|
||||||
|
|
||||||
|
# install deps
|
||||||
|
- run: wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip
|
||||||
|
- run: echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c
|
||||||
|
- run: sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip
|
||||||
|
- save_cache:
|
||||||
|
key: protobuf
|
||||||
|
paths:
|
||||||
|
- "/usr/include/google/protobuf"
|
||||||
|
|
||||||
|
- run: sudo apt install python3 python3-pip libgirepository1.0-dev
|
||||||
|
- run: ./lazy.sh devsetup
|
||||||
|
|
||||||
|
- run: make vendordeps
|
||||||
|
- save_cache:
|
||||||
|
key: vendor
|
||||||
|
paths:
|
||||||
|
- "./vendor"
|
||||||
|
|
||||||
|
- run: make
|
||||||
|
- run: make vet
|
||||||
|
- run: make test
|
||||||
|
- run: make release
|
||||||
|
|
||||||
|
- store_artifacts:
|
||||||
|
path: ./artifacts/release
|
||||||
|
when: always
|
||||||
|
|
||||||
|
|
||||||
|
build-1.11:
|
||||||
|
<<: *build-latest
|
||||||
|
docker:
|
||||||
|
- image: circleci/golang:1.11
|
||||||
|
build-1.12:
|
||||||
|
<<: *build-latest
|
||||||
|
docker:
|
||||||
|
- image: circleci/golang:1.12
|
82
.travis.yml
82
.travis.yml
@ -2,6 +2,7 @@ dist: xenial
|
|||||||
services:
|
services:
|
||||||
- docker
|
- docker
|
||||||
|
|
||||||
|
env: # for allow_failures: https://docs.travis-ci.com/user/customizing-the-build/
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
|
|
||||||
@ -15,45 +16,35 @@ matrix:
|
|||||||
--user "$(id -u):$(id -g)" \
|
--user "$(id -u):$(id -g)" \
|
||||||
zrepl_build make vendordeps release
|
zrepl_build make vendordeps release
|
||||||
|
|
||||||
# all go entries vary only by go version
|
- &zrepl_build_template
|
||||||
- language: go
|
language: go
|
||||||
|
go_import_path: github.com/zrepl/zrepl
|
||||||
|
before_install:
|
||||||
|
- wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip
|
||||||
|
- echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c
|
||||||
|
- sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip
|
||||||
|
- ./lazy.sh godep
|
||||||
|
- make vendordeps
|
||||||
|
script:
|
||||||
|
- make
|
||||||
|
- make vet
|
||||||
|
- make test
|
||||||
|
- make artifacts/zrepl-freebsd-amd64
|
||||||
|
- make artifacts/zrepl-linux-amd64
|
||||||
|
- make artifacts/zrepl-darwin-amd64
|
||||||
go:
|
go:
|
||||||
- "1.11"
|
- "1.11"
|
||||||
go_import_path: github.com/zrepl/zrepl
|
|
||||||
before_install:
|
|
||||||
- wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip
|
|
||||||
- echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c
|
|
||||||
- sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip
|
|
||||||
- ./lazy.sh godep
|
|
||||||
- make vendordeps
|
|
||||||
script:
|
|
||||||
- make
|
|
||||||
- make vet
|
|
||||||
- make test
|
|
||||||
- make artifacts/zrepl-freebsd-amd64
|
|
||||||
- make artifacts/zrepl-linux-amd64
|
|
||||||
- make artifacts/zrepl-darwin-amd64
|
|
||||||
|
|
||||||
- language: go
|
- <<: *zrepl_build_template
|
||||||
|
go:
|
||||||
|
- "1.12"
|
||||||
|
|
||||||
|
- <<: *zrepl_build_template
|
||||||
go:
|
go:
|
||||||
- "master"
|
- "master"
|
||||||
go_import_path: github.com/zrepl/zrepl
|
|
||||||
before_install:
|
|
||||||
- wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip
|
|
||||||
- echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c
|
|
||||||
- sudo unzip -d /usr protoc-3.6.1-linux-x86_64.zip
|
|
||||||
- ./lazy.sh godep
|
|
||||||
- make vendordeps
|
|
||||||
script:
|
|
||||||
- make
|
|
||||||
- make vet
|
|
||||||
- make test
|
|
||||||
- make artifacts/zrepl-freebsd-amd64
|
|
||||||
- make artifacts/zrepl-linux-amd64
|
|
||||||
- make artifacts/zrepl-darwin-amd64
|
|
||||||
|
|
||||||
# all python entries vary only by python version
|
- &zrepl_docs_template
|
||||||
- language: python
|
language: python
|
||||||
python:
|
python:
|
||||||
- "3.4"
|
- "3.4"
|
||||||
install:
|
install:
|
||||||
@ -61,29 +52,18 @@ matrix:
|
|||||||
- pip install -r docs/requirements.txt
|
- pip install -r docs/requirements.txt
|
||||||
script:
|
script:
|
||||||
- make docs
|
- make docs
|
||||||
- language: python
|
- <<: *zrepl_docs_template
|
||||||
python:
|
python:
|
||||||
- "3.5"
|
- "3.5"
|
||||||
install:
|
- <<: *zrepl_docs_template
|
||||||
- sudo apt-get install libgirepository1.0-dev
|
|
||||||
- pip install -r docs/requirements.txt
|
|
||||||
script:
|
|
||||||
- make docs
|
|
||||||
- language: python
|
|
||||||
python:
|
python:
|
||||||
- "3.6"
|
- "3.6"
|
||||||
install:
|
- <<: *zrepl_docs_template
|
||||||
- sudo apt-get install libgirepository1.0-dev
|
|
||||||
- pip install -r docs/requirements.txt
|
|
||||||
script:
|
|
||||||
- make docs
|
|
||||||
- language: python
|
|
||||||
python:
|
python:
|
||||||
- "3.7"
|
- "3.7"
|
||||||
install:
|
|
||||||
- sudo apt-get install libgirepository1.0-dev
|
|
||||||
- pip install -r docs/requirements.txt
|
|
||||||
script:
|
|
||||||
- make docs
|
|
||||||
|
|
||||||
|
|
||||||
|
allow_failures:
|
||||||
|
- <<: *zrepl_build_template
|
||||||
|
go:
|
||||||
|
- "master"
|
||||||
|
49
Gopkg.lock
generated
49
Gopkg.lock
generated
@ -89,6 +89,14 @@
|
|||||||
revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5"
|
revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5"
|
||||||
version = "v1.2.0"
|
version = "v1.2.0"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
digest = "1:ad92aa49f34cbc3546063c7eb2cabb55ee2278b72842eda80e2a20a8a06a8d73"
|
||||||
|
name = "github.com/google/uuid"
|
||||||
|
packages = ["."]
|
||||||
|
pruneopts = ""
|
||||||
|
revision = "0cd6bf5da1e1c83f8b45653022c74f71af0538a4"
|
||||||
|
version = "v1.1.1"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
branch = "master"
|
branch = "master"
|
||||||
digest = "1:cb09475f771b9167fb9333629f5d6a7161572602ea040f1094602b0dc8709878"
|
digest = "1:cb09475f771b9167fb9333629f5d6a7161572602ea040f1094602b0dc8709878"
|
||||||
@ -161,6 +169,14 @@
|
|||||||
revision = "3247c84500bff8d9fb6d579d800f20b3e091582c"
|
revision = "3247c84500bff8d9fb6d579d800f20b3e091582c"
|
||||||
version = "v1.0.0"
|
version = "v1.0.0"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
digest = "1:4ff67dde814694496d7aa31be44b900f9717a10c8bc9136b13f49c8ef97f439a"
|
||||||
|
name = "github.com/montanaflynn/stats"
|
||||||
|
packages = ["."]
|
||||||
|
pruneopts = ""
|
||||||
|
revision = "63fbb2597b7a13043b453a4b819945badb8f8926"
|
||||||
|
version = "v0.5.0"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
branch = "master"
|
branch = "master"
|
||||||
digest = "1:f60ff065b58bd53e641112b38bbda9d2684deb828393c7ffb89c69a1ee301d17"
|
digest = "1:f60ff065b58bd53e641112b38bbda9d2684deb828393c7ffb89c69a1ee301d17"
|
||||||
@ -245,6 +261,14 @@
|
|||||||
pruneopts = ""
|
pruneopts = ""
|
||||||
revision = "8b1c2da0d56deffdbb9e48d4414b4e674bd8083e"
|
revision = "8b1c2da0d56deffdbb9e48d4414b4e674bd8083e"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
digest = "1:3962f553b77bf6c03fc07cd687a22dd3b00fe11aa14d31194f5505f5bb65cdc8"
|
||||||
|
name = "github.com/sergi/go-diff"
|
||||||
|
packages = ["diffmatchpatch"]
|
||||||
|
pruneopts = ""
|
||||||
|
revision = "1744e2970ca51c86172c8190fadad617561ed6e7"
|
||||||
|
version = "v1.0.0"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
branch = "master"
|
branch = "master"
|
||||||
digest = "1:146327ce93be37e68bd3ff8541090d96da8cb3adc9e35d57570e9170a29f6bf6"
|
digest = "1:146327ce93be37e68bd3ff8541090d96da8cb3adc9e35d57570e9170a29f6bf6"
|
||||||
@ -280,6 +304,25 @@
|
|||||||
revision = "93babf24513d0e8277635da8169fcc5a46ae3f6a"
|
revision = "93babf24513d0e8277635da8169fcc5a46ae3f6a"
|
||||||
version = "v1.11.0"
|
version = "v1.11.0"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
digest = "1:529ed3f98838f69e13761788d0cc71b44e130058fab13bae2ce09f7a176bced4"
|
||||||
|
name = "github.com/yudai/gojsondiff"
|
||||||
|
packages = [
|
||||||
|
".",
|
||||||
|
"formatter",
|
||||||
|
]
|
||||||
|
pruneopts = ""
|
||||||
|
revision = "7b1b7adf999dab73a6eb02669c3d82dbb27a3dd6"
|
||||||
|
version = "1.0.0"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
branch = "master"
|
||||||
|
digest = "1:9857bb2293f372b2181004d8b62179bbdb4ab0982ec6f762abe6cf2bfedaff85"
|
||||||
|
name = "github.com/yudai/golcs"
|
||||||
|
packages = ["."]
|
||||||
|
pruneopts = ""
|
||||||
|
revision = "ecda9a501e8220fae3b4b600c3db4b0ba22cfc68"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
branch = "v2"
|
branch = "v2"
|
||||||
digest = "1:6b8a6afafde7ed31cd0c577ba40d88ce39e8f1c5eb76d7836be7d5b74f1c534a"
|
digest = "1:6b8a6afafde7ed31cd0c577ba40d88ce39e8f1c5eb76d7836be7d5b74f1c534a"
|
||||||
@ -403,9 +446,11 @@
|
|||||||
"github.com/go-logfmt/logfmt",
|
"github.com/go-logfmt/logfmt",
|
||||||
"github.com/golang/protobuf/proto",
|
"github.com/golang/protobuf/proto",
|
||||||
"github.com/golang/protobuf/protoc-gen-go",
|
"github.com/golang/protobuf/protoc-gen-go",
|
||||||
|
"github.com/google/uuid",
|
||||||
"github.com/jinzhu/copier",
|
"github.com/jinzhu/copier",
|
||||||
"github.com/kr/pretty",
|
"github.com/kr/pretty",
|
||||||
"github.com/mattn/go-isatty",
|
"github.com/mattn/go-isatty",
|
||||||
|
"github.com/montanaflynn/stats",
|
||||||
"github.com/pkg/errors",
|
"github.com/pkg/errors",
|
||||||
"github.com/pkg/profile",
|
"github.com/pkg/profile",
|
||||||
"github.com/problame/go-netssh",
|
"github.com/problame/go-netssh",
|
||||||
@ -415,14 +460,18 @@
|
|||||||
"github.com/spf13/pflag",
|
"github.com/spf13/pflag",
|
||||||
"github.com/stretchr/testify/assert",
|
"github.com/stretchr/testify/assert",
|
||||||
"github.com/stretchr/testify/require",
|
"github.com/stretchr/testify/require",
|
||||||
|
"github.com/yudai/gojsondiff",
|
||||||
|
"github.com/yudai/gojsondiff/formatter",
|
||||||
"github.com/zrepl/yaml-config",
|
"github.com/zrepl/yaml-config",
|
||||||
"golang.org/x/net/context",
|
"golang.org/x/net/context",
|
||||||
"golang.org/x/sys/unix",
|
"golang.org/x/sys/unix",
|
||||||
"golang.org/x/tools/cmd/stringer",
|
"golang.org/x/tools/cmd/stringer",
|
||||||
"google.golang.org/grpc",
|
"google.golang.org/grpc",
|
||||||
|
"google.golang.org/grpc/codes",
|
||||||
"google.golang.org/grpc/credentials",
|
"google.golang.org/grpc/credentials",
|
||||||
"google.golang.org/grpc/keepalive",
|
"google.golang.org/grpc/keepalive",
|
||||||
"google.golang.org/grpc/peer",
|
"google.golang.org/grpc/peer",
|
||||||
|
"google.golang.org/grpc/status",
|
||||||
]
|
]
|
||||||
solver-name = "gps-cdcl"
|
solver-name = "gps-cdcl"
|
||||||
solver-version = 1
|
solver-version = 1
|
||||||
|
@ -59,3 +59,7 @@ required = [
|
|||||||
[[constraint]]
|
[[constraint]]
|
||||||
name = "google.golang.org/grpc"
|
name = "google.golang.org/grpc"
|
||||||
version = "1"
|
version = "1"
|
||||||
|
|
||||||
|
[[constraint]]
|
||||||
|
version = "1.1.0"
|
||||||
|
name = "github.com/google/uuid"
|
||||||
|
2
Makefile
2
Makefile
@ -27,7 +27,7 @@ vendordeps:
|
|||||||
dep ensure -v -vendor-only
|
dep ensure -v -vendor-only
|
||||||
|
|
||||||
generate: #not part of the build, must do that manually
|
generate: #not part of the build, must do that manually
|
||||||
protoc -I=replication/pdu --go_out=plugins=grpc:replication/pdu replication/pdu/pdu.proto
|
protoc -I=replication/logic/pdu --go_out=plugins=grpc:replication/logic/pdu replication/logic/pdu/pdu.proto
|
||||||
go generate -x ./...
|
go generate -x ./...
|
||||||
|
|
||||||
build:
|
build:
|
||||||
|
206
client/status.go
206
client/status.go
@ -10,8 +10,7 @@ import (
|
|||||||
"github.com/zrepl/zrepl/daemon"
|
"github.com/zrepl/zrepl/daemon"
|
||||||
"github.com/zrepl/zrepl/daemon/job"
|
"github.com/zrepl/zrepl/daemon/job"
|
||||||
"github.com/zrepl/zrepl/daemon/pruner"
|
"github.com/zrepl/zrepl/daemon/pruner"
|
||||||
"github.com/zrepl/zrepl/replication"
|
"github.com/zrepl/zrepl/replication/report"
|
||||||
"github.com/zrepl/zrepl/replication/fsrep"
|
|
||||||
"io"
|
"io"
|
||||||
"math"
|
"math"
|
||||||
"net/http"
|
"net/http"
|
||||||
@ -122,7 +121,7 @@ func wrap(s string, width int) string {
|
|||||||
if idx := strings.IndexAny(s, "\n\r"); idx != -1 && idx < rem {
|
if idx := strings.IndexAny(s, "\n\r"); idx != -1 && idx < rem {
|
||||||
rem = idx+1
|
rem = idx+1
|
||||||
}
|
}
|
||||||
untilNewline := strings.TrimSpace(s[:rem])
|
untilNewline := strings.TrimRight(s[:rem], "\n\r")
|
||||||
s = s[rem:]
|
s = s[rem:]
|
||||||
if len(untilNewline) == 0 {
|
if len(untilNewline) == 0 {
|
||||||
continue
|
continue
|
||||||
@ -130,7 +129,7 @@ func wrap(s string, width int) string {
|
|||||||
b.WriteString(untilNewline)
|
b.WriteString(untilNewline)
|
||||||
b.WriteString("\n")
|
b.WriteString("\n")
|
||||||
}
|
}
|
||||||
return strings.TrimSpace(b.String())
|
return strings.TrimRight(b.String(), "\n\r")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tui) printfDrawIndentedAndWrappedIfMultiline(format string, a ...interface{}) {
|
func (t *tui) printfDrawIndentedAndWrappedIfMultiline(format string, a ...interface{}) {
|
||||||
@ -353,74 +352,91 @@ func (t *tui) draw() {
|
|||||||
termbox.Flush()
|
termbox.Flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tui) renderReplicationReport(rep *replication.Report, history *bytesProgressHistory) {
|
func (t *tui) renderReplicationReport(rep *report.Report, history *bytesProgressHistory) {
|
||||||
if rep == nil {
|
if rep == nil {
|
||||||
t.printf("...\n")
|
t.printf("...\n")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
all := make([]*fsrep.Report, 0, len(rep.Completed)+len(rep.Pending) + 1)
|
if rep.WaitReconnectError != nil {
|
||||||
all = append(all, rep.Completed...)
|
t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: %s", rep.WaitReconnectError)
|
||||||
all = append(all, rep.Pending...)
|
t.newline()
|
||||||
if rep.Active != nil {
|
|
||||||
all = append(all, rep.Active)
|
|
||||||
}
|
}
|
||||||
sort.Slice(all, func(i, j int) bool {
|
if !rep.WaitReconnectSince.IsZero() {
|
||||||
return all[i].Filesystem < all[j].Filesystem
|
delta := rep.WaitReconnectUntil.Sub(time.Now()).Round(time.Second)
|
||||||
|
if rep.WaitReconnectUntil.IsZero() || delta > 0 {
|
||||||
|
var until string
|
||||||
|
if rep.WaitReconnectUntil.IsZero() {
|
||||||
|
until = "waiting indefinitely"
|
||||||
|
} else {
|
||||||
|
until = fmt.Sprintf("hard fail in %s @ %s", delta, rep.WaitReconnectUntil)
|
||||||
|
}
|
||||||
|
t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: reconnecting with exponential backoff (since %s) (%s)",
|
||||||
|
rep.WaitReconnectSince, until)
|
||||||
|
} else {
|
||||||
|
t.printfDrawIndentedAndWrappedIfMultiline("Connectivity: reconnects reached hard-fail timeout @ %s", rep.WaitReconnectUntil)
|
||||||
|
}
|
||||||
|
t.newline()
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO visualize more than the latest attempt by folding all attempts into one
|
||||||
|
if len(rep.Attempts) == 0 {
|
||||||
|
t.printf("no attempts made yet")
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
t.printf("Attempt #%d", len(rep.Attempts))
|
||||||
|
if len(rep.Attempts) > 1 {
|
||||||
|
t.printf(". Previous attempts failed with the follwing statuses:")
|
||||||
|
t.newline()
|
||||||
|
t.addIndent(1)
|
||||||
|
for i, a := range rep.Attempts[:len(rep.Attempts)-1] {
|
||||||
|
t.printfDrawIndentedAndWrappedIfMultiline("#%d: %s (failed at %s) (ran %s)", i + 1, a.State, a.FinishAt, a.FinishAt.Sub(a.StartAt))
|
||||||
|
t.newline()
|
||||||
|
}
|
||||||
|
t.addIndent(-1)
|
||||||
|
} else {
|
||||||
|
t.newline()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
latest := rep.Attempts[len(rep.Attempts)-1]
|
||||||
|
sort.Slice(latest.Filesystems, func(i, j int) bool {
|
||||||
|
return latest.Filesystems[i].Info.Name < latest.Filesystems[j].Info.Name
|
||||||
})
|
})
|
||||||
|
|
||||||
state, err := replication.StateString(rep.Status)
|
t.printf("Status: %s", latest.State)
|
||||||
if err != nil {
|
|
||||||
t.printf("Status: %q (parse error: %q)\n", rep.Status, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
t.printf("Status: %s", state)
|
|
||||||
t.newline()
|
t.newline()
|
||||||
if rep.Problem != "" {
|
if latest.State == report.AttemptPlanningError {
|
||||||
t.printf("Problem: ")
|
t.printf("Problem: ")
|
||||||
t.printfDrawIndentedAndWrappedIfMultiline("%s", rep.Problem)
|
t.printfDrawIndentedAndWrappedIfMultiline("%s", latest.PlanError)
|
||||||
|
t.newline()
|
||||||
|
} else if latest.State == report.AttemptFanOutError {
|
||||||
|
t.printf("Problem: one or more of the filesystems encountered errors")
|
||||||
t.newline()
|
t.newline()
|
||||||
}
|
|
||||||
if rep.SleepUntil.After(time.Now()) && !state.IsTerminal() {
|
|
||||||
t.printf("Sleeping until %s (%s left)\n", rep.SleepUntil, rep.SleepUntil.Sub(time.Now()))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if state != replication.Planning && state != replication.PlanningError {
|
if latest.State != report.AttemptPlanning && latest.State != report.AttemptPlanningError {
|
||||||
|
// Draw global progress bar
|
||||||
// Progress: [---------------]
|
// Progress: [---------------]
|
||||||
sumUpFSRep := func(rep *fsrep.Report) (transferred, total int64) {
|
expected, replicated := latest.BytesSum()
|
||||||
for _, s := range rep.Pending {
|
rate, changeCount := history.Update(replicated)
|
||||||
transferred += s.Bytes
|
|
||||||
total += s.ExpectedBytes
|
|
||||||
}
|
|
||||||
for _, s := range rep.Completed {
|
|
||||||
transferred += s.Bytes
|
|
||||||
total += s.ExpectedBytes
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
var transferred, total int64
|
|
||||||
for _, fs := range all {
|
|
||||||
fstx, fstotal := sumUpFSRep(fs)
|
|
||||||
transferred += fstx
|
|
||||||
total += fstotal
|
|
||||||
}
|
|
||||||
rate, changeCount := history.Update(transferred)
|
|
||||||
t.write("Progress: ")
|
t.write("Progress: ")
|
||||||
t.drawBar(50, transferred, total, changeCount)
|
t.drawBar(50, replicated, expected, changeCount)
|
||||||
t.write(fmt.Sprintf(" %s / %s @ %s/s", ByteCountBinary(transferred), ByteCountBinary(total), ByteCountBinary(rate)))
|
t.write(fmt.Sprintf(" %s / %s @ %s/s", ByteCountBinary(replicated), ByteCountBinary(expected), ByteCountBinary(rate)))
|
||||||
t.newline()
|
t.newline()
|
||||||
}
|
|
||||||
|
|
||||||
var maxFSLen int
|
var maxFSLen int
|
||||||
for _, fs := range all {
|
for _, fs := range latest.Filesystems {
|
||||||
if len(fs.Filesystem) > maxFSLen {
|
if len(fs.Info.Name) > maxFSLen {
|
||||||
maxFSLen = len(fs.Filesystem)
|
maxFSLen = len(fs.Info.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, fs := range all {
|
for _, fs := range latest.Filesystems {
|
||||||
t.printFilesystemStatus(fs, fs == rep.Active, maxFSLen)
|
t.printFilesystemStatus(fs, false, maxFSLen) // FIXME bring 'active' flag back
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tui) renderPrunerReport(r *pruner.Report) {
|
func (t *tui) renderPrunerReport(r *pruner.Report) {
|
||||||
@ -441,9 +457,6 @@ func (t *tui) renderPrunerReport(r *pruner.Report) {
|
|||||||
if r.Error != "" {
|
if r.Error != "" {
|
||||||
t.printf("Error: %s\n", r.Error)
|
t.printf("Error: %s\n", r.Error)
|
||||||
}
|
}
|
||||||
if r.SleepUntil.After(time.Now()) {
|
|
||||||
t.printf("Sleeping until %s (%s left)\n", r.SleepUntil, r.SleepUntil.Sub(time.Now()))
|
|
||||||
}
|
|
||||||
|
|
||||||
type commonFS struct {
|
type commonFS struct {
|
||||||
*pruner.FSReport
|
*pruner.FSReport
|
||||||
@ -459,8 +472,7 @@ func (t *tui) renderPrunerReport(r *pruner.Report) {
|
|||||||
|
|
||||||
switch state {
|
switch state {
|
||||||
case pruner.Plan: fallthrough
|
case pruner.Plan: fallthrough
|
||||||
case pruner.PlanWait: fallthrough
|
case pruner.PlanErr:
|
||||||
case pruner.ErrPerm:
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -500,8 +512,18 @@ func (t *tui) renderPrunerReport(r *pruner.Report) {
|
|||||||
for _, fs := range all {
|
for _, fs := range all {
|
||||||
t.write(rightPad(fs.Filesystem, maxFSname, " "))
|
t.write(rightPad(fs.Filesystem, maxFSname, " "))
|
||||||
t.write(" ")
|
t.write(" ")
|
||||||
|
if !fs.SkipReason.NotSkipped() {
|
||||||
|
t.printf("skipped: %s\n", fs.SkipReason)
|
||||||
|
continue
|
||||||
|
}
|
||||||
if fs.LastError != "" {
|
if fs.LastError != "" {
|
||||||
t.printf("ERROR (%d): %s\n", fs.ErrorCount, fs.LastError) // whitespace is padding
|
if strings.ContainsAny(fs.LastError, "\r\n") {
|
||||||
|
t.printf("ERROR:")
|
||||||
|
t.printfDrawIndentedAndWrappedIfMultiline("%s\n", fs.LastError)
|
||||||
|
} else {
|
||||||
|
t.printfDrawIndentedAndWrappedIfMultiline("ERROR: %s\n", fs.LastError)
|
||||||
|
}
|
||||||
|
t.newline()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -524,25 +546,6 @@ func (t *tui) renderPrunerReport(r *pruner.Report) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const snapshotIndent = 1
|
|
||||||
func calculateMaxFSLength(all []*fsrep.Report) (maxFS, maxStatus int) {
|
|
||||||
for _, e := range all {
|
|
||||||
if len(e.Filesystem) > maxFS {
|
|
||||||
maxFS = len(e.Filesystem)
|
|
||||||
}
|
|
||||||
all2 := make([]*fsrep.StepReport, 0, len(e.Pending) + len(e.Completed))
|
|
||||||
all2 = append(all2, e.Pending...)
|
|
||||||
all2 = append(all2, e.Completed...)
|
|
||||||
for _, e2 := range all2 {
|
|
||||||
elen := len(e2.Problem) + len(e2.From) + len(e2.To) + 60 // random spacing, units, labels, etc
|
|
||||||
if elen > maxStatus {
|
|
||||||
maxStatus = elen
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func times(str string, n int) (out string) {
|
func times(str string, n int) (out string) {
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
out += str
|
out += str
|
||||||
@ -586,35 +589,13 @@ func (t *tui) drawBar(length int, bytes, totalBytes int64, changeCount int) {
|
|||||||
t.write("]")
|
t.write("]")
|
||||||
}
|
}
|
||||||
|
|
||||||
func StringStepState(s fsrep.StepState) string {
|
func (t *tui) printFilesystemStatus(rep *report.FilesystemReport, active bool, maxFS int) {
|
||||||
switch s {
|
|
||||||
case fsrep.StepReplicationReady: return "Ready"
|
|
||||||
case fsrep.StepMarkReplicatedReady: return "MarkReady"
|
|
||||||
case fsrep.StepCompleted: return "Completed"
|
|
||||||
default:
|
|
||||||
return fmt.Sprintf("UNKNOWN %d", s)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tui) printFilesystemStatus(rep *fsrep.Report, active bool, maxFS int) {
|
|
||||||
|
|
||||||
bytes := int64(0)
|
|
||||||
totalBytes := int64(0)
|
|
||||||
for _, s := range rep.Pending {
|
|
||||||
bytes += s.Bytes
|
|
||||||
totalBytes += s.ExpectedBytes
|
|
||||||
}
|
|
||||||
for _, s := range rep.Completed {
|
|
||||||
bytes += s.Bytes
|
|
||||||
totalBytes += s.ExpectedBytes
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
expected, replicated := rep.BytesSum()
|
||||||
status := fmt.Sprintf("%s (step %d/%d, %s/%s)",
|
status := fmt.Sprintf("%s (step %d/%d, %s/%s)",
|
||||||
rep.Status,
|
strings.ToUpper(string(rep.State)),
|
||||||
len(rep.Completed), len(rep.Pending) + len(rep.Completed),
|
rep.CurrentStep, len(rep.Steps),
|
||||||
ByteCountBinary(bytes), ByteCountBinary(totalBytes),
|
ByteCountBinary(replicated), ByteCountBinary(expected),
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
||||||
activeIndicator := " "
|
activeIndicator := " "
|
||||||
@ -623,18 +604,23 @@ func (t *tui) printFilesystemStatus(rep *fsrep.Report, active bool, maxFS int) {
|
|||||||
}
|
}
|
||||||
t.printf("%s %s %s ",
|
t.printf("%s %s %s ",
|
||||||
activeIndicator,
|
activeIndicator,
|
||||||
rightPad(rep.Filesystem, maxFS, " "),
|
rightPad(rep.Info.Name, maxFS, " "),
|
||||||
status)
|
status)
|
||||||
|
|
||||||
next := ""
|
next := ""
|
||||||
if rep.Problem != "" {
|
if err := rep.Error(); err != nil {
|
||||||
next = rep.Problem
|
next = err.Err
|
||||||
} else if len(rep.Pending) > 0 {
|
} else if rep.State != report.FilesystemDone {
|
||||||
if rep.Pending[0].From != "" {
|
if nextStep := rep.NextStep(); nextStep != nil {
|
||||||
next = fmt.Sprintf("next: %s => %s", rep.Pending[0].From, rep.Pending[0].To)
|
if nextStep.IsIncremental() {
|
||||||
|
next = fmt.Sprintf("next: %s => %s", nextStep.Info.From, nextStep.Info.To)
|
||||||
} else {
|
} else {
|
||||||
next = fmt.Sprintf("next: %s (full)", rep.Pending[0].To)
|
next = fmt.Sprintf("next: %s (full)", nextStep.Info.To)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
next = "" // individual FSes may still be in planning state
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
t.printfDrawIndentedAndWrappedIfMultiline("%s", next)
|
t.printfDrawIndentedAndWrappedIfMultiline("%s", next)
|
||||||
|
|
||||||
|
@ -78,7 +78,38 @@ type PushJob struct {
|
|||||||
type PullJob struct {
|
type PullJob struct {
|
||||||
ActiveJob `yaml:",inline"`
|
ActiveJob `yaml:",inline"`
|
||||||
RootFS string `yaml:"root_fs"`
|
RootFS string `yaml:"root_fs"`
|
||||||
Interval time.Duration `yaml:"interval,positive"`
|
Interval PositiveDurationOrManual `yaml:"interval"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type PositiveDurationOrManual struct {
|
||||||
|
Interval time.Duration
|
||||||
|
Manual bool
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ yaml.Unmarshaler = (*PositiveDurationOrManual)(nil)
|
||||||
|
|
||||||
|
func (i *PositiveDurationOrManual) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||||
|
var s string
|
||||||
|
if err := u(&s, true); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
switch s {
|
||||||
|
case "manual":
|
||||||
|
i.Manual = true
|
||||||
|
i.Interval = 0
|
||||||
|
case "":
|
||||||
|
return fmt.Errorf("value must not be empty")
|
||||||
|
default:
|
||||||
|
i.Manual = false
|
||||||
|
i.Interval, err = time.ParseDuration(s)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if i.Interval <= 0 {
|
||||||
|
return fmt.Errorf("value must be a positive duration, got %q", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type SinkJob struct {
|
type SinkJob struct {
|
||||||
|
41
config/config_positiveintervalormanual_test.go
Normal file
41
config/config_positiveintervalormanual_test.go
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/zrepl/yaml-config"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPositiveDurationOrManual(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
Comment, Input string
|
||||||
|
Result *PositiveDurationOrManual
|
||||||
|
}{
|
||||||
|
{"empty is error", "", nil},
|
||||||
|
{"negative is error", "-1s", nil},
|
||||||
|
{"zero seconds is error", "0s", nil},
|
||||||
|
{"zero is error", "0", nil},
|
||||||
|
{"non-manual is error", "something", nil},
|
||||||
|
{"positive seconds works", "1s", &PositiveDurationOrManual{Manual: false, Interval: 1 * time.Second}},
|
||||||
|
{"manual works", "manual", &PositiveDurationOrManual{Manual: true, Interval: 0}},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.Comment, func(t *testing.T) {
|
||||||
|
var out struct {
|
||||||
|
FieldName PositiveDurationOrManual `yaml:"fieldname"`
|
||||||
|
}
|
||||||
|
input := fmt.Sprintf("\nfieldname: %s\n", tc.Input)
|
||||||
|
err := yaml.UnmarshalStrict([]byte(input), &out)
|
||||||
|
if tc.Result == nil {
|
||||||
|
assert.Error(t, err)
|
||||||
|
t.Logf("%#v", out)
|
||||||
|
} else {
|
||||||
|
assert.Equal(t, *tc.Result, out.FieldName)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -17,10 +17,12 @@ import (
|
|||||||
"github.com/zrepl/zrepl/daemon/snapper"
|
"github.com/zrepl/zrepl/daemon/snapper"
|
||||||
"github.com/zrepl/zrepl/endpoint"
|
"github.com/zrepl/zrepl/endpoint"
|
||||||
"github.com/zrepl/zrepl/replication"
|
"github.com/zrepl/zrepl/replication"
|
||||||
|
"github.com/zrepl/zrepl/replication/driver"
|
||||||
|
"github.com/zrepl/zrepl/replication/logic"
|
||||||
|
"github.com/zrepl/zrepl/replication/report"
|
||||||
"github.com/zrepl/zrepl/rpc"
|
"github.com/zrepl/zrepl/rpc"
|
||||||
"github.com/zrepl/zrepl/transport"
|
"github.com/zrepl/zrepl/transport"
|
||||||
"github.com/zrepl/zrepl/transport/fromconfig"
|
"github.com/zrepl/zrepl/transport/fromconfig"
|
||||||
"github.com/zrepl/zrepl/util/envconst"
|
|
||||||
"github.com/zrepl/zrepl/zfs"
|
"github.com/zrepl/zrepl/zfs"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -53,7 +55,7 @@ type activeSideTasks struct {
|
|||||||
state ActiveSideState
|
state ActiveSideState
|
||||||
|
|
||||||
// valid for state ActiveSideReplicating, ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone
|
// valid for state ActiveSideReplicating, ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone
|
||||||
replication *replication.Replication
|
replicationReport driver.ReportFunc
|
||||||
replicationCancel context.CancelFunc
|
replicationCancel context.CancelFunc
|
||||||
|
|
||||||
// valid for state ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone
|
// valid for state ActiveSidePruneSender, ActiveSidePruneReceiver, ActiveSideDone
|
||||||
@ -79,7 +81,7 @@ func (a *ActiveSide) updateTasks(u func(*activeSideTasks)) activeSideTasks {
|
|||||||
type activeMode interface {
|
type activeMode interface {
|
||||||
ConnectEndpoints(rpcLoggers rpc.Loggers, connecter transport.Connecter)
|
ConnectEndpoints(rpcLoggers rpc.Loggers, connecter transport.Connecter)
|
||||||
DisconnectEndpoints()
|
DisconnectEndpoints()
|
||||||
SenderReceiver() (replication.Sender, replication.Receiver)
|
SenderReceiver() (logic.Sender, logic.Receiver)
|
||||||
Type() Type
|
Type() Type
|
||||||
RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{})
|
RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{})
|
||||||
ResetConnectBackoff()
|
ResetConnectBackoff()
|
||||||
@ -111,7 +113,7 @@ func (m *modePush) DisconnectEndpoints() {
|
|||||||
m.receiver = nil
|
m.receiver = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *modePush) SenderReceiver() (replication.Sender, replication.Receiver) {
|
func (m *modePush) SenderReceiver() (logic.Sender, logic.Receiver) {
|
||||||
m.setupMtx.Lock()
|
m.setupMtx.Lock()
|
||||||
defer m.setupMtx.Unlock()
|
defer m.setupMtx.Unlock()
|
||||||
return m.sender, m.receiver
|
return m.sender, m.receiver
|
||||||
@ -151,7 +153,7 @@ type modePull struct {
|
|||||||
receiver *endpoint.Receiver
|
receiver *endpoint.Receiver
|
||||||
sender *rpc.Client
|
sender *rpc.Client
|
||||||
rootFS *zfs.DatasetPath
|
rootFS *zfs.DatasetPath
|
||||||
interval time.Duration
|
interval config.PositiveDurationOrManual
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *modePull) ConnectEndpoints(loggers rpc.Loggers, connecter transport.Connecter) {
|
func (m *modePull) ConnectEndpoints(loggers rpc.Loggers, connecter transport.Connecter) {
|
||||||
@ -172,7 +174,7 @@ func (m *modePull) DisconnectEndpoints() {
|
|||||||
m.receiver = nil
|
m.receiver = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *modePull) SenderReceiver() (replication.Sender, replication.Receiver) {
|
func (m *modePull) SenderReceiver() (logic.Sender, logic.Receiver) {
|
||||||
m.setupMtx.Lock()
|
m.setupMtx.Lock()
|
||||||
defer m.setupMtx.Unlock()
|
defer m.setupMtx.Unlock()
|
||||||
return m.sender, m.receiver
|
return m.sender, m.receiver
|
||||||
@ -181,7 +183,12 @@ func (m *modePull) SenderReceiver() (replication.Sender, replication.Receiver) {
|
|||||||
func (*modePull) Type() Type { return TypePull }
|
func (*modePull) Type() Type { return TypePull }
|
||||||
|
|
||||||
func (m *modePull) RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{}) {
|
func (m *modePull) RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{}) {
|
||||||
t := time.NewTicker(m.interval)
|
if m.interval.Manual {
|
||||||
|
GetLogger(ctx).Info("manual pull configured, periodic pull disabled")
|
||||||
|
// "waiting for wakeups" is printed in common ActiveSide.do
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t := time.NewTicker(m.interval.Interval)
|
||||||
defer t.Stop()
|
defer t.Stop()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@ -210,9 +217,6 @@ func (m *modePull) ResetConnectBackoff() {
|
|||||||
|
|
||||||
func modePullFromConfig(g *config.Global, in *config.PullJob) (m *modePull, err error) {
|
func modePullFromConfig(g *config.Global, in *config.PullJob) (m *modePull, err error) {
|
||||||
m = &modePull{}
|
m = &modePull{}
|
||||||
if in.Interval <= 0 {
|
|
||||||
return nil, errors.New("interval must be positive")
|
|
||||||
}
|
|
||||||
m.interval = in.Interval
|
m.interval = in.Interval
|
||||||
|
|
||||||
m.rootFS, err = zfs.NewDatasetPath(in.RootFS)
|
m.rootFS, err = zfs.NewDatasetPath(in.RootFS)
|
||||||
@ -274,7 +278,7 @@ func (j *ActiveSide) RegisterMetrics(registerer prometheus.Registerer) {
|
|||||||
func (j *ActiveSide) Name() string { return j.name }
|
func (j *ActiveSide) Name() string { return j.name }
|
||||||
|
|
||||||
type ActiveSideStatus struct {
|
type ActiveSideStatus struct {
|
||||||
Replication *replication.Report
|
Replication *report.Report
|
||||||
PruningSender, PruningReceiver *pruner.Report
|
PruningSender, PruningReceiver *pruner.Report
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,8 +287,8 @@ func (j *ActiveSide) Status() *Status {
|
|||||||
|
|
||||||
s := &ActiveSideStatus{}
|
s := &ActiveSideStatus{}
|
||||||
t := j.mode.Type()
|
t := j.mode.Type()
|
||||||
if tasks.replication != nil {
|
if tasks.replicationReport != nil {
|
||||||
s.Replication = tasks.replication.Report()
|
s.Replication = tasks.replicationReport()
|
||||||
}
|
}
|
||||||
if tasks.prunerSender != nil {
|
if tasks.prunerSender != nil {
|
||||||
s.PruningSender = tasks.prunerSender.Report()
|
s.PruningSender = tasks.prunerSender.Report()
|
||||||
@ -345,78 +349,6 @@ func (j *ActiveSide) do(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// The code after this watchdog goroutine is sequential and transitions the state from
|
|
||||||
// ActiveSideReplicating -> ActiveSidePruneSender -> ActiveSidePruneReceiver -> ActiveSideDone
|
|
||||||
// If any of those sequential tasks 'gets stuck' (livelock, no progress), the watchdog will eventually
|
|
||||||
// cancel its context.
|
|
||||||
// If the task is written to support context cancellation, it will return immediately (in permanent error state),
|
|
||||||
// and the sequential code above transitions to the next state.
|
|
||||||
go func() {
|
|
||||||
|
|
||||||
wdto := envconst.Duration("ZREPL_JOB_WATCHDOG_TIMEOUT", 10*time.Minute)
|
|
||||||
jitter := envconst.Duration("ZREPL_JOB_WATCHDOG_JITTER", 1*time.Second)
|
|
||||||
// shadowing!
|
|
||||||
log := log.WithField("watchdog_timeout", wdto.String())
|
|
||||||
|
|
||||||
log.Debug("starting watchdog")
|
|
||||||
defer log.Debug("watchdog stopped")
|
|
||||||
|
|
||||||
t := time.NewTicker(wdto)
|
|
||||||
defer t.Stop()
|
|
||||||
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
case <-t.C: // fall
|
|
||||||
}
|
|
||||||
|
|
||||||
j.updateTasks(func(tasks *activeSideTasks) {
|
|
||||||
// Since cancelling a task will cause the sequential code to transition to the next state immediately,
|
|
||||||
// we cannot check for its progress right then (no fallthrough).
|
|
||||||
// Instead, we return (not continue because we are in a closure) and give the new state another
|
|
||||||
// ZREPL_JOB_WATCHDOG_TIMEOUT interval to try make some progress.
|
|
||||||
|
|
||||||
log.WithField("state", tasks.state).Debug("watchdog firing")
|
|
||||||
|
|
||||||
const WATCHDOG_ENVCONST_NOTICE = " (adjust ZREPL_JOB_WATCHDOG_TIMEOUT env variable if inappropriate)"
|
|
||||||
|
|
||||||
switch tasks.state {
|
|
||||||
case ActiveSideReplicating:
|
|
||||||
log.WithField("replication_progress", tasks.replication.Progress.String()).
|
|
||||||
Debug("check replication progress")
|
|
||||||
if tasks.replication.Progress.CheckTimeout(wdto, jitter) {
|
|
||||||
log.Error("replication did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE)
|
|
||||||
tasks.replicationCancel()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
case ActiveSidePruneSender:
|
|
||||||
log.WithField("prune_sender_progress", tasks.replication.Progress.String()).
|
|
||||||
Debug("check pruner_sender progress")
|
|
||||||
if tasks.prunerSender.Progress.CheckTimeout(wdto, jitter) {
|
|
||||||
log.Error("pruner_sender did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE)
|
|
||||||
tasks.prunerSenderCancel()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
case ActiveSidePruneReceiver:
|
|
||||||
log.WithField("prune_receiver_progress", tasks.replication.Progress.String()).
|
|
||||||
Debug("check pruner_receiver progress")
|
|
||||||
if tasks.prunerReceiver.Progress.CheckTimeout(wdto, jitter) {
|
|
||||||
log.Error("pruner_receiver did not make progress, cancelling" + WATCHDOG_ENVCONST_NOTICE)
|
|
||||||
tasks.prunerReceiverCancel()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
case ActiveSideDone:
|
|
||||||
// ignore, ctx will be Done() in a few milliseconds and the watchdog will exit
|
|
||||||
default:
|
|
||||||
log.WithField("state", tasks.state).
|
|
||||||
Error("watchdog implementation error: unknown active side state")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
sender, receiver := j.mode.SenderReceiver()
|
sender, receiver := j.mode.SenderReceiver()
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -426,15 +358,18 @@ func (j *ActiveSide) do(ctx context.Context) {
|
|||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
ctx, repCancel := context.WithCancel(ctx)
|
ctx, repCancel := context.WithCancel(ctx)
|
||||||
tasks := j.updateTasks(func(tasks *activeSideTasks) {
|
var repWait driver.WaitFunc
|
||||||
|
j.updateTasks(func(tasks *activeSideTasks) {
|
||||||
// reset it
|
// reset it
|
||||||
*tasks = activeSideTasks{}
|
*tasks = activeSideTasks{}
|
||||||
tasks.replicationCancel = repCancel
|
tasks.replicationCancel = repCancel
|
||||||
tasks.replication = replication.NewReplication(j.promRepStateSecs, j.promBytesReplicated)
|
tasks.replicationReport, repWait = replication.Do(
|
||||||
|
ctx, logic.NewPlanner(j.promRepStateSecs, j.promBytesReplicated, sender, receiver),
|
||||||
|
)
|
||||||
tasks.state = ActiveSideReplicating
|
tasks.state = ActiveSideReplicating
|
||||||
})
|
})
|
||||||
log.Info("start replication")
|
log.Info("start replication")
|
||||||
tasks.replication.Drive(ctx, sender, receiver)
|
repWait(true) // wait blocking
|
||||||
repCancel() // always cancel to free up context resources
|
repCancel() // always cancel to free up context resources
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ import (
|
|||||||
"github.com/zrepl/zrepl/daemon/snapper"
|
"github.com/zrepl/zrepl/daemon/snapper"
|
||||||
"github.com/zrepl/zrepl/endpoint"
|
"github.com/zrepl/zrepl/endpoint"
|
||||||
"github.com/zrepl/zrepl/logger"
|
"github.com/zrepl/zrepl/logger"
|
||||||
"github.com/zrepl/zrepl/replication"
|
"github.com/zrepl/zrepl/replication/driver"
|
||||||
"github.com/zrepl/zrepl/rpc"
|
"github.com/zrepl/zrepl/rpc"
|
||||||
"github.com/zrepl/zrepl/rpc/transportmux"
|
"github.com/zrepl/zrepl/rpc/transportmux"
|
||||||
"github.com/zrepl/zrepl/tlsconf"
|
"github.com/zrepl/zrepl/tlsconf"
|
||||||
@ -79,7 +79,7 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func WithSubsystemLoggers(ctx context.Context, log logger.Logger) context.Context {
|
func WithSubsystemLoggers(ctx context.Context, log logger.Logger) context.Context {
|
||||||
ctx = replication.WithLogger(ctx, log.WithField(SubsysField, SubsysReplication))
|
ctx = driver.WithLogger(ctx, log.WithField(SubsysField, SubsysReplication))
|
||||||
ctx = endpoint.WithLogger(ctx, log.WithField(SubsysField, SubsyEndpoint))
|
ctx = endpoint.WithLogger(ctx, log.WithField(SubsysField, SubsyEndpoint))
|
||||||
ctx = pruner.WithLogger(ctx, log.WithField(SubsysField, SubsysPruning))
|
ctx = pruner.WithLogger(ctx, log.WithField(SubsysField, SubsysPruning))
|
||||||
ctx = snapper.WithLogger(ctx, log.WithField(SubsysField, SubsysSnapshot))
|
ctx = snapper.WithLogger(ctx, log.WithField(SubsysField, SubsysSnapshot))
|
||||||
|
@ -8,10 +8,9 @@ import (
|
|||||||
"github.com/zrepl/zrepl/config"
|
"github.com/zrepl/zrepl/config"
|
||||||
"github.com/zrepl/zrepl/logger"
|
"github.com/zrepl/zrepl/logger"
|
||||||
"github.com/zrepl/zrepl/pruning"
|
"github.com/zrepl/zrepl/pruning"
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
"github.com/zrepl/zrepl/util/envconst"
|
"github.com/zrepl/zrepl/util/envconst"
|
||||||
"github.com/zrepl/zrepl/util/watchdog"
|
"github.com/zrepl/zrepl/util/watchdog"
|
||||||
"net"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@ -21,6 +20,7 @@ import (
|
|||||||
// Try to keep it compatible with github.com/zrepl/zrepl/endpoint.Endpoint
|
// Try to keep it compatible with github.com/zrepl/zrepl/endpoint.Endpoint
|
||||||
type History interface {
|
type History interface {
|
||||||
ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
|
ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
|
||||||
|
ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to keep it compatible with github.com/zrepl/zrepl/endpoint.Endpoint
|
// Try to keep it compatible with github.com/zrepl/zrepl/endpoint.Endpoint
|
||||||
@ -66,8 +66,7 @@ type Pruner struct {
|
|||||||
|
|
||||||
state State
|
state State
|
||||||
|
|
||||||
// State ErrWait|ErrPerm
|
// State PlanErr
|
||||||
sleepUntil time.Time
|
|
||||||
err error
|
err error
|
||||||
|
|
||||||
// State Exec
|
// State Exec
|
||||||
@ -206,62 +205,34 @@ type State int
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
Plan State = 1 << iota
|
Plan State = 1 << iota
|
||||||
PlanWait
|
PlanErr
|
||||||
Exec
|
Exec
|
||||||
ExecWait
|
ExecErr
|
||||||
ErrPerm
|
|
||||||
Done
|
Done
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s State) statefunc() state {
|
type updater func(func(*Pruner))
|
||||||
var statemap = map[State]state{
|
|
||||||
Plan: statePlan,
|
|
||||||
PlanWait: statePlanWait,
|
|
||||||
Exec: stateExec,
|
|
||||||
ExecWait: stateExecWait,
|
|
||||||
ErrPerm: nil,
|
|
||||||
Done: nil,
|
|
||||||
}
|
|
||||||
return statemap[s]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s State) IsTerminal() bool {
|
|
||||||
return s.statefunc() == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type updater func(func(*Pruner)) State
|
|
||||||
type state func(args *args, u updater) state
|
|
||||||
|
|
||||||
func (p *Pruner) Prune() {
|
func (p *Pruner) Prune() {
|
||||||
p.prune(p.args)
|
p.prune(p.args)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Pruner) prune(args args) {
|
func (p *Pruner) prune(args args) {
|
||||||
s := p.state.statefunc()
|
u := func(f func(*Pruner)) {
|
||||||
for s != nil {
|
|
||||||
pre := p.state
|
|
||||||
s = s(&args, func(f func(*Pruner)) State {
|
|
||||||
p.mtx.Lock()
|
p.mtx.Lock()
|
||||||
defer p.mtx.Unlock()
|
defer p.mtx.Unlock()
|
||||||
f(p)
|
f(p)
|
||||||
return p.state
|
|
||||||
})
|
|
||||||
post := p.state
|
|
||||||
GetLogger(args.ctx).
|
|
||||||
WithField("transition", fmt.Sprintf("%s=>%s", pre, post)).
|
|
||||||
Debug("state transition")
|
|
||||||
if err := p.Error(); err != nil {
|
|
||||||
GetLogger(args.ctx).
|
|
||||||
WithError(p.err).
|
|
||||||
WithField("state", post.String()).
|
|
||||||
Error("entering error state after error")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
// TODO support automatic retries
|
||||||
|
// It is advisable to merge this code with package replication/driver before adding retry support.
|
||||||
|
// That will likely require re-modelling struct fs like replication/driver.attempt,
|
||||||
|
// including figuring out how to resume a plan after being interrupted by network errors
|
||||||
|
// The non-retrying code in this package should move straight to replication/logic.
|
||||||
|
doOneAttempt(&args, u)
|
||||||
}
|
}
|
||||||
|
|
||||||
type Report struct {
|
type Report struct {
|
||||||
State string
|
State string
|
||||||
SleepUntil time.Time
|
|
||||||
Error string
|
Error string
|
||||||
Pending, Completed []FSReport
|
Pending, Completed []FSReport
|
||||||
}
|
}
|
||||||
@ -269,7 +240,7 @@ type Report struct {
|
|||||||
type FSReport struct {
|
type FSReport struct {
|
||||||
Filesystem string
|
Filesystem string
|
||||||
SnapshotList, DestroyList []SnapshotReport
|
SnapshotList, DestroyList []SnapshotReport
|
||||||
ErrorCount int
|
SkipReason FSSkipReason
|
||||||
LastError string
|
LastError string
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -285,14 +256,9 @@ func (p *Pruner) Report() *Report {
|
|||||||
|
|
||||||
r := Report{State: p.state.String()}
|
r := Report{State: p.state.String()}
|
||||||
|
|
||||||
if p.state & (PlanWait|ExecWait) != 0 {
|
|
||||||
r.SleepUntil = p.sleepUntil
|
|
||||||
}
|
|
||||||
if p.state & (PlanWait|ExecWait|ErrPerm) != 0 {
|
|
||||||
if p.err != nil {
|
if p.err != nil {
|
||||||
r.Error = p.err.Error()
|
r.Error = p.err.Error()
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if p.execQueue != nil {
|
if p.execQueue != nil {
|
||||||
r.Pending, r.Completed = p.execQueue.Report()
|
r.Pending, r.Completed = p.execQueue.Report()
|
||||||
@ -307,20 +273,16 @@ func (p *Pruner) State() State {
|
|||||||
return p.state
|
return p.state
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Pruner) Error() error {
|
|
||||||
p.mtx.Lock()
|
|
||||||
defer p.mtx.Unlock()
|
|
||||||
if p.state & (PlanWait|ExecWait|ErrPerm) != 0 {
|
|
||||||
return p.err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type fs struct {
|
type fs struct {
|
||||||
path string
|
path string
|
||||||
|
|
||||||
// permanent error during planning
|
// permanent error during planning
|
||||||
planErr error
|
planErr error
|
||||||
|
planErrContext string
|
||||||
|
|
||||||
|
// if != "", the fs was skipped for planning and the field
|
||||||
|
// contains the reason
|
||||||
|
skipReason FSSkipReason
|
||||||
|
|
||||||
// snapshots presented by target
|
// snapshots presented by target
|
||||||
// (type snapshot)
|
// (type snapshot)
|
||||||
@ -333,8 +295,18 @@ type fs struct {
|
|||||||
|
|
||||||
// only during Exec state, also used by execQueue
|
// only during Exec state, also used by execQueue
|
||||||
execErrLast error
|
execErrLast error
|
||||||
execErrCount int
|
}
|
||||||
|
|
||||||
|
type FSSkipReason string
|
||||||
|
|
||||||
|
const (
|
||||||
|
NotSkipped = ""
|
||||||
|
SkipPlaceholder = "filesystem is placeholder"
|
||||||
|
SkipNoCorrespondenceOnSender = "filesystem has no correspondence on sender"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (r FSSkipReason) NotSkipped() bool {
|
||||||
|
return r == NotSkipped
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *fs) Report() FSReport {
|
func (f *fs) Report() FSReport {
|
||||||
@ -343,7 +315,11 @@ func (f *fs) Report() FSReport {
|
|||||||
|
|
||||||
r := FSReport{}
|
r := FSReport{}
|
||||||
r.Filesystem = f.path
|
r.Filesystem = f.path
|
||||||
r.ErrorCount = f.execErrCount
|
r.SkipReason = f.skipReason
|
||||||
|
if !r.SkipReason.NotSkipped() {
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
if f.planErr != nil {
|
if f.planErr != nil {
|
||||||
r.LastError = f.planErr.Error()
|
r.LastError = f.planErr.Error()
|
||||||
} else if f.execErrLast != nil {
|
} else if f.execErrLast != nil {
|
||||||
@ -385,39 +361,7 @@ func (s snapshot) Replicated() bool { return s.replicated }
|
|||||||
|
|
||||||
func (s snapshot) Date() time.Time { return s.date }
|
func (s snapshot) Date() time.Time { return s.date }
|
||||||
|
|
||||||
type Error interface {
|
func doOneAttempt(a *args, u updater) {
|
||||||
error
|
|
||||||
Temporary() bool
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ Error = net.Error(nil)
|
|
||||||
|
|
||||||
func shouldRetry(e error) bool {
|
|
||||||
if neterr, ok := e.(net.Error); ok {
|
|
||||||
return neterr.Temporary()
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func onErr(u updater, e error) state {
|
|
||||||
return u(func(p *Pruner) {
|
|
||||||
p.err = e
|
|
||||||
if !shouldRetry(e) {
|
|
||||||
p.state = ErrPerm
|
|
||||||
return
|
|
||||||
}
|
|
||||||
switch p.state {
|
|
||||||
case Plan:
|
|
||||||
p.state = PlanWait
|
|
||||||
case Exec:
|
|
||||||
p.state = ExecWait
|
|
||||||
default:
|
|
||||||
panic(p.state)
|
|
||||||
}
|
|
||||||
}).statefunc()
|
|
||||||
}
|
|
||||||
|
|
||||||
func statePlan(a *args, u updater) state {
|
|
||||||
|
|
||||||
ctx, target, receiver := a.ctx, a.target, a.receiver
|
ctx, target, receiver := a.ctx, a.target, a.receiver
|
||||||
var ka *watchdog.KeepAlive
|
var ka *watchdog.KeepAlive
|
||||||
@ -425,28 +369,62 @@ func statePlan(a *args, u updater) state {
|
|||||||
ka = &pruner.Progress
|
ka = &pruner.Progress
|
||||||
})
|
})
|
||||||
|
|
||||||
|
sfssres, err := receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
||||||
|
if err != nil {
|
||||||
|
u(func(p *Pruner) {
|
||||||
|
p.state = PlanErr
|
||||||
|
p.err = err
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
sfss := make(map[string]*pdu.Filesystem)
|
||||||
|
for _, sfs := range sfssres.GetFilesystems() {
|
||||||
|
sfss[sfs.GetPath()] = sfs
|
||||||
|
}
|
||||||
|
|
||||||
tfssres, err := target.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
tfssres, err := target.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return onErr(u, err)
|
u(func(p *Pruner) {
|
||||||
|
p.state = PlanErr
|
||||||
|
p.err = err
|
||||||
|
})
|
||||||
|
return
|
||||||
}
|
}
|
||||||
tfss := tfssres.GetFilesystems()
|
tfss := tfssres.GetFilesystems()
|
||||||
|
|
||||||
pfss := make([]*fs, len(tfss))
|
pfss := make([]*fs, len(tfss))
|
||||||
|
tfss_loop:
|
||||||
for i, tfs := range tfss {
|
for i, tfs := range tfss {
|
||||||
|
|
||||||
l := GetLogger(ctx).WithField("fs", tfs.Path)
|
l := GetLogger(ctx).WithField("fs", tfs.Path)
|
||||||
l.Debug("plan filesystem")
|
l.Debug("plan filesystem")
|
||||||
|
|
||||||
|
|
||||||
pfs := &fs{
|
pfs := &fs{
|
||||||
path: tfs.Path,
|
path: tfs.Path,
|
||||||
}
|
}
|
||||||
pfss[i] = pfs
|
pfss[i] = pfs
|
||||||
|
|
||||||
|
if tfs.GetIsPlaceholder() {
|
||||||
|
pfs.skipReason = SkipPlaceholder
|
||||||
|
l.WithField("skip_reason", pfs.skipReason).Debug("skipping filesystem")
|
||||||
|
continue
|
||||||
|
} else if sfs := sfss[tfs.GetPath()]; sfs == nil {
|
||||||
|
pfs.skipReason = SkipNoCorrespondenceOnSender
|
||||||
|
l.WithField("skip_reason", pfs.skipReason).WithField("sfs", sfs.GetPath()).Debug("skipping filesystem")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
pfsPlanErrAndLog := func(err error, message string) {
|
||||||
|
t := fmt.Sprintf("%T", err)
|
||||||
|
pfs.planErr = err
|
||||||
|
pfs.planErrContext = message
|
||||||
|
l.WithField("orig_err_type", t).WithError(err).Error(fmt.Sprintf("%s: plan error, skipping filesystem", message))
|
||||||
|
}
|
||||||
|
|
||||||
tfsvsres, err := target.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: tfs.Path})
|
tfsvsres, err := target.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: tfs.Path})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
l.WithError(err).Error("cannot list filesystem versions")
|
pfsPlanErrAndLog(err, "cannot list filesystem versions")
|
||||||
return onErr(u, err)
|
continue tfss_loop
|
||||||
}
|
}
|
||||||
tfsvs := tfsvsres.GetVersions()
|
tfsvs := tfsvsres.GetVersions()
|
||||||
// no progress here since we could run in a live-lock (must have used target AND receiver before progress)
|
// no progress here since we could run in a live-lock (must have used target AND receiver before progress)
|
||||||
@ -461,18 +439,16 @@ func statePlan(a *args, u updater) state {
|
|||||||
}
|
}
|
||||||
rc, err := receiver.ReplicationCursor(ctx, rcReq)
|
rc, err := receiver.ReplicationCursor(ctx, rcReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
l.WithError(err).Error("cannot get replication cursor")
|
pfsPlanErrAndLog(err, "cannot get replication cursor bookmark")
|
||||||
return onErr(u, err)
|
continue tfss_loop
|
||||||
}
|
}
|
||||||
ka.MadeProgress()
|
ka.MadeProgress()
|
||||||
if rc.GetNotexist() {
|
if rc.GetNotexist() {
|
||||||
l.Error("replication cursor does not exist, skipping")
|
err := errors.New("replication cursor bookmark does not exist (one successful replication is required before pruning works)")
|
||||||
pfs.destroyList = []pruning.Snapshot{}
|
pfsPlanErrAndLog(err, "")
|
||||||
pfs.planErr = fmt.Errorf("replication cursor bookmark does not exist (one successful replication is required before pruning works)")
|
continue tfss_loop
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// scan from older to newer, all snapshots older than cursor are interpreted as replicated
|
// scan from older to newer, all snapshots older than cursor are interpreted as replicated
|
||||||
sort.Slice(tfsvs, func(i, j int) bool {
|
sort.Slice(tfsvs, func(i, j int) bool {
|
||||||
return tfsvs[i].CreateTXG < tfsvs[j].CreateTXG
|
return tfsvs[i].CreateTXG < tfsvs[j].CreateTXG
|
||||||
@ -494,11 +470,9 @@ func statePlan(a *args, u updater) state {
|
|||||||
}
|
}
|
||||||
creation, err := tfsv.CreationAsTime()
|
creation, err := tfsv.CreationAsTime()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := fmt.Errorf("%s%s has invalid creation date: %s", tfs, tfsv.RelName(), err)
|
err := fmt.Errorf("%s: %s", tfsv.RelName(), err)
|
||||||
l.WithError(err).
|
pfsPlanErrAndLog(err, "fs version with invalid creation date")
|
||||||
WithField("tfsv", tfsv.RelName()).
|
continue tfss_loop
|
||||||
Error("error with fileesystem version")
|
|
||||||
return onErr(u, err)
|
|
||||||
}
|
}
|
||||||
// note that we cannot use CreateTXG because target and receiver could be on different pools
|
// note that we cannot use CreateTXG because target and receiver could be on different pools
|
||||||
atCursor := tfsv.Guid == rc.GetGuid()
|
atCursor := tfsv.Guid == rc.GetGuid()
|
||||||
@ -510,9 +484,8 @@ func statePlan(a *args, u updater) state {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
if preCursor {
|
if preCursor {
|
||||||
err := fmt.Errorf("replication cursor not found in prune target filesystem versions")
|
pfsPlanErrAndLog(fmt.Errorf("replication cursor not found in prune target filesystem versions"), "")
|
||||||
l.Error(err.Error())
|
continue tfss_loop
|
||||||
return onErr(u, err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply prune rules
|
// Apply prune rules
|
||||||
@ -520,34 +493,56 @@ func statePlan(a *args, u updater) state {
|
|||||||
ka.MadeProgress()
|
ka.MadeProgress()
|
||||||
}
|
}
|
||||||
|
|
||||||
return u(func(pruner *Pruner) {
|
u(func(pruner *Pruner) {
|
||||||
pruner.Progress.MadeProgress()
|
pruner.Progress.MadeProgress()
|
||||||
pruner.execQueue = newExecQueue(len(pfss))
|
pruner.execQueue = newExecQueue(len(pfss))
|
||||||
for _, pfs := range pfss {
|
for _, pfs := range pfss {
|
||||||
pruner.execQueue.Put(pfs, nil, false)
|
pruner.execQueue.Put(pfs, nil, false)
|
||||||
}
|
}
|
||||||
pruner.state = Exec
|
pruner.state = Exec
|
||||||
}).statefunc()
|
})
|
||||||
}
|
|
||||||
|
|
||||||
func stateExec(a *args, u updater) state {
|
|
||||||
|
|
||||||
|
for {
|
||||||
var pfs *fs
|
var pfs *fs
|
||||||
state := u(func(pruner *Pruner) {
|
u(func(pruner *Pruner) {
|
||||||
pfs = pruner.execQueue.Pop()
|
pfs = pruner.execQueue.Pop()
|
||||||
|
})
|
||||||
if pfs == nil {
|
if pfs == nil {
|
||||||
nextState := Done
|
break
|
||||||
if pruner.execQueue.HasCompletedFSWithErrors() {
|
|
||||||
nextState = ErrPerm
|
|
||||||
}
|
}
|
||||||
pruner.state = nextState
|
doOneAttemptExec(a, u, pfs)
|
||||||
return
|
}
|
||||||
|
|
||||||
|
var rep *Report
|
||||||
|
{
|
||||||
|
// must not hold lock for report
|
||||||
|
var pruner *Pruner
|
||||||
|
u(func(p *Pruner) {
|
||||||
|
pruner = p
|
||||||
|
})
|
||||||
|
rep = pruner.Report()
|
||||||
|
}
|
||||||
|
u(func(p *Pruner) {
|
||||||
|
if len(rep.Pending) > 0 {
|
||||||
|
panic("queue should not have pending items at this point")
|
||||||
|
}
|
||||||
|
hadErr := false
|
||||||
|
for _, fsr := range rep.Completed {
|
||||||
|
hadErr = hadErr || fsr.SkipReason.NotSkipped() && fsr.LastError != ""
|
||||||
|
}
|
||||||
|
if hadErr {
|
||||||
|
p.state = ExecErr
|
||||||
|
} else {
|
||||||
|
p.state = Done
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
if state != Exec {
|
|
||||||
return state.statefunc()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// attempts to exec pfs, puts it back into the queue with the result
|
||||||
|
func doOneAttemptExec(a *args, u updater, pfs *fs) {
|
||||||
|
|
||||||
destroyList := make([]*pdu.FilesystemVersion, len(pfs.destroyList))
|
destroyList := make([]*pdu.FilesystemVersion, len(pfs.destroyList))
|
||||||
for i := range destroyList {
|
for i := range destroyList {
|
||||||
destroyList[i] = pfs.destroyList[i].(snapshot).fsv
|
destroyList[i] = pfs.destroyList[i].(snapshot).fsv
|
||||||
@ -566,7 +561,7 @@ func stateExec(a *args, u updater) state {
|
|||||||
u(func(pruner *Pruner) {
|
u(func(pruner *Pruner) {
|
||||||
pruner.execQueue.Put(pfs, err, false)
|
pruner.execQueue.Put(pfs, err, false)
|
||||||
})
|
})
|
||||||
return onErr(u, err)
|
return
|
||||||
}
|
}
|
||||||
// check if all snapshots were destroyed
|
// check if all snapshots were destroyed
|
||||||
destroyResults := make(map[string]*pdu.DestroySnapshotRes)
|
destroyResults := make(map[string]*pdu.DestroySnapshotRes)
|
||||||
@ -607,31 +602,6 @@ func stateExec(a *args, u updater) state {
|
|||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
GetLogger(a.ctx).WithError(err).Error("target could not destroy snapshots")
|
GetLogger(a.ctx).WithError(err).Error("target could not destroy snapshots")
|
||||||
return onErr(u, err)
|
return
|
||||||
}
|
|
||||||
|
|
||||||
return u(func(pruner *Pruner) {
|
|
||||||
pruner.Progress.MadeProgress()
|
|
||||||
}).statefunc()
|
|
||||||
}
|
|
||||||
|
|
||||||
func stateExecWait(a *args, u updater) state {
|
|
||||||
return doWait(Exec, a, u)
|
|
||||||
}
|
|
||||||
|
|
||||||
func statePlanWait(a *args, u updater) state {
|
|
||||||
return doWait(Plan, a, u)
|
|
||||||
}
|
|
||||||
|
|
||||||
func doWait(goback State, a *args, u updater) state {
|
|
||||||
timer := time.NewTimer(a.retryWait)
|
|
||||||
defer timer.Stop()
|
|
||||||
select {
|
|
||||||
case <-timer.C:
|
|
||||||
return u(func(pruner *Pruner) {
|
|
||||||
pruner.state = goback
|
|
||||||
}).statefunc()
|
|
||||||
case <-a.ctx.Done():
|
|
||||||
return onErr(u, a.ctx.Err())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -58,10 +58,7 @@ func (q *execQueue) Pop() *fs {
|
|||||||
func(q *execQueue) Put(fs *fs, err error, done bool) {
|
func(q *execQueue) Put(fs *fs, err error, done bool) {
|
||||||
fs.mtx.Lock()
|
fs.mtx.Lock()
|
||||||
fs.execErrLast = err
|
fs.execErrLast = err
|
||||||
if err != nil {
|
if done || err != nil {
|
||||||
fs.execErrCount++
|
|
||||||
}
|
|
||||||
if done || (err != nil && !shouldRetry(fs.execErrLast)) {
|
|
||||||
fs.mtx.Unlock()
|
fs.mtx.Unlock()
|
||||||
q.mtx.Lock()
|
q.mtx.Lock()
|
||||||
q.completed = append(q.completed, fs)
|
q.completed = append(q.completed, fs)
|
||||||
@ -78,9 +75,6 @@ func(q *execQueue) Put(fs *fs, err error, done bool) {
|
|||||||
defer q.pending[i].mtx.Unlock()
|
defer q.pending[i].mtx.Unlock()
|
||||||
q.pending[j].mtx.Lock()
|
q.pending[j].mtx.Lock()
|
||||||
defer q.pending[j].mtx.Unlock()
|
defer q.pending[j].mtx.Unlock()
|
||||||
if q.pending[i].execErrCount != q.pending[j].execErrCount {
|
|
||||||
return q.pending[i].execErrCount < q.pending[j].execErrCount
|
|
||||||
}
|
|
||||||
return strings.Compare(q.pending[i].path, q.pending[j].path) == -1
|
return strings.Compare(q.pending[i].path, q.pending[j].path) == -1
|
||||||
})
|
})
|
||||||
q.mtx.Unlock()
|
q.mtx.Unlock()
|
||||||
|
@ -1,206 +0,0 @@
|
|||||||
package pruner
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/zrepl/zrepl/logger"
|
|
||||||
"github.com/zrepl/zrepl/pruning"
|
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
|
||||||
"net"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
type mockFS struct {
|
|
||||||
path string
|
|
||||||
snaps []string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *mockFS) Filesystem() *pdu.Filesystem {
|
|
||||||
return &pdu.Filesystem{
|
|
||||||
Path: m.path,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *mockFS) FilesystemVersions() []*pdu.FilesystemVersion {
|
|
||||||
versions := make([]*pdu.FilesystemVersion, len(m.snaps))
|
|
||||||
for i, v := range m.snaps {
|
|
||||||
versions[i] = &pdu.FilesystemVersion{
|
|
||||||
Type: pdu.FilesystemVersion_Snapshot,
|
|
||||||
Name: v,
|
|
||||||
Creation: pdu.FilesystemVersionCreation(time.Unix(0, 0)),
|
|
||||||
Guid: uint64(i),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return versions
|
|
||||||
}
|
|
||||||
|
|
||||||
type mockTarget struct {
|
|
||||||
fss []mockFS
|
|
||||||
destroyed map[string][]string
|
|
||||||
listVersionsErrs map[string][]error
|
|
||||||
listFilesystemsErr []error
|
|
||||||
destroyErrs map[string][]error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *mockTarget) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) {
|
|
||||||
if len(t.listFilesystemsErr) > 0 {
|
|
||||||
e := t.listFilesystemsErr[0]
|
|
||||||
t.listFilesystemsErr = t.listFilesystemsErr[1:]
|
|
||||||
return nil, e
|
|
||||||
}
|
|
||||||
fss := make([]*pdu.Filesystem, len(t.fss))
|
|
||||||
for i := range fss {
|
|
||||||
fss[i] = t.fss[i].Filesystem()
|
|
||||||
}
|
|
||||||
return &pdu.ListFilesystemRes{Filesystems: fss}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *mockTarget) ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error) {
|
|
||||||
fs := req.Filesystem
|
|
||||||
if len(t.listVersionsErrs[fs]) != 0 {
|
|
||||||
e := t.listVersionsErrs[fs][0]
|
|
||||||
t.listVersionsErrs[fs] = t.listVersionsErrs[fs][1:]
|
|
||||||
return nil, e
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, mfs := range t.fss {
|
|
||||||
if mfs.path != fs {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
return &pdu.ListFilesystemVersionsRes{Versions: mfs.FilesystemVersions()}, nil
|
|
||||||
}
|
|
||||||
return nil, fmt.Errorf("filesystem %s does not exist", fs)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *mockTarget) DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) {
|
|
||||||
fs, snaps := req.Filesystem, req.Snapshots
|
|
||||||
if len(t.destroyErrs[fs]) != 0 {
|
|
||||||
e := t.destroyErrs[fs][0]
|
|
||||||
t.destroyErrs[fs] = t.destroyErrs[fs][1:]
|
|
||||||
return nil, e
|
|
||||||
}
|
|
||||||
destroyed := t.destroyed[fs]
|
|
||||||
res := make([]*pdu.DestroySnapshotRes, len(snaps))
|
|
||||||
for i, s := range snaps {
|
|
||||||
destroyed = append(destroyed, s.Name)
|
|
||||||
res[i] = &pdu.DestroySnapshotRes{Error: "", Snapshot: s}
|
|
||||||
}
|
|
||||||
t.destroyed[fs] = destroyed
|
|
||||||
return &pdu.DestroySnapshotsRes{Results: res}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type mockCursor struct {
|
|
||||||
snapname string
|
|
||||||
guid uint64
|
|
||||||
}
|
|
||||||
type mockHistory struct {
|
|
||||||
errs map[string][]error
|
|
||||||
cursors map[string]*mockCursor
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *mockHistory) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
|
|
||||||
fs := req.Filesystem
|
|
||||||
if len(r.errs[fs]) > 0 {
|
|
||||||
e := r.errs[fs][0]
|
|
||||||
r.errs[fs] = r.errs[fs][1:]
|
|
||||||
return nil, e
|
|
||||||
}
|
|
||||||
return &pdu.ReplicationCursorRes{Result: &pdu.ReplicationCursorRes_Guid{Guid: 0}}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type stubNetErr struct {
|
|
||||||
msg string
|
|
||||||
temporary, timeout bool
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ net.Error = stubNetErr{}
|
|
||||||
|
|
||||||
func (e stubNetErr) Error() string {
|
|
||||||
return e.msg
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e stubNetErr) Temporary() bool { return e.temporary }
|
|
||||||
|
|
||||||
func (e stubNetErr) Timeout() bool { return e.timeout }
|
|
||||||
|
|
||||||
func TestPruner_Prune(t *testing.T) {
|
|
||||||
|
|
||||||
var _ net.Error = &net.OpError{} // we use it below
|
|
||||||
target := &mockTarget{
|
|
||||||
listFilesystemsErr: []error{
|
|
||||||
stubNetErr{msg: "fakerror0", temporary: true},
|
|
||||||
},
|
|
||||||
listVersionsErrs: map[string][]error{
|
|
||||||
"zroot/foo": {
|
|
||||||
stubNetErr{msg: "fakeerror1", temporary: true},
|
|
||||||
stubNetErr{msg: "fakeerror2", temporary: true,},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
destroyErrs: map[string][]error{
|
|
||||||
"zroot/baz": {
|
|
||||||
stubNetErr{msg: "fakeerror3", temporary: true}, // first error puts it back in the queue
|
|
||||||
stubNetErr{msg:"permanent error"}, // so it will be last when pruner gives up due to permanent err
|
|
||||||
},
|
|
||||||
},
|
|
||||||
destroyed: make(map[string][]string),
|
|
||||||
fss: []mockFS{
|
|
||||||
{
|
|
||||||
path: "zroot/foo",
|
|
||||||
snaps: []string{
|
|
||||||
"keep_a",
|
|
||||||
"keep_b",
|
|
||||||
"drop_c",
|
|
||||||
"keep_d",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
path: "zroot/bar",
|
|
||||||
snaps: []string{
|
|
||||||
"keep_e",
|
|
||||||
"keep_f",
|
|
||||||
"drop_g",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
path: "zroot/baz",
|
|
||||||
snaps: []string{
|
|
||||||
"keep_h",
|
|
||||||
"drop_i",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
history := &mockHistory{
|
|
||||||
errs: map[string][]error{
|
|
||||||
"zroot/foo": {
|
|
||||||
stubNetErr{msg: "fakeerror4", temporary: true},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
keepRules := []pruning.KeepRule{pruning.MustKeepRegex("^keep", false)}
|
|
||||||
|
|
||||||
p := Pruner{
|
|
||||||
args: args{
|
|
||||||
ctx: WithLogger(context.Background(), logger.NewTestLogger(t)),
|
|
||||||
target: target,
|
|
||||||
receiver: history,
|
|
||||||
rules: keepRules,
|
|
||||||
retryWait: 10*time.Millisecond,
|
|
||||||
},
|
|
||||||
state: Plan,
|
|
||||||
}
|
|
||||||
p.Prune()
|
|
||||||
|
|
||||||
exp := map[string][]string{
|
|
||||||
"zroot/foo": {"drop_c"},
|
|
||||||
"zroot/bar": {"drop_g"},
|
|
||||||
}
|
|
||||||
|
|
||||||
assert.Equal(t, exp, target.destroyed)
|
|
||||||
|
|
||||||
//assert.Equal(t, map[string][]error{}, target.listVersionsErrs, "retried")
|
|
||||||
|
|
||||||
}
|
|
@ -7,19 +7,17 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
_StateName_0 = "PlanPlanWait"
|
_StateName_0 = "PlanPlanErr"
|
||||||
_StateName_1 = "Exec"
|
_StateName_1 = "Exec"
|
||||||
_StateName_2 = "ExecWait"
|
_StateName_2 = "ExecErr"
|
||||||
_StateName_3 = "ErrPerm"
|
_StateName_3 = "Done"
|
||||||
_StateName_4 = "Done"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
_StateIndex_0 = [...]uint8{0, 4, 12}
|
_StateIndex_0 = [...]uint8{0, 4, 11}
|
||||||
_StateIndex_1 = [...]uint8{0, 4}
|
_StateIndex_1 = [...]uint8{0, 4}
|
||||||
_StateIndex_2 = [...]uint8{0, 8}
|
_StateIndex_2 = [...]uint8{0, 7}
|
||||||
_StateIndex_3 = [...]uint8{0, 7}
|
_StateIndex_3 = [...]uint8{0, 4}
|
||||||
_StateIndex_4 = [...]uint8{0, 4}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func (i State) String() string {
|
func (i State) String() string {
|
||||||
@ -33,22 +31,19 @@ func (i State) String() string {
|
|||||||
return _StateName_2
|
return _StateName_2
|
||||||
case i == 16:
|
case i == 16:
|
||||||
return _StateName_3
|
return _StateName_3
|
||||||
case i == 32:
|
|
||||||
return _StateName_4
|
|
||||||
default:
|
default:
|
||||||
return fmt.Sprintf("State(%d)", i)
|
return fmt.Sprintf("State(%d)", i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var _StateValues = []State{1, 2, 4, 8, 16, 32}
|
var _StateValues = []State{1, 2, 4, 8, 16}
|
||||||
|
|
||||||
var _StateNameToValueMap = map[string]State{
|
var _StateNameToValueMap = map[string]State{
|
||||||
_StateName_0[0:4]: 1,
|
_StateName_0[0:4]: 1,
|
||||||
_StateName_0[4:12]: 2,
|
_StateName_0[4:11]: 2,
|
||||||
_StateName_1[0:4]: 4,
|
_StateName_1[0:4]: 4,
|
||||||
_StateName_2[0:8]: 8,
|
_StateName_2[0:7]: 8,
|
||||||
_StateName_3[0:7]: 16,
|
_StateName_3[0:4]: 16,
|
||||||
_StateName_4[0:4]: 32,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// StateString retrieves an enum value from the enum constants string name.
|
// StateString retrieves an enum value from the enum constants string name.
|
||||||
|
@ -232,7 +232,8 @@ Job Type ``pull``
|
|||||||
- ZFS dataset path are received to
|
- ZFS dataset path are received to
|
||||||
``$root_fs/$client_identity``
|
``$root_fs/$client_identity``
|
||||||
* - ``interval``
|
* - ``interval``
|
||||||
- Interval at which to pull from the source job
|
- | Interval at which to pull from the source job (e.g. ``10m``).
|
||||||
|
| ``manual`` disables periodic pulling, replication then only happens on :ref:`wakeup <cli-signal-wakeup>`.
|
||||||
* - ``pruning``
|
* - ``pruning``
|
||||||
- |pruning-spec|
|
- |pruning-spec|
|
||||||
|
|
||||||
|
@ -13,6 +13,8 @@ CLI Overview
|
|||||||
The zrepl binary is self-documenting:
|
The zrepl binary is self-documenting:
|
||||||
run ``zrepl help`` for an overview of the available subcommands or ``zrepl SUBCOMMAND --help`` for information on available flags, etc.
|
run ``zrepl help`` for an overview of the available subcommands or ``zrepl SUBCOMMAND --help`` for information on available flags, etc.
|
||||||
|
|
||||||
|
.. _cli-signal-wakeup:
|
||||||
|
|
||||||
.. list-table::
|
.. list-table::
|
||||||
:widths: 30 70
|
:widths: 30 70
|
||||||
:header-rows: 1
|
:header-rows: 1
|
||||||
|
@ -7,8 +7,7 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
|
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"github.com/zrepl/zrepl/replication"
|
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
|
||||||
"github.com/zrepl/zrepl/zfs"
|
"github.com/zrepl/zrepl/zfs"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -34,7 +33,7 @@ func (s *Sender) filterCheckFS(fs string) (*zfs.DatasetPath, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if !pass {
|
if !pass {
|
||||||
return nil, replication.NewFilteredError(fs)
|
return nil, fmt.Errorf("endpoint does not allow access to filesystem %s", fs)
|
||||||
}
|
}
|
||||||
return dp, nil
|
return dp, nil
|
||||||
}
|
}
|
||||||
@ -49,9 +48,10 @@ func (s *Sender) ListFilesystems(ctx context.Context, r *pdu.ListFilesystemReq)
|
|||||||
rfss[i] = &pdu.Filesystem{
|
rfss[i] = &pdu.Filesystem{
|
||||||
Path: fss[i].ToString(),
|
Path: fss[i].ToString(),
|
||||||
// FIXME: not supporting ResumeToken yet
|
// FIXME: not supporting ResumeToken yet
|
||||||
|
IsPlaceholder: false, // sender FSs are never placeholders
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
res := &pdu.ListFilesystemRes{Filesystems: rfss, Empty: len(rfss) == 0}
|
res := &pdu.ListFilesystemRes{Filesystems: rfss}
|
||||||
return res, nil
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,6 +108,21 @@ func (p *Sender) DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshots
|
|||||||
return doDestroySnapshots(ctx, dp, req.Snapshots)
|
return doDestroySnapshots(ctx, dp, req.Snapshots)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *Sender) Ping(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) {
|
||||||
|
res := pdu.PingRes{
|
||||||
|
Echo: req.GetMessage(),
|
||||||
|
}
|
||||||
|
return &res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Sender) PingDataconn(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) {
|
||||||
|
return p.Ping(ctx, req)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Sender) WaitForConnectivity(ctx context.Context) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (p *Sender) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
|
func (p *Sender) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
|
||||||
dp, err := p.filterCheckFS(req.Filesystem)
|
dp, err := p.filterCheckFS(req.Filesystem)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -229,7 +244,7 @@ func (s *Receiver) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemR
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// present without prefix, and only those that are not placeholders
|
// present filesystem without the root_fs prefix
|
||||||
fss := make([]*pdu.Filesystem, 0, len(filtered))
|
fss := make([]*pdu.Filesystem, 0, len(filtered))
|
||||||
for _, a := range filtered {
|
for _, a := range filtered {
|
||||||
ph, err := zfs.ZFSIsPlaceholderFilesystem(a)
|
ph, err := zfs.ZFSIsPlaceholderFilesystem(a)
|
||||||
@ -240,21 +255,16 @@ func (s *Receiver) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemR
|
|||||||
Error("inconsistent placeholder property")
|
Error("inconsistent placeholder property")
|
||||||
return nil, errors.New("server error: inconsistent placeholder property") // don't leak path
|
return nil, errors.New("server error: inconsistent placeholder property") // don't leak path
|
||||||
}
|
}
|
||||||
if ph {
|
|
||||||
getLogger(ctx).
|
getLogger(ctx).
|
||||||
WithField("fs", a.ToString()).
|
WithField("fs", a.ToString()).
|
||||||
Debug("ignoring placeholder filesystem")
|
WithField("is_placeholder", ph).
|
||||||
continue
|
Debug("filesystem")
|
||||||
}
|
|
||||||
getLogger(ctx).
|
|
||||||
WithField("fs", a.ToString()).
|
|
||||||
Debug("non-placeholder filesystem")
|
|
||||||
a.TrimPrefix(root)
|
a.TrimPrefix(root)
|
||||||
fss = append(fss, &pdu.Filesystem{Path: a.ToString()})
|
fss = append(fss, &pdu.Filesystem{Path: a.ToString(), IsPlaceholder: ph})
|
||||||
}
|
}
|
||||||
if len(fss) == 0 {
|
if len(fss) == 0 {
|
||||||
getLogger(ctx).Debug("no non-placeholder filesystems")
|
getLogger(ctx).Debug("no filesystems found")
|
||||||
return &pdu.ListFilesystemRes{Empty: true}, nil
|
return &pdu.ListFilesystemRes{}, nil
|
||||||
}
|
}
|
||||||
return &pdu.ListFilesystemRes{Filesystems: fss}, nil
|
return &pdu.ListFilesystemRes{Filesystems: fss}, nil
|
||||||
}
|
}
|
||||||
@ -279,6 +289,21 @@ func (s *Receiver) ListFilesystemVersions(ctx context.Context, req *pdu.ListFile
|
|||||||
return &pdu.ListFilesystemVersionsRes{Versions: rfsvs}, nil
|
return &pdu.ListFilesystemVersionsRes{Versions: rfsvs}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Receiver) Ping(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) {
|
||||||
|
res := pdu.PingRes{
|
||||||
|
Echo: req.GetMessage(),
|
||||||
|
}
|
||||||
|
return &res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Receiver) PingDataconn(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) {
|
||||||
|
return s.Ping(ctx, req)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Receiver) WaitForConnectivity(ctx context.Context) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Receiver) ReplicationCursor(context.Context, *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
|
func (s *Receiver) ReplicationCursor(context.Context, *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
|
||||||
return nil, fmt.Errorf("ReplicationCursor not implemented for Receiver")
|
return nil, fmt.Errorf("ReplicationCursor not implemented for Receiver")
|
||||||
}
|
}
|
||||||
@ -324,28 +349,30 @@ func (s *Receiver) Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs
|
|||||||
getLogger(ctx).WithField("visitErr", visitErr).Debug("complete tree-walk")
|
getLogger(ctx).WithField("visitErr", visitErr).Debug("complete tree-walk")
|
||||||
|
|
||||||
if visitErr != nil {
|
if visitErr != nil {
|
||||||
return nil, err
|
return nil, visitErr
|
||||||
}
|
}
|
||||||
|
|
||||||
needForceRecv := false
|
var clearPlaceholderProperty bool
|
||||||
|
var recvOpts zfs.RecvOptions
|
||||||
props, err := zfs.ZFSGet(lp, []string{zfs.ZREPL_PLACEHOLDER_PROPERTY_NAME})
|
props, err := zfs.ZFSGet(lp, []string{zfs.ZREPL_PLACEHOLDER_PROPERTY_NAME})
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if isPlaceholder, _ := zfs.IsPlaceholder(lp, props.Get(zfs.ZREPL_PLACEHOLDER_PROPERTY_NAME)); isPlaceholder {
|
if isPlaceholder, _ := zfs.IsPlaceholder(lp, props.Get(zfs.ZREPL_PLACEHOLDER_PROPERTY_NAME)); isPlaceholder {
|
||||||
needForceRecv = true
|
recvOpts.RollbackAndForceRecv = true
|
||||||
|
clearPlaceholderProperty = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if clearPlaceholderProperty {
|
||||||
|
if err := zfs.ZFSSetNoPlaceholder(lp); err != nil {
|
||||||
|
return nil, fmt.Errorf("cannot clear placeholder property for forced receive: %s", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
args := make([]string, 0, 1)
|
getLogger(ctx).WithField("opts", fmt.Sprintf("%#v", recvOpts)).Debug("start receive command")
|
||||||
if needForceRecv {
|
|
||||||
args = append(args, "-F")
|
|
||||||
}
|
|
||||||
|
|
||||||
getLogger(ctx).Debug("start receive command")
|
if err := zfs.ZFSRecv(ctx, lp.ToString(), receive, recvOpts); err != nil {
|
||||||
|
|
||||||
if err := zfs.ZFSRecv(ctx, lp.ToString(), receive, args...); err != nil {
|
|
||||||
getLogger(ctx).
|
getLogger(ctx).
|
||||||
WithError(err).
|
WithError(err).
|
||||||
WithField("args", args).
|
WithField("opts", recvOpts).
|
||||||
Error("zfs receive failed")
|
Error("zfs receive failed")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
50
replication/driver/errorclass_enumer.go
Normal file
50
replication/driver/errorclass_enumer.go
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
// Code generated by "enumer -type=errorClass"; DO NOT EDIT.
|
||||||
|
|
||||||
|
package driver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
const _errorClassName = "errorClassUnknownerrorClassPermanenterrorClassTemporaryConnectivityRelated"
|
||||||
|
|
||||||
|
var _errorClassIndex = [...]uint8{0, 17, 36, 74}
|
||||||
|
|
||||||
|
func (i errorClass) String() string {
|
||||||
|
if i < 0 || i >= errorClass(len(_errorClassIndex)-1) {
|
||||||
|
return fmt.Sprintf("errorClass(%d)", i)
|
||||||
|
}
|
||||||
|
return _errorClassName[_errorClassIndex[i]:_errorClassIndex[i+1]]
|
||||||
|
}
|
||||||
|
|
||||||
|
var _errorClassValues = []errorClass{0, 1, 2}
|
||||||
|
|
||||||
|
var _errorClassNameToValueMap = map[string]errorClass{
|
||||||
|
_errorClassName[0:17]: 0,
|
||||||
|
_errorClassName[17:36]: 1,
|
||||||
|
_errorClassName[36:74]: 2,
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorClassString retrieves an enum value from the enum constants string name.
|
||||||
|
// Throws an error if the param is not part of the enum.
|
||||||
|
func errorClassString(s string) (errorClass, error) {
|
||||||
|
if val, ok := _errorClassNameToValueMap[s]; ok {
|
||||||
|
return val, nil
|
||||||
|
}
|
||||||
|
return 0, fmt.Errorf("%s does not belong to errorClass values", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorClassValues returns all values of the enum
|
||||||
|
func errorClassValues() []errorClass {
|
||||||
|
return _errorClassValues
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsAerrorClass returns "true" if the value is listed in the enum definition. "false" otherwise
|
||||||
|
func (i errorClass) IsAerrorClass() bool {
|
||||||
|
for _, v := range _errorClassValues {
|
||||||
|
if i == v {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
638
replication/driver/replication_driver.go
Normal file
638
replication/driver/replication_driver.go
Normal file
@ -0,0 +1,638 @@
|
|||||||
|
package driver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/zrepl/zrepl/replication/report"
|
||||||
|
"github.com/zrepl/zrepl/util/chainlock"
|
||||||
|
"github.com/zrepl/zrepl/util/envconst"
|
||||||
|
"google.golang.org/grpc/codes"
|
||||||
|
"google.golang.org/grpc/status"
|
||||||
|
)
|
||||||
|
|
||||||
|
// interval describes a span of time that has a mandatory beginning and an
// optional end. The zero value represents "not set".
type interval struct {
	begin time.Time
	end   time.Time
}

// SetZero resets the interval to its unset state (zero begin and zero end).
func (w *interval) SetZero() {
	w.begin = time.Time{}
	w.end = time.Time{}
}

// Set starts the interval at begin and lets it last for duration.
//
// A duration of 0 means indefinite length: the end is then stored as the zero
// time, which is what End and ContextWithDeadlineAtEnd check for.
//
// Set panics if begin is the zero time.
func (w *interval) Set(begin time.Time, duration time.Duration) {
	if begin.IsZero() {
		panic("zero begin time not allowed")
	}
	w.begin = begin
	if duration == 0 {
		// begin.Add(0) would yield begin itself, i.e. a non-zero end that
		// has already passed. Store the zero value so that the interval is
		// recognized as indefinite.
		w.end = time.Time{}
		return
	}
	w.end = begin.Add(duration)
}

// End returns the end of the interval if it has a defined length.
// For indefinite lengths, it returns the zero value.
func (w *interval) End() time.Time {
	return w.end
}

// ContextWithDeadlineAtEnd returns a context with a deadline at the
// interval's end. If the interval has indefinite length (duration 0 on Set),
// ctx is returned as is. The returned context.CancelFunc can be called
// either way.
//
// It panics if Set has not been called before.
func (w *interval) ContextWithDeadlineAtEnd(ctx context.Context) (context.Context, context.CancelFunc) {
	if w.begin.IsZero() {
		panic("must call Set before ContextWithDeadlineAtEnd")
	}
	if w.end.IsZero() {
		// indefinite length, just return context as is
		return ctx, func() {}
	}
	return context.WithDeadline(ctx, w.end)
}
|
||||||
|
|
||||||
|
type run struct {
|
||||||
|
l *chainlock.L
|
||||||
|
|
||||||
|
startedAt, finishedAt time.Time
|
||||||
|
|
||||||
|
waitReconnect interval
|
||||||
|
waitReconnectError *timedError
|
||||||
|
|
||||||
|
// the attempts attempted so far:
|
||||||
|
// All but the last in this slice must have finished with some errors.
|
||||||
|
// The last attempt may not be finished and may not have errors.
|
||||||
|
attempts []*attempt
|
||||||
|
}
|
||||||
|
|
||||||
|
type Planner interface {
|
||||||
|
Plan(context.Context) ([]FS, error)
|
||||||
|
WaitForConnectivity(context.Context) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// an attempt represents a single planning & execution of fs replications
|
||||||
|
type attempt struct {
|
||||||
|
planner Planner
|
||||||
|
|
||||||
|
l *chainlock.L
|
||||||
|
|
||||||
|
startedAt, finishedAt time.Time
|
||||||
|
|
||||||
|
// after Planner.Plan was called, planErr and fss are mutually exclusive with regards to nil-ness
|
||||||
|
// if both are nil, it must be assumed that Planner.Plan is active
|
||||||
|
planErr *timedError
|
||||||
|
fss []*fs
|
||||||
|
}
|
||||||
|
|
||||||
|
// timedError pairs an error with the point in time at which it was observed.
type timedError struct {
	Err  error
	Time time.Time
}

// newTimedError builds a timedError from err and t.
// Both arguments are mandatory: it panics on a nil err or a zero t.
func newTimedError(err error, t time.Time) *timedError {
	switch {
	case err == nil:
		panic("error must be non-nil")
	case t.IsZero():
		panic("t must be non-zero")
	}
	return &timedError{Err: err, Time: t}
}
|
||||||
|
|
||||||
|
func (e *timedError) IntoReportError() *report.TimedError {
|
||||||
|
if e == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return report.NewTimedError(e.Err.Error(), e.Time)
|
||||||
|
}
|
||||||
|
|
||||||
|
type FS interface {
|
||||||
|
// Returns true if this FS and fs refer to the same filesystem returned
|
||||||
|
// by Planner.Plan in a previous attempt.
|
||||||
|
EqualToPreviousAttempt(fs FS) bool
|
||||||
|
// The returned steps are assumed to be dependent on exactly
|
||||||
|
// their direct predecessors in the returned list.
|
||||||
|
PlanFS(context.Context) ([]Step, error)
|
||||||
|
ReportInfo() *report.FilesystemInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
type Step interface {
|
||||||
|
// Returns true iff the target snapshot is the same for this Step and other.
|
||||||
|
// We do not use TargetDate to avoid problems with wrong system time on
|
||||||
|
// snapshot creation.
|
||||||
|
//
|
||||||
|
// Implementations can assume that `other` is a step of the same filesystem,
|
||||||
|
// although maybe from a previous attempt.
|
||||||
|
// (`same` as defined by FS.EqualToPreviousAttempt)
|
||||||
|
//
|
||||||
|
// Note that TargetEquals should return true in a situation with one
|
||||||
|
// originally sent snapshot and a subsequent attempt's step that uses
|
||||||
|
// resumable send & recv.
|
||||||
|
TargetEquals(other Step) bool
|
||||||
|
TargetDate() time.Time
|
||||||
|
Step(context.Context) error
|
||||||
|
ReportInfo() *report.StepInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
type fs struct {
|
||||||
|
fs FS
|
||||||
|
|
||||||
|
l *chainlock.L
|
||||||
|
|
||||||
|
planning struct {
|
||||||
|
done bool
|
||||||
|
err *timedError
|
||||||
|
}
|
||||||
|
|
||||||
|
// valid iff planning.done && planning.err == nil
|
||||||
|
planned struct {
|
||||||
|
// valid iff planning.done && planning.err == nil
|
||||||
|
stepErr *timedError
|
||||||
|
// all steps, in the order in which they must be completed
|
||||||
|
steps []*step
|
||||||
|
// index into steps, pointing at the step that is currently executing
|
||||||
|
// if step >= len(steps), no more work needs to be done
|
||||||
|
step int
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type step struct {
|
||||||
|
l *chainlock.L
|
||||||
|
step Step
|
||||||
|
}
|
||||||
|
|
||||||
|
type ReportFunc func() *report.Report
|
||||||
|
type WaitFunc func(block bool) (done bool)
|
||||||
|
|
||||||
|
var maxAttempts = envconst.Int64("ZREPL_REPLICATION_MAX_ATTEMPTS", 3)
|
||||||
|
var reconnectHardFailTimeout = envconst.Duration("ZREPL_REPLICATION_RECONNECT_HARD_FAIL_TIMEOUT", 10*time.Minute)
|
||||||
|
|
||||||
|
func Do(ctx context.Context, planner Planner) (ReportFunc, WaitFunc) {
|
||||||
|
log := getLog(ctx)
|
||||||
|
l := chainlock.New()
|
||||||
|
run := &run{
|
||||||
|
l: l,
|
||||||
|
startedAt: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
defer close(done)
|
||||||
|
|
||||||
|
defer run.l.Lock().Unlock()
|
||||||
|
log.Debug("begin run")
|
||||||
|
defer log.Debug("run ended")
|
||||||
|
var prev *attempt
|
||||||
|
mainLog := log
|
||||||
|
for ano := 0; ano < int(maxAttempts) || maxAttempts == 0; ano++ {
|
||||||
|
log := mainLog.WithField("attempt_number", ano)
|
||||||
|
log.Debug("start attempt")
|
||||||
|
|
||||||
|
run.waitReconnect.SetZero()
|
||||||
|
run.waitReconnectError = nil
|
||||||
|
|
||||||
|
// do current attempt
|
||||||
|
cur := &attempt{
|
||||||
|
l: l,
|
||||||
|
startedAt: time.Now(),
|
||||||
|
planner: planner,
|
||||||
|
}
|
||||||
|
run.attempts = append(run.attempts, cur)
|
||||||
|
run.l.DropWhile(func() {
|
||||||
|
cur.do(ctx, prev)
|
||||||
|
})
|
||||||
|
prev = cur
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
log.WithError(ctx.Err()).Info("context error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// error classification, bail out if done / permanent error
|
||||||
|
rep := cur.report()
|
||||||
|
log.WithField("attempt_state", rep.State).Debug("attempt state")
|
||||||
|
errRep := cur.errorReport()
|
||||||
|
|
||||||
|
if rep.State == report.AttemptDone {
|
||||||
|
log.Debug("attempt completed successfully")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
mostRecentErr, mostRecentErrClass := errRep.MostRecent()
|
||||||
|
log.WithField("most_recent_err", mostRecentErr).WithField("most_recent_err_class", mostRecentErrClass).Debug("most recent error used for re-connect decision")
|
||||||
|
if mostRecentErr == nil {
|
||||||
|
// inconsistent reporting, let's bail out
|
||||||
|
log.Warn("attempt does not report done but error report does not report errors, aborting run")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
log.WithError(mostRecentErr.Err).Error("most recent error in this attempt")
|
||||||
|
shouldReconnect := mostRecentErrClass == errorClassTemporaryConnectivityRelated
|
||||||
|
log.WithField("reconnect_decision", shouldReconnect).Debug("reconnect decision made")
|
||||||
|
if shouldReconnect {
|
||||||
|
run.waitReconnect.Set(time.Now(), reconnectHardFailTimeout)
|
||||||
|
log.WithField("deadline", run.waitReconnect.End()).Error("temporary connectivity-related error identified, start waiting for reconnect")
|
||||||
|
var connectErr error
|
||||||
|
var connectErrTime time.Time
|
||||||
|
run.l.DropWhile(func() {
|
||||||
|
ctx, cancel := run.waitReconnect.ContextWithDeadlineAtEnd(ctx)
|
||||||
|
defer cancel()
|
||||||
|
connectErr = planner.WaitForConnectivity(ctx)
|
||||||
|
connectErrTime = time.Now()
|
||||||
|
})
|
||||||
|
if connectErr == nil {
|
||||||
|
log.Error("reconnect successful") // same level as 'begin with reconnect' message above
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
run.waitReconnectError = newTimedError(connectErr, connectErrTime)
|
||||||
|
log.WithError(connectErr).Error("reconnecting failed, aborting run")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Error("most recent error cannot be solved by reconnecting, aborting run")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}()
|
||||||
|
|
||||||
|
wait := func(block bool) bool {
|
||||||
|
if block {
|
||||||
|
<-done
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
report := func() *report.Report {
|
||||||
|
defer run.l.Lock().Unlock()
|
||||||
|
return run.report()
|
||||||
|
}
|
||||||
|
return report, wait
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *attempt) do(ctx context.Context, prev *attempt) {
|
||||||
|
pfss, err := a.planner.Plan(ctx)
|
||||||
|
errTime := time.Now()
|
||||||
|
defer a.l.Lock().Unlock()
|
||||||
|
if err != nil {
|
||||||
|
a.planErr = newTimedError(err, errTime)
|
||||||
|
a.fss = nil
|
||||||
|
a.finishedAt = time.Now()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, pfs := range pfss {
|
||||||
|
fs := &fs{
|
||||||
|
fs: pfs,
|
||||||
|
l: a.l,
|
||||||
|
}
|
||||||
|
a.fss = append(a.fss, fs)
|
||||||
|
}
|
||||||
|
|
||||||
|
prevs := make(map[*fs]*fs)
|
||||||
|
{
|
||||||
|
prevFSs := make(map[*fs][]*fs, len(pfss))
|
||||||
|
if prev != nil {
|
||||||
|
debug("previous attempt has %d fss", len(a.fss))
|
||||||
|
for _, fs := range a.fss {
|
||||||
|
for _, prevFS := range prev.fss {
|
||||||
|
if fs.fs.EqualToPreviousAttempt(prevFS.fs) {
|
||||||
|
l := prevFSs[fs]
|
||||||
|
l = append(l, prevFS)
|
||||||
|
prevFSs[fs] = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
type inconsistency struct {
|
||||||
|
cur *fs
|
||||||
|
prevs []*fs
|
||||||
|
}
|
||||||
|
var inconsistencies []inconsistency
|
||||||
|
for cur, fss := range prevFSs {
|
||||||
|
if len(fss) > 1 {
|
||||||
|
inconsistencies = append(inconsistencies, inconsistency{cur, fss})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.SliceStable(inconsistencies, func(i, j int) bool {
|
||||||
|
return inconsistencies[i].cur.fs.ReportInfo().Name < inconsistencies[j].cur.fs.ReportInfo().Name
|
||||||
|
})
|
||||||
|
if len(inconsistencies) > 0 {
|
||||||
|
var msg strings.Builder
|
||||||
|
msg.WriteString("cannot determine filesystem correspondences between different attempts:\n")
|
||||||
|
var inconsistencyLines []string
|
||||||
|
for _, i := range inconsistencies {
|
||||||
|
var prevNames []string
|
||||||
|
for _, prev := range i.prevs {
|
||||||
|
prevNames = append(prevNames, prev.fs.ReportInfo().Name)
|
||||||
|
}
|
||||||
|
l := fmt.Sprintf(" %s => %v", i.cur.fs.ReportInfo().Name, prevNames)
|
||||||
|
inconsistencyLines = append(inconsistencyLines, l)
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&msg, strings.Join(inconsistencyLines, "\n"))
|
||||||
|
now := time.Now()
|
||||||
|
a.planErr = newTimedError(errors.New(msg.String()), now)
|
||||||
|
a.fss = nil
|
||||||
|
a.finishedAt = now
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for cur, fss := range prevFSs {
|
||||||
|
if len(fss) > 0 {
|
||||||
|
prevs[cur] = fss[0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// invariant: prevs contains an entry for each unambigious correspondence
|
||||||
|
|
||||||
|
stepQueue := newStepQueue()
|
||||||
|
defer stepQueue.Start(1)() // TODO parallel replication
|
||||||
|
var fssesDone sync.WaitGroup
|
||||||
|
for _, f := range a.fss {
|
||||||
|
fssesDone.Add(1)
|
||||||
|
go func(f *fs) {
|
||||||
|
defer fssesDone.Done()
|
||||||
|
f.do(ctx, stepQueue, prevs[f])
|
||||||
|
}(f)
|
||||||
|
}
|
||||||
|
a.l.DropWhile(func() {
|
||||||
|
fssesDone.Wait()
|
||||||
|
})
|
||||||
|
a.finishedAt = time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fs *fs) do(ctx context.Context, pq *stepQueue, prev *fs) {
|
||||||
|
psteps, err := fs.fs.PlanFS(ctx)
|
||||||
|
errTime := time.Now()
|
||||||
|
defer fs.l.Lock().Unlock()
|
||||||
|
debug := debugPrefix("fs=%s", fs.fs.ReportInfo().Name)
|
||||||
|
fs.planning.done = true
|
||||||
|
if err != nil {
|
||||||
|
fs.planning.err = newTimedError(err, errTime)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, pstep := range psteps {
|
||||||
|
step := &step{
|
||||||
|
l: fs.l,
|
||||||
|
step: pstep,
|
||||||
|
}
|
||||||
|
fs.planned.steps = append(fs.planned.steps, step)
|
||||||
|
}
|
||||||
|
debug("iniital len(fs.planned.steps) = %d", len(fs.planned.steps))
|
||||||
|
|
||||||
|
// for not-first attempts, only allow fs.planned.steps
|
||||||
|
// up to including the originally planned target snapshot
|
||||||
|
if prev != nil && prev.planning.done && prev.planning.err == nil {
|
||||||
|
prevUncompleted := prev.planned.steps[prev.planned.step:]
|
||||||
|
if len(prevUncompleted) == 0 {
|
||||||
|
debug("prevUncompleted is empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(fs.planned.steps) == 0 {
|
||||||
|
debug("fs.planned.steps is empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
prevFailed := prevUncompleted[0]
|
||||||
|
curFirst := fs.planned.steps[0]
|
||||||
|
// we assume that PlanFS retries prevFailed (using curFirst)
|
||||||
|
if !prevFailed.step.TargetEquals(curFirst.step) {
|
||||||
|
debug("Targets don't match")
|
||||||
|
// Two options:
|
||||||
|
// A: planning algorithm is broken
|
||||||
|
// B: manual user intervention inbetween
|
||||||
|
// Neither way will we make progress, so let's error out
|
||||||
|
stepFmt := func(step *step) string {
|
||||||
|
r := step.report()
|
||||||
|
s := r.Info
|
||||||
|
if r.IsIncremental() {
|
||||||
|
return fmt.Sprintf("%s=>%s", s.From, s.To)
|
||||||
|
} else {
|
||||||
|
return fmt.Sprintf("full=>%s", s.To)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
msg := fmt.Sprintf("last attempt's uncompleted step %s does not correspond to this attempt's first planned step %s",
|
||||||
|
stepFmt(prevFailed), stepFmt(curFirst))
|
||||||
|
fs.planned.stepErr = newTimedError(errors.New(msg), time.Now())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// only allow until step targets diverge
|
||||||
|
min := len(prevUncompleted)
|
||||||
|
if min > len(fs.planned.steps) {
|
||||||
|
min = len(fs.planned.steps)
|
||||||
|
}
|
||||||
|
diverge := 0
|
||||||
|
for ; diverge < min; diverge++ {
|
||||||
|
debug("diverge compare iteration %d", diverge)
|
||||||
|
if !fs.planned.steps[diverge].step.TargetEquals(prevUncompleted[diverge].step) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
debug("diverge is %d", diverge)
|
||||||
|
fs.planned.steps = fs.planned.steps[0:diverge]
|
||||||
|
}
|
||||||
|
debug("post-prev-merge len(fs.planned.steps) = %d", len(fs.planned.steps))
|
||||||
|
|
||||||
|
for i, s := range fs.planned.steps {
|
||||||
|
var (
|
||||||
|
err error
|
||||||
|
errTime time.Time
|
||||||
|
)
|
||||||
|
// lock must not be held while executing step in order for reporting to work
|
||||||
|
fs.l.DropWhile(func() {
|
||||||
|
targetDate := s.step.TargetDate()
|
||||||
|
defer pq.WaitReady(fs, targetDate)()
|
||||||
|
err = s.step.Step(ctx) // no shadow
|
||||||
|
errTime = time.Now() // no shadow
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
fs.planned.stepErr = newTimedError(err, errTime)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fs.planned.step = i + 1 // fs.planned.step must be == len(fs.planned.steps) if all went OK
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// caller must hold lock l
|
||||||
|
func (r *run) report() *report.Report {
|
||||||
|
report := &report.Report{
|
||||||
|
Attempts: make([]*report.AttemptReport, len(r.attempts)),
|
||||||
|
StartAt: r.startedAt,
|
||||||
|
FinishAt: r.finishedAt,
|
||||||
|
WaitReconnectSince: r.waitReconnect.begin,
|
||||||
|
WaitReconnectUntil: r.waitReconnect.end,
|
||||||
|
WaitReconnectError: r.waitReconnectError.IntoReportError(),
|
||||||
|
}
|
||||||
|
for i := range report.Attempts {
|
||||||
|
report.Attempts[i] = r.attempts[i].report()
|
||||||
|
}
|
||||||
|
return report
|
||||||
|
}
|
||||||
|
|
||||||
|
// caller must hold lock l
|
||||||
|
func (a *attempt) report() *report.AttemptReport {
|
||||||
|
|
||||||
|
r := &report.AttemptReport{
|
||||||
|
// State is set below
|
||||||
|
Filesystems: make([]*report.FilesystemReport, len(a.fss)),
|
||||||
|
StartAt: a.startedAt,
|
||||||
|
FinishAt: a.finishedAt,
|
||||||
|
PlanError: a.planErr.IntoReportError(),
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range r.Filesystems {
|
||||||
|
r.Filesystems[i] = a.fss[i].report()
|
||||||
|
}
|
||||||
|
|
||||||
|
state := report.AttemptPlanning
|
||||||
|
if a.planErr != nil {
|
||||||
|
state = report.AttemptPlanningError
|
||||||
|
} else if a.fss != nil {
|
||||||
|
if a.finishedAt.IsZero() {
|
||||||
|
state = report.AttemptFanOutFSs
|
||||||
|
} else {
|
||||||
|
fsWithError := false
|
||||||
|
for _, s := range r.Filesystems {
|
||||||
|
fsWithError = fsWithError || s.Error() != nil
|
||||||
|
}
|
||||||
|
state = report.AttemptDone
|
||||||
|
if fsWithError {
|
||||||
|
state = report.AttemptFanOutError
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r.State = state
|
||||||
|
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// caller must hold lock l
|
||||||
|
func (f *fs) report() *report.FilesystemReport {
|
||||||
|
state := report.FilesystemPlanningErrored
|
||||||
|
if f.planning.err == nil {
|
||||||
|
if f.planning.done {
|
||||||
|
if f.planned.stepErr != nil {
|
||||||
|
state = report.FilesystemSteppingErrored
|
||||||
|
} else if f.planned.step < len(f.planned.steps) {
|
||||||
|
state = report.FilesystemStepping
|
||||||
|
} else {
|
||||||
|
state = report.FilesystemDone
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
state = report.FilesystemPlanning
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r := &report.FilesystemReport{
|
||||||
|
Info: f.fs.ReportInfo(),
|
||||||
|
State: state,
|
||||||
|
PlanError: f.planning.err.IntoReportError(),
|
||||||
|
StepError: f.planned.stepErr.IntoReportError(),
|
||||||
|
Steps: make([]*report.StepReport, len(f.planned.steps)),
|
||||||
|
CurrentStep: f.planned.step,
|
||||||
|
}
|
||||||
|
for i := range r.Steps {
|
||||||
|
r.Steps[i] = f.planned.steps[i].report()
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// caller must hold lock l
|
||||||
|
func (s *step) report() *report.StepReport {
|
||||||
|
r := &report.StepReport{
|
||||||
|
Info: s.step.ReportInfo(),
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
type stepErrorReport struct {
|
||||||
|
err *timedError
|
||||||
|
step int
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorClass is the category an error is sorted into when building
// an errorReport.
//
//go:generate enumer -type=errorClass
type errorClass int

const (
	// errorClassUnknown is the zero value.
	errorClassUnknown errorClass = iota
	// errorClassPermanent is used for every error that is not recognised
	// as temporary.
	errorClassPermanent
	// errorClassTemporaryConnectivityRelated is used for temporary network
	// errors and gRPC "unavailable" statuses.
	errorClassTemporaryConnectivityRelated
)
|
||||||
|
|
||||||
|
type errorReport struct {
|
||||||
|
flattened []*timedError
|
||||||
|
// sorted DESCending by err time
|
||||||
|
byClass map[errorClass][]*timedError
|
||||||
|
}
|
||||||
|
|
||||||
|
// caller must hold lock l
|
||||||
|
func (a *attempt) errorReport() *errorReport {
|
||||||
|
r := &errorReport{}
|
||||||
|
if a.planErr != nil {
|
||||||
|
r.flattened = append(r.flattened, a.planErr)
|
||||||
|
}
|
||||||
|
for _, fs := range a.fss {
|
||||||
|
if fs.planning.done && fs.planning.err != nil {
|
||||||
|
r.flattened = append(r.flattened, fs.planning.err)
|
||||||
|
} else if fs.planning.done && fs.planned.stepErr != nil {
|
||||||
|
r.flattened = append(r.flattened, fs.planned.stepErr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// build byClass
|
||||||
|
{
|
||||||
|
r.byClass = make(map[errorClass][]*timedError)
|
||||||
|
putClass := func(err *timedError, class errorClass) {
|
||||||
|
errs := r.byClass[class]
|
||||||
|
errs = append(errs, err)
|
||||||
|
r.byClass[class] = errs
|
||||||
|
}
|
||||||
|
for _, err := range r.flattened {
|
||||||
|
if neterr, ok := err.Err.(net.Error); ok && neterr.Temporary() {
|
||||||
|
putClass(err, errorClassTemporaryConnectivityRelated)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if st, ok := status.FromError(err.Err); ok && st.Code() == codes.Unavailable {
|
||||||
|
// technically, codes.Unavailable could be returned by the gRPC endpoint, indicating overload, etc.
|
||||||
|
// for now, let's assume it only happens for connectivity issues, as specified in
|
||||||
|
// https://grpc.io/grpc/core/md_doc_statuscodes.html
|
||||||
|
putClass(err, errorClassTemporaryConnectivityRelated)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
putClass(err, errorClassPermanent)
|
||||||
|
}
|
||||||
|
for _, errs := range r.byClass {
|
||||||
|
sort.Slice(errs, func(i, j int) bool {
|
||||||
|
return errs[i].Time.After(errs[j].Time) // sort descendingly
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *errorReport) AnyError() *timedError {
|
||||||
|
for _, err := range r.flattened {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *errorReport) MostRecent() (err *timedError, errClass errorClass) {
|
||||||
|
for class, errs := range r.byClass {
|
||||||
|
// errs are sorted descendingly during construction
|
||||||
|
if len(errs) > 0 && (err == nil || errs[0].Time.After(err.Time)) {
|
||||||
|
err = errs[0]
|
||||||
|
errClass = class
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
29
replication/driver/replication_driver_debug.go
Normal file
29
replication/driver/replication_driver_debug.go
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
package driver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// debugEnabled controls whether debug prints anything. It is switched on
// by init when ZREPL_REPLICATION_DRIVER_DEBUG is set to a non-empty value.
var debugEnabled bool = false

func init() {
	if os.Getenv("ZREPL_REPLICATION_DRIVER_DEBUG") != "" {
		debugEnabled = true
	}
}

// debug writes one formatted line, prefixed with "repl: driver: ", to
// os.Stderr if debugEnabled is true, and does nothing otherwise.
func debug(format string, args ...interface{}) {
	if debugEnabled {
		fmt.Fprintf(os.Stderr, "repl: driver: %s\n", fmt.Sprintf(format, args...))
	}
}

// debugFunc is a printf-style debug logging function.
type debugFunc func(format string, args ...interface{})

// debugPrefix returns a debugFunc that behaves like debug but puts the
// formatted prefix and ": " in front of every message.
func debugPrefix(prefixFormat string, prefixFormatArgs ...interface{}) debugFunc {
	prefix := fmt.Sprintf(prefixFormat, prefixFormatArgs...)
	return func(format string, args ...interface{}) {
		// args must be expanded with "...": passing the slice itself makes
		// Sprintf treat it as a single operand, which garbles the message
		// (e.g. "%d" prints "[3]", and a call without args gets an
		// "%!(EXTRA ...)" suffix).
		debug("%s: %s", prefix, fmt.Sprintf(format, args...))
	}
}
|
25
replication/driver/replication_driver_logging.go
Normal file
25
replication/driver/replication_driver_logging.go
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
package driver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/zrepl/zrepl/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Logger is an alias for logger.Logger, so that this package's API can
// refer to the logger type by a local name.
type Logger = logger.Logger
|
||||||
|
|
||||||
|
// contexKey is the private type of this package's context keys.
type contexKey int

const (
	// contexKeyLogger is the context key under which the Logger is stored.
	contexKeyLogger contexKey = iota + 1
)
|
||||||
|
|
||||||
|
func getLog(ctx context.Context) Logger {
|
||||||
|
l, ok := ctx.Value(contexKeyLogger).(Logger)
|
||||||
|
if !ok {
|
||||||
|
l = logger.NewNullLogger()
|
||||||
|
}
|
||||||
|
return l
|
||||||
|
}
|
||||||
|
|
||||||
|
func WithLogger(ctx context.Context, log Logger) context.Context {
|
||||||
|
return context.WithValue(ctx, contexKeyLogger, log)
|
||||||
|
}
|
215
replication/driver/replication_driver_test.go
Normal file
215
replication/driver/replication_driver_test.go
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
package driver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"github.com/zrepl/zrepl/replication/report"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
jsondiff "github.com/yudai/gojsondiff"
|
||||||
|
jsondiffformatter "github.com/yudai/gojsondiff/formatter"
|
||||||
|
)
|
||||||
|
|
||||||
|
type mockPlanner struct {
|
||||||
|
stepCounter uint32
|
||||||
|
fss []FS // *mockFS
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *mockPlanner) Plan(ctx context.Context) ([]FS, error) {
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
p.fss = []FS{
|
||||||
|
&mockFS{
|
||||||
|
&p.stepCounter,
|
||||||
|
"zroot/one",
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
&mockFS{
|
||||||
|
&p.stepCounter,
|
||||||
|
"zroot/two",
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return p.fss, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *mockPlanner) WaitForConnectivity(context.Context) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type mockFS struct {
|
||||||
|
globalStepCounter *uint32
|
||||||
|
name string
|
||||||
|
steps []Step
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *mockFS) EqualToPreviousAttempt(other FS) bool {
|
||||||
|
return f.name == other.(*mockFS).name
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *mockFS) PlanFS(ctx context.Context) ([]Step, error) {
|
||||||
|
if f.steps != nil {
|
||||||
|
panic("PlanFS used twice")
|
||||||
|
}
|
||||||
|
switch f.name {
|
||||||
|
case "zroot/one":
|
||||||
|
f.steps = []Step{
|
||||||
|
&mockStep{
|
||||||
|
fs: f,
|
||||||
|
ident: "a",
|
||||||
|
duration: 1 * time.Second,
|
||||||
|
targetDate: time.Unix(2, 0),
|
||||||
|
},
|
||||||
|
&mockStep{
|
||||||
|
fs: f,
|
||||||
|
ident: "b",
|
||||||
|
duration: 1 * time.Second,
|
||||||
|
targetDate: time.Unix(10, 0),
|
||||||
|
},
|
||||||
|
&mockStep{
|
||||||
|
fs: f,
|
||||||
|
ident: "c",
|
||||||
|
duration: 1 * time.Second,
|
||||||
|
targetDate: time.Unix(20, 0),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
case "zroot/two":
|
||||||
|
f.steps = []Step{
|
||||||
|
&mockStep{
|
||||||
|
fs: f,
|
||||||
|
ident: "u",
|
||||||
|
duration: 500 * time.Millisecond,
|
||||||
|
targetDate: time.Unix(15, 0),
|
||||||
|
},
|
||||||
|
&mockStep{
|
||||||
|
fs: f,
|
||||||
|
duration: 500 * time.Millisecond,
|
||||||
|
ident: "v",
|
||||||
|
targetDate: time.Unix(30, 0),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
panic("unimplemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
return f.steps, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *mockFS) ReportInfo() *report.FilesystemInfo {
|
||||||
|
return &report.FilesystemInfo{Name: f.name}
|
||||||
|
}
|
||||||
|
|
||||||
|
type mockStep struct {
|
||||||
|
fs *mockFS
|
||||||
|
ident string
|
||||||
|
duration time.Duration
|
||||||
|
targetDate time.Time
|
||||||
|
|
||||||
|
// filled by method Step
|
||||||
|
globalCtr uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *mockStep) String() string {
|
||||||
|
return fmt.Sprintf("%s{%s} targetDate=%s globalCtr=%v", f.fs.name, f.ident, f.targetDate, f.globalCtr)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *mockStep) Step(ctx context.Context) error {
|
||||||
|
f.globalCtr = atomic.AddUint32(f.fs.globalStepCounter, 1)
|
||||||
|
time.Sleep(f.duration)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *mockStep) TargetEquals(s Step) bool {
|
||||||
|
return f.ident == s.(*mockStep).ident
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *mockStep) TargetDate() time.Time {
|
||||||
|
return f.targetDate
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *mockStep) ReportInfo() *report.StepInfo {
|
||||||
|
return &report.StepInfo{From: f.ident, To: f.ident, BytesExpected: 100, BytesReplicated: 25}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: add meaningful validation (i.e. actual checks)
|
||||||
|
// Since the stepqueue is not deterministic due to scheduler jitter,
|
||||||
|
// we cannot test for any definitive sequence of steps here.
|
||||||
|
// Such checks would further only be sensible for a non-concurrent step-queue,
|
||||||
|
// but we're going to have concurrent replication in the future.
|
||||||
|
//
|
||||||
|
// For the time being, let's just exercise the code a bit.
|
||||||
|
func TestReplication(t *testing.T) {
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
mp := &mockPlanner{}
|
||||||
|
getReport, wait := Do(ctx, mp)
|
||||||
|
begin := time.Now()
|
||||||
|
fireAt := []time.Duration{
|
||||||
|
// the following values are relative to the start
|
||||||
|
500 * time.Millisecond, // planning
|
||||||
|
1500 * time.Millisecond, // nothing is done, a is running
|
||||||
|
2500 * time.Millisecond, // a done, b running
|
||||||
|
3250 * time.Millisecond, // a,b done, u running
|
||||||
|
3750 * time.Millisecond, // a,b,u done, c running
|
||||||
|
4750 * time.Millisecond, // a,b,u,c done, v running
|
||||||
|
5250 * time.Millisecond, // a,b,u,c,v done
|
||||||
|
}
|
||||||
|
reports := make([]*report.Report, len(fireAt))
|
||||||
|
for i := range fireAt {
|
||||||
|
sleepUntil := begin.Add(fireAt[i])
|
||||||
|
time.Sleep(sleepUntil.Sub(time.Now()))
|
||||||
|
reports[i] = getReport()
|
||||||
|
// uncomment for viewing non-diffed results
|
||||||
|
// t.Logf("report @ %6.4f:\n%s", fireAt[i].Seconds(), pretty.Sprint(reports[i]))
|
||||||
|
}
|
||||||
|
waitBegin := time.Now()
|
||||||
|
wait(true)
|
||||||
|
waitDuration := time.Now().Sub(waitBegin)
|
||||||
|
assert.True(t, waitDuration < 10*time.Millisecond, "%v", waitDuration) // and that's gratious
|
||||||
|
|
||||||
|
prev, err := json.Marshal(reports[0])
|
||||||
|
require.NoError(t, err)
|
||||||
|
for _, r := range reports[1:] {
|
||||||
|
this, err := json.Marshal(r)
|
||||||
|
require.NoError(t, err)
|
||||||
|
differ := jsondiff.New()
|
||||||
|
diff, err := differ.Compare(prev, this)
|
||||||
|
require.NoError(t, err)
|
||||||
|
df := jsondiffformatter.NewDeltaFormatter()
|
||||||
|
_, err = df.Format(diff)
|
||||||
|
require.NoError(t, err)
|
||||||
|
// uncomment the following line to get json diffs between each captured step
|
||||||
|
// t.Logf("%s", res)
|
||||||
|
prev, err = json.Marshal(r)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
steps := make([]*mockStep, 0)
|
||||||
|
for _, fs := range mp.fss {
|
||||||
|
for _, step := range fs.(*mockFS).steps {
|
||||||
|
steps = append(steps, step.(*mockStep))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sort steps in pq order (although, remember, pq is not deterministic)
|
||||||
|
sort.Slice(steps, func(i, j int) bool {
|
||||||
|
return steps[i].targetDate.Before(steps[j].targetDate)
|
||||||
|
})
|
||||||
|
|
||||||
|
// manual inspection of the globalCtr value should show that, despite
|
||||||
|
// scheduler-dependent behavior of pq, steps should generally be taken
|
||||||
|
// from oldest to newest target date (globally, not per FS).
|
||||||
|
t.Logf("steps sorted by target date:")
|
||||||
|
for _, step := range steps {
|
||||||
|
t.Logf("\t%s", step)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
163
replication/driver/replication_stepqueue.go
Normal file
163
replication/driver/replication_stepqueue.go
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
package driver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"container/heap"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/zrepl/zrepl/util/chainlock"
|
||||||
|
)
|
||||||
|
|
||||||
|
type stepQueueRec struct {
|
||||||
|
ident interface{}
|
||||||
|
targetDate time.Time
|
||||||
|
wakeup chan StepCompletedFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
type stepQueue struct {
|
||||||
|
stop chan struct{}
|
||||||
|
reqs chan stepQueueRec
|
||||||
|
}
|
||||||
|
|
||||||
|
type stepQueueHeapItem struct {
|
||||||
|
idx int
|
||||||
|
req stepQueueRec
|
||||||
|
}
|
||||||
|
type stepQueueHeap []*stepQueueHeapItem
|
||||||
|
|
||||||
|
func (h stepQueueHeap) Less(i, j int) bool {
|
||||||
|
return h[i].req.targetDate.Before(h[j].req.targetDate)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h stepQueueHeap) Swap(i, j int) {
|
||||||
|
h[i], h[j] = h[j], h[i]
|
||||||
|
h[i].idx = i
|
||||||
|
h[j].idx = j
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h stepQueueHeap) Len() int {
|
||||||
|
return len(h)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *stepQueueHeap) Push(elem interface{}) {
|
||||||
|
hitem := elem.(*stepQueueHeapItem)
|
||||||
|
hitem.idx = h.Len()
|
||||||
|
*h = append(*h, hitem)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *stepQueueHeap) Pop() interface{} {
|
||||||
|
elem := (*h)[h.Len()-1]
|
||||||
|
elem.idx = -1
|
||||||
|
*h = (*h)[:h.Len()-1]
|
||||||
|
return elem
|
||||||
|
}
|
||||||
|
|
||||||
|
// returned stepQueue must be closed with method Close
|
||||||
|
func newStepQueue() *stepQueue {
|
||||||
|
q := &stepQueue{
|
||||||
|
stop: make(chan struct{}),
|
||||||
|
reqs: make(chan stepQueueRec),
|
||||||
|
}
|
||||||
|
return q
|
||||||
|
}
|
||||||
|
|
||||||
|
// the returned done function must be called to free resources
|
||||||
|
// allocated by the call to Start
|
||||||
|
//
|
||||||
|
// No WaitReady calls must be active at the time done is called
|
||||||
|
// The behavior of calling WaitReady after done was called is undefined
|
||||||
|
func (q *stepQueue) Start(concurrency int) (done func()) {
|
||||||
|
if concurrency < 1 {
|
||||||
|
panic("concurrency must be >= 1")
|
||||||
|
}
|
||||||
|
// l protects pending and queueItems
|
||||||
|
l := chainlock.New()
|
||||||
|
pendingCond := l.NewCond()
|
||||||
|
// priority queue
|
||||||
|
pending := &stepQueueHeap{}
|
||||||
|
// ident => queueItem
|
||||||
|
queueItems := make(map[interface{}]*stepQueueHeapItem)
|
||||||
|
// stopped is used for cancellation of "wake" goroutine
|
||||||
|
stopped := false
|
||||||
|
active := 0
|
||||||
|
go func() { // "stopper" goroutine
|
||||||
|
<-q.stop
|
||||||
|
defer l.Lock().Unlock()
|
||||||
|
stopped = true
|
||||||
|
pendingCond.Broadcast()
|
||||||
|
}()
|
||||||
|
go func() { // "reqs" goroutine
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-q.stop:
|
||||||
|
select {
|
||||||
|
case <-q.reqs:
|
||||||
|
panic("WaitReady call active while calling Close")
|
||||||
|
default:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case req := <-q.reqs:
|
||||||
|
func() {
|
||||||
|
defer l.Lock().Unlock()
|
||||||
|
if _, ok := queueItems[req.ident]; ok {
|
||||||
|
panic("WaitReady must not be called twice for the same ident")
|
||||||
|
}
|
||||||
|
qitem := &stepQueueHeapItem{
|
||||||
|
req: req,
|
||||||
|
}
|
||||||
|
queueItems[req.ident] = qitem
|
||||||
|
heap.Push(pending, qitem)
|
||||||
|
pendingCond.Broadcast()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
go func() { // "wake" goroutine
|
||||||
|
defer l.Lock().Unlock()
|
||||||
|
for {
|
||||||
|
|
||||||
|
for !stopped && (active >= concurrency || pending.Len() == 0) {
|
||||||
|
pendingCond.Wait()
|
||||||
|
}
|
||||||
|
if stopped {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if pending.Len() <= 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
active++
|
||||||
|
next := heap.Pop(pending).(*stepQueueHeapItem).req
|
||||||
|
delete(queueItems, next.ident)
|
||||||
|
|
||||||
|
next.wakeup <- func() {
|
||||||
|
defer l.Lock().Unlock()
|
||||||
|
active--
|
||||||
|
pendingCond.Broadcast()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
done = func() {
|
||||||
|
close(q.stop)
|
||||||
|
}
|
||||||
|
return done
|
||||||
|
}
|
||||||
|
|
||||||
|
// StepCompletedFunc is returned by WaitReady; the caller invokes it when
// its step has completed, which frees its slot in the step queue.
type StepCompletedFunc func()
|
||||||
|
|
||||||
|
func (q *stepQueue) sendAndWaitForWakeup(ident interface{}, targetDate time.Time) StepCompletedFunc {
|
||||||
|
req := stepQueueRec{
|
||||||
|
ident,
|
||||||
|
targetDate,
|
||||||
|
make(chan StepCompletedFunc),
|
||||||
|
}
|
||||||
|
q.reqs <- req
|
||||||
|
return <-req.wakeup
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the ident with targetDate to be selected to run.
|
||||||
|
func (q *stepQueue) WaitReady(ident interface{}, targetDate time.Time) StepCompletedFunc {
|
||||||
|
if targetDate.IsZero() {
|
||||||
|
panic("targetDate of zero is reserved for marking Done")
|
||||||
|
}
|
||||||
|
return q.sendAndWaitForWakeup(ident, targetDate)
|
||||||
|
}
|
166
replication/driver/replication_stepqueue_test.go
Normal file
166
replication/driver/replication_stepqueue_test.go
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
package driver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"sort"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/montanaflynn/stats"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPqNotconcurrent(t *testing.T) {
|
||||||
|
|
||||||
|
var ctr uint32
|
||||||
|
q := newStepQueue()
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(3)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
defer q.WaitReady("1", time.Unix(1, 0))()
|
||||||
|
ret := atomic.AddUint32(&ctr, 1)
|
||||||
|
assert.Equal(t, uint32(1), ret)
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
defer q.WaitReady("2", time.Unix(2, 0))()
|
||||||
|
ret := atomic.AddUint32(&ctr, 1)
|
||||||
|
assert.Equal(t, uint32(2), ret)
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
defer q.WaitReady("3", time.Unix(3, 0))()
|
||||||
|
ret := atomic.AddUint32(&ctr, 1)
|
||||||
|
assert.Equal(t, uint32(3), ret)
|
||||||
|
}()
|
||||||
|
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
defer q.Start(1)()
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// record describes one step taken by one simulated filesystem in
// TestPqConcurrent.
type record struct {
	fs        int
	step      int
	globalCtr uint32
	wakeAt    time.Duration // relative to begin
}

// String renders the record with zero-padded, fixed-width fields.
func (r record) String() string {
	const layout = "fs %08d step %08d globalCtr %08d wakeAt %2.8f"
	seconds := r.wakeAt.Seconds()
	return fmt.Sprintf(layout, r.fs, r.step, r.globalCtr, seconds)
}
|
||||||
|
|
||||||
|
// This tests uses stepPq concurrently, simulating the following scenario:
|
||||||
|
// Given a number of filesystems F, each filesystem has N steps to take.
|
||||||
|
// The number of concurrent steps is limited to C.
|
||||||
|
// The target date for each step is the step number N.
|
||||||
|
// Hence, there are always F filesystems runnable (calling WaitReady)
|
||||||
|
// The priority queue prioritizes steps with lower target data (= lower step number).
|
||||||
|
// Hence, all steps with lower numbers should be woken up before steps with higher numbers.
|
||||||
|
// However, scheduling is not 100% deterministic (runtime, OS scheduler, etc).
|
||||||
|
// Hence, perform some statistics on the wakeup times and assert that the mean wakeup
|
||||||
|
// times for each step are close together.
|
||||||
|
func TestPqConcurrent(t *testing.T) {
|
||||||
|
|
||||||
|
q := newStepQueue()
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
filesystems := 100
|
||||||
|
stepsPerFS := 20
|
||||||
|
sleepTimePerStep := 50 * time.Millisecond
|
||||||
|
wg.Add(filesystems)
|
||||||
|
var globalCtr uint32
|
||||||
|
|
||||||
|
begin := time.Now()
|
||||||
|
records := make(chan []record, filesystems)
|
||||||
|
for fs := 0; fs < filesystems; fs++ {
|
||||||
|
go func(fs int) {
|
||||||
|
defer wg.Done()
|
||||||
|
recs := make([]record, 0)
|
||||||
|
for step := 0; step < stepsPerFS; step++ {
|
||||||
|
pos := atomic.AddUint32(&globalCtr, 1)
|
||||||
|
t := time.Unix(int64(step), 0)
|
||||||
|
done := q.WaitReady(fs, t)
|
||||||
|
wakeAt := time.Now().Sub(begin)
|
||||||
|
time.Sleep(sleepTimePerStep)
|
||||||
|
done()
|
||||||
|
recs = append(recs, record{fs, step, pos, wakeAt})
|
||||||
|
}
|
||||||
|
records <- recs
|
||||||
|
}(fs)
|
||||||
|
}
|
||||||
|
concurrency := 5
|
||||||
|
defer q.Start(concurrency)()
|
||||||
|
wg.Wait()
|
||||||
|
close(records)
|
||||||
|
t.Logf("loop done")
|
||||||
|
|
||||||
|
flattenedRecs := make([]record, 0)
|
||||||
|
for recs := range records {
|
||||||
|
flattenedRecs = append(flattenedRecs, recs...)
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Slice(flattenedRecs, func(i, j int) bool {
|
||||||
|
return flattenedRecs[i].globalCtr < flattenedRecs[j].globalCtr
|
||||||
|
})
|
||||||
|
|
||||||
|
wakeTimesByStep := map[int][]float64{}
|
||||||
|
for _, rec := range flattenedRecs {
|
||||||
|
wakeTimes, ok := wakeTimesByStep[rec.step]
|
||||||
|
if !ok {
|
||||||
|
wakeTimes = []float64{}
|
||||||
|
}
|
||||||
|
wakeTimes = append(wakeTimes, rec.wakeAt.Seconds())
|
||||||
|
wakeTimesByStep[rec.step] = wakeTimes
|
||||||
|
}
|
||||||
|
|
||||||
|
meansByStepId := make([]float64, stepsPerFS)
|
||||||
|
interQuartileRangesByStepIdx := make([]float64, stepsPerFS)
|
||||||
|
for step := 0; step < stepsPerFS; step++ {
|
||||||
|
t.Logf("step %d", step)
|
||||||
|
mean, _ := stats.Mean(wakeTimesByStep[step])
|
||||||
|
meansByStepId[step] = mean
|
||||||
|
t.Logf("\tmean: %v", mean)
|
||||||
|
median, _ := stats.Median(wakeTimesByStep[step])
|
||||||
|
t.Logf("\tmedian: %v", median)
|
||||||
|
midhinge, _ := stats.Midhinge(wakeTimesByStep[step])
|
||||||
|
t.Logf("\tmidhinge: %v", midhinge)
|
||||||
|
min, _ := stats.Min(wakeTimesByStep[step])
|
||||||
|
t.Logf("\tmin: %v", min)
|
||||||
|
max, _ := stats.Max(wakeTimesByStep[step])
|
||||||
|
t.Logf("\tmax: %v", max)
|
||||||
|
quartiles, _ := stats.Quartile(wakeTimesByStep[step])
|
||||||
|
t.Logf("\t%#v", quartiles)
|
||||||
|
interQuartileRange, _ := stats.InterQuartileRange(wakeTimesByStep[step])
|
||||||
|
t.Logf("\tinter-quartile range: %v", interQuartileRange)
|
||||||
|
interQuartileRangesByStepIdx[step] = interQuartileRange
|
||||||
|
}
|
||||||
|
|
||||||
|
iqrMean, _ := stats.Mean(interQuartileRangesByStepIdx)
|
||||||
|
t.Logf("inter-quartile-range mean: %v", iqrMean)
|
||||||
|
iqrDev, _ := stats.StandardDeviation(interQuartileRangesByStepIdx)
|
||||||
|
t.Logf("inter-quartile-range deviation: %v", iqrDev)
|
||||||
|
|
||||||
|
// each step should have the same "distribution" (=~ "spread")
|
||||||
|
assert.True(t, iqrDev < 0.01)
|
||||||
|
|
||||||
|
minTimeForAllStepsWithIdxI := sleepTimePerStep.Seconds() * float64(filesystems) / float64(concurrency)
|
||||||
|
t.Logf("minTimeForAllStepsWithIdxI = %11.8f", minTimeForAllStepsWithIdxI)
|
||||||
|
for i, mean := range meansByStepId {
|
||||||
|
// we can't just do (i + 0.5) * minTimeforAllStepsWithIdxI
|
||||||
|
// because this doesn't account for drift
|
||||||
|
idealMean := 0.5 * minTimeForAllStepsWithIdxI
|
||||||
|
if i > 0 {
|
||||||
|
previousMean := meansByStepId[i-1]
|
||||||
|
idealMean = previousMean + minTimeForAllStepsWithIdxI
|
||||||
|
}
|
||||||
|
deltaFromIdeal := idealMean - mean
|
||||||
|
t.Logf("step %02d delta from ideal mean wake time: %11.8f - %11.8f = %11.8f", i, idealMean, mean, deltaFromIdeal)
|
||||||
|
assert.True(t, math.Abs(deltaFromIdeal) < 0.05)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,557 +0,0 @@
|
|||||||
// Package fsrep implements replication of a single file system with existing versions
|
|
||||||
// from a sender to a receiver.
|
|
||||||
package fsrep
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"net"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
|
||||||
"github.com/zrepl/zrepl/logger"
|
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
|
||||||
"github.com/zrepl/zrepl/util/bytecounter"
|
|
||||||
"github.com/zrepl/zrepl/util/watchdog"
|
|
||||||
"github.com/zrepl/zrepl/zfs"
|
|
||||||
)
|
|
||||||
|
|
||||||
// contextKey is the private type of this package's context keys.
type contextKey int

// contextKeyLogger is the context key under which the Logger is stored.
const contextKeyLogger contextKey = iota
|
|
||||||
|
|
||||||
// Logger is an alias for logger.Logger, so that this package's API can
// refer to the logger type by a local name.
type Logger = logger.Logger
|
|
||||||
|
|
||||||
func WithLogger(ctx context.Context, log Logger) context.Context {
|
|
||||||
return context.WithValue(ctx, contextKeyLogger, log)
|
|
||||||
}
|
|
||||||
|
|
||||||
func getLogger(ctx context.Context) Logger {
|
|
||||||
l, ok := ctx.Value(contextKeyLogger).(Logger)
|
|
||||||
if !ok {
|
|
||||||
l = logger.NewNullLogger()
|
|
||||||
}
|
|
||||||
return l
|
|
||||||
}
|
|
||||||
|
|
||||||
// A Sender is usually part of a github.com/zrepl/zrepl/replication.Endpoint.
|
|
||||||
type Sender interface {
|
|
||||||
// If a non-nil io.ReadCloser is returned, it is guaranteed to be closed before
|
|
||||||
// any next call to the parent github.com/zrepl/zrepl/replication.Endpoint.
|
|
||||||
// If the send request is for dry run the io.ReadCloser will be nil
|
|
||||||
Send(ctx context.Context, r *pdu.SendReq) (*pdu.SendRes, zfs.StreamCopier, error)
|
|
||||||
ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
// A Receiver is usually part of a github.com/zrepl/zrepl/replication.Endpoint.
type Receiver interface {
	// Receive sends req and the stream behind receive (the latter containing a ZFS send stream)
	// to the parent github.com/zrepl/zrepl/replication.Endpoint.
	Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error)
}
|
|
||||||
|
|
||||||
// StepReport is a snapshot of the state of a single ReplicationStep,
// as produced by (*ReplicationStep).Report.
type StepReport struct {
	// From and To are the relative names (with @ or #) of the step's versions.
	// From is empty if the step has no from version (full send).
	From, To string
	// Status is the step's state at the time of the report.
	Status StepState
	// Problem is the step's error text, or empty if the step has no error.
	Problem string
	// Bytes is the number of bytes counted by the step's byte counter so far.
	Bytes         int64
	ExpectedBytes int64 // 0 means no size estimate possible
}
|
|
||||||
|
|
||||||
// Report is a snapshot of the state of a Replication,
// as produced by (*Replication).Report.
type Report struct {
	// Filesystem is the name of the replicated filesystem.
	Filesystem string
	// Status is the string form of the Replication's State.
	Status string
	// Problem is the error text of the Replication's error, but only if that
	// error reports LocalToFS(); otherwise it is empty.
	Problem string
	// Completed and Pending hold one StepReport per completed / pending step.
	Completed, Pending []*StepReport
}
|
|
||||||
|
|
||||||
// State is the state of a Replication. Values are distinct bits.
//
//go:generate enumer -type=State
type State uint

const (
	// Ready means there are pending steps left.
	Ready State = 1 << iota
	// Completed means there are no pending steps left.
	Completed
)
|
|
||||||
|
|
||||||
// Error is the error type reported by a Replication. In addition to the
// error text it classifies the failure.
type Error interface {
	error
	// Temporary is consulted by CanRetry: a Replication whose error is not
	// temporary is not retried.
	Temporary() bool
	// ContextErr reports whether the error is a context cancellation or
	// deadline error.
	ContextErr() bool
	// LocalToFS reports whether the error is specific to this filesystem;
	// only such errors are surfaced in Report.Problem.
	LocalToFS() bool
}
|
|
||||||
|
|
||||||
// Replication tracks the replication of a single filesystem as an ordered
// list of steps. Retry takes one pending step at a time and moves it to the
// completed list when it succeeds.
type Replication struct {
	// promBytesReplicated is increased by the number of bytes counted for
	// each executed send/receive.
	promBytesReplicated prometheus.Counter

	// fs is the name of the filesystem being replicated; it is returned
	// by FS() without locking.
	fs string

	// lock protects all fields below it in this struct, but not the data behind pointers
	lock               sync.Mutex
	state              State
	err                Error
	completed, pending []*ReplicationStep
}
|
|
||||||
|
|
||||||
func (f *Replication) State() State {
|
|
||||||
f.lock.Lock()
|
|
||||||
defer f.lock.Unlock()
|
|
||||||
return f.state
|
|
||||||
}
|
|
||||||
|
|
||||||
// FS returns the name of the filesystem this replication belongs to.
func (f *Replication) FS() string {
	return f.fs
}
|
|
||||||
|
|
||||||
// returns zero value time.Time{} if no more pending steps
|
|
||||||
func (f *Replication) NextStepDate() time.Time {
|
|
||||||
if len(f.pending) == 0 {
|
|
||||||
return time.Time{}
|
|
||||||
}
|
|
||||||
return f.pending[0].to.SnapshotTime()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Replication) Err() Error {
|
|
||||||
f.lock.Lock()
|
|
||||||
defer f.lock.Unlock()
|
|
||||||
return f.err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Replication) CanRetry() bool {
|
|
||||||
f.lock.Lock()
|
|
||||||
defer f.lock.Unlock()
|
|
||||||
if f.state == Completed {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if f.state != Ready {
|
|
||||||
panic(fmt.Sprintf("implementation error: %v", f.state))
|
|
||||||
}
|
|
||||||
if f.err == nil {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return f.err.Temporary()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Replication) UpdateSizeEsitmate(ctx context.Context, sender Sender) error {
|
|
||||||
f.lock.Lock()
|
|
||||||
defer f.lock.Unlock()
|
|
||||||
for _, e := range f.pending {
|
|
||||||
if err := e.updateSizeEstimate(ctx, sender); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ReplicationBuilder assembles a Replication step by step.
// Obtain one with BuildReplication, add steps with AddStep and finish with Done.
type ReplicationBuilder struct {
	// r is the Replication under construction; Done sets it to nil.
	r *Replication
}
|
|
||||||
|
|
||||||
func BuildReplication(fs string, promBytesReplicated prometheus.Counter) *ReplicationBuilder {
|
|
||||||
return &ReplicationBuilder{&Replication{fs: fs, promBytesReplicated: promBytesReplicated}}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (b *ReplicationBuilder) AddStep(from, to FilesystemVersion) *ReplicationBuilder {
|
|
||||||
step := &ReplicationStep{
|
|
||||||
state: StepReplicationReady,
|
|
||||||
parent: b.r,
|
|
||||||
from: from,
|
|
||||||
to: to,
|
|
||||||
}
|
|
||||||
b.r.pending = append(b.r.pending, step)
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|
||||||
func (b *ReplicationBuilder) Done() (r *Replication) {
|
|
||||||
if len(b.r.pending) > 0 {
|
|
||||||
b.r.state = Ready
|
|
||||||
} else {
|
|
||||||
b.r.state = Completed
|
|
||||||
}
|
|
||||||
r = b.r
|
|
||||||
b.r = nil
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// ReplicationConflictError wraps the error that makes replication of a
// filesystem impossible. It classifies itself as permanent (neither
// temporary nor a timeout), local to the filesystem, and not a context error.
type ReplicationConflictError struct {
	Err error
}

// Timeout always returns false.
func (e *ReplicationConflictError) Timeout() bool {
	return false
}

// Temporary always returns false: a conflict is never retried.
func (e *ReplicationConflictError) Temporary() bool {
	return false
}

// Error returns the wrapped error's text prefixed with "permanent error: ".
func (e *ReplicationConflictError) Error() string {
	wrapped := e.Err.Error()
	return fmt.Sprintf("permanent error: %s", wrapped)
}

// LocalToFS always returns true.
func (e *ReplicationConflictError) LocalToFS() bool {
	return true
}

// ContextErr always returns false.
func (e *ReplicationConflictError) ContextErr() bool {
	return false
}
|
|
||||||
|
|
||||||
func NewReplicationConflictError(fs string, err error) *Replication {
|
|
||||||
return &Replication{
|
|
||||||
state: Completed,
|
|
||||||
fs: fs,
|
|
||||||
err: &ReplicationConflictError{Err: err},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// StepState is the state of a ReplicationStep. Values are distinct bits so
// that several states can be tested with a single mask.
//
//go:generate enumer -type=StepState
type StepState uint

const (
	// StepReplicationReady: the send/receive has not completed yet.
	StepReplicationReady StepState = 1 << iota
	// StepMarkReplicatedReady: data was received, the replication cursor
	// still has to be advanced.
	StepMarkReplicatedReady
	// StepCompleted: nothing left to do for this step.
	StepCompleted
)

// IsTerminal reports whether s is StepCompleted, the only state that has
// no successor.
func (s StepState) IsTerminal() bool {
	return s == StepCompleted
}
|
|
||||||
|
|
||||||
// FilesystemVersion is the view of a filesystem version (the name comments
// below refer to the @ and # prefixes) that this package needs to build and
// describe replication steps.
type FilesystemVersion interface {
	// SnapshotTime is used by NextStepDate to date a pending step.
	SnapshotTime() time.Time
	GetName() string // name without @ or #
	RelName() string // name with @ or #
}
|
|
||||||
|
|
||||||
// ReplicationStep is a single replication step: sending the stream from
// version from (nil for a full send) to version to, and afterwards
// advancing the replication cursor on the sender.
type ReplicationStep struct {
	// only protects state, err
	// from, to and parent are assumed to be immutable
	// NOTE(review): none of the methods visible in this file acquire this lock — confirm.
	lock sync.Mutex

	state    StepState
	from, to FilesystemVersion
	parent   *Replication

	// both retry and permanent error
	// NOTE(review): no code visible in this file assigns this field — confirm.
	err error

	// byteCounter wraps the send stream of the most recent doReplication
	// call; it is nil until the first send request returned a stream.
	byteCounter  bytecounter.StreamCopier
	expectedSize int64 // 0 means no size estimate present / possible
}
|
|
||||||
|
|
||||||
func (f *Replication) Retry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) Error {
|
|
||||||
|
|
||||||
var u updater = func(fu func(*Replication)) State {
|
|
||||||
f.lock.Lock()
|
|
||||||
defer f.lock.Unlock()
|
|
||||||
if fu != nil {
|
|
||||||
fu(f)
|
|
||||||
}
|
|
||||||
return f.state
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
var current *ReplicationStep
|
|
||||||
pre := u(nil)
|
|
||||||
getLogger(ctx).WithField("fsrep_state", pre).Debug("begin fsrep.Retry")
|
|
||||||
defer func() {
|
|
||||||
post := u(nil)
|
|
||||||
getLogger(ctx).WithField("fsrep_transition", post).Debug("end fsrep.Retry")
|
|
||||||
}()
|
|
||||||
|
|
||||||
st := u(func(f *Replication) {
|
|
||||||
if len(f.pending) == 0 {
|
|
||||||
f.state = Completed
|
|
||||||
return
|
|
||||||
}
|
|
||||||
current = f.pending[0]
|
|
||||||
})
|
|
||||||
if st == Completed {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if st != Ready {
|
|
||||||
panic(fmt.Sprintf("implementation error: %v", st))
|
|
||||||
}
|
|
||||||
|
|
||||||
stepCtx := WithLogger(ctx, getLogger(ctx).WithField("step", current))
|
|
||||||
getLogger(stepCtx).Debug("take step")
|
|
||||||
err := current.Retry(stepCtx, ka, sender, receiver)
|
|
||||||
if err != nil {
|
|
||||||
getLogger(stepCtx).WithError(err).Error("step could not be completed")
|
|
||||||
}
|
|
||||||
|
|
||||||
u(func(fsr *Replication) {
|
|
||||||
if err != nil {
|
|
||||||
f.err = &StepError{stepStr: current.String(), err: err}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if err == nil && current.state != StepCompleted {
|
|
||||||
panic(fmt.Sprintf("implementation error: %v", current.state))
|
|
||||||
}
|
|
||||||
f.err = nil
|
|
||||||
f.completed = append(f.completed, current)
|
|
||||||
f.pending = f.pending[1:]
|
|
||||||
if len(f.pending) > 0 {
|
|
||||||
f.state = Ready
|
|
||||||
} else {
|
|
||||||
f.state = Completed
|
|
||||||
}
|
|
||||||
})
|
|
||||||
var retErr Error = nil
|
|
||||||
u(func(fsr *Replication) {
|
|
||||||
retErr = fsr.err
|
|
||||||
})
|
|
||||||
return retErr
|
|
||||||
}
|
|
||||||
|
|
||||||
// updater runs the given function (if non-nil) on the Replication while
// holding its lock and returns the Replication's state afterwards.
// See the implementation in (*Replication).Retry.
type updater func(func(fsr *Replication)) State

// state is the signature of a state function that returns its successor.
// NOTE(review): no use of this type is visible in this file — confirm whether it is still needed.
type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state
|
|
||||||
|
|
||||||
type StepError struct {
|
|
||||||
stepStr string
|
|
||||||
err error
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ Error = &StepError{}
|
|
||||||
|
|
||||||
func (e StepError) Error() string {
|
|
||||||
if e.LocalToFS() {
|
|
||||||
return fmt.Sprintf("step %s failed: %s", e.stepStr, e.err)
|
|
||||||
}
|
|
||||||
return e.err.Error()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e StepError) Timeout() bool {
|
|
||||||
if neterr, ok := e.err.(net.Error); ok {
|
|
||||||
return neterr.Timeout()
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e StepError) Temporary() bool {
|
|
||||||
if neterr, ok := e.err.(net.Error); ok {
|
|
||||||
return neterr.Temporary()
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e StepError) LocalToFS() bool {
|
|
||||||
if _, ok := e.err.(net.Error); ok {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true // conservative approximation: we'd like to check for specific errors returned over RPC here...
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e StepError) ContextErr() bool {
|
|
||||||
switch e.err {
|
|
||||||
case context.Canceled:
|
|
||||||
return true
|
|
||||||
case context.DeadlineExceeded:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fsr *Replication) Report() *Report {
|
|
||||||
fsr.lock.Lock()
|
|
||||||
defer fsr.lock.Unlock()
|
|
||||||
|
|
||||||
rep := Report{
|
|
||||||
Filesystem: fsr.fs,
|
|
||||||
Status: fsr.state.String(),
|
|
||||||
}
|
|
||||||
|
|
||||||
if fsr.err != nil && fsr.err.LocalToFS() {
|
|
||||||
rep.Problem = fsr.err.Error()
|
|
||||||
}
|
|
||||||
|
|
||||||
rep.Completed = make([]*StepReport, len(fsr.completed))
|
|
||||||
for i := range fsr.completed {
|
|
||||||
rep.Completed[i] = fsr.completed[i].Report()
|
|
||||||
}
|
|
||||||
rep.Pending = make([]*StepReport, len(fsr.pending))
|
|
||||||
for i := range fsr.pending {
|
|
||||||
rep.Pending[i] = fsr.pending[i].Report()
|
|
||||||
}
|
|
||||||
|
|
||||||
return &rep
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReplicationStep) Retry(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) error {
|
|
||||||
switch s.state {
|
|
||||||
case StepReplicationReady:
|
|
||||||
return s.doReplication(ctx, ka, sender, receiver)
|
|
||||||
case StepMarkReplicatedReady:
|
|
||||||
return s.doMarkReplicated(ctx, ka, sender)
|
|
||||||
case StepCompleted:
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
panic(fmt.Sprintf("implementation error: %v", s.state))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReplicationStep) Error() error {
|
|
||||||
if s.state & (StepReplicationReady|StepMarkReplicatedReady) != 0 {
|
|
||||||
return s.err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReplicationStep) doReplication(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver) error {
|
|
||||||
|
|
||||||
if s.state != StepReplicationReady {
|
|
||||||
panic(fmt.Sprintf("implementation error: %v", s.state))
|
|
||||||
}
|
|
||||||
|
|
||||||
fs := s.parent.fs
|
|
||||||
|
|
||||||
log := getLogger(ctx)
|
|
||||||
sr := s.buildSendRequest(false)
|
|
||||||
|
|
||||||
log.Debug("initiate send request")
|
|
||||||
sres, sstreamCopier, err := sender.Send(ctx, sr)
|
|
||||||
if err != nil {
|
|
||||||
log.WithError(err).Error("send request failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if sstreamCopier == nil {
|
|
||||||
err := errors.New("send request did not return a stream, broken endpoint implementation")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer sstreamCopier.Close()
|
|
||||||
|
|
||||||
// Install a byte counter to track progress + for status report
|
|
||||||
s.byteCounter = bytecounter.NewStreamCopier(sstreamCopier)
|
|
||||||
byteCounterStopProgress := make(chan struct{})
|
|
||||||
defer close(byteCounterStopProgress)
|
|
||||||
go func() {
|
|
||||||
var lastCount int64
|
|
||||||
t := time.NewTicker(1 * time.Second)
|
|
||||||
defer t.Stop()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-byteCounterStopProgress:
|
|
||||||
return
|
|
||||||
case <-t.C:
|
|
||||||
newCount := s.byteCounter.Count()
|
|
||||||
if lastCount != newCount {
|
|
||||||
ka.MadeProgress()
|
|
||||||
} else {
|
|
||||||
lastCount = newCount
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
defer func() {
|
|
||||||
s.parent.promBytesReplicated.Add(float64(s.byteCounter.Count()))
|
|
||||||
}()
|
|
||||||
|
|
||||||
rr := &pdu.ReceiveReq{
|
|
||||||
Filesystem: fs,
|
|
||||||
ClearResumeToken: !sres.UsedResumeToken,
|
|
||||||
}
|
|
||||||
log.Debug("initiate receive request")
|
|
||||||
_, err = receiver.Receive(ctx, rr, s.byteCounter)
|
|
||||||
if err != nil {
|
|
||||||
log.
|
|
||||||
WithError(err).
|
|
||||||
WithField("errType", fmt.Sprintf("%T", err)).
|
|
||||||
Error("receive request failed (might also be error on sender)")
|
|
||||||
// This failure could be due to
|
|
||||||
// - an unexpected exit of ZFS on the sending side
|
|
||||||
// - an unexpected exit of ZFS on the receiving side
|
|
||||||
// - a connectivity issue
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
log.Debug("receive finished")
|
|
||||||
ka.MadeProgress()
|
|
||||||
|
|
||||||
s.state = StepMarkReplicatedReady
|
|
||||||
return s.doMarkReplicated(ctx, ka, sender)
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReplicationStep) doMarkReplicated(ctx context.Context, ka *watchdog.KeepAlive, sender Sender) error {
|
|
||||||
|
|
||||||
if s.state != StepMarkReplicatedReady {
|
|
||||||
panic(fmt.Sprintf("implementation error: %v", s.state))
|
|
||||||
}
|
|
||||||
|
|
||||||
log := getLogger(ctx)
|
|
||||||
|
|
||||||
log.Debug("advance replication cursor")
|
|
||||||
req := &pdu.ReplicationCursorReq{
|
|
||||||
Filesystem: s.parent.fs,
|
|
||||||
Op: &pdu.ReplicationCursorReq_Set{
|
|
||||||
Set: &pdu.ReplicationCursorReq_SetOp{
|
|
||||||
Snapshot: s.to.GetName(),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
_, err := sender.ReplicationCursor(ctx, req)
|
|
||||||
if err != nil {
|
|
||||||
log.WithError(err).Error("error advancing replication cursor")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
ka.MadeProgress()
|
|
||||||
|
|
||||||
s.state = StepCompleted
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReplicationStep) updateSizeEstimate(ctx context.Context, sender Sender) error {
|
|
||||||
|
|
||||||
log := getLogger(ctx)
|
|
||||||
|
|
||||||
sr := s.buildSendRequest(true)
|
|
||||||
|
|
||||||
log.Debug("initiate dry run send request")
|
|
||||||
sres, _, err := sender.Send(ctx, sr)
|
|
||||||
if err != nil {
|
|
||||||
log.WithError(err).Error("dry run send request failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
s.expectedSize = sres.ExpectedSize
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReplicationStep) buildSendRequest(dryRun bool) (sr *pdu.SendReq) {
|
|
||||||
fs := s.parent.fs
|
|
||||||
if s.from == nil {
|
|
||||||
sr = &pdu.SendReq{
|
|
||||||
Filesystem: fs,
|
|
||||||
To: s.to.RelName(),
|
|
||||||
DryRun: dryRun,
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sr = &pdu.SendReq{
|
|
||||||
Filesystem: fs,
|
|
||||||
From: s.from.RelName(),
|
|
||||||
To: s.to.RelName(),
|
|
||||||
DryRun: dryRun,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReplicationStep) String() string {
|
|
||||||
if s.from == nil { // FIXME: ZFS semantics are that to is nil on non-incremental send
|
|
||||||
return fmt.Sprintf("%s%s (full)", s.parent.fs, s.to.RelName())
|
|
||||||
} else {
|
|
||||||
return fmt.Sprintf("%s(%s => %s)", s.parent.fs, s.from.RelName(), s.to.RelName())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ReplicationStep) Report() *StepReport {
|
|
||||||
var from string // FIXME follow same convention as ZFS: to should be nil on full send
|
|
||||||
if s.from != nil {
|
|
||||||
from = s.from.RelName()
|
|
||||||
}
|
|
||||||
bytes := int64(0)
|
|
||||||
if s.byteCounter != nil {
|
|
||||||
bytes = s.byteCounter.Count()
|
|
||||||
}
|
|
||||||
problem := ""
|
|
||||||
if s.err != nil {
|
|
||||||
problem = s.err.Error()
|
|
||||||
}
|
|
||||||
rep := StepReport{
|
|
||||||
From: from,
|
|
||||||
To: s.to.RelName(),
|
|
||||||
Status: s.state,
|
|
||||||
Problem: problem,
|
|
||||||
Bytes: bytes,
|
|
||||||
ExpectedBytes: s.expectedSize,
|
|
||||||
}
|
|
||||||
return &rep
|
|
||||||
}
|
|
@ -1,50 +0,0 @@
|
|||||||
// Code generated by "enumer -type=State"; DO NOT EDIT.
|
|
||||||
|
|
||||||
package fsrep
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
)
|
|
||||||
|
|
||||||
// _StateName is the concatenation of the names of all State values;
// _StateIndex holds the offsets that delimit each name within it.
const _StateName = "ReadyCompleted"

var _StateIndex = [...]uint8{0, 5, 14}

// String returns the name of i, or "State(N)" if i is not one of the
// listed values.
func (i State) String() string {
	// Values start at 1, the index table at 0.
	i -= 1
	if i >= State(len(_StateIndex)-1) {
		return fmt.Sprintf("State(%d)", i+1)
	}
	return _StateName[_StateIndex[i]:_StateIndex[i+1]]
}

// _StateValues lists all values of the enum.
var _StateValues = []State{1, 2}

// _StateNameToValueMap maps each name to its value.
var _StateNameToValueMap = map[string]State{
	_StateName[0:5]:  1,
	_StateName[5:14]: 2,
}

// StateString retrieves an enum value from the enum constants string name.
// Throws an error if the param is not part of the enum.
func StateString(s string) (State, error) {
	if val, ok := _StateNameToValueMap[s]; ok {
		return val, nil
	}
	return 0, fmt.Errorf("%s does not belong to State values", s)
}

// StateValues returns all values of the enum
func StateValues() []State {
	return _StateValues
}

// IsAState returns "true" if the value is listed in the enum definition. "false" otherwise
func (i State) IsAState() bool {
	for _, v := range _StateValues {
		if i == v {
			return true
		}
	}
	return false
}
|
|
@ -1,61 +0,0 @@
|
|||||||
// Code generated by "enumer -type=StepState"; DO NOT EDIT.
|
|
||||||
|
|
||||||
package fsrep
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
)
|
|
||||||
|
|
||||||
// The names of the StepState values are stored in two runs because the
// values (1, 2, 4) are not contiguous: run 0 covers values 1 and 2,
// run 1 covers value 4.
const (
	_StepStateName_0 = "StepReplicationReadyStepMarkReplicatedReady"
	_StepStateName_1 = "StepCompleted"
)

// Offsets that delimit each name within the corresponding run.
var (
	_StepStateIndex_0 = [...]uint8{0, 20, 43}
	_StepStateIndex_1 = [...]uint8{0, 13}
)

// String returns the name of i, or "StepState(N)" if i is not one of the
// listed values.
func (i StepState) String() string {
	switch {
	case 1 <= i && i <= 2:
		i -= 1
		return _StepStateName_0[_StepStateIndex_0[i]:_StepStateIndex_0[i+1]]
	case i == 4:
		return _StepStateName_1
	default:
		return fmt.Sprintf("StepState(%d)", i)
	}
}

// _StepStateValues lists all values of the enum.
var _StepStateValues = []StepState{1, 2, 4}

// _StepStateNameToValueMap maps each name to its value.
var _StepStateNameToValueMap = map[string]StepState{
	_StepStateName_0[0:20]:  1,
	_StepStateName_0[20:43]: 2,
	_StepStateName_1[0:13]:  4,
}

// StepStateString retrieves an enum value from the enum constants string name.
// Throws an error if the param is not part of the enum.
func StepStateString(s string) (StepState, error) {
	if val, ok := _StepStateNameToValueMap[s]; ok {
		return val, nil
	}
	return 0, fmt.Errorf("%s does not belong to StepState values", s)
}

// StepStateValues returns all values of the enum
func StepStateValues() []StepState {
	return _StepStateValues
}

// IsAStepState returns "true" if the value is listed in the enum definition. "false" otherwise
func (i StepState) IsAStepState() bool {
	for _, v := range _StepStateValues {
		if i == v {
			return true
		}
	}
	return false
}
|
|
@ -1,9 +1,11 @@
|
|||||||
package mainfsm
|
package diff
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
. "github.com/zrepl/zrepl/replication/pdu"
|
. "github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ConflictNoCommonAncestor struct {
|
type ConflictNoCommonAncestor struct {
|
||||||
@ -11,7 +13,19 @@ type ConflictNoCommonAncestor struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *ConflictNoCommonAncestor) Error() string {
|
func (c *ConflictNoCommonAncestor) Error() string {
|
||||||
return "no common snapshot or suitable bookmark between sender and receiver"
|
var buf strings.Builder
|
||||||
|
buf.WriteString("no common snapshot or suitable bookmark between sender and receiver")
|
||||||
|
if len(c.SortedReceiverVersions) > 0 || len(c.SortedSenderVersions) > 0 {
|
||||||
|
buf.WriteString(":\n sorted sender versions:\n")
|
||||||
|
for _, v := range c.SortedSenderVersions {
|
||||||
|
fmt.Fprintf(&buf, " %s\n", v.RelName())
|
||||||
|
}
|
||||||
|
buf.WriteString(" sorted receiver versions:\n")
|
||||||
|
for _, v := range c.SortedReceiverVersions {
|
||||||
|
fmt.Fprintf(&buf, " %s\n", v.RelName())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
type ConflictDiverged struct {
|
type ConflictDiverged struct {
|
||||||
@ -21,7 +35,18 @@ type ConflictDiverged struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *ConflictDiverged) Error() string {
|
func (c *ConflictDiverged) Error() string {
|
||||||
return "the receiver's latest snapshot is not present on sender"
|
var buf strings.Builder
|
||||||
|
buf.WriteString("the receiver's latest snapshot is not present on sender:\n")
|
||||||
|
fmt.Fprintf(&buf, " last common: %s\n", c.CommonAncestor.RelName())
|
||||||
|
fmt.Fprintf(&buf, " sender-only:\n")
|
||||||
|
for _, v := range c.SenderOnly {
|
||||||
|
fmt.Fprintf(&buf, " %s\n", v.RelName())
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&buf, " receiver-only:\n")
|
||||||
|
for _, v := range c.ReceiverOnly {
|
||||||
|
fmt.Fprintf(&buf, " %s\n", v.RelName())
|
||||||
|
}
|
||||||
|
return buf.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
func SortVersionListByCreateTXGThenBookmarkLTSnapshot(fsvslice []*FilesystemVersion) []*FilesystemVersion {
|
func SortVersionListByCreateTXGThenBookmarkLTSnapshot(fsvslice []*FilesystemVersion) []*FilesystemVersion {
|
130
replication/logic/diff/diff_test.go
Normal file
130
replication/logic/diff/diff_test.go
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
package diff
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
. "github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
|
)
|
||||||
|
|
||||||
|
func fsvlist(fsv ...string) (r []*FilesystemVersion) {
|
||||||
|
|
||||||
|
r = make([]*FilesystemVersion, len(fsv))
|
||||||
|
for i, f := range fsv {
|
||||||
|
|
||||||
|
// parse the id from fsvlist. it is used to derivce Guid,CreateTXG and Creation attrs
|
||||||
|
split := strings.Split(f, ",")
|
||||||
|
if len(split) != 2 {
|
||||||
|
panic("invalid fsv spec")
|
||||||
|
}
|
||||||
|
id, err := strconv.Atoi(split[1])
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
creation := func(id int) string {
|
||||||
|
return FilesystemVersionCreation(time.Unix(0, 0).Add(time.Duration(id) * time.Second))
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(f, "#") {
|
||||||
|
r[i] = &FilesystemVersion{
|
||||||
|
Name: strings.TrimPrefix(f, "#"),
|
||||||
|
Type: FilesystemVersion_Bookmark,
|
||||||
|
Guid: uint64(id),
|
||||||
|
CreateTXG: uint64(id),
|
||||||
|
Creation: creation(id),
|
||||||
|
}
|
||||||
|
} else if strings.HasPrefix(f, "@") {
|
||||||
|
r[i] = &FilesystemVersion{
|
||||||
|
Name: strings.TrimPrefix(f, "@"),
|
||||||
|
Type: FilesystemVersion_Snapshot,
|
||||||
|
Guid: uint64(id),
|
||||||
|
CreateTXG: uint64(id),
|
||||||
|
Creation: creation(id),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
panic("invalid character")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func doTest(receiver, sender []*FilesystemVersion, validate func(incpath []*FilesystemVersion, conflict error)) {
|
||||||
|
p, err := IncrementalPath(receiver, sender)
|
||||||
|
validate(p, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestIncrementalPath_SnapshotsOnly checks IncrementalPath for version lists
// that contain only snapshots. In every doTest call the first list is the
// receiver's and the second the sender's.
func TestIncrementalPath_SnapshotsOnly(t *testing.T) {

	l := fsvlist

	// basic functionality
	doTest(l("@a,1", "@b,2"), l("@a,1", "@b,2", "@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) {
		assert.Equal(t, l("@b,2", "@c,3", "@d,4"), path)
	})

	// no common ancestor
	doTest(l(), l("@a,1"), func(path []*FilesystemVersion, conflict error) {
		assert.Nil(t, path)
		ca, ok := conflict.(*ConflictNoCommonAncestor)
		require.True(t, ok)
		assert.Equal(t, l("@a,1"), ca.SortedSenderVersions)
	})
	doTest(l("@a,1", "@b,2"), l("@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) {
		assert.Nil(t, path)
		ca, ok := conflict.(*ConflictNoCommonAncestor)
		require.True(t, ok)
		assert.Equal(t, l("@a,1", "@b,2"), ca.SortedReceiverVersions)
		assert.Equal(t, l("@c,3", "@d,4"), ca.SortedSenderVersions)
	})

	// divergence is detected
	doTest(l("@a,1", "@b1,2"), l("@a,1", "@b2,3"), func(path []*FilesystemVersion, conflict error) {
		assert.Nil(t, path)
		cd, ok := conflict.(*ConflictDiverged)
		require.True(t, ok)
		assert.Equal(t, l("@a,1")[0], cd.CommonAncestor)
		assert.Equal(t, l("@b1,2"), cd.ReceiverOnly)
		assert.Equal(t, l("@b2,3"), cd.SenderOnly)
	})

	// gaps before most recent common ancestor do not matter
	doTest(l("@a,1", "@b,2", "@c,3"), l("@a,1", "@c,3", "@d,4"), func(path []*FilesystemVersion, conflict error) {
		assert.Equal(t, l("@c,3", "@d,4"), path)
	})

	// a sender that has earlier versions in addition to the receiver's
	// current version is not a conflict
	doTest(l("@c,3"), l("@a,1", "@b,2", "@c,3") , func(path []*FilesystemVersion, conflict error) {
		t.Logf("path: %#v", path)
		t.Logf("conflict: %#v", conflict)
		assert.Empty(t, path)
		assert.Nil(t, conflict)
	})

}
|
||||||
|
|
||||||
|
// TestIncrementalPath_BookmarkSupport checks how IncrementalPath treats
// bookmarks ("#") on the sender side. In every doTest call the first list is
// the receiver's and the second the sender's.
func TestIncrementalPath_BookmarkSupport(t *testing.T) {
	l := fsvlist

	// bookmarks are used
	doTest(l("@a,1"), l("#a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) {
		assert.Equal(t, l("#a,1", "@b,2"), path)
	})

	// bookmarks after the common ancestor are stripped from IncrementalPath
	// (cannot send incrementally)
	doTest(l("@a,1"), l("#a,1", "#b,2", "@c,3"), func(path []*FilesystemVersion, conflict error) {
		assert.Equal(t, l("#a,1", "@c,3"), path)
	})

	// test that snapshots are preferred over bookmarks in IncrementalPath
	doTest(l("@a,1"), l("#a,1", "@a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) {
		assert.Equal(t, l("@a,1", "@b,2"), path)
	})
	doTest(l("@a,1"), l("@a,1", "#a,1", "@b,2"), func(path []*FilesystemVersion, conflict error) {
		assert.Equal(t, l("@a,1", "@b,2"), path)
	})

}
|
@ -43,7 +43,7 @@ func (x FilesystemVersion_VersionType) String() string {
|
|||||||
return proto.EnumName(FilesystemVersion_VersionType_name, int32(x))
|
return proto.EnumName(FilesystemVersion_VersionType_name, int32(x))
|
||||||
}
|
}
|
||||||
func (FilesystemVersion_VersionType) EnumDescriptor() ([]byte, []int) {
|
func (FilesystemVersion_VersionType) EnumDescriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{5, 0}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{5, 0}
|
||||||
}
|
}
|
||||||
|
|
||||||
type ListFilesystemReq struct {
|
type ListFilesystemReq struct {
|
||||||
@ -56,7 +56,7 @@ func (m *ListFilesystemReq) Reset() { *m = ListFilesystemReq{} }
|
|||||||
func (m *ListFilesystemReq) String() string { return proto.CompactTextString(m) }
|
func (m *ListFilesystemReq) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ListFilesystemReq) ProtoMessage() {}
|
func (*ListFilesystemReq) ProtoMessage() {}
|
||||||
func (*ListFilesystemReq) Descriptor() ([]byte, []int) {
|
func (*ListFilesystemReq) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{0}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{0}
|
||||||
}
|
}
|
||||||
func (m *ListFilesystemReq) XXX_Unmarshal(b []byte) error {
|
func (m *ListFilesystemReq) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ListFilesystemReq.Unmarshal(m, b)
|
return xxx_messageInfo_ListFilesystemReq.Unmarshal(m, b)
|
||||||
@ -78,7 +78,6 @@ var xxx_messageInfo_ListFilesystemReq proto.InternalMessageInfo
|
|||||||
|
|
||||||
type ListFilesystemRes struct {
|
type ListFilesystemRes struct {
|
||||||
Filesystems []*Filesystem `protobuf:"bytes,1,rep,name=Filesystems,proto3" json:"Filesystems,omitempty"`
|
Filesystems []*Filesystem `protobuf:"bytes,1,rep,name=Filesystems,proto3" json:"Filesystems,omitempty"`
|
||||||
Empty bool `protobuf:"varint,2,opt,name=Empty,proto3" json:"Empty,omitempty"`
|
|
||||||
XXX_NoUnkeyedLiteral struct{} `json:"-"`
|
XXX_NoUnkeyedLiteral struct{} `json:"-"`
|
||||||
XXX_unrecognized []byte `json:"-"`
|
XXX_unrecognized []byte `json:"-"`
|
||||||
XXX_sizecache int32 `json:"-"`
|
XXX_sizecache int32 `json:"-"`
|
||||||
@ -88,7 +87,7 @@ func (m *ListFilesystemRes) Reset() { *m = ListFilesystemRes{} }
|
|||||||
func (m *ListFilesystemRes) String() string { return proto.CompactTextString(m) }
|
func (m *ListFilesystemRes) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ListFilesystemRes) ProtoMessage() {}
|
func (*ListFilesystemRes) ProtoMessage() {}
|
||||||
func (*ListFilesystemRes) Descriptor() ([]byte, []int) {
|
func (*ListFilesystemRes) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{1}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{1}
|
||||||
}
|
}
|
||||||
func (m *ListFilesystemRes) XXX_Unmarshal(b []byte) error {
|
func (m *ListFilesystemRes) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ListFilesystemRes.Unmarshal(m, b)
|
return xxx_messageInfo_ListFilesystemRes.Unmarshal(m, b)
|
||||||
@ -115,16 +114,10 @@ func (m *ListFilesystemRes) GetFilesystems() []*Filesystem {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ListFilesystemRes) GetEmpty() bool {
|
|
||||||
if m != nil {
|
|
||||||
return m.Empty
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
type Filesystem struct {
|
type Filesystem struct {
|
||||||
Path string `protobuf:"bytes,1,opt,name=Path,proto3" json:"Path,omitempty"`
|
Path string `protobuf:"bytes,1,opt,name=Path,proto3" json:"Path,omitempty"`
|
||||||
ResumeToken string `protobuf:"bytes,2,opt,name=ResumeToken,proto3" json:"ResumeToken,omitempty"`
|
ResumeToken string `protobuf:"bytes,2,opt,name=ResumeToken,proto3" json:"ResumeToken,omitempty"`
|
||||||
|
IsPlaceholder bool `protobuf:"varint,3,opt,name=IsPlaceholder,proto3" json:"IsPlaceholder,omitempty"`
|
||||||
XXX_NoUnkeyedLiteral struct{} `json:"-"`
|
XXX_NoUnkeyedLiteral struct{} `json:"-"`
|
||||||
XXX_unrecognized []byte `json:"-"`
|
XXX_unrecognized []byte `json:"-"`
|
||||||
XXX_sizecache int32 `json:"-"`
|
XXX_sizecache int32 `json:"-"`
|
||||||
@ -134,7 +127,7 @@ func (m *Filesystem) Reset() { *m = Filesystem{} }
|
|||||||
func (m *Filesystem) String() string { return proto.CompactTextString(m) }
|
func (m *Filesystem) String() string { return proto.CompactTextString(m) }
|
||||||
func (*Filesystem) ProtoMessage() {}
|
func (*Filesystem) ProtoMessage() {}
|
||||||
func (*Filesystem) Descriptor() ([]byte, []int) {
|
func (*Filesystem) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{2}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{2}
|
||||||
}
|
}
|
||||||
func (m *Filesystem) XXX_Unmarshal(b []byte) error {
|
func (m *Filesystem) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_Filesystem.Unmarshal(m, b)
|
return xxx_messageInfo_Filesystem.Unmarshal(m, b)
|
||||||
@ -168,6 +161,13 @@ func (m *Filesystem) GetResumeToken() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *Filesystem) GetIsPlaceholder() bool {
|
||||||
|
if m != nil {
|
||||||
|
return m.IsPlaceholder
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
type ListFilesystemVersionsReq struct {
|
type ListFilesystemVersionsReq struct {
|
||||||
Filesystem string `protobuf:"bytes,1,opt,name=Filesystem,proto3" json:"Filesystem,omitempty"`
|
Filesystem string `protobuf:"bytes,1,opt,name=Filesystem,proto3" json:"Filesystem,omitempty"`
|
||||||
XXX_NoUnkeyedLiteral struct{} `json:"-"`
|
XXX_NoUnkeyedLiteral struct{} `json:"-"`
|
||||||
@ -179,7 +179,7 @@ func (m *ListFilesystemVersionsReq) Reset() { *m = ListFilesystemVersion
|
|||||||
func (m *ListFilesystemVersionsReq) String() string { return proto.CompactTextString(m) }
|
func (m *ListFilesystemVersionsReq) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ListFilesystemVersionsReq) ProtoMessage() {}
|
func (*ListFilesystemVersionsReq) ProtoMessage() {}
|
||||||
func (*ListFilesystemVersionsReq) Descriptor() ([]byte, []int) {
|
func (*ListFilesystemVersionsReq) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{3}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{3}
|
||||||
}
|
}
|
||||||
func (m *ListFilesystemVersionsReq) XXX_Unmarshal(b []byte) error {
|
func (m *ListFilesystemVersionsReq) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ListFilesystemVersionsReq.Unmarshal(m, b)
|
return xxx_messageInfo_ListFilesystemVersionsReq.Unmarshal(m, b)
|
||||||
@ -217,7 +217,7 @@ func (m *ListFilesystemVersionsRes) Reset() { *m = ListFilesystemVersion
|
|||||||
func (m *ListFilesystemVersionsRes) String() string { return proto.CompactTextString(m) }
|
func (m *ListFilesystemVersionsRes) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ListFilesystemVersionsRes) ProtoMessage() {}
|
func (*ListFilesystemVersionsRes) ProtoMessage() {}
|
||||||
func (*ListFilesystemVersionsRes) Descriptor() ([]byte, []int) {
|
func (*ListFilesystemVersionsRes) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{4}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{4}
|
||||||
}
|
}
|
||||||
func (m *ListFilesystemVersionsRes) XXX_Unmarshal(b []byte) error {
|
func (m *ListFilesystemVersionsRes) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ListFilesystemVersionsRes.Unmarshal(m, b)
|
return xxx_messageInfo_ListFilesystemVersionsRes.Unmarshal(m, b)
|
||||||
@ -259,7 +259,7 @@ func (m *FilesystemVersion) Reset() { *m = FilesystemVersion{} }
|
|||||||
func (m *FilesystemVersion) String() string { return proto.CompactTextString(m) }
|
func (m *FilesystemVersion) String() string { return proto.CompactTextString(m) }
|
||||||
func (*FilesystemVersion) ProtoMessage() {}
|
func (*FilesystemVersion) ProtoMessage() {}
|
||||||
func (*FilesystemVersion) Descriptor() ([]byte, []int) {
|
func (*FilesystemVersion) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{5}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{5}
|
||||||
}
|
}
|
||||||
func (m *FilesystemVersion) XXX_Unmarshal(b []byte) error {
|
func (m *FilesystemVersion) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_FilesystemVersion.Unmarshal(m, b)
|
return xxx_messageInfo_FilesystemVersion.Unmarshal(m, b)
|
||||||
@ -339,7 +339,7 @@ func (m *SendReq) Reset() { *m = SendReq{} }
|
|||||||
func (m *SendReq) String() string { return proto.CompactTextString(m) }
|
func (m *SendReq) String() string { return proto.CompactTextString(m) }
|
||||||
func (*SendReq) ProtoMessage() {}
|
func (*SendReq) ProtoMessage() {}
|
||||||
func (*SendReq) Descriptor() ([]byte, []int) {
|
func (*SendReq) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{6}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{6}
|
||||||
}
|
}
|
||||||
func (m *SendReq) XXX_Unmarshal(b []byte) error {
|
func (m *SendReq) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_SendReq.Unmarshal(m, b)
|
return xxx_messageInfo_SendReq.Unmarshal(m, b)
|
||||||
@ -420,7 +420,7 @@ func (m *Property) Reset() { *m = Property{} }
|
|||||||
func (m *Property) String() string { return proto.CompactTextString(m) }
|
func (m *Property) String() string { return proto.CompactTextString(m) }
|
||||||
func (*Property) ProtoMessage() {}
|
func (*Property) ProtoMessage() {}
|
||||||
func (*Property) Descriptor() ([]byte, []int) {
|
func (*Property) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{7}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{7}
|
||||||
}
|
}
|
||||||
func (m *Property) XXX_Unmarshal(b []byte) error {
|
func (m *Property) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_Property.Unmarshal(m, b)
|
return xxx_messageInfo_Property.Unmarshal(m, b)
|
||||||
@ -470,7 +470,7 @@ func (m *SendRes) Reset() { *m = SendRes{} }
|
|||||||
func (m *SendRes) String() string { return proto.CompactTextString(m) }
|
func (m *SendRes) String() string { return proto.CompactTextString(m) }
|
||||||
func (*SendRes) ProtoMessage() {}
|
func (*SendRes) ProtoMessage() {}
|
||||||
func (*SendRes) Descriptor() ([]byte, []int) {
|
func (*SendRes) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{8}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{8}
|
||||||
}
|
}
|
||||||
func (m *SendRes) XXX_Unmarshal(b []byte) error {
|
func (m *SendRes) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_SendRes.Unmarshal(m, b)
|
return xxx_messageInfo_SendRes.Unmarshal(m, b)
|
||||||
@ -524,7 +524,7 @@ func (m *ReceiveReq) Reset() { *m = ReceiveReq{} }
|
|||||||
func (m *ReceiveReq) String() string { return proto.CompactTextString(m) }
|
func (m *ReceiveReq) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ReceiveReq) ProtoMessage() {}
|
func (*ReceiveReq) ProtoMessage() {}
|
||||||
func (*ReceiveReq) Descriptor() ([]byte, []int) {
|
func (*ReceiveReq) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{9}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{9}
|
||||||
}
|
}
|
||||||
func (m *ReceiveReq) XXX_Unmarshal(b []byte) error {
|
func (m *ReceiveReq) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ReceiveReq.Unmarshal(m, b)
|
return xxx_messageInfo_ReceiveReq.Unmarshal(m, b)
|
||||||
@ -568,7 +568,7 @@ func (m *ReceiveRes) Reset() { *m = ReceiveRes{} }
|
|||||||
func (m *ReceiveRes) String() string { return proto.CompactTextString(m) }
|
func (m *ReceiveRes) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ReceiveRes) ProtoMessage() {}
|
func (*ReceiveRes) ProtoMessage() {}
|
||||||
func (*ReceiveRes) Descriptor() ([]byte, []int) {
|
func (*ReceiveRes) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{10}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{10}
|
||||||
}
|
}
|
||||||
func (m *ReceiveRes) XXX_Unmarshal(b []byte) error {
|
func (m *ReceiveRes) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ReceiveRes.Unmarshal(m, b)
|
return xxx_messageInfo_ReceiveRes.Unmarshal(m, b)
|
||||||
@ -601,7 +601,7 @@ func (m *DestroySnapshotsReq) Reset() { *m = DestroySnapshotsReq{} }
|
|||||||
func (m *DestroySnapshotsReq) String() string { return proto.CompactTextString(m) }
|
func (m *DestroySnapshotsReq) String() string { return proto.CompactTextString(m) }
|
||||||
func (*DestroySnapshotsReq) ProtoMessage() {}
|
func (*DestroySnapshotsReq) ProtoMessage() {}
|
||||||
func (*DestroySnapshotsReq) Descriptor() ([]byte, []int) {
|
func (*DestroySnapshotsReq) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{11}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{11}
|
||||||
}
|
}
|
||||||
func (m *DestroySnapshotsReq) XXX_Unmarshal(b []byte) error {
|
func (m *DestroySnapshotsReq) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_DestroySnapshotsReq.Unmarshal(m, b)
|
return xxx_messageInfo_DestroySnapshotsReq.Unmarshal(m, b)
|
||||||
@ -647,7 +647,7 @@ func (m *DestroySnapshotRes) Reset() { *m = DestroySnapshotRes{} }
|
|||||||
func (m *DestroySnapshotRes) String() string { return proto.CompactTextString(m) }
|
func (m *DestroySnapshotRes) String() string { return proto.CompactTextString(m) }
|
||||||
func (*DestroySnapshotRes) ProtoMessage() {}
|
func (*DestroySnapshotRes) ProtoMessage() {}
|
||||||
func (*DestroySnapshotRes) Descriptor() ([]byte, []int) {
|
func (*DestroySnapshotRes) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{12}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{12}
|
||||||
}
|
}
|
||||||
func (m *DestroySnapshotRes) XXX_Unmarshal(b []byte) error {
|
func (m *DestroySnapshotRes) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_DestroySnapshotRes.Unmarshal(m, b)
|
return xxx_messageInfo_DestroySnapshotRes.Unmarshal(m, b)
|
||||||
@ -692,7 +692,7 @@ func (m *DestroySnapshotsRes) Reset() { *m = DestroySnapshotsRes{} }
|
|||||||
func (m *DestroySnapshotsRes) String() string { return proto.CompactTextString(m) }
|
func (m *DestroySnapshotsRes) String() string { return proto.CompactTextString(m) }
|
||||||
func (*DestroySnapshotsRes) ProtoMessage() {}
|
func (*DestroySnapshotsRes) ProtoMessage() {}
|
||||||
func (*DestroySnapshotsRes) Descriptor() ([]byte, []int) {
|
func (*DestroySnapshotsRes) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{13}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{13}
|
||||||
}
|
}
|
||||||
func (m *DestroySnapshotsRes) XXX_Unmarshal(b []byte) error {
|
func (m *DestroySnapshotsRes) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_DestroySnapshotsRes.Unmarshal(m, b)
|
return xxx_messageInfo_DestroySnapshotsRes.Unmarshal(m, b)
|
||||||
@ -734,7 +734,7 @@ func (m *ReplicationCursorReq) Reset() { *m = ReplicationCursorReq{} }
|
|||||||
func (m *ReplicationCursorReq) String() string { return proto.CompactTextString(m) }
|
func (m *ReplicationCursorReq) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ReplicationCursorReq) ProtoMessage() {}
|
func (*ReplicationCursorReq) ProtoMessage() {}
|
||||||
func (*ReplicationCursorReq) Descriptor() ([]byte, []int) {
|
func (*ReplicationCursorReq) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{14}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{14}
|
||||||
}
|
}
|
||||||
func (m *ReplicationCursorReq) XXX_Unmarshal(b []byte) error {
|
func (m *ReplicationCursorReq) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ReplicationCursorReq.Unmarshal(m, b)
|
return xxx_messageInfo_ReplicationCursorReq.Unmarshal(m, b)
|
||||||
@ -882,7 +882,7 @@ func (m *ReplicationCursorReq_GetOp) Reset() { *m = ReplicationCursorReq
|
|||||||
func (m *ReplicationCursorReq_GetOp) String() string { return proto.CompactTextString(m) }
|
func (m *ReplicationCursorReq_GetOp) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ReplicationCursorReq_GetOp) ProtoMessage() {}
|
func (*ReplicationCursorReq_GetOp) ProtoMessage() {}
|
||||||
func (*ReplicationCursorReq_GetOp) Descriptor() ([]byte, []int) {
|
func (*ReplicationCursorReq_GetOp) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{14, 0}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{14, 0}
|
||||||
}
|
}
|
||||||
func (m *ReplicationCursorReq_GetOp) XXX_Unmarshal(b []byte) error {
|
func (m *ReplicationCursorReq_GetOp) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ReplicationCursorReq_GetOp.Unmarshal(m, b)
|
return xxx_messageInfo_ReplicationCursorReq_GetOp.Unmarshal(m, b)
|
||||||
@ -913,7 +913,7 @@ func (m *ReplicationCursorReq_SetOp) Reset() { *m = ReplicationCursorReq
|
|||||||
func (m *ReplicationCursorReq_SetOp) String() string { return proto.CompactTextString(m) }
|
func (m *ReplicationCursorReq_SetOp) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ReplicationCursorReq_SetOp) ProtoMessage() {}
|
func (*ReplicationCursorReq_SetOp) ProtoMessage() {}
|
||||||
func (*ReplicationCursorReq_SetOp) Descriptor() ([]byte, []int) {
|
func (*ReplicationCursorReq_SetOp) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{14, 1}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{14, 1}
|
||||||
}
|
}
|
||||||
func (m *ReplicationCursorReq_SetOp) XXX_Unmarshal(b []byte) error {
|
func (m *ReplicationCursorReq_SetOp) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ReplicationCursorReq_SetOp.Unmarshal(m, b)
|
return xxx_messageInfo_ReplicationCursorReq_SetOp.Unmarshal(m, b)
|
||||||
@ -954,7 +954,7 @@ func (m *ReplicationCursorRes) Reset() { *m = ReplicationCursorRes{} }
|
|||||||
func (m *ReplicationCursorRes) String() string { return proto.CompactTextString(m) }
|
func (m *ReplicationCursorRes) String() string { return proto.CompactTextString(m) }
|
||||||
func (*ReplicationCursorRes) ProtoMessage() {}
|
func (*ReplicationCursorRes) ProtoMessage() {}
|
||||||
func (*ReplicationCursorRes) Descriptor() ([]byte, []int) {
|
func (*ReplicationCursorRes) Descriptor() ([]byte, []int) {
|
||||||
return fileDescriptor_pdu_89315d819a6e0938, []int{15}
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{15}
|
||||||
}
|
}
|
||||||
func (m *ReplicationCursorRes) XXX_Unmarshal(b []byte) error {
|
func (m *ReplicationCursorRes) XXX_Unmarshal(b []byte) error {
|
||||||
return xxx_messageInfo_ReplicationCursorRes.Unmarshal(m, b)
|
return xxx_messageInfo_ReplicationCursorRes.Unmarshal(m, b)
|
||||||
@ -1079,6 +1079,83 @@ func _ReplicationCursorRes_OneofSizer(msg proto.Message) (n int) {
|
|||||||
return n
|
return n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type PingReq struct {
|
||||||
|
Message string `protobuf:"bytes,1,opt,name=Message,proto3" json:"Message,omitempty"`
|
||||||
|
XXX_NoUnkeyedLiteral struct{} `json:"-"`
|
||||||
|
XXX_unrecognized []byte `json:"-"`
|
||||||
|
XXX_sizecache int32 `json:"-"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *PingReq) Reset() { *m = PingReq{} }
|
||||||
|
func (m *PingReq) String() string { return proto.CompactTextString(m) }
|
||||||
|
func (*PingReq) ProtoMessage() {}
|
||||||
|
func (*PingReq) Descriptor() ([]byte, []int) {
|
||||||
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{16}
|
||||||
|
}
|
||||||
|
func (m *PingReq) XXX_Unmarshal(b []byte) error {
|
||||||
|
return xxx_messageInfo_PingReq.Unmarshal(m, b)
|
||||||
|
}
|
||||||
|
func (m *PingReq) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
|
||||||
|
return xxx_messageInfo_PingReq.Marshal(b, m, deterministic)
|
||||||
|
}
|
||||||
|
func (dst *PingReq) XXX_Merge(src proto.Message) {
|
||||||
|
xxx_messageInfo_PingReq.Merge(dst, src)
|
||||||
|
}
|
||||||
|
func (m *PingReq) XXX_Size() int {
|
||||||
|
return xxx_messageInfo_PingReq.Size(m)
|
||||||
|
}
|
||||||
|
func (m *PingReq) XXX_DiscardUnknown() {
|
||||||
|
xxx_messageInfo_PingReq.DiscardUnknown(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
var xxx_messageInfo_PingReq proto.InternalMessageInfo
|
||||||
|
|
||||||
|
func (m *PingReq) GetMessage() string {
|
||||||
|
if m != nil {
|
||||||
|
return m.Message
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
type PingRes struct {
|
||||||
|
// Echo must be PingReq.Message
|
||||||
|
Echo string `protobuf:"bytes,1,opt,name=Echo,proto3" json:"Echo,omitempty"`
|
||||||
|
XXX_NoUnkeyedLiteral struct{} `json:"-"`
|
||||||
|
XXX_unrecognized []byte `json:"-"`
|
||||||
|
XXX_sizecache int32 `json:"-"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *PingRes) Reset() { *m = PingRes{} }
|
||||||
|
func (m *PingRes) String() string { return proto.CompactTextString(m) }
|
||||||
|
func (*PingRes) ProtoMessage() {}
|
||||||
|
func (*PingRes) Descriptor() ([]byte, []int) {
|
||||||
|
return fileDescriptor_pdu_83b7e2a28d820622, []int{17}
|
||||||
|
}
|
||||||
|
func (m *PingRes) XXX_Unmarshal(b []byte) error {
|
||||||
|
return xxx_messageInfo_PingRes.Unmarshal(m, b)
|
||||||
|
}
|
||||||
|
func (m *PingRes) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
|
||||||
|
return xxx_messageInfo_PingRes.Marshal(b, m, deterministic)
|
||||||
|
}
|
||||||
|
func (dst *PingRes) XXX_Merge(src proto.Message) {
|
||||||
|
xxx_messageInfo_PingRes.Merge(dst, src)
|
||||||
|
}
|
||||||
|
func (m *PingRes) XXX_Size() int {
|
||||||
|
return xxx_messageInfo_PingRes.Size(m)
|
||||||
|
}
|
||||||
|
func (m *PingRes) XXX_DiscardUnknown() {
|
||||||
|
xxx_messageInfo_PingRes.DiscardUnknown(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
var xxx_messageInfo_PingRes proto.InternalMessageInfo
|
||||||
|
|
||||||
|
func (m *PingRes) GetEcho() string {
|
||||||
|
if m != nil {
|
||||||
|
return m.Echo
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
proto.RegisterType((*ListFilesystemReq)(nil), "ListFilesystemReq")
|
proto.RegisterType((*ListFilesystemReq)(nil), "ListFilesystemReq")
|
||||||
proto.RegisterType((*ListFilesystemRes)(nil), "ListFilesystemRes")
|
proto.RegisterType((*ListFilesystemRes)(nil), "ListFilesystemRes")
|
||||||
@ -1098,6 +1175,8 @@ func init() {
|
|||||||
proto.RegisterType((*ReplicationCursorReq_GetOp)(nil), "ReplicationCursorReq.GetOp")
|
proto.RegisterType((*ReplicationCursorReq_GetOp)(nil), "ReplicationCursorReq.GetOp")
|
||||||
proto.RegisterType((*ReplicationCursorReq_SetOp)(nil), "ReplicationCursorReq.SetOp")
|
proto.RegisterType((*ReplicationCursorReq_SetOp)(nil), "ReplicationCursorReq.SetOp")
|
||||||
proto.RegisterType((*ReplicationCursorRes)(nil), "ReplicationCursorRes")
|
proto.RegisterType((*ReplicationCursorRes)(nil), "ReplicationCursorRes")
|
||||||
|
proto.RegisterType((*PingReq)(nil), "PingReq")
|
||||||
|
proto.RegisterType((*PingRes)(nil), "PingRes")
|
||||||
proto.RegisterEnum("FilesystemVersion_VersionType", FilesystemVersion_VersionType_name, FilesystemVersion_VersionType_value)
|
proto.RegisterEnum("FilesystemVersion_VersionType", FilesystemVersion_VersionType_name, FilesystemVersion_VersionType_value)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1113,6 +1192,7 @@ const _ = grpc.SupportPackageIsVersion4
|
|||||||
//
|
//
|
||||||
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
|
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
|
||||||
type ReplicationClient interface {
|
type ReplicationClient interface {
|
||||||
|
Ping(ctx context.Context, in *PingReq, opts ...grpc.CallOption) (*PingRes, error)
|
||||||
ListFilesystems(ctx context.Context, in *ListFilesystemReq, opts ...grpc.CallOption) (*ListFilesystemRes, error)
|
ListFilesystems(ctx context.Context, in *ListFilesystemReq, opts ...grpc.CallOption) (*ListFilesystemRes, error)
|
||||||
ListFilesystemVersions(ctx context.Context, in *ListFilesystemVersionsReq, opts ...grpc.CallOption) (*ListFilesystemVersionsRes, error)
|
ListFilesystemVersions(ctx context.Context, in *ListFilesystemVersionsReq, opts ...grpc.CallOption) (*ListFilesystemVersionsRes, error)
|
||||||
DestroySnapshots(ctx context.Context, in *DestroySnapshotsReq, opts ...grpc.CallOption) (*DestroySnapshotsRes, error)
|
DestroySnapshots(ctx context.Context, in *DestroySnapshotsReq, opts ...grpc.CallOption) (*DestroySnapshotsRes, error)
|
||||||
@ -1127,6 +1207,15 @@ func NewReplicationClient(cc *grpc.ClientConn) ReplicationClient {
|
|||||||
return &replicationClient{cc}
|
return &replicationClient{cc}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *replicationClient) Ping(ctx context.Context, in *PingReq, opts ...grpc.CallOption) (*PingRes, error) {
|
||||||
|
out := new(PingRes)
|
||||||
|
err := c.cc.Invoke(ctx, "/Replication/Ping", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *replicationClient) ListFilesystems(ctx context.Context, in *ListFilesystemReq, opts ...grpc.CallOption) (*ListFilesystemRes, error) {
|
func (c *replicationClient) ListFilesystems(ctx context.Context, in *ListFilesystemReq, opts ...grpc.CallOption) (*ListFilesystemRes, error) {
|
||||||
out := new(ListFilesystemRes)
|
out := new(ListFilesystemRes)
|
||||||
err := c.cc.Invoke(ctx, "/Replication/ListFilesystems", in, out, opts...)
|
err := c.cc.Invoke(ctx, "/Replication/ListFilesystems", in, out, opts...)
|
||||||
@ -1165,6 +1254,7 @@ func (c *replicationClient) ReplicationCursor(ctx context.Context, in *Replicati
|
|||||||
|
|
||||||
// ReplicationServer is the server API for Replication service.
|
// ReplicationServer is the server API for Replication service.
|
||||||
type ReplicationServer interface {
|
type ReplicationServer interface {
|
||||||
|
Ping(context.Context, *PingReq) (*PingRes, error)
|
||||||
ListFilesystems(context.Context, *ListFilesystemReq) (*ListFilesystemRes, error)
|
ListFilesystems(context.Context, *ListFilesystemReq) (*ListFilesystemRes, error)
|
||||||
ListFilesystemVersions(context.Context, *ListFilesystemVersionsReq) (*ListFilesystemVersionsRes, error)
|
ListFilesystemVersions(context.Context, *ListFilesystemVersionsReq) (*ListFilesystemVersionsRes, error)
|
||||||
DestroySnapshots(context.Context, *DestroySnapshotsReq) (*DestroySnapshotsRes, error)
|
DestroySnapshots(context.Context, *DestroySnapshotsReq) (*DestroySnapshotsRes, error)
|
||||||
@ -1175,6 +1265,24 @@ func RegisterReplicationServer(s *grpc.Server, srv ReplicationServer) {
|
|||||||
s.RegisterService(&_Replication_serviceDesc, srv)
|
s.RegisterService(&_Replication_serviceDesc, srv)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func _Replication_Ping_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(PingReq)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(ReplicationServer).Ping(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/Replication/Ping",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(ReplicationServer).Ping(ctx, req.(*PingReq))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
func _Replication_ListFilesystems_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
func _Replication_ListFilesystems_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
in := new(ListFilesystemReq)
|
in := new(ListFilesystemReq)
|
||||||
if err := dec(in); err != nil {
|
if err := dec(in); err != nil {
|
||||||
@ -1251,6 +1359,10 @@ var _Replication_serviceDesc = grpc.ServiceDesc{
|
|||||||
ServiceName: "Replication",
|
ServiceName: "Replication",
|
||||||
HandlerType: (*ReplicationServer)(nil),
|
HandlerType: (*ReplicationServer)(nil),
|
||||||
Methods: []grpc.MethodDesc{
|
Methods: []grpc.MethodDesc{
|
||||||
|
{
|
||||||
|
MethodName: "Ping",
|
||||||
|
Handler: _Replication_Ping_Handler,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
MethodName: "ListFilesystems",
|
MethodName: "ListFilesystems",
|
||||||
Handler: _Replication_ListFilesystems_Handler,
|
Handler: _Replication_ListFilesystems_Handler,
|
||||||
@ -1272,54 +1384,58 @@ var _Replication_serviceDesc = grpc.ServiceDesc{
|
|||||||
Metadata: "pdu.proto",
|
Metadata: "pdu.proto",
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() { proto.RegisterFile("pdu.proto", fileDescriptor_pdu_89315d819a6e0938) }
|
func init() { proto.RegisterFile("pdu.proto", fileDescriptor_pdu_83b7e2a28d820622) }
|
||||||
|
|
||||||
var fileDescriptor_pdu_89315d819a6e0938 = []byte{
|
var fileDescriptor_pdu_83b7e2a28d820622 = []byte{
|
||||||
// 735 bytes of a gzipped FileDescriptorProto
|
// 785 bytes of a gzipped FileDescriptorProto
|
||||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xdd, 0x6e, 0xda, 0x4a,
|
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xd1, 0x8e, 0xe3, 0x34,
|
||||||
0x10, 0xc6, 0x60, 0xc0, 0x0c, 0x51, 0x42, 0x36, 0x9c, 0xc8, 0xc7, 0xe7, 0x28, 0x42, 0xdb, 0x1b,
|
0x14, 0x9d, 0xb4, 0x69, 0x9b, 0xde, 0x0e, 0xbb, 0x1d, 0x4f, 0x59, 0x85, 0x00, 0xab, 0xca, 0xcb,
|
||||||
0x52, 0xa9, 0x6e, 0x45, 0x7b, 0x53, 0x55, 0xaa, 0x54, 0x42, 0x7e, 0xa4, 0x56, 0x69, 0xb4, 0xd0,
|
0x43, 0x17, 0x89, 0x80, 0x0a, 0x2f, 0x08, 0x09, 0x89, 0x4e, 0x67, 0x67, 0x10, 0xb0, 0x54, 0x6e,
|
||||||
0x28, 0xca, 0x1d, 0x0d, 0xa3, 0xc4, 0x0a, 0xb0, 0xce, 0xee, 0xba, 0x0a, 0xbd, 0xec, 0x7b, 0xf4,
|
0x59, 0xad, 0xf6, 0x2d, 0x34, 0x57, 0x6d, 0x34, 0x6d, 0x9d, 0xb5, 0x13, 0xb4, 0xe5, 0x91, 0xbf,
|
||||||
0x41, 0xfa, 0x0e, 0xbd, 0xec, 0x03, 0x55, 0xbb, 0x60, 0xe3, 0x60, 0x23, 0x71, 0xe5, 0xfd, 0xbe,
|
0x9a, 0x7f, 0xe0, 0x91, 0x0f, 0x42, 0x76, 0xe3, 0x34, 0x6d, 0x52, 0xa9, 0x4f, 0xf1, 0x39, 0xf7,
|
||||||
0x9d, 0x9d, 0x9d, 0xf9, 0x76, 0x66, 0x0c, 0xb5, 0x70, 0x14, 0xf9, 0xa1, 0xe0, 0x8a, 0xd3, 0x3d,
|
0xda, 0x3e, 0xf7, 0xd8, 0xd7, 0x81, 0x76, 0x1c, 0xa6, 0x7e, 0x2c, 0x78, 0xc2, 0xe9, 0x35, 0x5c,
|
||||||
0xd8, 0xfd, 0x14, 0x48, 0x75, 0x12, 0x8c, 0x51, 0xce, 0xa4, 0xc2, 0x09, 0xc3, 0x07, 0x7a, 0x95,
|
0xfd, 0x1a, 0xc9, 0xe4, 0x55, 0xb4, 0x42, 0xb9, 0x95, 0x09, 0xae, 0x19, 0xbe, 0xa7, 0xa3, 0x32,
|
||||||
0x25, 0x25, 0x79, 0x01, 0xf5, 0x25, 0x21, 0x5d, 0xab, 0x55, 0x6a, 0xd7, 0x3b, 0x75, 0x3f, 0x65,
|
0x29, 0xc9, 0x57, 0xd0, 0xd9, 0x13, 0xd2, 0xb5, 0xfa, 0xf5, 0x41, 0x67, 0xd8, 0xf1, 0x0b, 0x49,
|
||||||
0x94, 0xde, 0x27, 0x4d, 0x28, 0x1f, 0x4f, 0x42, 0x35, 0x73, 0x8b, 0x2d, 0xab, 0xed, 0xb0, 0x39,
|
0xc5, 0x38, 0x5d, 0x02, 0xec, 0x21, 0x21, 0x60, 0x4f, 0x82, 0x64, 0xe9, 0x5a, 0x7d, 0x6b, 0xd0,
|
||||||
0xa0, 0x5d, 0x80, 0xa5, 0x11, 0x21, 0x60, 0x5f, 0x0c, 0xd5, 0x9d, 0x6b, 0xb5, 0xac, 0x76, 0x8d,
|
0x66, 0x7a, 0x4c, 0xfa, 0xd0, 0x61, 0x28, 0xd3, 0x35, 0xce, 0xf8, 0x03, 0x6e, 0xdc, 0x9a, 0x0e,
|
||||||
0x99, 0x35, 0x69, 0x41, 0x9d, 0xa1, 0x8c, 0x26, 0x38, 0xe0, 0xf7, 0x38, 0x35, 0xa7, 0x6b, 0x2c,
|
0x15, 0x29, 0xf2, 0x05, 0x7c, 0xf4, 0xb3, 0x9c, 0xac, 0x82, 0x39, 0x2e, 0xf9, 0x2a, 0x44, 0xe1,
|
||||||
0x4d, 0xd1, 0x77, 0xf0, 0xef, 0xd3, 0xe8, 0x2e, 0x51, 0xc8, 0x80, 0x4f, 0x25, 0xc3, 0x07, 0x72,
|
0xd6, 0xfb, 0xd6, 0xc0, 0x61, 0x87, 0x24, 0xfd, 0x01, 0x3e, 0x39, 0x54, 0xfb, 0x06, 0x85, 0x8c,
|
||||||
0x90, 0xbe, 0x60, 0xe1, 0x38, 0xc5, 0xd0, 0x8f, 0xeb, 0x0f, 0x4b, 0xe2, 0x83, 0x13, 0xc3, 0x45,
|
0xf8, 0x46, 0x32, 0x7c, 0x4f, 0x9e, 0x17, 0x65, 0x64, 0xdb, 0x17, 0x18, 0xfa, 0xcb, 0xe9, 0xc9,
|
||||||
0x7e, 0xc4, 0xcf, 0x58, 0xb2, 0xc4, 0x86, 0xfe, 0xb1, 0x60, 0x37, 0xb3, 0x4f, 0x3a, 0x60, 0x0f,
|
0x92, 0xf8, 0xe0, 0x18, 0x98, 0xd5, 0x4b, 0xfc, 0x52, 0x26, 0xcb, 0x73, 0xe8, 0x7f, 0x16, 0x5c,
|
||||||
0x66, 0x21, 0x9a, 0xcb, 0xb7, 0x3b, 0x07, 0x59, 0x0f, 0xfe, 0xe2, 0xab, 0xad, 0x98, 0xb1, 0xd5,
|
0x95, 0xe2, 0x64, 0x08, 0xf6, 0x6c, 0x1b, 0xa3, 0xde, 0xfc, 0xc9, 0xf0, 0x79, 0x79, 0x05, 0x3f,
|
||||||
0x4a, 0x9c, 0x0f, 0x27, 0xb8, 0x48, 0xd7, 0xac, 0x35, 0x77, 0x1a, 0x05, 0x23, 0xb7, 0xd4, 0xb2,
|
0xfb, 0xaa, 0x2c, 0xa6, 0x73, 0x95, 0x5f, 0xaf, 0x83, 0x35, 0x66, 0xa6, 0xe8, 0xb1, 0xe2, 0xee,
|
||||||
0xda, 0x36, 0x33, 0x6b, 0xf2, 0x3f, 0xd4, 0x8e, 0x04, 0x0e, 0x15, 0x0e, 0xae, 0x4e, 0x5d, 0xdb,
|
0xd2, 0x28, 0xd4, 0x26, 0xd8, 0x4c, 0x8f, 0xc9, 0x67, 0xd0, 0xbe, 0x11, 0x18, 0x24, 0x38, 0x7b,
|
||||||
0x6c, 0x2c, 0x09, 0xe2, 0x81, 0x63, 0x40, 0xc0, 0xa7, 0x6e, 0xd9, 0x78, 0x4a, 0x30, 0x3d, 0x84,
|
0x7b, 0xe7, 0xda, 0x3a, 0xb0, 0x27, 0x88, 0x07, 0x8e, 0x06, 0x11, 0xdf, 0xb8, 0x0d, 0xbd, 0x52,
|
||||||
0x7a, 0xea, 0x5a, 0xb2, 0x05, 0x4e, 0x7f, 0x3a, 0x0c, 0xe5, 0x1d, 0x57, 0x8d, 0x82, 0x46, 0x5d,
|
0x8e, 0xe9, 0x4b, 0xe8, 0x14, 0xb6, 0x25, 0x97, 0xe0, 0x4c, 0x37, 0x41, 0x2c, 0x97, 0x3c, 0xe9,
|
||||||
0xce, 0xef, 0x27, 0x43, 0x71, 0xdf, 0xb0, 0xe8, 0x2f, 0x0b, 0xaa, 0x7d, 0x9c, 0x8e, 0x36, 0xd0,
|
0x5e, 0x28, 0x34, 0xe2, 0xfc, 0x61, 0x1d, 0x88, 0x87, 0xae, 0x45, 0x1f, 0x2d, 0x68, 0x4d, 0x71,
|
||||||
0x53, 0x07, 0x79, 0x22, 0xf8, 0x24, 0x0e, 0x5c, 0xaf, 0xc9, 0x36, 0x14, 0x07, 0xdc, 0x84, 0x5d,
|
0x13, 0x9e, 0xe1, 0xa7, 0x12, 0xf9, 0x4a, 0xf0, 0xb5, 0x11, 0xae, 0xc6, 0xe4, 0x09, 0xd4, 0x66,
|
||||||
0x63, 0xc5, 0x01, 0x5f, 0x7d, 0x52, 0x3b, 0xf3, 0xa4, 0x26, 0x70, 0x3e, 0x09, 0x05, 0x4a, 0x69,
|
0x5c, 0xcb, 0x6e, 0xb3, 0xda, 0x8c, 0x1f, 0x1f, 0xbc, 0x5d, 0x3e, 0x78, 0x25, 0x9c, 0xaf, 0x63,
|
||||||
0x02, 0x77, 0x58, 0x82, 0x75, 0x21, 0xf5, 0x70, 0x14, 0x85, 0x6e, 0x65, 0x5e, 0x48, 0x06, 0x90,
|
0x81, 0x52, 0x6a, 0xe1, 0x0e, 0xcb, 0x31, 0xe9, 0x41, 0x63, 0x8c, 0x61, 0x1a, 0xbb, 0x4d, 0x1d,
|
||||||
0x7d, 0xa8, 0xf4, 0xc4, 0x8c, 0x45, 0x53, 0xb7, 0x6a, 0xe8, 0x05, 0xa2, 0x6f, 0xc0, 0xb9, 0x10,
|
0xd8, 0x01, 0xf2, 0x0c, 0x9a, 0x63, 0xb1, 0x65, 0xe9, 0xc6, 0x6d, 0x69, 0x3a, 0x43, 0xf4, 0x3b,
|
||||||
0x3c, 0x44, 0xa1, 0x66, 0x89, 0xa8, 0x56, 0x4a, 0xd4, 0x26, 0x94, 0x2f, 0x87, 0xe3, 0x28, 0x56,
|
0x70, 0x26, 0x82, 0xc7, 0x28, 0x92, 0x6d, 0x6e, 0xaa, 0x55, 0x30, 0xb5, 0x07, 0x8d, 0x37, 0xc1,
|
||||||
0x7a, 0x0e, 0xe8, 0x8f, 0x24, 0x63, 0x49, 0xda, 0xb0, 0xf3, 0x45, 0xe2, 0x68, 0xb5, 0x08, 0x1d,
|
0x2a, 0x35, 0x4e, 0xef, 0x00, 0xfd, 0x27, 0xaf, 0x58, 0x92, 0x01, 0x3c, 0xfd, 0x43, 0x62, 0x78,
|
||||||
0xb6, 0x4a, 0x13, 0x0a, 0x5b, 0xc7, 0x8f, 0x21, 0xde, 0x28, 0x1c, 0xf5, 0x83, 0xef, 0x68, 0x32,
|
0x7c, 0x55, 0x1d, 0x76, 0x4c, 0x13, 0x0a, 0x97, 0xb7, 0x1f, 0x62, 0x9c, 0x27, 0x18, 0x4e, 0xa3,
|
||||||
0x2e, 0xb1, 0x27, 0x1c, 0x39, 0x04, 0x58, 0xc4, 0x13, 0xa0, 0x74, 0x6d, 0x53, 0x54, 0x35, 0x3f,
|
0xbf, 0x51, 0x57, 0x5c, 0x67, 0x07, 0x1c, 0x79, 0x09, 0x90, 0xe9, 0x89, 0x50, 0xba, 0xb6, 0xbe,
|
||||||
0x0e, 0x91, 0xa5, 0x36, 0xe9, 0x15, 0x00, 0xc3, 0x1b, 0x0c, 0xbe, 0xe1, 0x26, 0xc2, 0x3f, 0x87,
|
0x54, 0x6d, 0xdf, 0x48, 0x64, 0x85, 0x20, 0x7d, 0x0b, 0xc0, 0x70, 0x8e, 0xd1, 0x5f, 0x78, 0x8e,
|
||||||
0xc6, 0xd1, 0x18, 0x87, 0x22, 0x1b, 0x67, 0x86, 0xa7, 0x5b, 0x29, 0xcf, 0x92, 0xde, 0xc2, 0x5e,
|
0xf1, 0x5f, 0x42, 0xf7, 0x66, 0x85, 0x81, 0x28, 0xeb, 0x2c, 0xf1, 0xf4, 0xb2, 0xb0, 0xb2, 0xa4,
|
||||||
0x0f, 0xa5, 0x12, 0x7c, 0x16, 0x57, 0xc0, 0x26, 0x9d, 0x43, 0x5e, 0x41, 0x2d, 0xb1, 0x77, 0x8b,
|
0x0b, 0xb8, 0x1e, 0xa3, 0x4c, 0x04, 0xdf, 0x9a, 0x1b, 0x70, 0x4e, 0xe7, 0x90, 0x6f, 0xa0, 0x9d,
|
||||||
0x6b, 0xbb, 0x63, 0x69, 0x44, 0xaf, 0x81, 0xac, 0x5c, 0xb4, 0x68, 0xb2, 0x18, 0x9a, 0x5b, 0xd6,
|
0xe7, 0xbb, 0xb5, 0x93, 0xdd, 0xb1, 0x4f, 0xa2, 0xef, 0x80, 0x1c, 0x6d, 0x94, 0x35, 0x99, 0x81,
|
||||||
0x34, 0x59, 0x6c, 0x63, 0x06, 0x89, 0x10, 0x5c, 0xc4, 0x2f, 0x66, 0x00, 0xed, 0xe5, 0x25, 0xa1,
|
0x7a, 0x97, 0x13, 0x4d, 0x66, 0x72, 0xd4, 0x89, 0xdd, 0x0a, 0xc1, 0x85, 0x39, 0x31, 0x0d, 0xe8,
|
||||||
0x87, 0x54, 0x55, 0x27, 0x3e, 0x56, 0x71, 0x03, 0xef, 0xf9, 0xd9, 0x10, 0x58, 0x6c, 0x43, 0x7f,
|
0xb8, 0xaa, 0x08, 0xf5, 0x68, 0xb5, 0x54, 0xe1, 0xab, 0xc4, 0x34, 0xf0, 0xb5, 0x5f, 0x96, 0xc0,
|
||||||
0x5b, 0xd0, 0x64, 0x18, 0x8e, 0x83, 0x1b, 0xd3, 0x24, 0x47, 0x91, 0x90, 0x5c, 0x6c, 0x22, 0xc6,
|
0x4c, 0x0e, 0xfd, 0xd7, 0x82, 0x1e, 0xc3, 0x78, 0x15, 0xcd, 0x75, 0x93, 0xdc, 0xa4, 0x42, 0x72,
|
||||||
0x4b, 0x28, 0xdd, 0xa2, 0x32, 0x21, 0xd5, 0x3b, 0xff, 0xf9, 0x79, 0x3e, 0xfc, 0x53, 0x54, 0x9f,
|
0x71, 0x8e, 0x19, 0x5f, 0x43, 0x7d, 0x81, 0x89, 0x96, 0xd4, 0x19, 0x7e, 0xea, 0x57, 0xad, 0xe1,
|
||||||
0xc3, 0xb3, 0x02, 0xd3, 0x96, 0xfa, 0x80, 0x44, 0x65, 0x4a, 0x64, 0xed, 0x81, 0x7e, 0x7c, 0x40,
|
0xdf, 0x61, 0xf2, 0x7b, 0x7c, 0x7f, 0xc1, 0x54, 0xa6, 0x9a, 0x20, 0x31, 0xd1, 0x57, 0xe4, 0xe4,
|
||||||
0xa2, 0xf2, 0xaa, 0x50, 0x36, 0x0e, 0xbc, 0x67, 0x50, 0x36, 0x1b, 0xba, 0x49, 0x12, 0xe1, 0xe6,
|
0x84, 0xa9, 0x99, 0x20, 0x31, 0xf1, 0x5a, 0xd0, 0xd0, 0x0b, 0x78, 0x2f, 0xa0, 0xa1, 0x03, 0xaa,
|
||||||
0x5a, 0x24, 0xb8, 0x6b, 0x43, 0x91, 0x87, 0x74, 0x90, 0x9b, 0x8d, 0x6e, 0xa1, 0xf9, 0x24, 0xd1,
|
0x49, 0x72, 0xe3, 0x76, 0x5e, 0xe4, 0x78, 0x64, 0x43, 0x8d, 0xc7, 0x74, 0x56, 0x59, 0x8d, 0x6a,
|
||||||
0x79, 0xd8, 0x67, 0x85, 0x64, 0x96, 0x38, 0xe7, 0x5c, 0xe1, 0x63, 0x20, 0xe7, 0xfe, 0x9c, 0xb3,
|
0xa1, 0xdd, 0x4b, 0xa2, 0xea, 0xb0, 0xef, 0x2f, 0xf2, 0xb7, 0xc4, 0x79, 0xcd, 0x13, 0xfc, 0x10,
|
||||||
0x02, 0x4b, 0x98, 0xae, 0x03, 0x95, 0xb9, 0x4a, 0x9d, 0x9f, 0x45, 0xdd, 0xbf, 0x89, 0x5b, 0xf2,
|
0xc9, 0xdd, 0x7a, 0xce, 0xfd, 0x05, 0xcb, 0x99, 0x91, 0x03, 0xcd, 0x9d, 0x4b, 0xf4, 0x05, 0xb4,
|
||||||
0x16, 0x76, 0x9e, 0x8e, 0x50, 0x49, 0x88, 0x9f, 0xf9, 0x89, 0x78, 0x59, 0x4e, 0x92, 0x0b, 0xd8,
|
0x26, 0xd1, 0x66, 0xa1, 0x6c, 0x71, 0xa1, 0xf5, 0x1b, 0x4a, 0x19, 0x2c, 0x4c, 0x53, 0x19, 0x48,
|
||||||
0xcf, 0x9f, 0xbe, 0xc4, 0xf3, 0xd7, 0xce, 0x74, 0x6f, 0xfd, 0x9e, 0x24, 0xef, 0xa1, 0xb1, 0x5a,
|
0x3f, 0x37, 0x49, 0x52, 0xb5, 0xdd, 0xed, 0x7c, 0xc9, 0x4d, 0xdb, 0xa9, 0xf1, 0xf0, 0xb1, 0xa6,
|
||||||
0x07, 0xa4, 0xe9, 0xe7, 0xd4, 0xb7, 0x97, 0xc7, 0x4a, 0xf2, 0x01, 0x76, 0x33, 0x92, 0x91, 0x7f,
|
0xde, 0x80, 0x5c, 0x1a, 0xf1, 0xc0, 0x56, 0xe9, 0xc4, 0xf1, 0xb3, 0xa5, 0x3d, 0x33, 0x92, 0xe4,
|
||||||
0x72, 0xdf, 0xc7, 0xcb, 0xa5, 0x65, 0xb7, 0x7c, 0x5d, 0x0a, 0x47, 0xd1, 0xd7, 0x8a, 0xf9, 0xa1,
|
0x7b, 0x78, 0x7a, 0xf8, 0x44, 0x4b, 0x42, 0xfc, 0xd2, 0x4f, 0xcb, 0x2b, 0x73, 0x92, 0x4c, 0xe0,
|
||||||
0xbe, 0xfe, 0x1b, 0x00, 0x00, 0xff, 0xff, 0xa3, 0xba, 0x8e, 0x63, 0x5d, 0x07, 0x00, 0x00,
|
0x59, 0xf5, 0xeb, 0x4e, 0x3c, 0xff, 0xe4, 0x3f, 0xc3, 0x3b, 0x1d, 0x93, 0xe4, 0x47, 0xe8, 0x1e,
|
||||||
|
0xdf, 0x33, 0xd2, 0xf3, 0x2b, 0xfa, 0xc7, 0xab, 0x62, 0x25, 0xf9, 0x09, 0xae, 0x4a, 0x47, 0x42,
|
||||||
|
0x3e, 0xae, 0x3c, 0x7f, 0xaf, 0x92, 0x96, 0xa3, 0xc6, 0xbb, 0x7a, 0x1c, 0xa6, 0x7f, 0x36, 0xf5,
|
||||||
|
0x0f, 0xfc, 0xdb, 0xff, 0x03, 0x00, 0x00, 0xff, 0xff, 0x37, 0x0e, 0xf2, 0xe4, 0xcd, 0x07, 0x00,
|
||||||
|
0x00,
|
||||||
}
|
}
|
@ -2,6 +2,7 @@ syntax = "proto3";
|
|||||||
option go_package = "pdu";
|
option go_package = "pdu";
|
||||||
|
|
||||||
service Replication {
|
service Replication {
|
||||||
|
rpc Ping (PingReq) returns (PingRes);
|
||||||
rpc ListFilesystems (ListFilesystemReq) returns (ListFilesystemRes);
|
rpc ListFilesystems (ListFilesystemReq) returns (ListFilesystemRes);
|
||||||
rpc ListFilesystemVersions (ListFilesystemVersionsReq) returns (ListFilesystemVersionsRes);
|
rpc ListFilesystemVersions (ListFilesystemVersionsReq) returns (ListFilesystemVersionsRes);
|
||||||
rpc DestroySnapshots (DestroySnapshotsReq) returns (DestroySnapshotsRes);
|
rpc DestroySnapshots (DestroySnapshotsReq) returns (DestroySnapshotsRes);
|
||||||
@ -13,12 +14,12 @@ message ListFilesystemReq {}
|
|||||||
|
|
||||||
message ListFilesystemRes {
|
message ListFilesystemRes {
|
||||||
repeated Filesystem Filesystems = 1;
|
repeated Filesystem Filesystems = 1;
|
||||||
bool Empty = 2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Filesystem {
|
message Filesystem {
|
||||||
string Path = 1;
|
string Path = 1;
|
||||||
string ResumeToken = 2;
|
string ResumeToken = 2;
|
||||||
|
bool IsPlaceholder = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ListFilesystemVersionsReq {
|
message ListFilesystemVersionsReq {
|
||||||
@ -120,3 +121,12 @@ message ReplicationCursorRes {
|
|||||||
bool Notexist = 2;
|
bool Notexist = 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PingReq is the request message of the Replication.Ping RPC.
message PingReq {
  // Message is the payload the responder is expected to echo back
  // (see PingRes.Echo).
  string Message = 1;
}

// PingRes is the response message of the Replication.Ping RPC.
message PingRes {
  // Echo must be PingReq.Message
  string Echo = 1;
}
|
495
replication/logic/replication_logic.go
Normal file
495
replication/logic/replication_logic.go
Normal file
@ -0,0 +1,495 @@
|
|||||||
|
package logic
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
|
||||||
|
"github.com/zrepl/zrepl/replication/driver"
|
||||||
|
. "github.com/zrepl/zrepl/replication/logic/diff"
|
||||||
|
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
|
"github.com/zrepl/zrepl/replication/report"
|
||||||
|
"github.com/zrepl/zrepl/util/bytecounter"
|
||||||
|
"github.com/zrepl/zrepl/zfs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Endpoint represents one side of the replication.
//
// An endpoint is either in Sender or Receiver mode, represented by the correspondingly
// named interfaces defined in this package.
type Endpoint interface {
	// Does not include placeholder filesystems
	// NOTE(review): Filesystem.doPlanning checks GetIsPlaceholder() on the
	// receiver's result, so this statement may be stale — confirm.
	ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
	// ListFilesystemVersions lists the versions of the filesystem named in req.
	ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error)
	// DestroySnapshots handles the snapshot destruction request req.
	DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error)
	// WaitForConnectivity returns a non-nil error if the endpoint is not
	// reachable; Planner.WaitForConnectivity calls it for both sides.
	WaitForConnectivity(ctx context.Context) (error)
}
|
||||||
|
|
||||||
|
// Sender is the Endpoint that filesystem versions are sent from.
type Sender interface {
	Endpoint
	// If a non-nil zfs.StreamCopier is returned, it is guaranteed to be closed before
	// any next call to this Endpoint.
	// If the send request is for dry run the zfs.StreamCopier will be nil
	// NOTE(review): the previous wording referred to io.ReadCloser and to
	// github.com/zrepl/zrepl/replication.Endpoint; adjusted to the visible
	// signature and package — confirm.
	Send(ctx context.Context, r *pdu.SendReq) (*pdu.SendRes, zfs.StreamCopier, error)
	// ReplicationCursor handles a replication cursor request. Step.doReplication
	// issues a Set operation with the name of the snapshot it just replicated.
	ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
}
|
||||||
|
|
||||||
|
// Receiver is the Endpoint that filesystem versions are received by.
type Receiver interface {
	Endpoint
	// Receive sends req and receive (the latter containing a ZFS send stream)
	// to this Endpoint.
	// NOTE(review): the previous wording referred to parameters r / sendStream and to
	// github.com/zrepl/zrepl/replication.Endpoint; adjusted to the visible signature — confirm.
	Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error)
}
|
||||||
|
|
||||||
|
// Planner plans a replication between a sender and a receiver.
// Use NewPlanner to construct it.
type Planner struct {
	// sender and receiver are the two endpoints that are queried during planning
	// and handed down to every Filesystem that the planner produces.
	sender   Sender
	receiver Receiver

	promSecsPerState    *prometheus.HistogramVec // labels: state
	promBytesReplicated *prometheus.CounterVec   // labels: filesystem
}
|
||||||
|
|
||||||
|
func (p *Planner) Plan(ctx context.Context) ([]driver.FS, error) {
|
||||||
|
fss, err := p.doPlanning(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
dfss := make([]driver.FS, len(fss))
|
||||||
|
for i := range dfss {
|
||||||
|
dfss[i] = fss[i]
|
||||||
|
}
|
||||||
|
return dfss, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Planner) WaitForConnectivity(ctx context.Context) error {
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
doPing := func(endpoint Endpoint, errOut *error) {
|
||||||
|
defer wg.Done()
|
||||||
|
err := endpoint.WaitForConnectivity(ctx)
|
||||||
|
if err != nil {
|
||||||
|
*errOut = err
|
||||||
|
} else {
|
||||||
|
*errOut = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wg.Add(2)
|
||||||
|
var senderErr, receiverErr error
|
||||||
|
go doPing(p.sender, &senderErr)
|
||||||
|
go doPing(p.receiver, &receiverErr)
|
||||||
|
wg.Wait()
|
||||||
|
if senderErr == nil && receiverErr == nil {
|
||||||
|
return nil
|
||||||
|
} else if senderErr != nil && receiverErr != nil {
|
||||||
|
if senderErr.Error() == receiverErr.Error() {
|
||||||
|
return fmt.Errorf("sender and receiver are not reachable: %s", senderErr.Error())
|
||||||
|
} else {
|
||||||
|
return fmt.Errorf("sender and receiver are not reachable:\n sender: %s\n receiver: %s", senderErr, receiverErr)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
var side string
|
||||||
|
var err *error
|
||||||
|
if senderErr != nil {
|
||||||
|
side = "sender"
|
||||||
|
err = &senderErr
|
||||||
|
} else {
|
||||||
|
side = "receiver"
|
||||||
|
err = &receiverErr
|
||||||
|
}
|
||||||
|
return fmt.Errorf("%s is not reachable: %s", side, *err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filesystem is the planning unit for one sender filesystem.
// Instances are created by Planner.doPlanning.
type Filesystem struct {
	// sender and receiver are copied from the Planner that created this value.
	sender   Sender
	receiver Receiver

	Path string // compat
	// receiverFS is the receiver-side filesystem with the same Path,
	// or nil if the receiver did not list one.
	receiverFS *pdu.Filesystem
	// promBytesReplicated is increased by Step.doReplication with the
	// number of bytes counted for a step.
	promBytesReplicated prometheus.Counter // compat
}
|
||||||
|
|
||||||
|
func (f *Filesystem) EqualToPreviousAttempt(other driver.FS) bool {
|
||||||
|
g, ok := other.(*Filesystem)
|
||||||
|
if !ok {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// TODO: use GUIDs (issued by zrepl, not those from ZFS)
|
||||||
|
return f.Path == g.Path
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *Filesystem) PlanFS(ctx context.Context) ([]driver.Step, error) {
|
||||||
|
steps, err := f.doPlanning(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
dsteps := make([]driver.Step, len(steps))
|
||||||
|
for i := range dsteps {
|
||||||
|
dsteps[i] = steps[i]
|
||||||
|
}
|
||||||
|
return dsteps, nil
|
||||||
|
}
|
||||||
|
func (f *Filesystem) ReportInfo() *report.FilesystemInfo {
|
||||||
|
return &report.FilesystemInfo{Name: f.Path} // FIXME compat name
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step is a single send/receive operation of one filesystem.
// Instances are created by Filesystem.doPlanning.
type Step struct {
	// sender and receiver are copied from the parent Filesystem.
	sender   Sender
	receiver Receiver

	// parent is the Filesystem this step belongs to.
	parent *Filesystem
	// from is nil for a full send (see buildSendRequest and String);
	// to is the version that is sent.
	from, to *pdu.FilesystemVersion // compat

	// byteCounter is nil until doReplication installs it around the send stream.
	byteCounter  bytecounter.StreamCopier
	expectedSize int64 // 0 means no size estimate present / possible
}
|
||||||
|
|
||||||
|
func (s *Step) TargetEquals(other driver.Step) bool {
|
||||||
|
t, ok := other.(*Step)
|
||||||
|
if !ok {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !s.parent.EqualToPreviousAttempt(t.parent) {
|
||||||
|
panic("Step interface promise broken: parent filesystems must be same")
|
||||||
|
}
|
||||||
|
return s.from.GetGuid() == t.from.GetGuid() &&
|
||||||
|
s.to.GetGuid() == t.to.GetGuid()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Step) TargetDate() time.Time {
|
||||||
|
return s.to.SnapshotTime() // FIXME compat name
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Step) Step(ctx context.Context) error {
|
||||||
|
return s.doReplication(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Step) ReportInfo() *report.StepInfo {
|
||||||
|
var byteCounter int64
|
||||||
|
if s.byteCounter != nil {
|
||||||
|
byteCounter = s.byteCounter.Count()
|
||||||
|
}
|
||||||
|
// FIXME stick to zfs convention of from and to
|
||||||
|
from := ""
|
||||||
|
if s.from != nil {
|
||||||
|
from = s.from.RelName()
|
||||||
|
}
|
||||||
|
return &report.StepInfo{
|
||||||
|
From: from,
|
||||||
|
To: s.to.RelName(),
|
||||||
|
BytesExpected: s.expectedSize,
|
||||||
|
BytesReplicated: byteCounter,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewPlanner(secsPerState *prometheus.HistogramVec, bytesReplicated *prometheus.CounterVec, sender Sender, receiver Receiver) *Planner {
|
||||||
|
return &Planner{
|
||||||
|
sender: sender,
|
||||||
|
receiver: receiver,
|
||||||
|
promSecsPerState: secsPerState,
|
||||||
|
promBytesReplicated: bytesReplicated,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string) {
|
||||||
|
if noCommonAncestor, ok := conflict.(*ConflictNoCommonAncestor); ok {
|
||||||
|
if len(noCommonAncestor.SortedReceiverVersions) == 0 {
|
||||||
|
// TODO this is hard-coded replication policy: most recent snapshot as source
|
||||||
|
var mostRecentSnap *pdu.FilesystemVersion
|
||||||
|
for n := len(noCommonAncestor.SortedSenderVersions) - 1; n >= 0; n-- {
|
||||||
|
if noCommonAncestor.SortedSenderVersions[n].Type == pdu.FilesystemVersion_Snapshot {
|
||||||
|
mostRecentSnap = noCommonAncestor.SortedSenderVersions[n]
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if mostRecentSnap == nil {
|
||||||
|
return nil, "no snapshots available on sender side"
|
||||||
|
}
|
||||||
|
return []*pdu.FilesystemVersion{mostRecentSnap}, fmt.Sprintf("start replication at most recent snapshot %s", mostRecentSnap.RelName())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, "no automated way to handle conflict type"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Planner) doPlanning(ctx context.Context) ([]*Filesystem, error) {
|
||||||
|
|
||||||
|
log := getLogger(ctx)
|
||||||
|
|
||||||
|
log.Info("start planning")
|
||||||
|
|
||||||
|
slfssres, err := p.sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing sender filesystems")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
sfss := slfssres.GetFilesystems()
|
||||||
|
// no progress here since we could run in a live-lock on connectivity issues
|
||||||
|
|
||||||
|
rlfssres, err := p.receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing receiver filesystems")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
rfss := rlfssres.GetFilesystems()
|
||||||
|
|
||||||
|
q := make([]*Filesystem, 0, len(sfss))
|
||||||
|
for _, fs := range sfss {
|
||||||
|
|
||||||
|
var receiverFS *pdu.Filesystem
|
||||||
|
for _, rfs := range rfss {
|
||||||
|
if rfs.Path == fs.Path {
|
||||||
|
receiverFS = rfs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ctr := p.promBytesReplicated.WithLabelValues(fs.Path)
|
||||||
|
|
||||||
|
q = append(q, &Filesystem{
|
||||||
|
sender: p.sender,
|
||||||
|
receiver: p.receiver,
|
||||||
|
Path: fs.Path,
|
||||||
|
receiverFS: receiverFS,
|
||||||
|
promBytesReplicated: ctr,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return q, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fs *Filesystem) doPlanning(ctx context.Context) ([]*Step, error) {
|
||||||
|
|
||||||
|
log := getLogger(ctx).WithField("filesystem", fs.Path)
|
||||||
|
|
||||||
|
log.Debug("assessing filesystem")
|
||||||
|
|
||||||
|
sfsvsres, err := fs.sender.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path})
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).Error("cannot get remote filesystem versions")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
sfsvs := sfsvsres.GetVersions()
|
||||||
|
|
||||||
|
if len(sfsvs) < 1 {
|
||||||
|
err := errors.New("sender does not have any versions")
|
||||||
|
log.Error(err.Error())
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var rfsvs []*pdu.FilesystemVersion
|
||||||
|
if fs.receiverFS != nil && !fs.receiverFS.GetIsPlaceholder() {
|
||||||
|
rfsvsres, err := fs.receiver.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path})
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).Error("receiver error")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
rfsvs = rfsvsres.GetVersions()
|
||||||
|
} else {
|
||||||
|
rfsvs = []*pdu.FilesystemVersion{}
|
||||||
|
}
|
||||||
|
|
||||||
|
path, conflict := IncrementalPath(rfsvs, sfsvs)
|
||||||
|
if conflict != nil {
|
||||||
|
var msg string
|
||||||
|
path, msg = resolveConflict(conflict) // no shadowing allowed!
|
||||||
|
if path != nil {
|
||||||
|
log.WithField("conflict", conflict).Info("conflict")
|
||||||
|
log.WithField("resolution", msg).Info("automatically resolved")
|
||||||
|
} else {
|
||||||
|
log.WithField("conflict", conflict).Error("conflict")
|
||||||
|
log.WithField("problem", msg).Error("cannot resolve conflict")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(path) == 0 {
|
||||||
|
return nil, conflict
|
||||||
|
}
|
||||||
|
|
||||||
|
steps := make([]*Step, 0, len(path))
|
||||||
|
// FIXME unify struct declarations => initializer?
|
||||||
|
if len(path) == 1 {
|
||||||
|
steps = append(steps, &Step{
|
||||||
|
parent: fs,
|
||||||
|
sender: fs.sender,
|
||||||
|
receiver: fs.receiver,
|
||||||
|
from: nil,
|
||||||
|
to: path[0],
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
for i := 0; i < len(path)-1; i++ {
|
||||||
|
steps = append(steps, &Step{
|
||||||
|
parent: fs,
|
||||||
|
sender: fs.sender,
|
||||||
|
receiver: fs.receiver,
|
||||||
|
from: path[i],
|
||||||
|
to: path[i+1],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug("compute send size estimate")
|
||||||
|
errs := make(chan error, len(steps))
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
fanOutCtx, fanOutCancel := context.WithCancel(ctx)
|
||||||
|
defer fanOutCancel()
|
||||||
|
for _, step := range steps {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(step *Step) {
|
||||||
|
defer wg.Done()
|
||||||
|
err := step.updateSizeEstimate(fanOutCtx)
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).WithField("step", step).Error("error computing size estimate")
|
||||||
|
fanOutCancel()
|
||||||
|
}
|
||||||
|
errs <- err
|
||||||
|
}(step)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
close(errs)
|
||||||
|
var significantErr error = nil
|
||||||
|
for err := range errs {
|
||||||
|
if err != nil {
|
||||||
|
if significantErr == nil || significantErr == context.Canceled {
|
||||||
|
significantErr = err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if significantErr != nil {
|
||||||
|
return nil, significantErr
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug("filesystem planning finished")
|
||||||
|
return steps, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// type FilesystemsReplicationFailedError struct {
|
||||||
|
// FilesystemsWithError []*fsrep.Replication
|
||||||
|
// }
|
||||||
|
|
||||||
|
// func (e FilesystemsReplicationFailedError) Error() string {
|
||||||
|
// allSame := true
|
||||||
|
// lastErr := e.FilesystemsWithError[0].Err().Error()
|
||||||
|
// for _, fs := range e.FilesystemsWithError {
|
||||||
|
// fsErr := fs.Err().Error()
|
||||||
|
// allSame = allSame && lastErr == fsErr
|
||||||
|
// }
|
||||||
|
|
||||||
|
// fsstr := "multiple filesystems"
|
||||||
|
// if len(e.FilesystemsWithError) == 1 {
|
||||||
|
// fsstr = fmt.Sprintf("filesystem %s", e.FilesystemsWithError[0].FS())
|
||||||
|
// }
|
||||||
|
// errorStr := lastErr
|
||||||
|
// if !allSame {
|
||||||
|
// errorStr = "multiple different errors"
|
||||||
|
// }
|
||||||
|
// return fmt.Sprintf("%s could not be replicated: %s", fsstr, errorStr)
|
||||||
|
// }
|
||||||
|
|
||||||
|
func (s *Step) updateSizeEstimate(ctx context.Context) error {
|
||||||
|
|
||||||
|
log := getLogger(ctx)
|
||||||
|
|
||||||
|
sr := s.buildSendRequest(true)
|
||||||
|
|
||||||
|
log.Debug("initiate dry run send request")
|
||||||
|
sres, _, err := s.sender.Send(ctx, sr)
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).Error("dry run send request failed")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
s.expectedSize = sres.ExpectedSize
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Step) buildSendRequest(dryRun bool) (sr *pdu.SendReq) {
|
||||||
|
fs := s.parent.Path
|
||||||
|
if s.from == nil {
|
||||||
|
sr = &pdu.SendReq{
|
||||||
|
Filesystem: fs,
|
||||||
|
To: s.to.RelName(),
|
||||||
|
DryRun: dryRun,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sr = &pdu.SendReq{
|
||||||
|
Filesystem: fs,
|
||||||
|
From: s.from.RelName(),
|
||||||
|
To: s.to.RelName(),
|
||||||
|
DryRun: dryRun,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Step) doReplication(ctx context.Context) error {
|
||||||
|
|
||||||
|
fs := s.parent.Path
|
||||||
|
|
||||||
|
log := getLogger(ctx)
|
||||||
|
sr := s.buildSendRequest(false)
|
||||||
|
|
||||||
|
log.Debug("initiate send request")
|
||||||
|
sres, sstreamCopier, err := s.sender.Send(ctx, sr)
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).Error("send request failed")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if sstreamCopier == nil {
|
||||||
|
err := errors.New("send request did not return a stream, broken endpoint implementation")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer sstreamCopier.Close()
|
||||||
|
|
||||||
|
// Install a byte counter to track progress + for status report
|
||||||
|
s.byteCounter = bytecounter.NewStreamCopier(sstreamCopier)
|
||||||
|
defer func() {
|
||||||
|
s.parent.promBytesReplicated.Add(float64(s.byteCounter.Count()))
|
||||||
|
}()
|
||||||
|
|
||||||
|
rr := &pdu.ReceiveReq{
|
||||||
|
Filesystem: fs,
|
||||||
|
ClearResumeToken: !sres.UsedResumeToken,
|
||||||
|
}
|
||||||
|
log.Debug("initiate receive request")
|
||||||
|
_, err = s.receiver.Receive(ctx, rr, s.byteCounter)
|
||||||
|
if err != nil {
|
||||||
|
log.
|
||||||
|
WithError(err).
|
||||||
|
WithField("errType", fmt.Sprintf("%T", err)).
|
||||||
|
Error("receive request failed (might also be error on sender)")
|
||||||
|
// This failure could be due to
|
||||||
|
// - an unexpected exit of ZFS on the sending side
|
||||||
|
// - an unexpected exit of ZFS on the receiving side
|
||||||
|
// - a connectivity issue
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log.Debug("receive finished")
|
||||||
|
|
||||||
|
log.Debug("advance replication cursor")
|
||||||
|
req := &pdu.ReplicationCursorReq{
|
||||||
|
Filesystem: fs,
|
||||||
|
Op: &pdu.ReplicationCursorReq_Set{
|
||||||
|
Set: &pdu.ReplicationCursorReq_SetOp{
|
||||||
|
Snapshot: s.to.GetName(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_, err = s.sender.ReplicationCursor(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).Error("error advancing replication cursor")
|
||||||
|
// If this fails and replication planning restarts, the diff algorithm will find
|
||||||
|
// that cursor out of place. This is not a problem because then, it would just use another FS
|
||||||
|
// However, we FIXME have no means to just update the cursor in a
|
||||||
|
// second replication attempt right after this one where we don't have new snaps yet
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Step) String() string {
|
||||||
|
if s.from == nil { // FIXME: ZFS semantics are that to is nil on non-incremental send
|
||||||
|
return fmt.Sprintf("%s%s (full)", s.parent.Path, s.to.RelName())
|
||||||
|
} else {
|
||||||
|
return fmt.Sprintf("%s(%s => %s)", s.parent.Path, s.from.RelName(), s.to.RelName())
|
||||||
|
}
|
||||||
|
}
|
@ -1,9 +1,9 @@
|
|||||||
package replication
|
package logic
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
|
||||||
"github.com/zrepl/zrepl/logger"
|
"github.com/zrepl/zrepl/logger"
|
||||||
"github.com/zrepl/zrepl/replication/fsrep"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type contextKey int
|
type contextKey int
|
||||||
@ -16,7 +16,6 @@ type Logger = logger.Logger
|
|||||||
|
|
||||||
func WithLogger(ctx context.Context, l Logger) context.Context {
|
func WithLogger(ctx context.Context, l Logger) context.Context {
|
||||||
ctx = context.WithValue(ctx, contextKeyLog, l)
|
ctx = context.WithValue(ctx, contextKeyLog, l)
|
||||||
ctx = fsrep.WithLogger(ctx, l)
|
|
||||||
return ctx
|
return ctx
|
||||||
}
|
}
|
||||||
|
|
@ -1,560 +0,0 @@
|
|||||||
// Package replication implements replication of filesystems with existing
|
|
||||||
// versions (snapshots) from a sender to a receiver.
|
|
||||||
package replication
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
|
||||||
"github.com/zrepl/zrepl/daemon/job/wakeup"
|
|
||||||
"github.com/zrepl/zrepl/util/envconst"
|
|
||||||
"github.com/zrepl/zrepl/util/watchdog"
|
|
||||||
"math/bits"
|
|
||||||
"net"
|
|
||||||
"sort"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/zrepl/zrepl/replication/fsrep"
|
|
||||||
. "github.com/zrepl/zrepl/replication/internal/diff"
|
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
|
||||||
)
|
|
||||||
|
|
||||||
//go:generate enumer -type=State

// State identifies a state of the Replication state machine.
type State uint

// Every State is a distinct single bit (1 << iota); rsf relies on this to
// map a State to its handler via the index of the set bit.
const (
	Planning State = 1 << iota
	PlanningError
	Working
	WorkingWait
	Completed
	PermanentError
)
|
|
||||||
|
|
||||||
func (s State) rsf() state {
|
|
||||||
idx := bits.TrailingZeros(uint(s))
|
|
||||||
if idx == bits.UintSize {
|
|
||||||
panic(s) // invalid value
|
|
||||||
}
|
|
||||||
m := []state{
|
|
||||||
statePlanning,
|
|
||||||
statePlanningError,
|
|
||||||
stateWorking,
|
|
||||||
stateWorkingWait,
|
|
||||||
nil,
|
|
||||||
nil,
|
|
||||||
}
|
|
||||||
return m[idx]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s State) IsTerminal() bool {
|
|
||||||
return s.rsf() == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Replication implements the replication of multiple file systems from a Sender to a Receiver.
//
// It is a state machine that is driven by the Drive method
// and provides asynchronous reporting via the Report method (i.e. from another goroutine).
type Replication struct {
	// not protected by lock
	promSecsPerState    *prometheus.HistogramVec // labels: state
	promBytesReplicated *prometheus.CounterVec   // labels: filesystem

	// Progress is passed to every state function by Drive.
	Progress watchdog.KeepAlive

	// lock protects all fields of this struct (but not the fields behind pointers!)
	lock sync.Mutex

	// state is the current state; NewReplication initializes it to Planning.
	state State

	// Working, WorkingWait, Completed, ContextDone
	queue     []*fsrep.Replication
	completed []*fsrep.Replication
	active    *fsrep.Replication // == queue[0] or nil, unlike in Report

	// for PlanningError, WorkingWait and ContextError and Completed
	err error

	// PlanningError, WorkingWait
	sleepUntil time.Time
}
|
|
||||||
|
|
||||||
// Report is a snapshot of the state of a Replication.
// NOTE(review): the code that fills in a Report is not visible here;
// field meanings beyond their names and the comment below are unverified.
type Report struct {
	Status     string
	Problem    string
	SleepUntil time.Time
	Completed  []*fsrep.Report
	Pending    []*fsrep.Report
	Active     *fsrep.Report // not contained in Pending, unlike in struct Replication
}
|
|
||||||
|
|
||||||
func NewReplication(secsPerState *prometheus.HistogramVec, bytesReplicated *prometheus.CounterVec) *Replication {
|
|
||||||
r := Replication{
|
|
||||||
promSecsPerState: secsPerState,
|
|
||||||
promBytesReplicated: bytesReplicated,
|
|
||||||
state: Planning,
|
|
||||||
}
|
|
||||||
return &r
|
|
||||||
}
|
|
||||||
|
|
||||||
// Endpoint represents one side of the replication.
//
// An endpoint is either in Sender or Receiver mode, represented by the correspondingly
// named interfaces defined in this package.
type Endpoint interface {
	// Does not include placeholder filesystems
	ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
	// ListFilesystemVersions lists the versions of the filesystem named in req.
	ListFilesystemVersions(ctx context.Context, req *pdu.ListFilesystemVersionsReq) (*pdu.ListFilesystemVersionsRes, error)
	// DestroySnapshots handles the snapshot destruction request req.
	DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error)
}
|
|
||||||
|
|
||||||
// Sender is an Endpoint that additionally implements fsrep.Sender.
type Sender interface {
	Endpoint
	fsrep.Sender
}
|
|
||||||
|
|
||||||
// Receiver is an Endpoint that additionally implements fsrep.Receiver.
type Receiver interface {
	Endpoint
	fsrep.Receiver
}
|
|
||||||
|
|
||||||
// FilteredError is the error for a filesystem that an endpoint does not
// allow access to.
type FilteredError struct{ fs string }

// NewFilteredError returns a FilteredError for the filesystem fs.
func NewFilteredError(fs string) *FilteredError {
	return &FilteredError{fs: fs}
}

// Error implements the error interface.
func (f FilteredError) Error() string {
	const prefix = "endpoint does not allow access to filesystem "
	return prefix + f.fs
}
|
|
||||||
|
|
||||||
// updater runs the given function (if non-nil) on the Replication and
// returns the Replication's state afterwards; see Drive for the
// implementation, which holds the Replication's lock while doing so.
type updater func(func(*Replication)) (newState State)

// state is a state function of the Replication state machine. It performs
// the work of one state and returns the state function to run next, or nil
// if the state machine is finished (see the loop in Drive).
type state func(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state
|
|
||||||
|
|
||||||
// Drive starts the state machine and returns only after replication has finished (with or without errors).
|
|
||||||
// The Logger in ctx is used for both debug and error logging, but is not guaranteed to be stable
|
|
||||||
// or end-user friendly.
|
|
||||||
// User-facing replication progress reports and can be obtained using the Report method,
|
|
||||||
// whose output will not change after Drive returns.
|
|
||||||
//
|
|
||||||
// FIXME: Drive may be only called once per instance of Replication
|
|
||||||
func (r *Replication) Drive(ctx context.Context, sender Sender, receiver Receiver) {
|
|
||||||
|
|
||||||
var u updater = func(f func(*Replication)) State {
|
|
||||||
r.lock.Lock()
|
|
||||||
defer r.lock.Unlock()
|
|
||||||
if f != nil {
|
|
||||||
f(r)
|
|
||||||
}
|
|
||||||
return r.state
|
|
||||||
}
|
|
||||||
|
|
||||||
var s state = statePlanning
|
|
||||||
var pre, post State
|
|
||||||
for s != nil {
|
|
||||||
preTime := time.Now()
|
|
||||||
pre = u(nil)
|
|
||||||
s = s(ctx, &r.Progress, sender, receiver, u)
|
|
||||||
delta := time.Now().Sub(preTime)
|
|
||||||
r.promSecsPerState.WithLabelValues(pre.String()).Observe(delta.Seconds())
|
|
||||||
post = u(nil)
|
|
||||||
getLogger(ctx).
|
|
||||||
WithField("transition", fmt.Sprintf("%s => %s", pre, post)).
|
|
||||||
WithField("duration", delta).
|
|
||||||
Debug("main state transition")
|
|
||||||
if post == Working && pre != post {
|
|
||||||
getLogger(ctx).Info("start working")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
getLogger(ctx).
|
|
||||||
WithField("final_state", post).
|
|
||||||
Debug("main final state")
|
|
||||||
}
|
|
||||||
|
|
||||||
func resolveConflict(conflict error) (path []*pdu.FilesystemVersion, msg string) {
|
|
||||||
if noCommonAncestor, ok := conflict.(*ConflictNoCommonAncestor); ok {
|
|
||||||
if len(noCommonAncestor.SortedReceiverVersions) == 0 {
|
|
||||||
// TODO this is hard-coded replication policy: most recent snapshot as source
|
|
||||||
var mostRecentSnap *pdu.FilesystemVersion
|
|
||||||
for n := len(noCommonAncestor.SortedSenderVersions) - 1; n >= 0; n-- {
|
|
||||||
if noCommonAncestor.SortedSenderVersions[n].Type == pdu.FilesystemVersion_Snapshot {
|
|
||||||
mostRecentSnap = noCommonAncestor.SortedSenderVersions[n]
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if mostRecentSnap == nil {
|
|
||||||
return nil, "no snapshots available on sender side"
|
|
||||||
}
|
|
||||||
return []*pdu.FilesystemVersion{mostRecentSnap}, fmt.Sprintf("start replication at most recent snapshot %s", mostRecentSnap.RelName())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil, "no automated way to handle conflict type"
|
|
||||||
}
|
|
||||||
|
|
||||||
// RetryInterval is how long statePlanning schedules the state machine to
// sleep after a temporary planning error. It is obtained via envconst from
// ZREPL_REPLICATION_RETRY_INTERVAL with 10 seconds as the fallback argument.
// NOTE(review): presumably read from the process environment — confirm in envconst.
var RetryInterval = envconst.Duration("ZREPL_REPLICATION_RETRY_INTERVAL", 10 * time.Second)
|
|
||||||
|
|
||||||
// Error is an error that can tell whether it is temporary.
// isPermanent treats every error that does not implement it as permanent.
type Error interface {
	error
	Temporary() bool
}

// Compile-time checks that fsrep.Error and net.Error satisfy Error.
var _ Error = fsrep.Error(nil)
var _ Error = net.Error(nil)
|
|
||||||
|
|
||||||
func isPermanent(err error) bool {
|
|
||||||
if e, ok := err.(Error); ok {
|
|
||||||
return !e.Temporary()
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// statePlanning is the state function of the Planning state.
//
// It lists the filesystems on sender and receiver and builds, for every
// sender filesystem, a queue item: either an fsrep.Replication with the steps
// along the incremental path (including a size estimate), or a conflict-error
// item if the sender has no versions or no path could be determined.
// Filesystems for which the receiver returns a *FilteredError are skipped.
//
// On success, the queue is stored in the Replication and the state changes to
// Working. Any other endpoint error aborts planning: the state becomes
// PlanningError (with sleepUntil set) for temporary errors and PermanentError
// otherwise. The return value is the state function of the resulting state.
func statePlanning(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {

	log := getLogger(ctx)

	log.Info("start planning")

	// handlePlanningError records err as a GlobalError and moves to
	// PermanentError or PlanningError, depending on isPermanent(err).
	handlePlanningError := func(err error) state {
		return u(func(r *Replication) {
			ge := GlobalError{Err: err, Temporary: !isPermanent(err)}
			log.WithError(ge).Error("encountered global error while planning replication")
			r.err = ge
			if !ge.Temporary {
				r.state = PermanentError
			} else {
				r.sleepUntil = time.Now().Add(RetryInterval)
				r.state = PlanningError
			}
		}).rsf()
	}

	slfssres, err := sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
	if err != nil {
		log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing sender filesystems")
		return handlePlanningError(err)
	}
	sfss := slfssres.GetFilesystems()
	// no progress here since we could run in a live-lock on connectivity issues

	rlfssres, err := receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
	if err != nil {
		log.WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("error listing receiver filesystems")
		return handlePlanningError(err)
	}
	rfss := rlfssres.GetFilesystems()
	ka.MadeProgress() // for both sender and receiver

	q := make([]*fsrep.Replication, 0, len(sfss))
	mainlog := log
	for _, fs := range sfss {

		// per-filesystem logger; shadows the outer log within the loop body
		log := mainlog.WithField("filesystem", fs.Path)

		log.Debug("assessing filesystem")

		sfsvsres, err := sender.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path})
		if err != nil {
			log.WithError(err).Error("cannot get remote filesystem versions")
			return handlePlanningError(err)
		}
		sfsvs := sfsvsres.GetVersions()
		ka.MadeProgress()

		if len(sfsvs) < 1 {
			err := errors.New("sender does not have any versions")
			log.Error(err.Error())
			q = append(q, fsrep.NewReplicationConflictError(fs.Path, err))
			continue
		}

		receiverFSExists := false
		for _, rfs := range rfss {
			if rfs.Path == fs.Path {
				receiverFSExists = true
			}
		}

		var rfsvs []*pdu.FilesystemVersion
		if receiverFSExists {
			rfsvsres, err := receiver.ListFilesystemVersions(ctx, &pdu.ListFilesystemVersionsReq{Filesystem: fs.Path})
			if err != nil {
				// a filtered filesystem is skipped, not treated as a planning error
				if _, ok := err.(*FilteredError); ok {
					log.Info("receiver ignores filesystem")
					continue
				}
				log.WithError(err).Error("receiver error")
				return handlePlanningError(err)
			}
			rfsvs = rfsvsres.GetVersions()
		} else {
			rfsvs = []*pdu.FilesystemVersion{}
		}
		ka.MadeProgress()

		path, conflict := IncrementalPath(rfsvs, sfsvs)
		if conflict != nil {
			var msg string
			path, msg = resolveConflict(conflict) // no shadowing allowed!
			if path != nil {
				log.WithField("conflict", conflict).Info("conflict")
				log.WithField("resolution", msg).Info("automatically resolved")
			} else {
				log.WithField("conflict", conflict).Error("conflict")
				log.WithField("problem", msg).Error("cannot resolve conflict")
			}
		}
		ka.MadeProgress()
		if path == nil {
			q = append(q, fsrep.NewReplicationConflictError(fs.Path, conflict))
			continue
		}

		// read the counter vector through the updater, i.e. under the Replication's lock
		var promBytesReplicated *prometheus.CounterVec
		u(func(replication *Replication) { // FIXME args struct like in pruner (also use for sender and receiver)
			promBytesReplicated = replication.promBytesReplicated
		})
		fsrfsm := fsrep.BuildReplication(fs.Path, promBytesReplicated.WithLabelValues(fs.Path))
		if len(path) == 1 {
			// single version: one step without a from version
			fsrfsm.AddStep(nil, path[0])
		} else {
			// one step per pair of consecutive versions
			for i := 0; i < len(path)-1; i++ {
				fsrfsm.AddStep(path[i], path[i+1])
			}
		}
		qitem := fsrfsm.Done()
		ka.MadeProgress()

		log.Debug("compute send size estimate")
		if err = qitem.UpdateSizeEsitmate(ctx, sender); err != nil {
			log.WithError(err).Error("error computing size estimate")
			return handlePlanningError(err)
		}
		ka.MadeProgress()

		q = append(q, qitem)
	}

	ka.MadeProgress()

	// planning succeeded: install the queue and switch to Working
	return u(func(r *Replication) {
		r.completed = nil
		r.queue = q
		r.err = nil
		r.state = Working
	}).rsf()
}
|
|
||||||
|
|
||||||
func statePlanningError(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
|
|
||||||
var sleepUntil time.Time
|
|
||||||
u(func(r *Replication) {
|
|
||||||
sleepUntil = r.sleepUntil
|
|
||||||
})
|
|
||||||
t := time.NewTimer(sleepUntil.Sub(time.Now()))
|
|
||||||
getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after planning error")
|
|
||||||
defer t.Stop()
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return u(func(r *Replication) {
|
|
||||||
r.state = PermanentError
|
|
||||||
r.err = ctx.Err()
|
|
||||||
}).rsf()
|
|
||||||
case <-t.C:
|
|
||||||
case <-wakeup.Wait(ctx):
|
|
||||||
}
|
|
||||||
return u(func(r *Replication) {
|
|
||||||
r.state = Planning
|
|
||||||
}).rsf()
|
|
||||||
}
|
|
||||||
|
|
||||||
// GlobalError wraps an error together with a flag that classifies it as
// temporary or permanent.
type GlobalError struct {
	Err       error
	Temporary bool
}

// Error renders the wrapped error prefixed with its class, i.e.
// "temporary global error: ..." or "permanent global error: ...".
func (e GlobalError) Error() string {
	class := "permanent"
	if e.Temporary {
		class = "temporary"
	}
	return fmt.Sprintf("%s global error: %s", class, e.Err)
}
|
|
||||||
|
|
||||||
type FilesystemsReplicationFailedError struct {
|
|
||||||
FilesystemsWithError []*fsrep.Replication
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e FilesystemsReplicationFailedError) Error() string {
|
|
||||||
allSame := true
|
|
||||||
lastErr := e.FilesystemsWithError[0].Err().Error()
|
|
||||||
for _, fs := range e.FilesystemsWithError {
|
|
||||||
fsErr := fs.Err().Error()
|
|
||||||
allSame = allSame && lastErr == fsErr
|
|
||||||
}
|
|
||||||
|
|
||||||
fsstr := "multiple filesystems"
|
|
||||||
if len(e.FilesystemsWithError) == 1 {
|
|
||||||
fsstr = fmt.Sprintf("filesystem %s", e.FilesystemsWithError[0].FS())
|
|
||||||
}
|
|
||||||
errorStr := lastErr
|
|
||||||
if !allSame {
|
|
||||||
errorStr = "multiple different errors"
|
|
||||||
}
|
|
||||||
return fmt.Sprintf("%s could not be replicated: %s", fsstr, errorStr)
|
|
||||||
}
|
|
||||||
|
|
||||||
// stateWorking performs one unit of work of the replication state machine.
//
// Under the updater it re-partitions r.queue into still-retryable entries
// (kept, stably sorted by NextStepDate) and non-retryable ones (moved to
// r.completed). If nothing is left in the queue, the replication becomes
// Completed, or PermanentError if any completed filesystem carries an error.
// Otherwise the first queue entry is marked active and retried; the kind of
// error it returns decides the follow-up state.
func stateWorking(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {

	var active *fsrep.Replication
	rsfNext := u(func(r *Replication) {

		// clear the error from any previous iteration
		r.err = nil

		// split the queue: retryable entries stay queued, the rest is completed
		newq := make([]*fsrep.Replication, 0, len(r.queue))
		for i := range r.queue {
			if r.queue[i].CanRetry() {
				newq = append(newq, r.queue[i])
			} else {
				r.completed = append(r.completed, r.queue[i])
			}
		}
		// earliest NextStepDate first; stable so equal dates keep their order
		sort.SliceStable(newq, func(i, j int) bool {
			return newq[i].NextStepDate().Before(newq[j].NextStepDate())
		})
		r.queue = newq

		if len(r.queue) == 0 {
			// no work left: Completed unless a completed filesystem has an error
			r.state = Completed
			fsWithErr := FilesystemsReplicationFailedError{ // prepare it
				FilesystemsWithError: make([]*fsrep.Replication, 0, len(r.completed)),
			}
			for _, fs := range r.completed {
				if fs.CanRetry() {
					panic(fmt.Sprintf("implementation error: completed contains retryable FS %s %#v",
						fs.FS(), fs.Err()))
				}
				if fs.Err() != nil {
					fsWithErr.FilesystemsWithError = append(fsWithErr.FilesystemsWithError, fs)
				}
			}
			if len(fsWithErr.FilesystemsWithError) > 0 {
				r.err = fsWithErr
				r.state = PermanentError
			}
			// active stays nil, so the caller below returns rsfNext
			return
		}

		active = r.queue[0] // do not dequeue: if it's done, it will be sorted the next time we check for more work
		r.active = active
	}).rsf()

	if active == nil {
		return rsfNext
	}

	// run the active filesystem with a logger that carries its name
	activeCtx := fsrep.WithLogger(ctx, getLogger(ctx).WithField("fs", active.FS()))
	err := active.Retry(activeCtx, ka, sender, receiver)
	u(func(r *Replication) {
		r.active = nil
	}).rsf()

	if err != nil {
		if err.ContextErr() && ctx.Err() != nil {
			// the error is a context error and our own ctx is done
			getLogger(ctx).WithError(err).
				Info("filesystem replication was cancelled")
			u(func(r*Replication) {
				r.err = GlobalError{Err: err, Temporary: false}
				r.state = PermanentError
			})
		} else if err.LocalToFS() {
			getLogger(ctx).WithError(err).
				Error("filesystem replication encountered a filesystem-specific error")
			// we stay in this state and let the queuing logic above de-prioritize this failing FS
		} else if err.Temporary() {
			getLogger(ctx).WithError(err).
				Error("filesystem encountered a non-filesystem-specific temporary error, enter retry-wait")
			u(func(r *Replication) {
				r.err = GlobalError{Err: err, Temporary: true}
				r.sleepUntil = time.Now().Add(RetryInterval)
				r.state = WorkingWait
			}).rsf()
		} else {
			getLogger(ctx).WithError(err).
				Error("encountered a permanent non-filesystem-specific error")
			u(func(r *Replication) {
				r.err = GlobalError{Err: err, Temporary: false}
				r.state = PermanentError
			}).rsf()
		}
	}

	// the branches above only record the state; hand back the state function from a nil update
	return u(nil).rsf()
}
|
|
||||||
|
|
||||||
func stateWorkingWait(ctx context.Context, ka *watchdog.KeepAlive, sender Sender, receiver Receiver, u updater) state {
|
|
||||||
var sleepUntil time.Time
|
|
||||||
u(func(r *Replication) {
|
|
||||||
sleepUntil = r.sleepUntil
|
|
||||||
})
|
|
||||||
t := time.NewTimer(RetryInterval)
|
|
||||||
getLogger(ctx).WithField("until", sleepUntil).Info("retry wait after error")
|
|
||||||
defer t.Stop()
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return u(func(r *Replication) {
|
|
||||||
r.state = PermanentError
|
|
||||||
r.err = ctx.Err()
|
|
||||||
}).rsf()
|
|
||||||
|
|
||||||
case <-t.C:
|
|
||||||
case <-wakeup.Wait(ctx):
|
|
||||||
}
|
|
||||||
return u(func(r *Replication) {
|
|
||||||
r.state = Working
|
|
||||||
}).rsf()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Report provides a summary of the progress of the Replication,
|
|
||||||
// i.e., a condensed dump of the internal state machine.
|
|
||||||
// Report is safe to be called asynchronously while Drive is running.
|
|
||||||
func (r *Replication) Report() *Report {
|
|
||||||
r.lock.Lock()
|
|
||||||
defer r.lock.Unlock()
|
|
||||||
|
|
||||||
rep := Report{
|
|
||||||
Status: r.state.String(),
|
|
||||||
SleepUntil: r.sleepUntil,
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.err != nil {
|
|
||||||
rep.Problem = r.err.Error()
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.state&(Planning|PlanningError) != 0 {
|
|
||||||
return &rep
|
|
||||||
}
|
|
||||||
|
|
||||||
rep.Pending = make([]*fsrep.Report, 0, len(r.queue))
|
|
||||||
rep.Completed = make([]*fsrep.Report, 0, len(r.completed)) // room for active (potentially)
|
|
||||||
|
|
||||||
// since r.active == r.queue[0], do not contain it in pending output
|
|
||||||
pending := r.queue
|
|
||||||
if r.active != nil {
|
|
||||||
rep.Active = r.active.Report()
|
|
||||||
pending = r.queue[1:]
|
|
||||||
}
|
|
||||||
for _, fsr := range pending {
|
|
||||||
rep.Pending= append(rep.Pending, fsr.Report())
|
|
||||||
}
|
|
||||||
for _, fsr := range r.completed {
|
|
||||||
rep.Completed = append(rep.Completed, fsr.Report())
|
|
||||||
}
|
|
||||||
|
|
||||||
return &rep
|
|
||||||
}
|
|
||||||
|
|
||||||
// State returns the current state of the replication.
// The value is read while holding r.lock.
func (r *Replication) State() State {
	r.lock.Lock()
	defer r.lock.Unlock()
	return r.state
}
|
|
13
replication/replication.go
Normal file
13
replication/replication.go
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
// Package replication implements replication of filesystems with existing
|
||||||
|
// versions (snapshots) from a sender to a receiver.
|
||||||
|
package replication
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/zrepl/zrepl/replication/driver"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Do forwards ctx and planner to driver.Do and returns its results unchanged:
// a driver.ReportFunc and a driver.WaitFunc.
func Do(ctx context.Context, planner driver.Planner) (driver.ReportFunc, driver.WaitFunc) {
	return driver.Do(ctx, planner)
}
|
152
replication/report/replication_report.go
Normal file
152
replication/report/replication_report.go
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
package report
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Report is the top-level report value of this package. It carries overall
// start/finish timestamps, reconnect-wait timestamps with an optional
// TimedError, and a list of per-attempt reports.
// NOTE(review): exact semantics of the individual timestamps are set by the
// producer of the report, which is not visible in this file — confirm there.
type Report struct {
	StartAt, FinishAt                      time.Time
	WaitReconnectSince, WaitReconnectUntil time.Time
	WaitReconnectError                     *TimedError
	Attempts                               []*AttemptReport
}

// Marshals a zero Report at package initialization and discards both results;
// presumably a smoke check that the type is JSON-encodable — TODO confirm intent.
var _, _ = json.Marshal(&Report{})
|
||||||
|
|
||||||
|
// TimedError is an error message paired with a timestamp.
type TimedError struct {
	Err  string
	Time time.Time
}

// NewTimedError returns a TimedError holding err and t.
//
// It panics if err is the empty string or if t is the zero time, so that a
// TimedError built through this constructor always has both fields populated.
func NewTimedError(err string, t time.Time) *TimedError {
	if err == "" {
		// The check rejects empty messages; the panic text says so.
		panic("error must not be empty")
	}
	if t.IsZero() {
		panic("t must be non-zero")
	}
	return &TimedError{err, t}
}

// Error returns the stored error message.
func (s *TimedError) Error() string {
	return s.Err
}

// Marshals a zero TimedError at package initialization and discards both
// results; presumably a smoke check that the type is JSON-encodable — TODO
// confirm intent.
var _, _ = json.Marshal(&TimedError{})
|
||||||
|
|
||||||
|
// AttemptReport describes a single attempt: its state, start/finish
// timestamps, an optional planning error, and one report per filesystem.
// NOTE(review): when PlanError is populated is decided by the producer of the
// report — presumably in state AttemptPlanningError; confirm there.
type AttemptReport struct {
	State             AttemptState
	StartAt, FinishAt time.Time
	PlanError         *TimedError
	Filesystems       []*FilesystemReport
}
|
||||||
|
|
||||||
|
// AttemptState is the string-typed state label used in AttemptReport.State.
type AttemptState string

// The AttemptState values defined by this package.
const (
	AttemptPlanning      AttemptState = "planning"
	AttemptPlanningError AttemptState = "planning-error"
	AttemptFanOutFSs     AttemptState = "fan-out-filesystems"
	AttemptFanOutError   AttemptState = "filesystem-error"
	AttemptDone          AttemptState = "done"
)
|
||||||
|
|
||||||
|
// FilesystemState is the string-typed state label used in
// FilesystemReport.State.
type FilesystemState string

// The FilesystemState values defined by this package.
const (
	FilesystemPlanning        FilesystemState = "planning"
	FilesystemPlanningErrored FilesystemState = "planning-error"
	FilesystemStepping        FilesystemState = "stepping"
	FilesystemSteppingErrored FilesystemState = "step-error"
	FilesystemDone            FilesystemState = "done"
)
|
||||||
|
|
||||||
|
// FilesystemReport describes one filesystem within an attempt: identifying
// info, its state, the error matching an errored state, and its steps.
type FilesystemReport struct {
	Info *FilesystemInfo

	State FilesystemState

	// Valid in State = FilesystemPlanningErrored
	PlanError *TimedError
	// Valid in State = FilesystemSteppingErrored
	StepError *TimedError

	// Valid in State = FilesystemStepping
	// CurrentStep is used as an index into Steps (see NextStep).
	CurrentStep int
	Steps       []*StepReport
}
|
||||||
|
|
||||||
|
// FilesystemInfo holds the identifying information of a filesystem in a
// report; currently only its name.
type FilesystemInfo struct {
	Name string
}
|
||||||
|
|
||||||
|
// StepReport describes a single step of a filesystem; all data lives in Info.
type StepReport struct {
	Info *StepInfo
}

// StepInfo holds the endpoints and byte counters of a step.
// An empty From is what IsIncremental treats as "not incremental".
// NOTE(review): From/To are presumably version (snapshot) names — confirm
// against the producer of the report.
type StepInfo struct {
	From, To        string
	BytesExpected   int64
	BytesReplicated int64
}
|
||||||
|
|
||||||
|
func (a *AttemptReport) BytesSum() (expected, replicated int64) {
|
||||||
|
for _, fs := range a.Filesystems {
|
||||||
|
e, r := fs.BytesSum()
|
||||||
|
expected += e
|
||||||
|
replicated += r
|
||||||
|
}
|
||||||
|
return expected, replicated
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FilesystemReport) BytesSum() (expected, replicated int64) {
|
||||||
|
for _, step := range f.Steps {
|
||||||
|
expected += step.Info.BytesExpected
|
||||||
|
replicated += step.Info.BytesReplicated
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *AttemptReport) FilesystemsByState() map[FilesystemState][]*FilesystemReport {
|
||||||
|
r := make(map[FilesystemState][]*FilesystemReport, 4)
|
||||||
|
for _, fs := range f.Filesystems {
|
||||||
|
l := r[fs.State]
|
||||||
|
l = append(l, fs)
|
||||||
|
r[fs.State] = l
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FilesystemReport) Error() *TimedError {
|
||||||
|
switch f.State {
|
||||||
|
case FilesystemPlanningErrored:
|
||||||
|
return f.PlanError
|
||||||
|
case FilesystemSteppingErrored:
|
||||||
|
return f.StepError
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// may return nil
|
||||||
|
func (f *FilesystemReport) NextStep() *StepReport {
|
||||||
|
switch f.State {
|
||||||
|
case FilesystemDone:
|
||||||
|
return nil
|
||||||
|
case FilesystemPlanningErrored:
|
||||||
|
return nil
|
||||||
|
case FilesystemSteppingErrored:
|
||||||
|
return nil
|
||||||
|
case FilesystemPlanning:
|
||||||
|
return nil
|
||||||
|
case FilesystemStepping:
|
||||||
|
// invariant is that this is always correct
|
||||||
|
// TODO what about 0-length Steps but short intermediary state?
|
||||||
|
return f.Steps[f.CurrentStep]
|
||||||
|
}
|
||||||
|
panic("unreachable")
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsIncremental reports whether the step has a non-empty Info.From.
func (f *StepReport) IsIncremental() bool {
	return f.Info.From != "" // FIXME change to ZFS semantics (To != "")
}
|
@ -1,76 +0,0 @@
|
|||||||
// Code generated by "enumer -type=State"; DO NOT EDIT.
|
|
||||||
|
|
||||||
package replication
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
_StateName_0 = "PlanningPlanningError"
|
|
||||||
_StateName_1 = "Working"
|
|
||||||
_StateName_2 = "WorkingWait"
|
|
||||||
_StateName_3 = "Completed"
|
|
||||||
_StateName_4 = "PermanentError"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
_StateIndex_0 = [...]uint8{0, 8, 21}
|
|
||||||
_StateIndex_1 = [...]uint8{0, 7}
|
|
||||||
_StateIndex_2 = [...]uint8{0, 11}
|
|
||||||
_StateIndex_3 = [...]uint8{0, 9}
|
|
||||||
_StateIndex_4 = [...]uint8{0, 14}
|
|
||||||
)
|
|
||||||
|
|
||||||
func (i State) String() string {
|
|
||||||
switch {
|
|
||||||
case 1 <= i && i <= 2:
|
|
||||||
i -= 1
|
|
||||||
return _StateName_0[_StateIndex_0[i]:_StateIndex_0[i+1]]
|
|
||||||
case i == 4:
|
|
||||||
return _StateName_1
|
|
||||||
case i == 8:
|
|
||||||
return _StateName_2
|
|
||||||
case i == 16:
|
|
||||||
return _StateName_3
|
|
||||||
case i == 32:
|
|
||||||
return _StateName_4
|
|
||||||
default:
|
|
||||||
return fmt.Sprintf("State(%d)", i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var _StateValues = []State{1, 2, 4, 8, 16, 32}
|
|
||||||
|
|
||||||
var _StateNameToValueMap = map[string]State{
|
|
||||||
_StateName_0[0:8]: 1,
|
|
||||||
_StateName_0[8:21]: 2,
|
|
||||||
_StateName_1[0:7]: 4,
|
|
||||||
_StateName_2[0:11]: 8,
|
|
||||||
_StateName_3[0:9]: 16,
|
|
||||||
_StateName_4[0:14]: 32,
|
|
||||||
}
|
|
||||||
|
|
||||||
// StateString retrieves an enum value from the enum constants string name.
|
|
||||||
// Throws an error if the param is not part of the enum.
|
|
||||||
func StateString(s string) (State, error) {
|
|
||||||
if val, ok := _StateNameToValueMap[s]; ok {
|
|
||||||
return val, nil
|
|
||||||
}
|
|
||||||
return 0, fmt.Errorf("%s does not belong to State values", s)
|
|
||||||
}
|
|
||||||
|
|
||||||
// StateValues returns all values of the enum
|
|
||||||
func StateValues() []State {
|
|
||||||
return _StateValues
|
|
||||||
}
|
|
||||||
|
|
||||||
// IsAState returns "true" if the value is listed in the enum definition. "false" otherwise
|
|
||||||
func (i State) IsAState() bool {
|
|
||||||
for _, v := range _StateValues {
|
|
||||||
if i == v {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
@ -7,7 +7,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/golang/protobuf/proto"
|
"github.com/golang/protobuf/proto"
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
"github.com/zrepl/zrepl/rpc/dataconn/stream"
|
"github.com/zrepl/zrepl/rpc/dataconn/stream"
|
||||||
"github.com/zrepl/zrepl/transport"
|
"github.com/zrepl/zrepl/transport"
|
||||||
"github.com/zrepl/zrepl/zfs"
|
"github.com/zrepl/zrepl/zfs"
|
||||||
@ -213,3 +213,23 @@ func (c *Client) ReqRecv(ctx context.Context, req *pdu.ReceiveReq, streamCopier
|
|||||||
|
|
||||||
return res.res, cause
|
return res.res, cause
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func (c *Client) ReqPing(ctx context.Context, req *pdu.PingReq) (*pdu.PingRes, error) {
|
||||||
|
conn, err := c.getWire(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer c.putWire(conn)
|
||||||
|
|
||||||
|
if err := c.send(ctx, conn, EndpointPing, req, nil); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var res pdu.PingRes
|
||||||
|
if err := c.recv(ctx, conn, &res); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &res, nil
|
||||||
|
}
|
@ -7,7 +7,7 @@ import (
|
|||||||
|
|
||||||
"github.com/golang/protobuf/proto"
|
"github.com/golang/protobuf/proto"
|
||||||
"github.com/zrepl/zrepl/logger"
|
"github.com/zrepl/zrepl/logger"
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
"github.com/zrepl/zrepl/rpc/dataconn/stream"
|
"github.com/zrepl/zrepl/rpc/dataconn/stream"
|
||||||
"github.com/zrepl/zrepl/transport"
|
"github.com/zrepl/zrepl/transport"
|
||||||
"github.com/zrepl/zrepl/zfs"
|
"github.com/zrepl/zrepl/zfs"
|
||||||
@ -25,6 +25,8 @@ type Handler interface {
|
|||||||
// It is guaranteed that Server calls Receive with a stream that holds the IdleConnTimeout
|
// It is guaranteed that Server calls Receive with a stream that holds the IdleConnTimeout
|
||||||
// configured in ServerConfig.Shared.IdleConnTimeout.
|
// configured in ServerConfig.Shared.IdleConnTimeout.
|
||||||
Receive(ctx context.Context, r *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error)
|
Receive(ctx context.Context, r *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error)
|
||||||
|
// PingDataconn handles a PingReq
|
||||||
|
PingDataconn(ctx context.Context, r *pdu.PingReq) (*pdu.PingRes, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type Logger = logger.Logger
|
type Logger = logger.Logger
|
||||||
@ -125,6 +127,13 @@ func (s *Server) serveConn(nc *transport.AuthConn) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
res, handlerErr = s.h.Receive(ctx, &req, &streamCopier{streamConn: c, closeStreamOnClose: false}) // SHADOWING
|
res, handlerErr = s.h.Receive(ctx, &req, &streamCopier{streamConn: c, closeStreamOnClose: false}) // SHADOWING
|
||||||
|
case EndpointPing:
|
||||||
|
var req pdu.PingReq
|
||||||
|
if err := proto.Unmarshal(reqStructured, &req); err != nil {
|
||||||
|
s.log.WithError(err).Error("cannot unmarshal ping request")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
res, handlerErr = s.h.PingDataconn(ctx, &req) // SHADOWING
|
||||||
default:
|
default:
|
||||||
s.log.WithField("endpoint", endpoint).Error("unknown endpoint")
|
s.log.WithField("endpoint", endpoint).Error("unknown endpoint")
|
||||||
handlerErr = fmt.Errorf("requested endpoint does not exist")
|
handlerErr = fmt.Errorf("requested endpoint does not exist")
|
||||||
@ -137,6 +146,10 @@ func (s *Server) serveConn(nc *transport.AuthConn) {
|
|||||||
// if marshaling fails. We consider failed marshaling a handler error
|
// if marshaling fails. We consider failed marshaling a handler error
|
||||||
var protobuf *bytes.Buffer
|
var protobuf *bytes.Buffer
|
||||||
if handlerErr == nil {
|
if handlerErr == nil {
|
||||||
|
if res == nil {
|
||||||
|
handlerErr = fmt.Errorf("implementation error: handler for endpoint %q returns nil error and nil result", endpoint)
|
||||||
|
s.log.WithError(err).Error("handle implementation error")
|
||||||
|
} else {
|
||||||
protobufBytes, err := proto.Marshal(res)
|
protobufBytes, err := proto.Marshal(res)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
s.log.WithError(err).Error("cannot marshal handler protobuf")
|
s.log.WithError(err).Error("cannot marshal handler protobuf")
|
||||||
@ -144,6 +157,7 @@ func (s *Server) serveConn(nc *transport.AuthConn) {
|
|||||||
}
|
}
|
||||||
protobuf = bytes.NewBuffer(protobufBytes) // SHADOWING
|
protobuf = bytes.NewBuffer(protobufBytes) // SHADOWING
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var resHeaderBuf bytes.Buffer
|
var resHeaderBuf bytes.Buffer
|
||||||
if handlerErr == nil {
|
if handlerErr == nil {
|
||||||
|
@ -10,6 +10,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
EndpointPing string = "/v1/ping"
|
||||||
EndpointSend string = "/v1/send"
|
EndpointSend string = "/v1/send"
|
||||||
EndpointRecv string = "/v1/recv"
|
EndpointRecv string = "/v1/recv"
|
||||||
)
|
)
|
||||||
|
@ -24,7 +24,7 @@ import (
|
|||||||
"github.com/pkg/profile"
|
"github.com/pkg/profile"
|
||||||
|
|
||||||
"github.com/zrepl/zrepl/logger"
|
"github.com/zrepl/zrepl/logger"
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
"github.com/zrepl/zrepl/rpc/dataconn"
|
"github.com/zrepl/zrepl/rpc/dataconn"
|
||||||
"github.com/zrepl/zrepl/rpc/dataconn/timeoutconn"
|
"github.com/zrepl/zrepl/rpc/dataconn/timeoutconn"
|
||||||
"github.com/zrepl/zrepl/transport"
|
"github.com/zrepl/zrepl/transport"
|
||||||
@ -77,6 +77,12 @@ func (devNullHandler) Receive(ctx context.Context, r *pdu.ReceiveReq, stream zfs
|
|||||||
return &res, err
|
return &res, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (devNullHandler) PingDataconn(ctx context.Context, r *pdu.PingReq) (*pdu.PingRes, error) {
|
||||||
|
return &pdu.PingRes{
|
||||||
|
Echo: r.GetMessage(),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
type tcpConnecter struct {
|
type tcpConnecter struct {
|
||||||
addr string
|
addr string
|
||||||
}
|
}
|
||||||
|
@ -105,11 +105,12 @@ func NewInterceptors(logger Logger, clientIdentityKey interface{}) (unary grpc.U
|
|||||||
if !ok {
|
if !ok {
|
||||||
panic("peer.FromContext expected to return a peer in grpc.UnaryServerInterceptor")
|
panic("peer.FromContext expected to return a peer in grpc.UnaryServerInterceptor")
|
||||||
}
|
}
|
||||||
logger.WithField("peer", fmt.Sprintf("%v", p)).Debug("peer")
|
logger.WithField("peer_addr", fmt.Sprintf("%s", p.Addr)).Debug("peer addr")
|
||||||
a, ok := p.AuthInfo.(*authConnAuthType)
|
a, ok := p.AuthInfo.(*authConnAuthType)
|
||||||
if !ok {
|
if !ok {
|
||||||
panic(fmt.Sprintf("NewInterceptors must be used in combination with grpc.NewTransportCredentials, but got auth type %T", p.AuthInfo))
|
panic(fmt.Sprintf("NewInterceptors must be used in combination with grpc.NewTransportCredentials, but got auth type %T", p.AuthInfo))
|
||||||
}
|
}
|
||||||
|
logger.WithField("peer_client_identity", a.clientIdentity).Debug("peer client identity")
|
||||||
ctx = context.WithValue(ctx, clientIdentityKey, a.clientIdentity)
|
ctx = context.WithValue(ctx, clientIdentityKey, a.clientIdentity)
|
||||||
return handler(ctx, req)
|
return handler(ctx, req)
|
||||||
}
|
}
|
||||||
|
@ -36,7 +36,7 @@ func ClientConn(cn transport.Connecter, log Logger) *grpc.ClientConn {
|
|||||||
})
|
})
|
||||||
dialerOption := grpc.WithDialer(grpcclientidentity.NewDialer(log, cn))
|
dialerOption := grpc.WithDialer(grpcclientidentity.NewDialer(log, cn))
|
||||||
cred := grpc.WithTransportCredentials(grpcclientidentity.NewTransportCredentials(log))
|
cred := grpc.WithTransportCredentials(grpcclientidentity.NewTransportCredentials(log))
|
||||||
ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second)
|
ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second) // FIXME constant
|
||||||
defer cancel()
|
defer cancel()
|
||||||
cc, err := grpc.DialContext(ctx, "doesn't matter done by dialer", dialerOption, cred, ka)
|
cc, err := grpc.DialContext(ctx, "doesn't matter done by dialer", dialerOption, cred, ka)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -2,13 +2,18 @@ package rpc
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
|
|
||||||
"github.com/zrepl/zrepl/replication"
|
"github.com/google/uuid"
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
"github.com/zrepl/zrepl/replication/logic"
|
||||||
|
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
"github.com/zrepl/zrepl/rpc/dataconn"
|
"github.com/zrepl/zrepl/rpc/dataconn"
|
||||||
"github.com/zrepl/zrepl/rpc/grpcclientidentity/grpchelper"
|
"github.com/zrepl/zrepl/rpc/grpcclientidentity/grpchelper"
|
||||||
"github.com/zrepl/zrepl/rpc/versionhandshake"
|
"github.com/zrepl/zrepl/rpc/versionhandshake"
|
||||||
@ -24,11 +29,12 @@ type Client struct {
|
|||||||
controlClient pdu.ReplicationClient // this the grpc client instance, see constructor
|
controlClient pdu.ReplicationClient // this the grpc client instance, see constructor
|
||||||
controlConn *grpc.ClientConn
|
controlConn *grpc.ClientConn
|
||||||
loggers Loggers
|
loggers Loggers
|
||||||
|
closed chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ replication.Endpoint = &Client{}
|
var _ logic.Endpoint = &Client{}
|
||||||
var _ replication.Sender = &Client{}
|
var _ logic.Sender = &Client{}
|
||||||
var _ replication.Receiver = &Client{}
|
var _ logic.Receiver = &Client{}
|
||||||
|
|
||||||
type DialContextFunc = func(ctx context.Context, network string, addr string) (net.Conn, error)
|
type DialContextFunc = func(ctx context.Context, network string, addr string) (net.Conn, error)
|
||||||
|
|
||||||
@ -41,14 +47,21 @@ func NewClient(cn transport.Connecter, loggers Loggers) *Client {
|
|||||||
|
|
||||||
c := &Client{
|
c := &Client{
|
||||||
loggers: loggers,
|
loggers: loggers,
|
||||||
|
closed: make(chan struct{}),
|
||||||
}
|
}
|
||||||
grpcConn := grpchelper.ClientConn(muxedConnecter.control, loggers.Control)
|
grpcConn := grpchelper.ClientConn(muxedConnecter.control, loggers.Control)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
for {
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
go func() {
|
||||||
|
<-c.closed
|
||||||
|
cancel()
|
||||||
|
}()
|
||||||
|
defer cancel()
|
||||||
|
for ctx.Err() == nil {
|
||||||
state := grpcConn.GetState()
|
state := grpcConn.GetState()
|
||||||
loggers.General.WithField("grpc_state", state.String()).Debug("grpc state change")
|
loggers.General.WithField("grpc_state", state.String()).Debug("grpc state change")
|
||||||
grpcConn.WaitForStateChange(context.TODO(), state)
|
grpcConn.WaitForStateChange(ctx, state)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
c.controlClient = pdu.NewReplicationClient(grpcConn)
|
c.controlClient = pdu.NewReplicationClient(grpcConn)
|
||||||
@ -59,8 +72,9 @@ func NewClient(cn transport.Connecter, loggers Loggers) *Client {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) Close() {
|
func (c *Client) Close() {
|
||||||
|
close(c.closed)
|
||||||
if err := c.controlConn.Close(); err != nil {
|
if err := c.controlConn.Close(); err != nil {
|
||||||
c.loggers.General.WithError(err).Error("cannot cloe control connection")
|
c.loggers.General.WithError(err).Error("cannot close control connection")
|
||||||
}
|
}
|
||||||
// TODO c.dataClient should have Close()
|
// TODO c.dataClient should have Close()
|
||||||
}
|
}
|
||||||
@ -101,6 +115,72 @@ func (c *Client) ReplicationCursor(ctx context.Context, in *pdu.ReplicationCurso
|
|||||||
return c.controlClient.ReplicationCursor(ctx, in)
|
return c.controlClient.ReplicationCursor(ctx, in)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Client) WaitForConnectivity(ctx context.Context) error {
|
||||||
|
ctx, cancel := context.WithCancel(ctx)
|
||||||
|
defer cancel()
|
||||||
|
msg := uuid.New().String()
|
||||||
|
req := pdu.PingReq{Message: msg}
|
||||||
|
var ctrlOk, dataOk int32
|
||||||
|
loggers := GetLoggersOrPanic(ctx)
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(2)
|
||||||
|
checkRes := func(res *pdu.PingRes, err error, logger Logger, okVar *int32) {
|
||||||
|
if err == nil && res.GetEcho() != req.GetMessage() {
|
||||||
|
err = errors.New("pilot message not echoed correctly")
|
||||||
|
}
|
||||||
|
if err == context.Canceled {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
logger.WithError(err).Error("ping failed")
|
||||||
|
atomic.StoreInt32(okVar, 0)
|
||||||
|
cancel()
|
||||||
|
} else {
|
||||||
|
atomic.StoreInt32(okVar, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
ctrl, ctrlErr := c.controlClient.Ping(ctx, &req, grpc.FailFast(false))
|
||||||
|
checkRes(ctrl, ctrlErr, loggers.Control, &ctrlOk)
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for ctx.Err() == nil {
|
||||||
|
data, dataErr := c.dataClient.ReqPing(ctx, &req)
|
||||||
|
// dataClient uses transport.Connecter, which doesn't expose FailFast(false)
|
||||||
|
// => we need to mask dial timeouts
|
||||||
|
if err, ok := dataErr.(interface{ Temporary() bool }); ok && err.Temporary() {
|
||||||
|
// Rate-limit pings here in case Temporary() is a mis-classification
|
||||||
|
// or returns immediately (this is a tight loop in that case)
|
||||||
|
// TODO keep this in lockstep with controlClient
|
||||||
|
// => don't use FailFast for control, but check that both control and data worked
|
||||||
|
time.Sleep(envconst.Duration("ZREPL_RPC_DATACONN_PING_SLEEP", 1*time.Second))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// it's not a dial timeout,
|
||||||
|
checkRes(data, dataErr, loggers.Data, &dataOk)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
wg.Wait()
|
||||||
|
var what string
|
||||||
|
if ctrlOk == 1 && dataOk == 1 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if ctrlOk == 0 {
|
||||||
|
what += "control"
|
||||||
|
}
|
||||||
|
if dataOk == 0 {
|
||||||
|
if len(what) > 0 {
|
||||||
|
what += " and data"
|
||||||
|
} else {
|
||||||
|
what += "data"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("%s rpc failed to respond to ping rpcs", what)
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Client) ResetConnectBackoff() {
|
func (c *Client) ResetConnectBackoff() {
|
||||||
c.controlConn.ResetConnectBackoff()
|
c.controlConn.ResetConnectBackoff()
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
|
|
||||||
"github.com/zrepl/zrepl/endpoint"
|
"github.com/zrepl/zrepl/endpoint"
|
||||||
"github.com/zrepl/zrepl/replication/pdu"
|
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||||
"github.com/zrepl/zrepl/rpc/dataconn"
|
"github.com/zrepl/zrepl/rpc/dataconn"
|
||||||
"github.com/zrepl/zrepl/rpc/grpcclientidentity"
|
"github.com/zrepl/zrepl/rpc/grpcclientidentity"
|
||||||
"github.com/zrepl/zrepl/rpc/netadaptor"
|
"github.com/zrepl/zrepl/rpc/netadaptor"
|
||||||
|
@ -7,10 +7,10 @@ package transportmux
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
"time"
|
"time"
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/zrepl/zrepl/logger"
|
"github.com/zrepl/zrepl/logger"
|
||||||
"github.com/zrepl/zrepl/transport"
|
"github.com/zrepl/zrepl/transport"
|
||||||
@ -111,7 +111,7 @@ func Demux(ctx context.Context, rawListener transport.AuthenticatedListener, lab
|
|||||||
if ctx.Err() != nil {
|
if ctx.Err() != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
getLog(ctx).WithError(err).Error("accept error")
|
getLog(ctx).WithError(err).WithField("errType", fmt.Sprintf("%T", err)).Error("accept error")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
closeConn := func() {
|
closeConn := func() {
|
||||||
|
@ -26,14 +26,22 @@ type HandshakeError struct {
|
|||||||
msg string
|
msg string
|
||||||
// If not nil, the underlying IO error that caused the handshake to fail.
|
// If not nil, the underlying IO error that caused the handshake to fail.
|
||||||
IOError error
|
IOError error
|
||||||
|
isAcceptError bool
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ net.Error = &HandshakeError{}
|
var _ net.Error = &HandshakeError{}
|
||||||
|
|
||||||
func (e HandshakeError) Error() string { return e.msg }
|
func (e HandshakeError) Error() string { return e.msg }
|
||||||
|
|
||||||
// Always true to enable usage in a net.Listener.
|
// Like with net.OpErr (Go issue 6163), a client failing to handshake
|
||||||
func (e HandshakeError) Temporary() bool { return true }
|
// should be a temporary Accept error toward the Listener.
|
||||||
|
func (e HandshakeError) Temporary() bool {
|
||||||
|
if e.isAcceptError {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
te, ok := e.IOError.(interface{ Temporary() bool });
|
||||||
|
return ok && te.Temporary()
|
||||||
|
}
|
||||||
|
|
||||||
// If the underlying IOError was net.Error.Timeout(), Timeout() returns that value.
|
// If the underlying IOError was net.Error.Timeout(), Timeout() returns that value.
|
||||||
// Otherwise false.
|
// Otherwise false.
|
||||||
@ -142,14 +150,14 @@ func (m *HandshakeMessage) DecodeReader(r io.Reader, maxLen int) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func DoHandshakeCurrentVersion(conn net.Conn, deadline time.Time) error {
|
func DoHandshakeCurrentVersion(conn net.Conn, deadline time.Time) *HandshakeError {
|
||||||
// current protocol version is hardcoded here
|
// current protocol version is hardcoded here
|
||||||
return DoHandshakeVersion(conn, deadline, 1)
|
return DoHandshakeVersion(conn, deadline, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
const HandshakeMessageMaxLen = 16 * 4096
|
const HandshakeMessageMaxLen = 16 * 4096
|
||||||
|
|
||||||
func DoHandshakeVersion(conn net.Conn, deadline time.Time, version int) error {
|
func DoHandshakeVersion(conn net.Conn, deadline time.Time, version int) *HandshakeError {
|
||||||
ours := HandshakeMessage{
|
ours := HandshakeMessage{
|
||||||
ProtocolVersion: version,
|
ProtocolVersion: version,
|
||||||
Extensions: nil,
|
Extensions: nil,
|
||||||
|
@ -55,6 +55,7 @@ func (l HandshakeListener) Accept(ctx context.Context) (*transport.AuthConn, err
|
|||||||
dl = time.Now().Add(l.timeout) // shadowing
|
dl = time.Now().Add(l.timeout) // shadowing
|
||||||
}
|
}
|
||||||
if err := DoHandshakeCurrentVersion(conn, dl); err != nil {
|
if err := DoHandshakeCurrentVersion(conn, dl); err != nil {
|
||||||
|
err.isAcceptError = true
|
||||||
conn.Close()
|
conn.Close()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
42
util/chainlock/chainlock.go
Normal file
42
util/chainlock/chainlock.go
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
// Package chainlock implements a mutex whose Lock and Unlock
|
||||||
|
// methods return the lock itself, to enable chaining.
|
||||||
|
//
|
||||||
|
// Intended Usage
|
||||||
|
//
|
||||||
|
// defer s.lock().unlock()
|
||||||
|
// // drop lock while waiting for wait group
|
||||||
|
// func() {
|
||||||
|
// defer a.l.Unlock().Lock()
|
||||||
|
// fssesDone.Wait()
|
||||||
|
// }()
|
||||||
|
//
|
||||||
|
package chainlock
|
||||||
|
|
||||||
|
import "sync"
|
||||||
|
|
||||||
|
// L wraps a sync.Mutex. Its Lock and Unlock methods return the receiver so
// that calls can be chained, e.g. `defer l.Lock().Unlock()`.
type L struct {
	mtx sync.Mutex
}

// New returns a new, unlocked L.
func New() *L {
	return new(L)
}

// Lock acquires the underlying mutex and returns l for chaining.
func (l *L) Lock() *L {
	l.mtx.Lock()
	return l
}

// Unlock releases the underlying mutex and returns l for chaining.
func (l *L) Unlock() *L {
	l.mtx.Unlock()
	return l
}

// NewCond returns a condition variable that uses the underlying mutex
// as its Locker.
func (l *L) NewCond() *sync.Cond {
	var locker sync.Locker = &l.mtx
	return sync.NewCond(locker)
}

// DropWhile releases the lock, runs f, and re-acquires the lock before
// returning (also if f panics). It unlocks first, so the lock must be held
// when DropWhile is called.
func (l *L) DropWhile(f func()) {
	l.Unlock()
	defer l.Lock()
	f()
}
|
@ -40,3 +40,19 @@ func Int64(varname string, def int64) int64 {
|
|||||||
cache.Store(varname, d)
|
cache.Store(varname, d)
|
||||||
return d
|
return d
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Bool(varname string, def bool) bool {
|
||||||
|
if v, ok := cache.Load(varname); ok {
|
||||||
|
return v.(bool)
|
||||||
|
}
|
||||||
|
e := os.Getenv(varname)
|
||||||
|
if e == "" {
|
||||||
|
return def
|
||||||
|
}
|
||||||
|
d, err := strconv.ParseBool(e)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
cache.Store(varname, d)
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
@ -1,16 +0,0 @@
|
|||||||
// Code generated by "stringer -type=Conflict"; DO NOT EDIT.
|
|
||||||
|
|
||||||
package zfs
|
|
||||||
|
|
||||||
import "strconv"
|
|
||||||
|
|
||||||
const _Conflict_name = "ConflictIncrementalConflictAllRightConflictNoCommonAncestorConflictDiverged"
|
|
||||||
|
|
||||||
var _Conflict_index = [...]uint8{0, 19, 35, 59, 75}
|
|
||||||
|
|
||||||
func (i Conflict) String() string {
|
|
||||||
if i < 0 || i >= Conflict(len(_Conflict_index)-1) {
|
|
||||||
return "Conflict(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
||||||
}
|
|
||||||
return _Conflict_name[_Conflict_index[i]:_Conflict_index[i+1]]
|
|
||||||
}
|
|
284
zfs/diff.go
284
zfs/diff.go
@ -1,284 +0,0 @@
|
|||||||
package zfs
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"crypto/sha512"
|
|
||||||
"encoding/hex"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"os/exec"
|
|
||||||
"sort"
|
|
||||||
)
|
|
||||||
|
|
||||||
type fsbyCreateTXG []FilesystemVersion
|
|
||||||
|
|
||||||
func (l fsbyCreateTXG) Len() int { return len(l) }
|
|
||||||
func (l fsbyCreateTXG) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
|
|
||||||
func (l fsbyCreateTXG) Less(i, j int) bool {
|
|
||||||
return l[i].CreateTXG < l[j].CreateTXG
|
|
||||||
}
|
|
||||||
|
|
||||||
//go:generate stringer -type=Conflict

// Conflict classifies the relationship between the version lists of two
// filesystems, i.e. which "way forward" exists for replication.
type Conflict int

const (
	// ConflictIncremental: no conflict, incremental repl possible
	ConflictIncremental Conflict = iota
	// ConflictAllRight: no conflict, initial repl possible
	ConflictAllRight
	// ConflictNoCommonAncestor: the two sides share no version.
	ConflictNoCommonAncestor
	// ConflictDiverged: a common ancestor exists, but the left side has
	// versions after it.
	ConflictDiverged
)
|
|
||||||
|
|
||||||
/* The receiver (left) wants to know if the sender (right) has more recent versions
|
|
||||||
|
|
||||||
Left : | C |
|
|
||||||
Right: | A | B | C | D | E |
|
|
||||||
=> : | C | D | E |
|
|
||||||
|
|
||||||
Left: | C |
|
|
||||||
Right: | D | E |
|
|
||||||
=> : <empty list>, no common ancestor
|
|
||||||
|
|
||||||
Left : | C | D | E |
|
|
||||||
Right: | A | B | C |
|
|
||||||
=> : <empty list>, the left has newer versions
|
|
||||||
|
|
||||||
Left : | A | B | C | | F |
|
|
||||||
Right: | C | D | E |
|
|
||||||
=> : | C | | F | => diverged => <empty list>
|
|
||||||
|
|
||||||
IMPORTANT: since ZFS currently does not export dataset UUIDs, the best heuristic to
|
|
||||||
identify a filesystem version is the tuple (name,creation)
|
|
||||||
*/
|
|
||||||
type FilesystemDiff struct {
|
|
||||||
|
|
||||||
// Which kind of conflict / "way forward" is possible.
|
|
||||||
// Check this first to determine the semantics of this struct's remaining members
|
|
||||||
Conflict Conflict
|
|
||||||
|
|
||||||
// Conflict = Incremental | AllRight
|
|
||||||
// The incremental steps required to get left up to right's most recent version
|
|
||||||
// 0th element is the common ancestor, ordered by birthtime, oldest first
|
|
||||||
// If len() < 2, left and right are at same most recent version
|
|
||||||
// Conflict = otherwise
|
|
||||||
// nil; there is no incremental path for left to get to right's most recent version
|
|
||||||
IncrementalPath []FilesystemVersion
|
|
||||||
|
|
||||||
// Conflict = Incremental | AllRight: nil
|
|
||||||
// Conflict = NoCommonAncestor: left as passed as input
|
|
||||||
// Conflict = Diverged: contains path from left most recent common ancestor (mrca) to most
|
|
||||||
// recent version on left
|
|
||||||
MRCAPathLeft []FilesystemVersion
|
|
||||||
// Conflict = Incremental | AllRight: nil
|
|
||||||
// Conflict = NoCommonAncestor: right as passed as input
|
|
||||||
// Conflict = Diverged: contains path from right most recent common ancestor (mrca)
|
|
||||||
// to most recent version on right
|
|
||||||
MRCAPathRight []FilesystemVersion
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f FilesystemDiff) String() (str string) {
|
|
||||||
var b bytes.Buffer
|
|
||||||
|
|
||||||
fmt.Fprintf(&b, "%s, ", f.Conflict)
|
|
||||||
|
|
||||||
switch f.Conflict {
|
|
||||||
case ConflictIncremental:
|
|
||||||
fmt.Fprintf(&b, "incremental path length %v, common ancestor at %s", len(f.IncrementalPath)-1, f.IncrementalPath[0])
|
|
||||||
case ConflictAllRight:
|
|
||||||
fmt.Fprintf(&b, "%v versions, most recent is %s", len(f.MRCAPathRight)-1, f.MRCAPathRight[len(f.MRCAPathRight)-1])
|
|
||||||
case ConflictDiverged:
|
|
||||||
fmt.Fprintf(&b, "diverged at %s", f.MRCAPathRight[0]) // right always has at least one snap...?
|
|
||||||
case ConflictNoCommonAncestor:
|
|
||||||
fmt.Fprintf(&b, "no diff to show")
|
|
||||||
default:
|
|
||||||
fmt.Fprintf(&b, "unknown conflict type, likely a bug")
|
|
||||||
}
|
|
||||||
|
|
||||||
return b.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
// we must assume left and right are ordered ascendingly by ZFS_PROP_CREATETXG and that
|
|
||||||
// names are unique (bas ZFS_PROP_GUID replacement)
|
|
||||||
func MakeFilesystemDiff(left, right []FilesystemVersion) (diff FilesystemDiff) {
|
|
||||||
|
|
||||||
if right == nil {
|
|
||||||
panic("right must not be nil")
|
|
||||||
}
|
|
||||||
if left == nil {
|
|
||||||
diff = FilesystemDiff{
|
|
||||||
IncrementalPath: nil,
|
|
||||||
Conflict: ConflictAllRight,
|
|
||||||
MRCAPathLeft: left,
|
|
||||||
MRCAPathRight: right,
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Assert both left and right are sorted by createtxg
|
|
||||||
{
|
|
||||||
var leftSorted, rightSorted fsbyCreateTXG
|
|
||||||
leftSorted = left
|
|
||||||
rightSorted = right
|
|
||||||
if !sort.IsSorted(leftSorted) {
|
|
||||||
panic("cannot make filesystem diff: unsorted left")
|
|
||||||
}
|
|
||||||
if !sort.IsSorted(rightSorted) {
|
|
||||||
panic("cannot make filesystem diff: unsorted right")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find most recent common ancestor by name, preferring snapshots over bookmarks
|
|
||||||
mrcaLeft := len(left) - 1
|
|
||||||
var mrcaRight int
|
|
||||||
outer:
|
|
||||||
for ; mrcaLeft >= 0; mrcaLeft-- {
|
|
||||||
for i := len(right) - 1; i >= 0; i-- {
|
|
||||||
if left[mrcaLeft].Guid == right[i].Guid {
|
|
||||||
mrcaRight = i
|
|
||||||
if i-1 >= 0 && right[i-1].Guid == right[i].Guid && right[i-1].Type == Snapshot {
|
|
||||||
// prefer snapshots over bookmarks
|
|
||||||
mrcaRight = i - 1
|
|
||||||
}
|
|
||||||
break outer
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// no common ancestor?
|
|
||||||
if mrcaLeft == -1 {
|
|
||||||
diff = FilesystemDiff{
|
|
||||||
IncrementalPath: nil,
|
|
||||||
Conflict: ConflictNoCommonAncestor,
|
|
||||||
MRCAPathLeft: left,
|
|
||||||
MRCAPathRight: right,
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// diverged?
|
|
||||||
if mrcaLeft != len(left)-1 {
|
|
||||||
diff = FilesystemDiff{
|
|
||||||
IncrementalPath: nil,
|
|
||||||
Conflict: ConflictDiverged,
|
|
||||||
MRCAPathLeft: left[mrcaLeft:],
|
|
||||||
MRCAPathRight: right[mrcaRight:],
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if mrcaLeft != len(left)-1 {
|
|
||||||
panic("invariant violated: mrca on left must be the last item in the left list")
|
|
||||||
}
|
|
||||||
|
|
||||||
// incPath must not contain bookmarks except initial one,
|
|
||||||
// and only if that initial bookmark's snapshot is gone
|
|
||||||
incPath := make([]FilesystemVersion, 0, len(right))
|
|
||||||
incPath = append(incPath, right[mrcaRight])
|
|
||||||
// right[mrcaRight] may be a bookmark if there's no equally named snapshot
|
|
||||||
for i := mrcaRight + 1; i < len(right); i++ {
|
|
||||||
if right[i].Type != Bookmark {
|
|
||||||
incPath = append(incPath, right[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
diff = FilesystemDiff{
|
|
||||||
IncrementalPath: incPath,
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// ZREPL_PLACEHOLDER_PROPERTY_NAME is the ZFS user property that marks a
// dataset as a placeholder.
const ZREPL_PLACEHOLDER_PROPERTY_NAME string = "zrepl:placeholder"

// FilesystemState describes the local state of a single dataset.
type FilesystemState struct {
	// Placeholder is true if the dataset is marked as a placeholder.
	Placeholder bool
	// TODO extend with resume token when that feature is finally added
}
|
|
||||||
|
|
||||||
// A somewhat efficient way to determine if a filesystem exists on this host.
|
|
||||||
// Particularly useful if exists is called more than once (will only fork exec once and cache the result)
|
|
||||||
func ZFSListFilesystemState() (localState map[string]FilesystemState, err error) {
|
|
||||||
|
|
||||||
var actual [][]string
|
|
||||||
if actual, err = ZFSList([]string{"name", ZREPL_PLACEHOLDER_PROPERTY_NAME}, "-t", "filesystem,volume"); err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
localState = make(map[string]FilesystemState, len(actual))
|
|
||||||
for _, e := range actual {
|
|
||||||
dp, err := NewDatasetPath(e[0])
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("ZFS does not return parseable dataset path: %s", e[0])
|
|
||||||
}
|
|
||||||
placeholder, _ := IsPlaceholder(dp, e[1])
|
|
||||||
localState[e[0]] = FilesystemState{
|
|
||||||
placeholder,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// Computes the value for the ZREPL_PLACEHOLDER_PROPERTY_NAME ZFS user property
|
|
||||||
// to mark the given DatasetPath p as a placeholder
|
|
||||||
//
|
|
||||||
// We cannot simply use booleans here since user properties are always
|
|
||||||
// inherited.
|
|
||||||
//
|
|
||||||
// We hash the DatasetPath and use it to check for a given path if it is the
|
|
||||||
// one originally marked as placeholder.
|
|
||||||
//
|
|
||||||
// However, this prohibits moving datasets around via `zfs rename`. The
|
|
||||||
// placeholder attribute must be re-computed for the dataset path after the
|
|
||||||
// move.
|
|
||||||
//
|
|
||||||
// TODO better solution available?
|
|
||||||
func PlaceholderPropertyValue(p *DatasetPath) string {
|
|
||||||
ps := []byte(p.ToString())
|
|
||||||
sum := sha512.Sum512_256(ps)
|
|
||||||
return hex.EncodeToString(sum[:])
|
|
||||||
}
|
|
||||||
|
|
||||||
func IsPlaceholder(p *DatasetPath, placeholderPropertyValue string) (isPlaceholder bool, err error) {
|
|
||||||
expected := PlaceholderPropertyValue(p)
|
|
||||||
isPlaceholder = expected == placeholderPropertyValue
|
|
||||||
if !isPlaceholder {
|
|
||||||
err = fmt.Errorf("expected %s, has %s", expected, placeholderPropertyValue)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// for nonexistent FS, isPlaceholder == false && err == nil
|
|
||||||
func ZFSIsPlaceholderFilesystem(p *DatasetPath) (isPlaceholder bool, err error) {
|
|
||||||
props, err := zfsGet(p.ToString(), []string{ZREPL_PLACEHOLDER_PROPERTY_NAME}, sourceAny)
|
|
||||||
if err == io.ErrUnexpectedEOF {
|
|
||||||
// interpret this as an early exit of the zfs binary due to the fs not existing
|
|
||||||
return false, nil
|
|
||||||
} else if err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
isPlaceholder, _ = IsPlaceholder(p, props.Get(ZREPL_PLACEHOLDER_PROPERTY_NAME))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func ZFSCreatePlaceholderFilesystem(p *DatasetPath) (err error) {
|
|
||||||
v := PlaceholderPropertyValue(p)
|
|
||||||
cmd := exec.Command(ZFS_BINARY, "create",
|
|
||||||
"-o", fmt.Sprintf("%s=%s", ZREPL_PLACEHOLDER_PROPERTY_NAME, v),
|
|
||||||
"-o", "mountpoint=none",
|
|
||||||
p.ToString())
|
|
||||||
|
|
||||||
stderr := bytes.NewBuffer(make([]byte, 0, 1024))
|
|
||||||
cmd.Stderr = stderr
|
|
||||||
|
|
||||||
if err = cmd.Start(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = cmd.Wait(); err != nil {
|
|
||||||
err = &ZFSError{
|
|
||||||
Stderr: stderr.Bytes(),
|
|
||||||
WaitErr: err,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
112
zfs/diff_test.go
112
zfs/diff_test.go
@ -1,112 +0,0 @@
|
|||||||
package zfs
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
func fsvlist(fsv ...string) (r []FilesystemVersion) {
|
|
||||||
|
|
||||||
r = make([]FilesystemVersion, len(fsv))
|
|
||||||
for i, f := range fsv {
|
|
||||||
|
|
||||||
// parse the id from fsvlist. it is used to derivce Guid,CreateTXG and Creation attrs
|
|
||||||
split := strings.Split(f, ",")
|
|
||||||
if len(split) != 2 {
|
|
||||||
panic("invalid fsv spec")
|
|
||||||
}
|
|
||||||
id, err := strconv.Atoi(split[1])
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.HasPrefix(f, "#") {
|
|
||||||
r[i] = FilesystemVersion{
|
|
||||||
Name: strings.TrimPrefix(f, "#"),
|
|
||||||
Type: Bookmark,
|
|
||||||
Guid: uint64(id),
|
|
||||||
CreateTXG: uint64(id),
|
|
||||||
Creation: time.Unix(0, 0).Add(time.Duration(id) * time.Second),
|
|
||||||
}
|
|
||||||
} else if strings.HasPrefix(f, "@") {
|
|
||||||
r[i] = FilesystemVersion{
|
|
||||||
Name: strings.TrimPrefix(f, "@"),
|
|
||||||
Type: Snapshot,
|
|
||||||
Guid: uint64(id),
|
|
||||||
CreateTXG: uint64(id),
|
|
||||||
Creation: time.Unix(0, 0).Add(time.Duration(id) * time.Second),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
panic("invalid character")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func doTest(left, right []FilesystemVersion, validate func(d FilesystemDiff)) {
|
|
||||||
var d FilesystemDiff
|
|
||||||
d = MakeFilesystemDiff(left, right)
|
|
||||||
validate(d)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMakeFilesystemDiff_IncrementalSnapshots(t *testing.T) {
|
|
||||||
|
|
||||||
l := fsvlist
|
|
||||||
|
|
||||||
// basic functionality
|
|
||||||
doTest(l("@a,1", "@b,2"), l("@a,1", "@b,2", "@c,3", "@d,4"), func(d FilesystemDiff) {
|
|
||||||
assert.Equal(t, l("@b,2", "@c,3", "@d,4"), d.IncrementalPath)
|
|
||||||
})
|
|
||||||
|
|
||||||
// no common ancestor
|
|
||||||
doTest(l(), l("@a,1"), func(d FilesystemDiff) {
|
|
||||||
assert.Nil(t, d.IncrementalPath)
|
|
||||||
assert.EqualValues(t, d.Conflict, ConflictNoCommonAncestor)
|
|
||||||
assert.Equal(t, l("@a,1"), d.MRCAPathRight)
|
|
||||||
})
|
|
||||||
doTest(l("@a,1", "@b,2"), l("@c,3", "@d,4"), func(d FilesystemDiff) {
|
|
||||||
assert.Nil(t, d.IncrementalPath)
|
|
||||||
assert.EqualValues(t, d.Conflict, ConflictNoCommonAncestor)
|
|
||||||
assert.Equal(t, l("@c,3", "@d,4"), d.MRCAPathRight)
|
|
||||||
})
|
|
||||||
|
|
||||||
// divergence is detected
|
|
||||||
doTest(l("@a,1", "@b1,2"), l("@a,1", "@b2,3"), func(d FilesystemDiff) {
|
|
||||||
assert.Nil(t, d.IncrementalPath)
|
|
||||||
assert.EqualValues(t, d.Conflict, ConflictDiverged)
|
|
||||||
assert.Equal(t, l("@a,1", "@b1,2"), d.MRCAPathLeft)
|
|
||||||
assert.Equal(t, l("@a,1", "@b2,3"), d.MRCAPathRight)
|
|
||||||
})
|
|
||||||
|
|
||||||
// gaps before most recent common ancestor do not matter
|
|
||||||
doTest(l("@a,1", "@b,2", "@c,3"), l("@a,1", "@c,3", "@d,4"), func(d FilesystemDiff) {
|
|
||||||
assert.Equal(t, l("@c,3", "@d,4"), d.IncrementalPath)
|
|
||||||
})
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMakeFilesystemDiff_BookmarksSupport(t *testing.T) {
|
|
||||||
l := fsvlist
|
|
||||||
|
|
||||||
// bookmarks are used
|
|
||||||
doTest(l("@a,1"), l("#a,1", "@b,2"), func(d FilesystemDiff) {
|
|
||||||
assert.Equal(t, l("#a,1", "@b,2"), d.IncrementalPath)
|
|
||||||
})
|
|
||||||
|
|
||||||
// boomarks are stripped from IncrementalPath (cannot send incrementally)
|
|
||||||
doTest(l("@a,1"), l("#a,1", "#b,2", "@c,3"), func(d FilesystemDiff) {
|
|
||||||
assert.Equal(t, l("#a,1", "@c,3"), d.IncrementalPath)
|
|
||||||
})
|
|
||||||
|
|
||||||
// test that snapshots are preferred over bookmarks in IncrementalPath
|
|
||||||
doTest(l("@a,1"), l("#a,1", "@a,1", "@b,2"), func(d FilesystemDiff) {
|
|
||||||
assert.Equal(t, l("@a,1", "@b,2"), d.IncrementalPath)
|
|
||||||
})
|
|
||||||
doTest(l("@a,1"), l("@a,1", "#a,1", "@b,2"), func(d FilesystemDiff) {
|
|
||||||
assert.Equal(t, l("@a,1", "@b,2"), d.IncrementalPath)
|
|
||||||
})
|
|
||||||
|
|
||||||
}
|
|
113
zfs/placeholder.go
Normal file
113
zfs/placeholder.go
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
package zfs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"crypto/sha512"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os/exec"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ZREPL_PLACEHOLDER_PROPERTY_NAME is the ZFS user property that marks a
// dataset as a placeholder.
const ZREPL_PLACEHOLDER_PROPERTY_NAME string = "zrepl:placeholder"

// FilesystemState describes the local state of a single dataset.
type FilesystemState struct {
	// Placeholder is true if the dataset is marked as a placeholder.
	Placeholder bool
	// TODO extend with resume token when that feature is finally added
}
|
||||||
|
|
||||||
|
// A somewhat efficient way to determine if a filesystem exists on this host.
|
||||||
|
// Particularly useful if exists is called more than once (will only fork exec once and cache the result)
|
||||||
|
func ZFSListFilesystemState() (localState map[string]FilesystemState, err error) {
|
||||||
|
|
||||||
|
var actual [][]string
|
||||||
|
if actual, err = ZFSList([]string{"name", ZREPL_PLACEHOLDER_PROPERTY_NAME}, "-t", "filesystem,volume"); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
localState = make(map[string]FilesystemState, len(actual))
|
||||||
|
for _, e := range actual {
|
||||||
|
dp, err := NewDatasetPath(e[0])
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("ZFS does not return parseable dataset path: %s", e[0])
|
||||||
|
}
|
||||||
|
placeholder, _ := IsPlaceholder(dp, e[1])
|
||||||
|
localState[e[0]] = FilesystemState{
|
||||||
|
placeholder,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes the value for the ZREPL_PLACEHOLDER_PROPERTY_NAME ZFS user property
|
||||||
|
// to mark the given DatasetPath p as a placeholder
|
||||||
|
//
|
||||||
|
// We cannot simply use booleans here since user properties are always
|
||||||
|
// inherited.
|
||||||
|
//
|
||||||
|
// We hash the DatasetPath and use it to check for a given path if it is the
|
||||||
|
// one originally marked as placeholder.
|
||||||
|
//
|
||||||
|
// However, this prohibits moving datasets around via `zfs rename`. The
|
||||||
|
// placeholder attribute must be re-computed for the dataset path after the
|
||||||
|
// move.
|
||||||
|
//
|
||||||
|
// TODO better solution available?
|
||||||
|
func PlaceholderPropertyValue(p *DatasetPath) string {
|
||||||
|
ps := []byte(p.ToString())
|
||||||
|
sum := sha512.Sum512_256(ps)
|
||||||
|
return hex.EncodeToString(sum[:])
|
||||||
|
}
|
||||||
|
|
||||||
|
func IsPlaceholder(p *DatasetPath, placeholderPropertyValue string) (isPlaceholder bool, err error) {
|
||||||
|
expected := PlaceholderPropertyValue(p)
|
||||||
|
isPlaceholder = expected == placeholderPropertyValue
|
||||||
|
if !isPlaceholder {
|
||||||
|
err = fmt.Errorf("expected %s, has %s", expected, placeholderPropertyValue)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// for nonexistent FS, isPlaceholder == false && err == nil
|
||||||
|
func ZFSIsPlaceholderFilesystem(p *DatasetPath) (isPlaceholder bool, err error) {
|
||||||
|
props, err := zfsGet(p.ToString(), []string{ZREPL_PLACEHOLDER_PROPERTY_NAME}, sourceAny)
|
||||||
|
if err == io.ErrUnexpectedEOF {
|
||||||
|
// interpret this as an early exit of the zfs binary due to the fs not existing
|
||||||
|
return false, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
isPlaceholder, _ = IsPlaceholder(p, props.Get(ZREPL_PLACEHOLDER_PROPERTY_NAME))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func ZFSCreatePlaceholderFilesystem(p *DatasetPath) (err error) {
|
||||||
|
v := PlaceholderPropertyValue(p)
|
||||||
|
cmd := exec.Command(ZFS_BINARY, "create",
|
||||||
|
"-o", fmt.Sprintf("%s=%s", ZREPL_PLACEHOLDER_PROPERTY_NAME, v),
|
||||||
|
"-o", "mountpoint=none",
|
||||||
|
p.ToString())
|
||||||
|
|
||||||
|
stderr := bytes.NewBuffer(make([]byte, 0, 1024))
|
||||||
|
cmd.Stderr = stderr
|
||||||
|
|
||||||
|
if err = cmd.Start(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = cmd.Wait(); err != nil {
|
||||||
|
err = &ZFSError{
|
||||||
|
Stderr: stderr.Bytes(),
|
||||||
|
WaitErr: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func ZFSSetNoPlaceholder(p *DatasetPath) error {
|
||||||
|
props := NewZFSProperties()
|
||||||
|
props.Set(ZREPL_PLACEHOLDER_PROPERTY_NAME, "off")
|
||||||
|
return zfsSet(p.ToString(), props)
|
||||||
|
}
|
82
zfs/zfs.go
82
zfs/zfs.go
@ -9,6 +9,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -691,17 +692,62 @@ type StreamCopier interface {
|
|||||||
Close() error
|
Close() error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RecvOptions controls how ZFSRecv receives a stream.
type RecvOptions struct {
	// RollbackAndForceRecv makes the receiver roll back to the oldest
	// snapshot, destroy that snapshot, and then run `recv -F`.
	// Property values are not changed by this, i.e. an existing local
	// property value will be kept.
	RollbackAndForceRecv bool
}
|
||||||
|
|
||||||
func ZFSRecv(ctx context.Context, fs string, streamCopier StreamCopier, additionalArgs ...string) (err error) {
|
func ZFSRecv(ctx context.Context, fs string, streamCopier StreamCopier, opts RecvOptions) (err error) {
|
||||||
|
|
||||||
if err := validateZFSFilesystem(fs); err != nil {
|
if err := validateZFSFilesystem(fs); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
fsdp, err := NewDatasetPath(fs)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.RollbackAndForceRecv {
|
||||||
|
// destroy all snapshots before `recv -F` because `recv -F`
|
||||||
|
// does not perform a rollback unless `send -R` was used (which we assume hasn't been the case)
|
||||||
|
var snaps []FilesystemVersion
|
||||||
|
{
|
||||||
|
vs, err := ZFSListFilesystemVersions(fsdp, nil)
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("cannot list versions to rollback is required: %s", err)
|
||||||
|
}
|
||||||
|
for _, v := range vs {
|
||||||
|
if v.Type == Snapshot {
|
||||||
|
snaps = append(snaps, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Slice(snaps, func(i, j int) bool {
|
||||||
|
return snaps[i].CreateTXG < snaps[j].CreateTXG
|
||||||
|
})
|
||||||
|
}
|
||||||
|
// bookmarks are rolled back automatically
|
||||||
|
if len(snaps) > 0 {
|
||||||
|
// use rollback to efficiently destroy all but the earliest snapshot
|
||||||
|
// then destroy that earliest snapshot
|
||||||
|
// afterwards, `recv -F` will work
|
||||||
|
rollbackTarget := snaps[0]
|
||||||
|
rollbackTargetAbs := rollbackTarget.ToAbsPath(fsdp)
|
||||||
|
debug("recv: rollback to %q", rollbackTargetAbs)
|
||||||
|
if err := ZFSRollback(fsdp, rollbackTarget, "-r"); err != nil {
|
||||||
|
return fmt.Errorf("cannot rollback %s to %s for forced receive: %s", fsdp.ToString(), rollbackTarget, err)
|
||||||
|
}
|
||||||
|
debug("recv: destroy %q", rollbackTargetAbs)
|
||||||
|
if err := ZFSDestroy(rollbackTargetAbs); err != nil {
|
||||||
|
return fmt.Errorf("cannot destroy %s for forced receive: %s", rollbackTargetAbs, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
args := make([]string, 0)
|
args := make([]string, 0)
|
||||||
args = append(args, "recv")
|
args = append(args, "recv")
|
||||||
if len(args) > 0 {
|
if opts.RollbackAndForceRecv {
|
||||||
args = append(args, additionalArgs...)
|
args = append(args, "-F")
|
||||||
}
|
}
|
||||||
args = append(args, fs)
|
args = append(args, fs)
|
||||||
|
|
||||||
@ -1038,3 +1084,33 @@ func ZFSBookmark(fs *DatasetPath, snapshot, bookmark string) (err error) {
|
|||||||
return
|
return
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ZFSRollback(fs *DatasetPath, snapshot FilesystemVersion, rollbackArgs ...string) (err error) {
|
||||||
|
|
||||||
|
snapabs := snapshot.ToAbsPath(fs)
|
||||||
|
if snapshot.Type != Snapshot {
|
||||||
|
return fmt.Errorf("can only rollback to snapshots, got %s", snapabs)
|
||||||
|
}
|
||||||
|
|
||||||
|
args := []string{"rollback"}
|
||||||
|
args = append(args, rollbackArgs...)
|
||||||
|
args = append(args, snapabs)
|
||||||
|
|
||||||
|
cmd := exec.Command(ZFS_BINARY, args...)
|
||||||
|
|
||||||
|
stderr := bytes.NewBuffer(make([]byte, 0, 1024))
|
||||||
|
cmd.Stderr = stderr
|
||||||
|
|
||||||
|
if err = cmd.Start(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = cmd.Wait(); err != nil {
|
||||||
|
err = &ZFSError{
|
||||||
|
Stderr: stderr.Bytes(),
|
||||||
|
WaitErr: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user