mirror of https://github.com/zrepl/zrepl.git (synced 2024-12-25 16:38:49 +01:00)

Merge branch 'replication_rewrite' (in fact it's a 90% rewrite)
commit 074f989547

Gopkg.lock (generated, 150 lines changed)
@ -3,163 +3,293 @@
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:c0bec5f9b98d0bc872ff5e834fac186b807b656683bd29cb82fb207a1513fabb"
|
||||
name = "github.com/beorn7/perks"
|
||||
packages = ["quantile"]
|
||||
pruneopts = ""
|
||||
revision = "3a771d992973f24aa725d07868b467d1ddfceafb"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:56c130d885a4aacae1dd9c7b71cfe39912c7ebc1ff7d2b46083c8812996dc43b"
|
||||
name = "github.com/davecgh/go-spew"
|
||||
packages = ["spew"]
|
||||
pruneopts = ""
|
||||
revision = "346938d642f2ec3594ed81d874461961cd0faa76"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/dustin/go-humanize"
|
||||
digest = "1:e988ed0ca0d81f4d28772760c02ee95084961311291bdfefc1b04617c178b722"
|
||||
name = "github.com/fatih/color"
|
||||
packages = ["."]
|
||||
revision = "bb3d318650d48840a39aa21a027c6630e198e626"
|
||||
pruneopts = ""
|
||||
revision = "5b77d2a35fb0ede96d138fc9a99f5c9b6aef11b4"
|
||||
version = "v1.7.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:5d0a2385edf4ba44f3b7b76bc0436ceb8f62bf55aa5d540a9eb9ec6c58d86809"
|
||||
name = "github.com/ftrvxmtrx/fd"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "c6d800382fff6dc1412f34269f71b7f83bd059ad"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:6a4a01d58b227c4b6b11111b9f172ec5c17682b82724e58e6daf3f19f4faccd8"
|
||||
name = "github.com/go-logfmt/logfmt"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "390ab7935ee28ec6b286364bba9b4dd6410cb3d5"
|
||||
version = "v0.3.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "v2"
|
||||
digest = "1:81314a486195626940617e43740b4fa073f265b0715c9f54ce2027fee1cb5f61"
|
||||
name = "github.com/go-yaml/yaml"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "eb3733d160e74a9c7e442f435eb3bea458e1d19f"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:3dd078fda7500c341bc26cfbc6c6a34614f295a2457149fc1045cab767cbcf18"
|
||||
name = "github.com/golang/protobuf"
|
||||
packages = ["proto"]
|
||||
revision = "925541529c1fa6821df4e44ce2723319eb2be768"
|
||||
version = "v1.0.0"
|
||||
pruneopts = ""
|
||||
revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5"
|
||||
version = "v1.2.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:cb09475f771b9167fb9333629f5d6a7161572602ea040f1094602b0dc8709878"
|
||||
name = "github.com/jinzhu/copier"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "db4671f3a9b8df855e993f7c94ec5ef1ffb0a23b"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:1ed9eeebdf24aadfbca57eb50e6455bd1d2474525e0f0d4454de8c8e9bc7ee9a"
|
||||
name = "github.com/kr/logfmt"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "b84e30acd515aadc4b783ad4ff83aff3299bdfe0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:591a2778aa6e896980757ea87e659b3aa13d8c0e790310614028463a31c0998b"
|
||||
name = "github.com/kr/pretty"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "cfb55aafdaf3ec08f0db22699ab822c50091b1c4"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:109e0a9b9e74c5c8adf94a2dc4dd4a9ca2a183d4d87ffecd4e62db69a5ede55a"
|
||||
name = "github.com/kr/text"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "7cafcd837844e784b526369c9bce262804aebc60"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:9ea83adf8e96d6304f394d40436f2eb44c1dc3250d223b74088cc253a6cd0a1c"
|
||||
name = "github.com/mattn/go-colorable"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "167de6bfdfba052fa6b2d3664c8f5272e23c9072"
|
||||
version = "v0.0.9"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:78229b46ddb7434f881390029bd1af7661294af31f6802e0e1bedaad4ab0af3c"
|
||||
name = "github.com/mattn/go-isatty"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "0360b2af4f38e8d38c7fce2a9f4e702702d73a39"
|
||||
version = "v0.0.3"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:82b912465c1da0668582a7d1117339c278e786c2536b3c3623029a0c7141c2d0"
|
||||
name = "github.com/mattn/go-runewidth"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "ce7b0b5c7b45a81508558cd1dba6bb1e4ddb51bb"
|
||||
version = "v0.0.3"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:4c23ced97a470b17d9ffd788310502a077b9c1f60221a85563e49696276b4147"
|
||||
name = "github.com/matttproud/golang_protobuf_extensions"
|
||||
packages = ["pbutil"]
|
||||
pruneopts = ""
|
||||
revision = "3247c84500bff8d9fb6d579d800f20b3e091582c"
|
||||
version = "v1.0.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:c9ede10a9ded782d25d1f0be87c680e11409c23554828f19a19d691a95e76130"
|
||||
name = "github.com/mitchellh/mapstructure"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "d0303fe809921458f417bcf828397a65db30a7e4"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:20a553eff588d7abe1f05addf5f57cdbaef1d0f992427a0099b7eb51274b79cf"
|
||||
name = "github.com/nsf/termbox-go"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "b66b20ab708e289ff1eb3e218478302e6aec28ce"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:7365acd48986e205ccb8652cc746f09c8b7876030d53710ea6ef7d0bd0dcd7ca"
|
||||
name = "github.com/pkg/errors"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "645ef00459ed84a119197bfb8d8205042c6df63d"
|
||||
version = "v0.8.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:256484dbbcd271f9ecebc6795b2df8cad4c458dd0f5fd82a8c2fa0c29f233411"
|
||||
name = "github.com/pmezard/go-difflib"
|
||||
packages = ["difflib"]
|
||||
pruneopts = ""
|
||||
revision = "792786c7400a136282c1664665ae0a8db921c6c2"
|
||||
version = "v1.0.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:1392748e290ca66ac8447ef24961f8ae9e1d846a53af0f58a5a0256982ce0577"
|
||||
name = "github.com/problame/go-netssh"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "c56ad38d2c91397ad3c8dd9443d7448e328a9e9e"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:8c63c44f018bd52b03ebad65c9df26aabbc6793138e421df1c8c84c285a45bc6"
|
||||
name = "github.com/problame/go-rwccmd"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "391d2c78c8404a9683d79f75dd24ab53040f89f7"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:c2ba1c9dc003c15856e4529dac028cacba08ee8924300f058b3467cde9acf7a9"
|
||||
name = "github.com/problame/go-streamrpc"
|
||||
packages = [
|
||||
".",
|
||||
"internal/pdu",
|
||||
]
|
||||
pruneopts = ""
|
||||
revision = "de6f6a4041c77f700f02d8fe749e54efa50811f7"
|
||||
version = "v0.4"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:ebf8ffdde9bdbf9c83e22121875c68c01d821776523546554b2a3ff6f72773ab"
|
||||
name = "github.com/prometheus/client_golang"
|
||||
packages = ["prometheus","prometheus/promhttp"]
|
||||
packages = [
|
||||
"prometheus",
|
||||
"prometheus/promhttp",
|
||||
]
|
||||
pruneopts = ""
|
||||
revision = "e11c6ff8170beca9d5fd8b938e71165eeec53ac6"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:60aca47f4eeeb972f1b9da7e7db51dee15ff6c59f7b401c1588b8e6771ba15ef"
|
||||
name = "github.com/prometheus/client_model"
|
||||
packages = ["go"]
|
||||
pruneopts = ""
|
||||
revision = "99fa1f4be8e564e8a6b613da7fa6f46c9edafc6c"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:af21ee3e0a8212f17bb317cd7237f9920bcb2641a291ac111f30f63b3cab817f"
|
||||
name = "github.com/prometheus/common"
|
||||
packages = ["expfmt","internal/bitbucket.org/ww/goautoneg","model"]
|
||||
packages = [
|
||||
"expfmt",
|
||||
"internal/bitbucket.org/ww/goautoneg",
|
||||
"model",
|
||||
]
|
||||
pruneopts = ""
|
||||
revision = "d0f7cd64bda49e08b22ae8a730aa57aa0db125d6"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:61df0898746840afc7be5dc2c3eeec83022fab70df11ecee5b16c85e912cf5ed"
|
||||
name = "github.com/prometheus/procfs"
|
||||
packages = [".","internal/util","nfs","xfs"]
|
||||
packages = [
|
||||
".",
|
||||
"internal/util",
|
||||
"nfs",
|
||||
"xfs",
|
||||
]
|
||||
pruneopts = ""
|
||||
revision = "8b1c2da0d56deffdbb9e48d4414b4e674bd8083e"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:146327ce93be37e68bd3ff8541090d96da8cb3adc9e35d57570e9170a29f6bf6"
|
||||
name = "github.com/spf13/cobra"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "b78744579491c1ceeaaa3b40205e56b0591b93a3"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:261bc565833ef4f02121450d74eb88d5ae4bd74bfe5d0e862cddb8550ec35000"
|
||||
name = "github.com/spf13/pflag"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "e57e3eeb33f795204c1ca35f56c44f83227c6e66"
|
||||
version = "v1.0.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:3926a4ec9a4ff1a072458451aa2d9b98acd059a45b38f7335d31e06c3d6a0159"
|
||||
name = "github.com/stretchr/testify"
|
||||
packages = ["assert"]
|
||||
packages = [
|
||||
"assert",
|
||||
"require",
|
||||
]
|
||||
pruneopts = ""
|
||||
revision = "69483b4bd14f5845b5a1e55bca19e954e827f1d0"
|
||||
version = "v1.1.4"
|
||||
|
||||
[[projects]]
|
||||
branch = "v2"
|
||||
digest = "1:9d92186f609a73744232323416ddafd56fae67cb552162cc190ab903e36900dd"
|
||||
name = "github.com/zrepl/yaml-config"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "af27d27978ad95808723a62d87557d63c3ff0605"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:9c286cf11d0ca56368185bada5dd6d97b6be4648fc26c354fcba8df7293718f7"
|
||||
name = "golang.org/x/sys"
|
||||
packages = ["unix"]
|
||||
pruneopts = ""
|
||||
revision = "bf42f188b9bc6f2cf5b8ee5a912ef1aedd0eba4c"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "36731e77252dcc851fdfa2b0b0778b980597e3c1a47b5e2af3bd0bcb802662ec"
|
||||
input-imports = [
|
||||
"github.com/fatih/color",
|
||||
"github.com/go-logfmt/logfmt",
|
||||
"github.com/go-yaml/yaml",
|
||||
"github.com/golang/protobuf/proto",
|
||||
"github.com/jinzhu/copier",
|
||||
"github.com/kr/pretty",
|
||||
"github.com/mattn/go-isatty",
|
||||
"github.com/mitchellh/mapstructure",
|
||||
"github.com/nsf/termbox-go",
|
||||
"github.com/pkg/errors",
|
||||
"github.com/problame/go-netssh",
|
||||
"github.com/problame/go-rwccmd",
|
||||
"github.com/problame/go-streamrpc",
|
||||
"github.com/prometheus/client_golang/prometheus",
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp",
|
||||
"github.com/spf13/cobra",
|
||||
"github.com/stretchr/testify/assert",
|
||||
"github.com/stretchr/testify/require",
|
||||
"github.com/zrepl/yaml-config",
|
||||
]
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
||||
|
Gopkg.toml (17 lines changed)

@@ -38,7 +38,7 @@ ignored = [ "github.com/inconshreveable/mousetrap" ]

[[constraint]]
  branch = "v2"
  name = "github.com/go-yaml/yaml"
  name = "github.com/zrepl/yaml-config"

[[constraint]]
  name = "github.com/go-logfmt/logfmt"
@@ -55,3 +55,18 @@ ignored = [ "github.com/inconshreveable/mousetrap" ]
[[constraint]]
  name = "github.com/prometheus/client_golang"
  branch = "master"

[[constraint]]
  name = "github.com/golang/protobuf"
  version = "1.2.0"

[[constraint]]
  name = "github.com/nsf/termbox-go"
  branch = "master"
[[constraint]]
  name = "github.com/fatih/color"
  version = "1.7.0"

[[constraint]]
  name = "github.com/problame/go-streamrpc"
  version = "0.4.0"
Makefile (30 lines changed)

@@ -2,7 +2,32 @@
.DEFAULT_GOAL := build

ROOT := github.com/zrepl/zrepl
SUBPKGS := cmd logger rpc util zfs
SUBPKGS += client
SUBPKGS += config
SUBPKGS += daemon
SUBPKGS += daemon/filters
SUBPKGS += daemon/job
SUBPKGS += daemon/logging
SUBPKGS += daemon/nethelpers
SUBPKGS += daemon/pruner
SUBPKGS += daemon/snapper
SUBPKGS += daemon/streamrpcconfig
SUBPKGS += daemon/transport
SUBPKGS += daemon/transport/connecter
SUBPKGS += daemon/transport/serve
SUBPKGS += endpoint
SUBPKGS += logger
SUBPKGS += pruning
SUBPKGS += pruning/retentiongrid
SUBPKGS += replication
SUBPKGS += replication/fsrep
SUBPKGS += replication/pdu
SUBPKGS += replication/internal/queue
SUBPKGS += replication/internal/diff
SUBPKGS += tlsconf
SUBPKGS += util
SUBPKGS += version
SUBPKGS += zfs

_TESTPKGS := $(ROOT) $(foreach p,$(SUBPKGS),$(ROOT)/$(p))

@@ -14,7 +39,7 @@ ifndef ZREPL_VERSION
$(error cannot infer variable ZREPL_VERSION using git and variable is not overriden by make invocation)
endif
endif
GO_LDFLAGS := "-X github.com/zrepl/zrepl/cmd.zreplVersion=$(ZREPL_VERSION)"
GO_LDFLAGS := "-X github.com/zrepl/zrepl/version.zreplVersion=$(ZREPL_VERSION)"

GO_BUILD := go build -ldflags $(GO_LDFLAGS)

@@ -26,6 +51,7 @@ vendordeps:
    dep ensure -v -vendor-only

generate: #not part of the build, must do that manually
    protoc -I=replication/pdu --go_out=replication/pdu replication/pdu/pdu.proto
    @for pkg in $(_TESTPKGS); do\
        go generate "$$pkg" || exit 1; \
    done;
@@ -46,6 +46,7 @@ Make sure to develop an understanding how zrepl is typically used by studying th

```
├── cmd
│   ├── endpoint    # implementations of endpoints for package replication
│   ├── sampleconf  # example configuration
├── docs            # sphinx-based documentation
│   ├── **/*.rst    # documentation in reStructuredText
@@ -55,6 +56,7 @@ Make sure to develop an understanding how zrepl is typically used by studying th
│   ├── publish.sh  # shell script for automated rendering & deploy to zrepl.github.io repo
│   ├── public_git  # checkout of zrepl.github.io managed by above shell script
├── logger          # logger package used by zrepl
├── replication     # replication functionality
├── rpc             # rpc protocol implementation
├── util
└── zfs             # ZFS wrappers, filesystemm diffing
@@ -1,7 +1,12 @@
FROM golang:latest

RUN apt-get update && apt-get install -y \
    python3-pip
    python3-pip \
    unzip

RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip
RUN echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c
RUN unzip -d /usr protoc-3.6.1-linux-x86_64.zip

ADD lazy.sh /tmp/lazy.sh
ADD docs/requirements.txt /tmp/requirements.txt
cli/cli.go (new file, 116 lines)

@@ -0,0 +1,116 @@
package cli

import (
    "fmt"
    "github.com/spf13/cobra"
    "github.com/spf13/pflag"
    "github.com/zrepl/zrepl/config"
    "os"
)

var rootArgs struct {
    configPath string
}

var rootCmd = &cobra.Command{
    Use:   "zrepl",
    Short: "One-stop ZFS replication solution",
}

var bashcompCmd = &cobra.Command{
    Use:   "bashcomp path/to/out/file",
    Short: "generate bash completions",
    Run: func(cmd *cobra.Command, args []string) {
        if len(args) != 1 {
            fmt.Fprintf(os.Stderr, "specify exactly one positional agument\n")
            cmd.Usage()
            os.Exit(1)
        }
        if err := rootCmd.GenBashCompletionFile(args[0]); err != nil {
            fmt.Fprintf(os.Stderr, "error generating bash completion: %s", err)
            os.Exit(1)
        }
    },
    Hidden: true,
}

func init() {
    rootCmd.PersistentFlags().StringVar(&rootArgs.configPath, "config", "", "config file path")
    rootCmd.AddCommand(bashcompCmd)
}

type Subcommand struct {
    Use              string
    Short            string
    NoRequireConfig  bool
    Run              func(subcommand *Subcommand, args []string) error
    SetupFlags       func(f *pflag.FlagSet)
    SetupSubcommands func() []*Subcommand

    config    *config.Config
    configErr error
}

func (s *Subcommand) ConfigParsingError() error {
    return s.configErr
}

func (s *Subcommand) Config() *config.Config {
    if !s.NoRequireConfig && s.config == nil {
        panic("command that requires config is running and has no config set")
    }
    return s.config
}

func (s *Subcommand) run(cmd *cobra.Command, args []string) {
    s.tryParseConfig()
    err := s.Run(s, args)
    if err != nil {
        fmt.Fprintf(os.Stderr, "%s\n", err)
        os.Exit(1)
    }
}

func (s *Subcommand) tryParseConfig() {
    config, err := config.ParseConfig(rootArgs.configPath)
    s.configErr = err
    if err != nil {
        if s.NoRequireConfig {
            // doesn't matter
            return
        } else {
            fmt.Fprintf(os.Stderr, "could not parse config: %s\n", err)
            os.Exit(1)
        }
    }
    s.config = config
}

func AddSubcommand(s *Subcommand) {
    addSubcommandToCobraCmd(rootCmd, s)
}

func addSubcommandToCobraCmd(c *cobra.Command, s *Subcommand) {
    cmd := cobra.Command{
        Use:   s.Use,
        Short: s.Short,
    }
    if s.SetupSubcommands == nil {
        cmd.Run = s.run
    } else {
        for _, sub := range s.SetupSubcommands() {
            addSubcommandToCobraCmd(&cmd, sub)
        }
    }
    if s.SetupFlags != nil {
        s.SetupFlags(cmd.Flags())
    }
    c.AddCommand(&cmd)
}

func Run() {
    if err := rootCmd.Execute(); err != nil {
        os.Exit(1)
    }
}
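The main package that ties this together is not part of this hunk; the following is a minimal sketch, assuming a hypothetical main.go, of how the cli package above is meant to be used. cli.AddSubcommand, cli.Run, and the client.*Cmd variables are taken from this commit's diff; the file name and the particular selection of registered subcommands are assumptions for illustration.

```go
// Hypothetical main.go (not shown in this diff): wire client subcommands into the CLI.
package main

import (
	"github.com/zrepl/zrepl/cli"
	"github.com/zrepl/zrepl/client"
)

func init() {
	// Each subcommand registers itself on the hidden cobra root command.
	cli.AddSubcommand(client.StatusCmd)
	cli.AddSubcommand(client.SignalCmd)
	cli.AddSubcommand(client.ConfigcheckCmd)
	cli.AddSubcommand(client.VersionCmd)
}

func main() {
	// Parses the persistent --config flag, resolves the subcommand, parses the
	// config (unless NoRequireConfig), and runs the subcommand's Run func.
	cli.Run()
}
```

The indirection through cli.Subcommand keeps cobra and config parsing out of the individual client commands, which only see the already-parsed *config.Config via subcommand.Config().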
client/configcheck.go (new file, 36 lines)

@@ -0,0 +1,36 @@
package client

import (
    "encoding/json"
    "github.com/kr/pretty"
    "github.com/spf13/pflag"
    "github.com/zrepl/yaml-config"
    "github.com/zrepl/zrepl/cli"
    "os"
)

var configcheckArgs struct {
    format string
}

var ConfigcheckCmd = &cli.Subcommand{
    Use:   "configcheck",
    Short: "check if config can be parsed without errors",
    SetupFlags: func(f *pflag.FlagSet) {
        f.StringVar(&configcheckArgs.format, "format", "", "dump parsed config object [pretty|yaml|json]")
    },
    Run: func(subcommand *cli.Subcommand, args []string) error {
        switch configcheckArgs.format {
        case "pretty":
            _, err := pretty.Println(subcommand.Config())
            return err
        case "json":
            return json.NewEncoder(os.Stdout).Encode(subcommand.Config())
        case "yaml":
            return yaml.NewEncoder(os.Stdout).Encode(subcommand.Config())
        default: // no output
        }
        return nil
    },
}
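As a usage note: combined with the root command's persistent `--config` flag from cli/cli.go above, checking an edited configuration would look roughly like `zrepl --config /path/to/zrepl.yml configcheck --format yaml`; the config path here is only a placeholder, not a path mandated by this commit.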
client/jsonclient.go (new file, 48 lines)

@@ -0,0 +1,48 @@
package client

import (
    "bytes"
    "context"
    "encoding/json"
    "github.com/pkg/errors"
    "io"
    "net"
    "net/http"
)

func controlHttpClient(sockpath string) (client http.Client, err error) {
    return http.Client{
        Transport: &http.Transport{
            DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
                return net.Dial("unix", sockpath)
            },
        },
    }, nil
}

func jsonRequestResponse(c http.Client, endpoint string, req interface{}, res interface{}) error {
    var buf bytes.Buffer
    encodeErr := json.NewEncoder(&buf).Encode(req)
    if encodeErr != nil {
        return encodeErr
    }

    resp, err := c.Post("http://unix"+endpoint, "application/json", &buf)
    if err != nil {
        return err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        var msg bytes.Buffer
        io.CopyN(&msg, resp.Body, 4096)
        return errors.Errorf("%s", msg.String())
    }

    decodeError := json.NewDecoder(resp.Body).Decode(&res)
    if decodeError != nil {
        return decodeError
    }

    return nil
}
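For context, a minimal sketch of how these unexported helpers are used by other files in this commit (the status, signal, pprof, and version subcommands): the client talks HTTP over the daemon's control unix socket and exchanges JSON bodies. The endpoint constant comes from the daemon package as referenced in status.go; the example function name and the generic map result type are assumptions for illustration only (status.go decodes into map[string]job.Status).

```go
package client // sketch lives in the same package because the helpers are unexported

import "github.com/zrepl/zrepl/daemon"

// exampleStatusQuery is a hypothetical helper (not part of this commit) showing
// the intended call pattern: HTTP over the control socket, JSON in both directions.
func exampleStatusQuery(sockpath string) (map[string]interface{}, error) {
	httpc, err := controlHttpClient(sockpath)
	if err != nil {
		return nil, err
	}
	// Generic map used only for illustration; the real caller uses typed reports.
	var res map[string]interface{}
	err = jsonRequestResponse(httpc, daemon.ControlJobEndpointStatus, struct{}{}, &res)
	if err != nil {
		return nil, err
	}
	return res, nil
}
```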
client/pprof.go (new file, 66 lines)

@@ -0,0 +1,66 @@
package client

import (
    "errors"
    "github.com/zrepl/zrepl/cli"
    "github.com/zrepl/zrepl/config"
    "github.com/zrepl/zrepl/daemon"
    "log"
    "os"
)

var pprofArgs struct {
    daemon.PprofServerControlMsg
}

var PprofCmd = &cli.Subcommand{
    Use:   "pprof off | [on TCP_LISTEN_ADDRESS]",
    Short: "start a http server exposing go-tool-compatible profiling endpoints at TCP_LISTEN_ADDRESS",
    Run: func(subcommand *cli.Subcommand, args []string) error {
        if len(args) < 1 {
            goto enargs
        }
        switch args[0] {
        case "on":
            pprofArgs.Run = true
            if len(args) != 2 {
                return errors.New("must specify TCP_LISTEN_ADDRESS as second positional argument")
            }
            pprofArgs.HttpListenAddress = args[1]
        case "off":
            if len(args) != 1 {
                goto enargs
            }
            pprofArgs.Run = false
        }

        RunPProf(subcommand.Config())
        return nil
    enargs:
        return errors.New("invalid number of positional arguments")

    },
}

func RunPProf(conf *config.Config) {
    log := log.New(os.Stderr, "", 0)

    die := func() {
        log.Printf("exiting after error")
        os.Exit(1)
    }

    log.Printf("connecting to zrepl daemon")

    httpc, err := controlHttpClient(conf.Global.Control.SockPath)
    if err != nil {
        log.Printf("error creating http client: %s", err)
        die()
    }
    err = jsonRequestResponse(httpc, daemon.ControlJobEndpointPProf, pprofArgs.PprofServerControlMsg, struct{}{})
    if err != nil {
        log.Printf("error sending control message: %s", err)
        die()
    }
    log.Printf("finished")
}
client/signal.go (new file, 39 lines)

@@ -0,0 +1,39 @@
package client

import (
    "github.com/pkg/errors"
    "github.com/zrepl/zrepl/cli"
    "github.com/zrepl/zrepl/config"
    "github.com/zrepl/zrepl/daemon"
)

var SignalCmd = &cli.Subcommand{
    Use:   "signal [wakeup|reset] JOB",
    Short: "wake up a job from wait state or abort its current invocation",
    Run: func(subcommand *cli.Subcommand, args []string) error {
        return runSignalCmd(subcommand.Config(), args)
    },
}

func runSignalCmd(config *config.Config, args []string) error {
    if len(args) != 2 {
        return errors.Errorf("Expected 2 arguments: [wakeup|reset] JOB")
    }

    httpc, err := controlHttpClient(config.Global.Control.SockPath)
    if err != nil {
        return err
    }

    err = jsonRequestResponse(httpc, daemon.ControlJobEndpointSignal,
        struct {
            Name string
            Op   string
        }{
            Name: args[1],
            Op:   args[0],
        },
        struct{}{},
    )
    return err
}
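To make the wire format explicit: jsonRequestResponse encodes the anonymous request struct with encoding/json, so a wakeup request for a job is posted to the signal endpoint as a small JSON object using Go's default field-name mapping. The snippet below is a self-contained check of that encoding; the job name is a made-up example and the helper function is not part of this commit.

```go
package client

import (
	"encoding/json"
	"fmt"
)

// exampleSignalBody illustrates the JSON body produced for
// `zrepl signal wakeup prod_to_backup` (hypothetical job name).
// The struct literal mirrors the anonymous struct in runSignalCmd above.
func exampleSignalBody() {
	body, _ := json.Marshal(struct {
		Name string
		Op   string
	}{Name: "prod_to_backup", Op: "wakeup"})
	fmt.Println(string(body)) // {"Name":"prod_to_backup","Op":"wakeup"}
}
```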
547
client/status.go
Normal file
547
client/status.go
Normal file
@ -0,0 +1,547 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/nsf/termbox-go"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/spf13/pflag"
|
||||
"github.com/zrepl/yaml-config"
|
||||
"github.com/zrepl/zrepl/cli"
|
||||
"github.com/zrepl/zrepl/daemon"
|
||||
"github.com/zrepl/zrepl/daemon/job"
|
||||
"github.com/zrepl/zrepl/daemon/pruner"
|
||||
"github.com/zrepl/zrepl/replication"
|
||||
"github.com/zrepl/zrepl/replication/fsrep"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type tui struct {
|
||||
x, y int
|
||||
indent int
|
||||
|
||||
lock sync.Mutex //For report and error
|
||||
report map[string]job.Status
|
||||
err error
|
||||
}
|
||||
|
||||
func newTui() tui {
|
||||
return tui{}
|
||||
}
|
||||
|
||||
func (t *tui) moveCursor(x, y int) {
|
||||
t.x += x
|
||||
t.y += y
|
||||
}
|
||||
|
||||
func (t *tui) moveLine(dl int, col int) {
|
||||
t.y += dl
|
||||
t.x = t.indent*4 + col
|
||||
}
|
||||
|
||||
func (t *tui) write(text string) {
|
||||
for _, c := range text {
|
||||
if c == '\n' {
|
||||
t.newline()
|
||||
continue
|
||||
}
|
||||
termbox.SetCell(t.x, t.y, c, termbox.ColorDefault, termbox.ColorDefault)
|
||||
t.x += 1
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tui) printf(text string, a ...interface{}) {
|
||||
t.write(fmt.Sprintf(text, a...))
|
||||
}
|
||||
|
||||
func (t *tui) newline() {
|
||||
t.moveLine(1, 0)
|
||||
}
|
||||
|
||||
func (t *tui) setIndent(indent int) {
|
||||
t.indent = indent
|
||||
t.moveLine(0, 0)
|
||||
}
|
||||
|
||||
func (t *tui) addIndent(indent int) {
|
||||
t.indent += indent
|
||||
t.moveLine(0, 0)
|
||||
}
|
||||
|
||||
var statusFlags struct {
|
||||
Raw bool
|
||||
}
|
||||
|
||||
var StatusCmd = &cli.Subcommand{
|
||||
Use: "status",
|
||||
Short: "show job activity or dump as JSON for monitoring",
|
||||
SetupFlags: func(f *pflag.FlagSet) {
|
||||
f.BoolVar(&statusFlags.Raw, "raw", false, "dump raw status description from zrepl daemon")
|
||||
},
|
||||
Run: runStatus,
|
||||
}
|
||||
|
||||
func runStatus(s *cli.Subcommand, args []string) error {
|
||||
httpc, err := controlHttpClient(s.Config().Global.Control.SockPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if statusFlags.Raw {
|
||||
resp, err := httpc.Get("http://unix"+daemon.ControlJobEndpointStatus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
fmt.Fprintf(os.Stderr, "Received error response:\n")
|
||||
io.CopyN(os.Stderr, resp.Body, 4096)
|
||||
return errors.Errorf("exit")
|
||||
}
|
||||
if _, err := io.Copy(os.Stdout, resp.Body); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
t := newTui()
|
||||
t.lock.Lock()
|
||||
t.err = errors.New("Got no report yet")
|
||||
t.lock.Unlock()
|
||||
|
||||
err = termbox.Init()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer termbox.Close()
|
||||
|
||||
update := func() {
|
||||
m := make(map[string]job.Status)
|
||||
|
||||
err2 := jsonRequestResponse(httpc, daemon.ControlJobEndpointStatus,
|
||||
struct{}{},
|
||||
&m,
|
||||
)
|
||||
|
||||
t.lock.Lock()
|
||||
t.err = err2
|
||||
t.report = m
|
||||
t.lock.Unlock()
|
||||
t.draw()
|
||||
}
|
||||
update()
|
||||
|
||||
ticker := time.NewTicker(500 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
go func() {
|
||||
for _ = range ticker.C {
|
||||
update()
|
||||
}
|
||||
}()
|
||||
|
||||
termbox.HideCursor()
|
||||
termbox.Clear(termbox.ColorDefault, termbox.ColorDefault)
|
||||
|
||||
loop:
|
||||
for {
|
||||
switch ev := termbox.PollEvent(); ev.Type {
|
||||
case termbox.EventKey:
|
||||
switch ev.Key {
|
||||
case termbox.KeyEsc:
|
||||
break loop
|
||||
case termbox.KeyCtrlC:
|
||||
break loop
|
||||
}
|
||||
case termbox.EventResize:
|
||||
t.draw()
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (t *tui) draw() {
|
||||
t.lock.Lock()
|
||||
defer t.lock.Unlock()
|
||||
|
||||
termbox.Clear(termbox.ColorDefault, termbox.ColorDefault)
|
||||
t.x = 0
|
||||
t.y = 0
|
||||
t.indent = 0
|
||||
|
||||
if t.err != nil {
|
||||
t.write(t.err.Error())
|
||||
} else {
|
||||
//Iterate over map in alphabetical order
|
||||
keys := make([]string, len(t.report))
|
||||
i := 0
|
||||
for k, _ := range t.report {
|
||||
keys[i] = k
|
||||
i++
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
for _, k := range keys {
|
||||
v := t.report[k]
|
||||
if len(k) == 0 || daemon.IsInternalJobName(k) { //Internal job
|
||||
continue
|
||||
}
|
||||
t.setIndent(0)
|
||||
|
||||
t.printf("Job: %s", k)
|
||||
t.setIndent(1)
|
||||
t.newline()
|
||||
t.printf("Type: %s", v.Type)
|
||||
t.setIndent(1)
|
||||
t.newline()
|
||||
|
||||
if v.Type != job.TypePush && v.Type != job.TypePull {
|
||||
t.printf("No status representation for job type '%s', dumping as YAML", v.Type)
|
||||
t.newline()
|
||||
asYaml, err := yaml.Marshal(v.JobSpecific)
|
||||
if err != nil {
|
||||
t.printf("Error marshaling status to YAML: %s", err)
|
||||
t.newline()
|
||||
continue
|
||||
}
|
||||
t.write(string(asYaml))
|
||||
t.newline()
|
||||
continue
|
||||
}
|
||||
|
||||
pushStatus, ok := v.JobSpecific.(*job.ActiveSideStatus)
|
||||
if !ok || pushStatus == nil {
|
||||
t.printf("ActiveSideStatus is null")
|
||||
t.newline()
|
||||
continue
|
||||
}
|
||||
|
||||
t.printf("Replication:")
|
||||
t.newline()
|
||||
t.addIndent(1)
|
||||
t.renderReplicationReport(pushStatus.Replication)
|
||||
t.addIndent(-1)
|
||||
|
||||
t.printf("Pruning Sender:")
|
||||
t.newline()
|
||||
t.addIndent(1)
|
||||
t.renderPrunerReport(pushStatus.PruningSender)
|
||||
t.addIndent(-1)
|
||||
|
||||
t.printf("Pruning Receiver:")
|
||||
t.newline()
|
||||
t.addIndent(1)
|
||||
t.renderPrunerReport(pushStatus.PruningReceiver)
|
||||
t.addIndent(-1)
|
||||
|
||||
}
|
||||
}
|
||||
termbox.Flush()
|
||||
}
|
||||
|
||||
func (t *tui) renderReplicationReport(rep *replication.Report) {
|
||||
if rep == nil {
|
||||
t.printf("...\n")
|
||||
return
|
||||
}
|
||||
|
||||
all := make([]*fsrep.Report, 0, len(rep.Completed)+len(rep.Pending) + 1)
|
||||
all = append(all, rep.Completed...)
|
||||
all = append(all, rep.Pending...)
|
||||
if rep.Active != nil {
|
||||
all = append(all, rep.Active)
|
||||
}
|
||||
sort.Slice(all, func(i, j int) bool {
|
||||
return all[i].Filesystem < all[j].Filesystem
|
||||
})
|
||||
|
||||
state, err := replication.StateString(rep.Status)
|
||||
if err != nil {
|
||||
t.printf("Status: %q (parse error: %q)\n", rep.Status, err)
|
||||
return
|
||||
}
|
||||
|
||||
t.printf("Status: %s", state)
|
||||
t.newline()
|
||||
if rep.Problem != "" {
|
||||
t.printf("Problem: %s", rep.Problem)
|
||||
t.newline()
|
||||
}
|
||||
if rep.SleepUntil.After(time.Now()) &&
|
||||
state & ^(replication.ContextDone|replication.Completed) != 0 {
|
||||
t.printf("Sleeping until %s (%s left)\n", rep.SleepUntil, rep.SleepUntil.Sub(time.Now()))
|
||||
}
|
||||
|
||||
if state != replication.Planning && state != replication.PlanningError {
|
||||
// Progress: [---------------]
|
||||
sumUpFSRep := func(rep *fsrep.Report) (transferred, total int64) {
|
||||
for _, s := range rep.Pending {
|
||||
transferred += s.Bytes
|
||||
total += s.ExpectedBytes
|
||||
}
|
||||
for _, s := range rep.Completed {
|
||||
transferred += s.Bytes
|
||||
total += s.ExpectedBytes
|
||||
}
|
||||
return
|
||||
}
|
||||
var transferred, total int64
|
||||
for _, fs := range all {
|
||||
fstx, fstotal := sumUpFSRep(fs)
|
||||
transferred += fstx
|
||||
total += fstotal
|
||||
}
|
||||
t.write("Progress: ")
|
||||
t.drawBar(80, transferred, total)
|
||||
t.write(fmt.Sprintf(" %s / %s", ByteCountBinary(transferred), ByteCountBinary(total)))
|
||||
t.newline()
|
||||
}
|
||||
|
||||
var maxFSLen int
|
||||
for _, fs := range all {
|
||||
if len(fs.Filesystem) > maxFSLen {
|
||||
maxFSLen = len(fs.Filesystem)
|
||||
}
|
||||
}
|
||||
for _, fs := range all {
|
||||
printFilesystemStatus(fs, t, fs == rep.Active, maxFSLen)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tui) renderPrunerReport(r *pruner.Report) {
|
||||
if r == nil {
|
||||
t.printf("...\n")
|
||||
return
|
||||
}
|
||||
|
||||
state, err := pruner.StateString(r.State)
|
||||
if err != nil {
|
||||
t.printf("Status: %q (parse error: %q)\n", r.State, err)
|
||||
return
|
||||
}
|
||||
|
||||
t.printf("Status: %s", state)
|
||||
t.newline()
|
||||
|
||||
if r.Error != "" {
|
||||
t.printf("Error: %s\n", r.Error)
|
||||
}
|
||||
if r.SleepUntil.After(time.Now()) {
|
||||
t.printf("Sleeping until %s (%s left)\n", r.SleepUntil, r.SleepUntil.Sub(time.Now()))
|
||||
}
|
||||
|
||||
type commonFS struct {
|
||||
*pruner.FSReport
|
||||
completed bool
|
||||
}
|
||||
all := make([]commonFS, 0, len(r.Pending) + len(r.Completed))
|
||||
for i := range r.Pending {
|
||||
all = append(all, commonFS{&r.Pending[i], false})
|
||||
}
|
||||
for i := range r.Completed {
|
||||
all = append(all, commonFS{&r.Completed[i], true})
|
||||
}
|
||||
|
||||
switch state {
|
||||
case pruner.Plan: fallthrough
|
||||
case pruner.PlanWait: fallthrough
|
||||
case pruner.ErrPerm:
|
||||
return
|
||||
}
|
||||
|
||||
if len(all) == 0 {
|
||||
t.printf("nothing to do\n")
|
||||
return
|
||||
}
|
||||
|
||||
var totalDestroyCount, completedDestroyCount int
|
||||
var maxFSname int
|
||||
for _, fs := range all {
|
||||
totalDestroyCount += len(fs.DestroyList)
|
||||
if fs.completed {
|
||||
completedDestroyCount += len(fs.DestroyList)
|
||||
}
|
||||
if maxFSname < len(fs.Filesystem) {
|
||||
maxFSname = len(fs.Filesystem)
|
||||
}
|
||||
}
|
||||
|
||||
// global progress bar
|
||||
progress := int(math.Round(80 * float64(completedDestroyCount) / float64(totalDestroyCount)))
|
||||
t.write("Progress: ")
|
||||
t.write("[")
|
||||
t.write(times("=", progress))
|
||||
t.write(">")
|
||||
t.write(times("-", 80 - progress))
|
||||
t.write("]")
|
||||
t.printf(" %d/%d snapshots", completedDestroyCount, totalDestroyCount)
|
||||
t.newline()
|
||||
|
||||
sort.SliceStable(all, func(i, j int) bool {
|
||||
return strings.Compare(all[i].Filesystem, all[j].Filesystem) == -1
|
||||
})
|
||||
|
||||
// Draw a table-like representation of 'all'
|
||||
for _, fs := range all {
|
||||
t.write(rightPad(fs.Filesystem, maxFSname, " "))
|
||||
t.write(" ")
|
||||
if fs.Error != "" {
|
||||
t.printf("ERROR: %s\n", fs.Error) // whitespace is padding
|
||||
continue
|
||||
}
|
||||
|
||||
pruneRuleActionStr := fmt.Sprintf("(destroy %d of %d snapshots)",
|
||||
len(fs.DestroyList), len(fs.SnapshotList))
|
||||
|
||||
if fs.completed {
|
||||
t.printf( "Completed %s\n", pruneRuleActionStr)
|
||||
continue
|
||||
}
|
||||
|
||||
t.write("Pending ") // whitespace is padding 10
|
||||
if len(fs.DestroyList) == 1 {
|
||||
t.write(fs.DestroyList[0].Name)
|
||||
} else {
|
||||
t.write(pruneRuleActionStr)
|
||||
}
|
||||
t.newline()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const snapshotIndent = 1
|
||||
func calculateMaxFSLength(all []*fsrep.Report) (maxFS, maxStatus int) {
|
||||
for _, e := range all {
|
||||
if len(e.Filesystem) > maxFS {
|
||||
maxFS = len(e.Filesystem)
|
||||
}
|
||||
all2 := make([]*fsrep.StepReport, 0, len(e.Pending) + len(e.Completed))
|
||||
all2 = append(all2, e.Pending...)
|
||||
all2 = append(all2, e.Completed...)
|
||||
for _, e2 := range all2 {
|
||||
elen := len(e2.Problem) + len(e2.From) + len(e2.To) + 60 // random spacing, units, labels, etc
|
||||
if elen > maxStatus {
|
||||
maxStatus = elen
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func times(str string, n int) (out string) {
|
||||
for i := 0; i < n; i++ {
|
||||
out += str
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func rightPad(str string, length int, pad string) string {
|
||||
if len(str) > length {
|
||||
return str[:length]
|
||||
}
|
||||
return str + times(pad, length-len(str))
|
||||
}
|
||||
|
||||
|
||||
func leftPad(str string, length int, pad string) string {
|
||||
if len(str) > length {
|
||||
return str[len(str)-length:]
|
||||
}
|
||||
return times(pad, length-len(str)) + str
|
||||
}
|
||||
|
||||
func (t *tui) drawBar(length int, bytes, totalBytes int64) {
|
||||
var completedLength int
|
||||
if totalBytes > 0 {
|
||||
completedLength = int(int64(length) * bytes / totalBytes)
|
||||
if completedLength > length {
|
||||
completedLength = length
|
||||
}
|
||||
} else if totalBytes == bytes {
|
||||
completedLength = length
|
||||
}
|
||||
|
||||
t.write("[")
|
||||
t.write(times("=", completedLength))
|
||||
t.write(">")
|
||||
t.write(times("-", length-completedLength))
|
||||
t.write("]")
|
||||
}
|
||||
|
||||
func StringStepState(s fsrep.StepState) string {
|
||||
switch s {
|
||||
case fsrep.StepReplicationReady: return "Ready"
|
||||
case fsrep.StepReplicationRetry: return "Retry"
|
||||
case fsrep.StepMarkReplicatedReady: return "MarkReady"
|
||||
case fsrep.StepMarkReplicatedRetry: return "MarkRetry"
|
||||
case fsrep.StepPermanentError: return "PermanentError"
|
||||
case fsrep.StepCompleted: return "Completed"
|
||||
default:
|
||||
return fmt.Sprintf("UNKNOWN %d", s)
|
||||
}
|
||||
}
|
||||
|
||||
func filesystemStatusString(rep *fsrep.Report, active bool, fsWidth int) (line string, bytes, totalBytes int64) {
|
||||
bytes = int64(0)
|
||||
totalBytes = int64(0)
|
||||
for _, s := range rep.Pending {
|
||||
bytes += s.Bytes
|
||||
totalBytes += s.ExpectedBytes
|
||||
}
|
||||
for _, s := range rep.Completed {
|
||||
bytes += s.Bytes
|
||||
totalBytes += s.ExpectedBytes
|
||||
}
|
||||
|
||||
next := ""
|
||||
if rep.Problem != "" {
|
||||
next = " problem: " + rep.Problem
|
||||
} else if len(rep.Pending) > 0 {
|
||||
if rep.Pending[0].From != "" {
|
||||
next = fmt.Sprintf(" next: %s => %s", rep.Pending[0].From, rep.Pending[0].To)
|
||||
} else {
|
||||
next = fmt.Sprintf(" next: %s (full)", rep.Pending[0].To)
|
||||
}
|
||||
}
|
||||
status := fmt.Sprintf("%s (step %d/%d, %s/%s)%s",
|
||||
rep.Status,
|
||||
len(rep.Completed), len(rep.Pending) + len(rep.Completed),
|
||||
ByteCountBinary(bytes), ByteCountBinary(totalBytes),
|
||||
next,
|
||||
)
|
||||
activeIndicator := " "
|
||||
if active {
|
||||
activeIndicator = "*"
|
||||
}
|
||||
line = fmt.Sprintf("%s %s %s",
|
||||
activeIndicator,
|
||||
rightPad(rep.Filesystem, fsWidth, " "),
|
||||
status)
|
||||
return line, bytes, totalBytes
|
||||
}
|
||||
|
||||
func printFilesystemStatus(rep *fsrep.Report, t *tui, active bool, maxFS int) {
|
||||
totalStatus, _, _ := filesystemStatusString(rep, active, maxFS)
|
||||
t.write(totalStatus)
|
||||
t.newline()
|
||||
}
|
||||
|
||||
func ByteCountBinary(b int64) string {
|
||||
const unit = 1024
|
||||
if b < unit {
|
||||
return fmt.Sprintf("%d B", b)
|
||||
}
|
||||
div, exp := int64(unit), 0
|
||||
for n := b / unit; n >= unit; n /= unit {
|
||||
div *= unit
|
||||
exp++
|
||||
}
|
||||
return fmt.Sprintf("%.1f %ciB", float64(b)/float64(div), "KMGTPE"[exp])
|
||||
}
|
@ -1,55 +1,49 @@
|
||||
package cmd
|
||||
package client
|
||||
|
||||
import (
|
||||
"github.com/zrepl/zrepl/cli"
|
||||
"os"
|
||||
|
||||
"context"
|
||||
"errors"
|
||||
"github.com/problame/go-netssh"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"log"
|
||||
"path"
|
||||
)
|
||||
|
||||
var StdinserverCmd = &cobra.Command{
|
||||
var StdinserverCmd = &cli.Subcommand{
|
||||
Use: "stdinserver CLIENT_IDENTITY",
|
||||
Short: "start in stdinserver mode (from authorized_keys file)",
|
||||
Run: cmdStdinServer,
|
||||
Short: "stdinserver transport mode (started from authorized_keys file as forced command)",
|
||||
Run: func(subcommand *cli.Subcommand, args []string) error {
|
||||
return runStdinserver(subcommand.Config(), args)
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(StdinserverCmd)
|
||||
}
|
||||
|
||||
func cmdStdinServer(cmd *cobra.Command, args []string) {
|
||||
func runStdinserver(config *config.Config, args []string) error {
|
||||
|
||||
// NOTE: the netssh proxying protocol requires exiting with non-zero status if anything goes wrong
|
||||
defer os.Exit(1)
|
||||
|
||||
log := log.New(os.Stderr, "", log.LUTC|log.Ldate|log.Ltime)
|
||||
|
||||
conf, err := ParseConfig(rootArgs.configFile)
|
||||
if err != nil {
|
||||
log.Printf("error parsing config: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(args) != 1 || args[0] == "" {
|
||||
log.Print("must specify client_identity as positional argument")
|
||||
return
|
||||
err := errors.New("must specify client_identity as positional argument")
|
||||
return err
|
||||
}
|
||||
|
||||
identity := args[0]
|
||||
unixaddr := path.Join(conf.Global.Serve.Stdinserver.SockDir, identity)
|
||||
unixaddr := path.Join(config.Global.Serve.StdinServer.SockDir, identity)
|
||||
|
||||
log.Printf("proxying client identity '%s' to zrepl daemon '%s'", identity, unixaddr)
|
||||
|
||||
ctx := netssh.ContextWithLog(context.TODO(), log)
|
||||
|
||||
err = netssh.Proxy(ctx, unixaddr)
|
||||
err := netssh.Proxy(ctx, unixaddr)
|
||||
if err == nil {
|
||||
log.Print("proxying finished successfully, exiting with status 0")
|
||||
os.Exit(0)
|
||||
}
|
||||
log.Printf("error proxying: %s", err)
|
||||
|
||||
return nil
|
||||
}
|
108
client/testcmd.go
Normal file
108
client/testcmd.go
Normal file
@ -0,0 +1,108 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/spf13/pflag"
|
||||
"github.com/zrepl/zrepl/cli"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/filters"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
)
|
||||
|
||||
var TestCmd = &cli.Subcommand {
|
||||
Use: "test",
|
||||
SetupSubcommands: func() []*cli.Subcommand {
|
||||
return []*cli.Subcommand{testFilter}
|
||||
},
|
||||
}
|
||||
|
||||
var testFilterArgs struct {
|
||||
job string
|
||||
all bool
|
||||
input string
|
||||
}
|
||||
|
||||
var testFilter = &cli.Subcommand{
|
||||
Use: "filesystems --job JOB [--all | --input INPUT]",
|
||||
Short: "test filesystems filter specified in push or source job",
|
||||
SetupFlags: func(f *pflag.FlagSet) {
|
||||
f.StringVar(&testFilterArgs.job, "job", "", "the name of the push or source job")
|
||||
f.StringVar(&testFilterArgs.input, "input", "", "a filesystem name to test against the job's filters")
|
||||
f.BoolVar(&testFilterArgs.all, "all", false, "test all local filesystems")
|
||||
},
|
||||
Run: runTestFilterCmd,
|
||||
}
|
||||
|
||||
func runTestFilterCmd(subcommand *cli.Subcommand, args []string) error {
|
||||
|
||||
if testFilterArgs.job == "" {
|
||||
return fmt.Errorf("must specify --job flag")
|
||||
}
|
||||
if !(testFilterArgs.all != (testFilterArgs.input != "")) { // xor
|
||||
return fmt.Errorf("must set one: --all or --input")
|
||||
}
|
||||
|
||||
conf := subcommand.Config()
|
||||
|
||||
var confFilter config.FilesystemsFilter
|
||||
job, err := conf.Job(testFilterArgs.job)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch j := job.Ret.(type) {
|
||||
case *config.SourceJob: confFilter = j.Filesystems
|
||||
case *config.PushJob: confFilter = j.Filesystems
|
||||
default:
|
||||
return fmt.Errorf("job type %T does not have filesystems filter", j)
|
||||
}
|
||||
|
||||
f, err := filters.DatasetMapFilterFromConfig(confFilter)
|
||||
if err != nil {
|
||||
return fmt.Errorf("filter invalid: %s", err)
|
||||
}
|
||||
|
||||
var fsnames []string
|
||||
if testFilterArgs.input != "" {
|
||||
fsnames = []string{testFilterArgs.input}
|
||||
} else {
|
||||
out, err := zfs.ZFSList([]string{"name"})
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not list ZFS filesystems: %s", err)
|
||||
}
|
||||
for _, row := range out {
|
||||
|
||||
fsnames = append(fsnames, row[0])
|
||||
}
|
||||
}
|
||||
|
||||
fspaths := make([]*zfs.DatasetPath, len(fsnames))
|
||||
for i, fsname := range fsnames {
|
||||
path, err := zfs.NewDatasetPath(fsname)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fspaths[i] = path
|
||||
}
|
||||
|
||||
hadFilterErr := false
|
||||
for _, in := range fspaths {
|
||||
var res string
|
||||
var errStr string
|
||||
pass, err := f.Filter(in)
|
||||
if err != nil {
|
||||
res = "ERROR"
|
||||
errStr = err.Error()
|
||||
hadFilterErr = true
|
||||
} else if pass {
|
||||
res = "ACCEPT"
|
||||
} else {
|
||||
res = "REJECT"
|
||||
}
|
||||
fmt.Printf("%s\t%s\t%s\n", res, in.ToString(), errStr)
|
||||
}
|
||||
|
||||
if hadFilterErr {
|
||||
return fmt.Errorf("filter errors occurred")
|
||||
}
|
||||
return nil
|
||||
}
|
72
client/version.go
Normal file
72
client/version.go
Normal file
@ -0,0 +1,72 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/spf13/pflag"
|
||||
"github.com/zrepl/zrepl/cli"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon"
|
||||
"github.com/zrepl/zrepl/version"
|
||||
"os"
|
||||
)
|
||||
|
||||
var versionArgs struct {
|
||||
Show string
|
||||
Config *config.Config
|
||||
ConfigErr error
|
||||
}
|
||||
|
||||
var VersionCmd = &cli.Subcommand{
|
||||
Use: "version",
|
||||
Short: "print version of zrepl binary and running daemon",
|
||||
NoRequireConfig: true,
|
||||
SetupFlags: func(f *pflag.FlagSet) {
|
||||
f.StringVar(&versionArgs.Show, "show", "", "version info to show (client|daemon)")
|
||||
},
|
||||
Run: func(subcommand *cli.Subcommand, args []string) error {
|
||||
versionArgs.Config = subcommand.Config()
|
||||
versionArgs.ConfigErr = subcommand.ConfigParsingError()
|
||||
return runVersionCmd()
|
||||
},
|
||||
}
|
||||
|
||||
func runVersionCmd() error {
|
||||
args := versionArgs
|
||||
|
||||
if args.Show != "daemon" && args.Show != "client" && args.Show != "" {
|
||||
return fmt.Errorf("show flag must be 'client' or 'server' or be left empty")
|
||||
}
|
||||
|
||||
var clientVersion, daemonVersion *version.ZreplVersionInformation
|
||||
if args.Show == "client" || args.Show == "" {
|
||||
clientVersion = version.NewZreplVersionInformation()
|
||||
fmt.Printf("client: %s\n", clientVersion.String())
|
||||
}
|
||||
if args.Show == "daemon" || args.Show == "" {
|
||||
|
||||
if args.ConfigErr != nil {
|
||||
return fmt.Errorf("config parsing error: %s", args.ConfigErr)
|
||||
}
|
||||
|
||||
httpc, err := controlHttpClient(args.Config.Global.Control.SockPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("server: error: %s\n", err)
|
||||
}
|
||||
|
||||
var info version.ZreplVersionInformation
|
||||
err = jsonRequestResponse(httpc, daemon.ControlJobEndpointVersion, "", &info)
|
||||
if err != nil {
|
||||
return fmt.Errorf("server: error: %s\n", err)
|
||||
}
|
||||
daemonVersion = &info
|
||||
fmt.Printf("server: %s\n", daemonVersion.String())
|
||||
}
|
||||
|
||||
if args.Show == "" {
|
||||
if clientVersion.Version != daemonVersion.Version {
|
||||
fmt.Fprintf(os.Stderr, "WARNING: client version != daemon version, restart zrepl daemon\n")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
201
cmd/autosnap.go
201
cmd/autosnap.go
@ -1,201 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"sort"
|
||||
"time"
|
||||
)
|
||||
|
||||
type IntervalAutosnap struct {
|
||||
task *Task
|
||||
DatasetFilter zfs.DatasetFilter
|
||||
Prefix string
|
||||
SnapshotInterval time.Duration
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) filterFilesystems() (fss []*zfs.DatasetPath, stop bool) {
|
||||
a.task.Enter("filter_filesystems")
|
||||
defer a.task.Finish()
|
||||
fss, err := zfs.ZFSListMapping(a.DatasetFilter)
|
||||
stop = err != nil
|
||||
if err != nil {
|
||||
a.task.Log().WithError(err).Error("cannot list datasets")
|
||||
}
|
||||
if len(fss) == 0 {
|
||||
a.task.Log().Warn("no filesystem matching filesystem filter")
|
||||
}
|
||||
return fss, stop
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) findSyncPoint(fss []*zfs.DatasetPath) (syncPoint time.Time, err error) {
|
||||
a.task.Enter("find_sync_point")
|
||||
defer a.task.Finish()
|
||||
type snapTime struct {
|
||||
ds *zfs.DatasetPath
|
||||
time time.Time
|
||||
}
|
||||
|
||||
if len(fss) == 0 {
|
||||
return time.Now(), nil
|
||||
}
|
||||
|
||||
snaptimes := make([]snapTime, 0, len(fss))
|
||||
|
||||
now := time.Now()
|
||||
|
||||
a.task.Log().Debug("examine filesystem state")
|
||||
for _, d := range fss {
|
||||
|
||||
l := a.task.Log().WithField(logFSField, d.ToString())
|
||||
|
||||
fsvs, err := zfs.ZFSListFilesystemVersions(d, NewPrefixFilter(a.Prefix))
|
||||
if err != nil {
|
||||
l.WithError(err).Error("cannot list filesystem versions")
|
||||
continue
|
||||
}
|
||||
if len(fsvs) <= 0 {
|
||||
l.WithField("prefix", a.Prefix).Info("no filesystem versions with prefix")
|
||||
continue
|
||||
}
|
||||
|
||||
// Sort versions by creation
|
||||
sort.SliceStable(fsvs, func(i, j int) bool {
|
||||
return fsvs[i].CreateTXG < fsvs[j].CreateTXG
|
||||
})
|
||||
|
||||
latest := fsvs[len(fsvs)-1]
|
||||
l.WithField("creation", latest.Creation).
|
||||
Debug("found latest snapshot")
|
||||
|
||||
since := now.Sub(latest.Creation)
|
||||
if since < 0 {
|
||||
l.WithField("snapshot", latest.Name).
|
||||
WithField("creation", latest.Creation).
|
||||
Error("snapshot is from the future")
|
||||
continue
|
||||
}
|
||||
next := now
|
||||
if since < a.SnapshotInterval {
|
||||
next = latest.Creation.Add(a.SnapshotInterval)
|
||||
}
|
||||
snaptimes = append(snaptimes, snapTime{d, next})
|
||||
}
|
||||
|
||||
if len(snaptimes) == 0 {
|
||||
snaptimes = append(snaptimes, snapTime{nil, now})
|
||||
}
|
||||
|
||||
sort.Slice(snaptimes, func(i, j int) bool {
|
||||
return snaptimes[i].time.Before(snaptimes[j].time)
|
||||
})
|
||||
|
||||
return snaptimes[0].time, nil
|
||||
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) waitForSyncPoint(ctx context.Context, syncPoint time.Time) {
|
||||
a.task.Enter("wait_sync_point")
|
||||
defer a.task.Finish()
|
||||
|
||||
const LOG_TIME_FMT string = time.ANSIC
|
||||
|
||||
a.task.Log().WithField("sync_point", syncPoint.Format(LOG_TIME_FMT)).
|
||||
Info("wait for sync point")
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
a.task.Log().WithError(ctx.Err()).Info("context done")
|
||||
return
|
||||
case <-time.After(syncPoint.Sub(time.Now())):
|
||||
}
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) syncUpRun(ctx context.Context, didSnaps chan struct{}) (stop bool) {
|
||||
a.task.Enter("sync_up")
|
||||
defer a.task.Finish()
|
||||
|
||||
fss, stop := a.filterFilesystems()
|
||||
if stop {
|
||||
return true
|
||||
}
|
||||
|
||||
syncPoint, err := a.findSyncPoint(fss)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
a.waitForSyncPoint(ctx, syncPoint)
|
||||
|
||||
a.task.Log().Debug("snapshot all filesystems to enable further snaps in lockstep")
|
||||
a.doSnapshots(didSnaps)
|
||||
return false
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) Run(ctx context.Context, didSnaps chan struct{}) {
|
||||
|
||||
if a.syncUpRun(ctx, didSnaps) {
|
||||
a.task.Log().Error("stoppping autosnap after error in sync up")
|
||||
return
|
||||
}
|
||||
|
||||
// task drops back to idle here
|
||||
|
||||
a.task.Log().Debug("setting up ticker in SnapshotInterval")
|
||||
ticker := time.NewTicker(a.SnapshotInterval)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
ticker.Stop()
|
||||
a.task.Log().WithError(ctx.Err()).Info("context done")
|
||||
return
|
||||
|
||||
case <-ticker.C:
|
||||
a.doSnapshots(didSnaps)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (a *IntervalAutosnap) doSnapshots(didSnaps chan struct{}) {
|
||||
|
||||
a.task.Enter("do_snapshots")
|
||||
defer a.task.Finish()
|
||||
|
||||
// don't cache the result from previous run in case the user added
|
||||
// a new dataset in the meantime
|
||||
ds, stop := a.filterFilesystems()
|
||||
if stop {
|
||||
return
|
||||
}
|
||||
|
||||
// TODO channel programs -> allow a little jitter?
|
||||
for _, d := range ds {
|
||||
suffix := time.Now().In(time.UTC).Format("20060102_150405_000")
|
||||
snapname := fmt.Sprintf("%s%s", a.Prefix, suffix)
|
||||
|
||||
l := a.task.Log().WithField(logFSField, d.ToString()).
|
||||
WithField("snapname", snapname)
|
||||
|
||||
l.Info("create snapshot")
|
||||
err := zfs.ZFSSnapshot(d, snapname, false)
|
||||
if err != nil {
|
||||
a.task.Log().WithError(err).Error("cannot create snapshot")
|
||||
}
|
||||
|
||||
l.Info("create corresponding bookmark")
|
||||
err = zfs.ZFSBookmark(d, snapname, snapname)
|
||||
if err != nil {
|
||||
a.task.Log().WithError(err).Error("cannot create bookmark")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
select {
|
||||
case didSnaps <- struct{}{}:
|
||||
default:
|
||||
a.task.Log().Error("warning: callback channel is full, discarding")
|
||||
}
|
||||
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"os"
|
||||
)
|
||||
|
||||
var bashcompCmd = &cobra.Command{
|
||||
Use: "bashcomp path/to/out/file",
|
||||
Short: "generate bash completions",
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
if len(args) != 1 {
|
||||
fmt.Fprintf(os.Stderr, "specify exactly one positional agument\n")
|
||||
cmd.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := RootCmd.GenBashCompletionFile(args[0]); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error generating bash completion: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
},
|
||||
Hidden: true,
|
||||
}
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(bashcompCmd)
|
||||
}
|
102
cmd/config.go
102
cmd/config.go
@ -1,102 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
Global Global
|
||||
Jobs map[string]Job
|
||||
}
|
||||
|
||||
func (c *Config) LookupJob(name string) (j Job, err error) {
|
||||
j, ok := c.Jobs[name]
|
||||
if !ok {
|
||||
return nil, errors.Errorf("job '%s' is not defined", name)
|
||||
}
|
||||
return j, nil
|
||||
}
|
||||
|
||||
type Global struct {
|
||||
Serve struct {
|
||||
Stdinserver struct {
|
||||
SockDir string
|
||||
}
|
||||
}
|
||||
Control struct {
|
||||
Sockpath string
|
||||
}
|
||||
logging *LoggingConfig
|
||||
}
|
||||
|
||||
type JobDebugSettings struct {
|
||||
Conn struct {
|
||||
ReadDump string `mapstructure:"read_dump"`
|
||||
WriteDump string `mapstructure:"write_dump"`
|
||||
}
|
||||
RPC struct {
|
||||
Log bool
|
||||
}
|
||||
}
|
||||
|
||||
type RWCConnecter interface {
|
||||
Connect() (io.ReadWriteCloser, error)
|
||||
}
|
||||
type AuthenticatedChannelListenerFactory interface {
|
||||
Listen() (AuthenticatedChannelListener, error)
|
||||
}
|
||||
|
||||
type AuthenticatedChannelListener interface {
|
||||
Accept() (ch io.ReadWriteCloser, err error)
|
||||
Close() (err error)
|
||||
}
|
||||
|
||||
type SSHStdinServerConnectDescr struct {
|
||||
}
|
||||
|
||||
type PrunePolicy interface {
|
||||
// Prune filters versions and decide which to keep and which to remove.
|
||||
// Prune **does not** implement the actual removal of the versions.
|
||||
Prune(fs *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error)
|
||||
}
|
||||
|
||||
type PruningJob interface {
|
||||
Pruner(task *Task, side PrunePolicySide, dryRun bool) (Pruner, error)
|
||||
}
|
||||
|
||||
// A type for constants describing different prune policies of a PruningJob
|
||||
// This is mostly a special-case for LocalJob, which is the only job that has two prune policies
|
||||
// instead of one.
|
||||
// It implements github.com/spf13/pflag.Value to be used as CLI flag for the test subcommand
|
||||
type PrunePolicySide string
|
||||
|
||||
const (
|
||||
PrunePolicySideDefault PrunePolicySide = ""
|
||||
PrunePolicySideLeft PrunePolicySide = "left"
|
||||
PrunePolicySideRight PrunePolicySide = "right"
|
||||
)
|
||||
|
||||
func (s *PrunePolicySide) String() string {
|
||||
return string(*s)
|
||||
}
|
||||
|
||||
func (s *PrunePolicySide) Set(news string) error {
|
||||
p := PrunePolicySide(news)
|
||||
switch p {
|
||||
case PrunePolicySideRight:
|
||||
fallthrough
|
||||
case PrunePolicySideLeft:
|
||||
*s = p
|
||||
default:
|
||||
return errors.Errorf("must be either %s or %s", PrunePolicySideLeft, PrunePolicySideRight)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PrunePolicySide) Type() string {
|
||||
return fmt.Sprintf("%s | %s", PrunePolicySideLeft, PrunePolicySideRight)
|
||||
}
|
@ -1,66 +0,0 @@
package cmd

import (
	"fmt"
	"io"

	"context"
	"github.com/jinzhu/copier"
	"github.com/mitchellh/mapstructure"
	"github.com/pkg/errors"
	"github.com/problame/go-netssh"
	"time"
)

type SSHStdinserverConnecter struct {
	Host                 string
	User                 string
	Port                 uint16
	IdentityFile         string   `mapstructure:"identity_file"`
	TransportOpenCommand []string `mapstructure:"transport_open_command"`
	SSHCommand           string   `mapstructure:"ssh_command"`
	Options              []string
	DialTimeout          string `mapstructure:"dial_timeout"`
	dialTimeout          time.Duration
}

func parseSSHStdinserverConnecter(i map[string]interface{}) (c *SSHStdinserverConnecter, err error) {

	c = &SSHStdinserverConnecter{}
	if err = mapstructure.Decode(i, c); err != nil {
		err = errors.New(fmt.Sprintf("could not parse ssh transport: %s", err))
		return nil, err
	}

	if c.DialTimeout != "" {
		c.dialTimeout, err = time.ParseDuration(c.DialTimeout)
		if err != nil {
			return nil, errors.Wrap(err, "cannot parse dial_timeout")
		}
	} else {
		c.dialTimeout = 10 * time.Second
	}

	// TODO assert fields are filled
	return

}

func (c *SSHStdinserverConnecter) Connect() (rwc io.ReadWriteCloser, err error) {

	var endpoint netssh.Endpoint
	if err = copier.Copy(&endpoint, c); err != nil {
		return nil, errors.WithStack(err)
	}
	var dialCtx context.Context
	dialCtx, dialCancel := context.WithTimeout(context.TODO(), c.dialTimeout) // context.TODO tied to error handling below
	defer dialCancel()
	if rwc, err = netssh.Dial(dialCtx, endpoint); err != nil {
		if err == context.DeadlineExceeded {
			err = errors.Errorf("dial_timeout of %s exceeded", c.dialTimeout)
		}
		err = errors.WithStack(err)
		return
	}
	return
}
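Connect above bounds the SSH dial with context.WithTimeout and maps context.DeadlineExceeded to a friendlier error message. The same pattern, sketched stand-alone with a plain net.Dialer in place of netssh.Dial; the address and timeout are placeholders for illustration:

package main

import (
	"context"
	"fmt"
	"net"
	"time"
)

func main() {
	dialTimeout := 10 * time.Second // mirrors the 10s default above
	ctx, cancel := context.WithTimeout(context.Background(), dialTimeout)
	defer cancel()

	var d net.Dialer
	conn, err := d.DialContext(ctx, "tcp", "backup.example.com:22") // placeholder address
	if err != nil {
		if ctx.Err() == context.DeadlineExceeded {
			fmt.Printf("dial_timeout of %s exceeded\n", dialTimeout)
			return
		}
		fmt.Println("dial error:", err)
		return
	}
	defer conn.Close()
	fmt.Println("connected to", conn.RemoteAddr())
}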
@ -1,36 +0,0 @@
package cmd

import (
	"github.com/pkg/errors"
	"github.com/zrepl/zrepl/zfs"
	"strings"
)

type PrefixFilter struct {
	prefix    string
	fstype    zfs.VersionType
	fstypeSet bool // optionals anyone?
}

func NewPrefixFilter(prefix string) *PrefixFilter {
	return &PrefixFilter{prefix: prefix}
}

func NewTypedPrefixFilter(prefix string, versionType zfs.VersionType) *PrefixFilter {
	return &PrefixFilter{prefix, versionType, true}
}

func parseSnapshotPrefix(i string) (p string, err error) {
	if len(i) <= 0 {
		err = errors.Errorf("snapshot prefix must not be empty string")
		return
	}
	p = i
	return
}

func (f *PrefixFilter) Filter(fsv zfs.FilesystemVersion) (accept bool, err error) {
	fstypeMatches := (!f.fstypeSet || fsv.Type == f.fstype)
	prefixMatches := strings.HasPrefix(fsv.Name, f.prefix)
	return fstypeMatches && prefixMatches, nil
}
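The accept decision above reduces to a prefix match on the version name, optionally restricted to a version type. A tiny stand-alone illustration of that decision with made-up snapshot names and a typical prefix such as "zrepl_":

package main

import (
	"fmt"
	"strings"
)

func main() {
	prefix := "zrepl_" // hypothetical snapshot_prefix from a job config
	for _, name := range []string{"zrepl_20180818_120000", "manual_before_upgrade"} {
		fmt.Printf("%-24s accepted: %v\n", name, strings.HasPrefix(name, prefix))
	}
}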
@ -1,145 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"github.com/pkg/errors"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type ControlJob struct {
|
||||
Name string
|
||||
sockaddr *net.UnixAddr
|
||||
}
|
||||
|
||||
func NewControlJob(name, sockpath string) (j *ControlJob, err error) {
|
||||
j = &ControlJob{Name: name}
|
||||
|
||||
j.sockaddr, err = net.ResolveUnixAddr("unix", sockpath)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot resolve unix address")
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (j *ControlJob) JobName() string {
|
||||
return j.Name
|
||||
}
|
||||
|
||||
func (j *ControlJob) JobType() JobType { return JobTypeControl }
|
||||
|
||||
func (j *ControlJob) JobStatus(ctx context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{Tasks: nil}, nil
|
||||
}
|
||||
|
||||
const (
|
||||
ControlJobEndpointPProf string = "/debug/pprof"
|
||||
ControlJobEndpointVersion string = "/version"
|
||||
ControlJobEndpointStatus string = "/status"
|
||||
)
|
||||
|
||||
func (j *ControlJob) JobStart(ctx context.Context) {
|
||||
|
||||
log := ctx.Value(contextKeyLog).(Logger)
|
||||
defer log.Info("control job finished")
|
||||
|
||||
daemon := ctx.Value(contextKeyDaemon).(*Daemon)
|
||||
|
||||
l, err := ListenUnixPrivate(j.sockaddr)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error listening")
|
||||
return
|
||||
}
|
||||
|
||||
pprofServer := NewPProfServer(ctx)
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle(ControlJobEndpointPProf, requestLogger{log: log, handlerFunc: func(w http.ResponseWriter, r *http.Request) {
|
||||
var msg PprofServerControlMsg
|
||||
err := json.NewDecoder(r.Body).Decode(&msg)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("bad pprof request from client")
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
}
|
||||
pprofServer.Control(msg)
|
||||
w.WriteHeader(200)
|
||||
}})
|
||||
mux.Handle(ControlJobEndpointVersion,
|
||||
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
|
||||
return NewZreplVersionInformation(), nil
|
||||
}}})
|
||||
mux.Handle(ControlJobEndpointStatus,
|
||||
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
|
||||
return daemon.Status(), nil
|
||||
}}})
|
||||
server := http.Server{Handler: mux}
|
||||
|
||||
outer:
|
||||
for {
|
||||
|
||||
served := make(chan error)
|
||||
go func() {
|
||||
served <- server.Serve(l)
|
||||
close(served)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.WithError(ctx.Err()).Info("context done")
|
||||
server.Shutdown(context.Background())
|
||||
break outer
|
||||
case err = <-served:
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error serving")
|
||||
break outer
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type jsonResponder struct {
|
||||
producer func() (interface{}, error)
|
||||
}
|
||||
|
||||
func (j jsonResponder) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
res, err := j.producer()
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
io.WriteString(w, err.Error())
|
||||
return
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
err = json.NewEncoder(&buf).Encode(res)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
io.WriteString(w, err.Error())
|
||||
} else {
|
||||
io.Copy(w, &buf)
|
||||
}
|
||||
}
|
||||
|
||||
type requestLogger struct {
|
||||
log Logger
|
||||
handler http.Handler
|
||||
handlerFunc http.HandlerFunc
|
||||
}
|
||||
|
||||
func (l requestLogger) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
log := l.log.WithField("method", r.Method).WithField("url", r.URL)
|
||||
log.Info("start")
|
||||
if l.handlerFunc != nil {
|
||||
l.handlerFunc(w, r)
|
||||
} else if l.handler != nil {
|
||||
l.handler.ServeHTTP(w, r)
|
||||
} else {
|
||||
log.Error("no handler or handlerFunc configured")
|
||||
}
|
||||
log.Info("finish")
}
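The control job serves plain HTTP on a private unix socket, with /status and /version returning JSON. A sketch of how a client could query such an endpoint by routing an http.Client through the socket; the socket path and URL path are taken from the defaults and constants above, but this is not the actual zrepl client code:

package main

import (
	"context"
	"fmt"
	"io"
	"net"
	"net/http"
)

func main() {
	sockpath := "/var/run/zrepl/control" // default Sockpath from the global config

	client := &http.Client{
		Transport: &http.Transport{
			// Ignore the host part of the URL and always dial the unix socket.
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				var d net.Dialer
				return d.DialContext(ctx, "unix", sockpath)
			},
		},
	}

	resp, err := client.Get("http://zrepl/status") // "/status" == ControlJobEndpointStatus
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}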
@ -1,216 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"context"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/rpc"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type LocalJob struct {
|
||||
Name string
|
||||
Mapping *DatasetMapFilter
|
||||
SnapshotPrefix string
|
||||
Interval time.Duration
|
||||
InitialReplPolicy InitialReplPolicy
|
||||
PruneLHS PrunePolicy
|
||||
PruneRHS PrunePolicy
|
||||
Debug JobDebugSettings
|
||||
snapperTask *Task
|
||||
mainTask *Task
|
||||
handlerTask *Task
|
||||
pruneRHSTask *Task
|
||||
pruneLHSTask *Task
|
||||
}
|
||||
|
||||
func parseLocalJob(c JobParsingContext, name string, i map[string]interface{}) (j *LocalJob, err error) {
|
||||
|
||||
var asMap struct {
|
||||
Mapping map[string]string
|
||||
SnapshotPrefix string `mapstructure:"snapshot_prefix"`
|
||||
Interval string
|
||||
InitialReplPolicy string `mapstructure:"initial_repl_policy"`
|
||||
PruneLHS map[string]interface{} `mapstructure:"prune_lhs"`
|
||||
PruneRHS map[string]interface{} `mapstructure:"prune_rhs"`
|
||||
Debug map[string]interface{}
|
||||
}
|
||||
|
||||
if err = mapstructure.Decode(i, &asMap); err != nil {
|
||||
err = errors.Wrap(err, "mapstructure error")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j = &LocalJob{Name: name}
|
||||
|
||||
if j.Mapping, err = parseDatasetMapFilter(asMap.Mapping, false); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if j.SnapshotPrefix, err = parseSnapshotPrefix(asMap.SnapshotPrefix); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if j.Interval, err = parsePostitiveDuration(asMap.Interval); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse interval")
|
||||
return
|
||||
}
|
||||
|
||||
if j.InitialReplPolicy, err = parseInitialReplPolicy(asMap.InitialReplPolicy, DEFAULT_INITIAL_REPL_POLICY); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if j.PruneLHS, err = parsePrunePolicy(asMap.PruneLHS, true); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'prune_lhs'")
|
||||
return
|
||||
}
|
||||
if j.PruneRHS, err = parsePrunePolicy(asMap.PruneRHS, false); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'prune_rhs'")
|
||||
return
|
||||
}
|
||||
|
||||
if err = mapstructure.Decode(asMap.Debug, &j.Debug); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'debug'")
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (j *LocalJob) JobName() string {
|
||||
return j.Name
|
||||
}
|
||||
|
||||
func (j *LocalJob) JobType() JobType { return JobTypeLocal }
|
||||
|
||||
func (j *LocalJob) JobStart(ctx context.Context) {
|
||||
|
||||
rootLog := ctx.Value(contextKeyLog).(Logger)
|
||||
|
||||
j.snapperTask = NewTask("snapshot", j, rootLog)
|
||||
j.mainTask = NewTask("main", j, rootLog)
|
||||
j.handlerTask = NewTask("handler", j, rootLog)
|
||||
j.pruneRHSTask = NewTask("prune_rhs", j, rootLog)
|
||||
j.pruneLHSTask = NewTask("prune_lhs", j, rootLog)
|
||||
|
||||
local := rpc.NewLocalRPC()
|
||||
// Allow access to any dataset since we control what mapping
|
||||
// is passed to the pull routine.
|
||||
// All local datasets will be passed to its Map() function,
|
||||
// but only those for which a mapping exists will actually be pulled.
|
||||
// We can pay this small performance penalty for now.
|
||||
handler := NewHandler(j.handlerTask.Log(), localPullACL{}, NewPrefixFilter(j.SnapshotPrefix))
|
||||
|
||||
registerEndpoints(local, handler)
|
||||
|
||||
snapper := IntervalAutosnap{
|
||||
task: j.snapperTask,
|
||||
DatasetFilter: j.Mapping.AsFilter(),
|
||||
Prefix: j.SnapshotPrefix,
|
||||
SnapshotInterval: j.Interval,
|
||||
}
|
||||
|
||||
plhs, err := j.Pruner(j.pruneLHSTask, PrunePolicySideLeft, false)
|
||||
if err != nil {
|
||||
rootLog.WithError(err).Error("error creating lhs pruner")
|
||||
return
|
||||
}
|
||||
prhs, err := j.Pruner(j.pruneRHSTask, PrunePolicySideRight, false)
|
||||
if err != nil {
|
||||
rootLog.WithError(err).Error("error creating rhs pruner")
|
||||
return
|
||||
}
|
||||
|
||||
didSnaps := make(chan struct{})
|
||||
go snapper.Run(ctx, didSnaps)
|
||||
|
||||
outer:
|
||||
for {
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
j.mainTask.Log().WithError(ctx.Err()).Info("context")
|
||||
break outer
|
||||
case <-didSnaps:
|
||||
j.mainTask.Log().Debug("finished taking snapshots")
|
||||
j.mainTask.Log().Info("starting replication procedure")
|
||||
}
|
||||
|
||||
j.mainTask.Log().Debug("replicating from lhs to rhs")
|
||||
j.mainTask.Enter("replicate")
|
||||
puller := Puller{j.mainTask, local, j.Mapping, j.InitialReplPolicy}
|
||||
puller.Pull()
|
||||
j.mainTask.Finish()
|
||||
|
||||
// use a ctx as soon as Pull gains ctx support
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
break outer
|
||||
default:
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
j.mainTask.Log().Info("pruning lhs")
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
plhs.Run(ctx)
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
j.mainTask.Log().Info("pruning rhs")
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
prhs.Run(ctx)
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
wg.Wait()
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (j *LocalJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{Tasks: []*TaskStatus{
|
||||
j.snapperTask.Status(),
|
||||
j.pruneLHSTask.Status(),
|
||||
j.pruneRHSTask.Status(),
|
||||
j.mainTask.Status(),
|
||||
}}, nil
|
||||
}
|
||||
|
||||
func (j *LocalJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
|
||||
var dsfilter zfs.DatasetFilter
|
||||
var pp PrunePolicy
|
||||
switch side {
|
||||
case PrunePolicySideLeft:
|
||||
pp = j.PruneLHS
|
||||
dsfilter = j.Mapping.AsFilter()
|
||||
case PrunePolicySideRight:
|
||||
pp = j.PruneRHS
|
||||
dsfilter, err = j.Mapping.InvertedFilter()
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot invert mapping for prune_rhs")
|
||||
return
|
||||
}
|
||||
default:
|
||||
err = errors.Errorf("must be either left or right side")
|
||||
return
|
||||
}
|
||||
|
||||
p = Pruner{
|
||||
task,
|
||||
time.Now(),
|
||||
dryRun,
|
||||
dsfilter,
|
||||
j.SnapshotPrefix,
|
||||
pp,
|
||||
}
|
||||
|
||||
return
|
||||
}
|
@ -1,99 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"net"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type PrometheusJob struct {
|
||||
Name string
|
||||
Listen string
|
||||
}
|
||||
|
||||
var prom struct {
|
||||
taskLastActiveStart *prometheus.GaugeVec
|
||||
taskLastActiveDuration *prometheus.GaugeVec
|
||||
taskLogEntries *prometheus.CounterVec
|
||||
}
|
||||
|
||||
func init() {
|
||||
prom.taskLastActiveStart = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "daemon",
|
||||
Name: "task_last_active_start",
|
||||
Help: "point in time at which the job task last left idle state",
|
||||
}, []string{"zrepl_job", "job_type", "task"})
|
||||
prom.taskLastActiveDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "daemon",
|
||||
Name: "task_last_active_duration",
|
||||
Help: "seconds that the last run ob a job task spent between leaving and re-entering idle state",
|
||||
}, []string{"zrepl_job", "job_type", "task"})
|
||||
prom.taskLogEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "daemon",
|
||||
Name: "task_log_entries",
|
||||
Help: "number of log entries per job task and level",
|
||||
}, []string{"zrepl_job", "job_type", "task", "level"})
|
||||
prometheus.MustRegister(prom.taskLastActiveStart)
|
||||
prometheus.MustRegister(prom.taskLastActiveDuration)
|
||||
prometheus.MustRegister(prom.taskLogEntries)
|
||||
}
|
||||
|
||||
func parsePrometheusJob(c JobParsingContext, name string, i map[string]interface{}) (j *PrometheusJob, err error) {
|
||||
var s struct {
|
||||
Listen string
|
||||
}
|
||||
if err := mapstructure.Decode(i, &s); err != nil {
|
||||
return nil, errors.Wrap(err, "mapstructure error")
|
||||
}
|
||||
if s.Listen == "" {
|
||||
return nil, errors.New("must specify 'listen' attribute")
|
||||
}
|
||||
return &PrometheusJob{name, s.Listen}, nil
|
||||
}
|
||||
|
||||
func (j *PrometheusJob) JobName() string { return j.Name }
|
||||
|
||||
func (j *PrometheusJob) JobType() JobType { return JobTypePrometheus }
|
||||
|
||||
func (j *PrometheusJob) JobStart(ctx context.Context) {
|
||||
|
||||
if err := zfs.PrometheusRegister(prometheus.DefaultRegisterer); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
log := ctx.Value(contextKeyLog).(Logger)
|
||||
task := NewTask("main", j, log)
|
||||
log = task.Log()
|
||||
|
||||
l, err := net.Listen("tcp", j.Listen)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("cannot listen")
|
||||
}
|
||||
go func() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
l.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
err = http.Serve(l, mux)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error while serving")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (*PrometheusJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{}, nil
|
||||
}
|
@ -1,197 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/rpc"
|
||||
"github.com/zrepl/zrepl/util"
|
||||
)
|
||||
|
||||
type PullJob struct {
|
||||
Name string
|
||||
Connect RWCConnecter
|
||||
Interval time.Duration
|
||||
Mapping *DatasetMapFilter
|
||||
// constructed from mapping during parsing
|
||||
pruneFilter *DatasetMapFilter
|
||||
SnapshotPrefix string
|
||||
InitialReplPolicy InitialReplPolicy
|
||||
Prune PrunePolicy
|
||||
Debug JobDebugSettings
|
||||
|
||||
task *Task
|
||||
}
|
||||
|
||||
func parsePullJob(c JobParsingContext, name string, i map[string]interface{}) (j *PullJob, err error) {
|
||||
|
||||
var asMap struct {
|
||||
Connect map[string]interface{}
|
||||
Interval string
|
||||
Mapping map[string]string
|
||||
InitialReplPolicy string `mapstructure:"initial_repl_policy"`
|
||||
Prune map[string]interface{}
|
||||
SnapshotPrefix string `mapstructure:"snapshot_prefix"`
|
||||
Debug map[string]interface{}
|
||||
}
|
||||
|
||||
if err = mapstructure.Decode(i, &asMap); err != nil {
|
||||
err = errors.Wrap(err, "mapstructure error")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j = &PullJob{Name: name}
|
||||
|
||||
j.Connect, err = parseSSHStdinserverConnecter(asMap.Connect)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'connect'")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if j.Interval, err = parsePostitiveDuration(asMap.Interval); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'interval'")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j.Mapping, err = parseDatasetMapFilter(asMap.Mapping, false)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'mapping'")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if j.pruneFilter, err = j.Mapping.InvertedFilter(); err != nil {
|
||||
err = errors.Wrap(err, "cannot automatically invert 'mapping' for prune job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j.InitialReplPolicy, err = parseInitialReplPolicy(asMap.InitialReplPolicy, DEFAULT_INITIAL_REPL_POLICY)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'initial_repl_policy'")
|
||||
return
|
||||
}
|
||||
|
||||
if j.SnapshotPrefix, err = parseSnapshotPrefix(asMap.SnapshotPrefix); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if j.Prune, err = parsePrunePolicy(asMap.Prune, false); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse prune policy")
|
||||
return
|
||||
}
|
||||
|
||||
if err = mapstructure.Decode(asMap.Debug, &j.Debug); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'debug'")
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (j *PullJob) JobName() string {
|
||||
return j.Name
|
||||
}
|
||||
|
||||
func (j *PullJob) JobType() JobType { return JobTypePull }
|
||||
|
||||
func (j *PullJob) JobStart(ctx context.Context) {
|
||||
|
||||
log := ctx.Value(contextKeyLog).(Logger)
|
||||
defer log.Info("exiting")
|
||||
j.task = NewTask("main", j, log)
|
||||
|
||||
// j.task is idle here
|
||||
|
||||
ticker := time.NewTicker(j.Interval)
|
||||
for {
|
||||
j.doRun(ctx)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
j.task.Log().WithError(ctx.Err()).Info("context")
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (j *PullJob) doRun(ctx context.Context) {
|
||||
|
||||
j.task.Enter("run")
|
||||
defer j.task.Finish()
|
||||
|
||||
j.task.Log().Info("connecting")
|
||||
rwc, err := j.Connect.Connect()
|
||||
if err != nil {
|
||||
j.task.Log().WithError(err).Error("error connecting")
|
||||
return
|
||||
}
|
||||
|
||||
rwc, err = util.NewReadWriteCloserLogger(rwc, j.Debug.Conn.ReadDump, j.Debug.Conn.WriteDump)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
client := rpc.NewClient(rwc)
|
||||
if j.Debug.RPC.Log {
|
||||
client.SetLogger(j.task.Log(), true)
|
||||
}
|
||||
|
||||
j.task.Enter("pull")
|
||||
puller := Puller{j.task, client, j.Mapping, j.InitialReplPolicy}
|
||||
puller.Pull()
|
||||
closeRPCWithTimeout(j.task, client, time.Second*1, "")
|
||||
rwc.Close()
|
||||
j.task.Finish()
|
||||
|
||||
j.task.Enter("prune")
|
||||
pruner, err := j.Pruner(j.task, PrunePolicySideDefault, false)
|
||||
if err != nil {
|
||||
j.task.Log().WithError(err).Error("error creating pruner")
|
||||
} else {
|
||||
pruner.Run(ctx)
|
||||
}
|
||||
j.task.Finish()
|
||||
|
||||
}
|
||||
|
||||
func (j *PullJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{Tasks: []*TaskStatus{j.task.Status()}}, nil
|
||||
}
|
||||
|
||||
func (j *PullJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
p = Pruner{
|
||||
task,
|
||||
time.Now(),
|
||||
dryRun,
|
||||
j.pruneFilter,
|
||||
j.SnapshotPrefix,
|
||||
j.Prune,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func closeRPCWithTimeout(task *Task, remote rpc.RPCClient, timeout time.Duration, goodbye string) {
|
||||
|
||||
task.Log().Info("closing rpc connection")
|
||||
|
||||
ch := make(chan error)
|
||||
go func() {
|
||||
ch <- remote.Close()
|
||||
close(ch)
|
||||
}()
|
||||
|
||||
var err error
|
||||
select {
|
||||
case <-time.After(timeout):
|
||||
err = fmt.Errorf("timeout exceeded (%s)", timeout)
|
||||
case closeRequestErr := <-ch:
|
||||
err = closeRequestErr
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
task.Log().WithError(err).Error("error closing connection")
|
||||
}
|
||||
return
}
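closeRPCWithTimeout above races a blocking Close() against a timer so a hung connection cannot stall the job loop forever. The same idea as a stand-alone helper; using a buffered channel is a small deviation from the original so the closer goroutine cannot stay blocked if the timeout wins:

package main

import (
	"fmt"
	"io"
	"time"
)

// closeWithTimeout bounds a potentially blocking Close() call.
// The buffered channel lets the closer goroutine finish even after the timeout fires.
func closeWithTimeout(c io.Closer, timeout time.Duration) error {
	ch := make(chan error, 1)
	go func() { ch <- c.Close() }()
	select {
	case err := <-ch:
		return err
	case <-time.After(timeout):
		return fmt.Errorf("timeout exceeded (%s)", timeout)
	}
}

// slowCloser simulates a peer that takes too long to hang up.
type slowCloser struct{ delay time.Duration }

func (s slowCloser) Close() error { time.Sleep(s.delay); return nil }

func main() {
	err := closeWithTimeout(slowCloser{2 * time.Second}, 1*time.Second)
	fmt.Println(err) // timeout exceeded (1s)
}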
@ -1,252 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
mapstructure "github.com/mitchellh/mapstructure"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/rpc"
|
||||
"github.com/zrepl/zrepl/util"
|
||||
)
|
||||
|
||||
type SourceJob struct {
|
||||
Name string
|
||||
Serve AuthenticatedChannelListenerFactory
|
||||
Filesystems *DatasetMapFilter
|
||||
SnapshotPrefix string
|
||||
Interval time.Duration
|
||||
Prune PrunePolicy
|
||||
Debug JobDebugSettings
|
||||
serveTask *Task
|
||||
autosnapTask *Task
|
||||
pruneTask *Task
|
||||
}
|
||||
|
||||
func parseSourceJob(c JobParsingContext, name string, i map[string]interface{}) (j *SourceJob, err error) {
|
||||
|
||||
var asMap struct {
|
||||
Serve map[string]interface{}
|
||||
Filesystems map[string]string
|
||||
SnapshotPrefix string `mapstructure:"snapshot_prefix"`
|
||||
Interval string
|
||||
Prune map[string]interface{}
|
||||
Debug map[string]interface{}
|
||||
}
|
||||
|
||||
if err = mapstructure.Decode(i, &asMap); err != nil {
|
||||
err = errors.Wrap(err, "mapstructure error")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j = &SourceJob{Name: name}
|
||||
|
||||
if j.Serve, err = parseAuthenticatedChannelListenerFactory(c, asMap.Serve); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if j.Filesystems, err = parseDatasetMapFilter(asMap.Filesystems, true); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if j.SnapshotPrefix, err = parseSnapshotPrefix(asMap.SnapshotPrefix); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if j.Interval, err = parsePostitiveDuration(asMap.Interval); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'interval'")
|
||||
return
|
||||
}
|
||||
|
||||
if j.Prune, err = parsePrunePolicy(asMap.Prune, true); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'prune'")
|
||||
return
|
||||
}
|
||||
|
||||
if err = mapstructure.Decode(asMap.Debug, &j.Debug); err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'debug'")
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (j *SourceJob) JobName() string {
|
||||
return j.Name
|
||||
}
|
||||
|
||||
func (j *SourceJob) JobType() JobType { return JobTypeSource }
|
||||
|
||||
func (j *SourceJob) JobStart(ctx context.Context) {
|
||||
|
||||
log := ctx.Value(contextKeyLog).(Logger)
|
||||
defer log.Info("exiting")
|
||||
|
||||
j.autosnapTask = NewTask("autosnap", j, log)
|
||||
j.pruneTask = NewTask("prune", j, log)
|
||||
j.serveTask = NewTask("serve", j, log)
|
||||
|
||||
a := IntervalAutosnap{j.autosnapTask, j.Filesystems, j.SnapshotPrefix, j.Interval}
|
||||
p, err := j.Pruner(j.pruneTask, PrunePolicySideDefault, false)
|
||||
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error creating pruner")
|
||||
return
|
||||
}
|
||||
|
||||
didSnaps := make(chan struct{})
|
||||
|
||||
go j.serve(ctx, j.serveTask)
|
||||
go a.Run(ctx, didSnaps)
|
||||
|
||||
outer:
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
break outer
|
||||
case <-didSnaps:
|
||||
log.Info("starting pruner")
|
||||
p.Run(ctx)
|
||||
log.Info("pruner done")
|
||||
}
|
||||
}
|
||||
log.WithError(ctx.Err()).Info("context")
|
||||
|
||||
}
|
||||
|
||||
func (j *SourceJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||
return &JobStatus{
|
||||
Tasks: []*TaskStatus{
|
||||
j.autosnapTask.Status(),
|
||||
j.pruneTask.Status(),
|
||||
j.serveTask.Status(),
|
||||
}}, nil
|
||||
}
|
||||
|
||||
func (j *SourceJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
||||
p = Pruner{
|
||||
task,
|
||||
time.Now(),
|
||||
dryRun,
|
||||
j.Filesystems,
|
||||
j.SnapshotPrefix,
|
||||
j.Prune,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (j *SourceJob) serve(ctx context.Context, task *Task) {
|
||||
|
||||
listener, err := j.Serve.Listen()
|
||||
if err != nil {
|
||||
task.Log().WithError(err).Error("error listening")
|
||||
return
|
||||
}
|
||||
|
||||
type rwcChanMsg struct {
|
||||
rwc io.ReadWriteCloser
|
||||
err error
|
||||
}
|
||||
rwcChan := make(chan rwcChanMsg)
|
||||
|
||||
// Serve connections until interrupted or error
|
||||
outer:
|
||||
for {
|
||||
|
||||
go func() {
|
||||
rwc, err := listener.Accept()
|
||||
if err != nil {
|
||||
rwcChan <- rwcChanMsg{rwc, err}
|
||||
close(rwcChan)
|
||||
return
|
||||
}
|
||||
rwcChan <- rwcChanMsg{rwc, err}
|
||||
}()
|
||||
|
||||
select {
|
||||
|
||||
case rwcMsg := <-rwcChan:
|
||||
|
||||
if rwcMsg.err != nil {
|
||||
task.Log().WithError(err).Error("error accepting connection")
|
||||
break outer
|
||||
}
|
||||
|
||||
j.handleConnection(rwcMsg.rwc, task)
|
||||
|
||||
case <-ctx.Done():
|
||||
task.Log().WithError(ctx.Err()).Info("context")
|
||||
break outer
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
task.Enter("close_listener")
|
||||
defer task.Finish()
|
||||
err = listener.Close()
|
||||
if err != nil {
|
||||
task.Log().WithError(err).Error("error closing listener")
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func (j *SourceJob) handleConnection(rwc io.ReadWriteCloser, task *Task) {
|
||||
|
||||
task.Enter("handle_connection")
|
||||
defer task.Finish()
|
||||
|
||||
task.Log().Info("handling client connection")
|
||||
|
||||
rwc, err := util.NewReadWriteCloserLogger(rwc, j.Debug.Conn.ReadDump, j.Debug.Conn.WriteDump)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// construct connection handler
|
||||
handler := NewHandler(task.Log(), j.Filesystems, NewPrefixFilter(j.SnapshotPrefix))
|
||||
|
||||
// handle connection
|
||||
rpcServer := rpc.NewServer(rwc)
|
||||
if j.Debug.RPC.Log {
|
||||
rpclog := task.Log().WithField("subsystem", "rpc")
|
||||
rpcServer.SetLogger(rpclog, true)
|
||||
}
|
||||
registerEndpoints(rpcServer, handler)
|
||||
if err = rpcServer.Serve(); err != nil {
|
||||
task.Log().WithError(err).Error("error serving connection")
|
||||
}
|
||||
|
||||
// wait for client to close connection
|
||||
// FIXME: we cannot just close it like we would do with a TCP socket because
// FIXME: go-netssh's Close() may overtake the remaining data in the pipe
|
||||
const CLIENT_HANGUP_TIMEOUT = 1 * time.Second
|
||||
task.Log().
|
||||
WithField("timeout", CLIENT_HANGUP_TIMEOUT).
|
||||
Debug("waiting for client to hang up")
|
||||
|
||||
wchan := make(chan error)
|
||||
go func() {
|
||||
var pseudo [1]byte
|
||||
_, err := io.ReadFull(rwc, pseudo[:])
|
||||
wchan <- err
|
||||
}()
|
||||
var werr error
|
||||
select {
|
||||
case werr = <-wchan:
|
||||
// all right
|
||||
case <-time.After(CLIENT_HANGUP_TIMEOUT):
|
||||
werr = errors.New("client did not close connection within timeout")
|
||||
}
|
||||
if werr != nil && werr != io.EOF {
|
||||
task.Log().WithError(werr).
|
||||
Error("error waiting for client to hang up")
|
||||
}
|
||||
task.Log().Info("closing client connection")
|
||||
if err = rwc.Close(); err != nil {
|
||||
task.Log().WithError(err).Error("error force-closing connection")
|
||||
}
|
||||
}
|
@ -1,251 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"github.com/mattn/go-isatty"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
type LoggingConfig struct {
|
||||
Outlets *logger.Outlets
|
||||
}
|
||||
|
||||
type MetadataFlags int64
|
||||
|
||||
const (
|
||||
MetadataTime MetadataFlags = 1 << iota
|
||||
MetadataLevel
|
||||
|
||||
MetadataNone MetadataFlags = 0
|
||||
MetadataAll MetadataFlags = ^0
|
||||
)
|
||||
|
||||
func parseLogging(i interface{}) (c *LoggingConfig, err error) {
|
||||
|
||||
c = &LoggingConfig{}
|
||||
c.Outlets = logger.NewOutlets()
|
||||
|
||||
var asList []interface{}
|
||||
if err = mapstructure.Decode(i, &asList); err != nil {
|
||||
return nil, errors.Wrap(err, "mapstructure error")
|
||||
}
|
||||
if len(asList) == 0 {
|
||||
// Default config
|
||||
out := WriterOutlet{&HumanFormatter{}, os.Stdout}
|
||||
c.Outlets.Add(out, logger.Warn)
|
||||
return
|
||||
}
|
||||
|
||||
var syslogOutlets, stdoutOutlets int
|
||||
for lei, le := range asList {
|
||||
|
||||
outlet, minLevel, err := parseOutlet(le)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "cannot parse outlet #%d", lei)
|
||||
}
|
||||
var _ logger.Outlet = WriterOutlet{}
|
||||
var _ logger.Outlet = &SyslogOutlet{}
|
||||
switch outlet.(type) {
|
||||
case *SyslogOutlet:
|
||||
syslogOutlets++
|
||||
case WriterOutlet:
|
||||
stdoutOutlets++
|
||||
}
|
||||
|
||||
c.Outlets.Add(outlet, minLevel)
|
||||
|
||||
}
|
||||
|
||||
if syslogOutlets > 1 {
|
||||
return nil, errors.Errorf("can only define one 'syslog' outlet")
|
||||
}
|
||||
if stdoutOutlets > 1 {
|
||||
return nil, errors.Errorf("can only define one 'stdout' outlet")
|
||||
}
|
||||
|
||||
return c, nil
|
||||
|
||||
}
|
||||
|
||||
func parseLogFormat(i interface{}) (f EntryFormatter, err error) {
|
||||
var is string
|
||||
switch j := i.(type) {
|
||||
case string:
|
||||
is = j
|
||||
default:
|
||||
return nil, errors.Errorf("invalid log format: wrong type: %T", i)
|
||||
}
|
||||
|
||||
switch is {
|
||||
case "human":
|
||||
return &HumanFormatter{}, nil
|
||||
case "logfmt":
|
||||
return &LogfmtFormatter{}, nil
|
||||
case "json":
|
||||
return &JSONFormatter{}, nil
|
||||
default:
|
||||
return nil, errors.Errorf("invalid log format: '%s'", is)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func parseOutlet(i interface{}) (o logger.Outlet, level logger.Level, err error) {
|
||||
|
||||
var in struct {
|
||||
Outlet string
|
||||
Level string
|
||||
Format string
|
||||
}
|
||||
if err = mapstructure.Decode(i, &in); err != nil {
|
||||
err = errors.Wrap(err, "mapstructure error")
|
||||
return
|
||||
}
|
||||
if in.Outlet == "" || in.Level == "" || in.Format == "" {
|
||||
err = errors.Errorf("must specify 'outlet', 'level' and 'format' field")
|
||||
return
|
||||
}
|
||||
|
||||
minLevel, err := logger.ParseLevel(in.Level)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot parse 'level' field")
|
||||
return
|
||||
}
|
||||
formatter, err := parseLogFormat(in.Format)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot parse")
|
||||
return
|
||||
}
|
||||
|
||||
switch in.Outlet {
|
||||
case "stdout":
|
||||
o, err = parseStdoutOutlet(i, formatter)
|
||||
case "tcp":
|
||||
o, err = parseTCPOutlet(i, formatter)
|
||||
case "syslog":
|
||||
o, err = parseSyslogOutlet(i, formatter)
|
||||
default:
|
||||
err = errors.Errorf("unknown outlet type '%s'", in.Outlet)
|
||||
}
|
||||
return o, minLevel, err
|
||||
|
||||
}
|
||||
|
||||
func parseStdoutOutlet(i interface{}, formatter EntryFormatter) (WriterOutlet, error) {
|
||||
|
||||
var in struct {
|
||||
Time bool
|
||||
}
|
||||
if err := mapstructure.Decode(i, &in); err != nil {
|
||||
return WriterOutlet{}, errors.Wrap(err, "invalid structure for stdout outlet")
|
||||
}
|
||||
|
||||
flags := MetadataAll
|
||||
writer := os.Stdout
|
||||
if !isatty.IsTerminal(writer.Fd()) && !in.Time {
|
||||
flags &= ^MetadataTime
|
||||
}
|
||||
|
||||
formatter.SetMetadataFlags(flags)
|
||||
return WriterOutlet{
|
||||
formatter,
|
||||
os.Stdout,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseTCPOutlet(i interface{}, formatter EntryFormatter) (out *TCPOutlet, err error) {
|
||||
|
||||
var in struct {
|
||||
Net string
|
||||
Address string
|
||||
RetryInterval string `mapstructure:"retry_interval"`
|
||||
TLS *struct {
|
||||
CA string
|
||||
Cert string
|
||||
Key string
|
||||
}
|
||||
}
|
||||
if err = mapstructure.Decode(i, &in); err != nil {
|
||||
return nil, errors.Wrap(err, "mapstructure error")
|
||||
}
|
||||
|
||||
retryInterval, err := time.ParseDuration(in.RetryInterval)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot parse 'retry_interval'")
|
||||
}
|
||||
|
||||
if len(in.Net) == 0 {
|
||||
return nil, errors.New("field 'net' must not be empty")
|
||||
}
|
||||
if len(in.Address) == 0 {
|
||||
return nil, errors.New("field 'address' must not be empty")
|
||||
}
|
||||
|
||||
var tlsConfig *tls.Config
|
||||
if in.TLS != nil {
|
||||
|
||||
cert, err := tls.LoadX509KeyPair(in.TLS.Cert, in.TLS.Key)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot load client cert")
|
||||
}
|
||||
|
||||
var rootCAs *x509.CertPool
|
||||
if in.TLS.CA == "" {
|
||||
if rootCAs, err = x509.SystemCertPool(); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot open system cert pool")
|
||||
}
|
||||
} else {
|
||||
rootCAs = x509.NewCertPool()
|
||||
rootCAPEM, err := ioutil.ReadFile(in.TLS.CA)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot load CA cert")
|
||||
}
|
||||
if !rootCAs.AppendCertsFromPEM(rootCAPEM) {
|
||||
return nil, errors.New("cannot parse CA cert")
|
||||
}
|
||||
}
|
||||
if err != nil && in.TLS.CA == "" {
|
||||
return nil, errors.Wrap(err, "cannot load root ca pool")
|
||||
}
|
||||
|
||||
tlsConfig = &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
RootCAs: rootCAs,
|
||||
}
|
||||
|
||||
tlsConfig.BuildNameToCertificate()
|
||||
}
|
||||
|
||||
formatter.SetMetadataFlags(MetadataAll)
|
||||
return NewTCPOutlet(formatter, in.Net, in.Address, tlsConfig, retryInterval), nil
|
||||
|
||||
}
|
||||
|
||||
func parseSyslogOutlet(i interface{}, formatter EntryFormatter) (out *SyslogOutlet, err error) {
|
||||
|
||||
var in struct {
|
||||
RetryInterval string `mapstructure:"retry_interval"`
|
||||
}
|
||||
if err = mapstructure.Decode(i, &in); err != nil {
|
||||
return nil, errors.Wrap(err, "mapstructure error")
|
||||
}
|
||||
|
||||
out = &SyslogOutlet{}
|
||||
out.Formatter = formatter
|
||||
out.Formatter.SetMetadataFlags(MetadataNone)
|
||||
|
||||
out.RetryInterval = 0 // default to 0 as we assume local syslog will just work
|
||||
if in.RetryInterval != "" {
|
||||
out.RetryInterval, err = time.ParseDuration(in.RetryInterval)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot parse 'retry_interval'")
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
@ -1,322 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
|
||||
"fmt"
|
||||
yaml "github.com/go-yaml/yaml"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/pkg/errors"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
var ConfigFileDefaultLocations []string = []string{
|
||||
"/etc/zrepl/zrepl.yml",
|
||||
"/usr/local/etc/zrepl/zrepl.yml",
|
||||
}
|
||||
|
||||
const (
|
||||
JobNameControl string = "control"
|
||||
)
|
||||
|
||||
var ReservedJobNames []string = []string{
|
||||
JobNameControl,
|
||||
}
|
||||
|
||||
type ConfigParsingContext struct {
|
||||
Global *Global
|
||||
}
|
||||
|
||||
func ParseConfig(path string) (config *Config, err error) {
|
||||
|
||||
if path == "" {
|
||||
// Try default locations
|
||||
for _, l := range ConfigFileDefaultLocations {
|
||||
stat, err := os.Stat(l)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if !stat.Mode().IsRegular() {
|
||||
err = errors.Errorf("file at default location is not a regular file: %s", l)
|
||||
continue
|
||||
}
|
||||
path = l
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var i interface{}
|
||||
|
||||
var bytes []byte
|
||||
|
||||
if bytes, err = ioutil.ReadFile(path); err != nil {
|
||||
err = errors.WithStack(err)
|
||||
return
|
||||
}
|
||||
|
||||
if err = yaml.Unmarshal(bytes, &i); err != nil {
|
||||
err = errors.WithStack(err)
|
||||
return
|
||||
}
|
||||
|
||||
return parseConfig(i)
|
||||
}
|
||||
|
||||
func parseConfig(i interface{}) (c *Config, err error) {
|
||||
|
||||
var asMap struct {
|
||||
Global map[string]interface{}
|
||||
Jobs []map[string]interface{}
|
||||
}
|
||||
if err := mapstructure.Decode(i, &asMap); err != nil {
|
||||
return nil, errors.Wrap(err, "config root must be a dict")
|
||||
}
|
||||
|
||||
c = &Config{}
|
||||
|
||||
// Parse global with defaults
|
||||
c.Global.Serve.Stdinserver.SockDir = "/var/run/zrepl/stdinserver"
|
||||
c.Global.Control.Sockpath = "/var/run/zrepl/control"
|
||||
|
||||
err = mapstructure.Decode(asMap.Global, &c.Global)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "mapstructure error on 'global' section: %s")
|
||||
return
|
||||
}
|
||||
|
||||
if c.Global.logging, err = parseLogging(asMap.Global["logging"]); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot parse logging section")
|
||||
}
|
||||
|
||||
cpc := ConfigParsingContext{&c.Global}
|
||||
jpc := JobParsingContext{cpc}
|
||||
c.Jobs = make(map[string]Job, len(asMap.Jobs))
|
||||
|
||||
// FIXME internal jobs should not be mixed with user jobs
|
||||
// Monitoring Jobs
|
||||
var monJobs []map[string]interface{}
|
||||
if err := mapstructure.Decode(asMap.Global["monitoring"], &monJobs); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot parse monitoring section")
|
||||
}
|
||||
for i, jc := range monJobs {
|
||||
if jc["name"] == "" || jc["name"] == nil {
|
||||
// FIXME internal jobs should not require a name...
|
||||
jc["name"] = fmt.Sprintf("prometheus-%d", i)
|
||||
}
|
||||
job, err := parseJob(jpc, jc)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "cannot parse monitoring job #%d", i)
|
||||
}
|
||||
if job.JobType() != JobTypePrometheus {
|
||||
return nil, errors.Errorf("monitoring job #%d has invalid job type", i)
|
||||
}
|
||||
c.Jobs[job.JobName()] = job
|
||||
}
|
||||
|
||||
// Regular Jobs
|
||||
for i := range asMap.Jobs {
|
||||
job, err := parseJob(jpc, asMap.Jobs[i])
|
||||
if err != nil {
|
||||
// Try to find its name
|
||||
namei, ok := asMap.Jobs[i]["name"]
|
||||
if !ok {
|
||||
namei = fmt.Sprintf("<no name, entry #%d in list>", i)
|
||||
}
|
||||
err = errors.Wrapf(err, "cannot parse job '%v'", namei)
|
||||
return nil, err
|
||||
}
|
||||
jn := job.JobName()
|
||||
if _, ok := c.Jobs[jn]; ok {
|
||||
err = errors.Errorf("duplicate or invalid job name: %s", jn)
|
||||
return nil, err
|
||||
}
|
||||
c.Jobs[job.JobName()] = job
|
||||
}
|
||||
|
||||
cj, err := NewControlJob(JobNameControl, jpc.Global.Control.Sockpath)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot create control job")
|
||||
return
|
||||
}
|
||||
c.Jobs[JobNameControl] = cj
|
||||
|
||||
return c, nil
|
||||
|
||||
}
|
||||
|
||||
func extractStringField(i map[string]interface{}, key string, notempty bool) (field string, err error) {
|
||||
vi, ok := i[key]
|
||||
if !ok {
|
||||
err = errors.Errorf("must have field '%s'", key)
|
||||
return "", err
|
||||
}
|
||||
field, ok = vi.(string)
|
||||
if !ok {
|
||||
err = errors.Errorf("'%s' field must have type string", key)
|
||||
return "", err
|
||||
}
|
||||
if notempty && len(field) <= 0 {
|
||||
err = errors.Errorf("'%s' field must not be empty", key)
|
||||
return "", err
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type JobParsingContext struct {
|
||||
ConfigParsingContext
|
||||
}
|
||||
|
||||
func parseJob(c JobParsingContext, i map[string]interface{}) (j Job, err error) {
|
||||
|
||||
name, err := extractStringField(i, "name", true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, r := range ReservedJobNames {
|
||||
if name == r {
|
||||
err = errors.Errorf("job name '%s' is reserved", name)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
jobtypeStr, err := extractStringField(i, "type", true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobtype, err := ParseUserJobType(jobtypeStr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch jobtype {
|
||||
case JobTypePull:
|
||||
return parsePullJob(c, name, i)
|
||||
case JobTypeSource:
|
||||
return parseSourceJob(c, name, i)
|
||||
case JobTypeLocal:
|
||||
return parseLocalJob(c, name, i)
|
||||
case JobTypePrometheus:
|
||||
return parsePrometheusJob(c, name, i)
|
||||
default:
|
||||
panic(fmt.Sprintf("implementation error: unknown job type %s", jobtype))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func parseConnect(i map[string]interface{}) (c RWCConnecter, err error) {
|
||||
|
||||
t, err := extractStringField(i, "type", true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch t {
|
||||
case "ssh+stdinserver":
|
||||
return parseSSHStdinserverConnecter(i)
|
||||
default:
|
||||
return nil, errors.Errorf("unknown connection type '%s'", t)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func parseInitialReplPolicy(v interface{}, defaultPolicy InitialReplPolicy) (p InitialReplPolicy, err error) {
|
||||
s, ok := v.(string)
|
||||
if !ok {
|
||||
goto err
|
||||
}
|
||||
|
||||
switch {
|
||||
case s == "":
|
||||
p = defaultPolicy
|
||||
case s == "most_recent":
|
||||
p = InitialReplPolicyMostRecent
|
||||
case s == "all":
|
||||
p = InitialReplPolicyAll
|
||||
default:
|
||||
goto err
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
err:
|
||||
err = errors.New(fmt.Sprintf("expected InitialReplPolicy, got %#v", v))
|
||||
return
|
||||
}
|
||||
|
||||
func parsePrunePolicy(v map[string]interface{}, willSeeBookmarks bool) (p PrunePolicy, err error) {
|
||||
|
||||
policyName, err := extractStringField(v, "policy", true)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
switch policyName {
|
||||
case "grid":
|
||||
return parseGridPrunePolicy(v, willSeeBookmarks)
|
||||
case "noprune":
|
||||
return NoPrunePolicy{}, nil
|
||||
default:
|
||||
err = errors.Errorf("unknown policy '%s'", policyName)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func parseAuthenticatedChannelListenerFactory(c JobParsingContext, v map[string]interface{}) (p AuthenticatedChannelListenerFactory, err error) {
|
||||
|
||||
t, err := extractStringField(v, "type", true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch t {
|
||||
case "stdinserver":
|
||||
return parseStdinserverListenerFactory(c, v)
|
||||
default:
|
||||
err = errors.Errorf("unknown type '%s'", t)
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
var durationStringRegex *regexp.Regexp = regexp.MustCompile(`^\s*(\d+)\s*(s|m|h|d|w)\s*$`)
|
||||
|
||||
func parsePostitiveDuration(e string) (d time.Duration, err error) {
|
||||
comps := durationStringRegex.FindStringSubmatch(e)
|
||||
if len(comps) != 3 {
|
||||
err = fmt.Errorf("does not match regex: %s %#v", e, comps)
|
||||
return
|
||||
}
|
||||
|
||||
durationFactor, err := strconv.ParseInt(comps[1], 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if durationFactor <= 0 {
|
||||
return 0, errors.New("duration must be positive integer")
|
||||
}
|
||||
|
||||
var durationUnit time.Duration
|
||||
switch comps[2] {
|
||||
case "s":
|
||||
durationUnit = time.Second
|
||||
case "m":
|
||||
durationUnit = time.Minute
|
||||
case "h":
|
||||
durationUnit = time.Hour
|
||||
case "d":
|
||||
durationUnit = 24 * time.Hour
|
||||
case "w":
|
||||
durationUnit = 24 * 7 * time.Hour
|
||||
default:
|
||||
err = fmt.Errorf("contains unknown time unit '%s'", comps[2])
|
||||
return
|
||||
}
|
||||
|
||||
d = time.Duration(durationFactor) * durationUnit
|
||||
return
}
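parsePostitiveDuration accepts exactly one integer factor plus one unit, including d and w, which time.ParseDuration does not know. A stand-alone check against the same regex; the example strings are made up:

package main

import (
	"fmt"
	"regexp"
	"time"
)

// Same pattern as durationStringRegex above: one integer factor and one unit.
var re = regexp.MustCompile(`^\s*(\d+)\s*(s|m|h|d|w)\s*$`)

func main() {
	for _, s := range []string{"10m", "2d", "1w", "1h30m"} {
		fmt.Printf("%-8q matches: %v\n", s, re.MatchString(s))
	}
	// "1h30m" is rejected: the grammar allows exactly one factor and unit.
	// time.ParseDuration, in turn, does not know "d" or "w", which is why
	// the config parser maps those units to hours itself.
	_, err := time.ParseDuration("2d")
	fmt.Println(`time.ParseDuration("2d"):`, err)
}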
@ -1,245 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/util"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"math"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type GridPrunePolicy struct {
|
||||
RetentionGrid *util.RetentionGrid
|
||||
MaxBookmarks int
|
||||
}
|
||||
|
||||
const GridPrunePolicyMaxBookmarksKeepAll = -1
|
||||
|
||||
type retentionGridAdaptor struct {
|
||||
zfs.FilesystemVersion
|
||||
}
|
||||
|
||||
func (a retentionGridAdaptor) Date() time.Time {
|
||||
return a.Creation
|
||||
}
|
||||
|
||||
func (a retentionGridAdaptor) LessThan(b util.RetentionGridEntry) bool {
|
||||
return a.CreateTXG < b.(retentionGridAdaptor).CreateTXG
|
||||
}
|
||||
|
||||
// Prune filters snapshots with the retention grid.
|
||||
// Bookmarks are deleted such that KeepBookmarks are kept in the end.
|
||||
// The oldest bookmarks are removed first.
|
||||
func (p *GridPrunePolicy) Prune(_ *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error) {
|
||||
skeep, sremove := p.pruneSnapshots(versions)
|
||||
keep, remove = p.pruneBookmarks(skeep)
|
||||
remove = append(remove, sremove...)
|
||||
return keep, remove, nil
|
||||
}
|
||||
|
||||
func (p *GridPrunePolicy) pruneSnapshots(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {
|
||||
|
||||
// Build adaptors for retention grid
|
||||
keep = []zfs.FilesystemVersion{}
|
||||
adaptors := make([]util.RetentionGridEntry, 0)
|
||||
for fsv := range versions {
|
||||
if versions[fsv].Type != zfs.Snapshot {
|
||||
keep = append(keep, versions[fsv])
|
||||
continue
|
||||
}
|
||||
adaptors = append(adaptors, retentionGridAdaptor{versions[fsv]})
|
||||
}
|
||||
|
||||
sort.SliceStable(adaptors, func(i, j int) bool {
|
||||
return adaptors[i].LessThan(adaptors[j])
|
||||
})
|
||||
now := adaptors[len(adaptors)-1].Date()
|
||||
|
||||
// Evaluate retention grid
|
||||
keepa, removea := p.RetentionGrid.FitEntries(now, adaptors)
|
||||
|
||||
// Revert adaptors
|
||||
for i := range keepa {
|
||||
keep = append(keep, keepa[i].(retentionGridAdaptor).FilesystemVersion)
|
||||
}
|
||||
remove = make([]zfs.FilesystemVersion, len(removea))
|
||||
for i := range removea {
|
||||
remove[i] = removea[i].(retentionGridAdaptor).FilesystemVersion
|
||||
}
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func (p *GridPrunePolicy) pruneBookmarks(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {
|
||||
|
||||
if p.MaxBookmarks == GridPrunePolicyMaxBookmarksKeepAll {
|
||||
return versions, []zfs.FilesystemVersion{}
|
||||
}
|
||||
|
||||
keep = []zfs.FilesystemVersion{}
|
||||
bookmarks := make([]zfs.FilesystemVersion, 0)
|
||||
for fsv := range versions {
|
||||
if versions[fsv].Type != zfs.Bookmark {
|
||||
keep = append(keep, versions[fsv])
|
||||
continue
|
||||
}
|
||||
bookmarks = append(bookmarks, versions[fsv])
|
||||
}
|
||||
|
||||
if len(bookmarks) == 0 {
|
||||
return keep, []zfs.FilesystemVersion{}
|
||||
}
|
||||
if len(bookmarks) < p.MaxBookmarks {
|
||||
keep = append(keep, bookmarks...)
|
||||
return keep, []zfs.FilesystemVersion{}
|
||||
}
|
||||
|
||||
// NOTE: sorting descending by createtxg <=> sorting ascending wrt creation time
|
||||
sort.SliceStable(bookmarks, func(i, j int) bool {
|
||||
return (bookmarks[i].CreateTXG > bookmarks[j].CreateTXG)
|
||||
})
|
||||
|
||||
keep = append(keep, bookmarks[:p.MaxBookmarks]...)
|
||||
remove = bookmarks[p.MaxBookmarks:]
|
||||
|
||||
return keep, remove
|
||||
}
|
||||
|
||||
func parseGridPrunePolicy(e map[string]interface{}, willSeeBookmarks bool) (p *GridPrunePolicy, err error) {
|
||||
|
||||
const KeepBookmarksAllString = "all"
|
||||
var i struct {
|
||||
Grid string
|
||||
KeepBookmarks string `mapstructure:"keep_bookmarks"`
|
||||
}
|
||||
|
||||
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{Result: &i, WeaklyTypedInput: true})
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "mapstructure error")
|
||||
return
|
||||
}
|
||||
if err = dec.Decode(e); err != nil {
|
||||
err = errors.Wrapf(err, "mapstructure error")
|
||||
return
|
||||
}
|
||||
|
||||
// Parse grid
|
||||
intervals, err := parseRetentionGridIntervalsString(i.Grid)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("cannot parse retention grid: %s", err)
|
||||
return
|
||||
}
|
||||
// Assert intervals are of increasing length (not necessarily required, but indicates config mistake)
|
||||
lastDuration := time.Duration(0)
|
||||
for i := range intervals {
|
||||
|
||||
if intervals[i].Length < lastDuration {
|
||||
// If all intervals before were keep=all, this is ok
|
||||
allPrevKeepCountAll := true
|
||||
for j := i - 1; allPrevKeepCountAll && j >= 0; j-- {
|
||||
allPrevKeepCountAll = intervals[j].KeepCount == util.RetentionGridKeepCountAll
|
||||
}
|
||||
if allPrevKeepCountAll {
|
||||
goto isMonotonicIncrease
|
||||
}
|
||||
err = errors.New("retention grid interval length must be monotonically increasing")
|
||||
return
|
||||
}
|
||||
isMonotonicIncrease:
|
||||
lastDuration = intervals[i].Length
|
||||
|
||||
}
|
||||
|
||||
// Parse KeepBookmarks
|
||||
keepBookmarks := 0
|
||||
if i.KeepBookmarks == KeepBookmarksAllString || (i.KeepBookmarks == "" && !willSeeBookmarks) {
|
||||
keepBookmarks = GridPrunePolicyMaxBookmarksKeepAll
|
||||
} else {
|
||||
i, err := strconv.ParseInt(i.KeepBookmarks, 10, 32)
|
||||
if err != nil || i <= 0 || i > math.MaxInt32 {
|
||||
return nil, errors.Errorf("keep_bookmarks must be positive integer or 'all'")
|
||||
}
|
||||
keepBookmarks = int(i)
|
||||
}
|
||||
return &GridPrunePolicy{
|
||||
util.NewRetentionGrid(intervals),
|
||||
keepBookmarks,
|
||||
}, nil
|
||||
}
|
||||
|
||||
var retentionStringIntervalRegex *regexp.Regexp = regexp.MustCompile(`^\s*(\d+)\s*x\s*([^\(]+)\s*(\((.*)\))?\s*$`)
|
||||
|
||||
func parseRetentionGridIntervalString(e string) (intervals []util.RetentionInterval, err error) {
|
||||
|
||||
comps := retentionStringIntervalRegex.FindStringSubmatch(e)
|
||||
if comps == nil {
|
||||
err = fmt.Errorf("retention string does not match expected format")
|
||||
return
|
||||
}
|
||||
|
||||
times, err := strconv.Atoi(comps[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
} else if times <= 0 {
|
||||
return nil, fmt.Errorf("contains factor <= 0")
|
||||
}
|
||||
|
||||
duration, err := parsePostitiveDuration(comps[2])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
keepCount := 1
|
||||
if comps[3] != "" {
|
||||
// Decompose key=value, comma separated
|
||||
// For now, only keep_count is supported
|
||||
re := regexp.MustCompile(`^\s*keep=(.+)\s*$`)
|
||||
res := re.FindStringSubmatch(comps[4])
|
||||
if res == nil || len(res) != 2 {
|
||||
err = fmt.Errorf("interval parameter contains unknown parameters")
|
||||
return
|
||||
}
|
||||
if res[1] == "all" {
|
||||
keepCount = util.RetentionGridKeepCountAll
|
||||
} else {
|
||||
keepCount, err = strconv.Atoi(res[1])
|
||||
if err != nil {
|
||||
err = fmt.Errorf("cannot parse keep_count value")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
intervals = make([]util.RetentionInterval, times)
|
||||
for i := range intervals {
|
||||
intervals[i] = util.RetentionInterval{
|
||||
Length: duration,
|
||||
KeepCount: keepCount,
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func parseRetentionGridIntervalsString(s string) (intervals []util.RetentionInterval, err error) {
|
||||
|
||||
ges := strings.Split(s, "|")
|
||||
intervals = make([]util.RetentionInterval, 0, 7*len(ges))
|
||||
|
||||
for intervalIdx, e := range ges {
|
||||
parsed, err := parseRetentionGridIntervalString(e)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse interval %d of %d: %s: %s", intervalIdx+1, len(ges), err, strings.TrimSpace(e))
|
||||
}
|
||||
intervals = append(intervals, parsed...)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
@ -1,11 +0,0 @@
package cmd

import "github.com/zrepl/zrepl/zfs"

type NoPrunePolicy struct{}

func (p NoPrunePolicy) Prune(fs *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error) {
	keep = versions
	remove = []zfs.FilesystemVersion{}
	return
}
@ -1,56 +0,0 @@
package cmd

import (
	"github.com/mitchellh/mapstructure"
	"github.com/pkg/errors"
	"github.com/problame/go-netssh"
	"io"
	"path"
)

type StdinserverListenerFactory struct {
	ClientIdentity string `mapstructure:"client_identity"`
	sockpath       string
}

func parseStdinserverListenerFactory(c JobParsingContext, i map[string]interface{}) (f *StdinserverListenerFactory, err error) {

	f = &StdinserverListenerFactory{}

	if err = mapstructure.Decode(i, f); err != nil {
		return nil, errors.Wrap(err, "mapstructure error")
	}
	if !(len(f.ClientIdentity) > 0) {
		err = errors.Errorf("must specify 'client_identity'")
		return
	}

	f.sockpath = path.Join(c.Global.Serve.Stdinserver.SockDir, f.ClientIdentity)

	return
}

func (f *StdinserverListenerFactory) Listen() (al AuthenticatedChannelListener, err error) {

	if err = PreparePrivateSockpath(f.sockpath); err != nil {
		return nil, err
	}

	l, err := netssh.Listen(f.sockpath)
	if err != nil {
		return nil, err
	}
	return StdinserverListener{l}, nil
}

type StdinserverListener struct {
	l *netssh.Listener
}

func (l StdinserverListener) Accept() (ch io.ReadWriteCloser, err error) {
	return l.l.Accept()
}

func (l StdinserverListener) Close() (err error) {
	return l.l.Close()
}
@ -1,222 +0,0 @@
package cmd

import (
    "testing"
    "time"

    "github.com/kr/pretty"
    "github.com/stretchr/testify/assert"
    "github.com/zrepl/zrepl/util"
    "github.com/zrepl/zrepl/zfs"
)

func TestSampleConfigsAreParsedWithoutErrors(t *testing.T) {

    paths := []string{
        "./sampleconf/localbackup/host1.yml",
        "./sampleconf/pullbackup/backuphost.yml",
        "./sampleconf/pullbackup/productionhost.yml",
        "./sampleconf/random/debugging.yml",
        "./sampleconf/random/logging_and_monitoring.yml",
    }

    for _, p := range paths {

        c, err := ParseConfig(p)
        if err != nil {
            t.Errorf("error parsing %s:\n%+v", p, err)
        }

        t.Logf("file: %s", p)
        t.Log(pretty.Sprint(c))

    }

}

func TestParseRetentionGridStringParsing(t *testing.T) {

    intervals, err := parseRetentionGridIntervalsString("2x10m(keep=2) | 1x1h | 3x1w")

    assert.Nil(t, err)
    assert.Len(t, intervals, 6)
    proto := util.RetentionInterval{
        KeepCount: 2,
        Length:    10 * time.Minute,
    }
    assert.EqualValues(t, proto, intervals[0])
    assert.EqualValues(t, proto, intervals[1])

    proto.KeepCount = 1
    proto.Length = 1 * time.Hour
    assert.EqualValues(t, proto, intervals[2])

    proto.Length = 7 * 24 * time.Hour
    assert.EqualValues(t, proto, intervals[3])
    assert.EqualValues(t, proto, intervals[4])
    assert.EqualValues(t, proto, intervals[5])

    intervals, err = parseRetentionGridIntervalsString("|")
    assert.Error(t, err)
    intervals, err = parseRetentionGridIntervalsString("2x10m")
    assert.NoError(t, err)

    intervals, err = parseRetentionGridIntervalsString("1x10m(keep=all)")
    assert.NoError(t, err)
    assert.Len(t, intervals, 1)
    assert.EqualValues(t, util.RetentionGridKeepCountAll, intervals[0].KeepCount)

}

func TestDatasetMapFilter(t *testing.T) {

    expectMapping := func(m map[string]string, from, to string) {
        dmf, err := parseDatasetMapFilter(m, false)
        if err != nil {
            t.Logf("expect test map to be valid: %s", err)
            t.FailNow()
        }
        fromPath, err := zfs.NewDatasetPath(from)
        if err != nil {
            t.Logf("expect test from path to be valid: %s", err)
            t.FailNow()
        }

        res, err := dmf.Map(fromPath)
        if to == "" {
            assert.Nil(t, res)
            assert.Nil(t, err)
            t.Logf("%s => NOT MAPPED", fromPath.ToString())
            return
        }

        assert.Nil(t, err)
        toPath, err := zfs.NewDatasetPath(to)
        if err != nil {
            t.Logf("expect test to path to be valid: %s", err)
            t.FailNow()
        }
        assert.True(t, res.Equal(toPath))
    }

    expectFilter := func(m map[string]string, path string, pass bool) {
        dmf, err := parseDatasetMapFilter(m, true)
        if err != nil {
            t.Logf("expect test filter to be valid: %s", err)
            t.FailNow()
        }
        p, err := zfs.NewDatasetPath(path)
        if err != nil {
            t.Logf("expect test path to be valid: %s", err)
            t.FailNow()
        }
        res, err := dmf.Filter(p)
        assert.Nil(t, err)
        assert.Equal(t, pass, res)
    }

    map1 := map[string]string{
        "a/b/c<":     "root1",
        "a/b<":       "root2",
        "<":          "root3/b/c",
        "b":          "!",
        "a/b/c/d/e<": "!",
        "q<":         "root4/1/2",
    }

    expectMapping(map1, "a/b/c", "root1")
    expectMapping(map1, "a/b/c/d", "root1/d")
    expectMapping(map1, "a/b/c/d/e", "")
    expectMapping(map1, "a/b/e", "root2/e")
    expectMapping(map1, "a/b", "root2")
    expectMapping(map1, "x", "root3/b/c/x")
    expectMapping(map1, "x/y", "root3/b/c/x/y")
    expectMapping(map1, "q", "root4/1/2")
    expectMapping(map1, "b", "")
    expectMapping(map1, "q/r", "root4/1/2/r")

    filter1 := map[string]string{
        "<":    "!",
        "a<":   "ok",
        "a/b<": "!",
    }

    expectFilter(filter1, "b", false)
    expectFilter(filter1, "a", true)
    expectFilter(filter1, "a/d", true)
    expectFilter(filter1, "a/b", false)
    expectFilter(filter1, "a/b/c", false)

    filter2 := map[string]string{}
    expectFilter(filter2, "foo", false) // default to omit

}

func TestDatasetMapFilter_AsFilter(t *testing.T) {

    mapspec := map[string]string{
        "a/b/c<":     "root1",
        "a/b<":       "root2",
        "<":          "root3/b/c",
        "b":          "!",
        "a/b/c/d/e<": "!",
        "q<":         "root4/1/2",
    }

    m, err := parseDatasetMapFilter(mapspec, false)
    assert.Nil(t, err)

    f := m.AsFilter()

    t.Logf("Mapping:\n%s\nFilter:\n%s", pretty.Sprint(m), pretty.Sprint(f))

    tf := func(f zfs.DatasetFilter, path string, pass bool) {
        p, err := zfs.NewDatasetPath(path)
        assert.Nil(t, err)
        r, err := f.Filter(p)
        assert.Nil(t, err)
        assert.Equal(t, pass, r)
    }

    tf(f, "a/b/c", true)
    tf(f, "a/b", true)
    tf(f, "b", false)
    tf(f, "a/b/c/d/e", false)
    tf(f, "a/b/c/d/e/f", false)
    tf(f, "a", true)

}

func TestDatasetMapFilter_InvertedFilter(t *testing.T) {
    mapspec := map[string]string{
        "a/b":      "1/2",
        "a/b/c<":   "3",
        "a/b/c/d<": "1/2/a",
        "a/b/d":    "!",
    }

    m, err := parseDatasetMapFilter(mapspec, false)
    assert.Nil(t, err)

    inv, err := m.InvertedFilter()
    assert.Nil(t, err)

    t.Log(pretty.Sprint(inv))

    expectMapping := func(m *DatasetMapFilter, ps string, expRes bool) {
        p, err := zfs.NewDatasetPath(ps)
        assert.Nil(t, err)
        r, err := m.Filter(p)
        assert.Nil(t, err)
        assert.Equal(t, expRes, r)
    }

    expectMapping(inv, "4", false)
    expectMapping(inv, "3", true)
    expectMapping(inv, "3/x", true)
    expectMapping(inv, "1", false)
    expectMapping(inv, "1/2", true)
    expectMapping(inv, "1/2/3", false)
    expectMapping(inv, "1/2/a/b", true)

}
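The map and filter specs exercised in the tests above use a small pattern language: a key names a dataset, a trailing "<" extends the match to the dataset's whole subtree, the most specific (longest) matching key wins, and a value of "!" excludes. A toy, self-contained re-implementation of just that matching rule, reproducing the expectations tested above; this is an illustration only, not zrepl's DatasetMapFilter, and all names in it are made up:

// Illustrative sketch of longest-prefix subtree mapping with "!" exclusion.
package main

import (
    "fmt"
    "strings"
)

func mapDataset(rules map[string]string, path string) (string, bool) {
    bestLen, best, target := -1, "", ""
    for pat, t := range rules {
        if strings.HasSuffix(pat, "<") {
            prefix := strings.TrimSuffix(pat, "<")
            if prefix == "" || path == prefix || strings.HasPrefix(path, prefix+"/") {
                if len(prefix) > bestLen {
                    bestLen, best, target = len(prefix), prefix, t
                }
            }
        } else if path == pat && len(pat) > bestLen {
            bestLen, best, target = len(pat), pat, t
        }
    }
    if bestLen < 0 || target == "!" {
        return "", false // unmatched or explicitly excluded
    }
    rest := strings.TrimPrefix(strings.TrimPrefix(path, best), "/")
    if rest == "" {
        return target, true
    }
    return target + "/" + rest, true
}

func main() {
    rules := map[string]string{"a/b/c<": "root1", "a/b<": "root2", "<": "root3/b/c", "b": "!"}
    fmt.Println(mapDataset(rules, "a/b/c/d")) // root1/d true
    fmt.Println(mapDataset(rules, "b"))       // "" false (excluded)
}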
305
cmd/control.go
@ -1,305 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"io"
|
||||
golog "log"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
var controlCmd = &cobra.Command{
|
||||
Use: "control",
|
||||
Short: "control zrepl daemon",
|
||||
}
|
||||
|
||||
var pprofCmd = &cobra.Command{
|
||||
Use: "pprof off | [on TCP_LISTEN_ADDRESS]",
|
||||
Short: "start a http server exposing go-tool-compatible profiling endpoints at TCP_LISTEN_ADDRESS",
|
||||
Run: doControlPProf,
|
||||
PreRunE: func(cmd *cobra.Command, args []string) error {
|
||||
if cmd.Flags().NArg() < 1 {
|
||||
goto enargs
|
||||
}
|
||||
switch cmd.Flags().Arg(0) {
|
||||
case "on":
|
||||
pprofCmdArgs.msg.Run = true
|
||||
if cmd.Flags().NArg() != 2 {
|
||||
return errors.New("must specify TCP_LISTEN_ADDRESS as second positional argument")
|
||||
}
|
||||
pprofCmdArgs.msg.HttpListenAddress = cmd.Flags().Arg(1)
|
||||
case "off":
|
||||
if cmd.Flags().NArg() != 1 {
|
||||
goto enargs
|
||||
}
|
||||
pprofCmdArgs.msg.Run = false
|
||||
}
|
||||
return nil
|
||||
enargs:
|
||||
return errors.New("invalid number of positional arguments")
|
||||
|
||||
},
|
||||
}
|
||||
var pprofCmdArgs struct {
|
||||
msg PprofServerControlMsg
|
||||
}
|
||||
|
||||
var controlVersionCmd = &cobra.Command{
|
||||
Use: "version",
|
||||
Short: "print version of running zrepl daemon",
|
||||
Run: doControLVersionCmd,
|
||||
}
|
||||
|
||||
var controlStatusCmdArgs struct {
|
||||
format string
|
||||
level logger.Level
|
||||
onlyShowJob string
|
||||
}
|
||||
|
||||
var controlStatusCmd = &cobra.Command{
|
||||
Use: "status [JOB_NAME]",
|
||||
Short: "get current status",
|
||||
Run: doControlStatusCmd,
|
||||
}
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(controlCmd)
|
||||
controlCmd.AddCommand(pprofCmd)
|
||||
controlCmd.AddCommand(controlVersionCmd)
|
||||
controlCmd.AddCommand(controlStatusCmd)
|
||||
controlStatusCmd.Flags().StringVar(&controlStatusCmdArgs.format, "format", "human", "output format (human|raw)")
|
||||
controlStatusCmdArgs.level = logger.Warn
|
||||
controlStatusCmd.Flags().Var(&controlStatusCmdArgs.level, "level", "minimum log level to show")
|
||||
}
|
||||
|
||||
func controlHttpClient() (client http.Client, err error) {
|
||||
|
||||
conf, err := ParseConfig(rootArgs.configFile)
|
||||
if err != nil {
|
||||
return http.Client{}, err
|
||||
}
|
||||
|
||||
return http.Client{
|
||||
Transport: &http.Transport{
|
||||
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
|
||||
return net.Dial("unix", conf.Global.Control.Sockpath)
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func doControlPProf(cmd *cobra.Command, args []string) {
|
||||
|
||||
log := golog.New(os.Stderr, "", 0)
|
||||
|
||||
die := func() {
|
||||
log.Printf("exiting after error")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
log.Printf("connecting to zrepl daemon")
|
||||
httpc, err := controlHttpClient()
|
||||
if err != nil {
|
||||
log.Printf("error parsing config: %s", err)
|
||||
die()
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := json.NewEncoder(&buf).Encode(&pprofCmdArgs.msg); err != nil {
|
||||
log.Printf("error marshaling request: %s", err)
|
||||
die()
|
||||
}
|
||||
_, err = httpc.Post("http://unix"+ControlJobEndpointPProf, "application/json", &buf)
|
||||
if err != nil {
|
||||
log.Printf("error: %s", err)
|
||||
die()
|
||||
}
|
||||
|
||||
log.Printf("finished")
|
||||
}
|
||||
|
||||
func doControLVersionCmd(cmd *cobra.Command, args []string) {
|
||||
|
||||
log := golog.New(os.Stderr, "", 0)
|
||||
|
||||
die := func() {
|
||||
log.Printf("exiting after error")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
httpc, err := controlHttpClient()
|
||||
if err != nil {
|
||||
log.Printf("could not connect to daemon: %s", err)
|
||||
die()
|
||||
}
|
||||
|
||||
resp, err := httpc.Get("http://unix" + ControlJobEndpointVersion)
|
||||
if err != nil {
|
||||
log.Printf("error: %s", err)
|
||||
die()
|
||||
} else if resp.StatusCode != http.StatusOK {
|
||||
var msg bytes.Buffer
|
||||
io.CopyN(&msg, resp.Body, 4096)
|
||||
log.Printf("error: %s", msg.String())
|
||||
die()
|
||||
}
|
||||
|
||||
var info ZreplVersionInformation
|
||||
err = json.NewDecoder(resp.Body).Decode(&info)
|
||||
if err != nil {
|
||||
log.Printf("error unmarshaling response: %s", err)
|
||||
die()
|
||||
}
|
||||
|
||||
fmt.Println(info.String())
|
||||
|
||||
}
|
||||
|
||||
func doControlStatusCmd(cmd *cobra.Command, args []string) {
|
||||
|
||||
log := golog.New(os.Stderr, "", 0)
|
||||
|
||||
die := func() {
|
||||
log.Print("exiting after error")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if len(args) == 1 {
|
||||
controlStatusCmdArgs.onlyShowJob = args[0]
|
||||
} else if len(args) > 1 {
|
||||
log.Print("can only specify one job as positional argument")
|
||||
cmd.Usage()
|
||||
die()
|
||||
}
|
||||
|
||||
httpc, err := controlHttpClient()
|
||||
if err != nil {
|
||||
log.Printf("could not connect to daemon: %s", err)
|
||||
die()
|
||||
}
|
||||
|
||||
resp, err := httpc.Get("http://unix" + ControlJobEndpointStatus)
|
||||
if err != nil {
|
||||
log.Printf("error: %s", err)
|
||||
die()
|
||||
} else if resp.StatusCode != http.StatusOK {
|
||||
var msg bytes.Buffer
|
||||
io.CopyN(&msg, resp.Body, 4096)
|
||||
log.Printf("error: %s", msg.String())
|
||||
die()
|
||||
}
|
||||
|
||||
var status DaemonStatus
|
||||
err = json.NewDecoder(resp.Body).Decode(&status)
|
||||
if err != nil {
|
||||
log.Printf("error unmarshaling response: %s", err)
|
||||
die()
|
||||
}
|
||||
|
||||
switch controlStatusCmdArgs.format {
|
||||
case "raw":
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
if err := enc.Encode(status); err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
case "human":
|
||||
|
||||
formatter := HumanFormatter{}
|
||||
formatter.SetMetadataFlags(MetadataAll)
|
||||
formatter.SetIgnoreFields([]string{
|
||||
logJobField,
|
||||
})
|
||||
jobNames := make([]string, 0, len(status.Jobs))
|
||||
for name, _ := range status.Jobs {
|
||||
jobNames = append(jobNames, name)
|
||||
}
|
||||
sort.Slice(jobNames, func(i, j int) bool {
|
||||
return strings.Compare(jobNames[i], jobNames[j]) == -1
|
||||
})
|
||||
now := time.Now()
|
||||
for _, name := range jobNames {
|
||||
|
||||
if controlStatusCmdArgs.onlyShowJob != "" && name != controlStatusCmdArgs.onlyShowJob {
|
||||
continue
|
||||
}
|
||||
|
||||
job := status.Jobs[name]
|
||||
jobLogEntries := make([]logger.Entry, 0)
|
||||
informAboutError := false
|
||||
|
||||
fmt.Printf("Job '%s':\n", name)
|
||||
for _, task := range job.Tasks {
|
||||
|
||||
var header bytes.Buffer
|
||||
fmt.Fprintf(&header, " Task '%s': ", task.Name)
|
||||
if !task.Idle {
|
||||
fmt.Fprint(&header, strings.Join(task.ActivityStack, "."))
|
||||
} else {
|
||||
fmt.Fprint(&header, "<idle>")
|
||||
}
|
||||
fmt.Fprint(&header, " ")
|
||||
const TASK_STALLED_HOLDOFF_DURATION = 10 * time.Second
|
||||
sinceLastUpdate := now.Sub(task.LastUpdate)
|
||||
if !task.Idle || task.ProgressRx != 0 || task.ProgressTx != 0 {
|
||||
fmt.Fprintf(&header, "(%s / %s , Rx/Tx",
|
||||
humanize.Bytes(uint64(task.ProgressRx)),
|
||||
humanize.Bytes(uint64(task.ProgressTx)))
|
||||
if task.Idle {
|
||||
fmt.Fprint(&header, ", values from last run")
|
||||
}
|
||||
fmt.Fprint(&header, ")")
|
||||
}
|
||||
fmt.Fprint(&header, "\n")
|
||||
if !task.Idle && !task.LastUpdate.IsZero() && sinceLastUpdate >= TASK_STALLED_HOLDOFF_DURATION {
|
||||
informAboutError = true
|
||||
fmt.Fprintf(&header, " WARNING: last update %s ago at %s)",
|
||||
sinceLastUpdate.String(),
|
||||
task.LastUpdate.Format(HumanFormatterDateFormat))
|
||||
fmt.Fprint(&header, "\n")
|
||||
}
|
||||
io.Copy(os.Stdout, &header)
|
||||
|
||||
jobLogEntries = append(jobLogEntries, task.LogEntries...)
|
||||
informAboutError = informAboutError || task.MaxLogLevel >= logger.Warn
|
||||
}
|
||||
|
||||
sort.Slice(jobLogEntries, func(i, j int) bool {
|
||||
return jobLogEntries[i].Time.Before(jobLogEntries[j].Time)
|
||||
})
|
||||
if informAboutError {
|
||||
fmt.Println(" WARNING: Some tasks encountered problems since the last time they left idle state:")
|
||||
fmt.Println(" check the logs below or your log file for more information.")
|
||||
fmt.Println(" Use the --level flag if you need debug information.")
|
||||
fmt.Println()
|
||||
}
|
||||
for _, e := range jobLogEntries {
|
||||
if e.Level < controlStatusCmdArgs.level {
|
||||
continue
|
||||
}
|
||||
formatted, err := formatter.Format(&e)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Printf(" %s\n", string(formatted))
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
}
|
||||
default:
|
||||
log.Printf("invalid output format '%s'", controlStatusCmdArgs.format)
|
||||
die()
|
||||
}
|
||||
|
||||
}
|
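The removed cmd/control.go above reaches the daemon over a unix socket by giving http.Client a custom DialContext, so ordinary HTTP requests to the dummy host "unix" end up on the control socket. A minimal, self-contained sketch of that standard-library pattern (socket path and endpoint are placeholders, not zrepl's actual values):

// Illustrative sketch: HTTP over a unix socket via a custom DialContext.
package main

import (
    "context"
    "fmt"
    "io"
    "net"
    "net/http"
)

func main() {
    client := http.Client{
        Transport: &http.Transport{
            // Ignore host/port from the URL and always dial the daemon's socket.
            DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
                return net.Dial("unix", "/var/run/zrepl/control") // assumption: example path
            },
        },
    }
    // "unix" is a dummy hostname; routing happens entirely in DialContext.
    resp, err := client.Get("http://unix/status")
    if err != nil {
        fmt.Println("request failed:", err)
        return
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.StatusCode, string(body))
}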
461
cmd/daemon.go
@ -1,461 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"io"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
// daemonCmd represents the daemon command
|
||||
var daemonCmd = &cobra.Command{
|
||||
Use: "daemon",
|
||||
Short: "start daemon",
|
||||
Run: doDaemon,
|
||||
}
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(daemonCmd)
|
||||
}
|
||||
|
||||
type Job interface {
|
||||
JobName() string
|
||||
JobType() JobType
|
||||
JobStart(ctxt context.Context)
|
||||
JobStatus(ctxt context.Context) (*JobStatus, error)
|
||||
}
|
||||
|
||||
type JobType string
|
||||
|
||||
const (
|
||||
JobTypePull JobType = "pull"
|
||||
JobTypeSource JobType = "source"
|
||||
JobTypeLocal JobType = "local"
|
||||
JobTypePrometheus JobType = "prometheus"
|
||||
JobTypeControl JobType = "control"
|
||||
)
|
||||
|
||||
func ParseUserJobType(s string) (JobType, error) {
|
||||
switch s {
|
||||
case "pull":
|
||||
return JobTypePull, nil
|
||||
case "source":
|
||||
return JobTypeSource, nil
|
||||
case "local":
|
||||
return JobTypeLocal, nil
|
||||
case "prometheus":
|
||||
return JobTypePrometheus, nil
|
||||
}
|
||||
return "", fmt.Errorf("unknown job type '%s'", s)
|
||||
}
|
||||
|
||||
func (j JobType) String() string {
|
||||
return string(j)
|
||||
}
|
||||
|
||||
func doDaemon(cmd *cobra.Command, args []string) {
|
||||
|
||||
conf, err := ParseConfig(rootArgs.configFile)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error parsing config: %s\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
log := logger.NewLogger(conf.Global.logging.Outlets, 1*time.Second)
|
||||
|
||||
log.Info(NewZreplVersionInformation().String())
|
||||
log.Debug("starting daemon")
|
||||
ctx := context.WithValue(context.Background(), contextKeyLog, log)
|
||||
ctx = context.WithValue(ctx, contextKeyLog, log)
|
||||
|
||||
d := NewDaemon(conf)
|
||||
d.Loop(ctx)
|
||||
|
||||
}
|
||||
|
||||
type contextKey string
|
||||
|
||||
const (
|
||||
contextKeyLog contextKey = contextKey("log")
|
||||
contextKeyDaemon contextKey = contextKey("daemon")
|
||||
)
|
||||
|
||||
type Daemon struct {
|
||||
conf *Config
|
||||
startedAt time.Time
|
||||
}
|
||||
|
||||
func NewDaemon(initialConf *Config) *Daemon {
|
||||
return &Daemon{conf: initialConf}
|
||||
}
|
||||
|
||||
func (d *Daemon) Loop(ctx context.Context) {
|
||||
|
||||
d.startedAt = time.Now()
|
||||
|
||||
log := ctx.Value(contextKeyLog).(Logger)
|
||||
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
ctx = context.WithValue(ctx, contextKeyDaemon, d)
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
finishs := make(chan Job)
|
||||
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
log.Info("starting jobs from config")
|
||||
i := 0
|
||||
for _, job := range d.conf.Jobs {
|
||||
logger := log.WithField(logJobField, job.JobName())
|
||||
logger.Info("starting")
|
||||
i++
|
||||
jobCtx := context.WithValue(ctx, contextKeyLog, logger)
|
||||
go func(j Job) {
|
||||
j.JobStart(jobCtx)
|
||||
finishs <- j
|
||||
}(job)
|
||||
}
|
||||
|
||||
finishCount := 0
|
||||
outer:
|
||||
for {
|
||||
select {
|
||||
case <-finishs:
|
||||
finishCount++
|
||||
if finishCount == len(d.conf.Jobs) {
|
||||
log.Info("all jobs finished")
|
||||
break outer
|
||||
}
|
||||
|
||||
case sig := <-sigChan:
|
||||
log.WithField("signal", sig).Info("received signal")
|
||||
log.Info("cancelling all jobs")
|
||||
cancel()
|
||||
}
|
||||
}
|
||||
|
||||
signal.Stop(sigChan)
|
||||
cancel() // make go vet happy
|
||||
|
||||
log.Info("exiting")
|
||||
|
||||
}
|
||||
|
||||
// Representation of a Job's status that is composed of Tasks
|
||||
type JobStatus struct {
|
||||
// Statuses of all tasks of this job
|
||||
Tasks []*TaskStatus
|
||||
// Error != "" if JobStatus() returned an error
|
||||
JobStatusError string
|
||||
}
|
||||
|
||||
// Representation of a Daemon's status that is composed of Jobs
|
||||
type DaemonStatus struct {
|
||||
StartedAt time.Time
|
||||
Jobs map[string]*JobStatus
|
||||
}
|
||||
|
||||
func (d *Daemon) Status() (s *DaemonStatus) {
|
||||
|
||||
s = &DaemonStatus{}
|
||||
s.StartedAt = d.startedAt
|
||||
|
||||
s.Jobs = make(map[string]*JobStatus, len(d.conf.Jobs))
|
||||
|
||||
for name, j := range d.conf.Jobs {
|
||||
status, err := j.JobStatus(context.TODO())
|
||||
if err != nil {
|
||||
s.Jobs[name] = &JobStatus{nil, err.Error()}
|
||||
continue
|
||||
}
|
||||
s.Jobs[name] = status
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Representation of a Task's status
|
||||
type TaskStatus struct {
|
||||
Name string
|
||||
// Whether the task is idle.
|
||||
Idle bool
|
||||
// The stack of activities the task is currently executing.
|
||||
// The first element is the root activity and equal to Name.
|
||||
ActivityStack []string
|
||||
// Number of bytes received by the task since it last left idle state.
|
||||
ProgressRx int64
|
||||
// Number of bytes sent by the task since it last left idle state.
|
||||
ProgressTx int64
|
||||
// Log entries emitted by the task since it last left idle state.
|
||||
// Only contains the log entries emitted through the task's logger
|
||||
// (provided by Task.Log()).
|
||||
LogEntries []logger.Entry
|
||||
// The maximum log level of LogEntries.
|
||||
// Only valid if len(LogEntries) > 0.
|
||||
MaxLogLevel logger.Level
|
||||
// Last time something about the Task changed
|
||||
LastUpdate time.Time
|
||||
}
|
||||
|
||||
// An instance of Task tracks a single thread of activity that is part of a Job.
|
||||
type Task struct {
|
||||
name string // immutable
|
||||
parent Job // immutable
|
||||
|
||||
// Stack of activities the task is currently in
|
||||
// Members are instances of taskActivity
|
||||
activities *list.List
|
||||
// Last time activities was changed (not the activities inside, the list)
|
||||
activitiesLastUpdate time.Time
|
||||
// Protects Task members from modification
|
||||
rwl sync.RWMutex
|
||||
}
|
||||
|
||||
// Structure that describes the progress a Task has made
|
||||
type taskProgress struct {
|
||||
rx int64
|
||||
tx int64
|
||||
creation time.Time
|
||||
lastUpdate time.Time
|
||||
logEntries []logger.Entry
|
||||
mtx sync.RWMutex
|
||||
}
|
||||
|
||||
func newTaskProgress() (p *taskProgress) {
|
||||
return &taskProgress{
|
||||
creation: time.Now(),
|
||||
logEntries: make([]logger.Entry, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (p *taskProgress) UpdateIO(drx, dtx int64) {
|
||||
p.mtx.Lock()
|
||||
defer p.mtx.Unlock()
|
||||
p.rx += drx
|
||||
p.tx += dtx
|
||||
p.lastUpdate = time.Now()
|
||||
}
|
||||
|
||||
func (p *taskProgress) UpdateLogEntry(entry logger.Entry) {
|
||||
p.mtx.Lock()
|
||||
defer p.mtx.Unlock()
|
||||
// FIXME: ensure maximum size (issue #48)
|
||||
p.logEntries = append(p.logEntries, entry)
|
||||
p.lastUpdate = time.Now()
|
||||
}
|
||||
|
||||
func (p *taskProgress) DeepCopy() (out taskProgress) {
|
||||
p.mtx.RLock()
|
||||
defer p.mtx.RUnlock()
|
||||
out.rx, out.tx = p.rx, p.tx
|
||||
out.creation = p.creation
|
||||
out.lastUpdate = p.lastUpdate
|
||||
out.logEntries = make([]logger.Entry, len(p.logEntries))
|
||||
for i := range p.logEntries {
|
||||
out.logEntries[i] = p.logEntries[i]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// returns a copy of this taskProgress, the mutex carries no semantic value
|
||||
func (p *taskProgress) Read() (out taskProgress) {
|
||||
p.mtx.RLock()
|
||||
defer p.mtx.RUnlock()
|
||||
return p.DeepCopy()
|
||||
}
|
||||
|
||||
// Element of a Task's activity stack
|
||||
type taskActivity struct {
|
||||
name string
|
||||
idle bool
|
||||
logger *logger.Logger
|
||||
// The progress of the task that is updated by UpdateIO() and UpdateLogEntry()
|
||||
//
|
||||
// Progress happens on a task-level and is thus global to the task.
|
||||
// That's why progress is just a pointer to the current taskProgress:
|
||||
// we reset progress when leaving the idle root activity
|
||||
progress *taskProgress
|
||||
}
|
||||
|
||||
func NewTask(name string, parent Job, lg *logger.Logger) *Task {
|
||||
t := &Task{
|
||||
name: name,
|
||||
parent: parent,
|
||||
activities: list.New(),
|
||||
}
|
||||
rootLogger := lg.ReplaceField(logTaskField, name).
|
||||
WithOutlet(t, logger.Debug)
|
||||
rootAct := &taskActivity{name, true, rootLogger, newTaskProgress()}
|
||||
t.activities.PushFront(rootAct)
|
||||
return t
|
||||
}
|
||||
|
||||
// callers must hold t.rwl
|
||||
func (t *Task) cur() *taskActivity {
|
||||
return t.activities.Front().Value.(*taskActivity)
|
||||
}
|
||||
|
||||
// buildActivityStack returns the stack of activity names
|
||||
// t.rwl must be held, but the slice can be returned since strings are immutable
|
||||
func (t *Task) buildActivityStack() []string {
|
||||
comps := make([]string, 0, t.activities.Len())
|
||||
for e := t.activities.Back(); e != nil; e = e.Prev() {
|
||||
act := e.Value.(*taskActivity)
|
||||
comps = append(comps, act.name)
|
||||
}
|
||||
return comps
|
||||
}
|
||||
|
||||
// Start a sub-activity.
|
||||
// Must always be matched with a call to t.Finish()
|
||||
// --- consider using defer for this purpose.
|
||||
func (t *Task) Enter(activity string) {
|
||||
t.rwl.Lock()
|
||||
defer t.rwl.Unlock()
|
||||
|
||||
prev := t.cur()
|
||||
if prev.idle {
|
||||
// reset progress when leaving idle task
|
||||
// we leave the old progress dangling to have the user not worry about
|
||||
prev.progress = newTaskProgress()
|
||||
|
||||
prom.taskLastActiveStart.WithLabelValues(
|
||||
t.parent.JobName(),
|
||||
t.parent.JobType().String(),
|
||||
t.name).
|
||||
Set(float64(prev.progress.creation.UnixNano()) / 1e9)
|
||||
|
||||
}
|
||||
act := &taskActivity{activity, false, nil, prev.progress}
|
||||
t.activities.PushFront(act)
|
||||
stack := t.buildActivityStack()
|
||||
activityField := strings.Join(stack, ".")
|
||||
act.logger = prev.logger.ReplaceField(logTaskField, activityField)
|
||||
|
||||
t.activitiesLastUpdate = time.Now()
|
||||
}
|
||||
|
||||
func (t *Task) UpdateProgress(dtx, drx int64) {
|
||||
t.rwl.RLock()
|
||||
p := t.cur().progress // protected by own rwlock
|
||||
t.rwl.RUnlock()
|
||||
p.UpdateIO(dtx, drx)
|
||||
}
|
||||
|
||||
// Returns a wrapper io.Reader that updates this task's _current_ progress value.
|
||||
// Progress updates after this task resets its progress value are discarded.
|
||||
func (t *Task) ProgressUpdater(r io.Reader) *IOProgressUpdater {
|
||||
t.rwl.RLock()
|
||||
defer t.rwl.RUnlock()
|
||||
return &IOProgressUpdater{r, t.cur().progress}
|
||||
}
|
||||
|
||||
func (t *Task) Status() *TaskStatus {
|
||||
t.rwl.RLock()
|
||||
defer t.rwl.RUnlock()
|
||||
// NOTE
|
||||
// do not return any state in TaskStatus that is protected by t.rwl
|
||||
|
||||
cur := t.cur()
|
||||
stack := t.buildActivityStack()
|
||||
prog := cur.progress.Read()
|
||||
|
||||
var maxLevel logger.Level
|
||||
for _, entry := range prog.logEntries {
|
||||
if maxLevel < entry.Level {
|
||||
maxLevel = entry.Level
|
||||
}
|
||||
}
|
||||
|
||||
lastUpdate := prog.lastUpdate
|
||||
if lastUpdate.Before(t.activitiesLastUpdate) {
|
||||
lastUpdate = t.activitiesLastUpdate
|
||||
}
|
||||
|
||||
s := &TaskStatus{
|
||||
Name: stack[0],
|
||||
ActivityStack: stack,
|
||||
Idle: cur.idle,
|
||||
ProgressRx: prog.rx,
|
||||
ProgressTx: prog.tx,
|
||||
LogEntries: prog.logEntries,
|
||||
MaxLogLevel: maxLevel,
|
||||
LastUpdate: lastUpdate,
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// Finish a sub-activity.
|
||||
// Corresponds to a preceding call to t.Enter()
|
||||
func (t *Task) Finish() {
|
||||
t.rwl.Lock()
|
||||
defer t.rwl.Unlock()
|
||||
top := t.activities.Front()
|
||||
if top.Next() == nil {
|
||||
return // cannot remove root activity
|
||||
}
|
||||
t.activities.Remove(top)
|
||||
t.activitiesLastUpdate = time.Now()
|
||||
|
||||
// prometheus
|
||||
front := t.activities.Front()
|
||||
if front != nil && front == t.activities.Back() {
|
||||
idleAct := front.Value.(*taskActivity)
|
||||
if !idleAct.idle {
|
||||
panic("inconsistent implementation")
|
||||
}
|
||||
progress := idleAct.progress.Read()
|
||||
non_idle_time := t.activitiesLastUpdate.Sub(progress.creation) // use same time
|
||||
prom.taskLastActiveDuration.WithLabelValues(
|
||||
t.parent.JobName(),
|
||||
t.parent.JobType().String(),
|
||||
t.name).Set(non_idle_time.Seconds())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Returns a logger derived from the logger passed to the constructor function.
|
||||
// The logger's task field contains the current activity stack joined by '.'.
|
||||
func (t *Task) Log() *logger.Logger {
|
||||
t.rwl.RLock()
|
||||
defer t.rwl.RUnlock()
|
||||
// FIXME should influence TaskStatus's LastUpdate field
|
||||
return t.cur().logger
|
||||
}
|
||||
|
||||
// implement logger.Outlet interface
|
||||
func (t *Task) WriteEntry(entry logger.Entry) error {
|
||||
t.rwl.RLock()
|
||||
defer t.rwl.RUnlock()
|
||||
t.cur().progress.UpdateLogEntry(entry)
|
||||
|
||||
prom.taskLogEntries.WithLabelValues(
|
||||
t.parent.JobName(),
|
||||
t.parent.JobType().String(),
|
||||
t.name,
|
||||
entry.Level.String()).
|
||||
Inc()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type IOProgressUpdater struct {
|
||||
r io.Reader
|
||||
p *taskProgress
|
||||
}
|
||||
|
||||
func (u *IOProgressUpdater) Read(p []byte) (n int, err error) {
|
||||
n, err = u.r.Read(p)
|
||||
u.p.UpdateIO(int64(n), 0)
|
||||
return
|
||||
|
||||
}
|
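Daemon.Loop in the removed cmd/daemon.go above combines signal.Notify with context cancellation so that SIGINT/SIGTERM stops every job goroutine. A minimal, self-contained sketch of that shutdown pattern; the ticker "job" body is a placeholder, not zrepl code:

// Illustrative sketch: cancel all worker goroutines on SIGINT/SIGTERM.
package main

import (
    "context"
    "log"
    "os"
    "os/signal"
    "sync"
    "syscall"
    "time"
)

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    sigChan := make(chan os.Signal, 1)
    signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

    var wg sync.WaitGroup
    for i := 0; i < 3; i++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            t := time.NewTicker(time.Second)
            defer t.Stop()
            for {
                select {
                case <-ctx.Done():
                    log.Printf("job %d: cancelled, exiting", id)
                    return
                case <-t.C:
                    log.Printf("job %d: still working", id)
                }
            }
        }(i)
    }

    sig := <-sigChan
    log.Printf("received %s, cancelling all jobs", sig)
    cancel()
    wg.Wait()
    log.Print("all jobs finished, exiting")
}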
181
cmd/handler.go
@ -1,181 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/rpc"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
)
|
||||
|
||||
type DatasetMapping interface {
|
||||
Map(source *zfs.DatasetPath) (target *zfs.DatasetPath, err error)
|
||||
}
|
||||
|
||||
type FilesystemRequest struct {
|
||||
Roots []string // may be nil, indicating interest in all filesystems
|
||||
}
|
||||
|
||||
type FilesystemVersionsRequest struct {
|
||||
Filesystem *zfs.DatasetPath
|
||||
}
|
||||
|
||||
type InitialTransferRequest struct {
|
||||
Filesystem *zfs.DatasetPath
|
||||
FilesystemVersion zfs.FilesystemVersion
|
||||
}
|
||||
|
||||
type IncrementalTransferRequest struct {
|
||||
Filesystem *zfs.DatasetPath
|
||||
From zfs.FilesystemVersion
|
||||
To zfs.FilesystemVersion
|
||||
}
|
||||
|
||||
type Handler struct {
|
||||
logger Logger
|
||||
dsf zfs.DatasetFilter
|
||||
fsvf zfs.FilesystemVersionFilter
|
||||
}
|
||||
|
||||
func NewHandler(logger Logger, dsfilter zfs.DatasetFilter, snapfilter zfs.FilesystemVersionFilter) (h Handler) {
|
||||
return Handler{logger, dsfilter, snapfilter}
|
||||
}
|
||||
|
||||
func registerEndpoints(server rpc.RPCServer, handler Handler) (err error) {
|
||||
err = server.RegisterEndpoint("FilesystemRequest", handler.HandleFilesystemRequest)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
err = server.RegisterEndpoint("FilesystemVersionsRequest", handler.HandleFilesystemVersionsRequest)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
err = server.RegisterEndpoint("InitialTransferRequest", handler.HandleInitialTransferRequest)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
err = server.RegisterEndpoint("IncrementalTransferRequest", handler.HandleIncrementalTransferRequest)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h Handler) HandleFilesystemRequest(r *FilesystemRequest, roots *[]*zfs.DatasetPath) (err error) {
|
||||
|
||||
log := h.logger.WithField("endpoint", "FilesystemRequest")
|
||||
|
||||
log.WithField("request", r).Debug("request")
|
||||
log.WithField("dataset_filter", h.dsf).Debug("dsf")
|
||||
|
||||
allowed, err := zfs.ZFSListMapping(h.dsf)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error listing filesystems")
|
||||
return
|
||||
}
|
||||
|
||||
log.WithField("response", allowed).Debug("response")
|
||||
*roots = allowed
|
||||
return
|
||||
}
|
||||
|
||||
func (h Handler) HandleFilesystemVersionsRequest(r *FilesystemVersionsRequest, versions *[]zfs.FilesystemVersion) (err error) {
|
||||
|
||||
log := h.logger.WithField("endpoint", "FilesystemVersionsRequest")
|
||||
|
||||
log.WithField("request", r).Debug("request")
|
||||
|
||||
// allowed to request that?
|
||||
if h.pullACLCheck(r.Filesystem, nil); err != nil {
|
||||
log.WithError(err).Warn("pull ACL check failed")
|
||||
return
|
||||
}
|
||||
|
||||
// find our versions
|
||||
vs, err := zfs.ZFSListFilesystemVersions(r.Filesystem, h.fsvf)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("cannot list filesystem versions")
|
||||
return
|
||||
}
|
||||
|
||||
log.WithField("response", vs).Debug("response")
|
||||
|
||||
*versions = vs
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func (h Handler) HandleInitialTransferRequest(r *InitialTransferRequest, stream *io.Reader) (err error) {
|
||||
|
||||
log := h.logger.WithField("endpoint", "InitialTransferRequest")
|
||||
|
||||
log.WithField("request", r).Debug("request")
|
||||
if err = h.pullACLCheck(r.Filesystem, &r.FilesystemVersion); err != nil {
|
||||
log.WithError(err).Warn("pull ACL check failed")
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("invoking zfs send")
|
||||
|
||||
s, err := zfs.ZFSSend(r.Filesystem, &r.FilesystemVersion, nil)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("cannot send filesystem")
|
||||
}
|
||||
*stream = s
|
||||
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func (h Handler) HandleIncrementalTransferRequest(r *IncrementalTransferRequest, stream *io.Reader) (err error) {
|
||||
|
||||
log := h.logger.WithField("endpoint", "IncrementalTransferRequest")
|
||||
log.WithField("request", r).Debug("request")
|
||||
if err = h.pullACLCheck(r.Filesystem, &r.From); err != nil {
|
||||
log.WithError(err).Warn("pull ACL check failed")
|
||||
return
|
||||
}
|
||||
if err = h.pullACLCheck(r.Filesystem, &r.To); err != nil {
|
||||
log.WithError(err).Warn("pull ACL check failed")
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("invoking zfs send")
|
||||
|
||||
s, err := zfs.ZFSSend(r.Filesystem, &r.From, &r.To)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("cannot send filesystem")
|
||||
}
|
||||
|
||||
*stream = s
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func (h Handler) pullACLCheck(p *zfs.DatasetPath, v *zfs.FilesystemVersion) (err error) {
|
||||
var fsAllowed, vAllowed bool
|
||||
fsAllowed, err = h.dsf.Filter(p)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("error evaluating ACL: %s", err)
|
||||
return
|
||||
}
|
||||
if !fsAllowed {
|
||||
err = fmt.Errorf("ACL prohibits access to %s", p.ToString())
|
||||
return
|
||||
}
|
||||
if v == nil {
|
||||
return
|
||||
}
|
||||
|
||||
vAllowed, err = h.fsvf.Filter(*v)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "error evaluating version filter")
|
||||
return
|
||||
}
|
||||
if !vAllowed {
|
||||
err = fmt.Errorf("ACL prohibits access to %s", v.ToAbsPath(p))
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
47
cmd/main.go
@ -1,47 +0,0 @@
// zrepl replicates ZFS filesystems & volumes between pools
//
// Code Organization
//
// The cmd package uses github.com/spf13/cobra for its CLI.
//
// It combines the other packages in the zrepl project to implement zrepl functionality.
//
// Each subcommand's code is in the corresponding *.go file.
// All other *.go files contain code shared by the subcommands.
package cmd

import (
    "github.com/spf13/cobra"
    "github.com/zrepl/zrepl/logger"
)

//
//type Logger interface {
//	Printf(format string, v ...interface{})
//}

var (
    zreplVersion string // set by build infrastructure
)

type Logger = *logger.Logger

var RootCmd = &cobra.Command{
    Use:   "zrepl",
    Short: "ZFS dataset replication",
    Long: `Replicate ZFS filesystems & volumes between pools:

- push & pull mode
- automatic snapshot creation & pruning
- local / over the network
- ACLs instead of blank SSH access`,
}

var rootArgs struct {
    configFile string
}

func init() {
    //cobra.OnInitialize(initConfig)
    RootCmd.PersistentFlags().StringVar(&rootArgs.configFile, "config", "", "config file path")
}
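cmd/main.go only declares RootCmd; the executable's entry point lives elsewhere in the tree. For orientation, a hypothetical sketch of how a cobra root command like this is typically executed from package main (the import path is an assumption for illustration):

// Illustrative sketch of wiring a cobra RootCmd into an executable.
package main

import (
    "fmt"
    "os"

    "github.com/zrepl/zrepl/cmd" // assumption: package exposing RootCmd
)

func main() {
    if err := cmd.RootCmd.Execute(); err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
}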
138
cmd/prune.go
@ -1,138 +0,0 @@
package cmd

import (
    "context"
    "fmt"
    "github.com/zrepl/zrepl/zfs"
    "time"
)

type Pruner struct {
    task           *Task
    Now            time.Time
    DryRun         bool
    DatasetFilter  zfs.DatasetFilter
    SnapshotPrefix string
    PrunePolicy    PrunePolicy
}

type PruneResult struct {
    Filesystem *zfs.DatasetPath
    All        []zfs.FilesystemVersion
    Keep       []zfs.FilesystemVersion
    Remove     []zfs.FilesystemVersion
}

func (p *Pruner) filterFilesystems() (filesystems []*zfs.DatasetPath, stop bool) {
    p.task.Enter("filter_fs")
    defer p.task.Finish()
    filesystems, err := zfs.ZFSListMapping(p.DatasetFilter)
    if err != nil {
        p.task.Log().WithError(err).Error("error applying filesystem filter")
        return nil, true
    }
    if len(filesystems) <= 0 {
        p.task.Log().Info("no filesystems matching filter")
        return nil, true
    }
    return filesystems, false
}

func (p *Pruner) filterVersions(fs *zfs.DatasetPath) (fsversions []zfs.FilesystemVersion, stop bool) {
    p.task.Enter("filter_versions")
    defer p.task.Finish()
    log := p.task.Log().WithField(logFSField, fs.ToString())

    filter := NewPrefixFilter(p.SnapshotPrefix)
    fsversions, err := zfs.ZFSListFilesystemVersions(fs, filter)
    if err != nil {
        log.WithError(err).Error("error listing filesytem versions")
        return nil, true
    }
    if len(fsversions) == 0 {
        log.WithField("prefix", p.SnapshotPrefix).Info("no filesystem versions matching prefix")
        return nil, true
    }
    return fsversions, false
}

func (p *Pruner) pruneFilesystem(fs *zfs.DatasetPath) (r PruneResult, valid bool) {
    p.task.Enter("prune_fs")
    defer p.task.Finish()
    log := p.task.Log().WithField(logFSField, fs.ToString())

    fsversions, stop := p.filterVersions(fs)
    if stop {
        return
    }

    p.task.Enter("prune_policy")
    keep, remove, err := p.PrunePolicy.Prune(fs, fsversions)
    p.task.Finish()
    if err != nil {
        log.WithError(err).Error("error evaluating prune policy")
        return
    }

    log.WithField("fsversions", fsversions).
        WithField("keep", keep).
        WithField("remove", remove).
        Debug("prune policy debug dump")

    r = PruneResult{fs, fsversions, keep, remove}

    makeFields := func(v zfs.FilesystemVersion) (fields map[string]interface{}) {
        fields = make(map[string]interface{})
        fields["version"] = v.ToAbsPath(fs)
        timeSince := v.Creation.Sub(p.Now)
        fields["age_ns"] = timeSince
        const day time.Duration = 24 * time.Hour
        days := timeSince / day
        remainder := timeSince % day
        fields["age_str"] = fmt.Sprintf("%dd%s", days, remainder)
        return
    }

    for _, v := range remove {
        fields := makeFields(v)
        log.WithFields(fields).Info("destroying version")
        // echo what we'll do and exec zfs destroy if not dry run
        // TODO special handling for EBUSY (zfs hold)
        // TODO error handling for clones? just echo to cli, skip over, and exit with non-zero status code (we're idempotent)
        if !p.DryRun {
            p.task.Enter("destroy")
            err := zfs.ZFSDestroyFilesystemVersion(fs, v)
            p.task.Finish()
            if err != nil {
                log.WithFields(fields).WithError(err).Error("error destroying version")
            }
        }
    }
    return r, true
}

func (p *Pruner) Run(ctx context.Context) (r []PruneResult, err error) {
    p.task.Enter("run")
    defer p.task.Finish()

    if p.DryRun {
        p.task.Log().Info("doing dry run")
    }

    filesystems, stop := p.filterFilesystems()
    if stop {
        return
    }

    r = make([]PruneResult, 0, len(filesystems))

    for _, fs := range filesystems {
        res, ok := p.pruneFilesystem(fs)
        if ok {
            r = append(r, res)
        }
    }

    return

}
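The makeFields closure in the removed cmd/prune.go derives a human-readable age from a duration by splitting it into whole days plus a remainder. A tiny standalone illustration of that arithmetic; the example age is made up:

// Illustrative sketch of the day/remainder formatting used in makeFields.
package main

import (
    "fmt"
    "time"
)

func main() {
    const day = 24 * time.Hour
    age := 3*day + 4*time.Hour + 30*time.Minute // pretend the snapshot is this old

    days := age / day      // integer division on time.Duration: 3
    remainder := age % day // 4h30m0s
    fmt.Printf("%dd%s\n", days, remainder) // prints "3d4h30m0s"
}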
@ -1,323 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"github.com/zrepl/zrepl/rpc"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
)
|
||||
|
||||
type localPullACL struct{}
|
||||
|
||||
func (a localPullACL) Filter(p *zfs.DatasetPath) (pass bool, err error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
const DEFAULT_INITIAL_REPL_POLICY = InitialReplPolicyMostRecent
|
||||
|
||||
type InitialReplPolicy string
|
||||
|
||||
const (
|
||||
InitialReplPolicyMostRecent InitialReplPolicy = "most_recent"
|
||||
InitialReplPolicyAll InitialReplPolicy = "all"
|
||||
)
|
||||
|
||||
type Puller struct {
|
||||
task *Task
|
||||
Remote rpc.RPCClient
|
||||
Mapping DatasetMapping
|
||||
InitialReplPolicy InitialReplPolicy
|
||||
}
|
||||
|
||||
type remoteLocalMapping struct {
|
||||
Remote *zfs.DatasetPath
|
||||
Local *zfs.DatasetPath
|
||||
}
|
||||
|
||||
func (p *Puller) getRemoteFilesystems() (rfs []*zfs.DatasetPath, ok bool) {
|
||||
p.task.Enter("fetch_remote_fs_list")
|
||||
defer p.task.Finish()
|
||||
|
||||
fsr := FilesystemRequest{}
|
||||
if err := p.Remote.Call("FilesystemRequest", &fsr, &rfs); err != nil {
|
||||
p.task.Log().WithError(err).Error("cannot fetch remote filesystem list")
|
||||
return nil, false
|
||||
}
|
||||
return rfs, true
|
||||
}
|
||||
|
||||
func (p *Puller) buildReplMapping(remoteFilesystems []*zfs.DatasetPath) (replMapping map[string]remoteLocalMapping, ok bool) {
|
||||
p.task.Enter("build_repl_mapping")
|
||||
defer p.task.Finish()
|
||||
|
||||
replMapping = make(map[string]remoteLocalMapping, len(remoteFilesystems))
|
||||
for fs := range remoteFilesystems {
|
||||
var err error
|
||||
var localFs *zfs.DatasetPath
|
||||
localFs, err = p.Mapping.Map(remoteFilesystems[fs])
|
||||
if err != nil {
|
||||
err := fmt.Errorf("error mapping %s: %s", remoteFilesystems[fs], err)
|
||||
p.task.Log().WithError(err).WithField(logMapFromField, remoteFilesystems[fs]).Error("cannot map")
|
||||
return nil, false
|
||||
}
|
||||
if localFs == nil {
|
||||
continue
|
||||
}
|
||||
p.task.Log().WithField(logMapFromField, remoteFilesystems[fs].ToString()).
|
||||
WithField(logMapToField, localFs.ToString()).Debug("mapping")
|
||||
m := remoteLocalMapping{remoteFilesystems[fs], localFs}
|
||||
replMapping[m.Local.ToString()] = m
|
||||
}
|
||||
return replMapping, true
|
||||
}
|
||||
|
||||
// returns true if the receiving filesystem (local side) exists and can have child filesystems
|
||||
func (p *Puller) replFilesystem(m remoteLocalMapping, localFilesystemState map[string]zfs.FilesystemState) (localExists bool) {
|
||||
|
||||
p.task.Enter("repl_fs")
|
||||
defer p.task.Finish()
|
||||
var err error
|
||||
remote := p.Remote
|
||||
|
||||
log := p.task.Log().
|
||||
WithField(logMapFromField, m.Remote.ToString()).
|
||||
WithField(logMapToField, m.Local.ToString())
|
||||
|
||||
log.Debug("examining local filesystem state")
|
||||
localState, localExists := localFilesystemState[m.Local.ToString()]
|
||||
var versions []zfs.FilesystemVersion
|
||||
switch {
|
||||
case !localExists:
|
||||
log.Info("local filesystem does not exist")
|
||||
case localState.Placeholder:
|
||||
log.Info("local filesystem is marked as placeholder")
|
||||
default:
|
||||
log.Debug("local filesystem exists")
|
||||
log.Debug("requesting local filesystem versions")
|
||||
if versions, err = zfs.ZFSListFilesystemVersions(m.Local, nil); err != nil {
|
||||
log.WithError(err).Error("cannot get local filesystem versions")
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
log.Info("requesting remote filesystem versions")
|
||||
r := FilesystemVersionsRequest{
|
||||
Filesystem: m.Remote,
|
||||
}
|
||||
var theirVersions []zfs.FilesystemVersion
|
||||
if err = remote.Call("FilesystemVersionsRequest", &r, &theirVersions); err != nil {
|
||||
log.WithError(err).Error("cannot get remote filesystem versions")
|
||||
log.Warn("stopping replication for all filesystems mapped as children of receiving filesystem")
|
||||
return false
|
||||
}
|
||||
|
||||
log.Debug("computing diff between remote and local filesystem versions")
|
||||
diff := zfs.MakeFilesystemDiff(versions, theirVersions)
|
||||
log.WithField("diff", diff).Debug("diff between local and remote filesystem")
|
||||
|
||||
if localState.Placeholder && diff.Conflict != zfs.ConflictAllRight {
|
||||
panic("internal inconsistency: local placeholder implies ConflictAllRight")
|
||||
}
|
||||
|
||||
switch diff.Conflict {
|
||||
case zfs.ConflictAllRight:
|
||||
|
||||
log.WithField("replication_policy", p.InitialReplPolicy).Info("performing initial sync, following policy")
|
||||
|
||||
if p.InitialReplPolicy != InitialReplPolicyMostRecent {
|
||||
panic(fmt.Sprintf("policy '%s' not implemented", p.InitialReplPolicy))
|
||||
}
|
||||
|
||||
snapsOnly := make([]zfs.FilesystemVersion, 0, len(diff.MRCAPathRight))
|
||||
for s := range diff.MRCAPathRight {
|
||||
if diff.MRCAPathRight[s].Type == zfs.Snapshot {
|
||||
snapsOnly = append(snapsOnly, diff.MRCAPathRight[s])
|
||||
}
|
||||
}
|
||||
|
||||
if len(snapsOnly) < 1 {
|
||||
log.Warn("cannot perform initial sync: no remote snapshots")
|
||||
return false
|
||||
}
|
||||
|
||||
r := InitialTransferRequest{
|
||||
Filesystem: m.Remote,
|
||||
FilesystemVersion: snapsOnly[len(snapsOnly)-1],
|
||||
}
|
||||
|
||||
log.WithField("version", r.FilesystemVersion).Debug("requesting snapshot stream")
|
||||
|
||||
var stream io.Reader
|
||||
|
||||
if err = remote.Call("InitialTransferRequest", &r, &stream); err != nil {
|
||||
log.WithError(err).Error("cannot request initial transfer")
|
||||
return false
|
||||
}
|
||||
log.Debug("received initial transfer request response")
|
||||
|
||||
log.Debug("invoke zfs receive")
|
||||
recvArgs := []string{"-u"}
|
||||
if localState.Placeholder {
|
||||
log.Info("receive with forced rollback to replace placeholder filesystem")
|
||||
recvArgs = append(recvArgs, "-F")
|
||||
}
|
||||
progressStream := p.task.ProgressUpdater(stream)
|
||||
if err = zfs.ZFSRecv(m.Local, progressStream, recvArgs...); err != nil {
|
||||
log.WithError(err).Error("cannot receive stream")
|
||||
return false
|
||||
}
|
||||
log.Info("finished receiving stream") // TODO rx delta
|
||||
|
||||
// TODO unify with recv path of ConflictIncremental
|
||||
log.Debug("configuring properties of received filesystem")
|
||||
props := zfs.NewZFSProperties()
|
||||
props.Set("readonly", "on")
|
||||
if err = zfs.ZFSSet(m.Local, props); err != nil {
|
||||
log.WithError(err).Error("cannot set readonly property")
|
||||
}
|
||||
|
||||
log.Info("finished initial transfer")
|
||||
return true
|
||||
|
||||
case zfs.ConflictIncremental:
|
||||
|
||||
if len(diff.IncrementalPath) < 2 {
|
||||
log.Info("remote and local are in sync")
|
||||
return true
|
||||
}
|
||||
|
||||
log.Info("following incremental path from diff")
|
||||
for i := 0; i < len(diff.IncrementalPath)-1; i++ {
|
||||
|
||||
from, to := diff.IncrementalPath[i], diff.IncrementalPath[i+1]
|
||||
|
||||
log, _ := log.WithField(logIncFromField, from.Name).WithField(logIncToField, to.Name), 0
|
||||
|
||||
log.Debug("requesting incremental snapshot stream")
|
||||
r := IncrementalTransferRequest{
|
||||
Filesystem: m.Remote,
|
||||
From: from,
|
||||
To: to,
|
||||
}
|
||||
var stream io.Reader
|
||||
if err = remote.Call("IncrementalTransferRequest", &r, &stream); err != nil {
|
||||
log.WithError(err).Error("cannot request incremental snapshot stream")
|
||||
return false
|
||||
}
|
||||
|
||||
log.Debug("invoking zfs receive")
|
||||
progressStream := p.task.ProgressUpdater(stream)
|
||||
// TODO protect against malicious incremental stream
|
||||
if err = zfs.ZFSRecv(m.Local, progressStream); err != nil {
|
||||
log.WithError(err).Error("cannot receive stream")
|
||||
return false
|
||||
}
|
||||
log.Info("finished incremental transfer") // TODO increment rx
|
||||
|
||||
}
|
||||
log.Info("finished following incremental path") // TODO path rx
|
||||
return true
|
||||
|
||||
case zfs.ConflictNoCommonAncestor:
|
||||
fallthrough
|
||||
case zfs.ConflictDiverged:
|
||||
|
||||
var jsonDiff bytes.Buffer
|
||||
if err := json.NewEncoder(&jsonDiff).Encode(diff); err != nil {
|
||||
log.WithError(err).Error("cannot JSON-encode diff")
|
||||
return false
|
||||
}
|
||||
|
||||
var problem, resolution string
|
||||
|
||||
switch diff.Conflict {
|
||||
case zfs.ConflictNoCommonAncestor:
|
||||
problem = "remote and local filesystem have snapshots, but no common one"
|
||||
resolution = "perform manual establish a common snapshot history"
|
||||
case zfs.ConflictDiverged:
|
||||
problem = "remote and local filesystem share a history but have diverged"
|
||||
resolution = "perform manual replication or delete snapshots on the receiving" +
|
||||
"side to establish an incremental replication parse"
|
||||
}
|
||||
|
||||
log.WithField("diff", jsonDiff.String()).
|
||||
WithField("problem", problem).
|
||||
WithField("resolution", resolution).
|
||||
Error("manual conflict resolution required")
|
||||
|
||||
return false
|
||||
|
||||
}
|
||||
|
||||
panic("should not be reached")
|
||||
}
|
||||
|
||||
func (p *Puller) Pull() {
|
||||
p.task.Enter("run")
|
||||
defer p.task.Finish()
|
||||
|
||||
p.task.Log().Info("request remote filesystem list")
|
||||
remoteFilesystems, ok := p.getRemoteFilesystems()
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
p.task.Log().Debug("map remote filesystems to local paths and determine order for per-filesystem sync")
|
||||
replMapping, ok := p.buildReplMapping(remoteFilesystems)
|
||||
if !ok {
|
||||
|
||||
}
|
||||
|
||||
p.task.Log().Debug("build cache for already present local filesystem state")
|
||||
p.task.Enter("cache_local_fs_state")
|
||||
localFilesystemState, err := zfs.ZFSListFilesystemState()
|
||||
p.task.Finish()
|
||||
if err != nil {
|
||||
p.task.Log().WithError(err).Error("cannot request local filesystem state")
|
||||
return
|
||||
}
|
||||
|
||||
localTraversal := zfs.NewDatasetPathForest()
|
||||
for _, m := range replMapping {
|
||||
localTraversal.Add(m.Local)
|
||||
}
|
||||
|
||||
p.task.Log().Info("start per-filesystem sync")
|
||||
localTraversal.WalkTopDown(func(v zfs.DatasetPathVisit) bool {
|
||||
|
||||
p.task.Enter("tree_walk")
|
||||
defer p.task.Finish()
|
||||
|
||||
log := p.task.Log().WithField(logFSField, v.Path.ToString())
|
||||
|
||||
if v.FilledIn {
|
||||
if _, exists := localFilesystemState[v.Path.ToString()]; exists {
|
||||
// No need to verify if this is a placeholder or not. It is sufficient
|
||||
// to know we can add child filesystems to it
|
||||
return true
|
||||
}
|
||||
log.Debug("create placeholder filesystem")
|
||||
p.task.Enter("create_placeholder")
|
||||
err = zfs.ZFSCreatePlaceholderFilesystem(v.Path)
|
||||
p.task.Finish()
|
||||
if err != nil {
|
||||
log.Error("cannot create placeholder filesystem")
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
m, ok := replMapping[v.Path.ToString()]
|
||||
if !ok {
|
||||
panic("internal inconsistency: replMapping should contain mapping for any path that was not filled in by WalkTopDown()")
|
||||
}
|
||||
|
||||
return p.replFilesystem(m, localFilesystemState)
|
||||
})
|
||||
|
||||
return
|
||||
|
||||
}
|
@ -1,29 +0,0 @@
jobs:
  - name: mirror_local
    type: local

    # snapshot the filesystems matched by the left-hand-side of the mapping
    # every 10m with zrepl_ as prefix
    mapping: {
      "zroot/var/db<": "storage/backups/local/zroot/var/db",
      "zroot/usr/home<": "storage/backups/local/zroot/usr/home",
      "zroot/usr/home/paranoid": "!", #don't backup paranoid user
      "zroot/poudriere/ports<": "!", #don't backup the ports trees
    }
    snapshot_prefix: zrepl_
    interval: 10m
    initial_repl_policy: most_recent

    # keep one hour of 10m interval snapshots of filesystems matched by
    # the left-hand-side of the mapping
    prune_lhs:
      policy: grid
      grid: 1x1h(keep=all)
      keep_bookmarks: all

    # follow a grandfathering scheme for filesystems on the right-hand-side of the mapping
    prune_rhs:
      policy: grid
      grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
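Reading each grid term as count times interval length, the grandfathering spec "1x1h(keep=all) | 24x1h | 35x1d | 6x30d" covers roughly seven months; a quick back-of-the-envelope check (this interpretation of the spec string is an assumption for illustration):

// Illustrative arithmetic check of the retention window covered by the grid.
package main

import (
    "fmt"
    "time"
)

func main() {
    day := 24 * time.Hour
    total := 1*time.Hour + 24*time.Hour + 35*day + 6*30*day
    fmt.Println(total.Hours()/24, "days") // ~216 days
}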
@ -1,27 +0,0 @@
jobs:
  - name: fullbackup_prod1
    type: pull
    # connect to remote using ssh / stdinserver command
    connect:
      type: ssh+stdinserver
      host: prod1.example.com
      user: root
      port: 22
      identity_file: /root/.ssh/id_ed25519

    # pull (=ask for new snapshots) every 10m, prune afterwards
    # this will leave us at most 10m behind production
    interval: 10m

    # pull all offered filesystems to storage/backups/zrepl/pull/prod1.example.com
    mapping: {
      "<":"storage/backups/zrepl/pull/prod1.example.com"
    }
    initial_repl_policy: most_recent

    # follow a grandfathering scheme for filesystems on the right-hand-side of the mapping
    snapshot_prefix: zrepl_
    prune:
      policy: grid
      grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
@ -1,47 +0,0 @@
global:
  serve:
    stdinserver:
      # Directory where AF_UNIX sockets for stdinserver command are placed.
      #
      # `zrepl stdinserver CLIENT_IDENTITY`
      #  * connects to the socket in $sockdir/CLIENT_IDENTITY
      #  * sends its stdin / stdout file descriptors to the `zrepl daemon` process (see cmsg(3))
      #  * does nothing more
      #
      # This enables a setup where `zrepl daemon` is not directly exposed to the internet
      # but instead all traffic is tunnelled through SSH.
      # The server with the source job has an authorized_keys file entry for the public key
      # used by the corresponding pull job
      #
      #   command="/mnt/zrepl stdinserver CLIENT_IDENTITY" ssh-ed25519 AAAAC3NzaC1E... zrepl@pullingserver
      #
      # Below is the default value.
      sockdir: /var/run/zrepl/stdinserver

jobs:

  - name: fullbackup_prod1
    # expect remote to connect via ssh+stdinserver with fullbackup_prod1 as client_identity
    type: source
    serve:
      type: stdinserver # see global.serve.stdinserver for explanation
      client_identity: fullbackup_prod1

    # snapshot these filesystems every 10m with zrepl_ as prefix
    filesystems: {
      "zroot/var/db<": "ok",
      "zroot/usr/home<": "ok",
      "zroot/var/tmp": "!", #don't backup /tmp
    }
    snapshot_prefix: zrepl_
    interval: 10m

    # keep 1 hour of snapshots (6 at 10m interval)
    # and one day of bookmarks in case pull doesn't work (link down, etc)
    # => keep_bookmarks = 24h / interval = 24h / 10m = 144
    prune:
      policy: grid
      grid: 1x1h(keep=all)
      keep_bookmarks: 144
@ -1,20 +0,0 @@
jobs:
  - name: fullbackup_prod1

    # expect remote to connect via ssh+stdinserver with fullbackup_prod1 as client_identity
    type: push-sink
    serve:
      type: stdinserver
      client_identity: fullbackup_prod1

    # map all pushed datasets to storage/backups/zrepl/sink/prod1.example.com
    mapping: {
      "<":"storage/backups/zrepl/sink/prod1.example.com"
    }

    # follow a grandfathering scheme for filesystems on the right-hand-side of the mapping
    prune:
      policy: grid
      grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
@ -1,26 +0,0 @@
jobs:
  - name: fullbackup_prod1

    # connect to remote using ssh / stdinserver command
    type: push
    connect:
      type: ssh+stdinserver
      host: prod1.example.com
      user: root
      port: 22
      identity_file: /root/.ssh/id_ed25519

    # snapshot these datasets every 10m with zrepl_ as prefix
    filesystems: {
      "zroot/var/db<": "ok",
      "zroot/usr/home<": "!",
    }
    snapshot_prefix: zrepl_
    interval: 10m

    # keep a one-day window of 10m-interval snapshots in case push doesn't work (link down, etc)
    # (we cannot keep more than one day because this host will run out of disk space)
    prune:
      policy: grid
      grid: 1x1d(keep=all)
@ -1,33 +0,0 @@
global:
  serve:
    stdinserver:
      sockdir: /var/run/zrepl/stdinserver

jobs:

  - name: debian2_pull
    # JOB DEBUGGING OPTIONS
    # should be equal for all job types, but each job implements the debugging itself
    # => consult job documentation for supported options
    debug:
      conn: # debug the io.ReadWriteCloser connection
        read_dump: /tmp/connlog_read # dump results of Read() invocations to this file
        write_dump: /tmp/connlog_write # dump results of Write() invocations to this file
      rpc: # debug the RPC protocol implementation
        log: true # log output from rpc layer to the job log

    # ... just to make the unit tests pass.
    # check other examples, e.g. localbackup or pullbackup for what the stuff below means
    type: source
    serve:
      type: stdinserver
      client_identity: debian2
    filesystems: {
      "pool1/db<": ok
    }
    snapshot_prefix: zrepl_
    interval: 1s
    prune:
      policy: grid
      grid: 1x10s(keep=all)
      keep_bookmarks: all
@ -1,19 +0,0 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDIzCCAgsCAQEwDQYJKoZIhvcNAQELBQAwWTELMAkGA1UEBhMCQVUxEzARBgNV
|
||||
BAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0
|
||||
ZDESMBAGA1UEAwwJbG9nc2VydmVyMB4XDTE3MDkyNDEyMzAzNloXDTE3MTAyNDEy
|
||||
MzAzNlowVjELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNV
|
||||
BAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MIIB
|
||||
IjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAt/xJTUlqApeJGzRD+w2J8sZS
|
||||
Bo+s+04T987L/M6gaCo8aDSTEb/ZH3XSoU5JEmO6kPpwNNapOsaEhTCjndZQdm5F
|
||||
uqiUtAg1uW0HCkBEIDkGr9bFHDKzpewGmmMgfQ2+hfiBR/4ZCrc/vd9P0W9BiWQS
|
||||
Dtc7p22XraWPVL8HlSz5K/Ih+V6i8O+kBltZkusiJh2bWPoRp/netiTZuc6du+Wp
|
||||
kpWp1OBaTU4GXIAlLj5afF14BBphRQK983Yhaz53BkA7OQ76XxowynMjmuLQVGmK
|
||||
f1R9zEJuohTX9XIr1tp/ueRHcS4Awk6LcNZUMCV6270FNSIw2f4hbOZvep+t2wID
|
||||
AQABMA0GCSqGSIb3DQEBCwUAA4IBAQACK3OeNzScpiNwz/jpg/usQzvXbZ/wDvml
|
||||
YLjtzn/A65ox8a8BhxvH1ydyoCM2YAGYX7+y7qXJnMgRO/v8565CQIVcznHhg9ST
|
||||
3828/WqZ3bXf2DV5GxKKQf7hPmBnyVUUhn/Ny91MECED27lZucWiX/bczN8ffDeh
|
||||
M3+ngezcJxsOBd4x0gLrqIJCoaFRSeepOaFEW6GHQ8loxE9GmA7FQd2phIpJHFSd
|
||||
Z7nQl7X5C1iN2OboEApJHwtmNVC45UlOpg53vo2sDTLhSfdogstiWi8x1HmvhIGM
|
||||
j3XHs0Illvo9OwVrmgUph8zQ7pvr/AFrTOIbhgzl/9uVUk5ApwFM
|
||||
-----END CERTIFICATE-----
|
@ -1,16 +0,0 @@
|
||||
-----BEGIN CERTIFICATE REQUEST-----
|
||||
MIICmzCCAYMCAQAwVjELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUx
|
||||
ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xp
|
||||
ZW50MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAt/xJTUlqApeJGzRD
|
||||
+w2J8sZSBo+s+04T987L/M6gaCo8aDSTEb/ZH3XSoU5JEmO6kPpwNNapOsaEhTCj
|
||||
ndZQdm5FuqiUtAg1uW0HCkBEIDkGr9bFHDKzpewGmmMgfQ2+hfiBR/4ZCrc/vd9P
|
||||
0W9BiWQSDtc7p22XraWPVL8HlSz5K/Ih+V6i8O+kBltZkusiJh2bWPoRp/netiTZ
|
||||
uc6du+WpkpWp1OBaTU4GXIAlLj5afF14BBphRQK983Yhaz53BkA7OQ76XxowynMj
|
||||
muLQVGmKf1R9zEJuohTX9XIr1tp/ueRHcS4Awk6LcNZUMCV6270FNSIw2f4hbOZv
|
||||
ep+t2wIDAQABoAAwDQYJKoZIhvcNAQELBQADggEBAKnlr0Qs5KYF85u2YA7DJ5pL
|
||||
HwAx+qNoNbox5CS1aynrDBpDTWLaErviUJ+4WxRlRyTMEscMOIOKajbYhqqFmtGZ
|
||||
mu3SshZnFihErw8TOQMyU1LGGG+l6r+6ve5TciwJRLla2Y75z7izr6cyvQNRWdLr
|
||||
PvxL1/Yqr8LKha12+7o28R4SLf6/GY0GcedqoebRmtuwA/jES0PuGauEUD5lH4cj
|
||||
Me8sqRrB+IMHQ5j8hlJX4DbA8UQRUBL64sHkQzeQfWu+qkWmS5I19CFfLNrcH+OV
|
||||
yhyjGfN0q0jHyHdpckBhgzS7IIdo6P66AIlm4qpHM7Scra3JaGM7oaZPamJ6f8U=
|
||||
-----END CERTIFICATE REQUEST-----
|
@ -1,28 +0,0 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC3/ElNSWoCl4kb
|
||||
NEP7DYnyxlIGj6z7ThP3zsv8zqBoKjxoNJMRv9kfddKhTkkSY7qQ+nA01qk6xoSF
|
||||
MKOd1lB2bkW6qJS0CDW5bQcKQEQgOQav1sUcMrOl7AaaYyB9Db6F+IFH/hkKtz+9
|
||||
30/Rb0GJZBIO1zunbZetpY9UvweVLPkr8iH5XqLw76QGW1mS6yImHZtY+hGn+d62
|
||||
JNm5zp275amSlanU4FpNTgZcgCUuPlp8XXgEGmFFAr3zdiFrPncGQDs5DvpfGjDK
|
||||
cyOa4tBUaYp/VH3MQm6iFNf1civW2n+55EdxLgDCTotw1lQwJXrbvQU1IjDZ/iFs
|
||||
5m96n63bAgMBAAECggEAF4om0sWe06ARwbJJNFjCGpa3LfG5/xk5Qs5pmPnS2iD1
|
||||
Q5veaTnzjKvlfA/pF3o9B4mTS59fXY7Cq8vSU0J1XwGy2DPzeqlGPmgtq2kXjkvd
|
||||
iCfhZj8ybvsoyR3/rSBSDRADcnOXPqC9fgyRSMmESBDOoql1D3HdIzF4ii46ySIU
|
||||
/XQvExS6NWifbP+Ue6DETV8NhreO5PqjeXLITQhhndtc8MDL/8eCNOyN8XjYIWKX
|
||||
smlBYtRQYOOY9BHOQgUn6yvPHrtKJNKci+qcQNvWir66mBhY1o40MH5wTIV+8yP2
|
||||
Vbm/VzoNKIYgeROsilBW7QTwGvkDn3R11zeTqfUNSQKBgQD0eFzhJAEZi4uBw6Tg
|
||||
NKmBC5Y1IHPOsb5gKPNz9Z9j4qYRDySgYl6ISk+2EdhgUCo1NmTk8EIPQjIerUVf
|
||||
S+EogFnpsj8U9LR3OM79DaGkNULxrHqhd209/g8DtVgk7yjkxL4vmVOv8qpHMp/7
|
||||
eWsylN7AOxj2RB/eXYQBPrw+jQKBgQDAqae9HasLmvpJ9ktTv30yZSKXC+LP4A0D
|
||||
RBBmx410VpPd4CvcpCJxXmjer6B7+9L1xHYP2pvsnMBid5i0knuvyK28dYy7fldl
|
||||
CzWvb+lqNA5YYPFXQED4oEdihlQczoI1Bm06SFizeAKD1Q9e2c+lgbR/51j8xuXi
|
||||
twvhMj/YBwKBgQCZw97/iQrcC2Zq7yiUEOuQjD4lGk1c83U/vGIsTJC9XcCAOFsc
|
||||
OeMlrD/oz96d7a4unBDn4qpaOJOXsfpRT0PGmrxy/jcpMiUUW/ntNpa11v5NTeQw
|
||||
DRL8DAFbnsNbL8Yz5f+Nps35fBNYBuKTZLJlNTfKByHTO9QjpAQ0WEZEvQKBgQCi
|
||||
Ovm83EuYVSKmvxcE6Tyx/8lVqTOO2Vn7wweQlD4/lVujvE0S2L8L+XSS9w5K+GzW
|
||||
eFz10p3zarbw80YJ30L5bSEmjVE43BUZR4woMzM4M6dUsiTm1HshIE2b4ALZ0uZ/
|
||||
Ye794ceXL9nmSrVLqFsaQZLNFPCwwYb4FiyRry9lZwKBgAO9VbWcN8SEeBDKo3z8
|
||||
yRbRTc6sI+AdKY44Dfx0tqOPmTjO3mE4X1GU4sbfD2Bvg3DdjwTuxxC/jHaKu0GG
|
||||
dTM0CbrZGbDAj7E87SOcN/PWEeBckSvuQq5H3DQfwIpTmlS1l5oZn9CxRGbLqC2G
|
||||
ifnel8XWUG0ROybsr1tk4mzW
|
||||
-----END PRIVATE KEY-----
|
@ -1,21 +0,0 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDiDCCAnCgAwIBAgIJALhp/WvTQeg/MA0GCSqGSIb3DQEBCwUAMFkxCzAJBgNV
|
||||
BAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBX
|
||||
aWRnaXRzIFB0eSBMdGQxEjAQBgNVBAMMCWxvZ3NlcnZlcjAeFw0xNzA5MjQxMjI3
|
||||
MDRaFw0yNzA5MjIxMjI3MDRaMFkxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21l
|
||||
LVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQxEjAQBgNV
|
||||
BAMMCWxvZ3NlcnZlcjCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKs3
|
||||
TLYfXhV3hap71tOkhPQlM+m0EKRAo8Nua50Cci5UhDo4JkVpyYok1h+NFkqmjU2b
|
||||
IiIuGvsZZPOWYjbWWnSJE4+n5pBFBzcfNQ4d8xVxjANImFn6Tcehhj0WkbDIv/Ge
|
||||
364XUgywS7u3EGQj/FO7vZ8KHlUxBHNuPIOPHftwIVRyleh5K32UyBaSpSmnqGos
|
||||
rvI1byMuznavcZpOs4vlebZ+Jy6a20iKf9fj/0f0t0O+F5x3JIk07D3zSywhJ4RM
|
||||
M0mGIUmYXbh2SMh+f61KDZLDANpz/pMAPbUJe0mxEtBf0tnwK1gEqc3SLwA0EwiM
|
||||
8Hnn2iaH5Ln20UE3LOkCAwEAAaNTMFEwHQYDVR0OBBYEFDXoDcwx9SngzZcRYCeP
|
||||
BplBecfiMB8GA1UdIwQYMBaAFDXoDcwx9SngzZcRYCePBplBecfiMA8GA1UdEwEB
|
||||
/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBADyNvs4AA91x3gurQb1pcPVhK6nR
|
||||
mkYSTN1AsDKSRi/X2iCUmR7G7FlF7XW8mntTpHvVzcs+gr94WckH5wqEOA5iZnaw
|
||||
PXUWexmdXUge4hmC2q6kBQ5e2ykhSJMRVZXvOLZOZV9qitceamHESV1cKZSNMvZM
|
||||
aCSVA1RK61/nUzs04pVp5PFPv9gFxJp9ki39FYFdsgZmM5RZ5I/FqxxvTJzu4RnH
|
||||
VPjsMopzARYwJw6dV2bKdFSYOE8B/Vs3Yv0GxjrABw2ko4PkBPTjLIz22x6+Hd9r
|
||||
K9BQi4pVmQfvppF5+SORSftlHSS+N47b0DD1rW1f5R6QGi71dFuJGikOwvY=
|
||||
-----END CERTIFICATE-----
|
@ -1,28 +0,0 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCrN0y2H14Vd4Wq
|
||||
e9bTpIT0JTPptBCkQKPDbmudAnIuVIQ6OCZFacmKJNYfjRZKpo1NmyIiLhr7GWTz
|
||||
lmI21lp0iROPp+aQRQc3HzUOHfMVcYwDSJhZ+k3HoYY9FpGwyL/xnt+uF1IMsEu7
|
||||
txBkI/xTu72fCh5VMQRzbjyDjx37cCFUcpXoeSt9lMgWkqUpp6hqLK7yNW8jLs52
|
||||
r3GaTrOL5Xm2ficumttIin/X4/9H9LdDvhecdySJNOw980ssISeETDNJhiFJmF24
|
||||
dkjIfn+tSg2SwwDac/6TAD21CXtJsRLQX9LZ8CtYBKnN0i8ANBMIjPB559omh+S5
|
||||
9tFBNyzpAgMBAAECggEBAIY8ZwJq+WKvQLb3POjWFf8so9TY/ispGrwAeJKy9j5o
|
||||
uPrERw0o8YBDfTVjclS43BQ6Srqtly3DLSjlgL8ps+WmCxYYN2ZpGE0ZRIl65bis
|
||||
O2/fnML+wbiAZTTD2xnVatfPDeP6GLQmDFpyHoHEzPIBQZvNXRbBxZGSnhMvQ/x7
|
||||
FhqSBQG4kf3b1XDCENIbFEVOBOCg7WtMiIgjEGS7QnW3I65/Zt+Ts1LXRZbz+6na
|
||||
Gmi0PGHA/oLUh1NRzsF4zuZn6fFzja5zw4mkt+JvCWEoxg1QhRAxRp6QQwmZ6MIc
|
||||
1rw1D4Z+c5UEKyqHeIwZj4M6UNPhCfTXVm47c9eSiGECgYEA4U8pB+7eRo2fqX0C
|
||||
nWsWMcmsULJvwplQnUSFenUayPn3E8ammS/ZBHksoKhj82vwIdDbtS1hQZn8Bzsi
|
||||
atc8au0wz0YRDcVDzHX4HknXVQayHtP/FTPeSr5hwpoY8vhEbySuxBTBkXCrp4dx
|
||||
u5ErfOiYEP3Q1ZvPRywelrATu20CgYEAwonV5dgOcen/4oAirlnvufc2NfqhAQwJ
|
||||
FJ/JSVMAcXxPYu3sZMv0dGWrX8mLc+P1+XMCuV/7eBM/vU2LbDzmpeUV8sJfB2jw
|
||||
wyKqKXZwBgeq60btriA4f+0ElwRGgU2KSiniUuuTX2JmyftFQx4cVAQRCFk27NY0
|
||||
09psSsYyre0CgYBo6unabdtH029EB5iOIW3GZXk+Yrk0TxyA/4WAjsOYTv5FUT4H
|
||||
G4bdVGf5sDBLDDpYJOAKsEUXvVLlMx5FzlCuIiGWg7QxS2jU7yJJSG1jhKixPlsM
|
||||
Toj3GUyAyC1SB1Ymw1g2qsuwpFzquGG3zFQJ6G3Xi7oRnmqZY+wik3+8yQKBgB11
|
||||
SdKYOPe++2SNCrNkIw0CBk9+OEs0S1u4Jn7X9sU4kbzlUlqhF89YZe8HUfqmlmTD
|
||||
qbHwet/f6lL8HxSw1Cxi2EP+cu1oUqz53tKQgL4pAxTFlNA9SND2Ty+fEh4aY8p/
|
||||
NSphSduzxuTnC8HyGVAPnZSqDcsnVLCP7r4T7TCxAoGAbJygkkk/gZ9pT4fZoIaq
|
||||
8CMR8FTfxtkwCuZsWccSMUOWtx9nqet3gbCpKHfyoYZiKB4ke+lnUz4uFS16Y3hG
|
||||
kN0hFfvfoNa8eB2Ox7vs60cMMfWJac0H7KSaDDy+EvbhE2KtQADT0eWxMyhzGR8p
|
||||
5CbIivB0QCjeQIA8dOQpE8E=
|
||||
-----END PRIVATE KEY-----
|
@ -1,28 +0,0 @@
|
||||
global:
|
||||
logging:
|
||||
|
||||
- outlet: stdout
|
||||
level: warn
|
||||
format: human
|
||||
|
||||
- outlet: tcp
|
||||
level: debug
|
||||
format: json
|
||||
net: tcp
|
||||
address: 127.0.0.1:8080
|
||||
retry_interval: 1s
|
||||
tls: # if not specified, use plain TCP
|
||||
ca: sampleconf/random/logging/logserver.crt
|
||||
cert: sampleconf/random/logging/client.crt
|
||||
key: sampleconf/random/logging/client.key
|
||||
|
||||
- outlet: syslog
|
||||
level: debug
|
||||
format: logfmt
|
||||
|
||||
monitoring:
|
||||
|
||||
- type: prometheus
|
||||
listen: ':9090'
|
||||
|
||||
jobs: []
|
215
cmd/test.go
@ -1,215 +0,0 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/kr/pretty"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"time"
|
||||
)
|
||||
|
||||
var testCmd = &cobra.Command{
|
||||
Use: "test",
|
||||
Short: "test configuration",
|
||||
PersistentPreRun: testCmdGlobalInit,
|
||||
}
|
||||
|
||||
var testCmdGlobal struct {
|
||||
log Logger
|
||||
conf *Config
|
||||
}
|
||||
|
||||
var testConfigSyntaxCmd = &cobra.Command{
|
||||
Use: "config",
|
||||
Short: "parse config file and dump parsed datastructure",
|
||||
Run: doTestConfig,
|
||||
}
|
||||
|
||||
var testDatasetMapFilter = &cobra.Command{
|
||||
Use: "pattern jobname test/zfs/dataset/path",
|
||||
Short: "test dataset mapping / filter specified in config",
|
||||
Example: ` zrepl test pattern my_pull_job tank/tmp`,
|
||||
Run: doTestDatasetMapFilter,
|
||||
}
|
||||
|
||||
var testPrunePolicyArgs struct {
|
||||
side PrunePolicySide
|
||||
showKept bool
|
||||
showRemoved bool
|
||||
}
|
||||
|
||||
var testPrunePolicyCmd = &cobra.Command{
|
||||
Use: "prune jobname",
|
||||
Short: "do a dry-run of the pruning part of a job",
|
||||
Run: doTestPrunePolicy,
|
||||
}
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(testCmd)
|
||||
testCmd.AddCommand(testConfigSyntaxCmd)
|
||||
testCmd.AddCommand(testDatasetMapFilter)
|
||||
|
||||
testPrunePolicyCmd.Flags().VarP(&testPrunePolicyArgs.side, "side", "s", "prune_lhs (left) or prune_rhs (right)")
|
||||
testPrunePolicyCmd.Flags().BoolVar(&testPrunePolicyArgs.showKept, "kept", false, "show kept snapshots")
|
||||
testPrunePolicyCmd.Flags().BoolVar(&testPrunePolicyArgs.showRemoved, "removed", true, "show removed snapshots")
|
||||
testCmd.AddCommand(testPrunePolicyCmd)
|
||||
}
|
||||
|
||||
func testCmdGlobalInit(cmd *cobra.Command, args []string) {
|
||||
|
||||
out := logger.NewOutlets()
|
||||
out.Add(WriterOutlet{&NoFormatter{}, os.Stdout}, logger.Info)
|
||||
log := logger.NewLogger(out, 1*time.Second)
|
||||
testCmdGlobal.log = log
|
||||
|
||||
var err error
|
||||
if testCmdGlobal.conf, err = ParseConfig(rootArgs.configFile); err != nil {
|
||||
testCmdGlobal.log.Printf("error parsing config file: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func doTestConfig(cmd *cobra.Command, args []string) {
|
||||
|
||||
log, conf := testCmdGlobal.log, testCmdGlobal.conf
|
||||
|
||||
log.Printf("config ok")
|
||||
log.Printf("%# v", pretty.Formatter(conf))
|
||||
return
|
||||
}
|
||||
|
||||
func doTestDatasetMapFilter(cmd *cobra.Command, args []string) {
|
||||
|
||||
log, conf := testCmdGlobal.log, testCmdGlobal.conf
|
||||
|
||||
if len(args) != 2 {
|
||||
log.Printf("specify job name as first postitional argument, test input as second")
|
||||
log.Printf(cmd.UsageString())
|
||||
os.Exit(1)
|
||||
}
|
||||
n, i := args[0], args[1]
|
||||
|
||||
jobi, err := conf.LookupJob(n)
|
||||
if err != nil {
|
||||
log.Printf("%s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var mf *DatasetMapFilter
|
||||
switch j := jobi.(type) {
|
||||
case *PullJob:
|
||||
mf = j.Mapping
|
||||
case *SourceJob:
|
||||
mf = j.Filesystems
|
||||
case *LocalJob:
|
||||
mf = j.Mapping
|
||||
default:
|
||||
panic("incomplete implementation")
|
||||
}
|
||||
|
||||
ip, err := zfs.NewDatasetPath(i)
|
||||
if err != nil {
|
||||
log.Printf("cannot parse test input as ZFS dataset path: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if mf.filterMode {
|
||||
pass, err := mf.Filter(ip)
|
||||
if err != nil {
|
||||
log.Printf("error evaluating filter: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
log.Printf("filter result: %v", pass)
|
||||
} else {
|
||||
res, err := mf.Map(ip)
|
||||
if err != nil {
|
||||
log.Printf("error evaluating mapping: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
toStr := "NO MAPPING"
|
||||
if res != nil {
|
||||
toStr = res.ToString()
|
||||
}
|
||||
log.Printf("%s => %s", ip.ToString(), toStr)
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func doTestPrunePolicy(cmd *cobra.Command, args []string) {
|
||||
|
||||
log, conf := testCmdGlobal.log, testCmdGlobal.conf
|
||||
|
||||
if cmd.Flags().NArg() != 1 {
|
||||
log.Printf("specify job name as first positional argument")
|
||||
log.Printf(cmd.UsageString())
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
jobname := cmd.Flags().Arg(0)
|
||||
jobi, err := conf.LookupJob(jobname)
|
||||
if err != nil {
|
||||
log.Printf("%s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
jobp, ok := jobi.(PruningJob)
|
||||
if !ok {
|
||||
log.Printf("job doesn't do any prunes")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
log.Printf("job dump:\n%s", pretty.Sprint(jobp))
|
||||
|
||||
task := NewTask("", jobi, log)
|
||||
pruner, err := jobp.Pruner(task, testPrunePolicyArgs.side, true)
|
||||
if err != nil {
|
||||
log.Printf("cannot create test pruner: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
log.Printf("start pruning")
|
||||
|
||||
ctx := context.WithValue(context.Background(), contextKeyLog, log)
|
||||
result, err := pruner.Run(ctx)
|
||||
if err != nil {
|
||||
log.Printf("error running pruner: %s", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
sort.Slice(result, func(i, j int) bool {
|
||||
return strings.Compare(result[i].Filesystem.ToString(), result[j].Filesystem.ToString()) == -1
|
||||
})
|
||||
|
||||
var b bytes.Buffer
|
||||
for _, r := range result {
|
||||
fmt.Fprintf(&b, "%s\n", r.Filesystem.ToString())
|
||||
|
||||
if testPrunePolicyArgs.showKept {
|
||||
fmt.Fprintf(&b, "\tkept:\n")
|
||||
for _, v := range r.Keep {
|
||||
fmt.Fprintf(&b, "\t- %s\n", v.Name)
|
||||
}
|
||||
}
|
||||
|
||||
if testPrunePolicyArgs.showRemoved {
|
||||
fmt.Fprintf(&b, "\tremoved:\n")
|
||||
for _, v := range r.Remove {
|
||||
fmt.Fprintf(&b, "\t- %s\n", v.Name)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
log.Printf("pruning result:\n%s", b.String())
|
||||
|
||||
}
|
483
config/config.go
Normal file
@ -0,0 +1,483 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/yaml-config"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
Jobs []JobEnum `yaml:"jobs"`
|
||||
Global *Global `yaml:"global,optional,fromdefaults"`
|
||||
}
|
||||
|
||||
func (c *Config) Job(name string) (*JobEnum, error) {
|
||||
for _, j := range c.Jobs {
|
||||
if j.Name() == name {
|
||||
return &j, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("job %q not defined in config", name)
|
||||
}
|
||||
|
||||
type JobEnum struct {
|
||||
Ret interface{}
|
||||
}
|
||||
|
||||
func (j JobEnum) Name() string {
|
||||
var name string
|
||||
switch v := j.Ret.(type) {
|
||||
case *PushJob: name = v.Name
|
||||
case *SinkJob: name = v.Name
|
||||
case *PullJob: name = v.Name
|
||||
case *SourceJob: name = v.Name
|
||||
default:
|
||||
panic(fmt.Sprintf("unknownn job type %T", v))
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
type ActiveJob struct {
|
||||
Type string `yaml:"type"`
|
||||
Name string `yaml:"name"`
|
||||
Connect ConnectEnum `yaml:"connect"`
|
||||
Pruning PruningSenderReceiver `yaml:"pruning"`
|
||||
Debug JobDebugSettings `yaml:"debug,optional"`
|
||||
}
|
||||
|
||||
type PassiveJob struct {
|
||||
Type string `yaml:"type"`
|
||||
Name string `yaml:"name"`
|
||||
Serve ServeEnum `yaml:"serve"`
|
||||
Debug JobDebugSettings `yaml:"debug,optional"`
|
||||
}
|
||||
|
||||
type PushJob struct {
|
||||
ActiveJob `yaml:",inline"`
|
||||
Snapshotting SnapshottingEnum `yaml:"snapshotting"`
|
||||
Filesystems FilesystemsFilter `yaml:"filesystems"`
|
||||
}
|
||||
|
||||
type PullJob struct {
|
||||
ActiveJob `yaml:",inline"`
|
||||
RootFS string `yaml:"root_fs"`
|
||||
Interval time.Duration `yaml:"interval,positive"`
|
||||
}
|
||||
|
||||
type SinkJob struct {
|
||||
PassiveJob `yaml:",inline"`
|
||||
RootFS string `yaml:"root_fs"`
|
||||
}
|
||||
|
||||
type SourceJob struct {
|
||||
PassiveJob `yaml:",inline"`
|
||||
Snapshotting SnapshottingEnum `yaml:"snapshotting"`
|
||||
Filesystems FilesystemsFilter `yaml:"filesystems"`
|
||||
}
|
||||
|
||||
type FilesystemsFilter map[string]bool
|
||||
|
||||
type SnapshottingEnum struct {
|
||||
Ret interface{}
|
||||
}
|
||||
|
||||
type SnapshottingPeriodic struct {
|
||||
Type string `yaml:"type"`
|
||||
Prefix string `yaml:"prefix"`
|
||||
Interval time.Duration `yaml:"interval,positive"`
|
||||
}
|
||||
|
||||
type SnapshottingManual struct {
|
||||
Type string `yaml:"type"`
|
||||
}
|
||||
|
||||
type PruningSenderReceiver struct {
|
||||
KeepSender []PruningEnum `yaml:"keep_sender"`
|
||||
KeepReceiver []PruningEnum `yaml:"keep_receiver"`
|
||||
}
|
||||
|
||||
type PruningLocal struct {
|
||||
Keep []PruningEnum `yaml:"keep"`
|
||||
}
|
||||
|
||||
type LoggingOutletEnumList []LoggingOutletEnum
|
||||
|
||||
func (l *LoggingOutletEnumList) SetDefault() {
|
||||
def := `
|
||||
type: "stdout"
|
||||
time: true
|
||||
level: "warn"
|
||||
format: "human"
|
||||
`
|
||||
s := StdoutLoggingOutlet{}
|
||||
err := yaml.UnmarshalStrict([]byte(def), &s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
*l = []LoggingOutletEnum{LoggingOutletEnum{Ret: s}}
|
||||
}
|
||||
|
||||
var _ yaml.Defaulter = &LoggingOutletEnumList{}
|
||||
|
||||
type Global struct {
|
||||
Logging *LoggingOutletEnumList `yaml:"logging,optional,fromdefaults"`
|
||||
Monitoring []MonitoringEnum `yaml:"monitoring,optional"`
|
||||
Control *GlobalControl `yaml:"control,optional,fromdefaults"`
|
||||
Serve *GlobalServe `yaml:"serve,optional,fromdefaults"`
|
||||
RPC *RPCConfig `yaml:"rpc,optional,fromdefaults"`
|
||||
}
|
||||
|
||||
func Default(i interface{}) {
|
||||
v := reflect.ValueOf(i)
|
||||
if v.Kind() != reflect.Ptr {
|
||||
panic(v)
|
||||
}
|
||||
y := `{}`
|
||||
err := yaml.Unmarshal([]byte(y), v.Interface())
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
type RPCConfig struct {
|
||||
Timeout time.Duration `yaml:"timeout,optional,positive,default=10s"`
|
||||
TxChunkSize uint32 `yaml:"tx_chunk_size,optional,default=32768"`
|
||||
RxStructuredMaxLen uint32 `yaml:"rx_structured_max,optional,default=16777216"`
|
||||
RxStreamChunkMaxLen uint32 `yaml:"rx_stream_chunk_max,optional,default=16777216"`
|
||||
RxHeaderMaxLen uint32 `yaml:"rx_header_max,optional,default=40960"`
|
||||
SendHeartbeatInterval time.Duration `yaml:"send_heartbeat_interval,optional,positive,default=5s"`
|
||||
|
||||
}
|
||||
|
||||
type ConnectEnum struct {
|
||||
Ret interface{}
|
||||
}
|
||||
|
||||
type ConnectCommon struct {
|
||||
Type string `yaml:"type"`
|
||||
RPC *RPCConfig `yaml:"rpc,optional"`
|
||||
}
|
||||
|
||||
type TCPConnect struct {
|
||||
ConnectCommon `yaml:",inline"`
|
||||
Address string `yaml:"address"`
|
||||
DialTimeout time.Duration `yaml:"dial_timeout,positive,default=10s"`
|
||||
}
|
||||
|
||||
type TLSConnect struct {
|
||||
ConnectCommon `yaml:",inline"`
|
||||
Address string `yaml:"address"`
|
||||
Ca string `yaml:"ca"`
|
||||
Cert string `yaml:"cert"`
|
||||
Key string `yaml:"key"`
|
||||
ServerCN string `yaml:"server_cn"`
|
||||
DialTimeout time.Duration `yaml:"dial_timeout,positive,default=10s"`
|
||||
}
|
||||
|
||||
type SSHStdinserverConnect struct {
|
||||
ConnectCommon `yaml:",inline"`
|
||||
Host string `yaml:"host"`
|
||||
User string `yaml:"user"`
|
||||
Port uint16 `yaml:"port"`
|
||||
IdentityFile string `yaml:"identity_file"`
|
||||
TransportOpenCommand []string `yaml:"transport_open_command,optional"` //TODO unused
|
||||
SSHCommand string `yaml:"ssh_command,optional"` //TODO unused
|
||||
Options []string `yaml:"options,optional"`
|
||||
DialTimeout time.Duration `yaml:"dial_timeout,positive,default=10s"`
|
||||
}
|
||||
|
||||
type LocalConnect struct {
|
||||
ConnectCommon `yaml:",inline"`
|
||||
ListenerName string `yaml:"listener_name"`
|
||||
ClientIdentity string `yaml:"client_identity"`
|
||||
}
|
||||
|
||||
type ServeEnum struct {
|
||||
Ret interface{}
|
||||
}
|
||||
|
||||
type ServeCommon struct {
|
||||
Type string `yaml:"type"`
|
||||
RPC *RPCConfig `yaml:"rpc,optional"`
|
||||
}
|
||||
|
||||
type TCPServe struct {
|
||||
ServeCommon `yaml:",inline"`
|
||||
Listen string `yaml:"listen"`
|
||||
Clients map[string]string `yaml:"clients"`
|
||||
}
|
||||
|
||||
type TLSServe struct {
|
||||
ServeCommon `yaml:",inline"`
|
||||
Listen string `yaml:"listen"`
|
||||
Ca string `yaml:"ca"`
|
||||
Cert string `yaml:"cert"`
|
||||
Key string `yaml:"key"`
|
||||
ClientCNs []string `yaml:"client_cns"`
|
||||
HandshakeTimeout time.Duration `yaml:"handshake_timeout,positive,default=10s"`
|
||||
}
|
||||
|
||||
type StdinserverServer struct {
|
||||
ServeCommon `yaml:",inline"`
|
||||
ClientIdentities []string `yaml:"client_identities"`
|
||||
}
|
||||
|
||||
type LocalServe struct {
|
||||
ServeCommon `yaml:",inline"`
|
||||
ListenerName string `yaml:"listener_name"`
|
||||
}
|
||||
|
||||
type PruningEnum struct {
|
||||
Ret interface{}
|
||||
}
|
||||
|
||||
type PruneKeepNotReplicated struct {
|
||||
Type string `yaml:"type"`
|
||||
KeepSnapshotAtCursor bool `yaml:"keep_snapshot_at_cursor,optional,default=true"`
|
||||
}
|
||||
|
||||
type PruneKeepLastN struct {
|
||||
Type string `yaml:"type"`
|
||||
Count int `yaml:"count"`
|
||||
}
|
||||
|
||||
type PruneKeepRegex struct { // FIXME rename to KeepRegex
|
||||
Type string `yaml:"type"`
|
||||
Regex string `yaml:"regex"`
|
||||
}
|
||||
|
||||
type LoggingOutletEnum struct {
|
||||
Ret interface{}
|
||||
}
|
||||
|
||||
type LoggingOutletCommon struct {
|
||||
Type string `yaml:"type"`
|
||||
Level string `yaml:"level"`
|
||||
Format string `yaml:"format"`
|
||||
}
|
||||
|
||||
type StdoutLoggingOutlet struct {
|
||||
LoggingOutletCommon `yaml:",inline"`
|
||||
Time bool `yaml:"time,default=true"`
|
||||
Color bool `yaml:"color,default=true"`
|
||||
}
|
||||
|
||||
type SyslogLoggingOutlet struct {
|
||||
LoggingOutletCommon `yaml:",inline"`
|
||||
RetryInterval time.Duration `yaml:"retry_interval,positive,default=10s"`
|
||||
}
|
||||
|
||||
type TCPLoggingOutlet struct {
|
||||
LoggingOutletCommon `yaml:",inline"`
|
||||
Address string `yaml:"address"`
|
||||
Net string `yaml:"net,default=tcp"`
|
||||
RetryInterval time.Duration `yaml:"retry_interval,positive,default=10s"`
|
||||
TLS *TCPLoggingOutletTLS `yaml:"tls,optional"`
|
||||
}
|
||||
|
||||
type TCPLoggingOutletTLS struct {
|
||||
CA string `yaml:"ca"`
|
||||
Cert string `yaml:"cert"`
|
||||
Key string `yaml:"key"`
|
||||
}
|
||||
|
||||
type MonitoringEnum struct {
|
||||
Ret interface{}
|
||||
}
|
||||
|
||||
type PrometheusMonitoring struct {
|
||||
Type string `yaml:"type"`
|
||||
Listen string `yaml:"listen"`
|
||||
}
|
||||
|
||||
type GlobalControl struct {
|
||||
SockPath string `yaml:"sockpath,default=/var/run/zrepl/control"`
|
||||
}
|
||||
|
||||
type GlobalServe struct {
|
||||
StdinServer *GlobalStdinServer `yaml:"stdinserver,optional,fromdefaults"`
|
||||
}
|
||||
|
||||
type GlobalStdinServer struct {
|
||||
SockDir string `yaml:"sockdir,default=/var/run/zrepl/stdinserver"`
|
||||
}
|
||||
|
||||
type JobDebugSettings struct {
|
||||
Conn *struct {
|
||||
ReadDump string `yaml:"read_dump"`
|
||||
WriteDump string `yaml:"write_dump"`
|
||||
} `yaml:"conn,optional"`
|
||||
RPCLog bool `yaml:"rpc_log,optional,default=false"`
|
||||
}
|
||||
|
||||
func enumUnmarshal(u func(interface{}, bool) error, types map[string]interface{}) (interface{}, error) {
|
||||
var in struct {
|
||||
Type string
|
||||
}
|
||||
if err := u(&in, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if in.Type == "" {
|
||||
return nil, &yaml.TypeError{Errors: []string{"must specify type"}}
|
||||
}
|
||||
|
||||
v, ok := types[in.Type]
|
||||
if !ok {
|
||||
return nil, &yaml.TypeError{Errors: []string{fmt.Sprintf("invalid type name %q", in.Type)}}
|
||||
}
|
||||
if err := u(v, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
func (t *JobEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
|
||||
"push": &PushJob{},
|
||||
"sink": &SinkJob{},
|
||||
"pull": &PullJob{},
|
||||
"source": &SourceJob{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (t *ConnectEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
|
||||
"tcp": &TCPConnect{},
|
||||
"tls": &TLSConnect{},
|
||||
"ssh+stdinserver": &SSHStdinserverConnect{},
|
||||
"local": &LocalConnect{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (t *ServeEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
|
||||
"tcp": &TCPServe{},
|
||||
"tls": &TLSServe{},
|
||||
"stdinserver": &StdinserverServer{},
|
||||
"local" : &LocalServe{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (t *PruningEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
|
||||
"not_replicated": &PruneKeepNotReplicated{},
|
||||
"last_n": &PruneKeepLastN{},
|
||||
"grid": &PruneGrid{},
|
||||
"regex": &PruneKeepRegex{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (t *SnapshottingEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
|
||||
"periodic": &SnapshottingPeriodic{},
|
||||
"manual": &SnapshottingManual{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (t *LoggingOutletEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
|
||||
"stdout": &StdoutLoggingOutlet{},
|
||||
"syslog": &SyslogLoggingOutlet{},
|
||||
"tcp": &TCPLoggingOutlet{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (t *MonitoringEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
|
||||
"prometheus": &PrometheusMonitoring{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
var ConfigFileDefaultLocations = []string{
|
||||
"/etc/zrepl/zrepl.yml",
|
||||
"/usr/local/etc/zrepl/zrepl.yml",
|
||||
}
|
||||
|
||||
func ParseConfig(path string) (i *Config, err error) {
|
||||
|
||||
if path == "" {
|
||||
// Try default locations
|
||||
for _, l := range ConfigFileDefaultLocations {
|
||||
stat, statErr := os.Stat(l)
|
||||
if statErr != nil {
|
||||
continue
|
||||
}
|
||||
if !stat.Mode().IsRegular() {
|
||||
err = errors.Errorf("file at default location is not a regular file: %s", l)
|
||||
return
|
||||
}
|
||||
path = l
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var bytes []byte
|
||||
|
||||
if bytes, err = ioutil.ReadFile(path); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return ParseConfigBytes(bytes)
|
||||
}
|
||||
|
||||
func ParseConfigBytes(bytes []byte) (*Config, error) {
|
||||
var c *Config
|
||||
if err := yaml.UnmarshalStrict(bytes, &c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if c == nil {
|
||||
return nil, fmt.Errorf("config is empty or only consists of comments")
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
var durationStringRegex *regexp.Regexp = regexp.MustCompile(`^\s*(\d+)\s*(s|m|h|d|w)\s*$`)
|
||||
|
||||
func parsePostitiveDuration(e string) (d time.Duration, err error) {
|
||||
comps := durationStringRegex.FindStringSubmatch(e)
|
||||
if len(comps) != 3 {
|
||||
err = fmt.Errorf("does not match regex: %s %#v", e, comps)
|
||||
return
|
||||
}
|
||||
|
||||
durationFactor, err := strconv.ParseInt(comps[1], 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if durationFactor <= 0 {
|
||||
return 0, errors.New("duration must be positive integer")
|
||||
}
|
||||
|
||||
var durationUnit time.Duration
|
||||
switch comps[2] {
|
||||
case "s":
|
||||
durationUnit = time.Second
|
||||
case "m":
|
||||
durationUnit = time.Minute
|
||||
case "h":
|
||||
durationUnit = time.Hour
|
||||
case "d":
|
||||
durationUnit = 24 * time.Hour
|
||||
case "w":
|
||||
durationUnit = 24 * 7 * time.Hour
|
||||
default:
|
||||
err = fmt.Errorf("contains unknown time unit '%s'", comps[2])
|
||||
return
|
||||
}
|
||||
|
||||
d = time.Duration(durationFactor) * durationUnit
|
||||
return
|
||||
}
|
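A minimal sketch of the duration grammar accepted by parsePostitiveDuration above (same package; the example function name is an illustrative assumption):

func exampleDurations() {
	for _, s := range []string{"10m", "1d", "2w"} {
		d, err := parsePostitiveDuration(s)
		fmt.Println(d, err) // 10m0s <nil>, 24h0m0s <nil>, 336h0m0s <nil>
	}
	if _, err := parsePostitiveDuration("-5m"); err != nil {
		// rejected: the regex only accepts a bare positive integer followed by s, m, h, d, or w
		fmt.Println(err)
	}
}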
82
config/config_global_test.go
Normal file
@ -0,0 +1,82 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/zrepl/yaml-config"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func testValidGlobalSection(t *testing.T, s string) *Config {
|
||||
jobdef := `
|
||||
jobs:
|
||||
- name: dummyjob
|
||||
type: sink
|
||||
serve:
|
||||
type: tcp
|
||||
listen: ":2342"
|
||||
clients: {
|
||||
"10.0.0.1":"foo"
|
||||
}
|
||||
root_fs: zoot/foo
|
||||
`
|
||||
_, err := ParseConfigBytes([]byte(jobdef))
|
||||
require.NoError(t, err)
|
||||
return testValidConfig(t, s + jobdef)
|
||||
}
|
||||
|
||||
func TestOutletTypes(t *testing.T) {
|
||||
conf := testValidGlobalSection(t, `
|
||||
global:
|
||||
logging:
|
||||
- type: stdout
|
||||
level: debug
|
||||
format: human
|
||||
- type: syslog
|
||||
level: info
|
||||
retry_interval: 20s
|
||||
format: human
|
||||
- type: tcp
|
||||
level: debug
|
||||
format: json
|
||||
address: logserver.example.com:1234
|
||||
- type: tcp
|
||||
level: debug
|
||||
format: json
|
||||
address: encryptedlogserver.example.com:1234
|
||||
retry_interval: 20s
|
||||
tls:
|
||||
ca: /etc/zrepl/log/ca.crt
|
||||
cert: /etc/zrepl/log/key.pem
|
||||
key: /etc/zrepl/log/cert.pem
|
||||
`)
|
||||
assert.Equal(t, 4, len(*conf.Global.Logging))
|
||||
assert.NotNil(t, (*conf.Global.Logging)[3].Ret.(*TCPLoggingOutlet).TLS)
|
||||
}
|
||||
|
||||
func TestDefaultLoggingOutlet(t *testing.T) {
|
||||
conf := testValidGlobalSection(t, "")
|
||||
assert.Equal(t, 1, len(*conf.Global.Logging))
|
||||
o := (*conf.Global.Logging)[0].Ret.(StdoutLoggingOutlet)
|
||||
assert.Equal(t, "warn", o.Level)
|
||||
assert.Equal(t, "human", o.Format)
|
||||
}
|
||||
|
||||
func TestPrometheusMonitoring(t *testing.T) {
|
||||
conf := testValidGlobalSection(t, `
|
||||
global:
|
||||
monitoring:
|
||||
- type: prometheus
|
||||
listen: ':9091'
|
||||
`)
|
||||
assert.Equal(t, ":9091", conf.Global.Monitoring[0].Ret.(*PrometheusMonitoring).Listen)
|
||||
}
|
||||
|
||||
func TestLoggingOutletEnumList_SetDefaults(t *testing.T) {
|
||||
e := &LoggingOutletEnumList{}
|
||||
var i yaml.Defaulter = e
|
||||
require.NotPanics(t, func() {
|
||||
i.SetDefault()
|
||||
assert.Equal(t, "warn", (*e)[0].Ret.(StdoutLoggingOutlet).Level)
|
||||
})
|
||||
}
|
39
config/config_minimal_test.go
Normal file
@ -0,0 +1,39 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestConfigEmptyFails(t *testing.T) {
|
||||
conf, err := testConfig(t, "\n")
|
||||
assert.Nil(t, conf)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestJobsOnlyWorks(t *testing.T) {
|
||||
testValidConfig(t, `
|
||||
jobs:
|
||||
- name: push
|
||||
type: push
|
||||
# snapshot the filesystems matched by the left-hand-side of the mapping
|
||||
# every 10m with zrepl_ as prefix
|
||||
connect:
|
||||
type: tcp
|
||||
address: localhost:2342
|
||||
filesystems: {
|
||||
"pool1/var/db<": true,
|
||||
"pool1/usr/home<": true,
|
||||
"pool1/usr/home/paranoid": false, #don't backup paranoid user
|
||||
"pool1/poudriere/ports<": false #don't backup the ports trees
|
||||
}
|
||||
snapshotting:
|
||||
type: manual
|
||||
pruning:
|
||||
keep_sender:
|
||||
- type: not_replicated
|
||||
keep_receiver:
|
||||
- type: last_n
|
||||
count: 1
|
||||
`)
|
||||
}
|
86
config/config_rpc_test.go
Normal file
@ -0,0 +1,86 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRPC(t *testing.T) {
|
||||
conf := testValidConfig(t, `
|
||||
jobs:
|
||||
- name: pull_servers
|
||||
type: pull
|
||||
connect:
|
||||
type: tcp
|
||||
address: "server1.foo.bar:8888"
|
||||
rpc:
|
||||
timeout: 20s # different from default, should merge
|
||||
root_fs: "pool2/backup_servers"
|
||||
interval: 10m
|
||||
pruning:
|
||||
keep_sender:
|
||||
- type: not_replicated
|
||||
keep_receiver:
|
||||
- type: last_n
|
||||
count: 100
|
||||
|
||||
- name: pull_servers2
|
||||
type: pull
|
||||
connect:
|
||||
type: tcp
|
||||
address: "server1.foo.bar:8888"
|
||||
rpc:
|
||||
tx_chunk_size: 0xabcd # different from default, should merge
|
||||
root_fs: "pool2/backup_servers"
|
||||
interval: 10m
|
||||
pruning:
|
||||
keep_sender:
|
||||
- type: not_replicated
|
||||
keep_receiver:
|
||||
- type: last_n
|
||||
count: 100
|
||||
|
||||
- type: sink
|
||||
name: "laptop_sink"
|
||||
root_fs: "pool2/backup_laptops"
|
||||
serve:
|
||||
type: tcp
|
||||
listen: "192.168.122.189:8888"
|
||||
clients: {
|
||||
"10.23.42.23":"client1"
|
||||
}
|
||||
rpc:
|
||||
rx_structured_max: 0x2342
|
||||
|
||||
- type: sink
|
||||
name: "other_sink"
|
||||
root_fs: "pool2/backup_laptops"
|
||||
serve:
|
||||
type: tcp
|
||||
listen: "192.168.122.189:8888"
|
||||
clients: {
|
||||
"10.23.42.23":"client1"
|
||||
}
|
||||
rpc:
|
||||
send_heartbeat_interval: 10s
|
||||
|
||||
`)
|
||||
|
||||
assert.Equal(t, 20*time.Second, conf.Jobs[0].Ret.(*PullJob).Connect.Ret.(*TCPConnect).RPC.Timeout)
|
||||
assert.Equal(t, uint32(0xabcd), conf.Jobs[1].Ret.(*PullJob).Connect.Ret.(*TCPConnect).RPC.TxChunkSize)
|
||||
assert.Equal(t, uint32(0x2342), conf.Jobs[2].Ret.(*SinkJob).Serve.Ret.(*TCPServe).RPC.RxStructuredMaxLen)
|
||||
assert.Equal(t, 10*time.Second, conf.Jobs[3].Ret.(*SinkJob).Serve.Ret.(*TCPServe).RPC.SendHeartbeatInterval)
|
||||
defConf := RPCConfig{}
|
||||
Default(&defConf)
|
||||
assert.Equal(t, defConf.Timeout, conf.Global.RPC.Timeout)
|
||||
}
|
||||
|
||||
func TestGlobal_DefaultRPCConfig(t *testing.T) {
|
||||
assert.NotPanics(t, func() {
|
||||
var c RPCConfig
|
||||
Default(&c)
|
||||
assert.NotNil(t, c)
|
||||
assert.Equal(t, c.TxChunkSize, uint32(1)<<15)
|
||||
})
|
||||
}
|
57
config/config_snapshotting_test.go
Normal file
@ -0,0 +1,57 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestSnapshotting(t *testing.T) {
|
||||
tmpl := `
|
||||
jobs:
|
||||
- name: foo
|
||||
type: push
|
||||
connect:
|
||||
type: local
|
||||
listener_name: foo
|
||||
client_identity: bar
|
||||
filesystems: {"<": true}
|
||||
%s
|
||||
pruning:
|
||||
keep_sender:
|
||||
- type: last_n
|
||||
count: 10
|
||||
keep_receiver:
|
||||
- type: last_n
|
||||
count: 10
|
||||
`
|
||||
manual := `
|
||||
snapshotting:
|
||||
type: manual
|
||||
`
|
||||
periodic := `
|
||||
snapshotting:
|
||||
type: periodic
|
||||
prefix: zrepl_
|
||||
interval: 10m
|
||||
`
|
||||
|
||||
fillSnapshotting := func(s string) string { return fmt.Sprintf(tmpl, s) }
|
||||
var c *Config
|
||||
|
||||
t.Run("manual", func(t *testing.T) {
|
||||
c = testValidConfig(t, fillSnapshotting(manual))
|
||||
snm := c.Jobs[0].Ret.(*PushJob).Snapshotting.Ret.(*SnapshottingManual)
|
||||
assert.Equal(t, "manual", snm.Type)
|
||||
})
|
||||
|
||||
t.Run("periodic", func(t *testing.T) {
|
||||
c = testValidConfig(t, fillSnapshotting(periodic))
|
||||
snp := c.Jobs[0].Ret.(*PushJob).Snapshotting.Ret.(*SnapshottingPeriodic)
|
||||
assert.Equal(t, "periodic", snp.Type)
|
||||
assert.Equal(t, 10*time.Minute, snp.Interval)
|
||||
assert.Equal(t, "zrepl_" , snp.Prefix)
|
||||
})
|
||||
|
||||
}
|
50
config/config_test.go
Normal file
@ -0,0 +1,50 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"github.com/kr/pretty"
|
||||
"github.com/stretchr/testify/require"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSampleConfigsAreParsedWithoutErrors(t *testing.T) {
|
||||
paths, err := filepath.Glob("./samples/*")
|
||||
if err != nil {
|
||||
t.Errorf("glob failed: %+v", err)
|
||||
}
|
||||
|
||||
for _, p := range paths {
|
||||
|
||||
if path.Ext(p) != ".yml" {
|
||||
t.Logf("skipping file %s", p)
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(p, func(t *testing.T) {
|
||||
c, err := ParseConfig(p)
|
||||
if err != nil {
|
||||
t.Errorf("error parsing %s:\n%+v", p, err)
|
||||
}
|
||||
|
||||
t.Logf("file: %s", p)
|
||||
t.Log(pretty.Sprint(c))
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
func testValidConfig(t *testing.T, input string) (*Config) {
|
||||
t.Helper()
|
||||
conf, err := testConfig(t, input)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, conf)
|
||||
return conf
|
||||
}
|
||||
|
||||
func testConfig(t *testing.T, input string) (*Config, error) {
|
||||
t.Helper()
|
||||
return ParseConfigBytes([]byte(input))
|
||||
}
|
123
config/retentiongrid.go
Normal file
@ -0,0 +1,123 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type RetentionIntervalList []RetentionInterval
|
||||
|
||||
type PruneGrid struct {
|
||||
Type string `yaml:"type"`
|
||||
Grid RetentionIntervalList `yaml:"grid"`
|
||||
Regex string `yaml:"regex"`
|
||||
}
|
||||
|
||||
type RetentionInterval struct {
|
||||
length time.Duration
|
||||
keepCount int
|
||||
}
|
||||
|
||||
func (i *RetentionInterval) Length() time.Duration {
|
||||
return i.length
|
||||
}
|
||||
|
||||
func (i *RetentionInterval) KeepCount() int {
|
||||
return i.keepCount
|
||||
}
|
||||
|
||||
const RetentionGridKeepCountAll int = -1
|
||||
|
||||
type RetentionGrid struct {
|
||||
intervals []RetentionInterval
|
||||
}
|
||||
|
||||
func (t *RetentionIntervalList) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
|
||||
var in string
|
||||
if err := u(&in, true); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
intervals, err := parseRetentionGridIntervalsString(in)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*t = intervals
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
var retentionStringIntervalRegex *regexp.Regexp = regexp.MustCompile(`^\s*(\d+)\s*x\s*([^\(]+)\s*(\((.*)\))?\s*$`)
|
||||
|
||||
func parseRetentionGridIntervalString(e string) (intervals []RetentionInterval, err error) {
|
||||
|
||||
comps := retentionStringIntervalRegex.FindStringSubmatch(e)
|
||||
if comps == nil {
|
||||
err = fmt.Errorf("retention string does not match expected format")
|
||||
return
|
||||
}
|
||||
|
||||
times, err := strconv.Atoi(comps[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
} else if times <= 0 {
|
||||
return nil, fmt.Errorf("contains factor <= 0")
|
||||
}
|
||||
|
||||
duration, err := parsePostitiveDuration(comps[2])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
keepCount := 1
|
||||
if comps[3] != "" {
|
||||
// Decompose key=value, comma separated
|
||||
// For now, only keep_count is supported
|
||||
re := regexp.MustCompile(`^\s*keep=(.+)\s*$`)
|
||||
res := re.FindStringSubmatch(comps[4])
|
||||
if res == nil || len(res) != 2 {
|
||||
err = fmt.Errorf("interval parameter contains unknown parameters")
|
||||
return
|
||||
}
|
||||
if res[1] == "all" {
|
||||
keepCount = RetentionGridKeepCountAll
|
||||
} else {
|
||||
keepCount, err = strconv.Atoi(res[1])
|
||||
if err != nil {
|
||||
err = fmt.Errorf("cannot parse keep_count value")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
intervals = make([]RetentionInterval, times)
|
||||
for i := range intervals {
|
||||
intervals[i] = RetentionInterval{
|
||||
length: duration,
|
||||
keepCount: keepCount,
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func parseRetentionGridIntervalsString(s string) (intervals []RetentionInterval, err error) {
|
||||
|
||||
ges := strings.Split(s, "|")
|
||||
intervals = make([]RetentionInterval, 0, 7*len(ges))
|
||||
|
||||
for intervalIdx, e := range ges {
|
||||
parsed, err := parseRetentionGridIntervalString(e)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse interval %d of %d: %s: %s", intervalIdx+1, len(ges), err, strings.TrimSpace(e))
|
||||
}
|
||||
intervals = append(intervals, parsed...)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
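A minimal sketch of how a grid string expands through the parsers above (same package; the example function name is an illustrative assumption):

func exampleGridExpansion() {
	intervals, err := parseRetentionGridIntervalsString("1x1h(keep=all) | 24x1h")
	if err != nil {
		panic(err)
	}
	fmt.Println(len(intervals))           // 25: one 1h interval plus 24 more 1h intervals
	fmt.Println(intervals[0].KeepCount()) // -1, i.e. RetentionGridKeepCountAll
	fmt.Println(intervals[1].KeepCount()) // 1, the default keep count per interval
	fmt.Println(intervals[1].Length())    // 1h0m0s
}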
31
config/samples/local.yml
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
jobs:
|
||||
- type: sink
|
||||
name: "local_sink"
|
||||
root_fs: "storage/zrepl/sink"
|
||||
serve:
|
||||
type: local
|
||||
listener_name: localsink
|
||||
|
||||
- type: push
|
||||
name: "backup_system"
|
||||
connect:
|
||||
type: local
|
||||
listener_name: localsink
|
||||
client_identity: local_backup
|
||||
filesystems: {
|
||||
"system<": true,
|
||||
}
|
||||
snapshotting:
|
||||
type: periodic
|
||||
interval: 10m
|
||||
prefix: zrepl_
|
||||
pruning:
|
||||
keep_sender:
|
||||
- type: not_replicated
|
||||
- type: last_n
|
||||
count: 10
|
||||
keep_receiver:
|
||||
- type: grid
|
||||
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
|
||||
regex: "zrepl_.*"
|
24
config/samples/pull.yml
Normal file
@ -0,0 +1,24 @@
|
||||
jobs:
|
||||
- name: pull_servers
|
||||
type: pull
|
||||
connect:
|
||||
type: tls
|
||||
address: "server1.foo.bar:8888"
|
||||
ca: "/certs/ca.crt"
|
||||
cert: "/certs/cert.crt"
|
||||
key: "/certs/key.pem"
|
||||
server_cn: "server1"
|
||||
root_fs: "pool2/backup_servers"
|
||||
interval: 10m
|
||||
pruning:
|
||||
keep_sender:
|
||||
- type: not_replicated
|
||||
- type: last_n
|
||||
count: 10
|
||||
- type: grid
|
||||
grid: 1x1h(keep=all) | 24x1h | 14x1d
|
||||
regex: "zrepl_.*"
|
||||
keep_receiver:
|
||||
- type: grid
|
||||
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
|
||||
regex: "zrepl_.*"
|
28
config/samples/pull_ssh.yml
Normal file
@ -0,0 +1,28 @@
|
||||
jobs:
|
||||
|
||||
- name: pull_servers
|
||||
type: pull
|
||||
connect:
|
||||
type: ssh+stdinserver
|
||||
host: app-srv.example.com
|
||||
user: root
|
||||
port: 22
|
||||
identity_file: /etc/zrepl/ssh/identity
|
||||
options: # optional, default [], `-o` arguments passed to ssh
|
||||
- "Compression=on"
|
||||
root_fs: "pool2/backup_servers"
|
||||
interval: 10m
|
||||
pruning:
|
||||
keep_sender:
|
||||
- type: not_replicated
|
||||
- type: last_n
|
||||
count: 10
|
||||
- type: grid
|
||||
grid: 1x1h(keep=all) | 24x1h | 14x1d
|
||||
regex: "^zrepl_.*"
|
||||
keep_receiver:
|
||||
- type: regex
|
||||
regex: keep_
|
||||
- type: grid
|
||||
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
|
||||
regex: "^zrepl_.*"
|
24
config/samples/push.yml
Normal file
@ -0,0 +1,24 @@
|
||||
jobs:
|
||||
- type: push
|
||||
name: "push"
|
||||
filesystems: {
|
||||
"<": true,
|
||||
"tmp": false
|
||||
}
|
||||
connect:
|
||||
type: tcp
|
||||
address: "backup-server.foo.bar:8888"
|
||||
snapshotting:
|
||||
type: manual
|
||||
pruning:
|
||||
keep_sender:
|
||||
- type: not_replicated
|
||||
- type: last_n
|
||||
count: 10
|
||||
- type: grid
|
||||
grid: 1x1h(keep=all) | 24x1h | 14x1d
|
||||
regex: "^zrepl_.*"
|
||||
keep_receiver:
|
||||
- type: grid
|
||||
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
|
||||
regex: "^zrepl_.*"
|
13
config/samples/sink.yml
Normal file
@ -0,0 +1,13 @@
|
||||
jobs:
|
||||
- type: sink
|
||||
name: "laptop_sink"
|
||||
root_fs: "pool2/backup_laptops"
|
||||
serve:
|
||||
type: tls
|
||||
listen: "192.168.122.189:8888"
|
||||
ca: "ca.pem"
|
||||
cert: "cert.pem"
|
||||
key: "key.pem"
|
||||
client_cns:
|
||||
- "laptop1"
|
||||
- "homeserver"
|
17
config/samples/source.yml
Normal file
@ -0,0 +1,17 @@
|
||||
jobs:
|
||||
- name: pull_source
|
||||
type: source
|
||||
serve:
|
||||
type: tcp
|
||||
listen: "0.0.0.0:8888"
|
||||
clients: {
|
||||
"192.168.122.123" : "client1"
|
||||
}
|
||||
filesystems: {
|
||||
"<": true,
|
||||
"secret": false
|
||||
}
|
||||
snapshotting:
|
||||
type: periodic
|
||||
interval: 10m
|
||||
prefix: zrepl_
|
17
config/samples/source_ssh.yml
Normal file
@ -0,0 +1,17 @@
|
||||
jobs:
|
||||
- name: pull_source
|
||||
type: source
|
||||
serve:
|
||||
type: stdinserver
|
||||
client_identities:
|
||||
- "client1"
|
||||
- "client2"
|
||||
filesystems: {
|
||||
"<": true,
|
||||
"secret": false
|
||||
}
|
||||
snapshotting:
|
||||
type: periodic
|
||||
interval: 10m
|
||||
prefix: zrepl_
|
||||
|
240
daemon/control.go
Normal file
@ -0,0 +1,240 @@
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/zrepl/zrepl/daemon/job"
|
||||
"github.com/zrepl/zrepl/daemon/nethelpers"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/version"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
type controlJob struct {
|
||||
sockaddr *net.UnixAddr
|
||||
jobs *jobs
|
||||
}
|
||||
|
||||
func newControlJob(sockpath string, jobs *jobs) (j *controlJob, err error) {
|
||||
j = &controlJob{jobs: jobs}
|
||||
|
||||
j.sockaddr, err = net.ResolveUnixAddr("unix", sockpath)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot resolve unix address")
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (j *controlJob) Name() string { return jobNameControl }
|
||||
|
||||
func (j *controlJob) Status() *job.Status { return &job.Status{Type: job.TypeInternal} }
|
||||
|
||||
var promControl struct {
|
||||
requestBegin *prometheus.CounterVec
|
||||
requestFinished *prometheus.HistogramVec
|
||||
}
|
||||
|
||||
func (j *controlJob) RegisterMetrics(registerer prometheus.Registerer) {
|
||||
promControl.requestBegin = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "control",
|
||||
Name: "request_begin",
|
||||
Help: "number of request we started to handle",
|
||||
}, []string{"endpoint"})
|
||||
|
||||
promControl.requestFinished = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "control",
|
||||
Name: "request_finished",
|
||||
Help: "time it took a request to finih",
|
||||
Buckets: []float64{1e-6, 10e-6, 100e-6, 500e-6, 1e-3, 10e-3, 100e-3, 200e-3, 400e-3, 800e-3, 1, 10, 20},
|
||||
}, []string{"endpoint"})
|
||||
registerer.MustRegister(promControl.requestBegin)
|
||||
registerer.MustRegister(promControl.requestFinished)
|
||||
}
|
||||
|
||||
const (
|
||||
ControlJobEndpointPProf string = "/debug/pprof"
|
||||
ControlJobEndpointVersion string = "/version"
|
||||
ControlJobEndpointStatus string = "/status"
|
||||
ControlJobEndpointSignal string = "/signal"
|
||||
)
|
||||
|
||||
func (j *controlJob) Run(ctx context.Context) {
|
||||
|
||||
log := job.GetLogger(ctx)
|
||||
defer log.Info("control job finished")
|
||||
|
||||
l, err := nethelpers.ListenUnixPrivate(j.sockaddr)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error listening")
|
||||
return
|
||||
}
|
||||
|
||||
pprofServer := NewPProfServer(ctx)
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle(ControlJobEndpointPProf,
|
||||
requestLogger{log: log, handler: jsonRequestResponder{func(decoder jsonDecoder) (interface{}, error) {
|
||||
var msg PprofServerControlMsg
|
||||
err := decoder(&msg)
|
||||
if err != nil {
|
||||
return nil, errors.Errorf("decode failed")
|
||||
}
|
||||
pprofServer.Control(msg)
|
||||
return struct{}{}, nil
|
||||
}}})
|
||||
|
||||
mux.Handle(ControlJobEndpointVersion,
|
||||
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
|
||||
return version.NewZreplVersionInformation(), nil
|
||||
}}})
|
||||
|
||||
mux.Handle(ControlJobEndpointStatus,
|
||||
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
|
||||
s := j.jobs.status()
|
||||
return s, nil
|
||||
}}})
|
||||
|
||||
mux.Handle(ControlJobEndpointSignal,
|
||||
requestLogger{log: log, handler: jsonRequestResponder{func(decoder jsonDecoder) (interface{}, error) {
|
||||
type reqT struct {
|
||||
Name string
|
||||
Op string
|
||||
}
|
||||
var req reqT
|
||||
if decoder(&req) != nil {
|
||||
return nil, errors.Errorf("decode failed")
|
||||
}
|
||||
|
||||
var err error
|
||||
switch req.Op {
|
||||
case "wakeup":
|
||||
err = j.jobs.wakeup(req.Name)
|
||||
case "reset":
|
||||
err = j.jobs.reset(req.Name)
|
||||
default:
|
||||
err = fmt.Errorf("operation %q is invalid", req.Op)
|
||||
}
|
||||
|
||||
return struct{}{}, err
|
||||
}}})
|
||||
server := http.Server{
|
||||
Handler: mux,
|
||||
// control socket is local, 1s timeout should be more than sufficient, even on a loaded system
|
||||
WriteTimeout: 1*time.Second,
|
||||
ReadTimeout: 1*time.Second,
|
||||
}
|
||||
|
||||
outer:
|
||||
for {
|
||||
|
||||
served := make(chan error)
|
||||
go func() {
|
||||
served <- server.Serve(l)
|
||||
close(served)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.WithError(ctx.Err()).Info("context done")
|
||||
server.Shutdown(context.Background())
|
||||
break outer
|
||||
case err = <-served:
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error serving")
|
||||
break outer
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type jsonResponder struct {
|
||||
producer func() (interface{}, error)
|
||||
}
|
||||
|
||||
func (j jsonResponder) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
res, err := j.producer()
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
io.WriteString(w, err.Error())
|
||||
return
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
err = json.NewEncoder(&buf).Encode(res)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
io.WriteString(w, err.Error())
|
||||
} else {
|
||||
io.Copy(w, &buf)
|
||||
}
|
||||
}
|
||||
|
||||
type jsonDecoder = func(interface{}) error
|
||||
|
||||
type jsonRequestResponder struct {
|
||||
producer func(decoder jsonDecoder) (interface{}, error)
|
||||
}
|
||||
|
||||
func (j jsonRequestResponder) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
var decodeError error
|
||||
decoder := func(i interface{}) error {
|
||||
err := json.NewDecoder(r.Body).Decode(&i)
|
||||
decodeError = err
|
||||
return err
|
||||
}
|
||||
res, producerErr := j.producer(decoder)
|
||||
|
||||
// If we had a decode error, ignore the producer's output and return the decode error
|
||||
if decodeError != nil {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
io.WriteString(w, decodeError.Error())
|
||||
return
|
||||
}
|
||||
if producerErr != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
io.WriteString(w, producerErr.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
encodeErr := json.NewEncoder(&buf).Encode(res)
|
||||
if encodeErr != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
io.WriteString(w, encodeErr.Error())
|
||||
} else {
|
||||
io.Copy(w, &buf)
|
||||
}
|
||||
}
|
||||
|
||||
type requestLogger struct {
|
||||
log logger.Logger
|
||||
handler http.Handler
|
||||
handlerFunc http.HandlerFunc
|
||||
}
|
||||
|
||||
func (l requestLogger) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
log := l.log.WithField("method", r.Method).WithField("url", r.URL)
|
||||
log.Debug("start")
|
||||
promControl.requestBegin.WithLabelValues(r.URL.Path).Inc()
|
||||
defer prometheus.NewTimer(promControl.requestFinished.WithLabelValues(r.URL.Path)).ObserveDuration()
|
||||
if l.handlerFunc != nil {
|
||||
l.handlerFunc(w, r)
|
||||
} else if l.handler != nil {
|
||||
l.handler.ServeHTTP(w, r)
|
||||
} else {
|
||||
log.Error("no handler or handlerFunc configured")
|
||||
}
|
||||
log.Debug("finish")
|
||||
}
|
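A minimal client-side sketch for the signal endpoint above, assuming the socket sits at the GlobalControl default /var/run/zrepl/control and the request body mirrors reqT; the function name and error handling are illustrative, not the actual zrepl client:

func signalJob(jobName, op string) error {
	client := &http.Client{Transport: &http.Transport{
		// ignore the host in the URL below and always dial the control socket
		DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
			return net.Dial("unix", "/var/run/zrepl/control")
		},
	}}
	body, err := json.Marshal(struct{ Name, Op string }{jobName, op})
	if err != nil {
		return err
	}
	resp, err := client.Post("http://zrepl"+ControlJobEndpointSignal, "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("control endpoint returned %s", resp.Status)
	}
	return nil
}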
223
daemon/daemon.go
Normal file
@ -0,0 +1,223 @@
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/job"
|
||||
"github.com/zrepl/zrepl/daemon/job/reset"
|
||||
"github.com/zrepl/zrepl/daemon/job/wakeup"
|
||||
"github.com/zrepl/zrepl/daemon/logging"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/version"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
func Run(conf *config.Config) error {
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
defer cancel()
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
go func() {
|
||||
<-sigChan
|
||||
cancel()
|
||||
}()
|
||||
|
||||
outlets, err := logging.OutletsFromConfig(*conf.Global.Logging)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "cannot build logging from config")
|
||||
}
|
||||
|
||||
confJobs, err := job.JobsFromConfig(conf)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "cannot build jobs from config")
|
||||
}
|
||||
|
||||
log := logger.NewLogger(outlets, 1*time.Second)
|
||||
log.Info(version.NewZreplVersionInformation().String())
|
||||
|
||||
for _, job := range confJobs {
|
||||
if IsInternalJobName(job.Name()) {
|
||||
panic(fmt.Sprintf("internal job name used for config job '%s'", job.Name())) //FIXME
|
||||
}
|
||||
}
|
||||
|
||||
ctx = job.WithLogger(ctx, log)
|
||||
|
||||
jobs := newJobs()
|
||||
|
||||
// start control socket
|
||||
controlJob, err := newControlJob(conf.Global.Control.SockPath, jobs)
|
||||
if err != nil {
|
||||
panic(err) // FIXME
|
||||
}
|
||||
jobs.start(ctx, controlJob, true)
|
||||
|
||||
for i, jc := range conf.Global.Monitoring {
|
||||
var (
|
||||
job job.Job
|
||||
err error
|
||||
)
|
||||
switch v := jc.Ret.(type) {
|
||||
case *config.PrometheusMonitoring:
|
||||
job, err = newPrometheusJobFromConfig(v)
|
||||
default:
|
||||
return errors.Errorf("unknown monitoring job #%d (type %T)", i, v)
|
||||
}
|
||||
if err != nil {
|
||||
return errors.Wrapf(err,"cannot build monitorin gjob #%d", i)
|
||||
}
|
||||
jobs.start(ctx, job, true)
|
||||
}
|
||||
|
||||
|
||||
log.Info("starting daemon")
|
||||
|
||||
// start regular jobs
|
||||
for _, j := range confJobs {
|
||||
jobs.start(ctx, j, false)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-jobs.wait():
|
||||
log.Info("all jobs finished")
|
||||
case <-ctx.Done():
|
||||
log.WithError(ctx.Err()).Info("context finished")
|
||||
}
|
||||
log.Info("daemon exiting")
|
||||
return nil
|
||||
}
|
||||
|
||||
type jobs struct {
|
||||
wg sync.WaitGroup
|
||||
|
||||
// m protects all fields below it
|
||||
m sync.RWMutex
|
||||
wakeups map[string]wakeup.Func // by Job.Name
|
||||
resets map[string]reset.Func // by Job.Name
|
||||
jobs map[string]job.Job
|
||||
}
|
||||
|
||||
func newJobs() *jobs {
|
||||
return &jobs{
|
||||
wakeups: make(map[string]wakeup.Func),
|
||||
resets: make(map[string]reset.Func),
|
||||
jobs: make(map[string]job.Job),
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
logJobField string = "job"
|
||||
logTaskField string = "task"
|
||||
logSubsysField string = "subsystem"
|
||||
)
|
||||
|
||||
func (s *jobs) wait() <-chan struct{} {
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
s.wg.Wait()
close(ch) // close the channel so callers of wait() are unblocked once all jobs have exited
|
||||
}()
|
||||
return ch
|
||||
}
|
||||
|
||||
func (s *jobs) status() map[string]*job.Status {
|
||||
s.m.RLock()
|
||||
defer s.m.RUnlock()
|
||||
|
||||
type res struct {
|
||||
name string
|
||||
status *job.Status
|
||||
}
|
||||
var wg sync.WaitGroup
|
||||
c := make(chan res, len(s.jobs))
|
||||
for name, j := range s.jobs {
|
||||
wg.Add(1)
|
||||
go func(name string, j job.Job) {
|
||||
defer wg.Done()
|
||||
c <- res{name: name, status: j.Status()}
|
||||
}(name, j)
|
||||
}
|
||||
wg.Wait()
|
||||
close(c)
|
||||
ret := make(map[string]*job.Status, len(s.jobs))
|
||||
for res := range c {
|
||||
ret[res.name] = res.status
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (s *jobs) wakeup(job string) error {
|
||||
s.m.RLock()
|
||||
defer s.m.RUnlock()
|
||||
|
||||
wu, ok := s.wakeups[job]
|
||||
if !ok {
|
||||
return errors.Errorf("Job %s does not exist", job)
|
||||
}
|
||||
return wu()
|
||||
}
|
||||
|
||||
func (s *jobs) reset(job string) error {
|
||||
s.m.RLock()
|
||||
defer s.m.RUnlock()
|
||||
|
||||
wu, ok := s.resets[job]
|
||||
if !ok {
|
||||
return errors.Errorf("Job %s does not exist", job)
|
||||
}
|
||||
return wu()
|
||||
}
|
||||
|
||||
const (
|
||||
jobNamePrometheus = "_prometheus"
|
||||
jobNameControl = "_control"
|
||||
)
|
||||
|
||||
func IsInternalJobName(s string) bool {
|
||||
return strings.HasPrefix(s, "_")
|
||||
}
|
||||
|
||||
func (s *jobs) start(ctx context.Context, j job.Job, internal bool) {
|
||||
s.m.Lock()
|
||||
defer s.m.Unlock()
|
||||
|
||||
jobLog := job.GetLogger(ctx).
|
||||
WithField(logJobField, j.Name()).
|
||||
WithOutlet(newPrometheusLogOutlet(j.Name()), logger.Debug)
|
||||
jobName := j.Name()
|
||||
if !internal && IsInternalJobName(jobName) {
|
||||
panic(fmt.Sprintf("internal job name used for non-internal job %s", jobName))
|
||||
}
|
||||
if internal && !IsInternalJobName(jobName) {
|
||||
panic(fmt.Sprintf("internal job does not use internal job name %s", jobName))
|
||||
}
|
||||
if _, ok := s.jobs[jobName]; ok {
|
||||
panic(fmt.Sprintf("duplicate job name %s", jobName))
|
||||
}
|
||||
|
||||
j.RegisterMetrics(prometheus.DefaultRegisterer)
|
||||
|
||||
s.jobs[jobName] = j
|
||||
ctx = job.WithLogger(ctx, jobLog)
|
||||
ctx, wakeup := wakeup.Context(ctx)
|
||||
ctx, resetFunc := reset.Context(ctx)
|
||||
s.wakeups[jobName] = wakeup
|
||||
s.resets[jobName] = resetFunc
|
||||
|
||||
s.wg.Add(1)
|
||||
go func() {
|
||||
defer s.wg.Done()
|
||||
jobLog.Info("starting job")
|
||||
defer jobLog.Info("job exited")
|
||||
j.Run(ctx)
|
||||
}()
|
||||
}
|
@ -1,12 +1,11 @@
|
||||
package cmd
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/endpoint"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type DatasetMapFilter struct {
|
||||
@ -101,6 +100,7 @@ func (m DatasetMapFilter) mostSpecificPrefixMapping(path *zfs.DatasetPath) (idx
|
||||
return
|
||||
}
|
||||
|
||||
// Returns target == nil if there is no mapping
|
||||
func (m DatasetMapFilter) Map(source *zfs.DatasetPath) (target *zfs.DatasetPath, err error) {
|
||||
|
||||
if m.filterMode {
|
||||
@ -114,10 +114,18 @@ func (m DatasetMapFilter) Map(source *zfs.DatasetPath) (target *zfs.DatasetPath,
|
||||
}
|
||||
me := m.entries[mi]
|
||||
|
||||
if me.mapping == "" {
|
||||
// Special case treatment: 'foo/bar<' => ''
|
||||
if !me.subtreeMatch {
|
||||
return nil, fmt.Errorf("mapping to '' must be a subtree match")
|
||||
}
|
||||
// ok...
|
||||
} else {
|
||||
if strings.HasPrefix("!", me.mapping) {
|
||||
// reject mapping
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
target, err = zfs.NewDatasetPath(me.mapping)
|
||||
if err != nil {
|
||||
@ -177,12 +185,43 @@ func (m DatasetMapFilter) InvertedFilter() (inv *DatasetMapFilter, err error) {
|
||||
return inv, nil
|
||||
}
|
||||
|
||||
// FIXME investigate whether we can support more...
|
||||
func (m DatasetMapFilter) Invert() (endpoint.FSMap, error) {
|
||||
|
||||
if m.filterMode {
|
||||
return nil, errors.Errorf("can only invert mappings")
|
||||
}
|
||||
|
||||
if len(m.entries) != 1 {
|
||||
return nil, errors.Errorf("inversion of complicated mappings is not implemented") // FIXME
|
||||
}
|
||||
|
||||
e := m.entries[0]
|
||||
|
||||
inv := &DatasetMapFilter{
|
||||
make([]datasetMapFilterEntry, len(m.entries)),
|
||||
false,
|
||||
}
|
||||
mp, err := zfs.NewDatasetPath(e.mapping)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
inv.entries[0] = datasetMapFilterEntry{
|
||||
path: mp,
|
||||
mapping: e.path.ToString(),
|
||||
subtreeMatch: e.subtreeMatch,
|
||||
}
|
||||
|
||||
return inv, nil
|
||||
}
|
||||
|
||||
// Creates a new DatasetMapFilter in filter mode from a mapping
|
||||
// All accepting mapping results are mapped to accepting filter results
|
||||
// All rejecting mapping results are mapped to rejecting filter results
|
||||
func (m DatasetMapFilter) AsFilter() (f *DatasetMapFilter) {
|
||||
func (m DatasetMapFilter) AsFilter() endpoint.FSFilter {
|
||||
|
||||
f = &DatasetMapFilter{
|
||||
f := &DatasetMapFilter{
|
||||
make([]datasetMapFilterEntry, len(m.entries)),
|
||||
true,
|
||||
}
|
||||
@ -217,16 +256,14 @@ func (m DatasetMapFilter) parseDatasetFilterResult(result string) (pass bool, er
|
||||
return false, fmt.Errorf("'%s' is not a valid filter result", result)
|
||||
}
|
||||
|
||||
func parseDatasetMapFilter(mi interface{}, filterMode bool) (f *DatasetMapFilter, err error) {
|
||||
func DatasetMapFilterFromConfig(in map[string]bool) (f *DatasetMapFilter, err error) {
|
||||
|
||||
var m map[string]string
|
||||
if err = mapstructure.Decode(mi, &m); err != nil {
|
||||
err = fmt.Errorf("maps / filters must be specified as map[string]string: %s", err)
|
||||
return
|
||||
f = NewDatasetMapFilter(len(in), true)
|
||||
for pathPattern, accept := range in {
|
||||
mapping := MapFilterResultOmit
|
||||
if accept {
|
||||
mapping = MapFilterResultOk
|
||||
}
|
||||
|
||||
f = NewDatasetMapFilter(len(m), filterMode)
|
||||
for pathPattern, mapping := range m {
|
||||
if err = f.Add(pathPattern, mapping); err != nil {
|
||||
err = fmt.Errorf("invalid mapping entry ['%s':'%s']: %s", pathPattern, mapping, err)
|
||||
return
|
daemon/filters/fsvfilter.go (new file, 41 lines)
@ -0,0 +1,41 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type AnyFSVFilter struct{}
|
||||
|
||||
func NewAnyFSVFilter() AnyFSVFilter {
|
||||
return AnyFSVFilter{}
|
||||
}
|
||||
|
||||
var _ zfs.FilesystemVersionFilter = AnyFSVFilter{}
|
||||
|
||||
func (AnyFSVFilter) Filter(t zfs.VersionType, name string) (accept bool, err error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
|
||||
type PrefixFilter struct {
|
||||
prefix string
|
||||
fstype zfs.VersionType
|
||||
fstypeSet bool // optionals anyone?
|
||||
}
|
||||
|
||||
var _ zfs.FilesystemVersionFilter = &PrefixFilter{}
|
||||
|
||||
func NewPrefixFilter(prefix string) *PrefixFilter {
|
||||
return &PrefixFilter{prefix: prefix}
|
||||
}
|
||||
|
||||
func NewTypedPrefixFilter(prefix string, versionType zfs.VersionType) *PrefixFilter {
|
||||
return &PrefixFilter{prefix, versionType, true}
|
||||
}
|
||||
|
||||
func (f *PrefixFilter) Filter(t zfs.VersionType, name string) (accept bool, err error) {
|
||||
fstypeMatches := (!f.fstypeSet || t == f.fstype)
|
||||
prefixMatches := strings.HasPrefix(name, f.prefix)
|
||||
return fstypeMatches && prefixMatches, nil
|
||||
}
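For illustration, this is how the new prefix filter is meant to be driven. The snippet is not part of the commit; it assumes the zfs package's Snapshot version-type constant, and the prefix and snapshot names are made up.

package main

import (
	"fmt"

	"github.com/zrepl/zrepl/daemon/filters"
	"github.com/zrepl/zrepl/zfs"
)

func main() {
	// Accept only snapshots whose names carry the given prefix.
	f := filters.NewTypedPrefixFilter("zrepl_", zfs.Snapshot)

	for _, name := range []string{"zrepl_20180915_120000_000", "manual-before-upgrade"} {
		ok, err := f.Filter(zfs.Snapshot, name)
		fmt.Println(name, ok, err) // only the zrepl_-prefixed name passes
	}
}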
|
daemon/job/active.go (new file, 405 lines)
@ -0,0 +1,405 @@
|
||||
package job
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/problame/go-streamrpc"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/job/reset"
|
||||
"github.com/zrepl/zrepl/daemon/job/wakeup"
|
||||
"github.com/zrepl/zrepl/daemon/transport/connecter"
|
||||
"github.com/zrepl/zrepl/daemon/filters"
|
||||
"github.com/zrepl/zrepl/daemon/pruner"
|
||||
"github.com/zrepl/zrepl/endpoint"
|
||||
"github.com/zrepl/zrepl/replication"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"sync"
|
||||
"github.com/zrepl/zrepl/daemon/logging"
|
||||
"github.com/zrepl/zrepl/daemon/snapper"
|
||||
"time"
|
||||
)
|
||||
|
||||
type ActiveSide struct {
|
||||
mode activeMode
|
||||
name string
|
||||
clientFactory *connecter.ClientFactory
|
||||
|
||||
prunerFactory *pruner.PrunerFactory
|
||||
|
||||
|
||||
promRepStateSecs *prometheus.HistogramVec // labels: state
|
||||
promPruneSecs *prometheus.HistogramVec // labels: prune_side
|
||||
promBytesReplicated *prometheus.CounterVec // labels: filesystem
|
||||
|
||||
tasksMtx sync.Mutex
|
||||
tasks activeSideTasks
|
||||
}
|
||||
|
||||
type activeSideTasks struct {
|
||||
replication *replication.Replication
|
||||
prunerSender, prunerReceiver *pruner.Pruner
|
||||
}
|
||||
|
||||
func (a *ActiveSide) updateTasks(u func(*activeSideTasks)) activeSideTasks {
|
||||
a.tasksMtx.Lock()
|
||||
defer a.tasksMtx.Unlock()
|
||||
var copy activeSideTasks
|
||||
copy = a.tasks
|
||||
if u == nil {
|
||||
return copy
|
||||
}
|
||||
u(&copy)
|
||||
a.tasks = copy
|
||||
return copy
|
||||
}
|
||||
|
||||
type activeMode interface {
|
||||
SenderReceiver(client *streamrpc.Client) (replication.Sender, replication.Receiver, error)
|
||||
Type() Type
|
||||
RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{})
|
||||
}
|
||||
|
||||
type modePush struct {
|
||||
fsfilter endpoint.FSFilter
|
||||
snapper *snapper.PeriodicOrManual
|
||||
}
|
||||
|
||||
func (m *modePush) SenderReceiver(client *streamrpc.Client) (replication.Sender, replication.Receiver, error) {
|
||||
sender := endpoint.NewSender(m.fsfilter)
|
||||
receiver := endpoint.NewRemote(client)
|
||||
return sender, receiver, nil
|
||||
}
|
||||
|
||||
func (m *modePush) Type() Type { return TypePush }
|
||||
|
||||
func (m *modePush) RunPeriodic(ctx context.Context, wakeUpCommon chan <- struct{}) {
|
||||
m.snapper.Run(ctx, wakeUpCommon)
|
||||
}
|
||||
|
||||
|
||||
func modePushFromConfig(g *config.Global, in *config.PushJob) (*modePush, error) {
|
||||
m := &modePush{}
|
||||
fsf, err := filters.DatasetMapFilterFromConfig(in.Filesystems)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannnot build filesystem filter")
|
||||
}
|
||||
m.fsfilter = fsf
|
||||
|
||||
if m.snapper, err = snapper.FromConfig(g, fsf, in.Snapshotting); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot build snapper")
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
type modePull struct {
|
||||
rootFS *zfs.DatasetPath
|
||||
interval time.Duration
|
||||
}
|
||||
|
||||
func (m *modePull) SenderReceiver(client *streamrpc.Client) (replication.Sender, replication.Receiver, error) {
|
||||
sender := endpoint.NewRemote(client)
|
||||
receiver, err := endpoint.NewReceiver(m.rootFS)
|
||||
return sender, receiver, err
|
||||
}
|
||||
|
||||
func (*modePull) Type() Type { return TypePull }
|
||||
|
||||
func (m *modePull) RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{}) {
|
||||
t := time.NewTicker(m.interval)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
select {
|
||||
case wakeUpCommon <- struct{}{}:
|
||||
default:
|
||||
GetLogger(ctx).
|
||||
WithField("pull_interval", m.interval).
|
||||
Warn("pull job took longer than pull interval")
|
||||
wakeUpCommon <- struct{}{} // block anyways, to queue up the wakeup
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func modePullFromConfig(g *config.Global, in *config.PullJob) (m *modePull, err error) {
|
||||
m = &modePull{}
|
||||
if in.Interval <= 0 {
|
||||
return nil, errors.New("interval must be positive")
|
||||
}
|
||||
m.interval = in.Interval
|
||||
|
||||
m.rootFS, err = zfs.NewDatasetPath(in.RootFS)
|
||||
if err != nil {
|
||||
return nil, errors.New("RootFS is not a valid zfs filesystem path")
|
||||
}
|
||||
if m.rootFS.Length() <= 0 {
|
||||
return nil, errors.New("RootFS must not be empty") // duplicates error check of receiver
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func activeSide(g *config.Global, in *config.ActiveJob, mode activeMode) (j *ActiveSide, err error) {
|
||||
|
||||
j = &ActiveSide{mode: mode}
|
||||
j.name = in.Name
|
||||
j.promRepStateSecs = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "replication",
|
||||
Name: "state_time",
|
||||
Help: "seconds spent during replication",
|
||||
ConstLabels: prometheus.Labels{"zrepl_job":j.name},
|
||||
}, []string{"state"})
|
||||
j.promBytesReplicated = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "replication",
|
||||
Name: "bytes_replicated",
|
||||
Help: "number of bytes replicated from sender to receiver per filesystem",
|
||||
ConstLabels: prometheus.Labels{"zrepl_job":j.name},
|
||||
}, []string{"filesystem"})
|
||||
|
||||
j.clientFactory, err = connecter.FromConfig(g, in.Connect)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot build client")
|
||||
}
|
||||
|
||||
j.promPruneSecs = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "pruning",
|
||||
Name: "time",
|
||||
Help: "seconds spent in pruner",
|
||||
ConstLabels: prometheus.Labels{"zrepl_job":j.name},
|
||||
}, []string{"prune_side"})
|
||||
j.prunerFactory, err = pruner.NewPrunerFactory(in.Pruning, j.promPruneSecs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return j, nil
|
||||
}
|
||||
|
||||
func (j *ActiveSide) RegisterMetrics(registerer prometheus.Registerer) {
|
||||
registerer.MustRegister(j.promRepStateSecs)
|
||||
registerer.MustRegister(j.promPruneSecs)
|
||||
registerer.MustRegister(j.promBytesReplicated)
|
||||
}
|
||||
|
||||
func (j *ActiveSide) Name() string { return j.name }
|
||||
|
||||
type ActiveSideStatus struct {
|
||||
Replication *replication.Report
|
||||
PruningSender, PruningReceiver *pruner.Report
|
||||
}
|
||||
|
||||
func (j *ActiveSide) Status() *Status {
|
||||
tasks := j.updateTasks(nil)
|
||||
|
||||
s := &ActiveSideStatus{}
|
||||
t := j.mode.Type()
|
||||
if tasks.replication != nil {
|
||||
s.Replication = tasks.replication.Report()
|
||||
}
|
||||
if tasks.prunerSender != nil {
|
||||
s.PruningSender = tasks.prunerSender.Report()
|
||||
}
|
||||
if tasks.prunerReceiver != nil {
|
||||
s.PruningReceiver = tasks.prunerReceiver.Report()
|
||||
}
|
||||
return &Status{Type: t, JobSpecific: s}
|
||||
}
|
||||
|
||||
func (j *ActiveSide) Run(ctx context.Context) {
|
||||
log := GetLogger(ctx)
|
||||
ctx = logging.WithSubsystemLoggers(ctx, log)
|
||||
|
||||
defer log.Info("job exiting")
|
||||
|
||||
periodicDone := make(chan struct{})
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
go j.mode.RunPeriodic(ctx, periodicDone)
|
||||
|
||||
invocationCount := 0
|
||||
outer:
|
||||
for {
|
||||
log.Info("wait for wakeups")
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.WithError(ctx.Err()).Info("context")
|
||||
break outer
|
||||
|
||||
case <-wakeup.Wait(ctx):
|
||||
case <-periodicDone:
|
||||
}
|
||||
invocationCount++
|
||||
invLog := log.WithField("invocation", invocationCount)
|
||||
j.do(WithLogger(ctx, invLog), periodicDone)
|
||||
}
|
||||
}
|
||||
|
||||
func (j *ActiveSide) do(ctx context.Context, periodicWakeup <-chan struct{}) {
|
||||
|
||||
log := GetLogger(ctx)
|
||||
ctx = logging.WithSubsystemLoggers(ctx, log)
|
||||
|
||||
// allow cancellation of an invocation (this function)
|
||||
ctx, cancelThisRun := context.WithCancel(ctx)
|
||||
defer cancelThisRun()
|
||||
runDone := make(chan struct{})
|
||||
defer close(runDone)
|
||||
go func() {
|
||||
select {
|
||||
case <-runDone:
|
||||
case <-reset.Wait(ctx):
|
||||
log.Info("reset received, cancelling current invocation")
|
||||
cancelThisRun()
|
||||
case <-ctx.Done():
|
||||
}
|
||||
}()
|
||||
|
||||
client, err := j.clientFactory.NewClient()
|
||||
if err != nil {
|
||||
log.WithError(err).Error("factory cannot instantiate streamrpc client")
|
||||
}
|
||||
defer client.Close(ctx)
|
||||
|
||||
sender, receiver, err := j.mode.SenderReceiver(client)
if err != nil {
	log.WithError(err).Error("cannot build sender/receiver endpoints")
	return
}
|
||||
|
||||
tasks := j.updateTasks(func(tasks *activeSideTasks) {
|
||||
// reset it
|
||||
*tasks = activeSideTasks{}
|
||||
tasks.replication = replication.NewReplication(j.promRepStateSecs, j.promBytesReplicated)
|
||||
})
|
||||
|
||||
log.Info("start replication")
|
||||
replicationDone := make(chan struct{})
|
||||
replicationCtx, replicationCancel := context.WithCancel(ctx)
|
||||
defer replicationCancel()
|
||||
go func() {
|
||||
tasks.replication.Drive(replicationCtx, sender, receiver)
|
||||
close(replicationDone)
|
||||
}()
|
||||
outer:
|
||||
for {
|
||||
select {
|
||||
case <-replicationDone:
|
||||
// fine!
|
||||
break outer
|
||||
case <-periodicWakeup:
|
||||
// Replication took longer than the periodic interval.
|
||||
//
|
||||
// For pull jobs, this isn't so bad because nothing changes on the active side
|
||||
// if replication doesn't go forward.
|
||||
//
|
||||
// For push jobs, this means snapshots were taken.
|
||||
// We need to invoke the pruner now, because otherwise an infinitely stuck replication
|
||||
// will cause this side to fill up with snapshots.
|
||||
//
|
||||
// However, there are cases where replication progresses and just takes longer,
|
||||
// and we don't want these situations be interrupted by a prune, which will require
|
||||
// re-planning and starting over (think of initial replication as an example).
|
||||
//
|
||||
// Therefore, we prohibit pruning of snapshots that are part of the current replication plan.
|
||||
// If there is no such plan, we kill the replication.
|
||||
|
||||
if j.mode.Type() == TypePush {
|
||||
|
||||
rep := tasks.replication.Report()
|
||||
state, err := replication.StateString(rep.Status)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
switch state {
|
||||
case replication.Planning:
|
||||
fallthrough
|
||||
case replication.PlanningError:
|
||||
fallthrough
|
||||
case replication.WorkingWait:
|
||||
log.WithField("repl_state", state.String()).
|
||||
Info("cancelling replication after new snapshots invalidated its current state")
|
||||
replicationCancel()
|
||||
log.Info("waiting for replication to stop")
|
||||
<-replicationDone // no need to wait for ctx.Done, replication is already bound to global cancel
|
||||
break outer
|
||||
default:
|
||||
log.WithField("repl_state", state.String()).
|
||||
Warn("new snapshots while replication is running and making progress")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
var pruningWg sync.WaitGroup
|
||||
log.Info("start pruning sender")
|
||||
pruningWg.Add(1)
|
||||
go func() {
|
||||
defer pruningWg.Done()
|
||||
tasks := j.updateTasks(func(tasks *activeSideTasks) {
|
||||
tasks.prunerSender = j.prunerFactory.BuildSenderPruner(ctx, sender, sender)
|
||||
})
|
||||
tasks.prunerSender.Prune()
|
||||
// FIXME no need to do the cancellation dance with sender, we know it's local for push
|
||||
// FIXME and we don't worry about pull ATM
|
||||
}()
|
||||
log.Info("start pruning receiver")
|
||||
pruningWg.Add(1)
|
||||
go func() {
|
||||
defer pruningWg.Done()
|
||||
|
||||
receiverPrunerCtx, receiverPrunerCancel := context.WithCancel(ctx)
|
||||
defer receiverPrunerCancel()
|
||||
tasks := j.updateTasks(func(tasks *activeSideTasks) {
|
||||
tasks.prunerReceiver = j.prunerFactory.BuildReceiverPruner(receiverPrunerCtx, receiver, sender)
|
||||
})
|
||||
receiverPrunerDone := make(chan struct{})
|
||||
go func() {
|
||||
defer close(receiverPrunerDone)
|
||||
tasks.prunerReceiver.Prune()
|
||||
}()
|
||||
|
||||
outer:
|
||||
for {
|
||||
select {
|
||||
case <-receiverPrunerDone:
|
||||
// fine!
|
||||
break outer
|
||||
case <-periodicWakeup:
|
||||
// see comments for similar approach with replication above
|
||||
if j.mode.Type() == TypePush {
|
||||
rep := tasks.prunerReceiver.Report()
|
||||
state, err := pruner.StateString(rep.State)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
switch state {
|
||||
case pruner.PlanWait:
|
||||
fallthrough
|
||||
case pruner.ExecWait:
|
||||
log.WithField("pruner_state", state.String()).
|
||||
Info("cancelling failing prune on receiver because new snapshots were taken on sender")
|
||||
receiverPrunerCancel()
|
||||
log.Info("waiting for receiver pruner to stop")
|
||||
<-receiverPrunerDone
|
||||
break outer
|
||||
default:
|
||||
log.WithField("pruner_state", state.String()).
|
||||
Warn("new snapshots while prune on receiver is still running")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}()
|
||||
|
||||
pruningWg.Wait() // if pruners handle ctx cancellation correctly, we don't need to wait for it here
|
||||
|
||||
}
|
daemon/job/build_jobs.go (new file, 68 lines)
@ -0,0 +1,68 @@
|
||||
package job
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
)
|
||||
|
||||
func JobsFromConfig(c *config.Config) ([]Job, error) {
|
||||
js := make([]Job, len(c.Jobs))
|
||||
for i := range c.Jobs {
|
||||
j, err := buildJob(c.Global, c.Jobs[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
js[i] = j
|
||||
}
|
||||
return js, nil
|
||||
}
|
||||
|
||||
func buildJob(c *config.Global, in config.JobEnum) (j Job, err error) {
|
||||
cannotBuildJob := func(e error, name string) (Job, error) {
|
||||
return nil, errors.Wrapf(err, "cannot build job %q", name)
|
||||
}
|
||||
// FIXME prettify this
|
||||
switch v := in.Ret.(type) {
|
||||
case *config.SinkJob:
|
||||
m, err := modeSinkFromConfig(c, v)
|
||||
if err != nil {
|
||||
return cannotBuildJob(err, v.Name)
|
||||
}
|
||||
j, err = passiveSideFromConfig(c, &v.PassiveJob, m)
|
||||
if err != nil {
|
||||
return cannotBuildJob(err, v.Name)
|
||||
}
|
||||
case *config.SourceJob:
|
||||
m, err := modeSourceFromConfig(c, v)
|
||||
if err != nil {
|
||||
return cannotBuildJob(err, v.Name)
|
||||
}
|
||||
j, err = passiveSideFromConfig(c, &v.PassiveJob, m)
|
||||
if err != nil {
|
||||
return cannotBuildJob(err, v.Name)
|
||||
}
|
||||
case *config.PushJob:
|
||||
m, err := modePushFromConfig(c, v)
|
||||
if err != nil {
|
||||
return cannotBuildJob(err, v.Name)
|
||||
}
|
||||
j, err = activeSide(c, &v.ActiveJob, m)
|
||||
if err != nil {
|
||||
return cannotBuildJob(err, v.Name)
|
||||
}
|
||||
case *config.PullJob:
|
||||
m, err := modePullFromConfig(c, v)
|
||||
if err != nil {
|
||||
return cannotBuildJob(err, v.Name)
|
||||
}
|
||||
j, err = activeSide(c, &v.ActiveJob, m)
|
||||
if err != nil {
|
||||
return cannotBuildJob(err, v.Name)
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("implementation error: unknown job type %T", v))
|
||||
}
|
||||
return j, nil
|
||||
|
||||
}
|
daemon/job/job.go (new file, 103 lines)
@ -0,0 +1,103 @@
|
||||
package job
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
)
|
||||
|
||||
type Logger = logger.Logger
|
||||
|
||||
type contextKey int
|
||||
|
||||
const (
|
||||
contextKeyLog contextKey = iota
|
||||
)
|
||||
|
||||
func GetLogger(ctx context.Context) Logger {
|
||||
if l, ok := ctx.Value(contextKeyLog).(Logger); ok {
|
||||
return l
|
||||
}
|
||||
return logger.NewNullLogger()
|
||||
}
|
||||
|
||||
func WithLogger(ctx context.Context, l Logger) context.Context {
|
||||
return context.WithValue(ctx, contextKeyLog, l)
|
||||
}
|
||||
|
||||
|
||||
type Job interface {
|
||||
Name() string
|
||||
Run(ctx context.Context)
|
||||
Status() *Status
|
||||
RegisterMetrics(registerer prometheus.Registerer)
|
||||
}
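// Illustrative sketch, not part of this commit: a minimal type that would
// satisfy the Job interface above. Real jobs are built via JobsFromConfig.
type noopJob struct{ name string }

func (j *noopJob) Name() string                            { return j.name }
func (j *noopJob) Status() *Status                         { return &Status{Type: TypeInternal} }
func (j *noopJob) RegisterMetrics(_ prometheus.Registerer) {}

func (j *noopJob) Run(ctx context.Context) {
	// A job is expected to block until the daemon cancels its context.
	<-ctx.Done()
	GetLogger(ctx).WithError(ctx.Err()).Info("noop job exiting")
}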
|
||||
|
||||
type Type string
|
||||
|
||||
const (
|
||||
TypeInternal Type = "internal"
|
||||
TypePush Type = "push"
|
||||
TypeSink Type = "sink"
|
||||
TypePull Type = "pull"
|
||||
TypeSource Type = "source"
|
||||
)
|
||||
|
||||
type Status struct {
|
||||
Type Type
|
||||
JobSpecific interface{}
|
||||
}
|
||||
|
||||
func (s *Status) MarshalJSON() ([]byte, error) {
|
||||
typeJson, err := json.Marshal(s.Type)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
jobJSON, err := json.Marshal(s.JobSpecific)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := map[string]json.RawMessage {
|
||||
"type": typeJson,
|
||||
string(s.Type): jobJSON,
|
||||
}
|
||||
return json.Marshal(m)
|
||||
}
|
||||
|
||||
func (s *Status) UnmarshalJSON(in []byte) (err error) {
|
||||
var m map[string]json.RawMessage
|
||||
if err := json.Unmarshal(in, &m); err != nil {
|
||||
return err
|
||||
}
|
||||
tJSON, ok := m["type"]
|
||||
if !ok {
|
||||
return fmt.Errorf("field 'type' not found")
|
||||
}
|
||||
if err := json.Unmarshal(tJSON, &s.Type); err != nil {
|
||||
return err
|
||||
}
|
||||
key := string(s.Type)
|
||||
jobJSON, ok := m[key]
|
||||
if !ok {
|
||||
return fmt.Errorf("field '%s', not found", key)
|
||||
}
|
||||
switch s.Type {
|
||||
case TypePull: fallthrough
|
||||
case TypePush:
|
||||
var st ActiveSideStatus
|
||||
err = json.Unmarshal(jobJSON, &st)
|
||||
s.JobSpecific = &st
|
||||
case TypeSource: fallthrough
|
||||
case TypeSink:
|
||||
var st PassiveStatus
|
||||
err = json.Unmarshal(jobJSON, &st)
|
||||
s.JobSpecific = &st
|
||||
case TypeInternal:
|
||||
// internal jobs do not report specifics
|
||||
default:
|
||||
err = fmt.Errorf("unknown job type '%s'", key)
|
||||
}
|
||||
return err
|
||||
}
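To make the envelope format above concrete, here is a small stand-alone example (not part of the commit) that round-trips an ActiveSideStatus through Status's MarshalJSON/UnmarshalJSON; the printed JSON shape in the comment is indicative only.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/zrepl/zrepl/daemon/job"
)

func main() {
	// Marshal a push job's status into the type-tagged envelope built above.
	s := &job.Status{Type: job.TypePush, JobSpecific: &job.ActiveSideStatus{}}
	b, err := json.Marshal(s)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b)) // e.g. {"push":{"Replication":null,...},"type":"push"}

	// UnmarshalJSON uses the "type" key to pick the concrete status type again.
	var back job.Status
	if err := json.Unmarshal(b, &back); err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", back.JobSpecific) // *job.ActiveSideStatus
}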
|
daemon/job/passive.go (new file, 196 lines)
@ -0,0 +1,196 @@
|
||||
package job
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/problame/go-streamrpc"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/filters"
|
||||
"github.com/zrepl/zrepl/daemon/logging"
|
||||
"github.com/zrepl/zrepl/daemon/transport/serve"
|
||||
"github.com/zrepl/zrepl/daemon/snapper"
|
||||
"github.com/zrepl/zrepl/endpoint"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"path"
|
||||
)
|
||||
|
||||
type PassiveSide struct {
|
||||
mode passiveMode
|
||||
name string
|
||||
l serve.ListenerFactory
|
||||
rpcConf *streamrpc.ConnConfig
|
||||
}
|
||||
|
||||
type passiveMode interface {
|
||||
ConnHandleFunc(ctx context.Context, conn serve.AuthenticatedConn) streamrpc.HandlerFunc
|
||||
RunPeriodic(ctx context.Context)
|
||||
Type() Type
|
||||
}
|
||||
|
||||
type modeSink struct {
|
||||
rootDataset *zfs.DatasetPath
|
||||
}
|
||||
|
||||
func (m *modeSink) Type() Type { return TypeSink }
|
||||
|
||||
func (m *modeSink) ConnHandleFunc(ctx context.Context, conn serve.AuthenticatedConn) streamrpc.HandlerFunc {
|
||||
log := GetLogger(ctx)
|
||||
|
||||
clientRootStr := path.Join(m.rootDataset.ToString(), conn.ClientIdentity())
|
||||
clientRoot, err := zfs.NewDatasetPath(clientRootStr)
|
||||
if err != nil {
|
||||
log.WithError(err).
|
||||
WithField("client_identity", conn.ClientIdentity()).
|
||||
Error("cannot build client filesystem map (client identity must be a valid ZFS FS name")
|
||||
}
|
||||
log.WithField("client_root", clientRoot).Debug("client root")
|
||||
|
||||
local, err := endpoint.NewReceiver(clientRoot)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("unexpected error: cannot convert mapping to filter")
|
||||
return nil
|
||||
}
|
||||
|
||||
h := endpoint.NewHandler(local)
|
||||
return h.Handle
|
||||
}
|
||||
|
||||
func (m *modeSink) RunPeriodic(_ context.Context) {}
|
||||
|
||||
func modeSinkFromConfig(g *config.Global, in *config.SinkJob) (m *modeSink, err error) {
|
||||
m = &modeSink{}
|
||||
m.rootDataset, err = zfs.NewDatasetPath(in.RootFS)
|
||||
if err != nil {
|
||||
return nil, errors.New("root dataset is not a valid zfs filesystem path")
|
||||
}
|
||||
if m.rootDataset.Length() <= 0 {
|
||||
return nil, errors.New("root dataset must not be empty") // duplicates error check of receiver
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
type modeSource struct {
|
||||
fsfilter zfs.DatasetFilter
|
||||
snapper *snapper.PeriodicOrManual
|
||||
}
|
||||
|
||||
func modeSourceFromConfig(g *config.Global, in *config.SourceJob) (m *modeSource, err error) {
|
||||
// FIXME exact dedup of modePush
|
||||
m = &modeSource{}
|
||||
fsf, err := filters.DatasetMapFilterFromConfig(in.Filesystems)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannnot build filesystem filter")
|
||||
}
|
||||
m.fsfilter = fsf
|
||||
|
||||
if m.snapper, err = snapper.FromConfig(g, fsf, in.Snapshotting); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot build snapper")
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *modeSource) Type() Type { return TypeSource }
|
||||
|
||||
func (m *modeSource) ConnHandleFunc(ctx context.Context, conn serve.AuthenticatedConn) streamrpc.HandlerFunc {
|
||||
sender := endpoint.NewSender(m.fsfilter)
|
||||
h := endpoint.NewHandler(sender)
|
||||
return h.Handle
|
||||
}
|
||||
|
||||
func (m *modeSource) RunPeriodic(ctx context.Context) {
|
||||
m.snapper.Run(ctx, nil)
|
||||
}
|
||||
|
||||
func passiveSideFromConfig(g *config.Global, in *config.PassiveJob, mode passiveMode) (s *PassiveSide, err error) {
|
||||
|
||||
s = &PassiveSide{mode: mode, name: in.Name}
|
||||
if s.l, s.rpcConf, err = serve.FromConfig(g, in.Serve); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot build server")
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (j *PassiveSide) Name() string { return j.name }
|
||||
|
||||
type PassiveStatus struct {}
|
||||
|
||||
func (s *PassiveSide) Status() *Status {
|
||||
return &Status{Type: s.mode.Type()} // FIXME PassiveStatus
|
||||
}
|
||||
|
||||
func (*PassiveSide) RegisterMetrics(registerer prometheus.Registerer) {}
|
||||
|
||||
func (j *PassiveSide) Run(ctx context.Context) {
|
||||
|
||||
log := GetLogger(ctx)
|
||||
defer log.Info("job exiting")
|
||||
|
||||
l, err := j.l.Listen()
|
||||
if err != nil {
|
||||
log.WithError(err).Error("cannot listen")
|
||||
return
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
{
|
||||
ctx, cancel := context.WithCancel(logging.WithSubsystemLoggers(ctx, log)) // shadowing
|
||||
defer cancel()
|
||||
go j.mode.RunPeriodic(ctx)
|
||||
}
|
||||
|
||||
log.WithField("addr", l.Addr()).Debug("accepting connections")
|
||||
var connId int
|
||||
outer:
|
||||
for {
|
||||
|
||||
select {
|
||||
case res := <-accept(ctx, l):
|
||||
if res.err != nil {
|
||||
log.WithError(res.err).Info("accept error")
|
||||
continue
|
||||
}
|
||||
conn := res.conn
|
||||
connId++
|
||||
connLog := log.
|
||||
WithField("connID", connId)
|
||||
connLog.
|
||||
WithField("addr", conn.RemoteAddr()).
|
||||
WithField("client_identity", conn.ClientIdentity()).
|
||||
Info("handling connection")
|
||||
go func() {
|
||||
defer connLog.Info("finished handling connection")
|
||||
defer conn.Close()
|
||||
ctx := logging.WithSubsystemLoggers(ctx, connLog)
|
||||
handleFunc := j.mode.ConnHandleFunc(ctx, conn)
|
||||
if handleFunc == nil {
|
||||
return
|
||||
}
|
||||
if err := streamrpc.ServeConn(ctx, conn, j.rpcConf, handleFunc); err != nil {
|
||||
log.WithError(err).Error("error serving client")
|
||||
}
|
||||
}()
|
||||
|
||||
case <-ctx.Done():
|
||||
break outer
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type acceptResult struct {
|
||||
conn serve.AuthenticatedConn
|
||||
err error
|
||||
}
|
||||
|
||||
func accept(ctx context.Context, listener serve.AuthenticatedListener) <-chan acceptResult {
|
||||
c := make(chan acceptResult, 1)
|
||||
go func() {
|
||||
conn, err := listener.Accept(ctx)
|
||||
c <- acceptResult{conn, err}
|
||||
}()
|
||||
return c
|
||||
}
|
daemon/job/reset/reset.go (new file, 35 lines)
@ -0,0 +1,35 @@
|
||||
package reset
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
)
|
||||
|
||||
type contextKey int
|
||||
|
||||
const contextKeyReset contextKey = iota
|
||||
|
||||
func Wait(ctx context.Context) <-chan struct{} {
|
||||
wc, ok := ctx.Value(contextKeyReset).(chan struct{})
|
||||
if !ok {
|
||||
wc = make(chan struct{})
|
||||
}
|
||||
return wc
|
||||
}
|
||||
|
||||
type Func func() error
|
||||
|
||||
var AlreadyReset = errors.New("already reset")
|
||||
|
||||
func Context(ctx context.Context) (context.Context, Func) {
|
||||
wc := make(chan struct{})
|
||||
wuf := func() error {
|
||||
select {
|
||||
case wc <- struct{}{}:
|
||||
return nil
|
||||
default:
|
||||
return AlreadyReset
|
||||
}
|
||||
}
|
||||
return context.WithValue(ctx, contextKeyReset, wc), wuf
|
||||
}
|
daemon/job/wakeup/wakeup.go (new file, 35 lines)
@ -0,0 +1,35 @@
|
||||
package wakeup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
)
|
||||
|
||||
type contextKey int
|
||||
|
||||
const contextKeyWakeup contextKey = iota
|
||||
|
||||
func Wait(ctx context.Context) <-chan struct{} {
|
||||
wc, ok := ctx.Value(contextKeyWakeup).(chan struct{})
|
||||
if !ok {
|
||||
wc = make(chan struct{})
|
||||
}
|
||||
return wc
|
||||
}
|
||||
|
||||
type Func func() error
|
||||
|
||||
var AlreadyWokenUp = errors.New("already woken up")
|
||||
|
||||
func Context(ctx context.Context) (context.Context, Func) {
|
||||
wc := make(chan struct{})
|
||||
wuf := func() error {
|
||||
select {
|
||||
case wc <- struct{}{}:
|
||||
return nil
|
||||
default:
|
||||
return AlreadyWokenUp
|
||||
}
|
||||
}
|
||||
return context.WithValue(ctx, contextKeyWakeup, wc), wuf
|
||||
}
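The wakeup package (and the structurally identical reset package above) implements a context-carried trigger channel: the daemon stores the Func returned by Context, and the job blocks in Wait. A minimal stand-alone sketch of that interplay follows; it is not part of the commit and the printed strings are made up.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/zrepl/zrepl/daemon/job/wakeup"
)

func main() {
	// The daemon wraps each job's context like this and keeps the returned
	// trigger Func around (compare jobs.start and jobs.wakeup above).
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	ctx, trigger := wakeup.Context(ctx)

	done := make(chan struct{})
	go func() {
		defer close(done)
		// A job's main loop blocks here until it is woken up or cancelled.
		select {
		case <-wakeup.Wait(ctx):
			fmt.Println("woken up, starting an invocation")
		case <-ctx.Done():
			fmt.Println("cancelled before a wakeup arrived")
		}
	}()

	// The trigger is non-blocking: it reports AlreadyWokenUp if no job is
	// currently waiting on the channel.
	time.Sleep(50 * time.Millisecond)
	if err := trigger(); err == wakeup.AlreadyWokenUp {
		fmt.Println("no waiter yet, wakeup not delivered")
	}
	<-done
}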
|
daemon/logging/adaptors.go (new file, 32 lines)
@ -0,0 +1,32 @@
|
||||
package logging
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/problame/go-streamrpc"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type streamrpcLogAdaptor = twoClassLogAdaptor
|
||||
|
||||
type twoClassLogAdaptor struct {
|
||||
logger.Logger
|
||||
}
|
||||
|
||||
var _ streamrpc.Logger = twoClassLogAdaptor{}
|
||||
|
||||
func (a twoClassLogAdaptor) Errorf(fmtStr string, args ...interface{}) {
|
||||
const errorSuffix = ": %s"
|
||||
if len(args) == 1 {
|
||||
if err, ok := args[0].(error); ok && strings.HasSuffix(fmtStr, errorSuffix) {
|
||||
msg := strings.TrimSuffix(fmtStr, errorSuffix)
|
||||
a.WithError(err).Error(msg)
|
||||
return
|
||||
}
|
||||
}
|
||||
a.Logger.Error(fmt.Sprintf(fmtStr, args...))
|
||||
}
|
||||
|
||||
func (a twoClassLogAdaptor) Infof(fmtStr string, args ...interface{}) {
|
||||
a.Logger.Debug(fmt.Sprintf(fmtStr, args...))
|
||||
}
|
daemon/logging/build_logging.go (new file, 205 lines)
@ -0,0 +1,205 @@
|
||||
package logging
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"github.com/mattn/go-isatty"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/problame/go-streamrpc"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/pruner"
|
||||
"github.com/zrepl/zrepl/endpoint"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/replication"
|
||||
"github.com/zrepl/zrepl/tlsconf"
|
||||
"os"
|
||||
"github.com/zrepl/zrepl/daemon/snapper"
|
||||
"github.com/zrepl/zrepl/daemon/transport/serve"
|
||||
)
|
||||
|
||||
func OutletsFromConfig(in config.LoggingOutletEnumList) (*logger.Outlets, error) {
|
||||
|
||||
outlets := logger.NewOutlets()
|
||||
|
||||
if len(in) == 0 {
|
||||
// Default config
|
||||
out := WriterOutlet{&HumanFormatter{}, os.Stdout}
|
||||
outlets.Add(out, logger.Warn)
|
||||
return outlets, nil
|
||||
}
|
||||
|
||||
var syslogOutlets, stdoutOutlets int
|
||||
for lei, le := range in {
|
||||
|
||||
outlet, minLevel, err := parseOutlet(le)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "cannot parse outlet #%d", lei)
|
||||
}
|
||||
var _ logger.Outlet = WriterOutlet{}
|
||||
var _ logger.Outlet = &SyslogOutlet{}
|
||||
switch outlet.(type) {
|
||||
case *SyslogOutlet:
|
||||
syslogOutlets++
|
||||
case WriterOutlet:
|
||||
stdoutOutlets++
|
||||
}
|
||||
|
||||
outlets.Add(outlet, minLevel)
|
||||
|
||||
}
|
||||
|
||||
if syslogOutlets > 1 {
|
||||
return nil, errors.Errorf("can only define one 'syslog' outlet")
|
||||
}
|
||||
if stdoutOutlets > 1 {
|
||||
return nil, errors.Errorf("can only define one 'stdout' outlet")
|
||||
}
|
||||
|
||||
return outlets, nil
|
||||
|
||||
}
|
||||
|
||||
const (
|
||||
SubsysReplication = "repl"
|
||||
SubsysStreamrpc = "rpc"
|
||||
SubsyEndpoint = "endpoint"
|
||||
)
|
||||
|
||||
func WithSubsystemLoggers(ctx context.Context, log logger.Logger) context.Context {
|
||||
ctx = replication.WithLogger(ctx, log.WithField(SubsysField, "repl"))
|
||||
ctx = streamrpc.ContextWithLogger(ctx, streamrpcLogAdaptor{log.WithField(SubsysField, "rpc")})
|
||||
ctx = endpoint.WithLogger(ctx, log.WithField(SubsysField, "endpoint"))
|
||||
ctx = pruner.WithLogger(ctx, log.WithField(SubsysField, "pruning"))
|
||||
ctx = snapper.WithLogger(ctx, log.WithField(SubsysField, "snapshot"))
|
||||
ctx = serve.WithLogger(ctx, log.WithField(SubsysField, "serve"))
|
||||
return ctx
|
||||
}
|
||||
|
||||
func parseLogFormat(i interface{}) (f EntryFormatter, err error) {
|
||||
var is string
|
||||
switch j := i.(type) {
|
||||
case string:
|
||||
is = j
|
||||
default:
|
||||
return nil, errors.Errorf("invalid log format: wrong type: %T", i)
|
||||
}
|
||||
|
||||
switch is {
|
||||
case "human":
|
||||
return &HumanFormatter{}, nil
|
||||
case "logfmt":
|
||||
return &LogfmtFormatter{}, nil
|
||||
case "json":
|
||||
return &JSONFormatter{}, nil
|
||||
default:
|
||||
return nil, errors.Errorf("invalid log format: '%s'", is)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func parseOutlet(in config.LoggingOutletEnum) (o logger.Outlet, level logger.Level, err error) {
|
||||
|
||||
parseCommon := func(common config.LoggingOutletCommon) (logger.Level, EntryFormatter, error) {
|
||||
if common.Level == "" || common.Format == "" {
|
||||
return 0, nil, errors.Errorf("must specify 'level' and 'format' field")
|
||||
}
|
||||
|
||||
minLevel, err := logger.ParseLevel(common.Level)
|
||||
if err != nil {
|
||||
return 0, nil, errors.Wrap(err, "cannot parse 'level' field")
|
||||
}
|
||||
formatter, err := parseLogFormat(common.Format)
|
||||
if err != nil {
|
||||
return 0, nil, errors.Wrap(err, "cannot parse 'formatter' field")
|
||||
}
|
||||
return minLevel, formatter, nil
|
||||
}
|
||||
|
||||
var f EntryFormatter
|
||||
|
||||
switch v := in.Ret.(type) {
|
||||
case *config.StdoutLoggingOutlet:
|
||||
level, f, err = parseCommon(v.LoggingOutletCommon)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
o, err = parseStdoutOutlet(v, f)
|
||||
case *config.TCPLoggingOutlet:
|
||||
level, f, err = parseCommon(v.LoggingOutletCommon)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
o, err = parseTCPOutlet(v, f)
|
||||
case *config.SyslogLoggingOutlet:
|
||||
level, f, err = parseCommon(v.LoggingOutletCommon)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
o, err = parseSyslogOutlet(v, f)
|
||||
default:
|
||||
panic(v)
|
||||
}
|
||||
return o, level, err
|
||||
}
|
||||
|
||||
func parseStdoutOutlet(in *config.StdoutLoggingOutlet, formatter EntryFormatter) (WriterOutlet, error) {
|
||||
flags := MetadataAll
|
||||
writer := os.Stdout
|
||||
if !isatty.IsTerminal(writer.Fd()) && !in.Time {
|
||||
flags &= ^MetadataTime
|
||||
}
|
||||
if isatty.IsTerminal(writer.Fd()) && !in.Color {
|
||||
flags &= ^MetadataColor
|
||||
}
|
||||
|
||||
formatter.SetMetadataFlags(flags)
|
||||
return WriterOutlet{
|
||||
formatter,
|
||||
os.Stdout,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseTCPOutlet(in *config.TCPLoggingOutlet, formatter EntryFormatter) (out *TCPOutlet, err error) {
|
||||
var tlsConfig *tls.Config
|
||||
if in.TLS != nil {
|
||||
tlsConfig, err = func(m *config.TCPLoggingOutletTLS, host string) (*tls.Config, error) {
|
||||
clientCert, err := tls.LoadX509KeyPair(m.Cert, m.Key)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot load client cert")
|
||||
}
|
||||
|
||||
var rootCAs *x509.CertPool
|
||||
if m.CA == "" {
|
||||
if rootCAs, err = x509.SystemCertPool(); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot open system cert pool")
|
||||
}
|
||||
} else {
|
||||
rootCAs, err = tlsconf.ParseCAFile(m.CA)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot parse CA cert")
|
||||
}
|
||||
}
|
||||
if rootCAs == nil {
|
||||
panic("invariant violated")
|
||||
}
|
||||
|
||||
return tlsconf.ClientAuthClient(host, rootCAs, clientCert)
|
||||
}(in.TLS, in.Address)
|
||||
if err != nil {
|
||||
return nil, errors.New("cannot not parse TLS config in field 'tls'")
|
||||
}
|
||||
}
|
||||
|
||||
formatter.SetMetadataFlags(MetadataAll)
|
||||
return NewTCPOutlet(formatter, in.Net, in.Address, tlsConfig, in.RetryInterval), nil
|
||||
|
||||
}
|
||||
|
||||
func parseSyslogOutlet(in *config.SyslogLoggingOutlet, formatter EntryFormatter) (out *SyslogOutlet, err error) {
|
||||
out = &SyslogOutlet{}
|
||||
out.Formatter = formatter
|
||||
out.Formatter.SetMetadataFlags(MetadataNone)
|
||||
out.RetryInterval = in.RetryInterval
|
||||
return out, nil
|
||||
}
|
@ -1,20 +1,16 @@
|
||||
package cmd
|
||||
package logging
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/fatih/color"
|
||||
"github.com/go-logfmt/logfmt"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"time"
|
||||
)
|
||||
|
||||
type EntryFormatter interface {
|
||||
SetMetadataFlags(flags MetadataFlags)
|
||||
Format(e *logger.Entry) ([]byte, error)
|
||||
}
|
||||
|
||||
const (
|
||||
FieldLevel = "level"
|
||||
FieldMessage = "msg"
|
||||
@ -22,13 +18,19 @@ const (
|
||||
)
|
||||
|
||||
const (
|
||||
logJobField string = "job"
|
||||
logTaskField string = "task"
|
||||
logFSField string = "filesystem"
|
||||
logMapFromField string = "map_from"
|
||||
logMapToField string = "map_to"
|
||||
logIncFromField string = "inc_from"
|
||||
logIncToField string = "inc_to"
|
||||
JobField string = "job"
|
||||
SubsysField string = "subsystem"
|
||||
)
|
||||
|
||||
type MetadataFlags int64
|
||||
|
||||
const (
|
||||
MetadataTime MetadataFlags = 1 << iota
|
||||
MetadataLevel
|
||||
MetadataColor
|
||||
|
||||
MetadataNone MetadataFlags = 0
|
||||
MetadataAll MetadataFlags = ^0
|
||||
)
|
||||
|
||||
type NoFormatter struct{}
|
||||
@ -69,39 +71,29 @@ func (f *HumanFormatter) ignored(field string) bool {
|
||||
func (f *HumanFormatter) Format(e *logger.Entry) (out []byte, err error) {
|
||||
|
||||
var line bytes.Buffer
|
||||
col := color.New()
|
||||
if f.metadataFlags&MetadataColor != 0 {
|
||||
col = e.Color()
|
||||
}
|
||||
|
||||
if f.metadataFlags&MetadataTime != 0 {
|
||||
fmt.Fprintf(&line, "%s ", e.Time.Format(HumanFormatterDateFormat))
|
||||
}
|
||||
if f.metadataFlags&MetadataLevel != 0 {
|
||||
fmt.Fprintf(&line, "[%s]", e.Level.Short())
|
||||
fmt.Fprintf(&line, "[%s]", col.Sprint(e.Level.Short()))
|
||||
}
|
||||
|
||||
prefixFields := []string{logJobField, logTaskField, logFSField}
|
||||
prefixFields := []string{JobField, SubsysField}
|
||||
prefixed := make(map[string]bool, len(prefixFields)+2)
|
||||
for _, field := range prefixFields {
|
||||
val, ok := e.Fields[field].(string)
|
||||
if ok {
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if !f.ignored(field) {
|
||||
fmt.Fprintf(&line, "[%s]", val)
|
||||
fmt.Fprintf(&line, "[%s]", col.Sprint(val))
|
||||
prefixed[field] = true
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
// even more prefix fields
|
||||
mapFrom, mapFromOk := e.Fields[logMapFromField].(string)
|
||||
mapTo, mapToOk := e.Fields[logMapToField].(string)
|
||||
if mapFromOk && mapToOk && !f.ignored(logMapFromField) && !f.ignored(logMapToField) {
|
||||
fmt.Fprintf(&line, "[%s => %s]", mapFrom, mapTo)
|
||||
prefixed[logMapFromField], prefixed[logMapToField] = true, true
|
||||
}
|
||||
incFrom, incFromOk := e.Fields[logIncFromField].(string)
|
||||
incTo, incToOk := e.Fields[logIncToField].(string)
|
||||
if incFromOk && incToOk && !f.ignored(logIncFromField) && !f.ignored(logMapToField) {
|
||||
fmt.Fprintf(&line, "[%s => %s]", incFrom, incTo)
|
||||
prefixed[logIncFromField], prefixed[logIncToField] = true, true
|
||||
}
|
||||
|
||||
if line.Len() > 0 {
|
||||
@ -110,15 +102,11 @@ func (f *HumanFormatter) Format(e *logger.Entry) (out []byte, err error) {
|
||||
fmt.Fprint(&line, e.Message)
|
||||
|
||||
if len(e.Fields)-len(prefixed) > 0 {
|
||||
fmt.Fprint(&line, " ")
|
||||
enc := logfmt.NewEncoder(&line)
|
||||
for field, value := range e.Fields {
|
||||
if prefixed[field] || f.ignored(field) {
|
||||
continue
|
||||
}
|
||||
if err := logfmtTryEncodeKeyval(enc, field, value); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fmt.Fprintf(&line, " %s=%q", col.Sprint(field), fmt.Sprint(value))
|
||||
}
|
||||
}
|
||||
|
||||
@ -179,7 +167,7 @@ func (f *LogfmtFormatter) Format(e *logger.Entry) ([]byte, error) {
|
||||
|
||||
// at least try and put job and task in front
|
||||
prefixed := make(map[string]bool, 2)
|
||||
prefix := []string{logJobField, logTaskField}
|
||||
prefix := []string{JobField, SubsysField}
|
||||
for _, pf := range prefix {
|
||||
v, ok := e.Fields[pf]
|
||||
if !ok {
|
@ -1,4 +1,4 @@
|
||||
package cmd
|
||||
package logging
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@ -12,18 +12,23 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
type EntryFormatter interface {
|
||||
SetMetadataFlags(flags MetadataFlags)
|
||||
Format(e *logger.Entry) ([]byte, error)
|
||||
}
|
||||
|
||||
type WriterOutlet struct {
|
||||
Formatter EntryFormatter
|
||||
Writer io.Writer
|
||||
formatter EntryFormatter
|
||||
writer io.Writer
|
||||
}
|
||||
|
||||
func (h WriterOutlet) WriteEntry(entry logger.Entry) error {
|
||||
bytes, err := h.Formatter.Format(&entry)
|
||||
bytes, err := h.formatter.Format(&entry)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = h.Writer.Write(bytes)
|
||||
h.Writer.Write([]byte("\n"))
|
||||
_, err = h.writer.Write(bytes)
|
||||
h.writer.Write([]byte("\n"))
|
||||
return err
|
||||
}
|
||||
|
daemon/main.go (new file, 16 lines)
@ -0,0 +1,16 @@
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"github.com/zrepl/zrepl/cli"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
)
|
||||
|
||||
type Logger = logger.Logger
|
||||
|
||||
var DaemonCmd = &cli.Subcommand {
|
||||
Use: "daemon",
|
||||
Short: "run the zrepl daemon",
|
||||
Run: func(subcommand *cli.Subcommand, args []string) error {
|
||||
return Run(subcommand.Config())
|
||||
},
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package cmd
|
||||
package nethelpers
|
||||
|
||||
import (
|
||||
"github.com/pkg/errors"
|
@ -1,4 +1,4 @@
|
||||
package cmd
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
@ -9,7 +9,7 @@ import (
|
||||
"net/http/pprof"
|
||||
)
|
||||
|
||||
type PProfServer struct {
|
||||
type pprofServer struct {
|
||||
cc chan PprofServerControlMsg
|
||||
state PprofServerControlMsg
|
||||
listener net.Listener
|
||||
@ -22,9 +22,9 @@ type PprofServerControlMsg struct {
|
||||
HttpListenAddress string
|
||||
}
|
||||
|
||||
func NewPProfServer(ctx context.Context) *PProfServer {
|
||||
func NewPProfServer(ctx context.Context) *pprofServer {
|
||||
|
||||
s := &PProfServer{
|
||||
s := &pprofServer{
|
||||
cc: make(chan PprofServerControlMsg),
|
||||
}
|
||||
|
||||
@ -32,7 +32,7 @@ func NewPProfServer(ctx context.Context) *PProfServer {
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *PProfServer) controlLoop(ctx context.Context) {
|
||||
func (s *pprofServer) controlLoop(ctx context.Context) {
|
||||
outer:
|
||||
for {
|
||||
|
||||
@ -75,6 +75,6 @@ outer:
|
||||
}
|
||||
}
|
||||
|
||||
func (s *PProfServer) Control(msg PprofServerControlMsg) {
|
||||
func (s *pprofServer) Control(msg PprofServerControlMsg) {
|
||||
s.cc <- msg
|
||||
}
|
daemon/prometheus.go (new file, 89 lines)
@ -0,0 +1,89 @@
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/job"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"net"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type prometheusJob struct {
|
||||
listen string
|
||||
}
|
||||
|
||||
func newPrometheusJobFromConfig(in *config.PrometheusMonitoring) (*prometheusJob, error) {
|
||||
if _, _, err := net.SplitHostPort(in.Listen); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &prometheusJob{in.Listen}, nil
|
||||
}
|
||||
|
||||
var prom struct {
|
||||
taskLogEntries *prometheus.CounterVec
|
||||
}
|
||||
|
||||
func init() {
|
||||
prom.taskLogEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "zrepl",
|
||||
Subsystem: "daemon",
|
||||
Name: "log_entries",
|
||||
Help: "number of log entries per job task and level",
|
||||
}, []string{"zrepl_job", "level"})
|
||||
prometheus.MustRegister(prom.taskLogEntries)
|
||||
}
|
||||
|
||||
func (j *prometheusJob) Name() string { return jobNamePrometheus }
|
||||
|
||||
func (j *prometheusJob) Status() *job.Status { return &job.Status{Type: job.TypeInternal} }
|
||||
|
||||
func (j *prometheusJob) RegisterMetrics(registerer prometheus.Registerer) {}
|
||||
|
||||
func (j *prometheusJob) Run(ctx context.Context) {
|
||||
|
||||
if err := zfs.PrometheusRegister(prometheus.DefaultRegisterer); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
log := job.GetLogger(ctx)
|
||||
|
||||
l, err := net.Listen("tcp", j.listen)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("cannot listen")
|
||||
}
|
||||
go func() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
l.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
err = http.Serve(l, mux)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("error while serving")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type prometheusJobOutlet struct {
|
||||
jobName string
|
||||
}
|
||||
|
||||
var _ logger.Outlet = prometheusJobOutlet{}
|
||||
|
||||
func newPrometheusLogOutlet(jobName string) prometheusJobOutlet {
|
||||
return prometheusJobOutlet{jobName}
|
||||
}
|
||||
|
||||
func (o prometheusJobOutlet) WriteEntry(entry logger.Entry) error {
|
||||
prom.taskLogEntries.WithLabelValues(o.jobName, entry.Level.String()).Inc()
|
||||
return nil
|
||||
}
|
||||
|
daemon/pruner/pruner.go (new file, 523 lines)
@ -0,0 +1,523 @@
|
||||
package pruner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/pruning"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"net"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Try to keep it compatible with github.com/zrepl/zrepl/replication.Endpoint
|
||||
type History interface {
|
||||
ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
|
||||
}
|
||||
|
||||
type Target interface {
|
||||
ListFilesystems(ctx context.Context) ([]*pdu.Filesystem, error)
|
||||
ListFilesystemVersions(ctx context.Context, fs string) ([]*pdu.FilesystemVersion, error) // fix depS
|
||||
DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error)
|
||||
}
|
||||
|
||||
type Logger = logger.Logger
|
||||
|
||||
type contextKey int
|
||||
|
||||
const contextKeyLogger contextKey = 0
|
||||
|
||||
func WithLogger(ctx context.Context, log Logger) context.Context {
|
||||
return context.WithValue(ctx, contextKeyLogger, log)
|
||||
}
|
||||
|
||||
func GetLogger(ctx context.Context) Logger {
|
||||
if l, ok := ctx.Value(contextKeyLogger).(Logger); ok {
|
||||
return l
|
||||
}
|
||||
return logger.NewNullLogger()
|
||||
}
|
||||
|
||||
type args struct {
|
||||
ctx context.Context
|
||||
target Target
|
||||
receiver History
|
||||
rules []pruning.KeepRule
|
||||
retryWait time.Duration
|
||||
considerSnapAtCursorReplicated bool
|
||||
promPruneSecs prometheus.Observer
|
||||
}
|
||||
|
||||
type Pruner struct {
|
||||
args args
|
||||
|
||||
mtx sync.RWMutex
|
||||
|
||||
state State
|
||||
|
||||
// State ErrWait|ErrPerm
|
||||
sleepUntil time.Time
|
||||
err error
|
||||
|
||||
// State Exec
|
||||
prunePending []*fs
|
||||
pruneCompleted []*fs
|
||||
}
|
||||
|
||||
type PrunerFactory struct {
|
||||
senderRules []pruning.KeepRule
|
||||
receiverRules []pruning.KeepRule
|
||||
retryWait time.Duration
|
||||
considerSnapAtCursorReplicated bool
|
||||
promPruneSecs *prometheus.HistogramVec
|
||||
}
|
||||
|
||||
func checkContainsKeep1(rules []pruning.KeepRule) error {
|
||||
if len(rules) == 0 {
|
||||
return nil //No keep rules means keep all - ok
|
||||
}
|
||||
for _, e := range rules {
|
||||
switch e.(type) {
|
||||
case *pruning.KeepLastN:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return errors.New("sender keep rules must contain last_n or be empty so that the last snapshot is definitely kept")
|
||||
}
|
||||
|
||||
func NewPrunerFactory(in config.PruningSenderReceiver, promPruneSecs *prometheus.HistogramVec) (*PrunerFactory, error) {
|
||||
keepRulesReceiver, err := pruning.RulesFromConfig(in.KeepReceiver)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot build receiver pruning rules")
|
||||
}
|
||||
|
||||
keepRulesSender, err := pruning.RulesFromConfig(in.KeepSender)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot build sender pruning rules")
|
||||
}
|
||||
|
||||
considerSnapAtCursorReplicated := false
|
||||
for _, r := range in.KeepSender {
|
||||
knr, ok := r.Ret.(*config.PruneKeepNotReplicated)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
considerSnapAtCursorReplicated = considerSnapAtCursorReplicated || !knr.KeepSnapshotAtCursor
|
||||
}
|
||||
f := &PrunerFactory{
|
||||
keepRulesSender,
|
||||
keepRulesReceiver,
|
||||
10 * time.Second, //FIXME constant
|
||||
considerSnapAtCursorReplicated,
|
||||
promPruneSecs,
|
||||
}
|
||||
return f, nil
|
||||
}
|
||||
|
||||
func (f *PrunerFactory) BuildSenderPruner(ctx context.Context, target Target, receiver History) *Pruner {
|
||||
p := &Pruner{
|
||||
args: args{
|
||||
WithLogger(ctx, GetLogger(ctx).WithField("prune_side", "sender")),
|
||||
target,
|
||||
receiver,
|
||||
f.senderRules,
|
||||
f.retryWait,
|
||||
f.considerSnapAtCursorReplicated,
|
||||
f.promPruneSecs.WithLabelValues("sender"),
|
||||
},
|
||||
state: Plan,
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (f *PrunerFactory) BuildReceiverPruner(ctx context.Context, target Target, receiver History) *Pruner {
|
||||
p := &Pruner{
|
||||
args: args{
|
||||
WithLogger(ctx, GetLogger(ctx).WithField("prune_side", "receiver")),
|
||||
target,
|
||||
receiver,
|
||||
f.receiverRules,
|
||||
f.retryWait,
|
||||
false, // senseless here anyways
|
||||
f.promPruneSecs.WithLabelValues("receiver"),
|
||||
},
|
||||
state: Plan,
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
//go:generate enumer -type=State
|
||||
type State int
|
||||
|
||||
const (
|
||||
Plan State = 1 << iota
|
||||
PlanWait
|
||||
Exec
|
||||
ExecWait
|
||||
ErrPerm
|
||||
Done
|
||||
)
|
||||
|
||||
func (s State) statefunc() state {
|
||||
var statemap = map[State]state{
|
||||
Plan: statePlan,
|
||||
PlanWait: statePlanWait,
|
||||
Exec: stateExec,
|
||||
ExecWait: stateExecWait,
|
||||
ErrPerm: nil,
|
||||
Done: nil,
|
||||
}
|
||||
return statemap[s]
|
||||
}
|
||||
|
||||
type updater func(func(*Pruner)) State
|
||||
type state func(args *args, u updater) state
|
||||
|
||||
func (p *Pruner) Prune() {
|
||||
p.prune(p.args)
|
||||
}
|
||||
|
||||
func (p *Pruner) prune(args args) {
|
||||
s := p.state.statefunc()
|
||||
for s != nil {
|
||||
pre := p.state
|
||||
s = s(&args, func(f func(*Pruner)) State {
|
||||
p.mtx.Lock()
|
||||
defer p.mtx.Unlock()
|
||||
f(p)
|
||||
return p.state
|
||||
})
|
||||
post := p.state
|
||||
GetLogger(args.ctx).
|
||||
WithField("transition", fmt.Sprintf("%s=>%s", pre, post)).
|
||||
Debug("state transition")
|
||||
}
|
||||
}
|
||||
|
||||
type Report struct {
|
||||
State string
|
||||
SleepUntil time.Time
|
||||
Error string
|
||||
Pending, Completed []FSReport
|
||||
}
|
||||
|
||||
type FSReport struct {
|
||||
Filesystem string
|
||||
SnapshotList, DestroyList []SnapshotReport
|
||||
Error string
|
||||
}
|
||||
|
||||
type SnapshotReport struct {
|
||||
Name string
|
||||
Replicated bool
|
||||
Date time.Time
|
||||
}
|
||||
|
||||
func (p *Pruner) Report() *Report {
|
||||
p.mtx.Lock()
|
||||
defer p.mtx.Unlock()
|
||||
|
||||
r := Report{State: p.state.String()}
|
||||
|
||||
if p.state & PlanWait|ExecWait != 0 {
|
||||
r.SleepUntil = p.sleepUntil
|
||||
}
|
||||
if p.state & PlanWait|ExecWait|ErrPerm != 0 {
|
||||
if p.err != nil {
|
||||
r.Error = p.err.Error()
|
||||
}
|
||||
}
|
||||
|
||||
if p.state & Plan|PlanWait == 0 {
|
||||
return &r
|
||||
}
|
||||
|
||||
r.Pending = make([]FSReport, len(p.prunePending))
|
||||
for i, fs := range p.prunePending{
|
||||
r.Pending[i] = fs.Report()
|
||||
}
|
||||
r.Completed = make([]FSReport, len(p.pruneCompleted))
|
||||
for i, fs := range p.pruneCompleted{
|
||||
r.Completed[i] = fs.Report()
|
||||
}
|
||||
|
||||
return &r
|
||||
}
|
||||
|
||||
type fs struct {
|
||||
path string
|
||||
|
||||
// snapshots presented by target
|
||||
// (type snapshot)
|
||||
snaps []pruning.Snapshot
|
||||
// destroy list returned by pruning.PruneSnapshots(snaps)
|
||||
// (type snapshot)
|
||||
destroyList []pruning.Snapshot
|
||||
|
||||
mtx sync.RWMutex
|
||||
// for Plan
|
||||
err error
|
||||
}
|
||||
|
||||
func (f *fs) Update(err error) {
|
||||
f.mtx.Lock()
|
||||
defer f.mtx.Unlock()
|
||||
f.err = err
|
||||
}
|
||||
|
||||
func (f *fs) Report() FSReport {
|
||||
f.mtx.Lock()
|
||||
defer f.mtx.Unlock()
|
||||
|
||||
r := FSReport{}
|
||||
r.Filesystem = f.path
|
||||
if f.err != nil {
|
||||
r.Error = f.err.Error()
|
||||
}
|
||||
|
||||
r.SnapshotList = make([]SnapshotReport, len(f.snaps))
|
||||
for i, snap := range f.snaps {
|
||||
r.SnapshotList[i] = snap.(snapshot).Report()
|
||||
}
|
||||
|
||||
r.DestroyList = make([]SnapshotReport, len(f.destroyList))
|
||||
for i, snap := range f.destroyList{
|
||||
r.DestroyList[i] = snap.(snapshot).Report()
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
type snapshot struct {
|
||||
replicated bool
|
||||
date time.Time
|
||||
fsv *pdu.FilesystemVersion
|
||||
}
|
||||
|
||||
func (s snapshot) Report() SnapshotReport {
|
||||
return SnapshotReport{
|
||||
Name: s.Name(),
|
||||
Replicated: s.Replicated(),
|
||||
Date: s.Date(),
|
||||
}
|
||||
}
|
||||
|
||||
var _ pruning.Snapshot = snapshot{}
|
||||
|
||||
func (s snapshot) Name() string { return s.fsv.Name }
|
||||
|
||||
func (s snapshot) Replicated() bool { return s.replicated }
|
||||
|
||||
func (s snapshot) Date() time.Time { return s.date }
|
||||
|
||||
func shouldRetry(e error) bool {
|
||||
switch e.(type) {
|
||||
case nil:
|
||||
return true
|
||||
case net.Error:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func onErr(u updater, e error) state {
|
||||
return u(func(p *Pruner) {
|
||||
p.err = e
|
||||
if !shouldRetry(e) {
|
||||
p.state = ErrPerm
|
||||
return
|
||||
}
|
||||
switch p.state {
|
||||
case Plan:
|
||||
p.state = PlanWait
|
||||
case Exec:
|
||||
p.state = ExecWait
|
||||
default:
|
||||
panic(p.state)
|
||||
}
|
||||
}).statefunc()
|
||||
}
|
||||
|
||||
func statePlan(a *args, u updater) state {
|
||||
|
||||
ctx, target, receiver := a.ctx, a.target, a.receiver
|
||||
|
||||
tfss, err := target.ListFilesystems(ctx)
|
||||
if err != nil {
|
||||
return onErr(u, err)
|
||||
}
|
||||
|
||||
pfss := make([]*fs, len(tfss))
|
||||
fsloop:
|
||||
for i, tfs := range tfss {
|
||||
|
||||
l := GetLogger(ctx).WithField("fs", tfs.Path)
|
||||
l.Debug("plan filesystem")
|
||||
|
||||
|
||||
pfs := &fs{
|
||||
path: tfs.Path,
|
||||
}
|
||||
pfss[i] = pfs
|
||||
|
||||
tfsvs, err := target.ListFilesystemVersions(ctx, tfs.Path)
|
||||
if err != nil {
|
||||
l.WithError(err).Error("cannot list filesystem versions")
|
||||
if shouldRetry(err) {
|
||||
return onErr(u, err)
|
||||
}
|
||||
pfs.err = err
|
||||
continue fsloop
|
||||
}
|
||||
pfs.snaps = make([]pruning.Snapshot, 0, len(tfsvs))
|
||||
|
||||
rcReq := &pdu.ReplicationCursorReq{
|
||||
Filesystem: tfs.Path,
|
||||
Op: &pdu.ReplicationCursorReq_Get{
|
||||
Get: &pdu.ReplicationCursorReq_GetOp{},
|
||||
},
|
||||
}
|
||||
rc, err := receiver.ReplicationCursor(ctx, rcReq)
|
||||
if err != nil {
|
||||
l.WithError(err).Error("cannot get replication cursor")
|
||||
if shouldRetry(err) {
|
||||
return onErr(u, err)
|
||||
}
|
||||
pfs.err = err
|
||||
continue fsloop
|
||||
}
|
||||
if rc.GetError() != "" {
|
||||
l.WithField("reqErr", rc.GetError()).Error("cannot get replication cursor")
|
||||
pfs.err = fmt.Errorf("%s", rc.GetError())
|
||||
continue fsloop
|
||||
}
|
||||
|
||||
|
||||
// scan from older to newer, all snapshots older than cursor are interpreted as replicated
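// Illustration (snapshot names hypothetical): for snapshots @a, @b, @c in
// createtxg order with the replication cursor at @b, @a is marked replicated,
// @b only if considerSnapAtCursorReplicated is set, and @c is not.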
|
||||
sort.Slice(tfsvs, func(i, j int) bool {
|
||||
return tfsvs[i].CreateTXG < tfsvs[j].CreateTXG
|
||||
})
|
||||
|
||||
haveCursorSnapshot := false
|
||||
for _, tfsv := range tfsvs {
|
||||
if tfsv.Type != pdu.FilesystemVersion_Snapshot {
|
||||
continue
|
||||
}
|
||||
if tfsv.Guid == rc.GetGuid() {
|
||||
haveCursorSnapshot = true
|
||||
}
|
||||
}
|
||||
preCursor := haveCursorSnapshot
|
||||
for _, tfsv := range tfsvs {
|
||||
if tfsv.Type != pdu.FilesystemVersion_Snapshot {
|
||||
continue
|
||||
}
|
||||
creation, err := tfsv.CreationAsTime()
|
||||
if err != nil {
|
||||
pfs.err = fmt.Errorf("%s%s has invalid creation date: %s", tfs, tfsv.RelName(), err)
|
||||
l.WithError(pfs.err).Error("")
|
||||
continue fsloop
|
||||
}
|
||||
// note that we cannot use CreateTXG because target and receiver could be on different pools
|
||||
atCursor := tfsv.Guid == rc.GetGuid()
|
||||
preCursor = preCursor && !atCursor
|
||||
pfs.snaps = append(pfs.snaps, snapshot{
|
||||
replicated: preCursor || (a.considerSnapAtCursorReplicated && atCursor),
|
||||
date: creation,
|
||||
fsv: tfsv,
|
||||
})
|
||||
}
|
||||
if preCursor {
|
||||
pfs.err = fmt.Errorf("replication cursor not found in prune target filesystem versions")
|
||||
l.WithError(pfs.err).Error("")
|
||||
continue fsloop
|
||||
}
|
||||
|
||||
// Apply prune rules
|
||||
pfs.destroyList = pruning.PruneSnapshots(pfs.snaps, a.rules)
|
||||
|
||||
}
|
||||
|
||||
return u(func(pruner *Pruner) {
|
||||
for _, pfs := range pfss {
|
||||
if pfs.err != nil {
|
||||
pruner.pruneCompleted = append(pruner.pruneCompleted, pfs)
|
||||
} else {
|
||||
pruner.prunePending = append(pruner.prunePending, pfs)
|
||||
}
|
||||
}
|
||||
pruner.state = Exec
|
||||
}).statefunc()
|
||||
}
|
||||
|
||||
func stateExec(a *args, u updater) state {
|
||||
|
||||
var pfs *fs
|
||||
state := u(func(pruner *Pruner) {
|
||||
if len(pruner.prunePending) == 0 {
|
||||
nextState := Done
|
||||
for _, pfs := range pruner.pruneCompleted {
|
||||
if pfs.err != nil {
|
||||
nextState = ErrPerm
|
||||
}
|
||||
}
|
||||
pruner.state = nextState
|
||||
return
|
||||
}
|
||||
pfs = pruner.prunePending[0]
|
||||
})
|
||||
if state != Exec {
|
||||
return state.statefunc()
|
||||
}
|
||||
|
||||
destroyList := make([]*pdu.FilesystemVersion, len(pfs.destroyList))
|
||||
for i := range destroyList {
|
||||
destroyList[i] = pfs.destroyList[i].(snapshot).fsv
|
||||
GetLogger(a.ctx).
|
||||
WithField("fs", pfs.path).
|
||||
WithField("destroy_snap", destroyList[i].Name).
|
||||
Debug("policy destroys snapshot")
|
||||
}
|
||||
pfs.Update(nil)
|
||||
req := pdu.DestroySnapshotsReq{
|
||||
Filesystem: pfs.path,
|
||||
Snapshots: destroyList,
|
||||
}
|
||||
_, err := a.target.DestroySnapshots(a.ctx, &req)
|
||||
pfs.Update(err)
|
||||
if err != nil && shouldRetry(err) {
|
||||
return onErr(u, err)
|
||||
}
|
||||
// if it's not retryable, treat it as being done
|
||||
|
||||
return u(func(pruner *Pruner) {
|
||||
pruner.pruneCompleted = append(pruner.pruneCompleted, pfs)
|
||||
pruner.prunePending = pruner.prunePending[1:]
|
||||
}).statefunc()
|
||||
}
|
||||
|
||||
func stateExecWait(a *args, u updater) state {
|
||||
return doWait(Exec, a, u)
|
||||
}
|
||||
|
||||
func statePlanWait(a *args, u updater) state {
|
||||
return doWait(Plan, a, u)
|
||||
}
|
||||
|
||||
func doWait(goback State, a *args, u updater) state {
|
||||
timer := time.NewTimer(a.retryWait)
|
||||
defer timer.Stop()
|
||||
select {
|
||||
case <-timer.C:
|
||||
return u(func(pruner *Pruner) {
|
||||
pruner.state = goback
|
||||
}).statefunc()
|
||||
case <-a.ctx.Done():
|
||||
return onErr(u, a.ctx.Err())
|
||||
}
|
||||
}
|
211
daemon/pruner/pruner_test.go
Normal file
@@ -0,0 +1,211 @@
|
||||
package pruner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/pruning"
|
||||
"github.com/zrepl/zrepl/replication/pdu"
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
type mockFS struct {
|
||||
path string
|
||||
snaps []string
|
||||
}
|
||||
|
||||
func (m *mockFS) Filesystem() *pdu.Filesystem {
|
||||
return &pdu.Filesystem{
|
||||
Path: m.path,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *mockFS) FilesystemVersions() []*pdu.FilesystemVersion {
|
||||
versions := make([]*pdu.FilesystemVersion, len(m.snaps))
|
||||
for i, v := range m.snaps {
|
||||
versions[i] = &pdu.FilesystemVersion{
|
||||
Type: pdu.FilesystemVersion_Snapshot,
|
||||
Name: v,
|
||||
Creation: pdu.FilesystemVersionCreation(time.Unix(0, 0)),
|
||||
Guid: uint64(i),
|
||||
}
|
||||
}
|
||||
return versions
|
||||
}
|
||||
|
||||
type mockTarget struct {
|
||||
fss []mockFS
|
||||
destroyed map[string][]string
|
||||
listVersionsErrs map[string][]error
|
||||
listFilesystemsErr []error
|
||||
destroyErrs map[string][]error
|
||||
}
|
||||
|
||||
func (t *mockTarget) ListFilesystems(ctx context.Context) ([]*pdu.Filesystem, error) {
|
||||
if len(t.listFilesystemsErr) > 0 {
|
||||
e := t.listFilesystemsErr[0]
|
||||
t.listFilesystemsErr = t.listFilesystemsErr[1:]
|
||||
return nil, e
|
||||
}
|
||||
fss := make([]*pdu.Filesystem, len(t.fss))
|
||||
for i := range fss {
|
||||
fss[i] = t.fss[i].Filesystem()
|
||||
}
|
||||
return fss, nil
|
||||
}
|
||||
|
||||
func (t *mockTarget) ListFilesystemVersions(ctx context.Context, fs string) ([]*pdu.FilesystemVersion, error) {
|
||||
if len(t.listVersionsErrs[fs]) != 0 {
|
||||
e := t.listVersionsErrs[fs][0]
|
||||
t.listVersionsErrs[fs] = t.listVersionsErrs[fs][1:]
|
||||
return nil, e
|
||||
}
|
||||
|
||||
for _, mfs := range t.fss {
|
||||
if mfs.path != fs {
|
||||
continue
|
||||
}
|
||||
return mfs.FilesystemVersions(), nil
|
||||
}
|
||||
return nil, fmt.Errorf("filesystem %s does not exist", fs)
|
||||
}
|
||||
|
||||
func (t *mockTarget) DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) {
|
||||
fs, snaps := req.Filesystem, req.Snapshots
|
||||
if len(t.destroyErrs[fs]) != 0 {
|
||||
e := t.destroyErrs[fs][0]
|
||||
t.destroyErrs[fs] = t.destroyErrs[fs][1:]
|
||||
return nil, e
|
||||
}
|
||||
destroyed := t.destroyed[fs]
|
||||
res := make([]*pdu.DestroySnapshotRes, len(snaps))
|
||||
for i, s := range snaps {
|
||||
destroyed = append(destroyed, s.Name)
|
||||
res[i] = &pdu.DestroySnapshotRes{Error: "", Snapshot: s}
|
||||
}
|
||||
t.destroyed[fs] = destroyed
|
||||
return &pdu.DestroySnapshotsRes{Results: res}, nil
|
||||
}
|
||||
|
||||
type mockCursor struct {
|
||||
snapname string
|
||||
guid uint64
|
||||
}
|
||||
type mockHistory struct {
|
||||
errs map[string][]error
|
||||
cursors map[string]*mockCursor
|
||||
}
|
||||
|
||||
func (r *mockHistory) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
|
||||
fs := req.Filesystem
|
||||
if len(r.errs[fs]) > 0 {
|
||||
e := r.errs[fs][0]
|
||||
r.errs[fs] = r.errs[fs][1:]
|
||||
return nil, e
|
||||
}
|
||||
return &pdu.ReplicationCursorRes{Result: &pdu.ReplicationCursorRes_Guid{Guid: 0}}, nil
|
||||
}
|
||||
|
||||
type stubNetErr struct {
|
||||
msg string
|
||||
temporary, timeout bool
|
||||
}
|
||||
|
||||
var _ net.Error = stubNetErr{}
|
||||
|
||||
func (e stubNetErr) Error() string {
|
||||
return e.msg
|
||||
}
|
||||
|
||||
func (e stubNetErr) Temporary() bool { return e.temporary }
|
||||
|
||||
func (e stubNetErr) Timeout() bool { return e.timeout }
|
||||
|
||||
func TestPruner_Prune(t *testing.T) {
|
||||
|
||||
var _ net.Error = &net.OpError{} // we use it below
|
||||
target := &mockTarget{
|
||||
listFilesystemsErr: []error{
|
||||
stubNetErr{msg: "fakerror0"},
|
||||
},
|
||||
listVersionsErrs: map[string][]error{
|
||||
"zroot/foo": {
|
||||
stubNetErr{msg: "fakeerror1"}, // should be classified as temporaty
|
||||
stubNetErr{msg: "fakeerror2"},
|
||||
},
|
||||
},
|
||||
destroyErrs: map[string][]error{
|
||||
"zroot/foo": {
|
||||
fmt.Errorf("permanent error"),
|
||||
},
|
||||
"zroot/bar": {
|
||||
stubNetErr{msg: "fakeerror3"},
|
||||
},
|
||||
},
|
||||
destroyed: make(map[string][]string),
|
||||
fss: []mockFS{
|
||||
{
|
||||
path: "zroot/foo",
|
||||
snaps: []string{
|
||||
"keep_a",
|
||||
"keep_b",
|
||||
"drop_c",
|
||||
"keep_d",
|
||||
},
|
||||
},
|
||||
{
|
||||
path: "zroot/bar",
|
||||
snaps: []string{
|
||||
"keep_e",
|
||||
"keep_f",
|
||||
"drop_g",
|
||||
},
|
||||
},
|
||||
{
|
||||
path: "zroot/baz",
|
||||
snaps: []string{
|
||||
"keep_h",
|
||||
"drop_i",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
history := &mockHistory{
|
||||
errs: map[string][]error{
|
||||
"zroot/foo": {
|
||||
stubNetErr{msg: "fakeerror4"},
|
||||
},
|
||||
"zroot/baz": {
|
||||
fmt.Errorf("permanent error2"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
keepRules := []pruning.KeepRule{pruning.MustKeepRegex("^keep")}
|
||||
|
||||
p := Pruner{
|
||||
args: args{
|
||||
ctx: WithLogger(context.Background(), logger.NewTestLogger(t)),
|
||||
target: target,
|
||||
receiver: history,
|
||||
rules: keepRules,
|
||||
retryWait: 10*time.Millisecond,
|
||||
},
|
||||
state: Plan,
|
||||
}
|
||||
p.Prune()
|
||||
|
||||
exp := map[string][]string{
|
||||
"zroot/bar": {"drop_g"},
|
||||
// drop_c is prohibited by failing destroy
|
||||
// drop_i is prohibited by failing ReplicationCursor call
|
||||
}
|
||||
|
||||
assert.Equal(t, exp, target.destroyed)
|
||||
|
||||
//assert.Equal(t, map[string][]error{}, target.listVersionsErrs, "retried")
|
||||
|
||||
}
|
76
daemon/pruner/state_enumer.go
Normal file
@@ -0,0 +1,76 @@
|
||||
// Code generated by "enumer -type=State"; DO NOT EDIT.
|
||||
|
||||
package pruner
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const (
|
||||
_StateName_0 = "PlanPlanWait"
|
||||
_StateName_1 = "Exec"
|
||||
_StateName_2 = "ExecWait"
|
||||
_StateName_3 = "ErrPerm"
|
||||
_StateName_4 = "Done"
|
||||
)
|
||||
|
||||
var (
|
||||
_StateIndex_0 = [...]uint8{0, 4, 12}
|
||||
_StateIndex_1 = [...]uint8{0, 4}
|
||||
_StateIndex_2 = [...]uint8{0, 8}
|
||||
_StateIndex_3 = [...]uint8{0, 7}
|
||||
_StateIndex_4 = [...]uint8{0, 4}
|
||||
)
|
||||
|
||||
func (i State) String() string {
|
||||
switch {
|
||||
case 1 <= i && i <= 2:
|
||||
i -= 1
|
||||
return _StateName_0[_StateIndex_0[i]:_StateIndex_0[i+1]]
|
||||
case i == 4:
|
||||
return _StateName_1
|
||||
case i == 8:
|
||||
return _StateName_2
|
||||
case i == 16:
|
||||
return _StateName_3
|
||||
case i == 32:
|
||||
return _StateName_4
|
||||
default:
|
||||
return fmt.Sprintf("State(%d)", i)
|
||||
}
|
||||
}
|
||||
|
||||
var _StateValues = []State{1, 2, 4, 8, 16, 32}
|
||||
|
||||
var _StateNameToValueMap = map[string]State{
|
||||
_StateName_0[0:4]: 1,
|
||||
_StateName_0[4:12]: 2,
|
||||
_StateName_1[0:4]: 4,
|
||||
_StateName_2[0:8]: 8,
|
||||
_StateName_3[0:7]: 16,
|
||||
_StateName_4[0:4]: 32,
|
||||
}
|
||||
|
||||
// StateString retrieves an enum value from the enum constants string name.
|
||||
// Throws an error if the param is not part of the enum.
|
||||
func StateString(s string) (State, error) {
|
||||
if val, ok := _StateNameToValueMap[s]; ok {
|
||||
return val, nil
|
||||
}
|
||||
return 0, fmt.Errorf("%s does not belong to State values", s)
|
||||
}
|
||||
|
||||
// StateValues returns all values of the enum
|
||||
func StateValues() []State {
|
||||
return _StateValues
|
||||
}
|
||||
|
||||
// IsAState returns "true" if the value is listed in the enum definition. "false" otherwise
|
||||
func (i State) IsAState() bool {
|
||||
for _, v := range _StateValues {
|
||||
if i == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
369
daemon/snapper/snapper.go
Normal file
@@ -0,0 +1,369 @@
|
||||
package snapper
|
||||
|
||||
import (
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/pkg/errors"
|
||||
"time"
|
||||
"context"
|
||||
"github.com/zrepl/zrepl/daemon/filters"
|
||||
"fmt"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"sort"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"sync"
|
||||
)
|
||||
|
||||
|
||||
//go:generate stringer -type=SnapState
|
||||
type SnapState uint
|
||||
|
||||
const (
|
||||
SnapPending SnapState = 1 << iota
|
||||
SnapStarted
|
||||
SnapDone
|
||||
SnapError
|
||||
)
|
||||
|
||||
type snapProgress struct {
|
||||
state SnapState
|
||||
|
||||
// SnapStarted, SnapDone, SnapError
|
||||
name string
|
||||
startAt time.Time
|
||||
|
||||
// SnapDone
|
||||
doneAt time.Time
|
||||
|
||||
// SnapErr
|
||||
err error
|
||||
}
|
||||
|
||||
type args struct {
|
||||
ctx context.Context
|
||||
log Logger
|
||||
prefix string
|
||||
interval time.Duration
|
||||
fsf *filters.DatasetMapFilter
|
||||
snapshotsTaken chan<- struct{}
|
||||
}
|
||||
|
||||
type Snapper struct {
|
||||
args args
|
||||
|
||||
mtx sync.Mutex
|
||||
state State
|
||||
|
||||
// set in state Plan, used in Waiting
|
||||
lastInvocation time.Time
|
||||
|
||||
// valid for state Snapshotting
|
||||
plan map[*zfs.DatasetPath]snapProgress
|
||||
|
||||
// valid for state SyncUp and Waiting
|
||||
sleepUntil time.Time
|
||||
|
||||
// valid for state Err
|
||||
err error
|
||||
}
|
||||
|
||||
//go:generate stringer -type=State
|
||||
type State uint
|
||||
|
||||
const (
|
||||
SyncUp State = 1<<iota
|
||||
Planning
|
||||
Snapshotting
|
||||
Waiting
|
||||
ErrorWait
|
||||
Stopped
|
||||
)
|
||||
|
||||
func (s State) sf() state {
|
||||
m := map[State]state{
|
||||
SyncUp: syncUp,
|
||||
Planning: plan,
|
||||
Snapshotting: snapshot,
|
||||
Waiting: wait,
|
||||
ErrorWait: wait,
|
||||
Stopped: nil,
|
||||
}
|
||||
return m[s]
|
||||
}
|
||||
|
||||
type updater func(u func(*Snapper)) State
|
||||
type state func(a args, u updater) state
|
||||
|
||||
type contextKey int
|
||||
|
||||
const (
|
||||
contextKeyLog contextKey = 0
|
||||
)
|
||||
|
||||
type Logger = logger.Logger
|
||||
|
||||
func WithLogger(ctx context.Context, log Logger) context.Context {
|
||||
return context.WithValue(ctx, contextKeyLog, log)
|
||||
}
|
||||
|
||||
func getLogger(ctx context.Context) Logger {
|
||||
if log, ok := ctx.Value(contextKeyLog).(Logger); ok {
|
||||
return log
|
||||
}
|
||||
return logger.NewNullLogger()
|
||||
}
|
||||
|
||||
func PeriodicFromConfig(g *config.Global, fsf *filters.DatasetMapFilter, in *config.SnapshottingPeriodic) (*Snapper, error) {
|
||||
if in.Prefix == "" {
|
||||
return nil, errors.New("prefix must not be empty")
|
||||
}
|
||||
if in.Interval <= 0 {
|
||||
return nil, errors.New("interval must be positive")
|
||||
}
|
||||
|
||||
args := args{
|
||||
prefix: in.Prefix,
|
||||
interval: in.Interval,
|
||||
fsf: fsf,
|
||||
// ctx and log is set in Run()
|
||||
}
|
||||
|
||||
return &Snapper{state: SyncUp, args: args}, nil
|
||||
}
|
||||
|
||||
func (s *Snapper) Run(ctx context.Context, snapshotsTaken chan<- struct{}) {
|
||||
|
||||
getLogger(ctx).Debug("start")
|
||||
defer getLogger(ctx).Debug("stop")
|
||||
|
||||
s.args.snapshotsTaken = snapshotsTaken
|
||||
s.args.ctx = ctx
|
||||
s.args.log = getLogger(ctx)
|
||||
|
||||
u := func(u func(*Snapper)) State {
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
if u != nil {
|
||||
u(s)
|
||||
}
|
||||
return s.state
|
||||
}
|
||||
|
||||
var st state = syncUp
|
||||
|
||||
for st != nil {
|
||||
pre := u(nil)
|
||||
st = st(s.args, u)
|
||||
post := u(nil)
|
||||
getLogger(ctx).
|
||||
WithField("transition", fmt.Sprintf("%s=>%s", pre, post)).
|
||||
Debug("state transition")
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func onErr(err error, u updater) state {
|
||||
return u(func(s *Snapper) {
|
||||
s.err = err
|
||||
s.state = ErrorWait
|
||||
}).sf()
|
||||
}
|
||||
|
||||
func onMainCtxDone(ctx context.Context, u updater) state {
|
||||
return u(func(s *Snapper) {
|
||||
s.err = ctx.Err()
|
||||
s.state = Stopped
|
||||
}).sf()
|
||||
}
|
||||
|
||||
func syncUp(a args, u updater) state {
|
||||
fss, err := listFSes(a.fsf)
|
||||
if err != nil {
|
||||
return onErr(err, u)
|
||||
}
|
||||
syncPoint, err := findSyncPoint(a.log, fss, a.prefix, a.interval)
|
||||
if err != nil {
|
||||
return onErr(err, u)
|
||||
}
|
||||
u(func(s *Snapper){
|
||||
s.sleepUntil = syncPoint
|
||||
})
|
||||
t := time.NewTimer(syncPoint.Sub(time.Now()))
|
||||
defer t.Stop()
|
||||
select {
|
||||
case <-t.C:
|
||||
return u(func(s *Snapper) {
|
||||
s.state = Planning
|
||||
}).sf()
|
||||
case <-a.ctx.Done():
|
||||
return onMainCtxDone(a.ctx, u)
|
||||
}
|
||||
}
|
||||
|
||||
func plan(a args, u updater) state {
|
||||
u(func(snapper *Snapper) {
|
||||
snapper.lastInvocation = time.Now()
|
||||
})
|
||||
fss, err := listFSes(a.fsf)
|
||||
if err != nil {
|
||||
return onErr(err, u)
|
||||
}
|
||||
|
||||
plan := make(map[*zfs.DatasetPath]snapProgress, len(fss))
|
||||
for _, fs := range fss {
|
||||
plan[fs] = snapProgress{state: SnapPending}
|
||||
}
|
||||
return u(func(s *Snapper) {
|
||||
s.state = Snapshotting
|
||||
s.plan = plan
|
||||
}).sf()
|
||||
}
|
||||
|
||||
func snapshot(a args, u updater) state {
|
||||
|
||||
var plan map[*zfs.DatasetPath]snapProgress
|
||||
u(func(snapper *Snapper) {
|
||||
plan = snapper.plan
|
||||
})
|
||||
|
||||
hadErr := false
|
||||
// TODO channel programs -> allow a little jitter?
|
||||
for fs, progress := range plan {
|
||||
suffix := time.Now().In(time.UTC).Format("20060102_150405_000")
|
||||
snapname := fmt.Sprintf("%s%s", a.prefix, suffix)
|
||||
|
||||
l := a.log.
|
||||
WithField("fs", fs.ToString()).
|
||||
WithField("snap", snapname)
|
||||
|
||||
u(func(snapper *Snapper) {
|
||||
progress.name = snapname
|
||||
progress.startAt = time.Now()
|
||||
progress.state = SnapStarted
|
||||
})
|
||||
|
||||
l.Debug("create snapshot")
|
||||
err := zfs.ZFSSnapshot(fs, snapname, false)
|
||||
if err != nil {
|
||||
hadErr = true
|
||||
l.WithError(err).Error("cannot create snapshot")
|
||||
}
|
||||
doneAt := time.Now()
|
||||
|
||||
u(func(snapper *Snapper) {
|
||||
progress.doneAt = doneAt
|
||||
progress.state = SnapDone
|
||||
if err != nil {
|
||||
progress.state = SnapError
|
||||
progress.err = err
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
select {
|
||||
case a.snapshotsTaken <- struct{}{}:
|
||||
default:
|
||||
if a.snapshotsTaken != nil {
|
||||
a.log.Warn("callback channel is full, discarding snapshot update event")
|
||||
}
|
||||
}
|
||||
|
||||
return u(func(snapper *Snapper) {
|
||||
if hadErr {
|
||||
snapper.state = ErrorWait
|
||||
snapper.err = errors.New("one or more snapshots could not be created, check logs for details")
|
||||
} else {
|
||||
snapper.state = Waiting
|
||||
}
|
||||
}).sf()
|
||||
}
|
||||
|
||||
func wait(a args, u updater) state {
|
||||
var sleepUntil time.Time
|
||||
u(func(snapper *Snapper) {
|
||||
lastTick := snapper.lastInvocation
|
||||
snapper.sleepUntil = lastTick.Add(a.interval)
|
||||
sleepUntil = snapper.sleepUntil
|
||||
})
|
||||
|
||||
t := time.NewTimer(sleepUntil.Sub(time.Now()))
|
||||
defer t.Stop()
|
||||
|
||||
select {
|
||||
case <-t.C:
|
||||
return u(func(snapper *Snapper) {
|
||||
snapper.state = Planning
|
||||
}).sf()
|
||||
case <-a.ctx.Done():
|
||||
return onMainCtxDone(a.ctx, u)
|
||||
}
|
||||
}
|
||||
|
||||
func listFSes(mf *filters.DatasetMapFilter) (fss []*zfs.DatasetPath, err error) {
|
||||
return zfs.ZFSListMapping(mf)
|
||||
}
|
||||
|
||||
func findSyncPoint(log Logger, fss []*zfs.DatasetPath, prefix string, interval time.Duration) (syncPoint time.Time, err error) {
|
||||
type snapTime struct {
|
||||
ds *zfs.DatasetPath
|
||||
time time.Time
|
||||
}
|
||||
|
||||
if len(fss) == 0 {
|
||||
return time.Now(), nil
|
||||
}
|
||||
|
||||
snaptimes := make([]snapTime, 0, len(fss))
|
||||
|
||||
now := time.Now()
|
||||
|
||||
log.Debug("examine filesystem state")
|
||||
for _, d := range fss {
|
||||
|
||||
l := log.WithField("fs", d.ToString())
|
||||
|
||||
fsvs, err := zfs.ZFSListFilesystemVersions(d, filters.NewTypedPrefixFilter(prefix, zfs.Snapshot))
|
||||
if err != nil {
|
||||
l.WithError(err).Error("cannot list filesystem versions")
|
||||
continue
|
||||
}
|
||||
if len(fsvs) <= 0 {
|
||||
l.WithField("prefix", prefix).Debug("no filesystem versions with prefix")
|
||||
continue
|
||||
}
|
||||
|
||||
// Sort versions by creation
|
||||
sort.SliceStable(fsvs, func(i, j int) bool {
|
||||
return fsvs[i].CreateTXG < fsvs[j].CreateTXG
|
||||
})
|
||||
|
||||
latest := fsvs[len(fsvs)-1]
|
||||
l.WithField("creation", latest.Creation).
|
||||
Debug("found latest snapshot")
|
||||
|
||||
since := now.Sub(latest.Creation)
|
||||
if since < 0 {
|
||||
l.WithField("snapshot", latest.Name).
|
||||
WithField("creation", latest.Creation).
|
||||
Error("snapshot is from the future")
|
||||
continue
|
||||
}
|
||||
next := now
|
||||
if since < interval {
|
||||
next = latest.Creation.Add(interval)
|
||||
}
|
||||
snaptimes = append(snaptimes, snapTime{d, next})
|
||||
}
|
||||
|
||||
if len(snaptimes) == 0 {
|
||||
snaptimes = append(snaptimes, snapTime{nil, now})
|
||||
}
|
||||
|
||||
sort.Slice(snaptimes, func(i, j int) bool {
|
||||
return snaptimes[i].time.Before(snaptimes[j].time)
|
||||
})
|
||||
|
||||
return snaptimes[0].time, nil
|
||||
|
||||
}
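// Worked example (values hypothetical): with interval = 10m, a filesystem whose
// latest prefixed snapshot is 3m old is next due at creation+10m (7m from now),
// while one whose latest snapshot is 15m old is due immediately (now);
// findSyncPoint returns the earliest of these next-due times across all
// filesystems, so the second filesystem drives the sync point.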
|
||||
|
39
daemon/snapper/snapper_all.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package snapper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/filters"
|
||||
)
|
||||
|
||||
// FIXME: properly abstract snapshotting:
|
||||
// - split up things that trigger snapshotting from the mechanism
|
||||
// - timer-based trigger (periodic)
|
||||
// - call from control socket (manual)
|
||||
// - mixed modes?
|
||||
// - support a `zrepl snapshot JOBNAME` subcommand for config.SnapshottingManual
|
||||
type PeriodicOrManual struct {
|
||||
s *Snapper
|
||||
}
|
||||
|
||||
func (s *PeriodicOrManual) Run(ctx context.Context, wakeUpCommon chan<- struct{}) {
|
||||
if s.s != nil {
|
||||
s.s.Run(ctx, wakeUpCommon)
|
||||
}
|
||||
}
|
||||
|
||||
func FromConfig(g *config.Global, fsf *filters.DatasetMapFilter, in config.SnapshottingEnum) (*PeriodicOrManual, error) {
|
||||
switch v := in.Ret.(type) {
|
||||
case *config.SnapshottingPeriodic:
|
||||
snapper, err := PeriodicFromConfig(g, fsf, v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &PeriodicOrManual{snapper}, nil
|
||||
case *config.SnapshottingManual:
|
||||
return &PeriodicOrManual{}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown snapshotting type %T", v)
|
||||
}
|
||||
}
|
29
daemon/snapper/snapstate_string.go
Normal file
@@ -0,0 +1,29 @@
|
||||
// Code generated by "stringer -type=SnapState"; DO NOT EDIT.
|
||||
|
||||
package snapper
|
||||
|
||||
import "strconv"
|
||||
|
||||
const (
|
||||
_SnapState_name_0 = "SnapPendingSnapStarted"
|
||||
_SnapState_name_1 = "SnapDone"
|
||||
_SnapState_name_2 = "SnapError"
|
||||
)
|
||||
|
||||
var (
|
||||
_SnapState_index_0 = [...]uint8{0, 11, 22}
|
||||
)
|
||||
|
||||
func (i SnapState) String() string {
|
||||
switch {
|
||||
case 1 <= i && i <= 2:
|
||||
i -= 1
|
||||
return _SnapState_name_0[_SnapState_index_0[i]:_SnapState_index_0[i+1]]
|
||||
case i == 4:
|
||||
return _SnapState_name_1
|
||||
case i == 8:
|
||||
return _SnapState_name_2
|
||||
default:
|
||||
return "SnapState(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
35
daemon/snapper/state_string.go
Normal file
@@ -0,0 +1,35 @@
|
||||
// Code generated by "stringer -type=State"; DO NOT EDIT.
|
||||
|
||||
package snapper
|
||||
|
||||
import "strconv"
|
||||
|
||||
const (
|
||||
_State_name_0 = "SyncUpPlanning"
|
||||
_State_name_1 = "Snapshotting"
|
||||
_State_name_2 = "Waiting"
|
||||
_State_name_3 = "ErrorWait"
|
||||
_State_name_4 = "Stopped"
|
||||
)
|
||||
|
||||
var (
|
||||
_State_index_0 = [...]uint8{0, 6, 14}
|
||||
)
|
||||
|
||||
func (i State) String() string {
|
||||
switch {
|
||||
case 1 <= i && i <= 2:
|
||||
i -= 1
|
||||
return _State_name_0[_State_index_0[i]:_State_index_0[i+1]]
|
||||
case i == 4:
|
||||
return _State_name_1
|
||||
case i == 8:
|
||||
return _State_name_2
|
||||
case i == 16:
|
||||
return _State_name_3
|
||||
case i == 32:
|
||||
return _State_name_4
|
||||
default:
|
||||
return "State(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
25
daemon/streamrpcconfig/streamrpcconfig.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package streamrpcconfig
|
||||
|
||||
import (
|
||||
"github.com/problame/go-streamrpc"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
)
|
||||
|
||||
func FromDaemonConfig(g *config.Global, in *config.RPCConfig) (*streamrpc.ConnConfig, error) {
|
||||
conf := in
|
||||
if conf == nil {
|
||||
conf = g.RPC
|
||||
}
|
||||
srpcConf := &streamrpc.ConnConfig{
|
||||
RxHeaderMaxLen: conf.RxHeaderMaxLen,
|
||||
RxStructuredMaxLen: conf.RxStructuredMaxLen,
|
||||
RxStreamMaxChunkSize: conf.RxStreamChunkMaxLen,
|
||||
TxChunkSize: conf.TxChunkSize,
|
||||
Timeout: conf.Timeout,
|
||||
SendHeartbeatInterval: conf.SendHeartbeatInterval,
|
||||
}
|
||||
if err := srpcConf.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return srpcConf, nil
|
||||
}
|
30
daemon/transport/connecter/connect_local.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package connecter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/transport/serve"
|
||||
"net"
|
||||
)
|
||||
|
||||
type LocalConnecter struct {
|
||||
listenerName string
|
||||
clientIdentity string
|
||||
}
|
||||
|
||||
func LocalConnecterFromConfig(in *config.LocalConnect) (*LocalConnecter, error) {
|
||||
if in.ClientIdentity == "" {
|
||||
return nil, fmt.Errorf("ClientIdentity must not be empty")
|
||||
}
|
||||
if in.ListenerName == "" {
|
||||
return nil, fmt.Errorf("ListenerName must not be empty")
|
||||
}
|
||||
return &LocalConnecter{listenerName: in.ListenerName, clientIdentity: in.ClientIdentity}, nil
|
||||
}
|
||||
|
||||
func (c *LocalConnecter) Connect(dialCtx context.Context) (conn net.Conn, err error) {
|
||||
l := serve.GetLocalListener(c.listenerName)
|
||||
return l.Connect(dialCtx, c.clientIdentity)
|
||||
}
|
||||
|
66
daemon/transport/connecter/connect_ssh.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package connecter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/jinzhu/copier"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/problame/go-netssh"
|
||||
"github.com/problame/go-streamrpc"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"net"
|
||||
"time"
|
||||
)
|
||||
|
||||
type SSHStdinserverConnecter struct {
|
||||
Host string
|
||||
User string
|
||||
Port uint16
|
||||
IdentityFile string
|
||||
TransportOpenCommand []string
|
||||
SSHCommand string
|
||||
Options []string
|
||||
dialTimeout time.Duration
|
||||
}
|
||||
|
||||
var _ streamrpc.Connecter = &SSHStdinserverConnecter{}
|
||||
|
||||
func SSHStdinserverConnecterFromConfig(in *config.SSHStdinserverConnect) (c *SSHStdinserverConnecter, err error) {
|
||||
|
||||
c = &SSHStdinserverConnecter{
|
||||
Host: in.Host,
|
||||
User: in.User,
|
||||
Port: in.Port,
|
||||
IdentityFile: in.IdentityFile,
|
||||
SSHCommand: in.SSHCommand,
|
||||
Options: in.Options,
|
||||
dialTimeout: in.DialTimeout,
|
||||
}
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
type netsshConnToConn struct{ *netssh.SSHConn }
|
||||
|
||||
var _ net.Conn = netsshConnToConn{}
|
||||
|
||||
func (netsshConnToConn) SetDeadline(dl time.Time) error { return nil }
|
||||
func (netsshConnToConn) SetReadDeadline(dl time.Time) error { return nil }
|
||||
func (netsshConnToConn) SetWriteDeadline(dl time.Time) error { return nil }
|
||||
|
||||
func (c *SSHStdinserverConnecter) Connect(dialCtx context.Context) (net.Conn, error) {
|
||||
|
||||
var endpoint netssh.Endpoint
|
||||
if err := copier.Copy(&endpoint, c); err != nil {
|
||||
return nil, errors.WithStack(err)
|
||||
}
|
||||
dialCtx, dialCancel := context.WithTimeout(dialCtx, c.dialTimeout) // context.TODO tied to error handling below
|
||||
defer dialCancel()
|
||||
nconn, err := netssh.Dial(dialCtx, endpoint)
|
||||
if err != nil {
|
||||
if err == context.DeadlineExceeded {
|
||||
err = errors.Errorf("dial_timeout of %s exceeded", c.dialTimeout)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return netsshConnToConn{nconn}, nil
|
||||
}
|
24
daemon/transport/connecter/connect_tcp.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package connecter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"net"
|
||||
)
|
||||
|
||||
type TCPConnecter struct {
|
||||
Address string
|
||||
dialer net.Dialer
|
||||
}
|
||||
|
||||
func TCPConnecterFromConfig(in *config.TCPConnect) (*TCPConnecter, error) {
|
||||
dialer := net.Dialer{
|
||||
Timeout: in.DialTimeout,
|
||||
}
|
||||
|
||||
return &TCPConnecter{in.Address, dialer}, nil
|
||||
}
|
||||
|
||||
func (c *TCPConnecter) Connect(dialCtx context.Context) (conn net.Conn, err error) {
|
||||
return c.dialer.DialContext(dialCtx, "tcp", c.Address)
|
||||
}
|
43
daemon/transport/connecter/connect_tls.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package connecter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/tlsconf"
|
||||
"net"
|
||||
)
|
||||
|
||||
type TLSConnecter struct {
|
||||
Address string
|
||||
dialer net.Dialer
|
||||
tlsConfig *tls.Config
|
||||
}
|
||||
|
||||
func TLSConnecterFromConfig(in *config.TLSConnect) (*TLSConnecter, error) {
|
||||
dialer := net.Dialer{
|
||||
Timeout: in.DialTimeout,
|
||||
}
|
||||
|
||||
ca, err := tlsconf.ParseCAFile(in.Ca)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot parse ca file")
|
||||
}
|
||||
|
||||
cert, err := tls.LoadX509KeyPair(in.Cert, in.Key)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot parse cert/key pair")
|
||||
}
|
||||
|
||||
tlsConfig, err := tlsconf.ClientAuthClient(in.ServerCN, ca, cert)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot build tls config")
|
||||
}
|
||||
|
||||
return &TLSConnecter{in.Address, dialer, tlsConfig}, nil
|
||||
}
|
||||
|
||||
func (c *TLSConnecter) Connect(dialCtx context.Context) (conn net.Conn, err error) {
|
||||
return tls.DialWithDialer(&c.dialer, "tcp", c.Address, c.tlsConfig)
|
||||
}
|
84
daemon/transport/connecter/connecter.go
Normal file
@@ -0,0 +1,84 @@
|
||||
package connecter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/problame/go-streamrpc"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/streamrpcconfig"
|
||||
"github.com/zrepl/zrepl/daemon/transport"
|
||||
"net"
|
||||
"time"
|
||||
)
|
||||
|
||||
|
||||
type HandshakeConnecter struct {
|
||||
connecter streamrpc.Connecter
|
||||
}
|
||||
|
||||
func (c HandshakeConnecter) Connect(ctx context.Context) (net.Conn, error) {
|
||||
conn, err := c.connecter.Connect(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dl, ok := ctx.Deadline()
|
||||
if !ok {
|
||||
dl = time.Now().Add(10 * time.Second) // FIXME constant
|
||||
}
|
||||
if err := transport.DoHandshakeCurrentVersion(conn, dl); err != nil {
|
||||
conn.Close()
|
||||
return nil, err
|
||||
}
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
|
||||
|
||||
func FromConfig(g *config.Global, in config.ConnectEnum) (*ClientFactory, error) {
|
||||
var (
|
||||
connecter streamrpc.Connecter
|
||||
errConnecter, errRPC error
|
||||
connConf *streamrpc.ConnConfig
|
||||
)
|
||||
switch v := in.Ret.(type) {
|
||||
case *config.SSHStdinserverConnect:
|
||||
connecter, errConnecter = SSHStdinserverConnecterFromConfig(v)
|
||||
connConf, errRPC = streamrpcconfig.FromDaemonConfig(g, v.RPC)
|
||||
case *config.TCPConnect:
|
||||
connecter, errConnecter = TCPConnecterFromConfig(v)
|
||||
connConf, errRPC = streamrpcconfig.FromDaemonConfig(g, v.RPC)
|
||||
case *config.TLSConnect:
|
||||
connecter, errConnecter = TLSConnecterFromConfig(v)
|
||||
connConf, errRPC = streamrpcconfig.FromDaemonConfig(g, v.RPC)
|
||||
case *config.LocalConnect:
|
||||
connecter, errConnecter = LocalConnecterFromConfig(v)
|
||||
connConf, errRPC = streamrpcconfig.FromDaemonConfig(g, v.RPC)
|
||||
default:
|
||||
panic(fmt.Sprintf("implementation error: unknown connecter type %T", v))
|
||||
}
|
||||
|
||||
if errConnecter != nil {
|
||||
return nil, errConnecter
|
||||
}
|
||||
if errRPC != nil {
|
||||
return nil, errRPC
|
||||
}
|
||||
|
||||
config := streamrpc.ClientConfig{ConnConfig: connConf}
|
||||
if err := config.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
connecter = HandshakeConnecter{connecter}
|
||||
|
||||
return &ClientFactory{connecter: connecter, config: &config}, nil
|
||||
}
|
||||
|
||||
type ClientFactory struct {
|
||||
connecter streamrpc.Connecter
|
||||
config *streamrpc.ClientConfig
|
||||
}
|
||||
|
||||
func (f ClientFactory) NewClient() (*streamrpc.Client, error) {
|
||||
return streamrpc.NewClient(f.connecter, f.config)
|
||||
}
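// Usage sketch (job/field names hypothetical): a job builds its RPC client via
//   cf, err := connecter.FromConfig(global, jobConfig.Connect)
//   if err != nil { /* handle config error */ }
//   client, err := cf.NewClient()
// which wires the configured transport, the handshake wrapper, and the
// streamrpc client config together.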
|
136
daemon/transport/handshake.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package transport
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type HandshakeMessage struct {
|
||||
ProtocolVersion int
|
||||
Extensions []string
|
||||
}
|
||||
|
||||
func (m *HandshakeMessage) Encode() ([]byte, error) {
|
||||
if m.ProtocolVersion <= 0 || m.ProtocolVersion > 9999 {
|
||||
return nil, fmt.Errorf("protocol version must be in [1, 9999]")
|
||||
}
|
||||
if len(m.Extensions) >= 9999 {
|
||||
return nil, fmt.Errorf("protocol only supports [0, 9999] extensions")
|
||||
}
|
||||
// EXTENSIONS is a count of subsequent \n separated lines that contain protocol extensions
|
||||
var extensions strings.Builder
|
||||
for i, ext := range m.Extensions {
|
||||
if strings.ContainsAny(ext, "\n") {
|
||||
return nil, fmt.Errorf("Extension #%d contains forbidden newline character", i)
|
||||
}
|
||||
if !utf8.ValidString(ext) {
|
||||
return nil, fmt.Errorf("Extension #%d is not valid UTF-8", i)
|
||||
}
|
||||
extensions.WriteString(ext)
|
||||
extensions.WriteString("\n")
|
||||
}
|
||||
withoutLen := fmt.Sprintf("ZREPL_ZFS_REPLICATION PROTOVERSION=%04d EXTENSIONS=%04d\n%s",
|
||||
m.ProtocolVersion, len(m.Extensions), extensions.String())
|
||||
withLen := fmt.Sprintf("%010d %s", len(withoutLen), withoutLen)
|
||||
return []byte(withLen), nil
|
||||
}
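// Example (illustrative, not normative): ProtocolVersion=1 with no extensions
// encodes to
//   "0000000056 ZREPL_ZFS_REPLICATION PROTOVERSION=0001 EXTENSIONS=0000\n"
// i.e. a fixed 10-digit decimal length of everything after the following space,
// then the banner line, then one line per extension.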
|
||||
|
||||
func (m *HandshakeMessage) DecodeReader(r io.Reader, maxLen int) error {
|
||||
var lenAndSpace [11]byte
|
||||
if _, err := io.ReadFull(r, lenAndSpace[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
if !utf8.Valid(lenAndSpace[:]) {
|
||||
return fmt.Errorf("invalid start of handshake message: not valid UTF-8")
|
||||
}
|
||||
var followLen int
|
||||
n, err := fmt.Sscanf(string(lenAndSpace[:]), "%010d ", &followLen)
|
||||
if n != 1 || err != nil {
|
||||
return fmt.Errorf("could not parse handshake message length")
|
||||
}
|
||||
if followLen > maxLen {
|
||||
return fmt.Errorf("handshake message length exceeds max length (%d vs %d)",
|
||||
followLen, maxLen)
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
_, err = io.Copy(&buf, io.LimitReader(r, int64(followLen)))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var (
|
||||
protoVersion, extensionCount int
|
||||
)
|
||||
n, err = fmt.Fscanf(&buf, "ZREPL_ZFS_REPLICATION PROTOVERSION=%04d EXTENSIONS=%4d\n",
|
||||
&protoVersion, &extensionCount)
|
||||
if n != 2 || err != nil {
|
||||
return fmt.Errorf("could not parse handshake message: %s", err)
|
||||
}
|
||||
if protoVersion < 1 {
|
||||
return fmt.Errorf("invalid protocol version %q", protoVersion)
|
||||
}
|
||||
m.ProtocolVersion = protoVersion
|
||||
|
||||
if extensionCount < 0 {
|
||||
return fmt.Errorf("invalid extension count %q", extensionCount)
|
||||
}
|
||||
if extensionCount == 0 {
|
||||
if buf.Len() != 0 {
|
||||
return fmt.Errorf("unexpected data trailing after header")
|
||||
}
|
||||
m.Extensions = nil
|
||||
return nil
|
||||
}
|
||||
s := buf.String()
|
||||
if strings.Count(s, "\n") != extensionCount {
|
||||
return fmt.Errorf("inconsistent extension count: found %d, header says %d", len(m.Extensions), extensionCount)
|
||||
}
|
||||
exts := strings.Split(s, "\n")
|
||||
if exts[len(exts)-1] != "" {
|
||||
return fmt.Errorf("unexpected data trailing after last extension newline")
|
||||
}
|
||||
m.Extensions = exts[0:len(exts)-1]
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func DoHandshakeCurrentVersion(conn net.Conn, deadline time.Time) error {
|
||||
// current protocol version is hardcoded here
|
||||
return DoHandshakeVersion(conn, deadline, 1)
|
||||
}
|
||||
|
||||
func DoHandshakeVersion(conn net.Conn, deadline time.Time, version int) error {
|
||||
ours := HandshakeMessage{
|
||||
ProtocolVersion: version,
|
||||
Extensions: nil,
|
||||
}
|
||||
hsb, err := ours.Encode()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not encode protocol banner: %s", err)
|
||||
}
|
||||
|
||||
conn.SetDeadline(deadline)
|
||||
_, err = io.Copy(conn, bytes.NewBuffer(hsb))
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not send protocol banner: %s", err)
|
||||
}
|
||||
|
||||
theirs := HandshakeMessage{}
|
||||
if err := theirs.DecodeReader(conn, 16 * 4096); err != nil { // FIXME constant
|
||||
return fmt.Errorf("could not decode protocol banner: %s", err)
|
||||
}
|
||||
|
||||
if theirs.ProtocolVersion != ours.ProtocolVersion {
|
||||
return fmt.Errorf("protocol versions do not match: ours is %d, theirs is %d",
|
||||
ours.ProtocolVersion, theirs.ProtocolVersion)
|
||||
}
|
||||
// ignore extensions, we don't use them
|
||||
|
||||
return nil
|
||||
}
|
119
daemon/transport/handshake_test.go
Normal file
@@ -0,0 +1,119 @@
|
||||
package transport
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/zrepl/zrepl/util/socketpair"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestHandshakeMessage_Encode(t *testing.T) {
|
||||
|
||||
msg := HandshakeMessage{
|
||||
ProtocolVersion: 2342,
|
||||
}
|
||||
|
||||
encB, err := msg.Encode()
|
||||
require.NoError(t, err)
|
||||
enc := string(encB)
|
||||
t.Logf("enc: %s", enc)
|
||||
|
||||
|
||||
|
||||
assert.False(t, strings.ContainsAny(enc[0:10], " "))
|
||||
assert.True(t, enc[10] == ' ')
|
||||
|
||||
var (
|
||||
headerlen, protoversion, extensionCount int
|
||||
)
|
||||
n, err := fmt.Sscanf(enc, "%010d ZREPL_ZFS_REPLICATION PROTOVERSION=%04d EXTENSIONS=%04d\n",
|
||||
&headerlen, &protoversion, &extensionCount)
|
||||
if n != 3 || (err != nil && err != io.EOF) {
|
||||
t.Fatalf("%v %v", n, err)
|
||||
}
|
||||
|
||||
assert.Equal(t, 2342, protoversion)
|
||||
assert.Equal(t, 0, extensionCount)
|
||||
assert.Equal(t, len(enc)-11, headerlen)
|
||||
|
||||
}
|
||||
|
||||
func TestHandshakeMessage_Encode_InvalidProtocolVersion(t *testing.T) {
|
||||
|
||||
for _, pv := range []int{-1, 0, 10000, 10001} {
|
||||
t.Logf("testing invalid protocol version = %v", pv)
|
||||
msg := HandshakeMessage{
|
||||
ProtocolVersion: pv,
|
||||
}
|
||||
b, err := msg.Encode()
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, b)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestHandshakeMessage_DecodeReader(t *testing.T) {
|
||||
|
||||
in := HandshakeMessage{
|
||||
2342,
|
||||
[]string{"foo", "bar 2342"},
|
||||
}
|
||||
|
||||
enc, err := in.Encode()
|
||||
require.NoError(t, err)
|
||||
|
||||
out := HandshakeMessage{}
|
||||
err = out.DecodeReader(bytes.NewReader([]byte(enc)), 4 * 4096)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 2342, out.ProtocolVersion)
|
||||
assert.Equal(t, 2, len(out.Extensions))
|
||||
assert.Equal(t, "foo", out.Extensions[0])
|
||||
assert.Equal(t, "bar 2342", out.Extensions[1])
|
||||
|
||||
}
|
||||
|
||||
func TestDoHandshakeVersion_ErrorOnDifferentVersions(t *testing.T) {
|
||||
srv, client, err := socketpair.SocketPair()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer srv.Close()
|
||||
defer client.Close()
|
||||
|
||||
srvErrCh := make(chan error)
|
||||
go func() {
|
||||
srvErrCh <- DoHandshakeVersion(srv, time.Now().Add(2*time.Second), 1)
|
||||
}()
|
||||
err = DoHandshakeVersion(client, time.Now().Add(2*time.Second), 2)
|
||||
t.Log(err)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, strings.Contains(err.Error(), "version"))
|
||||
|
||||
srvErr := <-srvErrCh
|
||||
t.Log(srvErr)
|
||||
assert.Error(t, srvErr)
|
||||
assert.True(t, strings.Contains(srvErr.Error(), "version"))
|
||||
}
|
||||
|
||||
func TestDoHandshakeCurrentVersion(t *testing.T) {
|
||||
srv, client, err := socketpair.SocketPair()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer srv.Close()
|
||||
defer client.Close()
|
||||
|
||||
srvErrCh := make(chan error)
|
||||
go func() {
|
||||
srvErrCh <- DoHandshakeVersion(srv, time.Now().Add(2*time.Second), 1)
|
||||
}()
|
||||
err = DoHandshakeVersion(client, time.Now().Add(2*time.Second), 1)
|
||||
assert.Nil(t, err)
|
||||
assert.Nil(t, <-srvErrCh)
|
||||
|
||||
}
|
147
daemon/transport/serve/serve.go
Normal file
@@ -0,0 +1,147 @@
|
||||
package serve
|
||||
|
||||
import (
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/daemon/transport"
|
||||
"net"
|
||||
"github.com/zrepl/zrepl/daemon/streamrpcconfig"
|
||||
"github.com/problame/go-streamrpc"
|
||||
"context"
|
||||
"github.com/zrepl/zrepl/logger"
|
||||
"github.com/zrepl/zrepl/zfs"
|
||||
"time"
|
||||
)
|
||||
|
||||
type contextKey int
|
||||
|
||||
const contextKeyLog contextKey = 0
|
||||
|
||||
type Logger = logger.Logger
|
||||
|
||||
func WithLogger(ctx context.Context, log Logger) context.Context {
|
||||
return context.WithValue(ctx, contextKeyLog, log)
|
||||
}
|
||||
|
||||
func getLogger(ctx context.Context) Logger {
|
||||
if log, ok := ctx.Value(contextKeyLog).(Logger); ok {
|
||||
return log
|
||||
}
|
||||
return logger.NewNullLogger()
|
||||
}
|
||||
|
||||
type AuthenticatedConn interface {
|
||||
net.Conn
|
||||
// ClientIdentity must be a string that satisfies ValidateClientIdentity
|
||||
ClientIdentity() string
|
||||
}
|
||||
|
||||
// A client identity must be a single component in a ZFS filesystem path
|
||||
func ValidateClientIdentity(in string) (err error) {
|
||||
path, err := zfs.NewDatasetPath(in)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if path.Length() != 1 {
|
||||
return errors.New("client identity must be a single path comonent (not empty, no '/')")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type authConn struct {
|
||||
net.Conn
|
||||
clientIdentity string
|
||||
}
|
||||
|
||||
var _ AuthenticatedConn = authConn{}
|
||||
|
||||
func (c authConn) ClientIdentity() string {
|
||||
if err := ValidateClientIdentity(c.clientIdentity); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return c.clientIdentity
|
||||
}
|
||||
|
||||
// like net.Listener, but with an AuthenticatedConn instead of net.Conn
|
||||
type AuthenticatedListener interface {
|
||||
Addr() (net.Addr)
|
||||
Accept(ctx context.Context) (AuthenticatedConn, error)
|
||||
Close() error
|
||||
}
|
||||
|
||||
type ListenerFactory interface {
|
||||
Listen() (AuthenticatedListener, error)
|
||||
}
|
||||
|
||||
type HandshakeListenerFactory struct {
|
||||
lf ListenerFactory
|
||||
}
|
||||
|
||||
func (lf HandshakeListenerFactory) Listen() (AuthenticatedListener, error) {
|
||||
l, err := lf.lf.Listen()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return HandshakeListener{l}, nil
|
||||
}
|
||||
|
||||
type HandshakeListener struct {
|
||||
l AuthenticatedListener
|
||||
}
|
||||
|
||||
func (l HandshakeListener) Addr() (net.Addr) { return l.l.Addr() }
|
||||
|
||||
func (l HandshakeListener) Close() error { return l.l.Close() }
|
||||
|
||||
func (l HandshakeListener) Accept(ctx context.Context) (AuthenticatedConn, error) {
|
||||
conn, err := l.l.Accept(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dl, ok := ctx.Deadline()
|
||||
if !ok {
|
||||
dl = time.Now().Add(10*time.Second) // FIXME constant
|
||||
}
|
||||
if err := transport.DoHandshakeCurrentVersion(conn, dl); err != nil {
|
||||
conn.Close()
|
||||
return nil, err
|
||||
}
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func FromConfig(g *config.Global, in config.ServeEnum) (lf ListenerFactory, conf *streamrpc.ConnConfig, _ error) {
|
||||
|
||||
var (
|
||||
lfError, rpcErr error
|
||||
)
|
||||
switch v := in.Ret.(type) {
|
||||
case *config.TCPServe:
|
||||
lf, lfError = TCPListenerFactoryFromConfig(g, v)
|
||||
conf, rpcErr = streamrpcconfig.FromDaemonConfig(g, v.RPC)
|
||||
case *config.TLSServe:
|
||||
lf, lfError = TLSListenerFactoryFromConfig(g, v)
|
||||
conf, rpcErr = streamrpcconfig.FromDaemonConfig(g, v.RPC)
|
||||
case *config.StdinserverServer:
|
||||
lf, lfError = MultiStdinserverListenerFactoryFromConfig(g, v)
|
||||
conf, rpcErr = streamrpcconfig.FromDaemonConfig(g, v.RPC)
|
||||
case *config.LocalServe:
|
||||
lf, lfError = LocalListenerFactoryFromConfig(g, v)
|
||||
conf, rpcErr = streamrpcconfig.FromDaemonConfig(g, v.RPC)
|
||||
default:
|
||||
return nil, nil, errors.Errorf("internal error: unknown serve type %T", v)
|
||||
}
|
||||
|
||||
if lfError != nil {
|
||||
return nil, nil, lfError
|
||||
}
|
||||
if rpcErr != nil {
|
||||
return nil, nil, rpcErr
|
||||
}
|
||||
|
||||
lf = HandshakeListenerFactory{lf}
|
||||
|
||||
return lf, conf, nil
|
||||
|
||||
}
|
||||
|
||||
|
187
daemon/transport/serve/serve_local.go
Normal file
@@ -0,0 +1,187 @@
|
||||
package serve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/zrepl/zrepl/config"
|
||||
"github.com/zrepl/zrepl/util/socketpair"
|
||||
"net"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var localListeners struct {
|
||||
m map[string]*LocalListener // listenerName -> listener
|
||||
init sync.Once
|
||||
mtx sync.Mutex
|
||||
}
|
||||
|
||||
func GetLocalListener(listenerName string) (*LocalListener) {
|
||||
|
||||
localListeners.init.Do(func() {
|
||||
localListeners.m = make(map[string]*LocalListener)
|
||||
})
|
||||
|
||||
localListeners.mtx.Lock()
|
||||
defer localListeners.mtx.Unlock()
|
||||
|
||||
l, ok := localListeners.m[listenerName]
|
||||
if !ok {
|
||||
l = newLocalListener()
|
||||
localListeners.m[listenerName] = l
|
||||
}
|
||||
return l
|
||||
|
||||
}
|
||||
|
||||
type connectRequest struct {
|
||||
clientIdentity string
|
||||
callback chan connectResult
|
||||
}
|
||||
|
||||
type connectResult struct {
|
||||
conn net.Conn
|
||||
err error
|
||||
}
|
||||
|
||||
type LocalListener struct {
|
||||
connects chan connectRequest
|
||||
}
|
||||
|
||||
func newLocalListener() *LocalListener {
|
||||
return &LocalListener{
|
||||
connects: make(chan connectRequest),
|
||||
}
|
||||
}
|
||||
|
||||
// Connect to the LocalListener from a client with identity clientIdentity
|
||||
func (l *LocalListener) Connect(dialCtx context.Context, clientIdentity string) (conn net.Conn, err error) {
|
||||
|
||||
// place request
|
||||
req := connectRequest{
|
||||
clientIdentity: clientIdentity,
|
||||
callback: make(chan connectResult),
|
||||
}
|
||||
select {
|
||||
case l.connects <- req:
|
||||
case <-dialCtx.Done():
|
||||
return nil, dialCtx.Err()
|
||||
}
|
||||
|
||||
// wait for listener response
|
||||
select {
|
||||
case connRes := <- req.callback:
|
||||
conn, err = connRes.conn, connRes.err
|
||||
case <-dialCtx.Done():
|
||||
close(req.callback) // sending to the channel afterwards will panic, the listener has to catch this
|
||||
conn, err = nil, dialCtx.Err()
|
||||
}
|
||||
|
||||
return conn, err
|
||||
}
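// Usage sketch (listener and identity names hypothetical): the serving and the
// connecting side of a local job meet on the same listener name:
//   l := GetLocalListener("localjob")
//   go func() { conn, _ := l.Accept(ctx); _ = conn }() // serve side
//   c, err := l.Connect(dialCtx, "localclient")        // connect side, blocks until accepted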
|
||||
|
||||
type localAddr struct {
|
||||
S string
|
||||
}
|
||||
|
||||
func (localAddr) Network() string { return "local" }
|
||||
|
||||
func (a localAddr) String() string { return a.S }
|
||||
|
||||
func (l *LocalListener) Addr() (net.Addr) { return localAddr{"<listening>"} }
|
||||
|
||||
type localConn struct {
|
||||
net.Conn
|
||||
clientIdentity string
|
||||
}
|
||||
|
||||
func (l localConn) ClientIdentity() string { return l.clientIdentity }
|
||||
|
||||
func (l *LocalListener) Accept(ctx context.Context) (AuthenticatedConn, error) {
|
||||
respondToRequest := func(req connectRequest, res connectResult) (err error) {
|
||||
getLogger(ctx).
|
||||
WithField("res.conn", res.conn).WithField("res.err", res.err).
|
||||
Debug("responding to client request")
|
||||
defer func() {
|
||||
errv := recover()
|
||||
getLogger(ctx).WithField("recover_err", errv).
|
||||
Debug("panic on send to client callback, likely a legitimate client-side timeout")
|
||||
}()
|
||||
select {
|
||||
case req.callback <- res:
|
||||
err = nil
|
||||
default:
|
||||
err = fmt.Errorf("client-provided callback did block on send")
|
||||
}
|
||||
close(req.callback)
|
||||
return err
|
||||
}
|
||||
|
||||
getLogger(ctx).Debug("waiting for local client connect requests")
|
||||
var req connectRequest
|
||||
select {
|
||||
case req = <-l.connects:
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
|
||||
getLogger(ctx).WithField("client_identity", req.clientIdentity).Debug("got connect request")
|
||||
if req.clientIdentity == "" {
|
||||
res := connectResult{nil, fmt.Errorf("client identity must not be empty")}
|
||||
if err := respondToRequest(req, res); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return nil, fmt.Errorf("client connected with empty client identity")
|
||||
}
|
||||
|
||||
getLogger(ctx).Debug("creating socketpair")
|
||||
left, right, err := socketpair.SocketPair()
|
||||
if err != nil {
|
||||
res := connectResult{nil, fmt.Errorf("server error: %s", err)}
|
||||
if respErr := respondToRequest(req, res); respErr != nil {
|
||||
// returning the socketpair error properly is more important than the error sent to the client
|
||||
getLogger(ctx).WithError(respErr).Error("error responding to client")
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
getLogger(ctx).Debug("responding with left side of socketpair")
|
||||
res := connectResult{left, nil}
|
||||
if err := respondToRequest(req, res); err != nil {
|
||||
getLogger(ctx).WithError(err).Error("error responding to client")
|
||||
if err := left.Close(); err != nil {
|
||||
getLogger(ctx).WithError(err).Error("cannot close left side of socketpair")
|
||||
}
|
||||
if err := right.Close(); err != nil {
|
||||
getLogger(ctx).WithError(err).Error("cannot close right side of socketpair")
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return localConn{right, req.clientIdentity}, nil
|
||||
}
|
||||
|
||||
func (l *LocalListener) Close() error {
|
||||
// FIXME: make sure concurrent Accepts return with error, and further Accepts return that error, too
|
||||
// Example impl: for each accept, do context.WithCancel, and store the cancel in a list
|
||||
// When closing, set a member variable to state=closed, make sure accept will exit early
|
||||
// and then call all cancels in the list
|
||||
// The code path from Accept entry over check if state=closed to list entry must be protected by a mutex.
|
||||
return nil
|
||||
}
|
||||
|
||||
type LocalListenerFactory struct {
|
||||
listenerName string
|
||||
}
|
||||
|
||||
func LocalListenerFactoryFromConfig(g *config.Global, in *config.LocalServe) (f *LocalListenerFactory, err error) {
|
||||
if in.ListenerName == "" {
|
||||
return nil, fmt.Errorf("ListenerName must not be empty")
|
||||
}
|
||||
return &LocalListenerFactory{listenerName: in.ListenerName}, nil
|
||||
}
|
||||
|
||||
|
||||
func (lf *LocalListenerFactory) Listen() (AuthenticatedListener, error) {
|
||||
return GetLocalListener(lf.listenerName), nil
|
||||
}
|
||||
|
158
daemon/transport/serve/serve_stdinserver.go
Normal file
@@ -0,0 +1,158 @@
package serve

import (
	"context"
	"io"
	"net"
	"path"
	"sync/atomic"
	"time"

	"github.com/pkg/errors"
	"github.com/problame/go-netssh"
	"github.com/zrepl/zrepl/config"
	"github.com/zrepl/zrepl/daemon/nethelpers"
)

type StdinserverListenerFactory struct {
	ClientIdentities []string
	Sockdir          string
}

func MultiStdinserverListenerFactoryFromConfig(g *config.Global, in *config.StdinserverServer) (f *multiStdinserverListenerFactory, err error) {

	for _, ci := range in.ClientIdentities {
		if err := ValidateClientIdentity(ci); err != nil {
			return nil, errors.Wrapf(err, "invalid client identity %q", ci)
		}
	}

	f = &multiStdinserverListenerFactory{
		ClientIdentities: in.ClientIdentities,
		Sockdir:          g.Serve.StdinServer.SockDir,
	}

	return
}

type multiStdinserverListenerFactory struct {
	ClientIdentities []string
	Sockdir          string
}

func (f *multiStdinserverListenerFactory) Listen() (AuthenticatedListener, error) {
	return multiStdinserverListenerFromClientIdentities(f.Sockdir, f.ClientIdentities)
}

type multiStdinserverAcceptRes struct {
	conn AuthenticatedConn
	err  error
}

type MultiStdinserverListener struct {
	listeners []*stdinserverListener
	accepts   chan multiStdinserverAcceptRes
	closed    int32
}

// client identities must be validated
func multiStdinserverListenerFromClientIdentities(sockdir string, cis []string) (*MultiStdinserverListener, error) {
	listeners := make([]*stdinserverListener, 0, len(cis))
	var err error
	for _, ci := range cis {
		sockpath := path.Join(sockdir, ci)
		l := &stdinserverListener{clientIdentity: ci}
		if err = nethelpers.PreparePrivateSockpath(sockpath); err != nil {
			break
		}
		if l.l, err = netssh.Listen(sockpath); err != nil {
			break
		}
		listeners = append(listeners, l)
	}
	if err != nil {
		for _, l := range listeners {
			l.Close() // FIXME error reporting?
		}
		return nil, err
	}
	return &MultiStdinserverListener{listeners: listeners}, nil
}

func (m *MultiStdinserverListener) Accept(ctx context.Context) (AuthenticatedConn, error) {

	if m.accepts == nil {
		m.accepts = make(chan multiStdinserverAcceptRes, len(m.listeners))
		for i := range m.listeners {
			go func(i int) {
				for atomic.LoadInt32(&m.closed) == 0 {
					conn, err := m.listeners[i].Accept(context.TODO())
					m.accepts <- multiStdinserverAcceptRes{conn, err}
				}
			}(i)
		}
	}

	res := <-m.accepts
	return res.conn, res.err
}
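Accept above lazily starts one accept goroutine per per-client-identity socket and fans their results into a single buffered channel. A stripped-down sketch of that fan-in shape, with plain functions standing in for the netssh listeners (acceptRes, fanIn, and the identities are invented for illustration):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type acceptRes struct {
	conn string // stands in for an AuthenticatedConn
	err  error
}

// fanIn starts one goroutine per accept function and merges their results
// into a single buffered channel, until the closed flag is set.
func fanIn(accepts []func() acceptRes, closed *int32) chan acceptRes {
	out := make(chan acceptRes, len(accepts))
	for i := range accepts {
		go func(i int) {
			for atomic.LoadInt32(closed) == 0 {
				out <- accepts[i]()
			}
		}(i)
	}
	return out
}

func main() {
	var closed int32
	mkAccept := func(name string) func() acceptRes {
		return func() acceptRes {
			time.Sleep(10 * time.Millisecond) // pretend to block on a socket
			return acceptRes{conn: name}
		}
	}
	out := fanIn([]func() acceptRes{mkAccept("alice"), mkAccept("bob")}, &closed)
	for i := 0; i < 4; i++ {
		fmt.Println((<-out).conn) // connections arrive from either listener
	}
	atomic.StoreInt32(&closed, 1) // corresponds to MultiStdinserverListener.Close
}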

func (m *MultiStdinserverListener) Addr() net.Addr {
	return netsshAddr{}
}

func (m *MultiStdinserverListener) Close() error {
	atomic.StoreInt32(&m.closed, 1)
	var oneErr error
	for _, l := range m.listeners {
		if err := l.Close(); err != nil && oneErr == nil {
			oneErr = err
		}
	}
	return oneErr
}

// a single stdinserverListener (part of MultiStdinserverListener)
type stdinserverListener struct {
	l              *netssh.Listener
	clientIdentity string
}

func (l stdinserverListener) Addr() net.Addr {
	return netsshAddr{}
}

func (l stdinserverListener) Accept(ctx context.Context) (AuthenticatedConn, error) {
	c, err := l.l.Accept()
	if err != nil {
		return nil, err
	}
	return netsshConnToNetConnAdatper{c, l.clientIdentity}, nil
}

func (l stdinserverListener) Close() (err error) {
	return l.l.Close()
}

type netsshAddr struct{}

func (netsshAddr) Network() string { return "netssh" }
func (netsshAddr) String() string  { return "???" }

type netsshConnToNetConnAdatper struct {
	io.ReadWriteCloser // works for both netssh.SSHConn and netssh.ServeConn
	clientIdentity     string
}

func (a netsshConnToNetConnAdatper) ClientIdentity() string { return a.clientIdentity }

func (netsshConnToNetConnAdatper) LocalAddr() net.Addr { return netsshAddr{} }

func (netsshConnToNetConnAdatper) RemoteAddr() net.Addr { return netsshAddr{} }

// FIXME log warning once!
func (netsshConnToNetConnAdatper) SetDeadline(t time.Time) error { return nil }

func (netsshConnToNetConnAdatper) SetReadDeadline(t time.Time) error { return nil }

func (netsshConnToNetConnAdatper) SetWriteDeadline(t time.Time) error { return nil }
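The adapter above silently ignores deadlines; the FIXME asks for a warning that is logged only once. One plausible way to do that with the standard library's sync.Once, sketched with a generic logger instead of zrepl's (deadlineStub and warnOnce are illustrative names, not part of this change):

package main

import (
	"log"
	"sync"
	"time"
)

// deadlineStub ignores deadlines but complains exactly once per instance.
type deadlineStub struct {
	warnOnce *sync.Once
}

func (s deadlineStub) warn() {
	s.warnOnce.Do(func() {
		log.Println("deadlines are not supported on this transport and will be ignored")
	})
}

func (s deadlineStub) SetDeadline(t time.Time) error      { s.warn(); return nil }
func (s deadlineStub) SetReadDeadline(t time.Time) error  { s.warn(); return nil }
func (s deadlineStub) SetWriteDeadline(t time.Time) error { s.warn(); return nil }

func main() {
	stub := deadlineStub{warnOnce: &sync.Once{}}
	stub.SetDeadline(time.Now())      // logs the warning
	stub.SetReadDeadline(time.Now())  // silent
	stub.SetWriteDeadline(time.Now()) // silent
}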
91  daemon/transport/serve/serve_tcp.go  Normal file
@@ -0,0 +1,91 @@
package serve

import (
	"context"
	"net"

	"github.com/pkg/errors"
	"github.com/zrepl/zrepl/config"
)

type TCPListenerFactory struct {
	address   *net.TCPAddr
	clientMap *ipMap
}

type ipMapEntry struct {
	ip    net.IP
	ident string
}

type ipMap struct {
	entries []ipMapEntry
}

func ipMapFromConfig(clients map[string]string) (*ipMap, error) {
	entries := make([]ipMapEntry, 0, len(clients))
	for clientIPString, clientIdent := range clients {
		clientIP := net.ParseIP(clientIPString)
		if clientIP == nil {
			return nil, errors.Errorf("cannot parse client IP %q", clientIPString)
		}
		if err := ValidateClientIdentity(clientIdent); err != nil {
			return nil, errors.Wrapf(err, "invalid client identity for IP %q", clientIPString)
		}
		entries = append(entries, ipMapEntry{clientIP, clientIdent})
	}
	return &ipMap{entries: entries}, nil
}

func (m *ipMap) Get(ip net.IP) (string, error) {
	for _, e := range m.entries {
		if e.ip.Equal(ip) {
			return e.ident, nil
		}
	}
	return "", errors.Errorf("no identity mapping for client IP %s", ip)
}
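ipMap resolves a peer IP to the identity configured under the tcp transport's clients map; Get walks the parsed entries and errors out for unknown peers. A small self-contained illustration of that lookup, using a plain map literal in place of the parsed config (the addresses and identities are made up):

package main

import (
	"fmt"
	"net"
)

// identityForIP mimics ipMap.Get over a client map shaped like the tcp serve
// config: map[clientIP]clientIdentity.
func identityForIP(clients map[string]string, peer net.IP) (string, error) {
	for ipString, ident := range clients {
		ip := net.ParseIP(ipString)
		if ip == nil {
			return "", fmt.Errorf("cannot parse client IP %q", ipString)
		}
		if ip.Equal(peer) {
			return ident, nil
		}
	}
	return "", fmt.Errorf("no identity mapping for client IP %s", peer)
}

func main() {
	clients := map[string]string{
		"192.168.122.123": "backup-host-a",
		"fd00::23":        "backup-host-b",
	}
	fmt.Println(identityForIP(clients, net.ParseIP("fd00::23")))    // backup-host-b <nil>
	fmt.Println(identityForIP(clients, net.ParseIP("203.0.113.7"))) // "" and an error
}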

func TCPListenerFactoryFromConfig(c *config.Global, in *config.TCPServe) (*TCPListenerFactory, error) {
	addr, err := net.ResolveTCPAddr("tcp", in.Listen)
	if err != nil {
		return nil, errors.Wrap(err, "cannot parse listen address")
	}
	clientMap, err := ipMapFromConfig(in.Clients)
	if err != nil {
		return nil, errors.Wrap(err, "cannot parse client IP map")
	}
	lf := &TCPListenerFactory{
		address:   addr,
		clientMap: clientMap,
	}
	return lf, nil
}

func (f *TCPListenerFactory) Listen() (AuthenticatedListener, error) {
	l, err := net.ListenTCP("tcp", f.address)
	if err != nil {
		return nil, err
	}
	return &TCPAuthListener{l, f.clientMap}, nil
}

type TCPAuthListener struct {
	*net.TCPListener
	clientMap *ipMap
}

func (f *TCPAuthListener) Accept(ctx context.Context) (AuthenticatedConn, error) {
	nc, err := f.TCPListener.Accept()
	if err != nil {
		return nil, err
	}
	clientIP := nc.RemoteAddr().(*net.TCPAddr).IP
	clientIdent, err := f.clientMap.Get(clientIP)
	if err != nil {
		getLogger(ctx).WithField("ip", clientIP).Error("client IP not in client map")
		nc.Close()
		return nil, err
	}
	return authConn{nc, clientIdent}, nil
}
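TCPAuthListener.Accept authenticates purely by source address: accept the TCP connection, resolve the remote IP through the client map, and either return the connection tagged with the identity or close it. The sketch below exercises that flow against a loopback listener; ipIdentifier and the identity string are invented for the example, not zrepl API:

package main

import (
	"fmt"
	"net"
)

// ipIdentifier is a stand-in for the clientMap lookup in TCPAuthListener.Accept.
type ipIdentifier map[string]string

func (m ipIdentifier) identify(conn net.Conn) (string, error) {
	ip := conn.RemoteAddr().(*net.TCPAddr).IP
	ident, ok := m[ip.String()]
	if !ok {
		conn.Close() // unknown peers are dropped immediately
		return "", fmt.Errorf("no identity mapping for client IP %s", ip)
	}
	return ident, nil
}

func main() {
	l, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	defer l.Close()

	// a single client connecting from the loopback address
	go func() {
		c, err := net.Dial("tcp", l.Addr().String())
		if err == nil {
			c.Close()
		}
	}()

	conn, err := l.Accept()
	if err != nil {
		panic(err)
	}
	ident, err := ipIdentifier{"127.0.0.1": "localhost-client"}.identify(conn)
	fmt.Println(ident, err) // localhost-client <nil>
}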
Some files were not shown because too many files have changed in this diff.