Merge branch 'replication_rewrite' (in fact it's a 90% rewrite)

Christian Schwarz 2018-10-13 16:26:23 +02:00
commit 074f989547
173 changed files with 12505 additions and 6197 deletions

150
Gopkg.lock generated

@ -3,163 +3,293 @@
[[projects]]
branch = "master"
digest = "1:c0bec5f9b98d0bc872ff5e834fac186b807b656683bd29cb82fb207a1513fabb"
name = "github.com/beorn7/perks"
packages = ["quantile"]
pruneopts = ""
revision = "3a771d992973f24aa725d07868b467d1ddfceafb"
[[projects]]
digest = "1:56c130d885a4aacae1dd9c7b71cfe39912c7ebc1ff7d2b46083c8812996dc43b"
name = "github.com/davecgh/go-spew"
packages = ["spew"]
pruneopts = ""
revision = "346938d642f2ec3594ed81d874461961cd0faa76"
version = "v1.1.0"
[[projects]]
branch = "master"
name = "github.com/dustin/go-humanize"
digest = "1:e988ed0ca0d81f4d28772760c02ee95084961311291bdfefc1b04617c178b722"
name = "github.com/fatih/color"
packages = ["."]
revision = "bb3d318650d48840a39aa21a027c6630e198e626"
pruneopts = ""
revision = "5b77d2a35fb0ede96d138fc9a99f5c9b6aef11b4"
version = "v1.7.0"
[[projects]]
branch = "master"
digest = "1:5d0a2385edf4ba44f3b7b76bc0436ceb8f62bf55aa5d540a9eb9ec6c58d86809"
name = "github.com/ftrvxmtrx/fd"
packages = ["."]
pruneopts = ""
revision = "c6d800382fff6dc1412f34269f71b7f83bd059ad"
[[projects]]
digest = "1:6a4a01d58b227c4b6b11111b9f172ec5c17682b82724e58e6daf3f19f4faccd8"
name = "github.com/go-logfmt/logfmt"
packages = ["."]
pruneopts = ""
revision = "390ab7935ee28ec6b286364bba9b4dd6410cb3d5"
version = "v0.3.0"
[[projects]]
branch = "v2"
digest = "1:81314a486195626940617e43740b4fa073f265b0715c9f54ce2027fee1cb5f61"
name = "github.com/go-yaml/yaml"
packages = ["."]
pruneopts = ""
revision = "eb3733d160e74a9c7e442f435eb3bea458e1d19f"
[[projects]]
digest = "1:3dd078fda7500c341bc26cfbc6c6a34614f295a2457149fc1045cab767cbcf18"
name = "github.com/golang/protobuf"
packages = ["proto"]
revision = "925541529c1fa6821df4e44ce2723319eb2be768"
version = "v1.0.0"
pruneopts = ""
revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5"
version = "v1.2.0"
[[projects]]
branch = "master"
digest = "1:cb09475f771b9167fb9333629f5d6a7161572602ea040f1094602b0dc8709878"
name = "github.com/jinzhu/copier"
packages = ["."]
pruneopts = ""
revision = "db4671f3a9b8df855e993f7c94ec5ef1ffb0a23b"
[[projects]]
branch = "master"
digest = "1:1ed9eeebdf24aadfbca57eb50e6455bd1d2474525e0f0d4454de8c8e9bc7ee9a"
name = "github.com/kr/logfmt"
packages = ["."]
pruneopts = ""
revision = "b84e30acd515aadc4b783ad4ff83aff3299bdfe0"
[[projects]]
branch = "master"
digest = "1:591a2778aa6e896980757ea87e659b3aa13d8c0e790310614028463a31c0998b"
name = "github.com/kr/pretty"
packages = ["."]
pruneopts = ""
revision = "cfb55aafdaf3ec08f0db22699ab822c50091b1c4"
[[projects]]
branch = "master"
digest = "1:109e0a9b9e74c5c8adf94a2dc4dd4a9ca2a183d4d87ffecd4e62db69a5ede55a"
name = "github.com/kr/text"
packages = ["."]
pruneopts = ""
revision = "7cafcd837844e784b526369c9bce262804aebc60"
[[projects]]
digest = "1:9ea83adf8e96d6304f394d40436f2eb44c1dc3250d223b74088cc253a6cd0a1c"
name = "github.com/mattn/go-colorable"
packages = ["."]
pruneopts = ""
revision = "167de6bfdfba052fa6b2d3664c8f5272e23c9072"
version = "v0.0.9"
[[projects]]
digest = "1:78229b46ddb7434f881390029bd1af7661294af31f6802e0e1bedaad4ab0af3c"
name = "github.com/mattn/go-isatty"
packages = ["."]
pruneopts = ""
revision = "0360b2af4f38e8d38c7fce2a9f4e702702d73a39"
version = "v0.0.3"
[[projects]]
digest = "1:82b912465c1da0668582a7d1117339c278e786c2536b3c3623029a0c7141c2d0"
name = "github.com/mattn/go-runewidth"
packages = ["."]
pruneopts = ""
revision = "ce7b0b5c7b45a81508558cd1dba6bb1e4ddb51bb"
version = "v0.0.3"
[[projects]]
digest = "1:4c23ced97a470b17d9ffd788310502a077b9c1f60221a85563e49696276b4147"
name = "github.com/matttproud/golang_protobuf_extensions"
packages = ["pbutil"]
pruneopts = ""
revision = "3247c84500bff8d9fb6d579d800f20b3e091582c"
version = "v1.0.0"
[[projects]]
branch = "master"
digest = "1:c9ede10a9ded782d25d1f0be87c680e11409c23554828f19a19d691a95e76130"
name = "github.com/mitchellh/mapstructure"
packages = ["."]
pruneopts = ""
revision = "d0303fe809921458f417bcf828397a65db30a7e4"
[[projects]]
branch = "master"
digest = "1:20a553eff588d7abe1f05addf5f57cdbaef1d0f992427a0099b7eb51274b79cf"
name = "github.com/nsf/termbox-go"
packages = ["."]
pruneopts = ""
revision = "b66b20ab708e289ff1eb3e218478302e6aec28ce"
[[projects]]
digest = "1:7365acd48986e205ccb8652cc746f09c8b7876030d53710ea6ef7d0bd0dcd7ca"
name = "github.com/pkg/errors"
packages = ["."]
pruneopts = ""
revision = "645ef00459ed84a119197bfb8d8205042c6df63d"
version = "v0.8.0"
[[projects]]
digest = "1:256484dbbcd271f9ecebc6795b2df8cad4c458dd0f5fd82a8c2fa0c29f233411"
name = "github.com/pmezard/go-difflib"
packages = ["difflib"]
pruneopts = ""
revision = "792786c7400a136282c1664665ae0a8db921c6c2"
version = "v1.0.0"
[[projects]]
branch = "master"
digest = "1:1392748e290ca66ac8447ef24961f8ae9e1d846a53af0f58a5a0256982ce0577"
name = "github.com/problame/go-netssh"
packages = ["."]
pruneopts = ""
revision = "c56ad38d2c91397ad3c8dd9443d7448e328a9e9e"
[[projects]]
branch = "master"
digest = "1:8c63c44f018bd52b03ebad65c9df26aabbc6793138e421df1c8c84c285a45bc6"
name = "github.com/problame/go-rwccmd"
packages = ["."]
pruneopts = ""
revision = "391d2c78c8404a9683d79f75dd24ab53040f89f7"
[[projects]]
digest = "1:c2ba1c9dc003c15856e4529dac028cacba08ee8924300f058b3467cde9acf7a9"
name = "github.com/problame/go-streamrpc"
packages = [
".",
"internal/pdu",
]
pruneopts = ""
revision = "de6f6a4041c77f700f02d8fe749e54efa50811f7"
version = "v0.4"
[[projects]]
branch = "master"
digest = "1:ebf8ffdde9bdbf9c83e22121875c68c01d821776523546554b2a3ff6f72773ab"
name = "github.com/prometheus/client_golang"
packages = ["prometheus","prometheus/promhttp"]
packages = [
"prometheus",
"prometheus/promhttp",
]
pruneopts = ""
revision = "e11c6ff8170beca9d5fd8b938e71165eeec53ac6"
[[projects]]
branch = "master"
digest = "1:60aca47f4eeeb972f1b9da7e7db51dee15ff6c59f7b401c1588b8e6771ba15ef"
name = "github.com/prometheus/client_model"
packages = ["go"]
pruneopts = ""
revision = "99fa1f4be8e564e8a6b613da7fa6f46c9edafc6c"
[[projects]]
branch = "master"
digest = "1:af21ee3e0a8212f17bb317cd7237f9920bcb2641a291ac111f30f63b3cab817f"
name = "github.com/prometheus/common"
packages = ["expfmt","internal/bitbucket.org/ww/goautoneg","model"]
packages = [
"expfmt",
"internal/bitbucket.org/ww/goautoneg",
"model",
]
pruneopts = ""
revision = "d0f7cd64bda49e08b22ae8a730aa57aa0db125d6"
[[projects]]
branch = "master"
digest = "1:61df0898746840afc7be5dc2c3eeec83022fab70df11ecee5b16c85e912cf5ed"
name = "github.com/prometheus/procfs"
packages = [".","internal/util","nfs","xfs"]
packages = [
".",
"internal/util",
"nfs",
"xfs",
]
pruneopts = ""
revision = "8b1c2da0d56deffdbb9e48d4414b4e674bd8083e"
[[projects]]
branch = "master"
digest = "1:146327ce93be37e68bd3ff8541090d96da8cb3adc9e35d57570e9170a29f6bf6"
name = "github.com/spf13/cobra"
packages = ["."]
pruneopts = ""
revision = "b78744579491c1ceeaaa3b40205e56b0591b93a3"
[[projects]]
digest = "1:261bc565833ef4f02121450d74eb88d5ae4bd74bfe5d0e862cddb8550ec35000"
name = "github.com/spf13/pflag"
packages = ["."]
pruneopts = ""
revision = "e57e3eeb33f795204c1ca35f56c44f83227c6e66"
version = "v1.0.0"
[[projects]]
digest = "1:3926a4ec9a4ff1a072458451aa2d9b98acd059a45b38f7335d31e06c3d6a0159"
name = "github.com/stretchr/testify"
packages = ["assert"]
packages = [
"assert",
"require",
]
pruneopts = ""
revision = "69483b4bd14f5845b5a1e55bca19e954e827f1d0"
version = "v1.1.4"
[[projects]]
branch = "v2"
digest = "1:9d92186f609a73744232323416ddafd56fae67cb552162cc190ab903e36900dd"
name = "github.com/zrepl/yaml-config"
packages = ["."]
pruneopts = ""
revision = "af27d27978ad95808723a62d87557d63c3ff0605"
[[projects]]
branch = "master"
digest = "1:9c286cf11d0ca56368185bada5dd6d97b6be4648fc26c354fcba8df7293718f7"
name = "golang.org/x/sys"
packages = ["unix"]
pruneopts = ""
revision = "bf42f188b9bc6f2cf5b8ee5a912ef1aedd0eba4c"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "36731e77252dcc851fdfa2b0b0778b980597e3c1a47b5e2af3bd0bcb802662ec"
input-imports = [
"github.com/fatih/color",
"github.com/go-logfmt/logfmt",
"github.com/go-yaml/yaml",
"github.com/golang/protobuf/proto",
"github.com/jinzhu/copier",
"github.com/kr/pretty",
"github.com/mattn/go-isatty",
"github.com/mitchellh/mapstructure",
"github.com/nsf/termbox-go",
"github.com/pkg/errors",
"github.com/problame/go-netssh",
"github.com/problame/go-rwccmd",
"github.com/problame/go-streamrpc",
"github.com/prometheus/client_golang/prometheus",
"github.com/prometheus/client_golang/prometheus/promhttp",
"github.com/spf13/cobra",
"github.com/stretchr/testify/assert",
"github.com/stretchr/testify/require",
"github.com/zrepl/yaml-config",
]
solver-name = "gps-cdcl"
solver-version = 1


@ -38,7 +38,7 @@ ignored = [ "github.com/inconshreveable/mousetrap" ]
[[constraint]]
branch = "v2"
name = "github.com/go-yaml/yaml"
name = "github.com/zrepl/yaml-config"
[[constraint]]
name = "github.com/go-logfmt/logfmt"
@ -55,3 +55,18 @@ ignored = [ "github.com/inconshreveable/mousetrap" ]
[[constraint]]
name = "github.com/prometheus/client_golang"
branch = "master"
[[constraint]]
name = "github.com/golang/protobuf"
version = "1.2.0"
[[constraint]]
name = "github.com/nsf/termbox-go"
branch = "master"
[[constraint]]
name = "github.com/fatih/color"
version = "1.7.0"
[[constraint]]
name = "github.com/problame/go-streamrpc"
version = "0.4.0"


@ -2,7 +2,32 @@
.DEFAULT_GOAL := build
ROOT := github.com/zrepl/zrepl
SUBPKGS := cmd logger rpc util zfs
SUBPKGS += client
SUBPKGS += config
SUBPKGS += daemon
SUBPKGS += daemon/filters
SUBPKGS += daemon/job
SUBPKGS += daemon/logging
SUBPKGS += daemon/nethelpers
SUBPKGS += daemon/pruner
SUBPKGS += daemon/snapper
SUBPKGS += daemon/streamrpcconfig
SUBPKGS += daemon/transport
SUBPKGS += daemon/transport/connecter
SUBPKGS += daemon/transport/serve
SUBPKGS += endpoint
SUBPKGS += logger
SUBPKGS += pruning
SUBPKGS += pruning/retentiongrid
SUBPKGS += replication
SUBPKGS += replication/fsrep
SUBPKGS += replication/pdu
SUBPKGS += replication/internal/queue
SUBPKGS += replication/internal/diff
SUBPKGS += tlsconf
SUBPKGS += util
SUBPKGS += version
SUBPKGS += zfs
_TESTPKGS := $(ROOT) $(foreach p,$(SUBPKGS),$(ROOT)/$(p))
@ -14,7 +39,7 @@ ifndef ZREPL_VERSION
$(error cannot infer variable ZREPL_VERSION using git and variable is not overridden by make invocation)
endif
endif
GO_LDFLAGS := "-X github.com/zrepl/zrepl/cmd.zreplVersion=$(ZREPL_VERSION)"
GO_LDFLAGS := "-X github.com/zrepl/zrepl/version.zreplVersion=$(ZREPL_VERSION)"
GO_BUILD := go build -ldflags $(GO_LDFLAGS)
@ -26,6 +51,7 @@ vendordeps:
dep ensure -v -vendor-only
generate: #not part of the build, must do that manually
protoc -I=replication/pdu --go_out=replication/pdu replication/pdu/pdu.proto
@for pkg in $(_TESTPKGS); do\
go generate "$$pkg" || exit 1; \
done;


@ -46,6 +46,7 @@ Make sure to develop an understanding how zrepl is typically used by studying th
```
├── cmd
│   ├── endpoint # implementations of endpoints for package replication
│   ├── sampleconf # example configuration
├── docs # sphinx-based documentation
│   ├── **/*.rst # documentation in reStructuredText
@ -55,6 +56,7 @@ Make sure to develop an understanding how zrepl is typically used by studying th
│   ├── publish.sh # shell script for automated rendering & deploy to zrepl.github.io repo
│   ├── public_git # checkout of zrepl.github.io managed by above shell script
├── logger # logger package used by zrepl
├── replication # replication functionality
├── rpc # rpc protocol implementation
├── util
└── zfs # ZFS wrappers, filesystem diffing


@ -1,7 +1,12 @@
FROM golang:latest
RUN apt-get update && apt-get install -y \
python3-pip
python3-pip \
unzip
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip
RUN echo "6003de742ea3fcf703cfec1cd4a3380fd143081a2eb0e559065563496af27807 protoc-3.6.1-linux-x86_64.zip" | sha256sum -c
RUN unzip -d /usr protoc-3.6.1-linux-x86_64.zip
ADD lazy.sh /tmp/lazy.sh
ADD docs/requirements.txt /tmp/requirements.txt

116
cli/cli.go Normal file

@ -0,0 +1,116 @@
package cli
import (
"fmt"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/zrepl/zrepl/config"
"os"
)
var rootArgs struct {
configPath string
}
var rootCmd = &cobra.Command{
Use: "zrepl",
Short: "One-stop ZFS replication solution",
}
var bashcompCmd = &cobra.Command{
Use: "bashcomp path/to/out/file",
Short: "generate bash completions",
Run: func(cmd *cobra.Command, args []string) {
if len(args) != 1 {
fmt.Fprintf(os.Stderr, "specify exactly one positional argument\n")
cmd.Usage()
os.Exit(1)
}
if err := rootCmd.GenBashCompletionFile(args[0]); err != nil {
fmt.Fprintf(os.Stderr, "error generating bash completion: %s", err)
os.Exit(1)
}
},
Hidden: true,
}
func init() {
rootCmd.PersistentFlags().StringVar(&rootArgs.configPath, "config", "", "config file path")
rootCmd.AddCommand(bashcompCmd)
}
type Subcommand struct {
Use string
Short string
NoRequireConfig bool
Run func(subcommand *Subcommand, args []string) error
SetupFlags func(f *pflag.FlagSet)
SetupSubcommands func() []*Subcommand
config *config.Config
configErr error
}
func (s *Subcommand) ConfigParsingError() error {
return s.configErr
}
func (s *Subcommand) Config() *config.Config {
if !s.NoRequireConfig && s.config == nil {
panic("command that requires config is running and has no config set")
}
return s.config
}
func (s *Subcommand) run(cmd *cobra.Command, args []string) {
s.tryParseConfig()
err := s.Run(s, args)
if err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1)
}
}
func (s *Subcommand) tryParseConfig() {
config, err := config.ParseConfig(rootArgs.configPath)
s.configErr = err
if err != nil {
if s.NoRequireConfig {
// doesn't matter
return
} else {
fmt.Fprintf(os.Stderr, "could not parse config: %s\n", err)
os.Exit(1)
}
}
s.config = config
}
func AddSubcommand(s *Subcommand) {
addSubcommandToCobraCmd(rootCmd, s)
}
func addSubcommandToCobraCmd(c *cobra.Command, s *Subcommand) {
cmd := cobra.Command{
Use: s.Use,
Short: s.Short,
}
if s.SetupSubcommands == nil {
cmd.Run = s.run
} else {
for _, sub := range s.SetupSubcommands() {
addSubcommandToCobraCmd(&cmd, sub)
}
}
if s.SetupFlags != nil {
s.SetupFlags(cmd.Flags())
}
c.AddCommand(&cmd)
}
func Run() {
if err := rootCmd.Execute(); err != nil {
os.Exit(1)
}
}
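
The cli package above only provides the plumbing; the subcommands defined in the new client package (below) are attached to it elsewhere in the tree. As a rough orientation, the wiring could look like the following minimal sketch. It is not the commit's actual main package and only uses exported names that appear in this diff (cli.AddSubcommand, cli.Run, and the client.*Cmd variables):

```go
// Sketch only, not part of this commit: hypothetical wiring of client
// subcommands into the cli package shown above.
package main

import (
	"github.com/zrepl/zrepl/cli"
	"github.com/zrepl/zrepl/client"
)

func main() {
	// Each *cli.Subcommand is turned into a cobra command on the root.
	cli.AddSubcommand(client.StatusCmd)
	cli.AddSubcommand(client.SignalCmd)
	cli.AddSubcommand(client.VersionCmd)
	cli.AddSubcommand(client.ConfigcheckCmd)
	// Run executes the root command; a subcommand parses --config lazily
	// right before its Run function is invoked.
	cli.Run()
}
```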

36
client/configcheck.go Normal file

@ -0,0 +1,36 @@
package client
import (
"encoding/json"
"github.com/kr/pretty"
"github.com/spf13/pflag"
"github.com/zrepl/yaml-config"
"github.com/zrepl/zrepl/cli"
"os"
)
var configcheckArgs struct {
format string
}
var ConfigcheckCmd = &cli.Subcommand{
Use: "configcheck",
Short: "check if config can be parsed without errors",
SetupFlags: func(f *pflag.FlagSet) {
f.StringVar(&configcheckArgs.format, "format", "", "dump parsed config object [pretty|yaml|json]")
},
Run: func(subcommand *cli.Subcommand, args []string) error {
switch configcheckArgs.format {
case "pretty":
_, err := pretty.Println(subcommand.Config())
return err
case "json":
return json.NewEncoder(os.Stdout).Encode(subcommand.Config())
case "yaml":
return yaml.NewEncoder(os.Stdout).Encode(subcommand.Config())
default: // no output
}
return nil
},
}

48
client/jsonclient.go Normal file

@ -0,0 +1,48 @@
package client
import (
"bytes"
"context"
"encoding/json"
"github.com/pkg/errors"
"io"
"net"
"net/http"
)
func controlHttpClient(sockpath string) (client http.Client, err error) {
return http.Client{
Transport: &http.Transport{
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("unix", sockpath)
},
},
}, nil
}
func jsonRequestResponse(c http.Client, endpoint string, req interface{}, res interface{}) error {
var buf bytes.Buffer
encodeErr := json.NewEncoder(&buf).Encode(req)
if encodeErr != nil {
return encodeErr
}
resp, err := c.Post("http://unix"+endpoint, "application/json", &buf)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
var msg bytes.Buffer
io.CopyN(&msg, resp.Body, 4096)
return errors.Errorf("%s", msg.String())
}
decodeError := json.NewDecoder(resp.Body).Decode(&res)
if decodeError != nil {
return decodeError
}
return nil
}
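
These two helpers are the entire client-to-daemon transport: a unix-socket HTTP client plus JSON request/response framing. The following sketch shows the typical call pattern, matching what client/status.go and client/signal.go do with the config's global control socket path and the daemon's endpoint constants. The fetchStatus name is invented and the function is not part of the commit:

```go
// Sketch only (not part of the commit): a hypothetical helper that could sit
// next to the functions above in package client. It fetches the daemon's job
// status map the same way client/status.go does further down in this diff.
package client

import (
	"github.com/zrepl/zrepl/config"
	"github.com/zrepl/zrepl/daemon"
	"github.com/zrepl/zrepl/daemon/job"
)

func fetchStatus(conf *config.Config) (map[string]job.Status, error) {
	// Dial the daemon's unix control socket (global control sockpath).
	httpc, err := controlHttpClient(conf.Global.Control.SockPath)
	if err != nil {
		return nil, err
	}
	// POST an empty JSON body and decode the JSON reply into the map.
	m := make(map[string]job.Status)
	if err := jsonRequestResponse(httpc, daemon.ControlJobEndpointStatus, struct{}{}, &m); err != nil {
		return nil, err
	}
	return m, nil
}
```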

66
client/pprof.go Normal file

@ -0,0 +1,66 @@
package client
import (
"errors"
"github.com/zrepl/zrepl/cli"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon"
"log"
"os"
)
var pprofArgs struct {
daemon.PprofServerControlMsg
}
var PprofCmd = &cli.Subcommand{
Use: "pprof off | [on TCP_LISTEN_ADDRESS]",
Short: "start an http server exposing go-tool-compatible profiling endpoints at TCP_LISTEN_ADDRESS",
Run: func(subcommand *cli.Subcommand, args []string) error {
if len(args) < 1 {
goto enargs
}
switch args[0] {
case "on":
pprofArgs.Run = true
if len(args) != 2 {
return errors.New("must specify TCP_LISTEN_ADDRESS as second positional argument")
}
pprofArgs.HttpListenAddress = args[1]
case "off":
if len(args) != 1 {
goto enargs
}
pprofArgs.Run = false
}
RunPProf(subcommand.Config())
return nil
enargs:
return errors.New("invalid number of positional arguments")
},
}
func RunPProf(conf *config.Config) {
log := log.New(os.Stderr, "", 0)
die := func() {
log.Printf("exiting after error")
os.Exit(1)
}
log.Printf("connecting to zrepl daemon")
httpc, err := controlHttpClient(conf.Global.Control.SockPath)
if err != nil {
log.Printf("error creating http client: %s", err)
die()
}
err = jsonRequestResponse(httpc, daemon.ControlJobEndpointPProf, pprofArgs.PprofServerControlMsg, struct{}{})
if err != nil {
log.Printf("error sending control message: %s", err)
die()
}
log.Printf("finished")
}

39
client/signal.go Normal file

@ -0,0 +1,39 @@
package client
import (
"github.com/pkg/errors"
"github.com/zrepl/zrepl/cli"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon"
)
var SignalCmd = &cli.Subcommand{
Use: "signal [wakeup|reset] JOB",
Short: "wake up a job from wait state or abort its current invocation",
Run: func(subcommand *cli.Subcommand, args []string) error {
return runSignalCmd(subcommand.Config(), args)
},
}
func runSignalCmd(config *config.Config, args []string) error {
if len(args) != 2 {
return errors.Errorf("Expected 2 arguments: [wakeup|reset] JOB")
}
httpc, err := controlHttpClient(config.Global.Control.SockPath)
if err != nil {
return err
}
err = jsonRequestResponse(httpc, daemon.ControlJobEndpointSignal,
struct {
Name string
Op string
}{
Name: args[1],
Op: args[0],
},
struct{}{},
)
return err
}

547
client/status.go Normal file

@ -0,0 +1,547 @@
package client
import (
"fmt"
"github.com/nsf/termbox-go"
"github.com/pkg/errors"
"github.com/spf13/pflag"
"github.com/zrepl/yaml-config"
"github.com/zrepl/zrepl/cli"
"github.com/zrepl/zrepl/daemon"
"github.com/zrepl/zrepl/daemon/job"
"github.com/zrepl/zrepl/daemon/pruner"
"github.com/zrepl/zrepl/replication"
"github.com/zrepl/zrepl/replication/fsrep"
"io"
"math"
"net/http"
"os"
"sort"
"strings"
"sync"
"time"
)
type tui struct {
x, y int
indent int
lock sync.Mutex //For report and error
report map[string]job.Status
err error
}
func newTui() tui {
return tui{}
}
func (t *tui) moveCursor(x, y int) {
t.x += x
t.y += y
}
func (t *tui) moveLine(dl int, col int) {
t.y += dl
t.x = t.indent*4 + col
}
func (t *tui) write(text string) {
for _, c := range text {
if c == '\n' {
t.newline()
continue
}
termbox.SetCell(t.x, t.y, c, termbox.ColorDefault, termbox.ColorDefault)
t.x += 1
}
}
func (t *tui) printf(text string, a ...interface{}) {
t.write(fmt.Sprintf(text, a...))
}
func (t *tui) newline() {
t.moveLine(1, 0)
}
func (t *tui) setIndent(indent int) {
t.indent = indent
t.moveLine(0, 0)
}
func (t *tui) addIndent(indent int) {
t.indent += indent
t.moveLine(0, 0)
}
var statusFlags struct {
Raw bool
}
var StatusCmd = &cli.Subcommand{
Use: "status",
Short: "show job activity or dump as JSON for monitoring",
SetupFlags: func(f *pflag.FlagSet) {
f.BoolVar(&statusFlags.Raw, "raw", false, "dump raw status description from zrepl daemon")
},
Run: runStatus,
}
func runStatus(s *cli.Subcommand, args []string) error {
httpc, err := controlHttpClient(s.Config().Global.Control.SockPath)
if err != nil {
return err
}
if statusFlags.Raw {
resp, err := httpc.Get("http://unix"+daemon.ControlJobEndpointStatus)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
fmt.Fprintf(os.Stderr, "Received error response:\n")
io.CopyN(os.Stderr, resp.Body, 4096)
return errors.Errorf("exit")
}
if _, err := io.Copy(os.Stdout, resp.Body); err != nil {
return err
}
return nil
}
t := newTui()
t.lock.Lock()
t.err = errors.New("Got no report yet")
t.lock.Unlock()
err = termbox.Init()
if err != nil {
return err
}
defer termbox.Close()
update := func() {
m := make(map[string]job.Status)
err2 := jsonRequestResponse(httpc, daemon.ControlJobEndpointStatus,
struct{}{},
&m,
)
t.lock.Lock()
t.err = err2
t.report = m
t.lock.Unlock()
t.draw()
}
update()
ticker := time.NewTicker(500 * time.Millisecond)
defer ticker.Stop()
go func() {
for _ = range ticker.C {
update()
}
}()
termbox.HideCursor()
termbox.Clear(termbox.ColorDefault, termbox.ColorDefault)
loop:
for {
switch ev := termbox.PollEvent(); ev.Type {
case termbox.EventKey:
switch ev.Key {
case termbox.KeyEsc:
break loop
case termbox.KeyCtrlC:
break loop
}
case termbox.EventResize:
t.draw()
}
}
return nil
}
func (t *tui) draw() {
t.lock.Lock()
defer t.lock.Unlock()
termbox.Clear(termbox.ColorDefault, termbox.ColorDefault)
t.x = 0
t.y = 0
t.indent = 0
if t.err != nil {
t.write(t.err.Error())
} else {
//Iterate over map in alphabetical order
keys := make([]string, len(t.report))
i := 0
for k, _ := range t.report {
keys[i] = k
i++
}
sort.Strings(keys)
for _, k := range keys {
v := t.report[k]
if len(k) == 0 || daemon.IsInternalJobName(k) { //Internal job
continue
}
t.setIndent(0)
t.printf("Job: %s", k)
t.setIndent(1)
t.newline()
t.printf("Type: %s", v.Type)
t.setIndent(1)
t.newline()
if v.Type != job.TypePush && v.Type != job.TypePull {
t.printf("No status representation for job type '%s', dumping as YAML", v.Type)
t.newline()
asYaml, err := yaml.Marshal(v.JobSpecific)
if err != nil {
t.printf("Error marshaling status to YAML: %s", err)
t.newline()
continue
}
t.write(string(asYaml))
t.newline()
continue
}
pushStatus, ok := v.JobSpecific.(*job.ActiveSideStatus)
if !ok || pushStatus == nil {
t.printf("ActiveSideStatus is null")
t.newline()
continue
}
t.printf("Replication:")
t.newline()
t.addIndent(1)
t.renderReplicationReport(pushStatus.Replication)
t.addIndent(-1)
t.printf("Pruning Sender:")
t.newline()
t.addIndent(1)
t.renderPrunerReport(pushStatus.PruningSender)
t.addIndent(-1)
t.printf("Pruning Receiver:")
t.newline()
t.addIndent(1)
t.renderPrunerReport(pushStatus.PruningReceiver)
t.addIndent(-1)
}
}
termbox.Flush()
}
func (t *tui) renderReplicationReport(rep *replication.Report) {
if rep == nil {
t.printf("...\n")
return
}
all := make([]*fsrep.Report, 0, len(rep.Completed)+len(rep.Pending) + 1)
all = append(all, rep.Completed...)
all = append(all, rep.Pending...)
if rep.Active != nil {
all = append(all, rep.Active)
}
sort.Slice(all, func(i, j int) bool {
return all[i].Filesystem < all[j].Filesystem
})
state, err := replication.StateString(rep.Status)
if err != nil {
t.printf("Status: %q (parse error: %q)\n", rep.Status, err)
return
}
t.printf("Status: %s", state)
t.newline()
if rep.Problem != "" {
t.printf("Problem: %s", rep.Problem)
t.newline()
}
if rep.SleepUntil.After(time.Now()) &&
state & ^(replication.ContextDone|replication.Completed) != 0 {
t.printf("Sleeping until %s (%s left)\n", rep.SleepUntil, rep.SleepUntil.Sub(time.Now()))
}
if state != replication.Planning && state != replication.PlanningError {
// Progress: [---------------]
sumUpFSRep := func(rep *fsrep.Report) (transferred, total int64) {
for _, s := range rep.Pending {
transferred += s.Bytes
total += s.ExpectedBytes
}
for _, s := range rep.Completed {
transferred += s.Bytes
total += s.ExpectedBytes
}
return
}
var transferred, total int64
for _, fs := range all {
fstx, fstotal := sumUpFSRep(fs)
transferred += fstx
total += fstotal
}
t.write("Progress: ")
t.drawBar(80, transferred, total)
t.write(fmt.Sprintf(" %s / %s", ByteCountBinary(transferred), ByteCountBinary(total)))
t.newline()
}
var maxFSLen int
for _, fs := range all {
if len(fs.Filesystem) > maxFSLen {
maxFSLen = len(fs.Filesystem)
}
}
for _, fs := range all {
printFilesystemStatus(fs, t, fs == rep.Active, maxFSLen)
}
}
func (t *tui) renderPrunerReport(r *pruner.Report) {
if r == nil {
t.printf("...\n")
return
}
state, err := pruner.StateString(r.State)
if err != nil {
t.printf("Status: %q (parse error: %q)\n", r.State, err)
return
}
t.printf("Status: %s", state)
t.newline()
if r.Error != "" {
t.printf("Error: %s\n", r.Error)
}
if r.SleepUntil.After(time.Now()) {
t.printf("Sleeping until %s (%s left)\n", r.SleepUntil, r.SleepUntil.Sub(time.Now()))
}
type commonFS struct {
*pruner.FSReport
completed bool
}
all := make([]commonFS, 0, len(r.Pending) + len(r.Completed))
for i := range r.Pending {
all = append(all, commonFS{&r.Pending[i], false})
}
for i := range r.Completed {
all = append(all, commonFS{&r.Completed[i], true})
}
switch state {
case pruner.Plan: fallthrough
case pruner.PlanWait: fallthrough
case pruner.ErrPerm:
return
}
if len(all) == 0 {
t.printf("nothing to do\n")
return
}
var totalDestroyCount, completedDestroyCount int
var maxFSname int
for _, fs := range all {
totalDestroyCount += len(fs.DestroyList)
if fs.completed {
completedDestroyCount += len(fs.DestroyList)
}
if maxFSname < len(fs.Filesystem) {
maxFSname = len(fs.Filesystem)
}
}
// global progress bar
progress := int(math.Round(80 * float64(completedDestroyCount) / float64(totalDestroyCount)))
t.write("Progress: ")
t.write("[")
t.write(times("=", progress))
t.write(">")
t.write(times("-", 80 - progress))
t.write("]")
t.printf(" %d/%d snapshots", completedDestroyCount, totalDestroyCount)
t.newline()
sort.SliceStable(all, func(i, j int) bool {
return strings.Compare(all[i].Filesystem, all[j].Filesystem) == -1
})
// Draw a table-like representation of 'all'
for _, fs := range all {
t.write(rightPad(fs.Filesystem, maxFSname, " "))
t.write(" ")
if fs.Error != "" {
t.printf("ERROR: %s\n", fs.Error) // whitespace is padding
continue
}
pruneRuleActionStr := fmt.Sprintf("(destroy %d of %d snapshots)",
len(fs.DestroyList), len(fs.SnapshotList))
if fs.completed {
t.printf( "Completed %s\n", pruneRuleActionStr)
continue
}
t.write("Pending ") // whitespace is padding 10
if len(fs.DestroyList) == 1 {
t.write(fs.DestroyList[0].Name)
} else {
t.write(pruneRuleActionStr)
}
t.newline()
}
}
const snapshotIndent = 1
func calculateMaxFSLength(all []*fsrep.Report) (maxFS, maxStatus int) {
for _, e := range all {
if len(e.Filesystem) > maxFS {
maxFS = len(e.Filesystem)
}
all2 := make([]*fsrep.StepReport, 0, len(e.Pending) + len(e.Completed))
all2 = append(all2, e.Pending...)
all2 = append(all2, e.Completed...)
for _, e2 := range all2 {
elen := len(e2.Problem) + len(e2.From) + len(e2.To) + 60 // random spacing, units, labels, etc
if elen > maxStatus {
maxStatus = elen
}
}
}
return
}
func times(str string, n int) (out string) {
for i := 0; i < n; i++ {
out += str
}
return
}
func rightPad(str string, length int, pad string) string {
if len(str) > length {
return str[:length]
}
return str + times(pad, length-len(str))
}
func leftPad(str string, length int, pad string) string {
if len(str) > length {
return str[len(str)-length:]
}
return times(pad, length-len(str)) + str
}
func (t *tui) drawBar(length int, bytes, totalBytes int64) {
var completedLength int
if totalBytes > 0 {
completedLength = int(int64(length) * bytes / totalBytes)
if completedLength > length {
completedLength = length
}
} else if totalBytes == bytes {
completedLength = length
}
t.write("[")
t.write(times("=", completedLength))
t.write(">")
t.write(times("-", length-completedLength))
t.write("]")
}
func StringStepState(s fsrep.StepState) string {
switch s {
case fsrep.StepReplicationReady: return "Ready"
case fsrep.StepReplicationRetry: return "Retry"
case fsrep.StepMarkReplicatedReady: return "MarkReady"
case fsrep.StepMarkReplicatedRetry: return "MarkRetry"
case fsrep.StepPermanentError: return "PermanentError"
case fsrep.StepCompleted: return "Completed"
default:
return fmt.Sprintf("UNKNOWN %d", s)
}
}
func filesystemStatusString(rep *fsrep.Report, active bool, fsWidth int) (line string, bytes, totalBytes int64) {
bytes = int64(0)
totalBytes = int64(0)
for _, s := range rep.Pending {
bytes += s.Bytes
totalBytes += s.ExpectedBytes
}
for _, s := range rep.Completed {
bytes += s.Bytes
totalBytes += s.ExpectedBytes
}
next := ""
if rep.Problem != "" {
next = " problem: " + rep.Problem
} else if len(rep.Pending) > 0 {
if rep.Pending[0].From != "" {
next = fmt.Sprintf(" next: %s => %s", rep.Pending[0].From, rep.Pending[0].To)
} else {
next = fmt.Sprintf(" next: %s (full)", rep.Pending[0].To)
}
}
status := fmt.Sprintf("%s (step %d/%d, %s/%s)%s",
rep.Status,
len(rep.Completed), len(rep.Pending) + len(rep.Completed),
ByteCountBinary(bytes), ByteCountBinary(totalBytes),
next,
)
activeIndicator := " "
if active {
activeIndicator = "*"
}
line = fmt.Sprintf("%s %s %s",
activeIndicator,
rightPad(rep.Filesystem, fsWidth, " "),
status)
return line, bytes, totalBytes
}
func printFilesystemStatus(rep *fsrep.Report, t *tui, active bool, maxFS int) {
totalStatus, _, _ := filesystemStatusString(rep, active, maxFS)
t.write(totalStatus)
t.newline()
}
func ByteCountBinary(b int64) string {
const unit = 1024
if b < unit {
return fmt.Sprintf("%d B", b)
}
div, exp := int64(unit), 0
for n := b / unit; n >= unit; n /= unit {
div *= unit
exp++
}
return fmt.Sprintf("%.1f %ciB", float64(b)/float64(div), "KMGTPE"[exp])
}
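
ByteCountBinary above formats byte counts with 1024-based units ("KMGTPE" plus "iB"). A hedged sketch of a table-driven test (e.g. in a hypothetical client/status_test.go, not part of the commit) pins down the expected strings:

```go
// Sketch only: expected ByteCountBinary output. The values follow directly
// from the 1024-based loop in the function above.
package client

import "testing"

func TestByteCountBinarySketch(t *testing.T) {
	cases := map[int64]string{
		512:     "512 B",
		1536:    "1.5 KiB",
		3 << 20: "3.0 MiB",
	}
	for in, want := range cases {
		if got := ByteCountBinary(in); got != want {
			t.Errorf("ByteCountBinary(%d) = %q, want %q", in, got, want)
		}
	}
}
```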


@ -1,55 +1,49 @@
package cmd
package client
import (
"github.com/zrepl/zrepl/cli"
"os"
"context"
"errors"
"github.com/problame/go-netssh"
"github.com/spf13/cobra"
"github.com/zrepl/zrepl/config"
"log"
"path"
)
var StdinserverCmd = &cobra.Command{
var StdinserverCmd = &cli.Subcommand{
Use: "stdinserver CLIENT_IDENTITY",
Short: "start in stdinserver mode (from authorized_keys file)",
Run: cmdStdinServer,
Short: "stdinserver transport mode (started from authorized_keys file as forced command)",
Run: func(subcommand *cli.Subcommand, args []string) error {
return runStdinserver(subcommand.Config(), args)
},
}
func init() {
RootCmd.AddCommand(StdinserverCmd)
}
func cmdStdinServer(cmd *cobra.Command, args []string) {
func runStdinserver(config *config.Config, args []string) error {
// NOTE: the netssh proxying protocol requires exiting with non-zero status if anything goes wrong
defer os.Exit(1)
log := log.New(os.Stderr, "", log.LUTC|log.Ldate|log.Ltime)
conf, err := ParseConfig(rootArgs.configFile)
if err != nil {
log.Printf("error parsing config: %s", err)
return
}
if len(args) != 1 || args[0] == "" {
log.Print("must specify client_identity as positional argument")
return
err := errors.New("must specify client_identity as positional argument")
return err
}
identity := args[0]
unixaddr := path.Join(conf.Global.Serve.Stdinserver.SockDir, identity)
unixaddr := path.Join(config.Global.Serve.StdinServer.SockDir, identity)
log.Printf("proxying client identity '%s' to zrepl daemon '%s'", identity, unixaddr)
ctx := netssh.ContextWithLog(context.TODO(), log)
err = netssh.Proxy(ctx, unixaddr)
err := netssh.Proxy(ctx, unixaddr)
if err == nil {
log.Print("proxying finished successfully, exiting with status 0")
os.Exit(0)
}
log.Printf("error proxying: %s", err)
return nil
}

108
client/testcmd.go Normal file

@ -0,0 +1,108 @@
package client
import (
"fmt"
"github.com/spf13/pflag"
"github.com/zrepl/zrepl/cli"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/filters"
"github.com/zrepl/zrepl/zfs"
)
var TestCmd = &cli.Subcommand {
Use: "test",
SetupSubcommands: func() []*cli.Subcommand {
return []*cli.Subcommand{testFilter}
},
}
var testFilterArgs struct {
job string
all bool
input string
}
var testFilter = &cli.Subcommand{
Use: "filesystems --job JOB [--all | --input INPUT]",
Short: "test filesystems filter specified in push or source job",
SetupFlags: func(f *pflag.FlagSet) {
f.StringVar(&testFilterArgs.job, "job", "", "the name of the push or source job")
f.StringVar(&testFilterArgs.input, "input", "", "a filesystem name to test against the job's filters")
f.BoolVar(&testFilterArgs.all, "all", false, "test all local filesystems")
},
Run: runTestFilterCmd,
}
func runTestFilterCmd(subcommand *cli.Subcommand, args []string) error {
if testFilterArgs.job == "" {
return fmt.Errorf("must specify --job flag")
}
if !(testFilterArgs.all != (testFilterArgs.input != "")) { // xor
return fmt.Errorf("must set one: --all or --input")
}
conf := subcommand.Config()
var confFilter config.FilesystemsFilter
job, err := conf.Job(testFilterArgs.job)
if err != nil {
return err
}
switch j := job.Ret.(type) {
case *config.SourceJob: confFilter = j.Filesystems
case *config.PushJob: confFilter = j.Filesystems
default:
return fmt.Errorf("job type %T does not have filesystems filter", j)
}
f, err := filters.DatasetMapFilterFromConfig(confFilter)
if err != nil {
return fmt.Errorf("filter invalid: %s", err)
}
var fsnames []string
if testFilterArgs.input != "" {
fsnames = []string{testFilterArgs.input}
} else {
out, err := zfs.ZFSList([]string{"name"})
if err != nil {
return fmt.Errorf("could not list ZFS filesystems: %s", err)
}
for _, row := range out {
fsnames = append(fsnames, row[0])
}
}
fspaths := make([]*zfs.DatasetPath, len(fsnames))
for i, fsname := range fsnames {
path, err := zfs.NewDatasetPath(fsname)
if err != nil {
return err
}
fspaths[i] = path
}
hadFilterErr := false
for _, in := range fspaths {
var res string
var errStr string
pass, err := f.Filter(in)
if err != nil {
res = "ERROR"
errStr = err.Error()
hadFilterErr = true
} else if pass {
res = "ACCEPT"
} else {
res = "REJECT"
}
fmt.Printf("%s\t%s\t%s\n", res, in.ToString(), errStr)
}
if hadFilterErr {
return fmt.Errorf("filter errors occurred")
}
return nil
}

72
client/version.go Normal file
View File

@ -0,0 +1,72 @@
package client
import (
"fmt"
"github.com/spf13/pflag"
"github.com/zrepl/zrepl/cli"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon"
"github.com/zrepl/zrepl/version"
"os"
)
var versionArgs struct {
Show string
Config *config.Config
ConfigErr error
}
var VersionCmd = &cli.Subcommand{
Use: "version",
Short: "print version of zrepl binary and running daemon",
NoRequireConfig: true,
SetupFlags: func(f *pflag.FlagSet) {
f.StringVar(&versionArgs.Show, "show", "", "version info to show (client|daemon)")
},
Run: func(subcommand *cli.Subcommand, args []string) error {
versionArgs.Config = subcommand.Config()
versionArgs.ConfigErr = subcommand.ConfigParsingError()
return runVersionCmd()
},
}
func runVersionCmd() error {
args := versionArgs
if args.Show != "daemon" && args.Show != "client" && args.Show != "" {
return fmt.Errorf("show flag must be 'client' or 'daemon' or be left empty")
}
var clientVersion, daemonVersion *version.ZreplVersionInformation
if args.Show == "client" || args.Show == "" {
clientVersion = version.NewZreplVersionInformation()
fmt.Printf("client: %s\n", clientVersion.String())
}
if args.Show == "daemon" || args.Show == "" {
if args.ConfigErr != nil {
return fmt.Errorf("config parsing error: %s", args.ConfigErr)
}
httpc, err := controlHttpClient(args.Config.Global.Control.SockPath)
if err != nil {
return fmt.Errorf("server: error: %s\n", err)
}
var info version.ZreplVersionInformation
err = jsonRequestResponse(httpc, daemon.ControlJobEndpointVersion, "", &info)
if err != nil {
return fmt.Errorf("server: error: %s\n", err)
}
daemonVersion = &info
fmt.Printf("server: %s\n", daemonVersion.String())
}
if args.Show == "" {
if clientVersion.Version != daemonVersion.Version {
fmt.Fprintf(os.Stderr, "WARNING: client version != daemon version, restart zrepl daemon\n")
}
}
return nil
}


@ -1,201 +0,0 @@
package cmd
import (
"context"
"fmt"
"github.com/zrepl/zrepl/zfs"
"sort"
"time"
)
type IntervalAutosnap struct {
task *Task
DatasetFilter zfs.DatasetFilter
Prefix string
SnapshotInterval time.Duration
}
func (a *IntervalAutosnap) filterFilesystems() (fss []*zfs.DatasetPath, stop bool) {
a.task.Enter("filter_filesystems")
defer a.task.Finish()
fss, err := zfs.ZFSListMapping(a.DatasetFilter)
stop = err != nil
if err != nil {
a.task.Log().WithError(err).Error("cannot list datasets")
}
if len(fss) == 0 {
a.task.Log().Warn("no filesystem matching filesystem filter")
}
return fss, stop
}
func (a *IntervalAutosnap) findSyncPoint(fss []*zfs.DatasetPath) (syncPoint time.Time, err error) {
a.task.Enter("find_sync_point")
defer a.task.Finish()
type snapTime struct {
ds *zfs.DatasetPath
time time.Time
}
if len(fss) == 0 {
return time.Now(), nil
}
snaptimes := make([]snapTime, 0, len(fss))
now := time.Now()
a.task.Log().Debug("examine filesystem state")
for _, d := range fss {
l := a.task.Log().WithField(logFSField, d.ToString())
fsvs, err := zfs.ZFSListFilesystemVersions(d, NewPrefixFilter(a.Prefix))
if err != nil {
l.WithError(err).Error("cannot list filesystem versions")
continue
}
if len(fsvs) <= 0 {
l.WithField("prefix", a.Prefix).Info("no filesystem versions with prefix")
continue
}
// Sort versions by creation
sort.SliceStable(fsvs, func(i, j int) bool {
return fsvs[i].CreateTXG < fsvs[j].CreateTXG
})
latest := fsvs[len(fsvs)-1]
l.WithField("creation", latest.Creation).
Debug("found latest snapshot")
since := now.Sub(latest.Creation)
if since < 0 {
l.WithField("snapshot", latest.Name).
WithField("creation", latest.Creation).
Error("snapshot is from the future")
continue
}
next := now
if since < a.SnapshotInterval {
next = latest.Creation.Add(a.SnapshotInterval)
}
snaptimes = append(snaptimes, snapTime{d, next})
}
if len(snaptimes) == 0 {
snaptimes = append(snaptimes, snapTime{nil, now})
}
sort.Slice(snaptimes, func(i, j int) bool {
return snaptimes[i].time.Before(snaptimes[j].time)
})
return snaptimes[0].time, nil
}
func (a *IntervalAutosnap) waitForSyncPoint(ctx context.Context, syncPoint time.Time) {
a.task.Enter("wait_sync_point")
defer a.task.Finish()
const LOG_TIME_FMT string = time.ANSIC
a.task.Log().WithField("sync_point", syncPoint.Format(LOG_TIME_FMT)).
Info("wait for sync point")
select {
case <-ctx.Done():
a.task.Log().WithError(ctx.Err()).Info("context done")
return
case <-time.After(syncPoint.Sub(time.Now())):
}
}
func (a *IntervalAutosnap) syncUpRun(ctx context.Context, didSnaps chan struct{}) (stop bool) {
a.task.Enter("sync_up")
defer a.task.Finish()
fss, stop := a.filterFilesystems()
if stop {
return true
}
syncPoint, err := a.findSyncPoint(fss)
if err != nil {
return true
}
a.waitForSyncPoint(ctx, syncPoint)
a.task.Log().Debug("snapshot all filesystems to enable further snaps in lockstep")
a.doSnapshots(didSnaps)
return false
}
func (a *IntervalAutosnap) Run(ctx context.Context, didSnaps chan struct{}) {
if a.syncUpRun(ctx, didSnaps) {
a.task.Log().Error("stopping autosnap after error in sync up")
return
}
// task drops back to idle here
a.task.Log().Debug("setting up ticker in SnapshotInterval")
ticker := time.NewTicker(a.SnapshotInterval)
for {
select {
case <-ctx.Done():
ticker.Stop()
a.task.Log().WithError(ctx.Err()).Info("context done")
return
case <-ticker.C:
a.doSnapshots(didSnaps)
}
}
}
func (a *IntervalAutosnap) doSnapshots(didSnaps chan struct{}) {
a.task.Enter("do_snapshots")
defer a.task.Finish()
// don't cache the result from previous run in case the user added
// a new dataset in the meantime
ds, stop := a.filterFilesystems()
if stop {
return
}
// TODO channel programs -> allow a little jitter?
for _, d := range ds {
suffix := time.Now().In(time.UTC).Format("20060102_150405_000")
snapname := fmt.Sprintf("%s%s", a.Prefix, suffix)
l := a.task.Log().WithField(logFSField, d.ToString()).
WithField("snapname", snapname)
l.Info("create snapshot")
err := zfs.ZFSSnapshot(d, snapname, false)
if err != nil {
a.task.Log().WithError(err).Error("cannot create snapshot")
}
l.Info("create corresponding bookmark")
err = zfs.ZFSBookmark(d, snapname, snapname)
if err != nil {
a.task.Log().WithError(err).Error("cannot create bookmark")
}
}
select {
case didSnaps <- struct{}{}:
default:
a.task.Log().Error("warning: callback channel is full, discarding")
}
}


@ -1,29 +0,0 @@
package cmd
import (
"fmt"
"github.com/spf13/cobra"
"os"
)
var bashcompCmd = &cobra.Command{
Use: "bashcomp path/to/out/file",
Short: "generate bash completions",
Run: func(cmd *cobra.Command, args []string) {
if len(args) != 1 {
fmt.Fprintf(os.Stderr, "specify exactly one positional argument\n")
cmd.Usage()
os.Exit(1)
}
if err := RootCmd.GenBashCompletionFile(args[0]); err != nil {
fmt.Fprintf(os.Stderr, "error generating bash completion: %s", err)
os.Exit(1)
}
},
Hidden: true,
}
func init() {
RootCmd.AddCommand(bashcompCmd)
}


@ -1,102 +0,0 @@
package cmd
import (
"io"
"fmt"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/zfs"
)
type Config struct {
Global Global
Jobs map[string]Job
}
func (c *Config) LookupJob(name string) (j Job, err error) {
j, ok := c.Jobs[name]
if !ok {
return nil, errors.Errorf("job '%s' is not defined", name)
}
return j, nil
}
type Global struct {
Serve struct {
Stdinserver struct {
SockDir string
}
}
Control struct {
Sockpath string
}
logging *LoggingConfig
}
type JobDebugSettings struct {
Conn struct {
ReadDump string `mapstructure:"read_dump"`
WriteDump string `mapstructure:"write_dump"`
}
RPC struct {
Log bool
}
}
type RWCConnecter interface {
Connect() (io.ReadWriteCloser, error)
}
type AuthenticatedChannelListenerFactory interface {
Listen() (AuthenticatedChannelListener, error)
}
type AuthenticatedChannelListener interface {
Accept() (ch io.ReadWriteCloser, err error)
Close() (err error)
}
type SSHStdinServerConnectDescr struct {
}
type PrunePolicy interface {
// Prune filters versions and decide which to keep and which to remove.
// Prune **does not** implement the actual removal of the versions.
Prune(fs *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error)
}
type PruningJob interface {
Pruner(task *Task, side PrunePolicySide, dryRun bool) (Pruner, error)
}
// A type for constants describing different prune policies of a PruningJob
// This is mostly a special-case for LocalJob, which is the only job that has two prune policies
// instead of one.
// It implements github.com/spf13/pflag.Value to be used as CLI flag for the test subcommand
type PrunePolicySide string
const (
PrunePolicySideDefault PrunePolicySide = ""
PrunePolicySideLeft PrunePolicySide = "left"
PrunePolicySideRight PrunePolicySide = "right"
)
func (s *PrunePolicySide) String() string {
return string(*s)
}
func (s *PrunePolicySide) Set(news string) error {
p := PrunePolicySide(news)
switch p {
case PrunePolicySideRight:
fallthrough
case PrunePolicySideLeft:
*s = p
default:
return errors.Errorf("must be either %s or %s", PrunePolicySideLeft, PrunePolicySideRight)
}
return nil
}
func (s *PrunePolicySide) Type() string {
return fmt.Sprintf("%s | %s", PrunePolicySideLeft, PrunePolicySideRight)
}


@ -1,66 +0,0 @@
package cmd
import (
"fmt"
"io"
"context"
"github.com/jinzhu/copier"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/problame/go-netssh"
"time"
)
type SSHStdinserverConnecter struct {
Host string
User string
Port uint16
IdentityFile string `mapstructure:"identity_file"`
TransportOpenCommand []string `mapstructure:"transport_open_command"`
SSHCommand string `mapstructure:"ssh_command"`
Options []string
DialTimeout string `mapstructure:"dial_timeout"`
dialTimeout time.Duration
}
func parseSSHStdinserverConnecter(i map[string]interface{}) (c *SSHStdinserverConnecter, err error) {
c = &SSHStdinserverConnecter{}
if err = mapstructure.Decode(i, c); err != nil {
err = errors.New(fmt.Sprintf("could not parse ssh transport: %s", err))
return nil, err
}
if c.DialTimeout != "" {
c.dialTimeout, err = time.ParseDuration(c.DialTimeout)
if err != nil {
return nil, errors.Wrap(err, "cannot parse dial_timeout")
}
} else {
c.dialTimeout = 10 * time.Second
}
// TODO assert fields are filled
return
}
func (c *SSHStdinserverConnecter) Connect() (rwc io.ReadWriteCloser, err error) {
var endpoint netssh.Endpoint
if err = copier.Copy(&endpoint, c); err != nil {
return nil, errors.WithStack(err)
}
var dialCtx context.Context
dialCtx, dialCancel := context.WithTimeout(context.TODO(), c.dialTimeout) // context.TODO tied to error handling below
defer dialCancel()
if rwc, err = netssh.Dial(dialCtx, endpoint); err != nil {
if err == context.DeadlineExceeded {
err = errors.Errorf("dial_timeout of %s exceeded", c.dialTimeout)
}
err = errors.WithStack(err)
return
}
return
}


@ -1,36 +0,0 @@
package cmd
import (
"github.com/pkg/errors"
"github.com/zrepl/zrepl/zfs"
"strings"
)
type PrefixFilter struct {
prefix string
fstype zfs.VersionType
fstypeSet bool // optionals anyone?
}
func NewPrefixFilter(prefix string) *PrefixFilter {
return &PrefixFilter{prefix: prefix}
}
func NewTypedPrefixFilter(prefix string, versionType zfs.VersionType) *PrefixFilter {
return &PrefixFilter{prefix, versionType, true}
}
func parseSnapshotPrefix(i string) (p string, err error) {
if len(i) <= 0 {
err = errors.Errorf("snapshot prefix must not be empty string")
return
}
p = i
return
}
func (f *PrefixFilter) Filter(fsv zfs.FilesystemVersion) (accept bool, err error) {
fstypeMatches := (!f.fstypeSet || fsv.Type == f.fstype)
prefixMatches := strings.HasPrefix(fsv.Name, f.prefix)
return fstypeMatches && prefixMatches, nil
}


@ -1,145 +0,0 @@
package cmd
import (
"bytes"
"context"
"encoding/json"
"github.com/pkg/errors"
"io"
"net"
"net/http"
)
type ControlJob struct {
Name string
sockaddr *net.UnixAddr
}
func NewControlJob(name, sockpath string) (j *ControlJob, err error) {
j = &ControlJob{Name: name}
j.sockaddr, err = net.ResolveUnixAddr("unix", sockpath)
if err != nil {
err = errors.Wrap(err, "cannot resolve unix address")
return
}
return
}
func (j *ControlJob) JobName() string {
return j.Name
}
func (j *ControlJob) JobType() JobType { return JobTypeControl }
func (j *ControlJob) JobStatus(ctx context.Context) (*JobStatus, error) {
return &JobStatus{Tasks: nil}, nil
}
const (
ControlJobEndpointPProf string = "/debug/pprof"
ControlJobEndpointVersion string = "/version"
ControlJobEndpointStatus string = "/status"
)
func (j *ControlJob) JobStart(ctx context.Context) {
log := ctx.Value(contextKeyLog).(Logger)
defer log.Info("control job finished")
daemon := ctx.Value(contextKeyDaemon).(*Daemon)
l, err := ListenUnixPrivate(j.sockaddr)
if err != nil {
log.WithError(err).Error("error listening")
return
}
pprofServer := NewPProfServer(ctx)
mux := http.NewServeMux()
mux.Handle(ControlJobEndpointPProf, requestLogger{log: log, handlerFunc: func(w http.ResponseWriter, r *http.Request) {
var msg PprofServerControlMsg
err := json.NewDecoder(r.Body).Decode(&msg)
if err != nil {
log.WithError(err).Error("bad pprof request from client")
w.WriteHeader(http.StatusBadRequest)
}
pprofServer.Control(msg)
w.WriteHeader(200)
}})
mux.Handle(ControlJobEndpointVersion,
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
return NewZreplVersionInformation(), nil
}}})
mux.Handle(ControlJobEndpointStatus,
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
return daemon.Status(), nil
}}})
server := http.Server{Handler: mux}
outer:
for {
served := make(chan error)
go func() {
served <- server.Serve(l)
close(served)
}()
select {
case <-ctx.Done():
log.WithError(ctx.Err()).Info("context done")
server.Shutdown(context.Background())
break outer
case err = <-served:
if err != nil {
log.WithError(err).Error("error serving")
break outer
}
}
}
}
type jsonResponder struct {
producer func() (interface{}, error)
}
func (j jsonResponder) ServeHTTP(w http.ResponseWriter, r *http.Request) {
res, err := j.producer()
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
io.WriteString(w, err.Error())
return
}
var buf bytes.Buffer
err = json.NewEncoder(&buf).Encode(res)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
io.WriteString(w, err.Error())
} else {
io.Copy(w, &buf)
}
}
type requestLogger struct {
log Logger
handler http.Handler
handlerFunc http.HandlerFunc
}
func (l requestLogger) ServeHTTP(w http.ResponseWriter, r *http.Request) {
log := l.log.WithField("method", r.Method).WithField("url", r.URL)
log.Info("start")
if l.handlerFunc != nil {
l.handlerFunc(w, r)
} else if l.handler != nil {
l.handler.ServeHTTP(w, r)
} else {
log.Error("no handler or handlerFunc configured")
}
log.Info("finish")
}


@ -1,216 +0,0 @@
package cmd
import (
"time"
"context"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/rpc"
"github.com/zrepl/zrepl/zfs"
"sync"
)
type LocalJob struct {
Name string
Mapping *DatasetMapFilter
SnapshotPrefix string
Interval time.Duration
InitialReplPolicy InitialReplPolicy
PruneLHS PrunePolicy
PruneRHS PrunePolicy
Debug JobDebugSettings
snapperTask *Task
mainTask *Task
handlerTask *Task
pruneRHSTask *Task
pruneLHSTask *Task
}
func parseLocalJob(c JobParsingContext, name string, i map[string]interface{}) (j *LocalJob, err error) {
var asMap struct {
Mapping map[string]string
SnapshotPrefix string `mapstructure:"snapshot_prefix"`
Interval string
InitialReplPolicy string `mapstructure:"initial_repl_policy"`
PruneLHS map[string]interface{} `mapstructure:"prune_lhs"`
PruneRHS map[string]interface{} `mapstructure:"prune_rhs"`
Debug map[string]interface{}
}
if err = mapstructure.Decode(i, &asMap); err != nil {
err = errors.Wrap(err, "mapstructure error")
return nil, err
}
j = &LocalJob{Name: name}
if j.Mapping, err = parseDatasetMapFilter(asMap.Mapping, false); err != nil {
return
}
if j.SnapshotPrefix, err = parseSnapshotPrefix(asMap.SnapshotPrefix); err != nil {
return
}
if j.Interval, err = parsePostitiveDuration(asMap.Interval); err != nil {
err = errors.Wrap(err, "cannot parse interval")
return
}
if j.InitialReplPolicy, err = parseInitialReplPolicy(asMap.InitialReplPolicy, DEFAULT_INITIAL_REPL_POLICY); err != nil {
return
}
if j.PruneLHS, err = parsePrunePolicy(asMap.PruneLHS, true); err != nil {
err = errors.Wrap(err, "cannot parse 'prune_lhs'")
return
}
if j.PruneRHS, err = parsePrunePolicy(asMap.PruneRHS, false); err != nil {
err = errors.Wrap(err, "cannot parse 'prune_rhs'")
return
}
if err = mapstructure.Decode(asMap.Debug, &j.Debug); err != nil {
err = errors.Wrap(err, "cannot parse 'debug'")
return
}
return
}
func (j *LocalJob) JobName() string {
return j.Name
}
func (j *LocalJob) JobType() JobType { return JobTypeLocal }
func (j *LocalJob) JobStart(ctx context.Context) {
rootLog := ctx.Value(contextKeyLog).(Logger)
j.snapperTask = NewTask("snapshot", j, rootLog)
j.mainTask = NewTask("main", j, rootLog)
j.handlerTask = NewTask("handler", j, rootLog)
j.pruneRHSTask = NewTask("prune_rhs", j, rootLog)
j.pruneLHSTask = NewTask("prune_lhs", j, rootLog)
local := rpc.NewLocalRPC()
// Allow access to any dataset since we control what mapping
// is passed to the pull routine.
// All local datasets will be passed to its Map() function,
// but only those for which a mapping exists will actually be pulled.
// We can pay this small performance penalty for now.
handler := NewHandler(j.handlerTask.Log(), localPullACL{}, NewPrefixFilter(j.SnapshotPrefix))
registerEndpoints(local, handler)
snapper := IntervalAutosnap{
task: j.snapperTask,
DatasetFilter: j.Mapping.AsFilter(),
Prefix: j.SnapshotPrefix,
SnapshotInterval: j.Interval,
}
plhs, err := j.Pruner(j.pruneLHSTask, PrunePolicySideLeft, false)
if err != nil {
rootLog.WithError(err).Error("error creating lhs pruner")
return
}
prhs, err := j.Pruner(j.pruneRHSTask, PrunePolicySideRight, false)
if err != nil {
rootLog.WithError(err).Error("error creating rhs pruner")
return
}
didSnaps := make(chan struct{})
go snapper.Run(ctx, didSnaps)
outer:
for {
select {
case <-ctx.Done():
j.mainTask.Log().WithError(ctx.Err()).Info("context")
break outer
case <-didSnaps:
j.mainTask.Log().Debug("finished taking snapshots")
j.mainTask.Log().Info("starting replication procedure")
}
j.mainTask.Log().Debug("replicating from lhs to rhs")
j.mainTask.Enter("replicate")
puller := Puller{j.mainTask, local, j.Mapping, j.InitialReplPolicy}
puller.Pull()
j.mainTask.Finish()
// use a ctx as soon as Pull gains ctx support
select {
case <-ctx.Done():
break outer
default:
}
var wg sync.WaitGroup
j.mainTask.Log().Info("pruning lhs")
wg.Add(1)
go func() {
plhs.Run(ctx)
wg.Done()
}()
j.mainTask.Log().Info("pruning rhs")
wg.Add(1)
go func() {
prhs.Run(ctx)
wg.Done()
}()
wg.Wait()
}
}
func (j *LocalJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
return &JobStatus{Tasks: []*TaskStatus{
j.snapperTask.Status(),
j.pruneLHSTask.Status(),
j.pruneRHSTask.Status(),
j.mainTask.Status(),
}}, nil
}
func (j *LocalJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
var dsfilter zfs.DatasetFilter
var pp PrunePolicy
switch side {
case PrunePolicySideLeft:
pp = j.PruneLHS
dsfilter = j.Mapping.AsFilter()
case PrunePolicySideRight:
pp = j.PruneRHS
dsfilter, err = j.Mapping.InvertedFilter()
if err != nil {
err = errors.Wrap(err, "cannot invert mapping for prune_rhs")
return
}
default:
err = errors.Errorf("must be either left or right side")
return
}
p = Pruner{
task,
time.Now(),
dryRun,
dsfilter,
j.SnapshotPrefix,
pp,
}
return
}


@ -1,99 +0,0 @@
package cmd
import (
"context"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/zrepl/zrepl/zfs"
"net"
"net/http"
)
type PrometheusJob struct {
Name string
Listen string
}
var prom struct {
taskLastActiveStart *prometheus.GaugeVec
taskLastActiveDuration *prometheus.GaugeVec
taskLogEntries *prometheus.CounterVec
}
func init() {
prom.taskLastActiveStart = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "zrepl",
Subsystem: "daemon",
Name: "task_last_active_start",
Help: "point in time at which the job task last left idle state",
}, []string{"zrepl_job", "job_type", "task"})
prom.taskLastActiveDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "zrepl",
Subsystem: "daemon",
Name: "task_last_active_duration",
Help: "seconds that the last run of a job task spent between leaving and re-entering idle state",
}, []string{"zrepl_job", "job_type", "task"})
prom.taskLogEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "zrepl",
Subsystem: "daemon",
Name: "task_log_entries",
Help: "number of log entries per job task and level",
}, []string{"zrepl_job", "job_type", "task", "level"})
prometheus.MustRegister(prom.taskLastActiveStart)
prometheus.MustRegister(prom.taskLastActiveDuration)
prometheus.MustRegister(prom.taskLogEntries)
}
func parsePrometheusJob(c JobParsingContext, name string, i map[string]interface{}) (j *PrometheusJob, err error) {
var s struct {
Listen string
}
if err := mapstructure.Decode(i, &s); err != nil {
return nil, errors.Wrap(err, "mapstructure error")
}
if s.Listen == "" {
return nil, errors.New("must specify 'listen' attribute")
}
return &PrometheusJob{name, s.Listen}, nil
}
func (j *PrometheusJob) JobName() string { return j.Name }
func (j *PrometheusJob) JobType() JobType { return JobTypePrometheus }
func (j *PrometheusJob) JobStart(ctx context.Context) {
if err := zfs.PrometheusRegister(prometheus.DefaultRegisterer); err != nil {
panic(err)
}
log := ctx.Value(contextKeyLog).(Logger)
task := NewTask("main", j, log)
log = task.Log()
l, err := net.Listen("tcp", j.Listen)
if err != nil {
log.WithError(err).Error("cannot listen")
}
go func() {
select {
case <-ctx.Done():
l.Close()
}
}()
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())
err = http.Serve(l, mux)
if err != nil {
log.WithError(err).Error("error while serving")
}
}
func (*PrometheusJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
return &JobStatus{}, nil
}


@ -1,197 +0,0 @@
package cmd
import (
"time"
"context"
"fmt"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/rpc"
"github.com/zrepl/zrepl/util"
)
type PullJob struct {
Name string
Connect RWCConnecter
Interval time.Duration
Mapping *DatasetMapFilter
// constructed from mapping during parsing
pruneFilter *DatasetMapFilter
SnapshotPrefix string
InitialReplPolicy InitialReplPolicy
Prune PrunePolicy
Debug JobDebugSettings
task *Task
}
func parsePullJob(c JobParsingContext, name string, i map[string]interface{}) (j *PullJob, err error) {
var asMap struct {
Connect map[string]interface{}
Interval string
Mapping map[string]string
InitialReplPolicy string `mapstructure:"initial_repl_policy"`
Prune map[string]interface{}
SnapshotPrefix string `mapstructure:"snapshot_prefix"`
Debug map[string]interface{}
}
if err = mapstructure.Decode(i, &asMap); err != nil {
err = errors.Wrap(err, "mapstructure error")
return nil, err
}
j = &PullJob{Name: name}
j.Connect, err = parseSSHStdinserverConnecter(asMap.Connect)
if err != nil {
err = errors.Wrap(err, "cannot parse 'connect'")
return nil, err
}
if j.Interval, err = parsePostitiveDuration(asMap.Interval); err != nil {
err = errors.Wrap(err, "cannot parse 'interval'")
return nil, err
}
j.Mapping, err = parseDatasetMapFilter(asMap.Mapping, false)
if err != nil {
err = errors.Wrap(err, "cannot parse 'mapping'")
return nil, err
}
if j.pruneFilter, err = j.Mapping.InvertedFilter(); err != nil {
err = errors.Wrap(err, "cannot automatically invert 'mapping' for prune job")
return nil, err
}
j.InitialReplPolicy, err = parseInitialReplPolicy(asMap.InitialReplPolicy, DEFAULT_INITIAL_REPL_POLICY)
if err != nil {
err = errors.Wrap(err, "cannot parse 'initial_repl_policy'")
return
}
if j.SnapshotPrefix, err = parseSnapshotPrefix(asMap.SnapshotPrefix); err != nil {
return
}
if j.Prune, err = parsePrunePolicy(asMap.Prune, false); err != nil {
err = errors.Wrap(err, "cannot parse prune policy")
return
}
if err = mapstructure.Decode(asMap.Debug, &j.Debug); err != nil {
err = errors.Wrap(err, "cannot parse 'debug'")
return
}
return
}
func (j *PullJob) JobName() string {
return j.Name
}
func (j *PullJob) JobType() JobType { return JobTypePull }
func (j *PullJob) JobStart(ctx context.Context) {
log := ctx.Value(contextKeyLog).(Logger)
defer log.Info("exiting")
j.task = NewTask("main", j, log)
// j.task is idle here
ticker := time.NewTicker(j.Interval)
for {
j.doRun(ctx)
select {
case <-ctx.Done():
j.task.Log().WithError(ctx.Err()).Info("context")
return
case <-ticker.C:
}
}
}
func (j *PullJob) doRun(ctx context.Context) {
j.task.Enter("run")
defer j.task.Finish()
j.task.Log().Info("connecting")
rwc, err := j.Connect.Connect()
if err != nil {
j.task.Log().WithError(err).Error("error connecting")
return
}
rwc, err = util.NewReadWriteCloserLogger(rwc, j.Debug.Conn.ReadDump, j.Debug.Conn.WriteDump)
if err != nil {
return
}
client := rpc.NewClient(rwc)
if j.Debug.RPC.Log {
client.SetLogger(j.task.Log(), true)
}
j.task.Enter("pull")
puller := Puller{j.task, client, j.Mapping, j.InitialReplPolicy}
puller.Pull()
closeRPCWithTimeout(j.task, client, time.Second*1, "")
rwc.Close()
j.task.Finish()
j.task.Enter("prune")
pruner, err := j.Pruner(j.task, PrunePolicySideDefault, false)
if err != nil {
j.task.Log().WithError(err).Error("error creating pruner")
} else {
pruner.Run(ctx)
}
j.task.Finish()
}
func (j *PullJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
return &JobStatus{Tasks: []*TaskStatus{j.task.Status()}}, nil
}
func (j *PullJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
p = Pruner{
task,
time.Now(),
dryRun,
j.pruneFilter,
j.SnapshotPrefix,
j.Prune,
}
return
}
func closeRPCWithTimeout(task *Task, remote rpc.RPCClient, timeout time.Duration, goodbye string) {
task.Log().Info("closing rpc connection")
ch := make(chan error)
go func() {
ch <- remote.Close()
close(ch)
}()
var err error
select {
case <-time.After(timeout):
err = fmt.Errorf("timeout exceeded (%s)", timeout)
case closeRequestErr := <-ch:
err = closeRequestErr
}
if err != nil {
task.Log().WithError(err).Error("error closing connection")
}
return
}


@ -1,252 +0,0 @@
package cmd
import (
"context"
"io"
"time"
mapstructure "github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/rpc"
"github.com/zrepl/zrepl/util"
)
type SourceJob struct {
Name string
Serve AuthenticatedChannelListenerFactory
Filesystems *DatasetMapFilter
SnapshotPrefix string
Interval time.Duration
Prune PrunePolicy
Debug JobDebugSettings
serveTask *Task
autosnapTask *Task
pruneTask *Task
}
func parseSourceJob(c JobParsingContext, name string, i map[string]interface{}) (j *SourceJob, err error) {
var asMap struct {
Serve map[string]interface{}
Filesystems map[string]string
SnapshotPrefix string `mapstructure:"snapshot_prefix"`
Interval string
Prune map[string]interface{}
Debug map[string]interface{}
}
if err = mapstructure.Decode(i, &asMap); err != nil {
err = errors.Wrap(err, "mapstructure error")
return nil, err
}
j = &SourceJob{Name: name}
if j.Serve, err = parseAuthenticatedChannelListenerFactory(c, asMap.Serve); err != nil {
return
}
if j.Filesystems, err = parseDatasetMapFilter(asMap.Filesystems, true); err != nil {
return
}
if j.SnapshotPrefix, err = parseSnapshotPrefix(asMap.SnapshotPrefix); err != nil {
return
}
if j.Interval, err = parsePostitiveDuration(asMap.Interval); err != nil {
err = errors.Wrap(err, "cannot parse 'interval'")
return
}
if j.Prune, err = parsePrunePolicy(asMap.Prune, true); err != nil {
err = errors.Wrap(err, "cannot parse 'prune'")
return
}
if err = mapstructure.Decode(asMap.Debug, &j.Debug); err != nil {
err = errors.Wrap(err, "cannot parse 'debug'")
return
}
return
}
func (j *SourceJob) JobName() string {
return j.Name
}
func (j *SourceJob) JobType() JobType { return JobTypeSource }
func (j *SourceJob) JobStart(ctx context.Context) {
log := ctx.Value(contextKeyLog).(Logger)
defer log.Info("exiting")
j.autosnapTask = NewTask("autosnap", j, log)
j.pruneTask = NewTask("prune", j, log)
j.serveTask = NewTask("serve", j, log)
a := IntervalAutosnap{j.autosnapTask, j.Filesystems, j.SnapshotPrefix, j.Interval}
p, err := j.Pruner(j.pruneTask, PrunePolicySideDefault, false)
if err != nil {
log.WithError(err).Error("error creating pruner")
return
}
didSnaps := make(chan struct{})
go j.serve(ctx, j.serveTask)
go a.Run(ctx, didSnaps)
outer:
for {
select {
case <-ctx.Done():
break outer
case <-didSnaps:
log.Info("starting pruner")
p.Run(ctx)
log.Info("pruner done")
}
}
log.WithError(ctx.Err()).Info("context")
}
func (j *SourceJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
return &JobStatus{
Tasks: []*TaskStatus{
j.autosnapTask.Status(),
j.pruneTask.Status(),
j.serveTask.Status(),
}}, nil
}
func (j *SourceJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
p = Pruner{
task,
time.Now(),
dryRun,
j.Filesystems,
j.SnapshotPrefix,
j.Prune,
}
return
}
func (j *SourceJob) serve(ctx context.Context, task *Task) {
listener, err := j.Serve.Listen()
if err != nil {
task.Log().WithError(err).Error("error listening")
return
}
type rwcChanMsg struct {
rwc io.ReadWriteCloser
err error
}
rwcChan := make(chan rwcChanMsg)
// Serve connections until interrupted or error
outer:
for {
go func() {
rwc, err := listener.Accept()
if err != nil {
rwcChan <- rwcChanMsg{rwc, err}
close(rwcChan)
return
}
rwcChan <- rwcChanMsg{rwc, err}
}()
select {
case rwcMsg := <-rwcChan:
if rwcMsg.err != nil {
task.Log().WithError(err).Error("error accepting connection")
break outer
}
j.handleConnection(rwcMsg.rwc, task)
case <-ctx.Done():
task.Log().WithError(ctx.Err()).Info("context")
break outer
}
}
task.Enter("close_listener")
defer task.Finish()
err = listener.Close()
if err != nil {
task.Log().WithError(err).Error("error closing listener")
}
return
}
func (j *SourceJob) handleConnection(rwc io.ReadWriteCloser, task *Task) {
task.Enter("handle_connection")
defer task.Finish()
task.Log().Info("handling client connection")
rwc, err := util.NewReadWriteCloserLogger(rwc, j.Debug.Conn.ReadDump, j.Debug.Conn.WriteDump)
if err != nil {
panic(err)
}
// construct connection handler
handler := NewHandler(task.Log(), j.Filesystems, NewPrefixFilter(j.SnapshotPrefix))
// handle connection
rpcServer := rpc.NewServer(rwc)
if j.Debug.RPC.Log {
rpclog := task.Log().WithField("subsystem", "rpc")
rpcServer.SetLogger(rpclog, true)
}
registerEndpoints(rpcServer, handler)
if err = rpcServer.Serve(); err != nil {
task.Log().WithError(err).Error("error serving connection")
}
// wait for client to close connection
// FIXME: we cannot just close it like we would with a TCP socket because
// FIXME: go-netssh's Close() may overtake the remaining data in the pipe
const CLIENT_HANGUP_TIMEOUT = 1 * time.Second
task.Log().
WithField("timeout", CLIENT_HANGUP_TIMEOUT).
Debug("waiting for client to hang up")
wchan := make(chan error)
go func() {
var pseudo [1]byte
_, err := io.ReadFull(rwc, pseudo[:])
wchan <- err
}()
var werr error
select {
case werr = <-wchan:
// all right
case <-time.After(CLIENT_HANGUP_TIMEOUT):
werr = errors.New("client did not close connection within timeout")
}
if werr != nil && werr != io.EOF {
task.Log().WithError(werr).
Error("error waiting for client to hang up")
}
task.Log().Info("closing client connection")
if err = rwc.Close(); err != nil {
task.Log().WithError(err).Error("error force-closing connection")
}
}


@ -1,251 +0,0 @@
package cmd
import (
"crypto/tls"
"crypto/x509"
"github.com/mattn/go-isatty"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/logger"
"io/ioutil"
"os"
"time"
)
type LoggingConfig struct {
Outlets *logger.Outlets
}
type MetadataFlags int64
const (
MetadataTime MetadataFlags = 1 << iota
MetadataLevel
MetadataNone MetadataFlags = 0
MetadataAll MetadataFlags = ^0
)
func parseLogging(i interface{}) (c *LoggingConfig, err error) {
c = &LoggingConfig{}
c.Outlets = logger.NewOutlets()
var asList []interface{}
if err = mapstructure.Decode(i, &asList); err != nil {
return nil, errors.Wrap(err, "mapstructure error")
}
if len(asList) == 0 {
// Default config
out := WriterOutlet{&HumanFormatter{}, os.Stdout}
c.Outlets.Add(out, logger.Warn)
return
}
var syslogOutlets, stdoutOutlets int
for lei, le := range asList {
outlet, minLevel, err := parseOutlet(le)
if err != nil {
return nil, errors.Wrapf(err, "cannot parse outlet #%d", lei)
}
var _ logger.Outlet = WriterOutlet{}
var _ logger.Outlet = &SyslogOutlet{}
switch outlet.(type) {
case *SyslogOutlet:
syslogOutlets++
case WriterOutlet:
stdoutOutlets++
}
c.Outlets.Add(outlet, minLevel)
}
if syslogOutlets > 1 {
return nil, errors.Errorf("can only define one 'syslog' outlet")
}
if stdoutOutlets > 1 {
return nil, errors.Errorf("can only define one 'stdout' outlet")
}
return c, nil
}
func parseLogFormat(i interface{}) (f EntryFormatter, err error) {
var is string
switch j := i.(type) {
case string:
is = j
default:
return nil, errors.Errorf("invalid log format: wrong type: %T", i)
}
switch is {
case "human":
return &HumanFormatter{}, nil
case "logfmt":
return &LogfmtFormatter{}, nil
case "json":
return &JSONFormatter{}, nil
default:
return nil, errors.Errorf("invalid log format: '%s'", is)
}
}
func parseOutlet(i interface{}) (o logger.Outlet, level logger.Level, err error) {
var in struct {
Outlet string
Level string
Format string
}
if err = mapstructure.Decode(i, &in); err != nil {
err = errors.Wrap(err, "mapstructure error")
return
}
if in.Outlet == "" || in.Level == "" || in.Format == "" {
err = errors.Errorf("must specify 'outlet', 'level' and 'format' field")
return
}
minLevel, err := logger.ParseLevel(in.Level)
if err != nil {
err = errors.Wrap(err, "cannot parse 'level' field")
return
}
formatter, err := parseLogFormat(in.Format)
if err != nil {
err = errors.Wrap(err, "cannot parse")
return
}
switch in.Outlet {
case "stdout":
o, err = parseStdoutOutlet(i, formatter)
case "tcp":
o, err = parseTCPOutlet(i, formatter)
case "syslog":
o, err = parseSyslogOutlet(i, formatter)
default:
err = errors.Errorf("unknown outlet type '%s'", in.Outlet)
}
return o, minLevel, err
}
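// Illustrative sketch, not part of the original file: parseOutlet expects a
// generic map as decoded from the YAML 'logging' section. The "warn" level
// string is an assumption and must be accepted by logger.ParseLevel.
func exampleParseStdoutOutletSpec() (logger.Outlet, logger.Level, error) {
	spec := map[string]interface{}{
		"outlet": "stdout",
		"level":  "warn",
		"format": "human",
	}
	return parseOutlet(spec)
}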
func parseStdoutOutlet(i interface{}, formatter EntryFormatter) (WriterOutlet, error) {
var in struct {
Time bool
}
if err := mapstructure.Decode(i, &in); err != nil {
return WriterOutlet{}, errors.Wrap(err, "invalid structure for stdout outlet")
}
flags := MetadataAll
writer := os.Stdout
if !isatty.IsTerminal(writer.Fd()) && !in.Time {
flags &= ^MetadataTime
}
formatter.SetMetadataFlags(flags)
return WriterOutlet{
formatter,
os.Stdout,
}, nil
}
func parseTCPOutlet(i interface{}, formatter EntryFormatter) (out *TCPOutlet, err error) {
var in struct {
Net string
Address string
RetryInterval string `mapstructure:"retry_interval"`
TLS *struct {
CA string
Cert string
Key string
}
}
if err = mapstructure.Decode(i, &in); err != nil {
return nil, errors.Wrap(err, "mapstructure error")
}
retryInterval, err := time.ParseDuration(in.RetryInterval)
if err != nil {
return nil, errors.Wrap(err, "cannot parse 'retry_interval'")
}
if len(in.Net) == 0 {
return nil, errors.New("field 'net' must not be empty")
}
if len(in.Address) == 0 {
return nil, errors.New("field 'address' must not be empty")
}
var tlsConfig *tls.Config
if in.TLS != nil {
cert, err := tls.LoadX509KeyPair(in.TLS.Cert, in.TLS.Key)
if err != nil {
return nil, errors.Wrap(err, "cannot load client cert")
}
var rootCAs *x509.CertPool
if in.TLS.CA == "" {
if rootCAs, err = x509.SystemCertPool(); err != nil {
return nil, errors.Wrap(err, "cannot open system cert pool")
}
} else {
rootCAs = x509.NewCertPool()
rootCAPEM, err := ioutil.ReadFile(in.TLS.CA)
if err != nil {
return nil, errors.Wrap(err, "cannot load CA cert")
}
if !rootCAs.AppendCertsFromPEM(rootCAPEM) {
return nil, errors.New("cannot parse CA cert")
}
}
if err != nil && in.TLS.CA == "" {
return nil, errors.Wrap(err, "cannot load root ca pool")
}
tlsConfig = &tls.Config{
Certificates: []tls.Certificate{cert},
RootCAs: rootCAs,
}
tlsConfig.BuildNameToCertificate()
}
formatter.SetMetadataFlags(MetadataAll)
return NewTCPOutlet(formatter, in.Net, in.Address, tlsConfig, retryInterval), nil
}
func parseSyslogOutlet(i interface{}, formatter EntryFormatter) (out *SyslogOutlet, err error) {
var in struct {
RetryInterval string `mapstructure:"retry_interval"`
}
if err = mapstructure.Decode(i, &in); err != nil {
return nil, errors.Wrap(err, "mapstructure error")
}
out = &SyslogOutlet{}
out.Formatter = formatter
out.Formatter.SetMetadataFlags(MetadataNone)
out.RetryInterval = 0 // default to 0 as we assume local syslog will just work
if in.RetryInterval != "" {
out.RetryInterval, err = time.ParseDuration(in.RetryInterval)
if err != nil {
return nil, errors.Wrap(err, "cannot parse 'retry_interval'")
}
}
return
}


@ -1,322 +0,0 @@
package cmd
import (
"io/ioutil"
"fmt"
yaml "github.com/go-yaml/yaml"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"os"
"regexp"
"strconv"
"time"
)
var ConfigFileDefaultLocations []string = []string{
"/etc/zrepl/zrepl.yml",
"/usr/local/etc/zrepl/zrepl.yml",
}
const (
JobNameControl string = "control"
)
var ReservedJobNames []string = []string{
JobNameControl,
}
type ConfigParsingContext struct {
Global *Global
}
func ParseConfig(path string) (config *Config, err error) {
if path == "" {
// Try default locations
for _, l := range ConfigFileDefaultLocations {
stat, err := os.Stat(l)
if err != nil {
continue
}
if !stat.Mode().IsRegular() {
err = errors.Errorf("file at default location is not a regular file: %s", l)
continue
}
path = l
break
}
}
var i interface{}
var bytes []byte
if bytes, err = ioutil.ReadFile(path); err != nil {
err = errors.WithStack(err)
return
}
if err = yaml.Unmarshal(bytes, &i); err != nil {
err = errors.WithStack(err)
return
}
return parseConfig(i)
}
func parseConfig(i interface{}) (c *Config, err error) {
var asMap struct {
Global map[string]interface{}
Jobs []map[string]interface{}
}
if err := mapstructure.Decode(i, &asMap); err != nil {
return nil, errors.Wrap(err, "config root must be a dict")
}
c = &Config{}
// Parse global with defaults
c.Global.Serve.Stdinserver.SockDir = "/var/run/zrepl/stdinserver"
c.Global.Control.Sockpath = "/var/run/zrepl/control"
err = mapstructure.Decode(asMap.Global, &c.Global)
if err != nil {
err = errors.Wrap(err, "mapstructure error on 'global' section: %s")
return
}
if c.Global.logging, err = parseLogging(asMap.Global["logging"]); err != nil {
return nil, errors.Wrap(err, "cannot parse logging section")
}
cpc := ConfigParsingContext{&c.Global}
jpc := JobParsingContext{cpc}
c.Jobs = make(map[string]Job, len(asMap.Jobs))
// FIXME internal jobs should not be mixed with user jobs
// Monitoring Jobs
var monJobs []map[string]interface{}
if err := mapstructure.Decode(asMap.Global["monitoring"], &monJobs); err != nil {
return nil, errors.Wrap(err, "cannot parse monitoring section")
}
for i, jc := range monJobs {
if jc["name"] == "" || jc["name"] == nil {
// FIXME internal jobs should not require a name...
jc["name"] = fmt.Sprintf("prometheus-%d", i)
}
job, err := parseJob(jpc, jc)
if err != nil {
return nil, errors.Wrapf(err, "cannot parse monitoring job #%d", i)
}
if job.JobType() != JobTypePrometheus {
return nil, errors.Errorf("monitoring job #%d has invalid job type", i)
}
c.Jobs[job.JobName()] = job
}
// Regular Jobs
for i := range asMap.Jobs {
job, err := parseJob(jpc, asMap.Jobs[i])
if err != nil {
// Try to find its name
namei, ok := asMap.Jobs[i]["name"]
if !ok {
namei = fmt.Sprintf("<no name, entry #%d in list>", i)
}
err = errors.Wrapf(err, "cannot parse job '%v'", namei)
return nil, err
}
jn := job.JobName()
if _, ok := c.Jobs[jn]; ok {
err = errors.Errorf("duplicate or invalid job name: %s", jn)
return nil, err
}
c.Jobs[job.JobName()] = job
}
cj, err := NewControlJob(JobNameControl, jpc.Global.Control.Sockpath)
if err != nil {
err = errors.Wrap(err, "cannot create control job")
return
}
c.Jobs[JobNameControl] = cj
return c, nil
}
func extractStringField(i map[string]interface{}, key string, notempty bool) (field string, err error) {
vi, ok := i[key]
if !ok {
err = errors.Errorf("must have field '%s'", key)
return "", err
}
field, ok = vi.(string)
if !ok {
err = errors.Errorf("'%s' field must have type string", key)
return "", err
}
if notempty && len(field) <= 0 {
err = errors.Errorf("'%s' field must not be empty", key)
return "", err
}
return
}
type JobParsingContext struct {
ConfigParsingContext
}
func parseJob(c JobParsingContext, i map[string]interface{}) (j Job, err error) {
name, err := extractStringField(i, "name", true)
if err != nil {
return nil, err
}
for _, r := range ReservedJobNames {
if name == r {
err = errors.Errorf("job name '%s' is reserved", name)
return nil, err
}
}
jobtypeStr, err := extractStringField(i, "type", true)
if err != nil {
return nil, err
}
jobtype, err := ParseUserJobType(jobtypeStr)
if err != nil {
return nil, err
}
switch jobtype {
case JobTypePull:
return parsePullJob(c, name, i)
case JobTypeSource:
return parseSourceJob(c, name, i)
case JobTypeLocal:
return parseLocalJob(c, name, i)
case JobTypePrometheus:
return parsePrometheusJob(c, name, i)
default:
panic(fmt.Sprintf("implementation error: unknown job type %s", jobtype))
}
}
func parseConnect(i map[string]interface{}) (c RWCConnecter, err error) {
t, err := extractStringField(i, "type", true)
if err != nil {
return nil, err
}
switch t {
case "ssh+stdinserver":
return parseSSHStdinserverConnecter(i)
default:
return nil, errors.Errorf("unknown connection type '%s'", t)
}
}
func parseInitialReplPolicy(v interface{}, defaultPolicy InitialReplPolicy) (p InitialReplPolicy, err error) {
s, ok := v.(string)
if !ok {
goto err
}
switch {
case s == "":
p = defaultPolicy
case s == "most_recent":
p = InitialReplPolicyMostRecent
case s == "all":
p = InitialReplPolicyAll
default:
goto err
}
return
err:
err = errors.Errorf("expected InitialReplPolicy, got %#v", v)
return
}
func parsePrunePolicy(v map[string]interface{}, willSeeBookmarks bool) (p PrunePolicy, err error) {
policyName, err := extractStringField(v, "policy", true)
if err != nil {
return
}
switch policyName {
case "grid":
return parseGridPrunePolicy(v, willSeeBookmarks)
case "noprune":
return NoPrunePolicy{}, nil
default:
err = errors.Errorf("unknown policy '%s'", policyName)
return
}
}
func parseAuthenticatedChannelListenerFactory(c JobParsingContext, v map[string]interface{}) (p AuthenticatedChannelListenerFactory, err error) {
t, err := extractStringField(v, "type", true)
if err != nil {
return nil, err
}
switch t {
case "stdinserver":
return parseStdinserverListenerFactory(c, v)
default:
err = errors.Errorf("unknown type '%s'", t)
return
}
}
var durationStringRegex *regexp.Regexp = regexp.MustCompile(`^\s*(\d+)\s*(s|m|h|d|w)\s*$`)
func parsePostitiveDuration(e string) (d time.Duration, err error) {
comps := durationStringRegex.FindStringSubmatch(e)
if len(comps) != 3 {
err = fmt.Errorf("does not match regex: %s %#v", e, comps)
return
}
durationFactor, err := strconv.ParseInt(comps[1], 10, 64)
if err != nil {
return 0, err
}
if durationFactor <= 0 {
return 0, errors.New("duration must be positive integer")
}
var durationUnit time.Duration
switch comps[2] {
case "s":
durationUnit = time.Second
case "m":
durationUnit = time.Minute
case "h":
durationUnit = time.Hour
case "d":
durationUnit = 24 * time.Hour
case "w":
durationUnit = 24 * 7 * time.Hour
default:
err = fmt.Errorf("contains unknown time unit '%s'", comps[2])
return
}
d = time.Duration(durationFactor) * durationUnit
return
}
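// Illustrative sketch, not part of the original file: the duration syntax
// accepted above is a positive integer followed by one of the units
// s, m, h, d or w, e.g. "10m", "2d" or "1w".
func exampleParseDurations() {
	for _, s := range []string{"30s", "10m", "1h", "2d", "1w"} {
		d, err := parsePostitiveDuration(s)
		fmt.Printf("%s -> %v (err: %v)\n", s, d, err)
	}
}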


@ -1,245 +0,0 @@
package cmd
import (
"fmt"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/util"
"github.com/zrepl/zrepl/zfs"
"math"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
type GridPrunePolicy struct {
RetentionGrid *util.RetentionGrid
MaxBookmarks int
}
const GridPrunePolicyMaxBookmarksKeepAll = -1
type retentionGridAdaptor struct {
zfs.FilesystemVersion
}
func (a retentionGridAdaptor) Date() time.Time {
return a.Creation
}
func (a retentionGridAdaptor) LessThan(b util.RetentionGridEntry) bool {
return a.CreateTXG < b.(retentionGridAdaptor).CreateTXG
}
// Prune filters snapshots with the retention grid.
// Bookmarks are deleted such that at most MaxBookmarks (config key keep_bookmarks) remain.
// The oldest bookmarks are removed first.
func (p *GridPrunePolicy) Prune(_ *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error) {
skeep, sremove := p.pruneSnapshots(versions)
keep, remove = p.pruneBookmarks(skeep)
remove = append(remove, sremove...)
return keep, remove, nil
}
func (p *GridPrunePolicy) pruneSnapshots(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {
// Build adaptors for retention grid
keep = []zfs.FilesystemVersion{}
adaptors := make([]util.RetentionGridEntry, 0)
for fsv := range versions {
if versions[fsv].Type != zfs.Snapshot {
keep = append(keep, versions[fsv])
continue
}
adaptors = append(adaptors, retentionGridAdaptor{versions[fsv]})
}
sort.SliceStable(adaptors, func(i, j int) bool {
return adaptors[i].LessThan(adaptors[j])
})
if len(adaptors) == 0 {
return keep, nil
}
now := adaptors[len(adaptors)-1].Date()
// Evaluate retention grid
keepa, removea := p.RetentionGrid.FitEntries(now, adaptors)
// Revert adaptors
for i := range keepa {
keep = append(keep, keepa[i].(retentionGridAdaptor).FilesystemVersion)
}
remove = make([]zfs.FilesystemVersion, len(removea))
for i := range removea {
remove[i] = removea[i].(retentionGridAdaptor).FilesystemVersion
}
return
}
func (p *GridPrunePolicy) pruneBookmarks(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {
if p.MaxBookmarks == GridPrunePolicyMaxBookmarksKeepAll {
return versions, []zfs.FilesystemVersion{}
}
keep = []zfs.FilesystemVersion{}
bookmarks := make([]zfs.FilesystemVersion, 0)
for fsv := range versions {
if versions[fsv].Type != zfs.Bookmark {
keep = append(keep, versions[fsv])
continue
}
bookmarks = append(bookmarks, versions[fsv])
}
if len(bookmarks) == 0 {
return keep, []zfs.FilesystemVersion{}
}
if len(bookmarks) < p.MaxBookmarks {
keep = append(keep, bookmarks...)
return keep, []zfs.FilesystemVersion{}
}
// NOTE: sorting descending by createtxg <=> sorting descending by creation time (newest first)
sort.SliceStable(bookmarks, func(i, j int) bool {
return (bookmarks[i].CreateTXG > bookmarks[j].CreateTXG)
})
keep = append(keep, bookmarks[:p.MaxBookmarks]...)
remove = bookmarks[p.MaxBookmarks:]
return keep, remove
}
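// Worked example (illustrative, not part of the original file): with
// MaxBookmarks = 2 and bookmarks b1 (createtxg 10), b2 (createtxg 20) and
// b3 (createtxg 30), the descending sort yields [b3, b2, b1]; b3 and b2 are
// kept and b1, the oldest bookmark, is removed.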
func parseGridPrunePolicy(e map[string]interface{}, willSeeBookmarks bool) (p *GridPrunePolicy, err error) {
const KeepBookmarksAllString = "all"
var i struct {
Grid string
KeepBookmarks string `mapstructure:"keep_bookmarks"`
}
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{Result: &i, WeaklyTypedInput: true})
if err != nil {
err = errors.Wrap(err, "mapstructure error")
return
}
if err = dec.Decode(e); err != nil {
err = errors.Wrapf(err, "mapstructure error")
return
}
// Parse grid
intervals, err := parseRetentionGridIntervalsString(i.Grid)
if err != nil {
err = fmt.Errorf("cannot parse retention grid: %s", err)
return
}
// Assert intervals are of increasing length (not necessarily required, but indicates config mistake)
lastDuration := time.Duration(0)
for i := range intervals {
if intervals[i].Length < lastDuration {
// If all intervals before were keep=all, this is ok
allPrevKeepCountAll := true
for j := i - 1; allPrevKeepCountAll && j >= 0; j-- {
allPrevKeepCountAll = intervals[j].KeepCount == util.RetentionGridKeepCountAll
}
if allPrevKeepCountAll {
goto isMonotonicIncrease
}
err = errors.New("retention grid interval length must be monotonically increasing")
return
}
isMonotonicIncrease:
lastDuration = intervals[i].Length
}
// Parse KeepBookmarks
keepBookmarks := 0
if i.KeepBookmarks == KeepBookmarksAllString || (i.KeepBookmarks == "" && !willSeeBookmarks) {
keepBookmarks = GridPrunePolicyMaxBookmarksKeepAll
} else {
i, err := strconv.ParseInt(i.KeepBookmarks, 10, 32)
if err != nil || i <= 0 || i > math.MaxInt32 {
return nil, errors.Errorf("keep_bookmarks must be positive integer or 'all'")
}
keepBookmarks = int(i)
}
return &GridPrunePolicy{
util.NewRetentionGrid(intervals),
keepBookmarks,
}, nil
}
var retentionStringIntervalRegex *regexp.Regexp = regexp.MustCompile(`^\s*(\d+)\s*x\s*([^\(]+)\s*(\((.*)\))?\s*$`)
func parseRetentionGridIntervalString(e string) (intervals []util.RetentionInterval, err error) {
comps := retentionStringIntervalRegex.FindStringSubmatch(e)
if comps == nil {
err = fmt.Errorf("retention string does not match expected format")
return
}
times, err := strconv.Atoi(comps[1])
if err != nil {
return nil, err
} else if times <= 0 {
return nil, fmt.Errorf("contains factor <= 0")
}
duration, err := parsePostitiveDuration(comps[2])
if err != nil {
return nil, err
}
keepCount := 1
if comps[3] != "" {
// Decompose key=value, comma separated
// For now, only keep_count is supported
re := regexp.MustCompile(`^\s*keep=(.+)\s*$`)
res := re.FindStringSubmatch(comps[4])
if res == nil || len(res) != 2 {
err = fmt.Errorf("interval parameter contains unknown parameters")
return
}
if res[1] == "all" {
keepCount = util.RetentionGridKeepCountAll
} else {
keepCount, err = strconv.Atoi(res[1])
if err != nil {
err = fmt.Errorf("cannot parse keep_count value")
return
}
}
}
intervals = make([]util.RetentionInterval, times)
for i := range intervals {
intervals[i] = util.RetentionInterval{
Length: duration,
KeepCount: keepCount,
}
}
return
}
func parseRetentionGridIntervalsString(s string) (intervals []util.RetentionInterval, err error) {
ges := strings.Split(s, "|")
intervals = make([]util.RetentionInterval, 0, 7*len(ges))
for intervalIdx, e := range ges {
parsed, err := parseRetentionGridIntervalString(e)
if err != nil {
return nil, fmt.Errorf("cannot parse interval %d of %d: %s: %s", intervalIdx+1, len(ges), err, strings.TrimSpace(e))
}
intervals = append(intervals, parsed...)
}
return
}
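// Illustrative sketch, not part of the original file: a grid specification is
// a '|'-separated list of COUNTxDURATION(keep=N|all) intervals; the string
// below is the same one exercised in the parser tests.
func exampleParseGridSpec() ([]util.RetentionInterval, error) {
	return parseRetentionGridIntervalsString("2x10m(keep=2) | 1x1h | 3x1w")
}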


@ -1,11 +0,0 @@
package cmd
import "github.com/zrepl/zrepl/zfs"
type NoPrunePolicy struct{}
func (p NoPrunePolicy) Prune(fs *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error) {
keep = versions
remove = []zfs.FilesystemVersion{}
return
}


@ -1,56 +0,0 @@
package cmd
import (
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/problame/go-netssh"
"io"
"path"
)
type StdinserverListenerFactory struct {
ClientIdentity string `mapstructure:"client_identity"`
sockpath string
}
func parseStdinserverListenerFactory(c JobParsingContext, i map[string]interface{}) (f *StdinserverListenerFactory, err error) {
f = &StdinserverListenerFactory{}
if err = mapstructure.Decode(i, f); err != nil {
return nil, errors.Wrap(err, "mapstructure error")
}
if !(len(f.ClientIdentity) > 0) {
err = errors.Errorf("must specify 'client_identity'")
return
}
f.sockpath = path.Join(c.Global.Serve.Stdinserver.SockDir, f.ClientIdentity)
return
}
func (f *StdinserverListenerFactory) Listen() (al AuthenticatedChannelListener, err error) {
if err = PreparePrivateSockpath(f.sockpath); err != nil {
return nil, err
}
l, err := netssh.Listen(f.sockpath)
if err != nil {
return nil, err
}
return StdinserverListener{l}, nil
}
type StdinserverListener struct {
l *netssh.Listener
}
func (l StdinserverListener) Accept() (ch io.ReadWriteCloser, err error) {
return l.l.Accept()
}
func (l StdinserverListener) Close() (err error) {
return l.l.Close()
}


@ -1,222 +0,0 @@
package cmd
import (
"testing"
"time"
"github.com/kr/pretty"
"github.com/stretchr/testify/assert"
"github.com/zrepl/zrepl/util"
"github.com/zrepl/zrepl/zfs"
)
func TestSampleConfigsAreParsedWithoutErrors(t *testing.T) {
paths := []string{
"./sampleconf/localbackup/host1.yml",
"./sampleconf/pullbackup/backuphost.yml",
"./sampleconf/pullbackup/productionhost.yml",
"./sampleconf/random/debugging.yml",
"./sampleconf/random/logging_and_monitoring.yml",
}
for _, p := range paths {
c, err := ParseConfig(p)
if err != nil {
t.Errorf("error parsing %s:\n%+v", p, err)
}
t.Logf("file: %s", p)
t.Log(pretty.Sprint(c))
}
}
func TestParseRetentionGridStringParsing(t *testing.T) {
intervals, err := parseRetentionGridIntervalsString("2x10m(keep=2) | 1x1h | 3x1w")
assert.Nil(t, err)
assert.Len(t, intervals, 6)
proto := util.RetentionInterval{
KeepCount: 2,
Length: 10 * time.Minute,
}
assert.EqualValues(t, proto, intervals[0])
assert.EqualValues(t, proto, intervals[1])
proto.KeepCount = 1
proto.Length = 1 * time.Hour
assert.EqualValues(t, proto, intervals[2])
proto.Length = 7 * 24 * time.Hour
assert.EqualValues(t, proto, intervals[3])
assert.EqualValues(t, proto, intervals[4])
assert.EqualValues(t, proto, intervals[5])
intervals, err = parseRetentionGridIntervalsString("|")
assert.Error(t, err)
intervals, err = parseRetentionGridIntervalsString("2x10m")
assert.NoError(t, err)
intervals, err = parseRetentionGridIntervalsString("1x10m(keep=all)")
assert.NoError(t, err)
assert.Len(t, intervals, 1)
assert.EqualValues(t, util.RetentionGridKeepCountAll, intervals[0].KeepCount)
}
func TestDatasetMapFilter(t *testing.T) {
expectMapping := func(m map[string]string, from, to string) {
dmf, err := parseDatasetMapFilter(m, false)
if err != nil {
t.Logf("expect test map to be valid: %s", err)
t.FailNow()
}
fromPath, err := zfs.NewDatasetPath(from)
if err != nil {
t.Logf("expect test from path to be valid: %s", err)
t.FailNow()
}
res, err := dmf.Map(fromPath)
if to == "" {
assert.Nil(t, res)
assert.Nil(t, err)
t.Logf("%s => NOT MAPPED", fromPath.ToString())
return
}
assert.Nil(t, err)
toPath, err := zfs.NewDatasetPath(to)
if err != nil {
t.Logf("expect test to path to be valid: %s", err)
t.FailNow()
}
assert.True(t, res.Equal(toPath))
}
expectFilter := func(m map[string]string, path string, pass bool) {
dmf, err := parseDatasetMapFilter(m, true)
if err != nil {
t.Logf("expect test filter to be valid: %s", err)
t.FailNow()
}
p, err := zfs.NewDatasetPath(path)
if err != nil {
t.Logf("expect test path to be valid: %s", err)
t.FailNow()
}
res, err := dmf.Filter(p)
assert.Nil(t, err)
assert.Equal(t, pass, res)
}
map1 := map[string]string{
"a/b/c<": "root1",
"a/b<": "root2",
"<": "root3/b/c",
"b": "!",
"a/b/c/d/e<": "!",
"q<": "root4/1/2",
}
expectMapping(map1, "a/b/c", "root1")
expectMapping(map1, "a/b/c/d", "root1/d")
expectMapping(map1, "a/b/c/d/e", "")
expectMapping(map1, "a/b/e", "root2/e")
expectMapping(map1, "a/b", "root2")
expectMapping(map1, "x", "root3/b/c/x")
expectMapping(map1, "x/y", "root3/b/c/x/y")
expectMapping(map1, "q", "root4/1/2")
expectMapping(map1, "b", "")
expectMapping(map1, "q/r", "root4/1/2/r")
filter1 := map[string]string{
"<": "!",
"a<": "ok",
"a/b<": "!",
}
expectFilter(filter1, "b", false)
expectFilter(filter1, "a", true)
expectFilter(filter1, "a/d", true)
expectFilter(filter1, "a/b", false)
expectFilter(filter1, "a/b/c", false)
filter2 := map[string]string{}
expectFilter(filter2, "foo", false) // default to omit
}
func TestDatasetMapFilter_AsFilter(t *testing.T) {
mapspec := map[string]string{
"a/b/c<": "root1",
"a/b<": "root2",
"<": "root3/b/c",
"b": "!",
"a/b/c/d/e<": "!",
"q<": "root4/1/2",
}
m, err := parseDatasetMapFilter(mapspec, false)
assert.Nil(t, err)
f := m.AsFilter()
t.Logf("Mapping:\n%s\nFilter:\n%s", pretty.Sprint(m), pretty.Sprint(f))
tf := func(f zfs.DatasetFilter, path string, pass bool) {
p, err := zfs.NewDatasetPath(path)
assert.Nil(t, err)
r, err := f.Filter(p)
assert.Nil(t, err)
assert.Equal(t, pass, r)
}
tf(f, "a/b/c", true)
tf(f, "a/b", true)
tf(f, "b", false)
tf(f, "a/b/c/d/e", false)
tf(f, "a/b/c/d/e/f", false)
tf(f, "a", true)
}
func TestDatasetMapFilter_InvertedFilter(t *testing.T) {
mapspec := map[string]string{
"a/b": "1/2",
"a/b/c<": "3",
"a/b/c/d<": "1/2/a",
"a/b/d": "!",
}
m, err := parseDatasetMapFilter(mapspec, false)
assert.Nil(t, err)
inv, err := m.InvertedFilter()
assert.Nil(t, err)
t.Log(pretty.Sprint(inv))
expectMapping := func(m *DatasetMapFilter, ps string, expRes bool) {
p, err := zfs.NewDatasetPath(ps)
assert.Nil(t, err)
r, err := m.Filter(p)
assert.Nil(t, err)
assert.Equal(t, expRes, r)
}
expectMapping(inv, "4", false)
expectMapping(inv, "3", true)
expectMapping(inv, "3/x", true)
expectMapping(inv, "1", false)
expectMapping(inv, "1/2", true)
expectMapping(inv, "1/2/3", false)
expectMapping(inv, "1/2/a/b", true)
}


@ -1,305 +0,0 @@
package cmd
import (
"bytes"
"context"
"encoding/json"
"fmt"
"github.com/dustin/go-humanize"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"github.com/zrepl/zrepl/logger"
"io"
golog "log"
"net"
"net/http"
"os"
"sort"
"strings"
"time"
)
var controlCmd = &cobra.Command{
Use: "control",
Short: "control zrepl daemon",
}
var pprofCmd = &cobra.Command{
Use: "pprof off | [on TCP_LISTEN_ADDRESS]",
Short: "start a http server exposing go-tool-compatible profiling endpoints at TCP_LISTEN_ADDRESS",
Run: doControlPProf,
PreRunE: func(cmd *cobra.Command, args []string) error {
if cmd.Flags().NArg() < 1 {
goto enargs
}
switch cmd.Flags().Arg(0) {
case "on":
pprofCmdArgs.msg.Run = true
if cmd.Flags().NArg() != 2 {
return errors.New("must specify TCP_LISTEN_ADDRESS as second positional argument")
}
pprofCmdArgs.msg.HttpListenAddress = cmd.Flags().Arg(1)
case "off":
if cmd.Flags().NArg() != 1 {
goto enargs
}
pprofCmdArgs.msg.Run = false
}
return nil
enargs:
return errors.New("invalid number of positional arguments")
},
}
var pprofCmdArgs struct {
msg PprofServerControlMsg
}
var controlVersionCmd = &cobra.Command{
Use: "version",
Short: "print version of running zrepl daemon",
Run: doControLVersionCmd,
}
var controlStatusCmdArgs struct {
format string
level logger.Level
onlyShowJob string
}
var controlStatusCmd = &cobra.Command{
Use: "status [JOB_NAME]",
Short: "get current status",
Run: doControlStatusCmd,
}
func init() {
RootCmd.AddCommand(controlCmd)
controlCmd.AddCommand(pprofCmd)
controlCmd.AddCommand(controlVersionCmd)
controlCmd.AddCommand(controlStatusCmd)
controlStatusCmd.Flags().StringVar(&controlStatusCmdArgs.format, "format", "human", "output format (human|raw)")
controlStatusCmdArgs.level = logger.Warn
controlStatusCmd.Flags().Var(&controlStatusCmdArgs.level, "level", "minimum log level to show")
}
func controlHttpClient() (client http.Client, err error) {
conf, err := ParseConfig(rootArgs.configFile)
if err != nil {
return http.Client{}, err
}
return http.Client{
Transport: &http.Transport{
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("unix", conf.Global.Control.Sockpath)
},
},
}, nil
}
func doControlPProf(cmd *cobra.Command, args []string) {
log := golog.New(os.Stderr, "", 0)
die := func() {
log.Printf("exiting after error")
os.Exit(1)
}
log.Printf("connecting to zrepl daemon")
httpc, err := controlHttpClient()
if err != nil {
log.Printf("error parsing config: %s", err)
die()
}
var buf bytes.Buffer
if err := json.NewEncoder(&buf).Encode(&pprofCmdArgs.msg); err != nil {
log.Printf("error marshaling request: %s", err)
die()
}
_, err = httpc.Post("http://unix"+ControlJobEndpointPProf, "application/json", &buf)
if err != nil {
log.Printf("error: %s", err)
die()
}
log.Printf("finished")
}
func doControLVersionCmd(cmd *cobra.Command, args []string) {
log := golog.New(os.Stderr, "", 0)
die := func() {
log.Printf("exiting after error")
os.Exit(1)
}
httpc, err := controlHttpClient()
if err != nil {
log.Printf("could not connect to daemon: %s", err)
die()
}
resp, err := httpc.Get("http://unix" + ControlJobEndpointVersion)
if err != nil {
log.Printf("error: %s", err)
die()
} else if resp.StatusCode != http.StatusOK {
var msg bytes.Buffer
io.CopyN(&msg, resp.Body, 4096)
log.Printf("error: %s", msg.String())
die()
}
var info ZreplVersionInformation
err = json.NewDecoder(resp.Body).Decode(&info)
if err != nil {
log.Printf("error unmarshaling response: %s", err)
die()
}
fmt.Println(info.String())
}
func doControlStatusCmd(cmd *cobra.Command, args []string) {
log := golog.New(os.Stderr, "", 0)
die := func() {
log.Print("exiting after error")
os.Exit(1)
}
if len(args) == 1 {
controlStatusCmdArgs.onlyShowJob = args[0]
} else if len(args) > 1 {
log.Print("can only specify one job as positional argument")
cmd.Usage()
die()
}
httpc, err := controlHttpClient()
if err != nil {
log.Printf("could not connect to daemon: %s", err)
die()
}
resp, err := httpc.Get("http://unix" + ControlJobEndpointStatus)
if err != nil {
log.Printf("error: %s", err)
die()
} else if resp.StatusCode != http.StatusOK {
var msg bytes.Buffer
io.CopyN(&msg, resp.Body, 4096)
log.Printf("error: %s", msg.String())
die()
}
var status DaemonStatus
err = json.NewDecoder(resp.Body).Decode(&status)
if err != nil {
log.Printf("error unmarshaling response: %s", err)
die()
}
switch controlStatusCmdArgs.format {
case "raw":
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
if err := enc.Encode(status); err != nil {
log.Panic(err)
}
case "human":
formatter := HumanFormatter{}
formatter.SetMetadataFlags(MetadataAll)
formatter.SetIgnoreFields([]string{
logJobField,
})
jobNames := make([]string, 0, len(status.Jobs))
for name := range status.Jobs {
jobNames = append(jobNames, name)
}
sort.Slice(jobNames, func(i, j int) bool {
return strings.Compare(jobNames[i], jobNames[j]) == -1
})
now := time.Now()
for _, name := range jobNames {
if controlStatusCmdArgs.onlyShowJob != "" && name != controlStatusCmdArgs.onlyShowJob {
continue
}
job := status.Jobs[name]
jobLogEntries := make([]logger.Entry, 0)
informAboutError := false
fmt.Printf("Job '%s':\n", name)
for _, task := range job.Tasks {
var header bytes.Buffer
fmt.Fprintf(&header, " Task '%s': ", task.Name)
if !task.Idle {
fmt.Fprint(&header, strings.Join(task.ActivityStack, "."))
} else {
fmt.Fprint(&header, "<idle>")
}
fmt.Fprint(&header, " ")
const TASK_STALLED_HOLDOFF_DURATION = 10 * time.Second
sinceLastUpdate := now.Sub(task.LastUpdate)
if !task.Idle || task.ProgressRx != 0 || task.ProgressTx != 0 {
fmt.Fprintf(&header, "(%s / %s , Rx/Tx",
humanize.Bytes(uint64(task.ProgressRx)),
humanize.Bytes(uint64(task.ProgressTx)))
if task.Idle {
fmt.Fprint(&header, ", values from last run")
}
fmt.Fprint(&header, ")")
}
fmt.Fprint(&header, "\n")
if !task.Idle && !task.LastUpdate.IsZero() && sinceLastUpdate >= TASK_STALLED_HOLDOFF_DURATION {
informAboutError = true
fmt.Fprintf(&header, " WARNING: last update %s ago at %s)",
sinceLastUpdate.String(),
task.LastUpdate.Format(HumanFormatterDateFormat))
fmt.Fprint(&header, "\n")
}
io.Copy(os.Stdout, &header)
jobLogEntries = append(jobLogEntries, task.LogEntries...)
informAboutError = informAboutError || task.MaxLogLevel >= logger.Warn
}
sort.Slice(jobLogEntries, func(i, j int) bool {
return jobLogEntries[i].Time.Before(jobLogEntries[j].Time)
})
if informAboutError {
fmt.Println(" WARNING: Some tasks encountered problems since the last time they left idle state:")
fmt.Println(" check the logs below or your log file for more information.")
fmt.Println(" Use the --level flag if you need debug information.")
fmt.Println()
}
for _, e := range jobLogEntries {
if e.Level < controlStatusCmdArgs.level {
continue
}
formatted, err := formatter.Format(&e)
if err != nil {
panic(err)
}
fmt.Printf(" %s\n", string(formatted))
}
fmt.Println()
}
default:
log.Printf("invalid output format '%s'", controlStatusCmdArgs.format)
die()
}
}


@ -1,461 +0,0 @@
package cmd
import (
"container/list"
"context"
"fmt"
"github.com/spf13/cobra"
"github.com/zrepl/zrepl/logger"
"io"
"os"
"os/signal"
"strings"
"sync"
"syscall"
"time"
)
// daemonCmd represents the daemon command
var daemonCmd = &cobra.Command{
Use: "daemon",
Short: "start daemon",
Run: doDaemon,
}
func init() {
RootCmd.AddCommand(daemonCmd)
}
type Job interface {
JobName() string
JobType() JobType
JobStart(ctxt context.Context)
JobStatus(ctxt context.Context) (*JobStatus, error)
}
type JobType string
const (
JobTypePull JobType = "pull"
JobTypeSource JobType = "source"
JobTypeLocal JobType = "local"
JobTypePrometheus JobType = "prometheus"
JobTypeControl JobType = "control"
)
func ParseUserJobType(s string) (JobType, error) {
switch s {
case "pull":
return JobTypePull, nil
case "source":
return JobTypeSource, nil
case "local":
return JobTypeLocal, nil
case "prometheus":
return JobTypePrometheus, nil
}
return "", fmt.Errorf("unknown job type '%s'", s)
}
func (j JobType) String() string {
return string(j)
}
func doDaemon(cmd *cobra.Command, args []string) {
conf, err := ParseConfig(rootArgs.configFile)
if err != nil {
fmt.Fprintf(os.Stderr, "error parsing config: %s\n", err)
os.Exit(1)
}
log := logger.NewLogger(conf.Global.logging.Outlets, 1*time.Second)
log.Info(NewZreplVersionInformation().String())
log.Debug("starting daemon")
ctx := context.WithValue(context.Background(), contextKeyLog, log)
d := NewDaemon(conf)
d.Loop(ctx)
}
type contextKey string
const (
contextKeyLog contextKey = contextKey("log")
contextKeyDaemon contextKey = contextKey("daemon")
)
type Daemon struct {
conf *Config
startedAt time.Time
}
func NewDaemon(initialConf *Config) *Daemon {
return &Daemon{conf: initialConf}
}
func (d *Daemon) Loop(ctx context.Context) {
d.startedAt = time.Now()
log := ctx.Value(contextKeyLog).(Logger)
ctx, cancel := context.WithCancel(ctx)
ctx = context.WithValue(ctx, contextKeyDaemon, d)
sigChan := make(chan os.Signal, 1)
finishs := make(chan Job)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
log.Info("starting jobs from config")
i := 0
for _, job := range d.conf.Jobs {
logger := log.WithField(logJobField, job.JobName())
logger.Info("starting")
i++
jobCtx := context.WithValue(ctx, contextKeyLog, logger)
go func(j Job) {
j.JobStart(jobCtx)
finishs <- j
}(job)
}
finishCount := 0
outer:
for {
select {
case <-finishs:
finishCount++
if finishCount == len(d.conf.Jobs) {
log.Info("all jobs finished")
break outer
}
case sig := <-sigChan:
log.WithField("signal", sig).Info("received signal")
log.Info("cancelling all jobs")
cancel()
}
}
signal.Stop(sigChan)
cancel() // make go vet happy
log.Info("exiting")
}
// Representation of a Job's status that is composed of Tasks
type JobStatus struct {
// Statuses of all tasks of this job
Tasks []*TaskStatus
// Error != "" if JobStatus() returned an error
JobStatusError string
}
// Representation of a Daemon's status that is composed of Jobs
type DaemonStatus struct {
StartedAt time.Time
Jobs map[string]*JobStatus
}
func (d *Daemon) Status() (s *DaemonStatus) {
s = &DaemonStatus{}
s.StartedAt = d.startedAt
s.Jobs = make(map[string]*JobStatus, len(d.conf.Jobs))
for name, j := range d.conf.Jobs {
status, err := j.JobStatus(context.TODO())
if err != nil {
s.Jobs[name] = &JobStatus{nil, err.Error()}
continue
}
s.Jobs[name] = status
}
return
}
// Representation of a Task's status
type TaskStatus struct {
Name string
// Whether the task is idle.
Idle bool
// The stack of activities the task is currently executing.
// The first element is the root activity and equal to Name.
ActivityStack []string
// Number of bytes received by the task since it last left idle state.
ProgressRx int64
// Number of bytes sent by the task since it last left idle state.
ProgressTx int64
// Log entries emitted by the task since it last left idle state.
// Only contains the log entries emitted through the task's logger
// (provided by Task.Log()).
LogEntries []logger.Entry
// The maximum log level of LogEntries.
// Only valid if len(LogEntries) > 0.
MaxLogLevel logger.Level
// Last time something about the Task changed
LastUpdate time.Time
}
// An instance of Task tracks a single thread of activity that is part of a Job.
type Task struct {
name string // immutable
parent Job // immutable
// Stack of activities the task is currently in
// Members are instances of taskActivity
activities *list.List
// Last time activities was changed (not the activities inside, the list)
activitiesLastUpdate time.Time
// Protects Task members from modification
rwl sync.RWMutex
}
// Structure that describes the progress a Task has made
type taskProgress struct {
rx int64
tx int64
creation time.Time
lastUpdate time.Time
logEntries []logger.Entry
mtx sync.RWMutex
}
func newTaskProgress() (p *taskProgress) {
return &taskProgress{
creation: time.Now(),
logEntries: make([]logger.Entry, 0),
}
}
func (p *taskProgress) UpdateIO(drx, dtx int64) {
p.mtx.Lock()
defer p.mtx.Unlock()
p.rx += drx
p.tx += dtx
p.lastUpdate = time.Now()
}
func (p *taskProgress) UpdateLogEntry(entry logger.Entry) {
p.mtx.Lock()
defer p.mtx.Unlock()
// FIXME: ensure maximum size (issue #48)
p.logEntries = append(p.logEntries, entry)
p.lastUpdate = time.Now()
}
func (p *taskProgress) DeepCopy() (out taskProgress) {
p.mtx.RLock()
defer p.mtx.RUnlock()
out.rx, out.tx = p.rx, p.tx
out.creation = p.creation
out.lastUpdate = p.lastUpdate
out.logEntries = make([]logger.Entry, len(p.logEntries))
for i := range p.logEntries {
out.logEntries[i] = p.logEntries[i]
}
return
}
// returns a copy of this taskProgress, the mutex carries no semantic value
func (p *taskProgress) Read() (out taskProgress) {
p.mtx.RLock()
defer p.mtx.RUnlock()
return p.DeepCopy()
}
// Element of a Task's activity stack
type taskActivity struct {
name string
idle bool
logger *logger.Logger
// The progress of the task that is updated by UpdateIO() and UpdateLogEntry()
//
// Progress happens on a task-level and is thus global to the task.
// That's why progress is just a pointer to the current taskProgress:
// we reset progress when leaving the idle root activity
progress *taskProgress
}
func NewTask(name string, parent Job, lg *logger.Logger) *Task {
t := &Task{
name: name,
parent: parent,
activities: list.New(),
}
rootLogger := lg.ReplaceField(logTaskField, name).
WithOutlet(t, logger.Debug)
rootAct := &taskActivity{name, true, rootLogger, newTaskProgress()}
t.activities.PushFront(rootAct)
return t
}
// callers must hold t.rwl
func (t *Task) cur() *taskActivity {
return t.activities.Front().Value.(*taskActivity)
}
// buildActivityStack returns the stack of activity names
// t.rwl must be held, but the slice can be returned since strings are immutable
func (t *Task) buildActivityStack() []string {
comps := make([]string, 0, t.activities.Len())
for e := t.activities.Back(); e != nil; e = e.Prev() {
act := e.Value.(*taskActivity)
comps = append(comps, act.name)
}
return comps
}
// Start a sub-activity.
// Must always be matched with a call to t.Finish()
// --- consider using defer for this purpose.
func (t *Task) Enter(activity string) {
t.rwl.Lock()
defer t.rwl.Unlock()
prev := t.cur()
if prev.idle {
// reset progress when leaving the idle root activity;
// we leave the old progress value dangling so that earlier references to it
// (e.g. ProgressUpdater wrappers) keep working and callers need not worry about it
prev.progress = newTaskProgress()
prom.taskLastActiveStart.WithLabelValues(
t.parent.JobName(),
t.parent.JobType().String(),
t.name).
Set(float64(prev.progress.creation.UnixNano()) / 1e9)
}
act := &taskActivity{activity, false, nil, prev.progress}
t.activities.PushFront(act)
stack := t.buildActivityStack()
activityField := strings.Join(stack, ".")
act.logger = prev.logger.ReplaceField(logTaskField, activityField)
t.activitiesLastUpdate = time.Now()
}
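// Illustrative sketch, not part of the original file: the Enter/Finish pairing
// described above, using defer as suggested; the activity name is made up.
func exampleTaskActivity(t *Task) {
	t.Enter("example_activity")
	defer t.Finish()
	t.Log().Debug("doing the activity's work")
}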
func (t *Task) UpdateProgress(dtx, drx int64) {
t.rwl.RLock()
p := t.cur().progress // protected by own rwlock
t.rwl.RUnlock()
p.UpdateIO(drx, dtx)
}
// Returns a wrapper io.Reader that updates this task's _current_ progress value.
// Progress updates after this task resets its progress value are discarded.
func (t *Task) ProgressUpdater(r io.Reader) *IOProgressUpdater {
t.rwl.RLock()
defer t.rwl.RUnlock()
return &IOProgressUpdater{r, t.cur().progress}
}
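// Illustrative sketch, not part of the original file: account bytes read from
// a replication stream to the task's current progress while copying it.
func exampleCopyWithProgress(t *Task, dst io.Writer, src io.Reader) (int64, error) {
	return io.Copy(dst, t.ProgressUpdater(src))
}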
func (t *Task) Status() *TaskStatus {
t.rwl.RLock()
defer t.rwl.RUnlock()
// NOTE
// do not return any state in TaskStatus that is protected by t.rwl
cur := t.cur()
stack := t.buildActivityStack()
prog := cur.progress.Read()
var maxLevel logger.Level
for _, entry := range prog.logEntries {
if maxLevel < entry.Level {
maxLevel = entry.Level
}
}
lastUpdate := prog.lastUpdate
if lastUpdate.Before(t.activitiesLastUpdate) {
lastUpdate = t.activitiesLastUpdate
}
s := &TaskStatus{
Name: stack[0],
ActivityStack: stack,
Idle: cur.idle,
ProgressRx: prog.rx,
ProgressTx: prog.tx,
LogEntries: prog.logEntries,
MaxLogLevel: maxLevel,
LastUpdate: lastUpdate,
}
return s
}
// Finish a sub-activity.
// Corresponds to a preceding call to t.Enter()
func (t *Task) Finish() {
t.rwl.Lock()
defer t.rwl.Unlock()
top := t.activities.Front()
if top.Next() == nil {
return // cannot remove root activity
}
t.activities.Remove(top)
t.activitiesLastUpdate = time.Now()
// prometheus
front := t.activities.Front()
if front != nil && front == t.activities.Back() {
idleAct := front.Value.(*taskActivity)
if !idleAct.idle {
panic("inconsistent implementation")
}
progress := idleAct.progress.Read()
non_idle_time := t.activitiesLastUpdate.Sub(progress.creation) // use same time
prom.taskLastActiveDuration.WithLabelValues(
t.parent.JobName(),
t.parent.JobType().String(),
t.name).Set(non_idle_time.Seconds())
}
}
// Returns a logger derived from the logger passed to the constructor function.
// The logger's task field contains the current activity stack joined by '.'.
func (t *Task) Log() *logger.Logger {
t.rwl.RLock()
defer t.rwl.RUnlock()
// FIXME should influence TaskStatus's LastUpdate field
return t.cur().logger
}
// implement logger.Outlet interface
func (t *Task) WriteEntry(entry logger.Entry) error {
t.rwl.RLock()
defer t.rwl.RUnlock()
t.cur().progress.UpdateLogEntry(entry)
prom.taskLogEntries.WithLabelValues(
t.parent.JobName(),
t.parent.JobType().String(),
t.name,
entry.Level.String()).
Inc()
return nil
}
type IOProgressUpdater struct {
r io.Reader
p *taskProgress
}
func (u *IOProgressUpdater) Read(p []byte) (n int, err error) {
n, err = u.r.Read(p)
u.p.UpdateIO(int64(n), 0)
return
}


@ -1,181 +0,0 @@
package cmd
import (
"fmt"
"io"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/rpc"
"github.com/zrepl/zrepl/zfs"
)
type DatasetMapping interface {
Map(source *zfs.DatasetPath) (target *zfs.DatasetPath, err error)
}
type FilesystemRequest struct {
Roots []string // may be nil, indicating interest in all filesystems
}
type FilesystemVersionsRequest struct {
Filesystem *zfs.DatasetPath
}
type InitialTransferRequest struct {
Filesystem *zfs.DatasetPath
FilesystemVersion zfs.FilesystemVersion
}
type IncrementalTransferRequest struct {
Filesystem *zfs.DatasetPath
From zfs.FilesystemVersion
To zfs.FilesystemVersion
}
type Handler struct {
logger Logger
dsf zfs.DatasetFilter
fsvf zfs.FilesystemVersionFilter
}
func NewHandler(logger Logger, dsfilter zfs.DatasetFilter, snapfilter zfs.FilesystemVersionFilter) (h Handler) {
return Handler{logger, dsfilter, snapfilter}
}
func registerEndpoints(server rpc.RPCServer, handler Handler) (err error) {
err = server.RegisterEndpoint("FilesystemRequest", handler.HandleFilesystemRequest)
if err != nil {
panic(err)
}
err = server.RegisterEndpoint("FilesystemVersionsRequest", handler.HandleFilesystemVersionsRequest)
if err != nil {
panic(err)
}
err = server.RegisterEndpoint("InitialTransferRequest", handler.HandleInitialTransferRequest)
if err != nil {
panic(err)
}
err = server.RegisterEndpoint("IncrementalTransferRequest", handler.HandleIncrementalTransferRequest)
if err != nil {
panic(err)
}
return nil
}
func (h Handler) HandleFilesystemRequest(r *FilesystemRequest, roots *[]*zfs.DatasetPath) (err error) {
log := h.logger.WithField("endpoint", "FilesystemRequest")
log.WithField("request", r).Debug("request")
log.WithField("dataset_filter", h.dsf).Debug("dsf")
allowed, err := zfs.ZFSListMapping(h.dsf)
if err != nil {
log.WithError(err).Error("error listing filesystems")
return
}
log.WithField("response", allowed).Debug("response")
*roots = allowed
return
}
func (h Handler) HandleFilesystemVersionsRequest(r *FilesystemVersionsRequest, versions *[]zfs.FilesystemVersion) (err error) {
log := h.logger.WithField("endpoint", "FilesystemVersionsRequest")
log.WithField("request", r).Debug("request")
// allowed to request that?
if err = h.pullACLCheck(r.Filesystem, nil); err != nil {
log.WithError(err).Warn("pull ACL check failed")
return
}
// find our versions
vs, err := zfs.ZFSListFilesystemVersions(r.Filesystem, h.fsvf)
if err != nil {
log.WithError(err).Error("cannot list filesystem versions")
return
}
log.WithField("response", vs).Debug("response")
*versions = vs
return
}
func (h Handler) HandleInitialTransferRequest(r *InitialTransferRequest, stream *io.Reader) (err error) {
log := h.logger.WithField("endpoint", "InitialTransferRequest")
log.WithField("request", r).Debug("request")
if err = h.pullACLCheck(r.Filesystem, &r.FilesystemVersion); err != nil {
log.WithError(err).Warn("pull ACL check failed")
return
}
log.Debug("invoking zfs send")
s, err := zfs.ZFSSend(r.Filesystem, &r.FilesystemVersion, nil)
if err != nil {
log.WithError(err).Error("cannot send filesystem")
}
*stream = s
return
}
func (h Handler) HandleIncrementalTransferRequest(r *IncrementalTransferRequest, stream *io.Reader) (err error) {
log := h.logger.WithField("endpoint", "IncrementalTransferRequest")
log.WithField("request", r).Debug("request")
if err = h.pullACLCheck(r.Filesystem, &r.From); err != nil {
log.WithError(err).Warn("pull ACL check failed")
return
}
if err = h.pullACLCheck(r.Filesystem, &r.To); err != nil {
log.WithError(err).Warn("pull ACL check failed")
return
}
log.Debug("invoking zfs send")
s, err := zfs.ZFSSend(r.Filesystem, &r.From, &r.To)
if err != nil {
log.WithError(err).Error("cannot send filesystem")
}
*stream = s
return
}
func (h Handler) pullACLCheck(p *zfs.DatasetPath, v *zfs.FilesystemVersion) (err error) {
var fsAllowed, vAllowed bool
fsAllowed, err = h.dsf.Filter(p)
if err != nil {
err = fmt.Errorf("error evaluating ACL: %s", err)
return
}
if !fsAllowed {
err = fmt.Errorf("ACL prohibits access to %s", p.ToString())
return
}
if v == nil {
return
}
vAllowed, err = h.fsvf.Filter(*v)
if err != nil {
err = errors.Wrap(err, "error evaluating version filter")
return
}
if !vAllowed {
err = fmt.Errorf("ACL prohibits access to %s", v.ToAbsPath(p))
return
}
return
}


@ -1,47 +0,0 @@
// zrepl replicates ZFS filesystems & volumes between pools
//
// Code Organization
//
// The cmd package uses github.com/spf13/cobra for its CLI.
//
// It combines the other packages in the zrepl project to implement zrepl functionality.
//
// Each subcommand's code is in the corresponding *.go file.
// All other *.go files contain code shared by the subcommands.
package cmd
import (
"github.com/spf13/cobra"
"github.com/zrepl/zrepl/logger"
)
//
//type Logger interface {
// Printf(format string, v ...interface{})
//}
var (
zreplVersion string // set by build infrastructure
)
type Logger = *logger.Logger
var RootCmd = &cobra.Command{
Use: "zrepl",
Short: "ZFS dataset replication",
Long: `Replicate ZFS filesystems & volumes between pools:
- push & pull mode
- automatic snapshot creation & pruning
- local / over the network
- ACLs instead of blank SSH access`,
}
var rootArgs struct {
configFile string
}
func init() {
//cobra.OnInitialize(initConfig)
RootCmd.PersistentFlags().StringVar(&rootArgs.configFile, "config", "", "config file path")
}


@ -1,138 +0,0 @@
package cmd
import (
"context"
"fmt"
"github.com/zrepl/zrepl/zfs"
"time"
)
type Pruner struct {
task *Task
Now time.Time
DryRun bool
DatasetFilter zfs.DatasetFilter
SnapshotPrefix string
PrunePolicy PrunePolicy
}
type PruneResult struct {
Filesystem *zfs.DatasetPath
All []zfs.FilesystemVersion
Keep []zfs.FilesystemVersion
Remove []zfs.FilesystemVersion
}
func (p *Pruner) filterFilesystems() (filesystems []*zfs.DatasetPath, stop bool) {
p.task.Enter("filter_fs")
defer p.task.Finish()
filesystems, err := zfs.ZFSListMapping(p.DatasetFilter)
if err != nil {
p.task.Log().WithError(err).Error("error applying filesystem filter")
return nil, true
}
if len(filesystems) <= 0 {
p.task.Log().Info("no filesystems matching filter")
return nil, true
}
return filesystems, false
}
func (p *Pruner) filterVersions(fs *zfs.DatasetPath) (fsversions []zfs.FilesystemVersion, stop bool) {
p.task.Enter("filter_versions")
defer p.task.Finish()
log := p.task.Log().WithField(logFSField, fs.ToString())
filter := NewPrefixFilter(p.SnapshotPrefix)
fsversions, err := zfs.ZFSListFilesystemVersions(fs, filter)
if err != nil {
log.WithError(err).Error("error listing filesytem versions")
return nil, true
}
if len(fsversions) == 0 {
log.WithField("prefix", p.SnapshotPrefix).Info("no filesystem versions matching prefix")
return nil, true
}
return fsversions, false
}
func (p *Pruner) pruneFilesystem(fs *zfs.DatasetPath) (r PruneResult, valid bool) {
p.task.Enter("prune_fs")
defer p.task.Finish()
log := p.task.Log().WithField(logFSField, fs.ToString())
fsversions, stop := p.filterVersions(fs)
if stop {
return
}
p.task.Enter("prune_policy")
keep, remove, err := p.PrunePolicy.Prune(fs, fsversions)
p.task.Finish()
if err != nil {
log.WithError(err).Error("error evaluating prune policy")
return
}
log.WithField("fsversions", fsversions).
WithField("keep", keep).
WithField("remove", remove).
Debug("prune policy debug dump")
r = PruneResult{fs, fsversions, keep, remove}
makeFields := func(v zfs.FilesystemVersion) (fields map[string]interface{}) {
fields = make(map[string]interface{})
fields["version"] = v.ToAbsPath(fs)
timeSince := p.Now.Sub(v.Creation)
fields["age_ns"] = timeSince
const day time.Duration = 24 * time.Hour
days := timeSince / day
remainder := timeSince % day
fields["age_str"] = fmt.Sprintf("%dd%s", days, remainder)
return
}
for _, v := range remove {
fields := makeFields(v)
log.WithFields(fields).Info("destroying version")
// echo what we'll do and exec zfs destroy if not dry run
// TODO special handling for EBUSY (zfs hold)
// TODO error handling for clones? just echo to cli, skip over, and exit with non-zero status code (we're idempotent)
if !p.DryRun {
p.task.Enter("destroy")
err := zfs.ZFSDestroyFilesystemVersion(fs, v)
p.task.Finish()
if err != nil {
log.WithFields(fields).WithError(err).Error("error destroying version")
}
}
}
return r, true
}
func (p *Pruner) Run(ctx context.Context) (r []PruneResult, err error) {
p.task.Enter("run")
defer p.task.Finish()
if p.DryRun {
p.task.Log().Info("doing dry run")
}
filesystems, stop := p.filterFilesystems()
if stop {
return
}
r = make([]PruneResult, 0, len(filesystems))
for _, fs := range filesystems {
res, ok := p.pruneFilesystem(fs)
if ok {
r = append(r, res)
}
}
return
}

View File

@ -1,323 +0,0 @@
package cmd
import (
"fmt"
"io"
"bytes"
"encoding/json"
"github.com/zrepl/zrepl/rpc"
"github.com/zrepl/zrepl/zfs"
)
type localPullACL struct{}
func (a localPullACL) Filter(p *zfs.DatasetPath) (pass bool, err error) {
return true, nil
}
const DEFAULT_INITIAL_REPL_POLICY = InitialReplPolicyMostRecent
type InitialReplPolicy string
const (
InitialReplPolicyMostRecent InitialReplPolicy = "most_recent"
InitialReplPolicyAll InitialReplPolicy = "all"
)
type Puller struct {
task *Task
Remote rpc.RPCClient
Mapping DatasetMapping
InitialReplPolicy InitialReplPolicy
}
type remoteLocalMapping struct {
Remote *zfs.DatasetPath
Local *zfs.DatasetPath
}
func (p *Puller) getRemoteFilesystems() (rfs []*zfs.DatasetPath, ok bool) {
p.task.Enter("fetch_remote_fs_list")
defer p.task.Finish()
fsr := FilesystemRequest{}
if err := p.Remote.Call("FilesystemRequest", &fsr, &rfs); err != nil {
p.task.Log().WithError(err).Error("cannot fetch remote filesystem list")
return nil, false
}
return rfs, true
}
func (p *Puller) buildReplMapping(remoteFilesystems []*zfs.DatasetPath) (replMapping map[string]remoteLocalMapping, ok bool) {
p.task.Enter("build_repl_mapping")
defer p.task.Finish()
replMapping = make(map[string]remoteLocalMapping, len(remoteFilesystems))
for fs := range remoteFilesystems {
var err error
var localFs *zfs.DatasetPath
localFs, err = p.Mapping.Map(remoteFilesystems[fs])
if err != nil {
err := fmt.Errorf("error mapping %s: %s", remoteFilesystems[fs], err)
p.task.Log().WithError(err).WithField(logMapFromField, remoteFilesystems[fs]).Error("cannot map")
return nil, false
}
if localFs == nil {
continue
}
p.task.Log().WithField(logMapFromField, remoteFilesystems[fs].ToString()).
WithField(logMapToField, localFs.ToString()).Debug("mapping")
m := remoteLocalMapping{remoteFilesystems[fs], localFs}
replMapping[m.Local.ToString()] = m
}
return replMapping, true
}
// returns true if the receiving filesystem (local side) exists and can have child filesystems
func (p *Puller) replFilesystem(m remoteLocalMapping, localFilesystemState map[string]zfs.FilesystemState) (localExists bool) {
p.task.Enter("repl_fs")
defer p.task.Finish()
var err error
remote := p.Remote
log := p.task.Log().
WithField(logMapFromField, m.Remote.ToString()).
WithField(logMapToField, m.Local.ToString())
log.Debug("examining local filesystem state")
localState, localExists := localFilesystemState[m.Local.ToString()]
var versions []zfs.FilesystemVersion
switch {
case !localExists:
log.Info("local filesystem does not exist")
case localState.Placeholder:
log.Info("local filesystem is marked as placeholder")
default:
log.Debug("local filesystem exists")
log.Debug("requesting local filesystem versions")
if versions, err = zfs.ZFSListFilesystemVersions(m.Local, nil); err != nil {
log.WithError(err).Error("cannot get local filesystem versions")
return false
}
}
log.Info("requesting remote filesystem versions")
r := FilesystemVersionsRequest{
Filesystem: m.Remote,
}
var theirVersions []zfs.FilesystemVersion
if err = remote.Call("FilesystemVersionsRequest", &r, &theirVersions); err != nil {
log.WithError(err).Error("cannot get remote filesystem versions")
log.Warn("stopping replication for all filesystems mapped as children of receiving filesystem")
return false
}
log.Debug("computing diff between remote and local filesystem versions")
diff := zfs.MakeFilesystemDiff(versions, theirVersions)
log.WithField("diff", diff).Debug("diff between local and remote filesystem")
if localState.Placeholder && diff.Conflict != zfs.ConflictAllRight {
panic("internal inconsistency: local placeholder implies ConflictAllRight")
}
switch diff.Conflict {
case zfs.ConflictAllRight:
log.WithField("replication_policy", p.InitialReplPolicy).Info("performing initial sync, following policy")
if p.InitialReplPolicy != InitialReplPolicyMostRecent {
panic(fmt.Sprintf("policy '%s' not implemented", p.InitialReplPolicy))
}
snapsOnly := make([]zfs.FilesystemVersion, 0, len(diff.MRCAPathRight))
for s := range diff.MRCAPathRight {
if diff.MRCAPathRight[s].Type == zfs.Snapshot {
snapsOnly = append(snapsOnly, diff.MRCAPathRight[s])
}
}
if len(snapsOnly) < 1 {
log.Warn("cannot perform initial sync: no remote snapshots")
return false
}
r := InitialTransferRequest{
Filesystem: m.Remote,
FilesystemVersion: snapsOnly[len(snapsOnly)-1],
}
log.WithField("version", r.FilesystemVersion).Debug("requesting snapshot stream")
var stream io.Reader
if err = remote.Call("InitialTransferRequest", &r, &stream); err != nil {
log.WithError(err).Error("cannot request initial transfer")
return false
}
log.Debug("received initial transfer request response")
log.Debug("invoke zfs receive")
recvArgs := []string{"-u"}
if localState.Placeholder {
log.Info("receive with forced rollback to replace placeholder filesystem")
recvArgs = append(recvArgs, "-F")
}
progressStream := p.task.ProgressUpdater(stream)
if err = zfs.ZFSRecv(m.Local, progressStream, recvArgs...); err != nil {
log.WithError(err).Error("cannot receive stream")
return false
}
log.Info("finished receiving stream") // TODO rx delta
// TODO unify with recv path of ConflictIncremental
log.Debug("configuring properties of received filesystem")
props := zfs.NewZFSProperties()
props.Set("readonly", "on")
if err = zfs.ZFSSet(m.Local, props); err != nil {
log.WithError(err).Error("cannot set readonly property")
}
log.Info("finished initial transfer")
return true
case zfs.ConflictIncremental:
if len(diff.IncrementalPath) < 2 {
log.Info("remote and local are in sync")
return true
}
log.Info("following incremental path from diff")
for i := 0; i < len(diff.IncrementalPath)-1; i++ {
from, to := diff.IncrementalPath[i], diff.IncrementalPath[i+1]
log := log.WithField(logIncFromField, from.Name).WithField(logIncToField, to.Name)
log.Debug("requesting incremental snapshot stream")
r := IncrementalTransferRequest{
Filesystem: m.Remote,
From: from,
To: to,
}
var stream io.Reader
if err = remote.Call("IncrementalTransferRequest", &r, &stream); err != nil {
log.WithError(err).Error("cannot request incremental snapshot stream")
return false
}
log.Debug("invoking zfs receive")
progressStream := p.task.ProgressUpdater(stream)
// TODO protect against malicious incremental stream
if err = zfs.ZFSRecv(m.Local, progressStream); err != nil {
log.WithError(err).Error("cannot receive stream")
return false
}
log.Info("finished incremental transfer") // TODO increment rx
}
log.Info("finished following incremental path") // TODO path rx
return true
case zfs.ConflictNoCommonAncestor:
fallthrough
case zfs.ConflictDiverged:
var jsonDiff bytes.Buffer
if err := json.NewEncoder(&jsonDiff).Encode(diff); err != nil {
log.WithError(err).Error("cannot JSON-encode diff")
return false
}
var problem, resolution string
switch diff.Conflict {
case zfs.ConflictNoCommonAncestor:
problem = "remote and local filesystem have snapshots, but no common one"
resolution = "perform manual establish a common snapshot history"
case zfs.ConflictDiverged:
problem = "remote and local filesystem share a history but have diverged"
resolution = "perform manual replication or delete snapshots on the receiving" +
"side to establish an incremental replication parse"
}
log.WithField("diff", jsonDiff.String()).
WithField("problem", problem).
WithField("resolution", resolution).
Error("manual conflict resolution required")
return false
}
panic("should not be reached")
}
func (p *Puller) Pull() {
p.task.Enter("run")
defer p.task.Finish()
p.task.Log().Info("request remote filesystem list")
remoteFilesystems, ok := p.getRemoteFilesystems()
if !ok {
return
}
p.task.Log().Debug("map remote filesystems to local paths and determine order for per-filesystem sync")
replMapping, ok := p.buildReplMapping(remoteFilesystems)
if !ok {
return
}
p.task.Log().Debug("build cache for already present local filesystem state")
p.task.Enter("cache_local_fs_state")
localFilesystemState, err := zfs.ZFSListFilesystemState()
p.task.Finish()
if err != nil {
p.task.Log().WithError(err).Error("cannot request local filesystem state")
return
}
localTraversal := zfs.NewDatasetPathForest()
for _, m := range replMapping {
localTraversal.Add(m.Local)
}
p.task.Log().Info("start per-filesystem sync")
localTraversal.WalkTopDown(func(v zfs.DatasetPathVisit) bool {
p.task.Enter("tree_walk")
defer p.task.Finish()
log := p.task.Log().WithField(logFSField, v.Path.ToString())
if v.FilledIn {
if _, exists := localFilesystemState[v.Path.ToString()]; exists {
// No need to verify if this is a placeholder or not. It is sufficient
// to know we can add child filesystems to it
return true
}
log.Debug("create placeholder filesystem")
p.task.Enter("create_placeholder")
err = zfs.ZFSCreatePlaceholderFilesystem(v.Path)
p.task.Finish()
if err != nil {
log.Error("cannot create placeholder filesystem")
return false
}
return true
}
m, ok := replMapping[v.Path.ToString()]
if !ok {
panic("internal inconsistency: replMapping should contain mapping for any path that was not filled in by WalkTopDown()")
}
return p.replFilesystem(m, localFilesystemState)
})
return
}

View File

@ -1,29 +0,0 @@
jobs:
- name: mirror_local
type: local
# snapshot the filesystems matched by the left-hand-side of the mapping
# every 10m with zrepl_ as prefix
mapping: {
"zroot/var/db<": "storage/backups/local/zroot/var/db",
"zroot/usr/home<": "storage/backups/local/zroot/usr/home",
"zroot/usr/home/paranoid": "!", #don't backup paranoid user
"zroot/poudriere/ports<": "!", #don't backup the ports trees
}
snapshot_prefix: zrepl_
interval: 10m
initial_repl_policy: most_recent
# keep one hour of 10m interval snapshots of filesystems matched by
# the left-hand-side of the mapping
prune_lhs:
policy: grid
grid: 1x1h(keep=all)
keep_bookmarks: all
# follow a grandfathering scheme for filesystems on the right-hand-side of the mapping
prune_rhs:
policy: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d

View File

@ -1,27 +0,0 @@
jobs:
- name: fullbackup_prod1
type: pull
# connect to remote using ssh / stdinserver command
connect:
type: ssh+stdinserver
host: prod1.example.com
user: root
port: 22
identity_file: /root/.ssh/id_ed25519
# pull (=ask for new snapshots) every 10m, prune afterwards
# this will leave us at most 10m behind production
interval: 10m
# pull all offered filesystems to storage/backups/zrepl/pull/prod1.example.com
mapping: {
"<":"storage/backups/zrepl/pull/prod1.example.com"
}
initial_repl_policy: most_recent
# follow a grandfathering scheme for filesystems on the right-hand-side of the mapping
snapshot_prefix: zrepl_
prune:
policy: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d

View File

@ -1,47 +0,0 @@
global:
serve:
stdinserver:
# Directory where AF_UNIX sockets for stdinserver command are placed.
#
# `zrepl stdinserver CLIENT_IDENTITY`
# * connects to the socket in $sockdir/CLIENT_IDENTITY
# * sends its stdin / stdout file descriptors to the `zrepl daemon` process (see cmsg(3))
# * does nothing more
#
# This enables a setup where `zrepl daemon` is not directly exposed to the internet
# but instead all traffic is tunnelled through SSH.
# The server with the source job has an authorized_keys file entry for the public key
# used by the corresponding pull job
#
# command="/mnt/zrepl stdinserver CLIENT_IDENTITY" ssh-ed25519 AAAAC3NzaC1E... zrepl@pullingserver
#
# Below is the default value.
sockdir: /var/run/zrepl/stdinserver
jobs:
- name: fullbackup_prod1
# expect remote to connect via ssh+stdinserver with fullbackup_prod1 as client_identity
type: source
serve:
type: stdinserver # see global.serve.stdinserver for explanation
client_identity: fullbackup_prod1
# snapshot these filesystems every 10m with zrepl_ as prefix
filesystems: {
"zroot/var/db<": "ok",
"zroot/usr/home<": "ok",
"zroot/var/tmp": "!", #don't backup /tmp
}
snapshot_prefix: zrepl_
interval: 10m
# keep 1 hour of snapshots (6 at 10m interval)
# and one day of bookmarks in case pull doesn't work (link down, etc)
# => keep_bookmarks = 24h / interval = 24h / 10m = 144
prune:
policy: grid
grid: 1x1h(keep=all)
keep_bookmarks: 144

View File

@ -1,20 +0,0 @@
jobs:
- name: fullbackup_prod1
# expect remote to connect via ssh+stdinserver with fullbackup_prod1 as client_identity
type: push-sink
serve:
type: stdinserver
client_identity: fullbackup_prod1
# map all pushed datasets to storage/backups/zrepl/sink/prod1.example.com
mapping: {
"<":"storage/backups/zrepl/sink/prod1.example.com"
}
# follow a grandfathering scheme for filesystems on the right-hand-side of the mapping
prune:
policy: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d

View File

@ -1,26 +0,0 @@
jobs:
- name: fullbackup_prod1
# connect to remote using ssh / stdinserver command
type: push
connect:
type: ssh+stdinserver
host: prod1.example.com
user: root
port: 22
identity_file: /root/.ssh/id_ed25519
# snapshot these datasets every 10m with zrepl_ as prefix
filesystems: {
"zroot/var/db<": "ok",
"zroot/usr/home<": "!",
}
snapshot_prefix: zrepl_
interval: 10m
# keep a one-day window of 10m-interval snapshots in case push doesn't work (link down, etc)
# (we cannot keep more than one day because this host will run out of disk space)
prune:
policy: grid
grid: 1x1d(keep=all)

View File

@ -1,33 +0,0 @@
global:
serve:
stdinserver:
sockdir: /var/run/zrepl/stdinserver
jobs:
- name: debian2_pull
# JOB DEBUGGING OPTIONS
# should be equal for all job types, but each job implements the debugging itself
# => consult job documentation for supported options
debug:
conn: # debug the io.ReadWriteCloser connection
read_dump: /tmp/connlog_read # dump results of Read() invocations to this file
write_dump: /tmp/connlog_write # dump results of Write() invocations to this file
rpc: # debug the RPC protocol implementation
log: true # log output from rpc layer to the job log
# ... just to make the unit tests pass.
# check other examples, e.g. localbackup or pullbackup, for what the stuff below means
type: source
serve:
type: stdinserver
client_identity: debian2
filesystems: {
"pool1/db<": ok
}
snapshot_prefix: zrepl_
interval: 1s
prune:
policy: grid
grid: 1x10s(keep=all)
keep_bookmarks: all

View File

@ -1,19 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIDIzCCAgsCAQEwDQYJKoZIhvcNAQELBQAwWTELMAkGA1UEBhMCQVUxEzARBgNV
BAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0
ZDESMBAGA1UEAwwJbG9nc2VydmVyMB4XDTE3MDkyNDEyMzAzNloXDTE3MTAyNDEy
MzAzNlowVjELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNV
BAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MIIB
IjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAt/xJTUlqApeJGzRD+w2J8sZS
Bo+s+04T987L/M6gaCo8aDSTEb/ZH3XSoU5JEmO6kPpwNNapOsaEhTCjndZQdm5F
uqiUtAg1uW0HCkBEIDkGr9bFHDKzpewGmmMgfQ2+hfiBR/4ZCrc/vd9P0W9BiWQS
Dtc7p22XraWPVL8HlSz5K/Ih+V6i8O+kBltZkusiJh2bWPoRp/netiTZuc6du+Wp
kpWp1OBaTU4GXIAlLj5afF14BBphRQK983Yhaz53BkA7OQ76XxowynMjmuLQVGmK
f1R9zEJuohTX9XIr1tp/ueRHcS4Awk6LcNZUMCV6270FNSIw2f4hbOZvep+t2wID
AQABMA0GCSqGSIb3DQEBCwUAA4IBAQACK3OeNzScpiNwz/jpg/usQzvXbZ/wDvml
YLjtzn/A65ox8a8BhxvH1ydyoCM2YAGYX7+y7qXJnMgRO/v8565CQIVcznHhg9ST
3828/WqZ3bXf2DV5GxKKQf7hPmBnyVUUhn/Ny91MECED27lZucWiX/bczN8ffDeh
M3+ngezcJxsOBd4x0gLrqIJCoaFRSeepOaFEW6GHQ8loxE9GmA7FQd2phIpJHFSd
Z7nQl7X5C1iN2OboEApJHwtmNVC45UlOpg53vo2sDTLhSfdogstiWi8x1HmvhIGM
j3XHs0Illvo9OwVrmgUph8zQ7pvr/AFrTOIbhgzl/9uVUk5ApwFM
-----END CERTIFICATE-----

View File

@ -1,16 +0,0 @@
-----BEGIN CERTIFICATE REQUEST-----
MIICmzCCAYMCAQAwVjELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUx
ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xp
ZW50MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAt/xJTUlqApeJGzRD
+w2J8sZSBo+s+04T987L/M6gaCo8aDSTEb/ZH3XSoU5JEmO6kPpwNNapOsaEhTCj
ndZQdm5FuqiUtAg1uW0HCkBEIDkGr9bFHDKzpewGmmMgfQ2+hfiBR/4ZCrc/vd9P
0W9BiWQSDtc7p22XraWPVL8HlSz5K/Ih+V6i8O+kBltZkusiJh2bWPoRp/netiTZ
uc6du+WpkpWp1OBaTU4GXIAlLj5afF14BBphRQK983Yhaz53BkA7OQ76XxowynMj
muLQVGmKf1R9zEJuohTX9XIr1tp/ueRHcS4Awk6LcNZUMCV6270FNSIw2f4hbOZv
ep+t2wIDAQABoAAwDQYJKoZIhvcNAQELBQADggEBAKnlr0Qs5KYF85u2YA7DJ5pL
HwAx+qNoNbox5CS1aynrDBpDTWLaErviUJ+4WxRlRyTMEscMOIOKajbYhqqFmtGZ
mu3SshZnFihErw8TOQMyU1LGGG+l6r+6ve5TciwJRLla2Y75z7izr6cyvQNRWdLr
PvxL1/Yqr8LKha12+7o28R4SLf6/GY0GcedqoebRmtuwA/jES0PuGauEUD5lH4cj
Me8sqRrB+IMHQ5j8hlJX4DbA8UQRUBL64sHkQzeQfWu+qkWmS5I19CFfLNrcH+OV
yhyjGfN0q0jHyHdpckBhgzS7IIdo6P66AIlm4qpHM7Scra3JaGM7oaZPamJ6f8U=
-----END CERTIFICATE REQUEST-----

View File

@ -1,28 +0,0 @@
-----BEGIN PRIVATE KEY-----
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC3/ElNSWoCl4kb
NEP7DYnyxlIGj6z7ThP3zsv8zqBoKjxoNJMRv9kfddKhTkkSY7qQ+nA01qk6xoSF
MKOd1lB2bkW6qJS0CDW5bQcKQEQgOQav1sUcMrOl7AaaYyB9Db6F+IFH/hkKtz+9
30/Rb0GJZBIO1zunbZetpY9UvweVLPkr8iH5XqLw76QGW1mS6yImHZtY+hGn+d62
JNm5zp275amSlanU4FpNTgZcgCUuPlp8XXgEGmFFAr3zdiFrPncGQDs5DvpfGjDK
cyOa4tBUaYp/VH3MQm6iFNf1civW2n+55EdxLgDCTotw1lQwJXrbvQU1IjDZ/iFs
5m96n63bAgMBAAECggEAF4om0sWe06ARwbJJNFjCGpa3LfG5/xk5Qs5pmPnS2iD1
Q5veaTnzjKvlfA/pF3o9B4mTS59fXY7Cq8vSU0J1XwGy2DPzeqlGPmgtq2kXjkvd
iCfhZj8ybvsoyR3/rSBSDRADcnOXPqC9fgyRSMmESBDOoql1D3HdIzF4ii46ySIU
/XQvExS6NWifbP+Ue6DETV8NhreO5PqjeXLITQhhndtc8MDL/8eCNOyN8XjYIWKX
smlBYtRQYOOY9BHOQgUn6yvPHrtKJNKci+qcQNvWir66mBhY1o40MH5wTIV+8yP2
Vbm/VzoNKIYgeROsilBW7QTwGvkDn3R11zeTqfUNSQKBgQD0eFzhJAEZi4uBw6Tg
NKmBC5Y1IHPOsb5gKPNz9Z9j4qYRDySgYl6ISk+2EdhgUCo1NmTk8EIPQjIerUVf
S+EogFnpsj8U9LR3OM79DaGkNULxrHqhd209/g8DtVgk7yjkxL4vmVOv8qpHMp/7
eWsylN7AOxj2RB/eXYQBPrw+jQKBgQDAqae9HasLmvpJ9ktTv30yZSKXC+LP4A0D
RBBmx410VpPd4CvcpCJxXmjer6B7+9L1xHYP2pvsnMBid5i0knuvyK28dYy7fldl
CzWvb+lqNA5YYPFXQED4oEdihlQczoI1Bm06SFizeAKD1Q9e2c+lgbR/51j8xuXi
twvhMj/YBwKBgQCZw97/iQrcC2Zq7yiUEOuQjD4lGk1c83U/vGIsTJC9XcCAOFsc
OeMlrD/oz96d7a4unBDn4qpaOJOXsfpRT0PGmrxy/jcpMiUUW/ntNpa11v5NTeQw
DRL8DAFbnsNbL8Yz5f+Nps35fBNYBuKTZLJlNTfKByHTO9QjpAQ0WEZEvQKBgQCi
Ovm83EuYVSKmvxcE6Tyx/8lVqTOO2Vn7wweQlD4/lVujvE0S2L8L+XSS9w5K+GzW
eFz10p3zarbw80YJ30L5bSEmjVE43BUZR4woMzM4M6dUsiTm1HshIE2b4ALZ0uZ/
Ye794ceXL9nmSrVLqFsaQZLNFPCwwYb4FiyRry9lZwKBgAO9VbWcN8SEeBDKo3z8
yRbRTc6sI+AdKY44Dfx0tqOPmTjO3mE4X1GU4sbfD2Bvg3DdjwTuxxC/jHaKu0GG
dTM0CbrZGbDAj7E87SOcN/PWEeBckSvuQq5H3DQfwIpTmlS1l5oZn9CxRGbLqC2G
ifnel8XWUG0ROybsr1tk4mzW
-----END PRIVATE KEY-----

View File

@ -1,21 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIDiDCCAnCgAwIBAgIJALhp/WvTQeg/MA0GCSqGSIb3DQEBCwUAMFkxCzAJBgNV
BAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBX
aWRnaXRzIFB0eSBMdGQxEjAQBgNVBAMMCWxvZ3NlcnZlcjAeFw0xNzA5MjQxMjI3
MDRaFw0yNzA5MjIxMjI3MDRaMFkxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21l
LVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQxEjAQBgNV
BAMMCWxvZ3NlcnZlcjCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKs3
TLYfXhV3hap71tOkhPQlM+m0EKRAo8Nua50Cci5UhDo4JkVpyYok1h+NFkqmjU2b
IiIuGvsZZPOWYjbWWnSJE4+n5pBFBzcfNQ4d8xVxjANImFn6Tcehhj0WkbDIv/Ge
364XUgywS7u3EGQj/FO7vZ8KHlUxBHNuPIOPHftwIVRyleh5K32UyBaSpSmnqGos
rvI1byMuznavcZpOs4vlebZ+Jy6a20iKf9fj/0f0t0O+F5x3JIk07D3zSywhJ4RM
M0mGIUmYXbh2SMh+f61KDZLDANpz/pMAPbUJe0mxEtBf0tnwK1gEqc3SLwA0EwiM
8Hnn2iaH5Ln20UE3LOkCAwEAAaNTMFEwHQYDVR0OBBYEFDXoDcwx9SngzZcRYCeP
BplBecfiMB8GA1UdIwQYMBaAFDXoDcwx9SngzZcRYCePBplBecfiMA8GA1UdEwEB
/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBADyNvs4AA91x3gurQb1pcPVhK6nR
mkYSTN1AsDKSRi/X2iCUmR7G7FlF7XW8mntTpHvVzcs+gr94WckH5wqEOA5iZnaw
PXUWexmdXUge4hmC2q6kBQ5e2ykhSJMRVZXvOLZOZV9qitceamHESV1cKZSNMvZM
aCSVA1RK61/nUzs04pVp5PFPv9gFxJp9ki39FYFdsgZmM5RZ5I/FqxxvTJzu4RnH
VPjsMopzARYwJw6dV2bKdFSYOE8B/Vs3Yv0GxjrABw2ko4PkBPTjLIz22x6+Hd9r
K9BQi4pVmQfvppF5+SORSftlHSS+N47b0DD1rW1f5R6QGi71dFuJGikOwvY=
-----END CERTIFICATE-----

View File

@ -1,28 +0,0 @@
-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCrN0y2H14Vd4Wq
e9bTpIT0JTPptBCkQKPDbmudAnIuVIQ6OCZFacmKJNYfjRZKpo1NmyIiLhr7GWTz
lmI21lp0iROPp+aQRQc3HzUOHfMVcYwDSJhZ+k3HoYY9FpGwyL/xnt+uF1IMsEu7
txBkI/xTu72fCh5VMQRzbjyDjx37cCFUcpXoeSt9lMgWkqUpp6hqLK7yNW8jLs52
r3GaTrOL5Xm2ficumttIin/X4/9H9LdDvhecdySJNOw980ssISeETDNJhiFJmF24
dkjIfn+tSg2SwwDac/6TAD21CXtJsRLQX9LZ8CtYBKnN0i8ANBMIjPB559omh+S5
9tFBNyzpAgMBAAECggEBAIY8ZwJq+WKvQLb3POjWFf8so9TY/ispGrwAeJKy9j5o
uPrERw0o8YBDfTVjclS43BQ6Srqtly3DLSjlgL8ps+WmCxYYN2ZpGE0ZRIl65bis
O2/fnML+wbiAZTTD2xnVatfPDeP6GLQmDFpyHoHEzPIBQZvNXRbBxZGSnhMvQ/x7
FhqSBQG4kf3b1XDCENIbFEVOBOCg7WtMiIgjEGS7QnW3I65/Zt+Ts1LXRZbz+6na
Gmi0PGHA/oLUh1NRzsF4zuZn6fFzja5zw4mkt+JvCWEoxg1QhRAxRp6QQwmZ6MIc
1rw1D4Z+c5UEKyqHeIwZj4M6UNPhCfTXVm47c9eSiGECgYEA4U8pB+7eRo2fqX0C
nWsWMcmsULJvwplQnUSFenUayPn3E8ammS/ZBHksoKhj82vwIdDbtS1hQZn8Bzsi
atc8au0wz0YRDcVDzHX4HknXVQayHtP/FTPeSr5hwpoY8vhEbySuxBTBkXCrp4dx
u5ErfOiYEP3Q1ZvPRywelrATu20CgYEAwonV5dgOcen/4oAirlnvufc2NfqhAQwJ
FJ/JSVMAcXxPYu3sZMv0dGWrX8mLc+P1+XMCuV/7eBM/vU2LbDzmpeUV8sJfB2jw
wyKqKXZwBgeq60btriA4f+0ElwRGgU2KSiniUuuTX2JmyftFQx4cVAQRCFk27NY0
09psSsYyre0CgYBo6unabdtH029EB5iOIW3GZXk+Yrk0TxyA/4WAjsOYTv5FUT4H
G4bdVGf5sDBLDDpYJOAKsEUXvVLlMx5FzlCuIiGWg7QxS2jU7yJJSG1jhKixPlsM
Toj3GUyAyC1SB1Ymw1g2qsuwpFzquGG3zFQJ6G3Xi7oRnmqZY+wik3+8yQKBgB11
SdKYOPe++2SNCrNkIw0CBk9+OEs0S1u4Jn7X9sU4kbzlUlqhF89YZe8HUfqmlmTD
qbHwet/f6lL8HxSw1Cxi2EP+cu1oUqz53tKQgL4pAxTFlNA9SND2Ty+fEh4aY8p/
NSphSduzxuTnC8HyGVAPnZSqDcsnVLCP7r4T7TCxAoGAbJygkkk/gZ9pT4fZoIaq
8CMR8FTfxtkwCuZsWccSMUOWtx9nqet3gbCpKHfyoYZiKB4ke+lnUz4uFS16Y3hG
kN0hFfvfoNa8eB2Ox7vs60cMMfWJac0H7KSaDDy+EvbhE2KtQADT0eWxMyhzGR8p
5CbIivB0QCjeQIA8dOQpE8E=
-----END PRIVATE KEY-----

View File

@ -1,28 +0,0 @@
global:
logging:
- outlet: stdout
level: warn
format: human
- outlet: tcp
level: debug
format: json
net: tcp
address: 127.0.0.1:8080
retry_interval: 1s
tls: # if not specified, use plain TCP
ca: sampleconf/random/logging/logserver.crt
cert: sampleconf/random/logging/client.crt
key: sampleconf/random/logging/client.key
- outlet: syslog
level: debug
format: logfmt
monitoring:
- type: prometheus
listen: ':9090'
jobs: []

View File

@ -1,215 +0,0 @@
package cmd
import (
"os"
"bytes"
"context"
"fmt"
"sort"
"strings"
"github.com/kr/pretty"
"github.com/spf13/cobra"
"github.com/zrepl/zrepl/logger"
"github.com/zrepl/zrepl/zfs"
"time"
)
var testCmd = &cobra.Command{
Use: "test",
Short: "test configuration",
PersistentPreRun: testCmdGlobalInit,
}
var testCmdGlobal struct {
log Logger
conf *Config
}
var testConfigSyntaxCmd = &cobra.Command{
Use: "config",
Short: "parse config file and dump parsed datastructure",
Run: doTestConfig,
}
var testDatasetMapFilter = &cobra.Command{
Use: "pattern jobname test/zfs/dataset/path",
Short: "test dataset mapping / filter specified in config",
Example: ` zrepl test pattern my_pull_job tank/tmp`,
Run: doTestDatasetMapFilter,
}
var testPrunePolicyArgs struct {
side PrunePolicySide
showKept bool
showRemoved bool
}
var testPrunePolicyCmd = &cobra.Command{
Use: "prune jobname",
Short: "do a dry-run of the pruning part of a job",
Run: doTestPrunePolicy,
}
func init() {
RootCmd.AddCommand(testCmd)
testCmd.AddCommand(testConfigSyntaxCmd)
testCmd.AddCommand(testDatasetMapFilter)
testPrunePolicyCmd.Flags().VarP(&testPrunePolicyArgs.side, "side", "s", "prune_lhs (left) or prune_rhs (right)")
testPrunePolicyCmd.Flags().BoolVar(&testPrunePolicyArgs.showKept, "kept", false, "show kept snapshots")
testPrunePolicyCmd.Flags().BoolVar(&testPrunePolicyArgs.showRemoved, "removed", true, "show removed snapshots")
testCmd.AddCommand(testPrunePolicyCmd)
}
func testCmdGlobalInit(cmd *cobra.Command, args []string) {
out := logger.NewOutlets()
out.Add(WriterOutlet{&NoFormatter{}, os.Stdout}, logger.Info)
log := logger.NewLogger(out, 1*time.Second)
testCmdGlobal.log = log
var err error
if testCmdGlobal.conf, err = ParseConfig(rootArgs.configFile); err != nil {
testCmdGlobal.log.Printf("error parsing config file: %s", err)
os.Exit(1)
}
}
func doTestConfig(cmd *cobra.Command, args []string) {
log, conf := testCmdGlobal.log, testCmdGlobal.conf
log.Printf("config ok")
log.Printf("%# v", pretty.Formatter(conf))
return
}
func doTestDatasetMapFilter(cmd *cobra.Command, args []string) {
log, conf := testCmdGlobal.log, testCmdGlobal.conf
if len(args) != 2 {
log.Printf("specify job name as first postitional argument, test input as second")
log.Printf(cmd.UsageString())
os.Exit(1)
}
n, i := args[0], args[1]
jobi, err := conf.LookupJob(n)
if err != nil {
log.Printf("%s", err)
os.Exit(1)
}
var mf *DatasetMapFilter
switch j := jobi.(type) {
case *PullJob:
mf = j.Mapping
case *SourceJob:
mf = j.Filesystems
case *LocalJob:
mf = j.Mapping
default:
panic("incomplete implementation")
}
ip, err := zfs.NewDatasetPath(i)
if err != nil {
log.Printf("cannot parse test input as ZFS dataset path: %s", err)
os.Exit(1)
}
if mf.filterMode {
pass, err := mf.Filter(ip)
if err != nil {
log.Printf("error evaluating filter: %s", err)
os.Exit(1)
}
log.Printf("filter result: %v", pass)
} else {
res, err := mf.Map(ip)
if err != nil {
log.Printf("error evaluating mapping: %s", err)
os.Exit(1)
}
toStr := "NO MAPPING"
if res != nil {
toStr = res.ToString()
}
log.Printf("%s => %s", ip.ToString(), toStr)
}
}
func doTestPrunePolicy(cmd *cobra.Command, args []string) {
log, conf := testCmdGlobal.log, testCmdGlobal.conf
if cmd.Flags().NArg() != 1 {
log.Printf("specify job name as first positional argument")
log.Printf(cmd.UsageString())
os.Exit(1)
}
jobname := cmd.Flags().Arg(0)
jobi, err := conf.LookupJob(jobname)
if err != nil {
log.Printf("%s", err)
os.Exit(1)
}
jobp, ok := jobi.(PruningJob)
if !ok {
log.Printf("job doesn't do any prunes")
os.Exit(0)
}
log.Printf("job dump:\n%s", pretty.Sprint(jobp))
task := NewTask("", jobi, log)
pruner, err := jobp.Pruner(task, testPrunePolicyArgs.side, true)
if err != nil {
log.Printf("cannot create test pruner: %s", err)
os.Exit(1)
}
log.Printf("start pruning")
ctx := context.WithValue(context.Background(), contextKeyLog, log)
result, err := pruner.Run(ctx)
if err != nil {
log.Printf("error running pruner: %s", err)
os.Exit(1)
}
sort.Slice(result, func(i, j int) bool {
return strings.Compare(result[i].Filesystem.ToString(), result[j].Filesystem.ToString()) == -1
})
var b bytes.Buffer
for _, r := range result {
fmt.Fprintf(&b, "%s\n", r.Filesystem.ToString())
if testPrunePolicyArgs.showKept {
fmt.Fprintf(&b, "\tkept:\n")
for _, v := range r.Keep {
fmt.Fprintf(&b, "\t- %s\n", v.Name)
}
}
if testPrunePolicyArgs.showRemoved {
fmt.Fprintf(&b, "\tremoved:\n")
for _, v := range r.Remove {
fmt.Fprintf(&b, "\t- %s\n", v.Name)
}
}
}
log.Printf("pruning result:\n%s", b.String())
}

483
config/config.go Normal file
View File

@ -0,0 +1,483 @@
package config
import (
"fmt"
"github.com/pkg/errors"
"github.com/zrepl/yaml-config"
"io/ioutil"
"os"
"reflect"
"regexp"
"strconv"
"time"
)
type Config struct {
Jobs []JobEnum `yaml:"jobs"`
Global *Global `yaml:"global,optional,fromdefaults"`
}
func (c *Config) Job(name string) (*JobEnum, error) {
for _, j := range c.Jobs {
if j.Name() == name {
return &j, nil
}
}
return nil, fmt.Errorf("job %q not defined in config", name)
}
type JobEnum struct {
Ret interface{}
}
func (j JobEnum) Name() string {
var name string
switch v := j.Ret.(type) {
case *PushJob: name = v.Name
case *SinkJob: name = v.Name
case *PullJob: name = v.Name
case *SourceJob: name = v.Name
default:
panic(fmt.Sprintf("unknownn job type %T", v))
}
return name
}
type ActiveJob struct {
Type string `yaml:"type"`
Name string `yaml:"name"`
Connect ConnectEnum `yaml:"connect"`
Pruning PruningSenderReceiver `yaml:"pruning"`
Debug JobDebugSettings `yaml:"debug,optional"`
}
type PassiveJob struct {
Type string `yaml:"type"`
Name string `yaml:"name"`
Serve ServeEnum `yaml:"serve"`
Debug JobDebugSettings `yaml:"debug,optional"`
}
type PushJob struct {
ActiveJob `yaml:",inline"`
Snapshotting SnapshottingEnum `yaml:"snapshotting"`
Filesystems FilesystemsFilter `yaml:"filesystems"`
}
type PullJob struct {
ActiveJob `yaml:",inline"`
RootFS string `yaml:"root_fs"`
Interval time.Duration `yaml:"interval,positive"`
}
type SinkJob struct {
PassiveJob `yaml:",inline"`
RootFS string `yaml:"root_fs"`
}
type SourceJob struct {
PassiveJob `yaml:",inline"`
Snapshotting SnapshottingEnum `yaml:"snapshotting"`
Filesystems FilesystemsFilter `yaml:"filesystems"`
}
type FilesystemsFilter map[string]bool
type SnapshottingEnum struct {
Ret interface{}
}
type SnapshottingPeriodic struct {
Type string `yaml:"type"`
Prefix string `yaml:"prefix"`
Interval time.Duration `yaml:"interval,positive"`
}
type SnapshottingManual struct {
Type string `yaml:"type"`
}
type PruningSenderReceiver struct {
KeepSender []PruningEnum `yaml:"keep_sender"`
KeepReceiver []PruningEnum `yaml:"keep_receiver"`
}
type PruningLocal struct {
Keep []PruningEnum `yaml:"keep"`
}
type LoggingOutletEnumList []LoggingOutletEnum
func (l *LoggingOutletEnumList) SetDefault() {
def := `
type: "stdout"
time: true
level: "warn"
format: "human"
`
s := StdoutLoggingOutlet{}
err := yaml.UnmarshalStrict([]byte(def), &s)
if err != nil {
panic(err)
}
*l = []LoggingOutletEnum{LoggingOutletEnum{Ret: s}}
}
var _ yaml.Defaulter = &LoggingOutletEnumList{}
type Global struct {
Logging *LoggingOutletEnumList `yaml:"logging,optional,fromdefaults"`
Monitoring []MonitoringEnum `yaml:"monitoring,optional"`
Control *GlobalControl `yaml:"control,optional,fromdefaults"`
Serve *GlobalServe `yaml:"serve,optional,fromdefaults"`
RPC *RPCConfig `yaml:"rpc,optional,fromdefaults"`
}
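// Default fills i, which must be a non-nil pointer to a struct, with that
// struct's yaml default values by unmarshalling an empty document into it.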
func Default(i interface{}) {
v := reflect.ValueOf(i)
if v.Kind() != reflect.Ptr {
panic(v)
}
y := `{}`
err := yaml.Unmarshal([]byte(y), v.Interface())
if err != nil {
panic(err)
}
}
type RPCConfig struct {
Timeout time.Duration `yaml:"timeout,optional,positive,default=10s"`
TxChunkSize uint32 `yaml:"tx_chunk_size,optional,default=32768"`
RxStructuredMaxLen uint32 `yaml:"rx_structured_max,optional,default=16777216"`
RxStreamChunkMaxLen uint32 `yaml:"rx_stream_chunk_max,optional,default=16777216"`
RxHeaderMaxLen uint32 `yaml:"rx_header_max,optional,default=40960"`
SendHeartbeatInterval time.Duration `yaml:"send_heartbeat_interval,optional,positive,default=5s"`
}
type ConnectEnum struct {
Ret interface{}
}
type ConnectCommon struct {
Type string `yaml:"type"`
RPC *RPCConfig `yaml:"rpc,optional"`
}
type TCPConnect struct {
ConnectCommon `yaml:",inline"`
Address string `yaml:"address"`
DialTimeout time.Duration `yaml:"dial_timeout,positive,default=10s"`
}
type TLSConnect struct {
ConnectCommon `yaml:",inline"`
Address string `yaml:"address"`
Ca string `yaml:"ca"`
Cert string `yaml:"cert"`
Key string `yaml:"key"`
ServerCN string `yaml:"server_cn"`
DialTimeout time.Duration `yaml:"dial_timeout,positive,default=10s"`
}
type SSHStdinserverConnect struct {
ConnectCommon `yaml:",inline"`
Host string `yaml:"host"`
User string `yaml:"user"`
Port uint16 `yaml:"port"`
IdentityFile string `yaml:"identity_file"`
TransportOpenCommand []string `yaml:"transport_open_command,optional"` //TODO unused
SSHCommand string `yaml:"ssh_command,optional"` //TODO unused
Options []string `yaml:"options,optional"`
DialTimeout time.Duration `yaml:"dial_timeout,positive,default=10s"`
}
type LocalConnect struct {
ConnectCommon `yaml:",inline"`
ListenerName string `yaml:"listener_name"`
ClientIdentity string `yaml:"client_identity"`
}
type ServeEnum struct {
Ret interface{}
}
type ServeCommon struct {
Type string `yaml:"type"`
RPC *RPCConfig `yaml:"rpc,optional"`
}
type TCPServe struct {
ServeCommon `yaml:",inline"`
Listen string `yaml:"listen"`
Clients map[string]string `yaml:"clients"`
}
type TLSServe struct {
ServeCommon `yaml:",inline"`
Listen string `yaml:"listen"`
Ca string `yaml:"ca"`
Cert string `yaml:"cert"`
Key string `yaml:"key"`
ClientCNs []string `yaml:"client_cns"`
HandshakeTimeout time.Duration `yaml:"handshake_timeout,positive,default=10s"`
}
type StdinserverServer struct {
ServeCommon `yaml:",inline"`
ClientIdentities []string `yaml:"client_identities"`
}
type LocalServe struct {
ServeCommon `yaml:",inline"`
ListenerName string `yaml:"listener_name"`
}
type PruningEnum struct {
Ret interface{}
}
type PruneKeepNotReplicated struct {
Type string `yaml:"type"`
KeepSnapshotAtCursor bool `yaml:"keep_snapshot_at_cursor,optional,default=true"`
}
type PruneKeepLastN struct {
Type string `yaml:"type"`
Count int `yaml:"count"`
}
type PruneKeepRegex struct { // FIXME rename to KeepRegex
Type string `yaml:"type"`
Regex string `yaml:"regex"`
}
type LoggingOutletEnum struct {
Ret interface{}
}
type LoggingOutletCommon struct {
Type string `yaml:"type"`
Level string `yaml:"level"`
Format string `yaml:"format"`
}
type StdoutLoggingOutlet struct {
LoggingOutletCommon `yaml:",inline"`
Time bool `yaml:"time,default=true"`
Color bool `yaml:"color,default=true"`
}
type SyslogLoggingOutlet struct {
LoggingOutletCommon `yaml:",inline"`
RetryInterval time.Duration `yaml:"retry_interval,positive,default=10s"`
}
type TCPLoggingOutlet struct {
LoggingOutletCommon `yaml:",inline"`
Address string `yaml:"address"`
Net string `yaml:"net,default=tcp"`
RetryInterval time.Duration `yaml:"retry_interval,positive,default=10s"`
TLS *TCPLoggingOutletTLS `yaml:"tls,optional"`
}
type TCPLoggingOutletTLS struct {
CA string `yaml:"ca"`
Cert string `yaml:"cert"`
Key string `yaml:"key"`
}
type MonitoringEnum struct {
Ret interface{}
}
type PrometheusMonitoring struct {
Type string `yaml:"type"`
Listen string `yaml:"listen"`
}
type GlobalControl struct {
SockPath string `yaml:"sockpath,default=/var/run/zrepl/control"`
}
type GlobalServe struct {
StdinServer *GlobalStdinServer `yaml:"stdinserver,optional,fromdefaults"`
}
type GlobalStdinServer struct {
SockDir string `yaml:"sockdir,default=/var/run/zrepl/stdinserver"`
}
type JobDebugSettings struct {
Conn *struct {
ReadDump string `yaml:"read_dump"`
WriteDump string `yaml:"write_dump"`
} `yaml:"conn,optional"`
RPCLog bool `yaml:"rpc_log,optional,default=false"`
}
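// enumUnmarshal implements the type-dispatch pattern shared by all *Enum
// wrappers below: it first decodes only the "type" field, looks up the
// matching concrete struct in types, then decodes the full node into that
// struct, which the caller stores in its Ret field.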
func enumUnmarshal(u func(interface{}, bool) error, types map[string]interface{}) (interface{}, error) {
var in struct {
Type string
}
if err := u(&in, true); err != nil {
return nil, err
}
if in.Type == "" {
return nil, &yaml.TypeError{Errors: []string{"must specify type"}}
}
v, ok := types[in.Type]
if !ok {
return nil, &yaml.TypeError{Errors: []string{fmt.Sprintf("invalid type name %q", in.Type)}}
}
if err := u(v, false); err != nil {
return nil, err
}
return v, nil
}
func (t *JobEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
"push": &PushJob{},
"sink": &SinkJob{},
"pull": &PullJob{},
"source": &SourceJob{},
})
return
}
func (t *ConnectEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
"tcp": &TCPConnect{},
"tls": &TLSConnect{},
"ssh+stdinserver": &SSHStdinserverConnect{},
"local": &LocalConnect{},
})
return
}
func (t *ServeEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
"tcp": &TCPServe{},
"tls": &TLSServe{},
"stdinserver": &StdinserverServer{},
"local" : &LocalServe{},
})
return
}
func (t *PruningEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
"not_replicated": &PruneKeepNotReplicated{},
"last_n": &PruneKeepLastN{},
"grid": &PruneGrid{},
"regex": &PruneKeepRegex{},
})
return
}
func (t *SnapshottingEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
"periodic": &SnapshottingPeriodic{},
"manual": &SnapshottingManual{},
})
return
}
func (t *LoggingOutletEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
"stdout": &StdoutLoggingOutlet{},
"syslog": &SyslogLoggingOutlet{},
"tcp": &TCPLoggingOutlet{},
})
return
}
func (t *MonitoringEnum) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
t.Ret, err = enumUnmarshal(u, map[string]interface{}{
"prometheus": &PrometheusMonitoring{},
})
return
}
var ConfigFileDefaultLocations = []string{
"/etc/zrepl/zrepl.yml",
"/usr/local/etc/zrepl/zrepl.yml",
}
func ParseConfig(path string) (i *Config, err error) {
if path == "" {
// Try default locations
for _, l := range ConfigFileDefaultLocations {
stat, statErr := os.Stat(l)
if statErr != nil {
continue
}
if !stat.Mode().IsRegular() {
err = errors.Errorf("file at default location is not a regular file: %s", l)
return
}
path = l
break
}
}
var bytes []byte
if bytes, err = ioutil.ReadFile(path); err != nil {
return
}
return ParseConfigBytes(bytes)
}
func ParseConfigBytes(bytes []byte) (*Config, error) {
var c *Config
if err := yaml.UnmarshalStrict(bytes, &c); err != nil {
return nil, err
}
if c == nil {
return nil, fmt.Errorf("config is empty or only consists of comments")
}
return c, nil
}
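// Usage sketch (in-package, relying on the imports above): parse a config
// from memory and access the typed job variants. The YAML mirrors the
// known-valid push example from the config tests and samples; the example
// function name itself is illustrative only.
func exampleParseConfigBytes() {
	raw := []byte(`
jobs:
- type: push
  name: "push"
  connect:
    type: tcp
    address: "backup-server.foo.bar:8888"
  filesystems: {
    "<": true,
    "tmp": false
  }
  snapshotting:
    type: manual
  pruning:
    keep_sender:
    - type: not_replicated
    keep_receiver:
    - type: last_n
      count: 10
`)
	conf, err := ParseConfigBytes(raw)
	if err != nil {
		panic(err)
	}
	// JobEnum.Ret holds the concrete job struct selected by the "type" field.
	push := conf.Jobs[0].Ret.(*PushJob)
	fmt.Println(push.Name, push.Connect.Ret.(*TCPConnect).Address)
}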
var durationStringRegex *regexp.Regexp = regexp.MustCompile(`^\s*(\d+)\s*(s|m|h|d|w)\s*$`)
func parsePostitiveDuration(e string) (d time.Duration, err error) {
comps := durationStringRegex.FindStringSubmatch(e)
if len(comps) != 3 {
err = fmt.Errorf("does not match regex: %s %#v", e, comps)
return
}
durationFactor, err := strconv.ParseInt(comps[1], 10, 64)
if err != nil {
return 0, err
}
if durationFactor <= 0 {
return 0, errors.New("duration must be positive integer")
}
var durationUnit time.Duration
switch comps[2] {
case "s":
durationUnit = time.Second
case "m":
durationUnit = time.Minute
case "h":
durationUnit = time.Hour
case "d":
durationUnit = 24 * time.Hour
case "w":
durationUnit = 24 * 7 * time.Hour
default:
err = fmt.Errorf("contains unknown time unit '%s'", comps[2])
return
}
d = time.Duration(durationFactor) * durationUnit
return
}
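// The duration syntax accepted above is a single integer factor followed by
// one of the units s, m, h, d or w; composite values such as "1h30m" are not
// supported. A small in-package sketch (the function name is illustrative):
func exampleDurations() {
	for _, s := range []string{"10m", "1d", "2w"} {
		if d, err := parsePostitiveDuration(s); err == nil {
			fmt.Println(s, "->", d) // 10m -> 10m0s, 1d -> 24h0m0s, 2w -> 336h0m0s
		}
	}
}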

View File

@ -0,0 +1,82 @@
package config
import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/zrepl/yaml-config"
"testing"
)
func testValidGlobalSection(t *testing.T, s string) *Config {
jobdef := `
jobs:
- name: dummyjob
type: sink
serve:
type: tcp
listen: ":2342"
clients: {
"10.0.0.1":"foo"
}
root_fs: zoot/foo
`
_, err := ParseConfigBytes([]byte(jobdef))
require.NoError(t, err)
return testValidConfig(t, s + jobdef)
}
func TestOutletTypes(t *testing.T) {
conf := testValidGlobalSection(t, `
global:
logging:
- type: stdout
level: debug
format: human
- type: syslog
level: info
retry_interval: 20s
format: human
- type: tcp
level: debug
format: json
address: logserver.example.com:1234
- type: tcp
level: debug
format: json
address: encryptedlogserver.example.com:1234
retry_interval: 20s
tls:
ca: /etc/zrepl/log/ca.crt
cert: /etc/zrepl/log/key.pem
key: /etc/zrepl/log/cert.pem
`)
assert.Equal(t, 4, len(*conf.Global.Logging))
assert.NotNil(t, (*conf.Global.Logging)[3].Ret.(*TCPLoggingOutlet).TLS)
}
func TestDefaultLoggingOutlet(t *testing.T) {
conf := testValidGlobalSection(t, "")
assert.Equal(t, 1, len(*conf.Global.Logging))
o := (*conf.Global.Logging)[0].Ret.(StdoutLoggingOutlet)
assert.Equal(t, "warn", o.Level)
assert.Equal(t, "human", o.Format)
}
func TestPrometheusMonitoring(t *testing.T) {
conf := testValidGlobalSection(t, `
global:
monitoring:
- type: prometheus
listen: ':9091'
`)
assert.Equal(t, ":9091", conf.Global.Monitoring[0].Ret.(*PrometheusMonitoring).Listen)
}
func TestLoggingOutletEnumList_SetDefaults(t *testing.T) {
e := &LoggingOutletEnumList{}
var i yaml.Defaulter = e
require.NotPanics(t, func() {
i.SetDefault()
assert.Equal(t, "warn", (*e)[0].Ret.(StdoutLoggingOutlet).Level)
})
}

View File

@ -0,0 +1,39 @@
package config
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestConfigEmptyFails(t *testing.T) {
conf, err := testConfig(t, "\n")
assert.Nil(t, conf)
assert.Error(t, err)
}
func TestJobsOnlyWorks(t *testing.T) {
testValidConfig(t, `
jobs:
- name: push
type: push
# snapshot the filesystems matched by the left-hand-side of the mapping
# every 10m with zrepl_ as prefix
connect:
type: tcp
address: localhost:2342
filesystems: {
"pool1/var/db<": true,
"pool1/usr/home<": true,
"pool1/usr/home/paranoid": false, #don't backup paranoid user
"pool1/poudriere/ports<": false #don't backup the ports trees
}
snapshotting:
type: manual
pruning:
keep_sender:
- type: not_replicated
keep_receiver:
- type: last_n
count: 1
`)
}

86
config/config_rpc_test.go Normal file
View File

@ -0,0 +1,86 @@
package config
import (
"github.com/stretchr/testify/assert"
"testing"
"time"
)
func TestRPC(t *testing.T) {
conf := testValidConfig(t, `
jobs:
- name: pull_servers
type: pull
connect:
type: tcp
address: "server1.foo.bar:8888"
rpc:
timeout: 20s # different form default, should merge
root_fs: "pool2/backup_servers"
interval: 10m
pruning:
keep_sender:
- type: not_replicated
keep_receiver:
- type: last_n
count: 100
- name: pull_servers2
type: pull
connect:
type: tcp
address: "server1.foo.bar:8888"
rpc:
tx_chunk_size: 0xabcd # different from default, should merge
root_fs: "pool2/backup_servers"
interval: 10m
pruning:
keep_sender:
- type: not_replicated
keep_receiver:
- type: last_n
count: 100
- type: sink
name: "laptop_sink"
root_fs: "pool2/backup_laptops"
serve:
type: tcp
listen: "192.168.122.189:8888"
clients: {
"10.23.42.23":"client1"
}
rpc:
rx_structured_max: 0x2342
- type: sink
name: "other_sink"
root_fs: "pool2/backup_laptops"
serve:
type: tcp
listen: "192.168.122.189:8888"
clients: {
"10.23.42.23":"client1"
}
rpc:
send_heartbeat_interval: 10s
`)
assert.Equal(t, 20*time.Second, conf.Jobs[0].Ret.(*PullJob).Connect.Ret.(*TCPConnect).RPC.Timeout)
assert.Equal(t, uint32(0xabcd), conf.Jobs[1].Ret.(*PullJob).Connect.Ret.(*TCPConnect).RPC.TxChunkSize)
assert.Equal(t, uint32(0x2342), conf.Jobs[2].Ret.(*SinkJob).Serve.Ret.(*TCPServe).RPC.RxStructuredMaxLen)
assert.Equal(t, 10*time.Second, conf.Jobs[3].Ret.(*SinkJob).Serve.Ret.(*TCPServe).RPC.SendHeartbeatInterval)
defConf := RPCConfig{}
Default(&defConf)
assert.Equal(t, defConf.Timeout, conf.Global.RPC.Timeout)
}
func TestGlobal_DefaultRPCConfig(t *testing.T) {
assert.NotPanics(t, func() {
var c RPCConfig
Default(&c)
assert.NotNil(t, c)
assert.Equal(t, c.TxChunkSize, uint32(1)<<15)
})
}

View File

@ -0,0 +1,57 @@
package config
import (
"fmt"
"github.com/stretchr/testify/assert"
"testing"
"time"
)
func TestSnapshotting(t *testing.T) {
tmpl := `
jobs:
- name: foo
type: push
connect:
type: local
listener_name: foo
client_identity: bar
filesystems: {"<": true}
%s
pruning:
keep_sender:
- type: last_n
count: 10
keep_receiver:
- type: last_n
count: 10
`
manual := `
snapshotting:
type: manual
`
periodic := `
snapshotting:
type: periodic
prefix: zrepl_
interval: 10m
`
fillSnapshotting := func(s string) string { return fmt.Sprintf(tmpl, s) }
var c *Config
t.Run("manual", func(t *testing.T) {
c = testValidConfig(t, fillSnapshotting(manual))
snm := c.Jobs[0].Ret.(*PushJob).Snapshotting.Ret.(*SnapshottingManual)
assert.Equal(t, "manual", snm.Type)
})
t.Run("periodic", func(t *testing.T) {
c = testValidConfig(t, fillSnapshotting(periodic))
snp := c.Jobs[0].Ret.(*PushJob).Snapshotting.Ret.(*SnapshottingPeriodic)
assert.Equal(t, "periodic", snp.Type)
assert.Equal(t, 10*time.Minute, snp.Interval)
assert.Equal(t, "zrepl_" , snp.Prefix)
})
}

50
config/config_test.go Normal file
View File

@ -0,0 +1,50 @@
package config
import (
"github.com/kr/pretty"
"github.com/stretchr/testify/require"
"path"
"path/filepath"
"testing"
)
func TestSampleConfigsAreParsedWithoutErrors(t *testing.T) {
paths, err := filepath.Glob("./samples/*")
if err != nil {
t.Errorf("glob failed: %+v", err)
}
for _, p := range paths {
if path.Ext(p) != ".yml" {
t.Logf("skipping file %s", p)
continue
}
t.Run(p, func(t *testing.T) {
c, err := ParseConfig(p)
if err != nil {
t.Errorf("error parsing %s:\n%+v", p, err)
}
t.Logf("file: %s", p)
t.Log(pretty.Sprint(c))
})
}
}
func testValidConfig(t *testing.T, input string) (*Config) {
t.Helper()
conf, err := testConfig(t, input)
require.NoError(t, err)
require.NotNil(t, conf)
return conf
}
func testConfig(t *testing.T, input string) (*Config, error) {
t.Helper()
return ParseConfigBytes([]byte(input))
}

123
config/retentiongrid.go Normal file
View File

@ -0,0 +1,123 @@
package config
import (
"fmt"
"regexp"
"strconv"
"strings"
"time"
)
type RetentionIntervalList []RetentionInterval
type PruneGrid struct {
Type string `yaml:"type"`
Grid RetentionIntervalList `yaml:"grid"`
Regex string `yaml:"regex"`
}
type RetentionInterval struct {
length time.Duration
keepCount int
}
func (i *RetentionInterval) Length() time.Duration {
return i.length
}
func (i *RetentionInterval) KeepCount() int {
return i.keepCount
}
const RetentionGridKeepCountAll int = -1
type RetentionGrid struct {
intervals []RetentionInterval
}
func (t *RetentionIntervalList) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
var in string
if err := u(&in, true); err != nil {
return err
}
intervals, err := parseRetentionGridIntervalsString(in)
if err != nil {
return err
}
*t = intervals
return nil
}
var retentionStringIntervalRegex *regexp.Regexp = regexp.MustCompile(`^\s*(\d+)\s*x\s*([^\(]+)\s*(\((.*)\))?\s*$`)
func parseRetentionGridIntervalString(e string) (intervals []RetentionInterval, err error) {
comps := retentionStringIntervalRegex.FindStringSubmatch(e)
if comps == nil {
err = fmt.Errorf("retention string does not match expected format")
return
}
times, err := strconv.Atoi(comps[1])
if err != nil {
return nil, err
} else if times <= 0 {
return nil, fmt.Errorf("contains factor <= 0")
}
duration, err := parsePostitiveDuration(comps[2])
if err != nil {
return nil, err
}
keepCount := 1
if comps[3] != "" {
// Decompose key=value, comma separated
// For now, only the keep= parameter is supported
re := regexp.MustCompile(`^\s*keep=(.+)\s*$`)
res := re.FindStringSubmatch(comps[4])
if res == nil || len(res) != 2 {
err = fmt.Errorf("interval parameter contains unknown parameters")
return
}
if res[1] == "all" {
keepCount = RetentionGridKeepCountAll
} else {
keepCount, err = strconv.Atoi(res[1])
if err != nil {
err = fmt.Errorf("cannot parse keep_count value")
return
}
}
}
intervals = make([]RetentionInterval, times)
for i := range intervals {
intervals[i] = RetentionInterval{
length: duration,
keepCount: keepCount,
}
}
return
}
func parseRetentionGridIntervalsString(s string) (intervals []RetentionInterval, err error) {
ges := strings.Split(s, "|")
intervals = make([]RetentionInterval, 0, 7*len(ges))
for intervalIdx, e := range ges {
parsed, err := parseRetentionGridIntervalString(e)
if err != nil {
return nil, fmt.Errorf("cannot parse interval %d of %d: %s: %s", intervalIdx+1, len(ges), err, strings.TrimSpace(e))
}
intervals = append(intervals, parsed...)
}
return
}
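// A minimal in-package sketch of how a grid spec like the ones in the samples
// expands: "1x1h(keep=all) | 24x1h" yields one 1-hour interval that keeps every
// snapshot, followed by 24 1-hour intervals that keep one snapshot each. The
// example function name is illustrative only.
func exampleGridSpec() {
	intervals, err := parseRetentionGridIntervalsString("1x1h(keep=all) | 24x1h")
	if err != nil {
		panic(err)
	}
	fmt.Println(len(intervals))                                        // 25
	fmt.Println(intervals[0].KeepCount() == RetentionGridKeepCountAll) // true
	fmt.Println(intervals[1].Length(), intervals[1].KeepCount())       // 1h0m0s 1
}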

31
config/samples/local.yml Normal file
View File

@ -0,0 +1,31 @@
jobs:
- type: sink
name: "local_sink"
root_fs: "storage/zrepl/sink"
serve:
type: local
listener_name: localsink
- type: push
name: "backup_system"
connect:
type: local
listener_name: localsink
client_identity: local_backup
filesystems: {
"system<": true,
}
snapshotting:
type: periodic
interval: 10m
prefix: zrepl_
pruning:
keep_sender:
- type: not_replicated
- type: last_n
count: 10
keep_receiver:
- type: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
regex: "zrepl_.*"

24
config/samples/pull.yml Normal file
View File

@ -0,0 +1,24 @@
jobs:
- name: pull_servers
type: pull
connect:
type: tls
address: "server1.foo.bar:8888"
ca: "/certs/ca.crt"
cert: "/certs/cert.crt"
key: "/certs/key.pem"
server_cn: "server1"
root_fs: "pool2/backup_servers"
interval: 10m
pruning:
keep_sender:
- type: not_replicated
- type: last_n
count: 10
- type: grid
grid: 1x1h(keep=all) | 24x1h | 14x1d
regex: "zrepl_.*"
keep_receiver:
- type: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
regex: "zrepl_.*"

View File

@ -0,0 +1,28 @@
jobs:
- name: pull_servers
type: pull
connect:
type: ssh+stdinserver
host: app-srv.example.com
user: root
port: 22
identity_file: /etc/zrepl/ssh/identity
options: # optional, default [], `-o` arguments passed to ssh
- "Compression=on"
root_fs: "pool2/backup_servers"
interval: 10m
pruning:
keep_sender:
- type: not_replicated
- type: last_n
count: 10
- type: grid
grid: 1x1h(keep=all) | 24x1h | 14x1d
regex: "^zrepl_.*"
keep_receiver:
- type: regex
regex: keep_
- type: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
regex: "^zrepl_.*"

24
config/samples/push.yml Normal file
View File

@ -0,0 +1,24 @@
jobs:
- type: push
name: "push"
filesystems: {
"<": true,
"tmp": false
}
connect:
type: tcp
address: "backup-server.foo.bar:8888"
snapshotting:
type: manual
pruning:
keep_sender:
- type: not_replicated
- type: last_n
count: 10
- type: grid
grid: 1x1h(keep=all) | 24x1h | 14x1d
regex: "^zrepl_.*"
keep_receiver:
- type: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
regex: "^zrepl_.*"

13
config/samples/sink.yml Normal file
View File

@ -0,0 +1,13 @@
jobs:
- type: sink
name: "laptop_sink"
root_fs: "pool2/backup_laptops"
serve:
type: tls
listen: "192.168.122.189:8888"
ca: "ca.pem"
cert: "cert.pem"
key: "key.pem"
client_cns:
- "laptop1"
- "homeserver"

17
config/samples/source.yml Normal file
View File

@ -0,0 +1,17 @@
jobs:
- name: pull_source
type: source
serve:
type: tcp
listen: "0.0.0.0:8888"
clients: {
"192.168.122.123" : "client1"
}
filesystems: {
"<": true,
"secret": false
}
snapshotting:
type: periodic
interval: 10m
prefix: zrepl_

View File

@ -0,0 +1,17 @@
jobs:
- name: pull_source
type: source
serve:
type: stdinserver
client_identities:
- "client1"
- "client2"
filesystems: {
"<": true,
"secret": false
}
snapshotting:
type: periodic
interval: 10m
prefix: zrepl_

240
daemon/control.go Normal file
View File

@ -0,0 +1,240 @@
package daemon
import (
"bytes"
"context"
"encoding/json"
"fmt"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/zrepl/zrepl/daemon/job"
"github.com/zrepl/zrepl/daemon/nethelpers"
"github.com/zrepl/zrepl/logger"
"github.com/zrepl/zrepl/version"
"io"
"net"
"net/http"
"time"
)
type controlJob struct {
sockaddr *net.UnixAddr
jobs *jobs
}
func newControlJob(sockpath string, jobs *jobs) (j *controlJob, err error) {
j = &controlJob{jobs: jobs}
j.sockaddr, err = net.ResolveUnixAddr("unix", sockpath)
if err != nil {
err = errors.Wrap(err, "cannot resolve unix address")
return
}
return
}
func (j *controlJob) Name() string { return jobNameControl }
func (j *controlJob) Status() *job.Status { return &job.Status{Type: job.TypeInternal} }
var promControl struct {
requestBegin *prometheus.CounterVec
requestFinished *prometheus.HistogramVec
}
func (j *controlJob) RegisterMetrics(registerer prometheus.Registerer) {
promControl.requestBegin = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "zrepl",
Subsystem: "control",
Name: "request_begin",
Help: "number of request we started to handle",
}, []string{"endpoint"})
promControl.requestFinished = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "zrepl",
Subsystem: "control",
Name: "request_finished",
Help: "time it took a request to finih",
Buckets: []float64{1e-6, 10e-6, 100e-6, 500e-6, 1e-3,10e-3, 100e-3, 200e-3,400e-3,800e-3, 1, 10, 20},
}, []string{"endpoint"})
registerer.MustRegister(promControl.requestBegin)
registerer.MustRegister(promControl.requestFinished)
}
const (
ControlJobEndpointPProf string = "/debug/pprof"
ControlJobEndpointVersion string = "/version"
ControlJobEndpointStatus string = "/status"
ControlJobEndpointSignal string = "/signal"
)
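// Sketch of a client for the endpoints above: plain HTTP over the control
// unix socket, JSON in the responses. The socket path used here is the
// GlobalControl default from the config package; the client code itself is
// illustrative and not part of the daemon. The /signal endpoint additionally
// expects a JSON body like {"Name":"<jobname>","Op":"wakeup"}.
func queryControlVersion() (string, error) {
	c := http.Client{
		Transport: &http.Transport{
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				return net.Dial("unix", "/var/run/zrepl/control")
			},
		},
	}
	resp, err := c.Get("http://unix" + ControlJobEndpointVersion)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	var buf bytes.Buffer
	_, err = io.Copy(&buf, resp.Body)
	return buf.String(), err
}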
func (j *controlJob) Run(ctx context.Context) {
log := job.GetLogger(ctx)
defer log.Info("control job finished")
l, err := nethelpers.ListenUnixPrivate(j.sockaddr)
if err != nil {
log.WithError(err).Error("error listening")
return
}
pprofServer := NewPProfServer(ctx)
mux := http.NewServeMux()
mux.Handle(ControlJobEndpointPProf,
requestLogger{log: log, handler: jsonRequestResponder{func(decoder jsonDecoder) (interface{}, error) {
var msg PprofServerControlMsg
err := decoder(&msg)
if err != nil {
return nil, errors.Errorf("decode failed")
}
pprofServer.Control(msg)
return struct{}{}, nil
}}})
mux.Handle(ControlJobEndpointVersion,
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
return version.NewZreplVersionInformation(), nil
}}})
mux.Handle(ControlJobEndpointStatus,
requestLogger{log: log, handler: jsonResponder{func() (interface{}, error) {
s := j.jobs.status()
return s, nil
}}})
mux.Handle(ControlJobEndpointSignal,
requestLogger{log: log, handler: jsonRequestResponder{func(decoder jsonDecoder) (interface{}, error) {
type reqT struct {
Name string
Op string
}
var req reqT
if decoder(&req) != nil {
return nil, errors.Errorf("decode failed")
}
var err error
switch req.Op {
case "wakeup":
err = j.jobs.wakeup(req.Name)
case "reset":
err = j.jobs.reset(req.Name)
default:
err = fmt.Errorf("operation %q is invalid", req.Op)
}
return struct{}{}, err
}}})
server := http.Server{
Handler: mux,
// control socket is local, 1s timeout should be more than sufficient, even on a loaded system
WriteTimeout: 1 * time.Second,
ReadTimeout: 1 * time.Second,
}
outer:
for {
served := make(chan error)
go func() {
served <- server.Serve(l)
close(served)
}()
select {
case <-ctx.Done():
log.WithError(ctx.Err()).Info("context done")
server.Shutdown(context.Background())
break outer
case err = <-served:
if err != nil {
log.WithError(err).Error("error serving")
break outer
}
}
}
}
type jsonResponder struct {
producer func() (interface{}, error)
}
func (j jsonResponder) ServeHTTP(w http.ResponseWriter, r *http.Request) {
res, err := j.producer()
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
io.WriteString(w, err.Error())
return
}
var buf bytes.Buffer
err = json.NewEncoder(&buf).Encode(res)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
io.WriteString(w, err.Error())
} else {
io.Copy(w, &buf)
}
}
type jsonDecoder = func(interface{}) error
type jsonRequestResponder struct {
producer func(decoder jsonDecoder) (interface{}, error)
}
func (j jsonRequestResponder) ServeHTTP(w http.ResponseWriter, r *http.Request) {
var decodeError error
decoder := func(i interface{}) error {
err := json.NewDecoder(r.Body).Decode(&i)
decodeError = err
return err
}
res, producerErr := j.producer(decoder)
// If we had a decode error, ignore the producer's output and return the error
if decodeError != nil {
w.WriteHeader(http.StatusBadRequest)
io.WriteString(w, decodeError.Error())
return
}
if producerErr != nil {
w.WriteHeader(http.StatusInternalServerError)
io.WriteString(w, producerErr.Error())
return
}
var buf bytes.Buffer
encodeErr := json.NewEncoder(&buf).Encode(res)
if encodeErr != nil {
w.WriteHeader(http.StatusInternalServerError)
io.WriteString(w, encodeErr.Error())
} else {
io.Copy(w, &buf)
}
}
type requestLogger struct {
log logger.Logger
handler http.Handler
handlerFunc http.HandlerFunc
}
func (l requestLogger) ServeHTTP(w http.ResponseWriter, r *http.Request) {
log := l.log.WithField("method", r.Method).WithField("url", r.URL)
log.Debug("start")
promControl.requestBegin.WithLabelValues(r.URL.Path).Inc()
defer prometheus.NewTimer(promControl.requestFinished.WithLabelValues(r.URL.Path)).ObserveDuration()
if l.handlerFunc != nil {
l.handlerFunc(w, r)
} else if l.handler != nil {
l.handler.ServeHTTP(w, r)
} else {
log.Error("no handler or handlerFunc configured")
}
log.Debug("finish")
}
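
For illustration, a minimal sketch of a client for the endpoints above: it dials the private unix socket with a plain http.Client and posts the reqT-shaped JSON that the /signal handler decodes. The socket path and job name are placeholders, not values taken from this commit, and the zrepl CLI uses its own client code rather than this sketch.

package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"net"
	"net/http"
)

func main() {
	// Socket path is an assumption; it is whatever global.control.sockpath points to.
	const sockPath = "/var/run/zrepl/control"

	client := &http.Client{
		Transport: &http.Transport{
			// dial the unix socket regardless of the URL's host
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				return (&net.Dialer{}).DialContext(ctx, "unix", sockPath)
			},
		},
	}

	// Ask the daemon to wake up a job; "mybackupjob" is a placeholder name.
	// The request shape matches the reqT struct handled at ControlJobEndpointSignal.
	body, err := json.Marshal(struct{ Name, Op string }{Name: "mybackupjob", Op: "wakeup"})
	if err != nil {
		panic(err)
	}
	resp, err := client.Post("http://unix/signal", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}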

223
daemon/daemon.go Normal file
View File

@ -0,0 +1,223 @@
package daemon
import (
"context"
"fmt"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/job"
"github.com/zrepl/zrepl/daemon/job/reset"
"github.com/zrepl/zrepl/daemon/job/wakeup"
"github.com/zrepl/zrepl/daemon/logging"
"github.com/zrepl/zrepl/logger"
"github.com/zrepl/zrepl/version"
"os"
"os/signal"
"strings"
"sync"
"syscall"
"time"
)
func Run(conf *config.Config) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-sigChan
cancel()
}()
outlets, err := logging.OutletsFromConfig(*conf.Global.Logging)
if err != nil {
return errors.Wrap(err, "cannot build logging from config")
}
confJobs, err := job.JobsFromConfig(conf)
if err != nil {
return errors.Wrap(err, "cannot build jobs from config")
}
log := logger.NewLogger(outlets, 1*time.Second)
log.Info(version.NewZreplVersionInformation().String())
for _, job := range confJobs {
if IsInternalJobName(job.Name()) {
panic(fmt.Sprintf("internal job name used for config job '%s'", job.Name())) //FIXME
}
}
ctx = job.WithLogger(ctx, log)
jobs := newJobs()
// start control socket
controlJob, err := newControlJob(conf.Global.Control.SockPath, jobs)
if err != nil {
panic(err) // FIXME
}
jobs.start(ctx, controlJob, true)
for i, jc := range conf.Global.Monitoring {
var (
job job.Job
err error
)
switch v := jc.Ret.(type) {
case *config.PrometheusMonitoring:
job, err = newPrometheusJobFromConfig(v)
default:
return errors.Errorf("unknown monitoring job #%d (type %T)", i, v)
}
if err != nil {
return errors.Wrapf(err,"cannot build monitorin gjob #%d", i)
}
jobs.start(ctx, job, true)
}
log.Info("starting daemon")
// start regular jobs
for _, j := range confJobs {
jobs.start(ctx, j, false)
}
select {
case <-jobs.wait():
log.Info("all jobs finished")
case <-ctx.Done():
log.WithError(ctx.Err()).Info("context finished")
}
log.Info("daemon exiting")
return nil
}
type jobs struct {
wg sync.WaitGroup
// m protects all fields below it
m sync.RWMutex
wakeups map[string]wakeup.Func // by Job.Name
resets map[string]reset.Func // by Job.Name
jobs map[string]job.Job
}
func newJobs() *jobs {
return &jobs{
wakeups: make(map[string]wakeup.Func),
resets: make(map[string]reset.Func),
jobs: make(map[string]job.Job),
}
}
const (
logJobField string = "job"
logTaskField string = "task"
logSubsysField string = "subsystem"
)
func (s *jobs) wait() <-chan struct{} {
ch := make(chan struct{})
go func() {
s.wg.Wait()
close(ch) // signal waiters; without closing, wait() would never fire
}()
return ch
}
func (s *jobs) status() map[string]*job.Status {
s.m.RLock()
defer s.m.RUnlock()
type res struct {
name string
status *job.Status
}
var wg sync.WaitGroup
c := make(chan res, len(s.jobs))
for name, j := range s.jobs {
wg.Add(1)
go func(name string, j job.Job) {
defer wg.Done()
c <- res{name: name, status: j.Status()}
}(name, j)
}
wg.Wait()
close(c)
ret := make(map[string]*job.Status, len(s.jobs))
for res := range c {
ret[res.name] = res.status
}
return ret
}
func (s *jobs) wakeup(job string) error {
s.m.RLock()
defer s.m.RUnlock()
wu, ok := s.wakeups[job]
if !ok {
return errors.Errorf("Job %s does not exist", job)
}
return wu()
}
func (s *jobs) reset(job string) error {
s.m.RLock()
defer s.m.RUnlock()
wu, ok := s.resets[job]
if !ok {
return errors.Errorf("Job %s does not exist", job)
}
return wu()
}
const (
jobNamePrometheus = "_prometheus"
jobNameControl = "_control"
)
func IsInternalJobName(s string) bool {
return strings.HasPrefix(s, "_")
}
func (s *jobs) start(ctx context.Context, j job.Job, internal bool) {
s.m.Lock()
defer s.m.Unlock()
jobLog := job.GetLogger(ctx).
WithField(logJobField, j.Name()).
WithOutlet(newPrometheusLogOutlet(j.Name()), logger.Debug)
jobName := j.Name()
if !internal && IsInternalJobName(jobName) {
panic(fmt.Sprintf("internal job name used for non-internal job %s", jobName))
}
if internal && !IsInternalJobName(jobName) {
panic(fmt.Sprintf("internal job does not use internal job name %s", jobName))
}
if _, ok := s.jobs[jobName]; ok {
panic(fmt.Sprintf("duplicate job name %s", jobName))
}
j.RegisterMetrics(prometheus.DefaultRegisterer)
s.jobs[jobName] = j
ctx = job.WithLogger(ctx, jobLog)
ctx, wakeup := wakeup.Context(ctx)
ctx, resetFunc := reset.Context(ctx)
s.wakeups[jobName] = wakeup
s.resets[jobName] = resetFunc
s.wg.Add(1)
go func() {
defer s.wg.Done()
jobLog.Info("starting job")
defer jobLog.Info("job exited")
j.Run(ctx)
}()
}

View File

@ -1,12 +1,11 @@
package cmd
package filters
import (
"fmt"
"strings"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/endpoint"
"github.com/zrepl/zrepl/zfs"
"strings"
)
type DatasetMapFilter struct {
@ -101,6 +100,7 @@ func (m DatasetMapFilter) mostSpecificPrefixMapping(path *zfs.DatasetPath) (idx
return
}
// Returns target == nil if there is no mapping
func (m DatasetMapFilter) Map(source *zfs.DatasetPath) (target *zfs.DatasetPath, err error) {
if m.filterMode {
@ -114,9 +114,17 @@ func (m DatasetMapFilter) Map(source *zfs.DatasetPath) (target *zfs.DatasetPath,
}
me := m.entries[mi]
if strings.HasPrefix("!", me.mapping) {
// reject mapping
return nil, nil
if me.mapping == "" {
// Special case treatment: 'foo/bar<' => ''
if !me.subtreeMatch {
return nil, fmt.Errorf("mapping to '' must be a subtree match")
}
// ok...
} else {
if strings.HasPrefix("!", me.mapping) {
// reject mapping
return nil, nil
}
}
target, err = zfs.NewDatasetPath(me.mapping)
@ -177,12 +185,43 @@ func (m DatasetMapFilter) InvertedFilter() (inv *DatasetMapFilter, err error) {
return inv, nil
}
// FIXME investigate whether we can support more...
func (m DatasetMapFilter) Invert() (endpoint.FSMap, error) {
if m.filterMode {
return nil, errors.Errorf("can only invert mappings")
}
if len(m.entries) != 1 {
return nil, errors.Errorf("inversion of complicated mappings is not implemented") // FIXME
}
e := m.entries[0]
inv := &DatasetMapFilter{
make([]datasetMapFilterEntry, len(m.entries)),
false,
}
mp, err := zfs.NewDatasetPath(e.mapping)
if err != nil {
return nil, err
}
inv.entries[0] = datasetMapFilterEntry{
path: mp,
mapping: e.path.ToString(),
subtreeMatch: e.subtreeMatch,
}
return inv, nil
}
// Creates a new DatasetMapFilter in filter mode from a mapping
// All accepting mapping results are mapped to accepting filter results
// All rejecting mapping results are mapped to rejecting filter results
func (m DatasetMapFilter) AsFilter() (f *DatasetMapFilter) {
func (m DatasetMapFilter) AsFilter() endpoint.FSFilter {
f = &DatasetMapFilter{
f := &DatasetMapFilter{
make([]datasetMapFilterEntry, len(m.entries)),
true,
}
@ -217,16 +256,14 @@ func (m DatasetMapFilter) parseDatasetFilterResult(result string) (pass bool, er
return false, fmt.Errorf("'%s' is not a valid filter result", result)
}
func parseDatasetMapFilter(mi interface{}, filterMode bool) (f *DatasetMapFilter, err error) {
func DatasetMapFilterFromConfig(in map[string]bool) (f *DatasetMapFilter, err error) {
var m map[string]string
if err = mapstructure.Decode(mi, &m); err != nil {
err = fmt.Errorf("maps / filters must be specified as map[string]string: %s", err)
return
}
f = NewDatasetMapFilter(len(m), filterMode)
for pathPattern, mapping := range m {
f = NewDatasetMapFilter(len(in), true)
for pathPattern, accept := range in {
mapping := MapFilterResultOmit
if accept {
mapping = MapFilterResultOk
}
if err = f.Add(pathPattern, mapping); err != nil {
err = fmt.Errorf("invalid mapping entry ['%s':'%s']: %s", pathPattern, mapping, err)
return
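
DatasetMapFilterFromConfig builds a filter from the config's map[string]bool: each key is a dataset path pattern (a trailing '<' marks a subtree match, as in the special-case comment above) and the value decides accept or reject. A short usage sketch with made-up patterns:

// Sketch only: accept zroot/data and its children, but drop the tmp subtree.
fsf, err := filters.DatasetMapFilterFromConfig(map[string]bool{
	"zroot/data<":     true,
	"zroot/data/tmp<": false,
})
if err != nil {
	panic(err)
}
_ = fsf // handed to a job as its filesystem filter (see modePushFromConfig later in this commit)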

View File

@ -0,0 +1,41 @@
package filters
import (
"github.com/zrepl/zrepl/zfs"
"strings"
)
type AnyFSVFilter struct{}
func NewAnyFSVFilter() AnyFSVFilter {
return AnyFSVFilter{}
}
var _ zfs.FilesystemVersionFilter = AnyFSVFilter{}
func (AnyFSVFilter) Filter(t zfs.VersionType, name string) (accept bool, err error) {
return true, nil
}
type PrefixFilter struct {
prefix string
fstype zfs.VersionType
fstypeSet bool // optionals anyone?
}
var _ zfs.FilesystemVersionFilter = &PrefixFilter{}
func NewPrefixFilter(prefix string) *PrefixFilter {
return &PrefixFilter{prefix: prefix}
}
func NewTypedPrefixFilter(prefix string, versionType zfs.VersionType) *PrefixFilter {
return &PrefixFilter{prefix, versionType, true}
}
func (f *PrefixFilter) Filter(t zfs.VersionType, name string) (accept bool, err error) {
fstypeMatches := (!f.fstypeSet || t == f.fstype)
prefixMatches := strings.HasPrefix(name, f.prefix)
return fstypeMatches && prefixMatches, nil
}

405
daemon/job/active.go Normal file
View File

@ -0,0 +1,405 @@
package job
import (
"context"
"github.com/pkg/errors"
"github.com/problame/go-streamrpc"
"github.com/prometheus/client_golang/prometheus"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/job/reset"
"github.com/zrepl/zrepl/daemon/job/wakeup"
"github.com/zrepl/zrepl/daemon/transport/connecter"
"github.com/zrepl/zrepl/daemon/filters"
"github.com/zrepl/zrepl/daemon/pruner"
"github.com/zrepl/zrepl/endpoint"
"github.com/zrepl/zrepl/replication"
"github.com/zrepl/zrepl/zfs"
"sync"
"github.com/zrepl/zrepl/daemon/logging"
"github.com/zrepl/zrepl/daemon/snapper"
"time"
)
type ActiveSide struct {
mode activeMode
name string
clientFactory *connecter.ClientFactory
prunerFactory *pruner.PrunerFactory
promRepStateSecs *prometheus.HistogramVec // labels: state
promPruneSecs *prometheus.HistogramVec // labels: prune_side
promBytesReplicated *prometheus.CounterVec // labels: filesystem
tasksMtx sync.Mutex
tasks activeSideTasks
}
type activeSideTasks struct {
replication *replication.Replication
prunerSender, prunerReceiver *pruner.Pruner
}
func (a *ActiveSide) updateTasks(u func(*activeSideTasks)) activeSideTasks {
a.tasksMtx.Lock()
defer a.tasksMtx.Unlock()
var copy activeSideTasks
copy = a.tasks
if u == nil {
return copy
}
u(&copy)
a.tasks = copy
return copy
}
type activeMode interface {
SenderReceiver(client *streamrpc.Client) (replication.Sender, replication.Receiver, error)
Type() Type
RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{})
}
type modePush struct {
fsfilter endpoint.FSFilter
snapper *snapper.PeriodicOrManual
}
func (m *modePush) SenderReceiver(client *streamrpc.Client) (replication.Sender, replication.Receiver, error) {
sender := endpoint.NewSender(m.fsfilter)
receiver := endpoint.NewRemote(client)
return sender, receiver, nil
}
func (m *modePush) Type() Type { return TypePush }
func (m *modePush) RunPeriodic(ctx context.Context, wakeUpCommon chan <- struct{}) {
m.snapper.Run(ctx, wakeUpCommon)
}
func modePushFromConfig(g *config.Global, in *config.PushJob) (*modePush, error) {
m := &modePush{}
fsf, err := filters.DatasetMapFilterFromConfig(in.Filesystems)
if err != nil {
return nil, errors.Wrap(err, "cannnot build filesystem filter")
}
m.fsfilter = fsf
if m.snapper, err = snapper.FromConfig(g, fsf, in.Snapshotting); err != nil {
return nil, errors.Wrap(err, "cannot build snapper")
}
return m, nil
}
type modePull struct {
rootFS *zfs.DatasetPath
interval time.Duration
}
func (m *modePull) SenderReceiver(client *streamrpc.Client) (replication.Sender, replication.Receiver, error) {
sender := endpoint.NewRemote(client)
receiver, err := endpoint.NewReceiver(m.rootFS)
return sender, receiver, err
}
func (*modePull) Type() Type { return TypePull }
func (m *modePull) RunPeriodic(ctx context.Context, wakeUpCommon chan<- struct{}) {
t := time.NewTicker(m.interval)
defer t.Stop()
for {
select {
case <-t.C:
select {
case wakeUpCommon <- struct{}{}:
default:
GetLogger(ctx).
WithField("pull_interval", m.interval).
Warn("pull job took longer than pull interval")
wakeUpCommon <- struct{}{} // block anyways, to queue up the wakeup
}
case <-ctx.Done():
return
}
}
}
func modePullFromConfig(g *config.Global, in *config.PullJob) (m *modePull, err error) {
m = &modePull{}
if in.Interval <= 0 {
return nil, errors.New("interval must be positive")
}
m.interval = in.Interval
m.rootFS, err = zfs.NewDatasetPath(in.RootFS)
if err != nil {
return nil, errors.New("RootFS is not a valid zfs filesystem path")
}
if m.rootFS.Length() <= 0 {
return nil, errors.New("RootFS must not be empty") // duplicates error check of receiver
}
return m, nil
}
func activeSide(g *config.Global, in *config.ActiveJob, mode activeMode) (j *ActiveSide, err error) {
j = &ActiveSide{mode: mode}
j.name = in.Name
j.promRepStateSecs = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "zrepl",
Subsystem: "replication",
Name: "state_time",
Help: "seconds spent during replication",
ConstLabels: prometheus.Labels{"zrepl_job":j.name},
}, []string{"state"})
j.promBytesReplicated = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "zrepl",
Subsystem: "replication",
Name: "bytes_replicated",
Help: "number of bytes replicated from sender to receiver per filesystem",
ConstLabels: prometheus.Labels{"zrepl_job":j.name},
}, []string{"filesystem"})
j.clientFactory, err = connecter.FromConfig(g, in.Connect)
if err != nil {
return nil, errors.Wrap(err, "cannot build client")
}
j.promPruneSecs = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "zrepl",
Subsystem: "pruning",
Name: "time",
Help: "seconds spent in pruner",
ConstLabels: prometheus.Labels{"zrepl_job":j.name},
}, []string{"prune_side"})
j.prunerFactory, err = pruner.NewPrunerFactory(in.Pruning, j.promPruneSecs)
if err != nil {
return nil, err
}
return j, nil
}
func (j *ActiveSide) RegisterMetrics(registerer prometheus.Registerer) {
registerer.MustRegister(j.promRepStateSecs)
registerer.MustRegister(j.promPruneSecs)
registerer.MustRegister(j.promBytesReplicated)
}
func (j *ActiveSide) Name() string { return j.name }
type ActiveSideStatus struct {
Replication *replication.Report
PruningSender, PruningReceiver *pruner.Report
}
func (j *ActiveSide) Status() *Status {
tasks := j.updateTasks(nil)
s := &ActiveSideStatus{}
t := j.mode.Type()
if tasks.replication != nil {
s.Replication = tasks.replication.Report()
}
if tasks.prunerSender != nil {
s.PruningSender = tasks.prunerSender.Report()
}
if tasks.prunerReceiver != nil {
s.PruningReceiver = tasks.prunerReceiver.Report()
}
return &Status{Type: t, JobSpecific: s}
}
func (j *ActiveSide) Run(ctx context.Context) {
log := GetLogger(ctx)
ctx = logging.WithSubsystemLoggers(ctx, log)
defer log.Info("job exiting")
periodicDone := make(chan struct{})
ctx, cancel := context.WithCancel(ctx)
defer cancel()
go j.mode.RunPeriodic(ctx, periodicDone)
invocationCount := 0
outer:
for {
log.Info("wait for wakeups")
select {
case <-ctx.Done():
log.WithError(ctx.Err()).Info("context")
break outer
case <-wakeup.Wait(ctx):
case <-periodicDone:
}
invocationCount++
invLog := log.WithField("invocation", invocationCount)
j.do(WithLogger(ctx, invLog), periodicDone)
}
}
func (j *ActiveSide) do(ctx context.Context, periodicWakeup <-chan struct{}) {
log := GetLogger(ctx)
ctx = logging.WithSubsystemLoggers(ctx, log)
// allow cancellation of an invocation (this function)
ctx, cancelThisRun := context.WithCancel(ctx)
defer cancelThisRun()
runDone := make(chan struct{})
defer close(runDone)
go func() {
select {
case <-runDone:
case <-reset.Wait(ctx):
log.Info("reset received, cancelling current invocation")
cancelThisRun()
case <-ctx.Done():
}
}()
client, err := j.clientFactory.NewClient()
if err != nil {
log.WithError(err).Error("factory cannot instantiate streamrpc client")
return
}
defer client.Close(ctx)
sender, receiver, err := j.mode.SenderReceiver(client)
if err != nil {
log.WithError(err).Error("cannot build sender and receiver")
return
}
tasks := j.updateTasks(func(tasks *activeSideTasks) {
// reset it
*tasks = activeSideTasks{}
tasks.replication = replication.NewReplication(j.promRepStateSecs, j.promBytesReplicated)
})
log.Info("start replication")
replicationDone := make(chan struct{})
replicationCtx, replicationCancel := context.WithCancel(ctx)
defer replicationCancel()
go func() {
tasks.replication.Drive(replicationCtx, sender, receiver)
close(replicationDone)
}()
outer:
for {
select {
case <-replicationDone:
// fine!
break outer
case <-periodicWakeup:
// Replication took longer than the periodic interval.
//
// For pull jobs, this isn't so bad because nothing changes on the active side
// if replication doesn't go forward.
//
// For push jobs, this means snapshots were taken.
// We need to invoke the pruner now, because otherwise an infinitely stuck replication
// will cause this side to fill up with snapshots.
//
// However, there are cases where replication progresses and just takes longer,
// and we don't want these situations be interrupted by a prune, which will require
// re-planning and starting over (think of initial replication as an example).
//
// Therefore, we prohibit pruning of snapshots that are part of the current replication plan.
// If there is no such plan, we kill the replication.
if j.mode.Type() == TypePush {
rep := tasks.replication.Report()
state, err := replication.StateString(rep.Status)
if err != nil {
panic(err)
}
switch state {
case replication.Planning:
fallthrough
case replication.PlanningError:
fallthrough
case replication.WorkingWait:
log.WithField("repl_state", state.String()).
Info("cancelling replication after new snapshots invalidated its current state")
replicationCancel()
log.Info("waiting for replication to stop")
<-replicationDone // no need to wait for ctx.Done, replication is already bound to global cancel
break outer
default:
log.WithField("repl_state", state.String()).
Warn("new snapshots while replication is running and making progress")
}
}
}
}
var pruningWg sync.WaitGroup
log.Info("start pruning sender")
pruningWg.Add(1)
go func() {
defer pruningWg.Done()
tasks := j.updateTasks(func(tasks *activeSideTasks) {
tasks.prunerSender = j.prunerFactory.BuildSenderPruner(ctx, sender, sender)
})
tasks.prunerSender.Prune()
// FIXME no need to do the cancellation dance with sender, we know it's local for push
// FIXME and we don't worry about pull ATM
}()
log.Info("start pruning receiver")
pruningWg.Add(1)
go func() {
defer pruningWg.Done()
receiverPrunerCtx, receiverPrunerCancel := context.WithCancel(ctx)
defer receiverPrunerCancel()
tasks := j.updateTasks(func(tasks *activeSideTasks) {
tasks.prunerReceiver = j.prunerFactory.BuildReceiverPruner(receiverPrunerCtx, receiver, sender)
})
receiverPrunerDone := make(chan struct{})
go func() {
defer close(receiverPrunerDone)
tasks.prunerReceiver.Prune()
}()
outer:
for {
select {
case <-receiverPrunerDone:
// fine!
break outer
case <-periodicWakeup:
// see comments for the similar approach with replication above
if j.mode.Type() == TypePush {
rep := tasks.prunerReceiver.Report()
state, err := pruner.StateString(rep.State)
if err != nil {
panic(err)
}
switch state {
case pruner.PlanWait:
fallthrough
case pruner.ExecWait:
log.WithField("pruner_state", state.String()).
Info("cancelling failing prune on receiver because new snapshots were taken on sender")
receiverPrunerCancel()
log.Info("waiting for receiver pruner to stop")
<-receiverPrunerDone
break outer
default:
log.WithField("pruner_state", state.String()).
Warn("new snapshots while prune on receiver is still running")
}
}
}
}
}()
pruningWg.Wait() // if pruners handle ctx cancellation correctly, we don't need to wait for it here
}

68
daemon/job/build_jobs.go Normal file
View File

@ -0,0 +1,68 @@
package job
import (
"fmt"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/config"
)
func JobsFromConfig(c *config.Config) ([]Job, error) {
js := make([]Job, len(c.Jobs))
for i := range c.Jobs {
j, err := buildJob(c.Global, c.Jobs[i])
if err != nil {
return nil, err
}
js[i] = j
}
return js, nil
}
func buildJob(c *config.Global, in config.JobEnum) (j Job, err error) {
cannotBuildJob := func(e error, name string) (Job, error) {
return nil, errors.Wrapf(err, "cannot build job %q", name)
}
// FIXME prettify this
switch v := in.Ret.(type) {
case *config.SinkJob:
m, err := modeSinkFromConfig(c, v)
if err != nil {
return cannotBuildJob(err, v.Name)
}
j, err = passiveSideFromConfig(c, &v.PassiveJob, m)
if err != nil {
return cannotBuildJob(err, v.Name)
}
case *config.SourceJob:
m, err := modeSourceFromConfig(c, v)
if err != nil {
return cannotBuildJob(err, v.Name)
}
j, err = passiveSideFromConfig(c, &v.PassiveJob, m)
if err != nil {
return cannotBuildJob(err, v.Name)
}
case *config.PushJob:
m, err := modePushFromConfig(c, v)
if err != nil {
return cannotBuildJob(err, v.Name)
}
j, err = activeSide(c, &v.ActiveJob, m)
if err != nil {
return cannotBuildJob(err, v.Name)
}
case *config.PullJob:
m, err := modePullFromConfig(c, v)
if err != nil {
return cannotBuildJob(err, v.Name)
}
j, err = activeSide(c, &v.ActiveJob, m)
if err != nil {
return cannotBuildJob(err, v.Name)
}
default:
panic(fmt.Sprintf("implementation error: unknown job type %T", v))
}
return j, nil
}

103
daemon/job/job.go Normal file
View File

@ -0,0 +1,103 @@
package job
import (
"context"
"encoding/json"
"fmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/zrepl/zrepl/logger"
)
type Logger = logger.Logger
type contextKey int
const (
contextKeyLog contextKey = iota
)
func GetLogger(ctx context.Context) Logger {
if l, ok := ctx.Value(contextKeyLog).(Logger); ok {
return l
}
return logger.NewNullLogger()
}
func WithLogger(ctx context.Context, l Logger) context.Context {
return context.WithValue(ctx, contextKeyLog, l)
}
type Job interface {
Name() string
Run(ctx context.Context)
Status() *Status
RegisterMetrics(registerer prometheus.Registerer)
}
type Type string
const (
TypeInternal Type = "internal"
TypePush Type = "push"
TypeSink Type = "sink"
TypePull Type = "pull"
TypeSource Type = "source"
)
type Status struct {
Type Type
JobSpecific interface{}
}
func (s *Status) MarshalJSON() ([]byte, error) {
typeJson, err := json.Marshal(s.Type)
if err != nil {
return nil, err
}
jobJSON, err := json.Marshal(s.JobSpecific)
if err != nil {
return nil, err
}
m := map[string]json.RawMessage {
"type": typeJson,
string(s.Type): jobJSON,
}
return json.Marshal(m)
}
func (s *Status) UnmarshalJSON(in []byte) (err error) {
var m map[string]json.RawMessage
if err := json.Unmarshal(in, &m); err != nil {
return err
}
tJSON, ok := m["type"]
if !ok {
return fmt.Errorf("field 'type' not found")
}
if err := json.Unmarshal(tJSON, &s.Type); err != nil {
return err
}
key := string(s.Type)
jobJSON, ok := m[key]
if !ok {
return fmt.Errorf("field '%s', not found", key)
}
switch s.Type {
case TypePull: fallthrough
case TypePush:
var st ActiveSideStatus
err = json.Unmarshal(jobJSON, &st)
s.JobSpecific = &st
case TypeSource: fallthrough
case TypeSink:
var st PassiveStatus
err = json.Unmarshal(jobJSON, &st)
s.JobSpecific = &st
case TypeInternal:
// internal jobs do not report specifics
default:
err = fmt.Errorf("unknown job type '%s'", key)
}
return err
}
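
Anything satisfying this small Job interface can be handed to the daemon's jobs.start. A hypothetical no-op job, for illustration only:

// Hypothetical no-op job, for illustration only.
type noopJob struct{ name string }

var _ Job = (*noopJob)(nil) // compile-time interface check

func (j *noopJob) Name() string { return j.name }

func (j *noopJob) Status() *Status {
	return &Status{Type: TypeInternal} // internal jobs report no specifics
}

func (j *noopJob) RegisterMetrics(registerer prometheus.Registerer) {}

func (j *noopJob) Run(ctx context.Context) {
	log := GetLogger(ctx)
	log.Info("noop job running")
	<-ctx.Done() // block until the daemon cancels the context
	log.WithError(ctx.Err()).Info("noop job exiting")
}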

196
daemon/job/passive.go Normal file
View File

@ -0,0 +1,196 @@
package job
import (
"context"
"github.com/pkg/errors"
"github.com/problame/go-streamrpc"
"github.com/prometheus/client_golang/prometheus"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/filters"
"github.com/zrepl/zrepl/daemon/logging"
"github.com/zrepl/zrepl/daemon/transport/serve"
"github.com/zrepl/zrepl/daemon/snapper"
"github.com/zrepl/zrepl/endpoint"
"github.com/zrepl/zrepl/zfs"
"path"
)
type PassiveSide struct {
mode passiveMode
name string
l serve.ListenerFactory
rpcConf *streamrpc.ConnConfig
}
type passiveMode interface {
ConnHandleFunc(ctx context.Context, conn serve.AuthenticatedConn) streamrpc.HandlerFunc
RunPeriodic(ctx context.Context)
Type() Type
}
type modeSink struct {
rootDataset *zfs.DatasetPath
}
func (m *modeSink) Type() Type { return TypeSink }
func (m *modeSink) ConnHandleFunc(ctx context.Context, conn serve.AuthenticatedConn) streamrpc.HandlerFunc {
log := GetLogger(ctx)
clientRootStr := path.Join(m.rootDataset.ToString(), conn.ClientIdentity())
clientRoot, err := zfs.NewDatasetPath(clientRootStr)
if err != nil {
log.WithError(err).
WithField("client_identity", conn.ClientIdentity()).
Error("cannot build client filesystem map (client identity must be a valid ZFS FS name)")
return nil
}
log.WithField("client_root", clientRoot).Debug("client root")
local, err := endpoint.NewReceiver(clientRoot)
if err != nil {
log.WithError(err).Error("unexpected error: cannot convert mapping to filter")
return nil
}
h := endpoint.NewHandler(local)
return h.Handle
}
func (m *modeSink) RunPeriodic(_ context.Context) {}
func modeSinkFromConfig(g *config.Global, in *config.SinkJob) (m *modeSink, err error) {
m = &modeSink{}
m.rootDataset, err = zfs.NewDatasetPath(in.RootFS)
if err != nil {
return nil, errors.New("root dataset is not a valid zfs filesystem path")
}
if m.rootDataset.Length() <= 0 {
return nil, errors.New("root dataset must not be empty") // duplicates error check of receiver
}
return m, nil
}
type modeSource struct {
fsfilter zfs.DatasetFilter
snapper *snapper.PeriodicOrManual
}
func modeSourceFromConfig(g *config.Global, in *config.SourceJob) (m *modeSource, err error) {
// FIXME exact dedup of modePush
m = &modeSource{}
fsf, err := filters.DatasetMapFilterFromConfig(in.Filesystems)
if err != nil {
return nil, errors.Wrap(err, "cannnot build filesystem filter")
}
m.fsfilter = fsf
if m.snapper, err = snapper.FromConfig(g, fsf, in.Snapshotting); err != nil {
return nil, errors.Wrap(err, "cannot build snapper")
}
return m, nil
}
func (m *modeSource) Type() Type { return TypeSource }
func (m *modeSource) ConnHandleFunc(ctx context.Context, conn serve.AuthenticatedConn) streamrpc.HandlerFunc {
sender := endpoint.NewSender(m.fsfilter)
h := endpoint.NewHandler(sender)
return h.Handle
}
func (m *modeSource) RunPeriodic(ctx context.Context) {
m.snapper.Run(ctx, nil)
}
func passiveSideFromConfig(g *config.Global, in *config.PassiveJob, mode passiveMode) (s *PassiveSide, err error) {
s = &PassiveSide{mode: mode, name: in.Name}
if s.l, s.rpcConf, err = serve.FromConfig(g, in.Serve); err != nil {
return nil, errors.Wrap(err, "cannot build server")
}
return s, nil
}
func (j *PassiveSide) Name() string { return j.name }
type PassiveStatus struct {}
func (s *PassiveSide) Status() *Status {
return &Status{Type: s.mode.Type()} // FIXME PassiveStatus
}
func (*PassiveSide) RegisterMetrics(registerer prometheus.Registerer) {}
func (j *PassiveSide) Run(ctx context.Context) {
log := GetLogger(ctx)
defer log.Info("job exiting")
l, err := j.l.Listen()
if err != nil {
log.WithError(err).Error("cannot listen")
return
}
defer l.Close()
{
ctx, cancel := context.WithCancel(logging.WithSubsystemLoggers(ctx, log)) // shadowing
defer cancel()
go j.mode.RunPeriodic(ctx)
}
log.WithField("addr", l.Addr()).Debug("accepting connections")
var connId int
outer:
for {
select {
case res := <-accept(ctx, l):
if res.err != nil {
log.WithError(res.err).Info("accept error")
continue
}
conn := res.conn
connId++
connLog := log.
WithField("connID", connId)
connLog.
WithField("addr", conn.RemoteAddr()).
WithField("client_identity", conn.ClientIdentity()).
Info("handling connection")
go func() {
defer connLog.Info("finished handling connection")
defer conn.Close()
ctx := logging.WithSubsystemLoggers(ctx, connLog)
handleFunc := j.mode.ConnHandleFunc(ctx, conn)
if handleFunc == nil {
return
}
if err := streamrpc.ServeConn(ctx, conn, j.rpcConf, handleFunc); err != nil {
log.WithError(err).Error("error serving client")
}
}()
case <-ctx.Done():
break outer
}
}
}
type acceptResult struct {
conn serve.AuthenticatedConn
err error
}
func accept(ctx context.Context, listener serve.AuthenticatedListener) <-chan acceptResult {
c := make(chan acceptResult, 1)
go func() {
conn, err := listener.Accept(ctx)
c <- acceptResult{conn, err}
}()
return c
}

35
daemon/job/reset/reset.go Normal file
View File

@ -0,0 +1,35 @@
package reset
import (
"context"
"errors"
)
type contextKey int
const contextKeyReset contextKey = iota
func Wait(ctx context.Context) <-chan struct{} {
wc, ok := ctx.Value(contextKeyReset).(chan struct{})
if !ok {
wc = make(chan struct{})
}
return wc
}
type Func func() error
var AlreadyReset = errors.New("already reset")
func Context(ctx context.Context) (context.Context, Func) {
wc := make(chan struct{})
wuf := func() error {
select {
case wc <- struct{}{}:
return nil
default:
return AlreadyReset
}
}
return context.WithValue(ctx, contextKeyReset, wc), wuf
}

View File

@ -0,0 +1,35 @@
package wakeup
import (
"context"
"errors"
)
type contextKey int
const contextKeyWakeup contextKey = iota
func Wait(ctx context.Context) <-chan struct{} {
wc, ok := ctx.Value(contextKeyWakeup).(chan struct{})
if !ok {
wc = make(chan struct{})
}
return wc
}
type Func func() error
var AlreadyWokenUp = errors.New("already woken up")
func Context(ctx context.Context) (context.Context, Func) {
wc := make(chan struct{})
wuf := func() error {
select {
case wc <- struct{}{}:
return nil
default:
return AlreadyWokenUp
}
}
return context.WithValue(ctx, contextKeyWakeup, wc), wuf
}
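
wakeup (and the structurally identical reset package above) implements a small handshake: Context derives a context carrying an unbuffered channel and returns the send side as a Func, while the job side blocks on Wait(ctx). A self-contained sketch of both sides:

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/zrepl/zrepl/daemon/job/wakeup"
)

func main() {
	ctx, wake := wakeup.Context(context.Background())
	done := make(chan struct{})

	// Consumer side: what a job's Run loop does while idle.
	go func() {
		defer close(done)
		select {
		case <-wakeup.Wait(ctx):
			fmt.Println("woken up, starting invocation")
		case <-ctx.Done():
			fmt.Println("context cancelled")
		}
	}()

	// Producer side: what the control job's "wakeup" signal does.
	// wake() fails with AlreadyWokenUp while nobody is currently receiving,
	// so retry until the consumer picks it up.
	for wake() == wakeup.AlreadyWokenUp {
		time.Sleep(10 * time.Millisecond)
	}
	<-done
}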

View File

@ -0,0 +1,32 @@
package logging
import (
"fmt"
"github.com/problame/go-streamrpc"
"github.com/zrepl/zrepl/logger"
"strings"
)
type streamrpcLogAdaptor = twoClassLogAdaptor
type twoClassLogAdaptor struct {
logger.Logger
}
var _ streamrpc.Logger = twoClassLogAdaptor{}
func (a twoClassLogAdaptor) Errorf(fmtStr string, args ...interface{}) {
const errorSuffix = ": %s"
if len(args) == 1 {
if err, ok := args[0].(error); ok && strings.HasSuffix(fmtStr, errorSuffix) {
msg := strings.TrimSuffix(fmtStr, errorSuffix)
a.WithError(err).Error(msg)
return
}
}
a.Logger.Error(fmt.Sprintf(fmtStr, args...))
}
func (a twoClassLogAdaptor) Infof(fmtStr string, args ...interface{}) {
a.Logger.Debug(fmt.Sprintf(fmtStr, args...))
}
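
The adaptor demotes streamrpc's Infof output to Debug and unwraps the common "...: %s" error pattern so the wrapped error ends up in the structured WithError field. For illustration, assuming log is a logger.Logger and err/n are values from the surrounding code:

// Sketch of the two mappings performed by the adaptor; log, err and n are assumed.
a := twoClassLogAdaptor{log}
a.Errorf("handshake failed: %s", err) // becomes log.WithError(err).Error("handshake failed")
a.Infof("sent %d bytes", n)           // becomes a Debug-level entry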

View File

@ -0,0 +1,205 @@
package logging
import (
"context"
"crypto/tls"
"crypto/x509"
"github.com/mattn/go-isatty"
"github.com/pkg/errors"
"github.com/problame/go-streamrpc"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/pruner"
"github.com/zrepl/zrepl/endpoint"
"github.com/zrepl/zrepl/logger"
"github.com/zrepl/zrepl/replication"
"github.com/zrepl/zrepl/tlsconf"
"os"
"github.com/zrepl/zrepl/daemon/snapper"
"github.com/zrepl/zrepl/daemon/transport/serve"
)
func OutletsFromConfig(in config.LoggingOutletEnumList) (*logger.Outlets, error) {
outlets := logger.NewOutlets()
if len(in) == 0 {
// Default config
out := WriterOutlet{&HumanFormatter{}, os.Stdout}
outlets.Add(out, logger.Warn)
return outlets, nil
}
var syslogOutlets, stdoutOutlets int
for lei, le := range in {
outlet, minLevel, err := parseOutlet(le)
if err != nil {
return nil, errors.Wrapf(err, "cannot parse outlet #%d", lei)
}
var _ logger.Outlet = WriterOutlet{}
var _ logger.Outlet = &SyslogOutlet{}
switch outlet.(type) {
case *SyslogOutlet:
syslogOutlets++
case WriterOutlet:
stdoutOutlets++
}
outlets.Add(outlet, minLevel)
}
if syslogOutlets > 1 {
return nil, errors.Errorf("can only define one 'syslog' outlet")
}
if stdoutOutlets > 1 {
return nil, errors.Errorf("can only define one 'stdout' outlet")
}
return outlets, nil
}
const (
SubsysReplication = "repl"
SubsysStreamrpc = "rpc"
SubsysEndpoint = "endpoint"
)
func WithSubsystemLoggers(ctx context.Context, log logger.Logger) context.Context {
ctx = replication.WithLogger(ctx, log.WithField(SubsysField, "repl"))
ctx = streamrpc.ContextWithLogger(ctx, streamrpcLogAdaptor{log.WithField(SubsysField, "rpc")})
ctx = endpoint.WithLogger(ctx, log.WithField(SubsysField, "endpoint"))
ctx = pruner.WithLogger(ctx, log.WithField(SubsysField, "pruning"))
ctx = snapper.WithLogger(ctx, log.WithField(SubsysField, "snapshot"))
ctx = serve.WithLogger(ctx, log.WithField(SubsysField, "serve"))
return ctx
}
func parseLogFormat(i interface{}) (f EntryFormatter, err error) {
var is string
switch j := i.(type) {
case string:
is = j
default:
return nil, errors.Errorf("invalid log format: wrong type: %T", i)
}
switch is {
case "human":
return &HumanFormatter{}, nil
case "logfmt":
return &LogfmtFormatter{}, nil
case "json":
return &JSONFormatter{}, nil
default:
return nil, errors.Errorf("invalid log format: '%s'", is)
}
}
func parseOutlet(in config.LoggingOutletEnum) (o logger.Outlet, level logger.Level, err error) {
parseCommon := func(common config.LoggingOutletCommon) (logger.Level, EntryFormatter, error) {
if common.Level == "" || common.Format == "" {
return 0, nil, errors.Errorf("must specify 'level' and 'format' field")
}
minLevel, err := logger.ParseLevel(common.Level)
if err != nil {
return 0, nil, errors.Wrap(err, "cannot parse 'level' field")
}
formatter, err := parseLogFormat(common.Format)
if err != nil {
return 0, nil, errors.Wrap(err, "cannot parse 'formatter' field")
}
return minLevel, formatter, nil
}
var f EntryFormatter
switch v := in.Ret.(type) {
case *config.StdoutLoggingOutlet:
level, f, err = parseCommon(v.LoggingOutletCommon)
if err != nil {
break
}
o, err = parseStdoutOutlet(v, f)
case *config.TCPLoggingOutlet:
level, f, err = parseCommon(v.LoggingOutletCommon)
if err != nil {
break
}
o, err = parseTCPOutlet(v, f)
case *config.SyslogLoggingOutlet:
level, f, err = parseCommon(v.LoggingOutletCommon)
if err != nil {
break
}
o, err = parseSyslogOutlet(v, f)
default:
panic(v)
}
return o, level, err
}
func parseStdoutOutlet(in *config.StdoutLoggingOutlet, formatter EntryFormatter) (WriterOutlet, error) {
flags := MetadataAll
writer := os.Stdout
if !isatty.IsTerminal(writer.Fd()) && !in.Time {
flags &= ^MetadataTime
}
if isatty.IsTerminal(writer.Fd()) && !in.Color {
flags &= ^MetadataColor
}
formatter.SetMetadataFlags(flags)
return WriterOutlet{
formatter,
os.Stdout,
}, nil
}
func parseTCPOutlet(in *config.TCPLoggingOutlet, formatter EntryFormatter) (out *TCPOutlet, err error) {
var tlsConfig *tls.Config
if in.TLS != nil {
tlsConfig, err = func(m *config.TCPLoggingOutletTLS, host string) (*tls.Config, error) {
clientCert, err := tls.LoadX509KeyPair(m.Cert, m.Key)
if err != nil {
return nil, errors.Wrap(err, "cannot load client cert")
}
var rootCAs *x509.CertPool
if m.CA == "" {
if rootCAs, err = x509.SystemCertPool(); err != nil {
return nil, errors.Wrap(err, "cannot open system cert pool")
}
} else {
rootCAs, err = tlsconf.ParseCAFile(m.CA)
if err != nil {
return nil, errors.Wrap(err, "cannot parse CA cert")
}
}
if rootCAs == nil {
panic("invariant violated")
}
return tlsconf.ClientAuthClient(host, rootCAs, clientCert)
}(in.TLS, in.Address)
if err != nil {
return nil, errors.New("cannot not parse TLS config in field 'tls'")
}
}
formatter.SetMetadataFlags(MetadataAll)
return NewTCPOutlet(formatter, in.Net, in.Address, tlsConfig, in.RetryInterval), nil
}
func parseSyslogOutlet(in *config.SyslogLoggingOutlet, formatter EntryFormatter) (out *SyslogOutlet, err error) {
out = &SyslogOutlet{}
out.Formatter = formatter
out.Formatter.SetMetadataFlags(MetadataNone)
out.RetryInterval = in.RetryInterval
return out, nil
}

View File

@ -1,20 +1,16 @@
package cmd
package logging
import (
"bytes"
"encoding/json"
"fmt"
"github.com/fatih/color"
"github.com/go-logfmt/logfmt"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/logger"
"time"
)
type EntryFormatter interface {
SetMetadataFlags(flags MetadataFlags)
Format(e *logger.Entry) ([]byte, error)
}
const (
FieldLevel = "level"
FieldMessage = "msg"
@ -22,13 +18,19 @@ const (
)
const (
logJobField string = "job"
logTaskField string = "task"
logFSField string = "filesystem"
logMapFromField string = "map_from"
logMapToField string = "map_to"
logIncFromField string = "inc_from"
logIncToField string = "inc_to"
JobField string = "job"
SubsysField string = "subsystem"
)
type MetadataFlags int64
const (
MetadataTime MetadataFlags = 1 << iota
MetadataLevel
MetadataColor
MetadataNone MetadataFlags = 0
MetadataAll MetadataFlags = ^0
)
type NoFormatter struct{}
@ -69,39 +71,29 @@ func (f *HumanFormatter) ignored(field string) bool {
func (f *HumanFormatter) Format(e *logger.Entry) (out []byte, err error) {
var line bytes.Buffer
col := color.New()
if f.metadataFlags&MetadataColor != 0 {
col = e.Color()
}
if f.metadataFlags&MetadataTime != 0 {
fmt.Fprintf(&line, "%s ", e.Time.Format(HumanFormatterDateFormat))
}
if f.metadataFlags&MetadataLevel != 0 {
fmt.Fprintf(&line, "[%s]", e.Level.Short())
fmt.Fprintf(&line, "[%s]", col.Sprint(e.Level.Short()))
}
prefixFields := []string{logJobField, logTaskField, logFSField}
prefixFields := []string{JobField, SubsysField}
prefixed := make(map[string]bool, len(prefixFields)+2)
for _, field := range prefixFields {
val, ok := e.Fields[field].(string)
if ok {
if !f.ignored(field) {
fmt.Fprintf(&line, "[%s]", val)
prefixed[field] = true
}
} else {
break
if !ok {
continue
}
if !f.ignored(field) {
fmt.Fprintf(&line, "[%s]", col.Sprint(val))
prefixed[field] = true
}
}
// even more prefix fields
mapFrom, mapFromOk := e.Fields[logMapFromField].(string)
mapTo, mapToOk := e.Fields[logMapToField].(string)
if mapFromOk && mapToOk && !f.ignored(logMapFromField) && !f.ignored(logMapToField) {
fmt.Fprintf(&line, "[%s => %s]", mapFrom, mapTo)
prefixed[logMapFromField], prefixed[logMapToField] = true, true
}
incFrom, incFromOk := e.Fields[logIncFromField].(string)
incTo, incToOk := e.Fields[logIncToField].(string)
if incFromOk && incToOk && !f.ignored(logIncFromField) && !f.ignored(logMapToField) {
fmt.Fprintf(&line, "[%s => %s]", incFrom, incTo)
prefixed[logIncFromField], prefixed[logIncToField] = true, true
}
if line.Len() > 0 {
@ -110,15 +102,11 @@ func (f *HumanFormatter) Format(e *logger.Entry) (out []byte, err error) {
fmt.Fprint(&line, e.Message)
if len(e.Fields)-len(prefixed) > 0 {
fmt.Fprint(&line, " ")
enc := logfmt.NewEncoder(&line)
for field, value := range e.Fields {
if prefixed[field] || f.ignored(field) {
continue
}
if err := logfmtTryEncodeKeyval(enc, field, value); err != nil {
return nil, err
}
fmt.Fprintf(&line, " %s=%q", col.Sprint(field), fmt.Sprint(value))
}
}
@ -179,7 +167,7 @@ func (f *LogfmtFormatter) Format(e *logger.Entry) ([]byte, error) {
// at least try and put job and task in front
prefixed := make(map[string]bool, 2)
prefix := []string{logJobField, logTaskField}
prefix := []string{JobField, SubsysField}
for _, pf := range prefix {
v, ok := e.Fields[pf]
if !ok {

View File

@ -1,4 +1,4 @@
package cmd
package logging
import (
"bytes"
@ -12,18 +12,23 @@ import (
"time"
)
type EntryFormatter interface {
SetMetadataFlags(flags MetadataFlags)
Format(e *logger.Entry) ([]byte, error)
}
type WriterOutlet struct {
Formatter EntryFormatter
Writer io.Writer
formatter EntryFormatter
writer io.Writer
}
func (h WriterOutlet) WriteEntry(entry logger.Entry) error {
bytes, err := h.Formatter.Format(&entry)
bytes, err := h.formatter.Format(&entry)
if err != nil {
return err
}
_, err = h.Writer.Write(bytes)
h.Writer.Write([]byte("\n"))
_, err = h.writer.Write(bytes)
h.writer.Write([]byte("\n"))
return err
}

16
daemon/main.go Normal file
View File

@ -0,0 +1,16 @@
package daemon
import (
"github.com/zrepl/zrepl/cli"
"github.com/zrepl/zrepl/logger"
)
type Logger = logger.Logger
var DaemonCmd = &cli.Subcommand {
Use: "daemon",
Short: "run the zrepl daemon",
Run: func(subcommand *cli.Subcommand, args []string) error {
return Run(subcommand.Config())
},
}

View File

@ -1,4 +1,4 @@
package cmd
package nethelpers
import (
"github.com/pkg/errors"

View File

@ -1,4 +1,4 @@
package cmd
package daemon
import (
"net/http"
@ -9,7 +9,7 @@ import (
"net/http/pprof"
)
type PProfServer struct {
type pprofServer struct {
cc chan PprofServerControlMsg
state PprofServerControlMsg
listener net.Listener
@ -22,9 +22,9 @@ type PprofServerControlMsg struct {
HttpListenAddress string
}
func NewPProfServer(ctx context.Context) *PProfServer {
func NewPProfServer(ctx context.Context) *pprofServer {
s := &PProfServer{
s := &pprofServer{
cc: make(chan PprofServerControlMsg),
}
@ -32,7 +32,7 @@ func NewPProfServer(ctx context.Context) *PProfServer {
return s
}
func (s *PProfServer) controlLoop(ctx context.Context) {
func (s *pprofServer) controlLoop(ctx context.Context) {
outer:
for {
@ -75,6 +75,6 @@ outer:
}
}
func (s *PProfServer) Control(msg PprofServerControlMsg) {
func (s *pprofServer) Control(msg PprofServerControlMsg) {
s.cc <- msg
}

89
daemon/prometheus.go Normal file
View File

@ -0,0 +1,89 @@
package daemon
import (
"context"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/job"
"github.com/zrepl/zrepl/logger"
"github.com/zrepl/zrepl/zfs"
"net"
"net/http"
)
type prometheusJob struct {
listen string
}
func newPrometheusJobFromConfig(in *config.PrometheusMonitoring) (*prometheusJob, error) {
if _, _, err := net.SplitHostPort(in.Listen); err != nil {
return nil, err
}
return &prometheusJob{in.Listen}, nil
}
var prom struct {
taskLogEntries *prometheus.CounterVec
}
func init() {
prom.taskLogEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "zrepl",
Subsystem: "daemon",
Name: "log_entries",
Help: "number of log entries per job task and level",
}, []string{"zrepl_job", "level"})
prometheus.MustRegister(prom.taskLogEntries)
}
func (j *prometheusJob) Name() string { return jobNamePrometheus }
func (j *prometheusJob) Status() *job.Status { return &job.Status{Type: job.TypeInternal} }
func (j *prometheusJob) RegisterMetrics(registerer prometheus.Registerer) {}
func (j *prometheusJob) Run(ctx context.Context) {
if err := zfs.PrometheusRegister(prometheus.DefaultRegisterer); err != nil {
panic(err)
}
log := job.GetLogger(ctx)
l, err := net.Listen("tcp", j.listen)
if err != nil {
log.WithError(err).Error("cannot listen")
return
}
go func() {
select {
case <-ctx.Done():
l.Close()
}
}()
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())
err = http.Serve(l, mux)
if err != nil {
log.WithError(err).Error("error while serving")
}
}
type prometheusJobOutlet struct {
jobName string
}
var _ logger.Outlet = prometheusJobOutlet{}
func newPrometheusLogOutlet(jobName string) prometheusJobOutlet {
return prometheusJobOutlet{jobName}
}
func (o prometheusJobOutlet) WriteEntry(entry logger.Entry) error {
prom.taskLogEntries.WithLabelValues(o.jobName, entry.Level.String()).Inc()
return nil
}

523
daemon/pruner/pruner.go Normal file
View File

@ -0,0 +1,523 @@
package pruner
import (
"context"
"fmt"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/logger"
"github.com/zrepl/zrepl/pruning"
"github.com/zrepl/zrepl/replication/pdu"
"net"
"sort"
"sync"
"time"
)
// Try to keep it compatible with github.com/zrepl/zrepl/replication.Endpoint
type History interface {
ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
}
type Target interface {
ListFilesystems(ctx context.Context) ([]*pdu.Filesystem, error)
ListFilesystemVersions(ctx context.Context, fs string) ([]*pdu.FilesystemVersion, error) // fix depS
DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error)
}
type Logger = logger.Logger
type contextKey int
const contextKeyLogger contextKey = 0
func WithLogger(ctx context.Context, log Logger) context.Context {
return context.WithValue(ctx, contextKeyLogger, log)
}
func GetLogger(ctx context.Context) Logger {
if l, ok := ctx.Value(contextKeyLogger).(Logger); ok {
return l
}
return logger.NewNullLogger()
}
type args struct {
ctx context.Context
target Target
receiver History
rules []pruning.KeepRule
retryWait time.Duration
considerSnapAtCursorReplicated bool
promPruneSecs prometheus.Observer
}
type Pruner struct {
args args
mtx sync.RWMutex
state State
// State ErrWait|ErrPerm
sleepUntil time.Time
err error
// State Exec
prunePending []*fs
pruneCompleted []*fs
}
type PrunerFactory struct {
senderRules []pruning.KeepRule
receiverRules []pruning.KeepRule
retryWait time.Duration
considerSnapAtCursorReplicated bool
promPruneSecs *prometheus.HistogramVec
}
func checkContainsKeep1(rules []pruning.KeepRule) error {
if len(rules) == 0 {
return nil //No keep rules means keep all - ok
}
for _, e := range rules {
switch e.(type) {
case *pruning.KeepLastN:
return nil
}
}
return errors.New("sender keep rules must contain last_n or be empty so that the last snapshot is definitely kept")
}
func NewPrunerFactory(in config.PruningSenderReceiver, promPruneSecs *prometheus.HistogramVec) (*PrunerFactory, error) {
keepRulesReceiver, err := pruning.RulesFromConfig(in.KeepReceiver)
if err != nil {
return nil, errors.Wrap(err, "cannot build receiver pruning rules")
}
keepRulesSender, err := pruning.RulesFromConfig(in.KeepSender)
if err != nil {
return nil, errors.Wrap(err, "cannot build sender pruning rules")
}
considerSnapAtCursorReplicated := false
for _, r := range in.KeepSender {
knr, ok := r.Ret.(*config.PruneKeepNotReplicated)
if !ok {
continue
}
considerSnapAtCursorReplicated = considerSnapAtCursorReplicated || !knr.KeepSnapshotAtCursor
}
f := &PrunerFactory{
keepRulesSender,
keepRulesReceiver,
10 * time.Second, //FIXME constant
considerSnapAtCursorReplicated,
promPruneSecs,
}
return f, nil
}
func (f *PrunerFactory) BuildSenderPruner(ctx context.Context, target Target, receiver History) *Pruner {
p := &Pruner{
args: args{
WithLogger(ctx, GetLogger(ctx).WithField("prune_side", "sender")),
target,
receiver,
f.senderRules,
f.retryWait,
f.considerSnapAtCursorReplicated,
f.promPruneSecs.WithLabelValues("sender"),
},
state: Plan,
}
return p
}
func (f *PrunerFactory) BuildReceiverPruner(ctx context.Context, target Target, receiver History) *Pruner {
p := &Pruner{
args: args{
WithLogger(ctx, GetLogger(ctx).WithField("prune_side", "receiver")),
target,
receiver,
f.receiverRules,
f.retryWait,
false, // senseless here anyways
f.promPruneSecs.WithLabelValues("receiver"),
},
state: Plan,
}
return p
}
//go:generate enumer -type=State
type State int
const (
Plan State = 1 << iota
PlanWait
Exec
ExecWait
ErrPerm
Done
)
func (s State) statefunc() state {
var statemap = map[State]state{
Plan: statePlan,
PlanWait: statePlanWait,
Exec: stateExec,
ExecWait: stateExecWait,
ErrPerm: nil,
Done: nil,
}
return statemap[s]
}
type updater func(func(*Pruner)) State
type state func(args *args, u updater) state
func (p *Pruner) Prune() {
p.prune(p.args)
}
func (p *Pruner) prune(args args) {
s := p.state.statefunc()
for s != nil {
pre := p.state
s = s(&args, func(f func(*Pruner)) State {
p.mtx.Lock()
defer p.mtx.Unlock()
f(p)
return p.state
})
post := p.state
GetLogger(args.ctx).
WithField("transition", fmt.Sprintf("%s=>%s", pre, post)).
Debug("state transition")
}
}
type Report struct {
State string
SleepUntil time.Time
Error string
Pending, Completed []FSReport
}
type FSReport struct {
Filesystem string
SnapshotList, DestroyList []SnapshotReport
Error string
}
type SnapshotReport struct {
Name string
Replicated bool
Date time.Time
}
func (p *Pruner) Report() *Report {
p.mtx.Lock()
defer p.mtx.Unlock()
r := Report{State: p.state.String()}
if p.state&(PlanWait|ExecWait) != 0 {
r.SleepUntil = p.sleepUntil
}
if p.state&(PlanWait|ExecWait|ErrPerm) != 0 {
if p.err != nil {
r.Error = p.err.Error()
}
}
if p.state & Plan|PlanWait == 0 {
return &r
}
r.Pending = make([]FSReport, len(p.prunePending))
for i, fs := range p.prunePending{
r.Pending[i] = fs.Report()
}
r.Completed = make([]FSReport, len(p.pruneCompleted))
for i, fs := range p.pruneCompleted{
r.Completed[i] = fs.Report()
}
return &r
}
type fs struct {
path string
// snapshots presented by target
// (type snapshot)
snaps []pruning.Snapshot
// destroy list returned by pruning.PruneSnapshots(snaps)
// (type snapshot)
destroyList []pruning.Snapshot
mtx sync.RWMutex
// for Plan
err error
}
func (f *fs) Update(err error) {
f.mtx.Lock()
defer f.mtx.Unlock()
f.err = err
}
func (f *fs) Report() FSReport {
f.mtx.Lock()
defer f.mtx.Unlock()
r := FSReport{}
r.Filesystem = f.path
if f.err != nil {
r.Error = f.err.Error()
}
r.SnapshotList = make([]SnapshotReport, len(f.snaps))
for i, snap := range f.snaps {
r.SnapshotList[i] = snap.(snapshot).Report()
}
r.DestroyList = make([]SnapshotReport, len(f.destroyList))
for i, snap := range f.destroyList{
r.DestroyList[i] = snap.(snapshot).Report()
}
return r
}
type snapshot struct {
replicated bool
date time.Time
fsv *pdu.FilesystemVersion
}
func (s snapshot) Report() SnapshotReport {
return SnapshotReport{
Name: s.Name(),
Replicated: s.Replicated(),
Date: s.Date(),
}
}
var _ pruning.Snapshot = snapshot{}
func (s snapshot) Name() string { return s.fsv.Name }
func (s snapshot) Replicated() bool { return s.replicated }
func (s snapshot) Date() time.Time { return s.date }
func shouldRetry(e error) bool {
switch e.(type) {
case nil:
return true
case net.Error:
return true
}
return false
}
func onErr(u updater, e error) state {
return u(func(p *Pruner) {
p.err = e
if !shouldRetry(e) {
p.state = ErrPerm
return
}
switch p.state {
case Plan:
p.state = PlanWait
case Exec:
p.state = ExecWait
default:
panic(p.state)
}
}).statefunc()
}
func statePlan(a *args, u updater) state {
ctx, target, receiver := a.ctx, a.target, a.receiver
tfss, err := target.ListFilesystems(ctx)
if err != nil {
return onErr(u, err)
}
pfss := make([]*fs, len(tfss))
fsloop:
for i, tfs := range tfss {
l := GetLogger(ctx).WithField("fs", tfs.Path)
l.Debug("plan filesystem")
pfs := &fs{
path: tfs.Path,
}
pfss[i] = pfs
tfsvs, err := target.ListFilesystemVersions(ctx, tfs.Path)
if err != nil {
l.WithError(err).Error("cannot list filesystem versions")
if shouldRetry(err) {
return onErr(u, err)
}
pfs.err = err
continue fsloop
}
pfs.snaps = make([]pruning.Snapshot, 0, len(tfsvs))
rcReq := &pdu.ReplicationCursorReq{
Filesystem: tfs.Path,
Op: &pdu.ReplicationCursorReq_Get{
Get: &pdu.ReplicationCursorReq_GetOp{},
},
}
rc, err := receiver.ReplicationCursor(ctx, rcReq)
if err != nil {
l.WithError(err).Error("cannot get replication cursor")
if shouldRetry(err) {
return onErr(u, err)
}
pfs.err = err
continue fsloop
}
if rc.GetError() != "" {
l.WithField("reqErr", rc.GetError()).Error("cannot get replication cursor")
pfs.err = fmt.Errorf("%s", rc.GetError())
continue fsloop
}
// scan from older to newer, all snapshots older than cursor are interpreted as replicated
sort.Slice(tfsvs, func(i, j int) bool {
return tfsvs[i].CreateTXG < tfsvs[j].CreateTXG
})
haveCursorSnapshot := false
for _, tfsv := range tfsvs {
if tfsv.Type != pdu.FilesystemVersion_Snapshot {
continue
}
if tfsv.Guid == rc.GetGuid() {
haveCursorSnapshot = true
}
}
preCursor := haveCursorSnapshot
for _, tfsv := range tfsvs {
if tfsv.Type != pdu.FilesystemVersion_Snapshot {
continue
}
creation, err := tfsv.CreationAsTime()
if err != nil {
pfs.err = fmt.Errorf("%s%s has invalid creation date: %s", tfs, tfsv.RelName(), err)
l.WithError(pfs.err).Error("")
continue fsloop
}
// note that we cannot use CreateTXG because target and receiver could be on different pools
atCursor := tfsv.Guid == rc.GetGuid()
preCursor = preCursor && !atCursor
pfs.snaps = append(pfs.snaps, snapshot{
replicated: preCursor || (a.considerSnapAtCursorReplicated && atCursor),
date: creation,
fsv: tfsv,
})
}
if preCursor {
pfs.err = fmt.Errorf("replication cursor not found in prune target filesystem versions")
l.WithError(pfs.err).Error("")
continue fsloop
}
// Apply prune rules
pfs.destroyList = pruning.PruneSnapshots(pfs.snaps, a.rules)
}
return u(func(pruner *Pruner) {
for _, pfs := range pfss {
if pfs.err != nil {
pruner.pruneCompleted = append(pruner.pruneCompleted, pfs)
} else {
pruner.prunePending = append(pruner.prunePending, pfs)
}
}
pruner.state = Exec
}).statefunc()
}
func stateExec(a *args, u updater) state {
var pfs *fs
state := u(func(pruner *Pruner) {
if len(pruner.prunePending) == 0 {
nextState := Done
for _, pfs := range pruner.pruneCompleted {
if pfs.err != nil {
nextState = ErrPerm
}
}
pruner.state = nextState
return
}
pfs = pruner.prunePending[0]
})
if state != Exec {
return state.statefunc()
}
destroyList := make([]*pdu.FilesystemVersion, len(pfs.destroyList))
for i := range destroyList {
destroyList[i] = pfs.destroyList[i].(snapshot).fsv
GetLogger(a.ctx).
WithField("fs", pfs.path).
WithField("destroy_snap", destroyList[i].Name).
Debug("policy destroys snapshot")
}
pfs.Update(nil)
req := pdu.DestroySnapshotsReq{
Filesystem: pfs.path,
Snapshots: destroyList,
}
_, err := a.target.DestroySnapshots(a.ctx, &req)
pfs.Update(err)
if err != nil && shouldRetry(err) {
return onErr(u, err)
}
// if it's not retryable, treat it as being done
return u(func(pruner *Pruner) {
pruner.pruneCompleted = append(pruner.pruneCompleted, pfs)
pruner.prunePending = pruner.prunePending[1:]
}).statefunc()
}
func stateExecWait(a *args, u updater) state {
return doWait(Exec, a, u)
}
func statePlanWait(a *args, u updater) state {
return doWait(Plan, a, u)
}
func doWait(goback State, a *args, u updater) state {
timer := time.NewTimer(a.retryWait)
defer timer.Stop()
select {
case <-timer.C:
return u(func(pruner *Pruner) {
pruner.state = goback
}).statefunc()
case <-a.ctx.Done():
return onErr(u, a.ctx.Err())
}
}
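
A Pruner is obtained from the factory and driven synchronously; Report() may be called concurrently, which is how the daemon's status endpoint observes progress. A compact sketch, where prunerFactory, ctx, target and receiver stand for values of the types above:

// Sketch: run one pruning pass on the sender side and inspect the outcome.
// prunerFactory, ctx, target and receiver are assumed to exist in scope.
p := prunerFactory.BuildSenderPruner(ctx, target, receiver)
p.Prune() // returns once the state machine reaches Done or ErrPerm

r := p.Report()
fmt.Printf("pruner state=%s pending=%d completed=%d err=%q\n",
	r.State, len(r.Pending), len(r.Completed), r.Error)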

View File

@ -0,0 +1,211 @@
package pruner
import (
"context"
"fmt"
"github.com/stretchr/testify/assert"
"github.com/zrepl/zrepl/logger"
"github.com/zrepl/zrepl/pruning"
"github.com/zrepl/zrepl/replication/pdu"
"net"
"testing"
"time"
)
type mockFS struct {
path string
snaps []string
}
func (m *mockFS) Filesystem() *pdu.Filesystem {
return &pdu.Filesystem{
Path: m.path,
}
}
func (m *mockFS) FilesystemVersions() []*pdu.FilesystemVersion {
versions := make([]*pdu.FilesystemVersion, len(m.snaps))
for i, v := range m.snaps {
versions[i] = &pdu.FilesystemVersion{
Type: pdu.FilesystemVersion_Snapshot,
Name: v,
Creation: pdu.FilesystemVersionCreation(time.Unix(0, 0)),
Guid: uint64(i),
}
}
return versions
}
type mockTarget struct {
fss []mockFS
destroyed map[string][]string
listVersionsErrs map[string][]error
listFilesystemsErr []error
destroyErrs map[string][]error
}
func (t *mockTarget) ListFilesystems(ctx context.Context) ([]*pdu.Filesystem, error) {
if len(t.listFilesystemsErr) > 0 {
e := t.listFilesystemsErr[0]
t.listFilesystemsErr = t.listFilesystemsErr[1:]
return nil, e
}
fss := make([]*pdu.Filesystem, len(t.fss))
for i := range fss {
fss[i] = t.fss[i].Filesystem()
}
return fss, nil
}
func (t *mockTarget) ListFilesystemVersions(ctx context.Context, fs string) ([]*pdu.FilesystemVersion, error) {
if len(t.listVersionsErrs[fs]) != 0 {
e := t.listVersionsErrs[fs][0]
t.listVersionsErrs[fs] = t.listVersionsErrs[fs][1:]
return nil, e
}
for _, mfs := range t.fss {
if mfs.path != fs {
continue
}
return mfs.FilesystemVersions(), nil
}
return nil, fmt.Errorf("filesystem %s does not exist", fs)
}
func (t *mockTarget) DestroySnapshots(ctx context.Context, req *pdu.DestroySnapshotsReq) (*pdu.DestroySnapshotsRes, error) {
fs, snaps := req.Filesystem, req.Snapshots
if len(t.destroyErrs[fs]) != 0 {
e := t.destroyErrs[fs][0]
t.destroyErrs[fs] = t.destroyErrs[fs][1:]
return nil, e
}
destroyed := t.destroyed[fs]
res := make([]*pdu.DestroySnapshotRes, len(snaps))
for i, s := range snaps {
destroyed = append(destroyed, s.Name)
res[i] = &pdu.DestroySnapshotRes{Error: "", Snapshot: s}
}
t.destroyed[fs] = destroyed
return &pdu.DestroySnapshotsRes{Results: res}, nil
}
type mockCursor struct {
snapname string
guid uint64
}
type mockHistory struct {
errs map[string][]error
cursors map[string]*mockCursor
}
func (r *mockHistory) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
fs := req.Filesystem
if len(r.errs[fs]) > 0 {
e := r.errs[fs][0]
r.errs[fs] = r.errs[fs][1:]
return nil, e
}
return &pdu.ReplicationCursorRes{Result: &pdu.ReplicationCursorRes_Guid{Guid: 0}}, nil
}
type stubNetErr struct {
msg string
temporary, timeout bool
}
var _ net.Error = stubNetErr{}
func (e stubNetErr) Error() string {
return e.msg
}
func (e stubNetErr) Temporary() bool { return e.temporary }
func (e stubNetErr) Timeout() bool { return e.timeout }
func TestPruner_Prune(t *testing.T) {
var _ net.Error = &net.OpError{} // we use it below
target := &mockTarget{
listFilesystemsErr: []error{
stubNetErr{msg: "fakerror0"},
},
listVersionsErrs: map[string][]error{
"zroot/foo": {
stubNetErr{msg: "fakeerror1"}, // should be classified as temporaty
stubNetErr{msg: "fakeerror2"},
},
},
destroyErrs: map[string][]error{
"zroot/foo": {
fmt.Errorf("permanent error"),
},
"zroot/bar": {
stubNetErr{msg: "fakeerror3"},
},
},
destroyed: make(map[string][]string),
fss: []mockFS{
{
path: "zroot/foo",
snaps: []string{
"keep_a",
"keep_b",
"drop_c",
"keep_d",
},
},
{
path: "zroot/bar",
snaps: []string{
"keep_e",
"keep_f",
"drop_g",
},
},
{
path: "zroot/baz",
snaps: []string{
"keep_h",
"drop_i",
},
},
},
}
history := &mockHistory{
errs: map[string][]error{
"zroot/foo": {
stubNetErr{msg: "fakeerror4"},
},
"zroot/baz": {
fmt.Errorf("permanent error2"),
},
},
}
keepRules := []pruning.KeepRule{pruning.MustKeepRegex("^keep")}
p := Pruner{
args: args{
ctx: WithLogger(context.Background(), logger.NewTestLogger(t)),
target: target,
receiver: history,
rules: keepRules,
retryWait: 10*time.Millisecond,
},
state: Plan,
}
p.Prune()
exp := map[string][]string{
"zroot/bar": {"drop_g"},
// drop_c is prohibited by failing destroy
// drop_i is prohibited by failing ReplicationCursor call
}
assert.Equal(t, exp, target.destroyed)
//assert.Equal(t, map[string][]error{}, target.listVersionsErrs, "retried")
}

View File

@ -0,0 +1,76 @@
// Code generated by "enumer -type=State"; DO NOT EDIT.
package pruner
import (
"fmt"
)
const (
_StateName_0 = "PlanPlanWait"
_StateName_1 = "Exec"
_StateName_2 = "ExecWait"
_StateName_3 = "ErrPerm"
_StateName_4 = "Done"
)
var (
_StateIndex_0 = [...]uint8{0, 4, 12}
_StateIndex_1 = [...]uint8{0, 4}
_StateIndex_2 = [...]uint8{0, 8}
_StateIndex_3 = [...]uint8{0, 7}
_StateIndex_4 = [...]uint8{0, 4}
)
func (i State) String() string {
switch {
case 1 <= i && i <= 2:
i -= 1
return _StateName_0[_StateIndex_0[i]:_StateIndex_0[i+1]]
case i == 4:
return _StateName_1
case i == 8:
return _StateName_2
case i == 16:
return _StateName_3
case i == 32:
return _StateName_4
default:
return fmt.Sprintf("State(%d)", i)
}
}
var _StateValues = []State{1, 2, 4, 8, 16, 32}
var _StateNameToValueMap = map[string]State{
_StateName_0[0:4]: 1,
_StateName_0[4:12]: 2,
_StateName_1[0:4]: 4,
_StateName_2[0:8]: 8,
_StateName_3[0:7]: 16,
_StateName_4[0:4]: 32,
}
// StateString retrieves an enum value from the enum constants string name.
// Throws an error if the param is not part of the enum.
func StateString(s string) (State, error) {
if val, ok := _StateNameToValueMap[s]; ok {
return val, nil
}
return 0, fmt.Errorf("%s does not belong to State values", s)
}
// StateValues returns all values of the enum
func StateValues() []State {
return _StateValues
}
// IsAState returns "true" if the value is listed in the enum definition. "false" otherwise
func (i State) IsAState() bool {
for _, v := range _StateValues {
if i == v {
return true
}
}
return false
}

369
daemon/snapper/snapper.go Normal file
View File

@ -0,0 +1,369 @@
package snapper
import (
"github.com/zrepl/zrepl/config"
"github.com/pkg/errors"
"time"
"context"
"github.com/zrepl/zrepl/daemon/filters"
"fmt"
"github.com/zrepl/zrepl/zfs"
"sort"
"github.com/zrepl/zrepl/logger"
"sync"
)
//go:generate stringer -type=SnapState
type SnapState uint
const (
SnapPending SnapState = 1 << iota
SnapStarted
SnapDone
SnapError
)
type snapProgress struct {
state SnapState
// SnapStarted, SnapDone, SnapError
name string
startAt time.Time
// SnapDone
doneAt time.Time
// SnapErr
err error
}
type args struct {
ctx context.Context
log Logger
prefix string
interval time.Duration
fsf *filters.DatasetMapFilter
snapshotsTaken chan<- struct{}
}
type Snapper struct {
args args
mtx sync.Mutex
state State
// set in state Plan, used in Waiting
lastInvocation time.Time
// valid for state Snapshotting
plan map[*zfs.DatasetPath]snapProgress
// valid for state SyncUp and Waiting
sleepUntil time.Time
// valid for state Err
err error
}
//go:generate stringer -type=State
type State uint
const (
SyncUp State = 1<<iota
Planning
Snapshotting
Waiting
ErrorWait
Stopped
)
func (s State) sf() state {
m := map[State]state{
SyncUp: syncUp,
Planning: plan,
Snapshotting: snapshot,
Waiting: wait,
ErrorWait: wait,
Stopped: nil,
}
return m[s]
}
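// The snapper is a small state machine: each state is a function that gets
// the immutable args plus an updater, mutates the Snapper under its mutex via
// that updater, and returns the next state function (nil stops the loop in
// Run, which also logs every State transition).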
type updater func(u func(*Snapper)) State
type state func(a args, u updater) state
type contextKey int
const (
contextKeyLog contextKey = 0
)
type Logger = logger.Logger
func WithLogger(ctx context.Context, log Logger) context.Context {
return context.WithValue(ctx, contextKeyLog, log)
}
func getLogger(ctx context.Context) Logger {
if log, ok := ctx.Value(contextKeyLog).(Logger); ok {
return log
}
return logger.NewNullLogger()
}
func PeriodicFromConfig(g *config.Global, fsf *filters.DatasetMapFilter, in *config.SnapshottingPeriodic) (*Snapper, error) {
if in.Prefix == "" {
return nil, errors.New("prefix must not be empty")
}
if in.Interval <= 0 {
return nil, errors.New("interval must be positive")
}
args := args{
prefix: in.Prefix,
interval: in.Interval,
fsf: fsf,
// ctx and log are set in Run()
}
return &Snapper{state: SyncUp, args: args}, nil
}
func (s *Snapper) Run(ctx context.Context, snapshotsTaken chan<- struct{}) {
getLogger(ctx).Debug("start")
defer getLogger(ctx).Debug("stop")
s.args.snapshotsTaken = snapshotsTaken
s.args.ctx = ctx
s.args.log = getLogger(ctx)
u := func(u func(*Snapper)) State {
s.mtx.Lock()
defer s.mtx.Unlock()
if u != nil {
u(s)
}
return s.state
}
var st state = syncUp
for st != nil {
pre := u(nil)
st = st(s.args, u)
post := u(nil)
getLogger(ctx).
WithField("transition", fmt.Sprintf("%s=>%s", pre, post)).
Debug("state transition")
}
}
func onErr(err error, u updater) state {
return u(func(s *Snapper) {
s.err = err
s.state = ErrorWait
}).sf()
}
func onMainCtxDone(ctx context.Context, u updater) state {
return u(func(s *Snapper) {
s.err = ctx.Err()
s.state = Stopped
}).sf()
}
func syncUp(a args, u updater) state {
fss, err := listFSes(a.fsf)
if err != nil {
return onErr(err, u)
}
syncPoint, err := findSyncPoint(a.log, fss, a.prefix, a.interval)
if err != nil {
return onErr(err, u)
}
u(func(s *Snapper){
s.sleepUntil = syncPoint
})
t := time.NewTimer(syncPoint.Sub(time.Now()))
defer t.Stop()
select {
case <-t.C:
return u(func(s *Snapper) {
s.state = Planning
}).sf()
case <-a.ctx.Done():
return onMainCtxDone(a.ctx, u)
}
}
func plan(a args, u updater) state {
u(func(snapper *Snapper) {
snapper.lastInvocation = time.Now()
})
fss, err := listFSes(a.fsf)
if err != nil {
return onErr(err, u)
}
plan := make(map[*zfs.DatasetPath]snapProgress, len(fss))
for _, fs := range fss {
plan[fs] = snapProgress{state: SnapPending}
}
return u(func(s *Snapper) {
s.state = Snapshotting
s.plan = plan
}).sf()
}
func snapshot(a args, u updater) state {
var plan map[*zfs.DatasetPath]snapProgress
u(func(snapper *Snapper) {
plan = snapper.plan
})
hadErr := false
// TODO channel programs -> allow a little jitter?
for fs, progress := range plan {
suffix := time.Now().In(time.UTC).Format("20060102_150405_000")
snapname := fmt.Sprintf("%s%s", a.prefix, suffix)
l := a.log.
WithField("fs", fs.ToString()).
WithField("snap", snapname)
u(func(snapper *Snapper) {
progress.name = snapname
progress.startAt = time.Now()
progress.state = SnapStarted
plan[fs] = progress // progress is a copy of the map value; write it back so the stored plan reflects it
})
l.Debug("create snapshot")
err := zfs.ZFSSnapshot(fs, snapname, false)
if err != nil {
hadErr = true
l.WithError(err).Error("cannot create snapshot")
}
doneAt := time.Now()
u(func(snapper *Snapper) {
progress.doneAt = doneAt
progress.state = SnapDone
if err != nil {
progress.state = SnapError
progress.err = err
}
plan[fs] = progress // write the updated copy back into the shared plan map
})
}
select {
case a.snapshotsTaken <- struct{}{}:
default:
if a.snapshotsTaken != nil {
a.log.Warn("callback channel is full, discarding snapshot update event")
}
}
return u(func(snapper *Snapper) {
if hadErr {
snapper.state = ErrorWait
snapper.err = errors.New("one or more snapshots could not be created, check logs for details")
} else {
snapper.state = Waiting
}
}).sf()
}
func wait(a args, u updater) state {
var sleepUntil time.Time
u(func(snapper *Snapper) {
lastTick := snapper.lastInvocation
snapper.sleepUntil = lastTick.Add(a.interval)
sleepUntil = snapper.sleepUntil
})
t := time.NewTimer(sleepUntil.Sub(time.Now()))
defer t.Stop()
select {
case <-t.C:
return u(func(snapper *Snapper) {
snapper.state = Planning
}).sf()
case <-a.ctx.Done():
return onMainCtxDone(a.ctx, u)
}
}
func listFSes(mf *filters.DatasetMapFilter) (fss []*zfs.DatasetPath, err error) {
return zfs.ZFSListMapping(mf)
}
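// findSyncPoint inspects, per filesystem, the most recent snapshot matching
// prefix and schedules the next snapshot one interval after it (or
// immediately if that point already lies in the past), then returns the
// earliest of these per-filesystem times. If no filesystem has a matching
// snapshot, it returns time.Now().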
func findSyncPoint(log Logger, fss []*zfs.DatasetPath, prefix string, interval time.Duration) (syncPoint time.Time, err error) {
type snapTime struct {
ds *zfs.DatasetPath
time time.Time
}
if len(fss) == 0 {
return time.Now(), nil
}
snaptimes := make([]snapTime, 0, len(fss))
now := time.Now()
log.Debug("examine filesystem state")
for _, d := range fss {
l := log.WithField("fs", d.ToString())
fsvs, err := zfs.ZFSListFilesystemVersions(d, filters.NewTypedPrefixFilter(prefix, zfs.Snapshot))
if err != nil {
l.WithError(err).Error("cannot list filesystem versions")
continue
}
if len(fsvs) <= 0 {
l.WithField("prefix", prefix).Debug("no filesystem versions with prefix")
continue
}
// Sort versions by creation
sort.SliceStable(fsvs, func(i, j int) bool {
return fsvs[i].CreateTXG < fsvs[j].CreateTXG
})
latest := fsvs[len(fsvs)-1]
l.WithField("creation", latest.Creation).
Debug("found latest snapshot")
since := now.Sub(latest.Creation)
if since < 0 {
l.WithField("snapshot", latest.Name).
WithField("creation", latest.Creation).
Error("snapshot is from the future")
continue
}
next := now
if since < interval {
next = latest.Creation.Add(interval)
}
snaptimes = append(snaptimes, snapTime{d, next})
}
if len(snaptimes) == 0 {
snaptimes = append(snaptimes, snapTime{nil, now})
}
sort.Slice(snaptimes, func(i, j int) bool {
return snaptimes[i].time.Before(snaptimes[j].time)
})
return snaptimes[0].time, nil
}
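// What follows is an illustrative sketch only, not part of this commit: it
// shows how a job might wire up a periodic Snapper and react to its wake-up
// events. The parameters g, sc, fsf and log are assumed to be provided by the
// surrounding job setup.
func runSnapperExampleSketch(ctx context.Context, g *config.Global, sc *config.SnapshottingPeriodic, fsf *filters.DatasetMapFilter, log Logger) error {
snapper, err := PeriodicFromConfig(g, fsf, sc)
if err != nil {
return err
}
wakeup := make(chan struct{}, 1) // buffered so a wake-up event is queued even if the consumer is momentarily busy
go snapper.Run(WithLogger(ctx, log), wakeup)
for {
select {
case <-wakeup:
// a snapshot pass finished; a real job would trigger pruning/replication here
case <-ctx.Done():
return ctx.Err()
}
}
}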

View File

@ -0,0 +1,39 @@
package snapper
import (
"context"
"fmt"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/filters"
)
// FIXME: properly abstract snapshotting:
// - split up things that trigger snapshotting from the mechanism
// - timer-based trigger (periodic)
// - call from control socket (manual)
// - mixed modes?
// - support a `zrepl snapshot JOBNAME` subcommand for config.SnapshottingManual
type PeriodicOrManual struct {
s *Snapper
}
func (s *PeriodicOrManual) Run(ctx context.Context, wakeUpCommon chan<- struct{}) {
if s.s != nil {
s.s.Run(ctx, wakeUpCommon)
}
}
func FromConfig(g *config.Global, fsf *filters.DatasetMapFilter, in config.SnapshottingEnum) (*PeriodicOrManual, error) {
switch v := in.Ret.(type) {
case *config.SnapshottingPeriodic:
snapper, err := PeriodicFromConfig(g, fsf, v)
if err != nil {
return nil, err
}
return &PeriodicOrManual{snapper}, nil
case *config.SnapshottingManual:
return &PeriodicOrManual{}, nil
default:
return nil, fmt.Errorf("unknown snapshotting type %T", v)
}
}

View File

@ -0,0 +1,29 @@
// Code generated by "stringer -type=SnapState"; DO NOT EDIT.
package snapper
import "strconv"
const (
_SnapState_name_0 = "SnapPendingSnapStarted"
_SnapState_name_1 = "SnapDone"
_SnapState_name_2 = "SnapError"
)
var (
_SnapState_index_0 = [...]uint8{0, 11, 22}
)
func (i SnapState) String() string {
switch {
case 1 <= i && i <= 2:
i -= 1
return _SnapState_name_0[_SnapState_index_0[i]:_SnapState_index_0[i+1]]
case i == 4:
return _SnapState_name_1
case i == 8:
return _SnapState_name_2
default:
return "SnapState(" + strconv.FormatInt(int64(i), 10) + ")"
}
}

View File

@ -0,0 +1,35 @@
// Code generated by "stringer -type=State"; DO NOT EDIT.
package snapper
import "strconv"
const (
_State_name_0 = "SyncUpPlanning"
_State_name_1 = "Snapshotting"
_State_name_2 = "Waiting"
_State_name_3 = "ErrorWait"
_State_name_4 = "Stopped"
)
var (
_State_index_0 = [...]uint8{0, 6, 14}
)
func (i State) String() string {
switch {
case 1 <= i && i <= 2:
i -= 1
return _State_name_0[_State_index_0[i]:_State_index_0[i+1]]
case i == 4:
return _State_name_1
case i == 8:
return _State_name_2
case i == 16:
return _State_name_3
case i == 32:
return _State_name_4
default:
return "State(" + strconv.FormatInt(int64(i), 10) + ")"
}
}

View File

@ -0,0 +1,25 @@
package streamrpcconfig
import (
"github.com/problame/go-streamrpc"
"github.com/zrepl/zrepl/config"
)
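// FromDaemonConfig maps a job-level RPC configuration onto a validated
// streamrpc.ConnConfig, falling back to the global RPC defaults when the job
// does not override them (in == nil).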
func FromDaemonConfig(g *config.Global, in *config.RPCConfig) (*streamrpc.ConnConfig, error) {
conf := in
if conf == nil {
conf = g.RPC
}
srpcConf := &streamrpc.ConnConfig{
RxHeaderMaxLen: conf.RxHeaderMaxLen,
RxStructuredMaxLen: conf.RxStructuredMaxLen,
RxStreamMaxChunkSize: conf.RxStreamChunkMaxLen,
TxChunkSize: conf.TxChunkSize,
Timeout: conf.Timeout,
SendHeartbeatInterval: conf.SendHeartbeatInterval,
}
if err := srpcConf.Validate(); err != nil {
return nil, err
}
return srpcConf, nil
}

View File

@ -0,0 +1,30 @@
package connecter
import (
"context"
"fmt"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/transport/serve"
"net"
)
type LocalConnecter struct {
listenerName string
clientIdentity string
}
func LocalConnecterFromConfig(in *config.LocalConnect) (*LocalConnecter, error) {
if in.ClientIdentity == "" {
return nil, fmt.Errorf("ClientIdentity must not be empty")
}
if in.ListenerName == "" {
return nil, fmt.Errorf("ListenerName must not be empty")
}
return &LocalConnecter{listenerName: in.ListenerName, clientIdentity: in.ClientIdentity}, nil
}
func (c *LocalConnecter) Connect(dialCtx context.Context) (conn net.Conn, err error) {
l := serve.GetLocalListener(c.listenerName)
return l.Connect(dialCtx, c.clientIdentity)
}

View File

@ -0,0 +1,66 @@
package connecter
import (
"context"
"github.com/jinzhu/copier"
"github.com/pkg/errors"
"github.com/problame/go-netssh"
"github.com/problame/go-streamrpc"
"github.com/zrepl/zrepl/config"
"net"
"time"
)
type SSHStdinserverConnecter struct {
Host string
User string
Port uint16
IdentityFile string
TransportOpenCommand []string
SSHCommand string
Options []string
dialTimeout time.Duration
}
var _ streamrpc.Connecter = &SSHStdinserverConnecter{}
func SSHStdinserverConnecterFromConfig(in *config.SSHStdinserverConnect) (c *SSHStdinserverConnecter, err error) {
c = &SSHStdinserverConnecter{
Host: in.Host,
User: in.User,
Port: in.Port,
IdentityFile: in.IdentityFile,
SSHCommand: in.SSHCommand,
Options: in.Options,
dialTimeout: in.DialTimeout,
}
return
}
type netsshConnToConn struct{ *netssh.SSHConn }
var _ net.Conn = netsshConnToConn{}
func (netsshConnToConn) SetDeadline(dl time.Time) error { return nil }
func (netsshConnToConn) SetReadDeadline(dl time.Time) error { return nil }
func (netsshConnToConn) SetWriteDeadline(dl time.Time) error { return nil }
func (c *SSHStdinserverConnecter) Connect(dialCtx context.Context) (net.Conn, error) {
var endpoint netssh.Endpoint
if err := copier.Copy(&endpoint, c); err != nil {
return nil, errors.WithStack(err)
}
dialCtx, dialCancel := context.WithTimeout(dialCtx, c.dialTimeout) // context.TODO tied to error handling below
defer dialCancel()
nconn, err := netssh.Dial(dialCtx, endpoint)
if err != nil {
if err == context.DeadlineExceeded {
err = errors.Errorf("dial_timeout of %s exceeded", c.dialTimeout)
}
return nil, err
}
return netsshConnToConn{nconn}, nil
}

View File

@ -0,0 +1,24 @@
package connecter
import (
"context"
"github.com/zrepl/zrepl/config"
"net"
)
type TCPConnecter struct {
Address string
dialer net.Dialer
}
func TCPConnecterFromConfig(in *config.TCPConnect) (*TCPConnecter, error) {
dialer := net.Dialer{
Timeout: in.DialTimeout,
}
return &TCPConnecter{in.Address, dialer}, nil
}
func (c *TCPConnecter) Connect(dialCtx context.Context) (conn net.Conn, err error) {
return c.dialer.DialContext(dialCtx, "tcp", c.Address)
}

View File

@ -0,0 +1,43 @@
package connecter
import (
"context"
"crypto/tls"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/tlsconf"
"net"
)
type TLSConnecter struct {
Address string
dialer net.Dialer
tlsConfig *tls.Config
}
func TLSConnecterFromConfig(in *config.TLSConnect) (*TLSConnecter, error) {
dialer := net.Dialer{
Timeout: in.DialTimeout,
}
ca, err := tlsconf.ParseCAFile(in.Ca)
if err != nil {
return nil, errors.Wrap(err, "cannot parse ca file")
}
cert, err := tls.LoadX509KeyPair(in.Cert, in.Key)
if err != nil {
return nil, errors.Wrap(err, "cannot parse cert/key pair")
}
tlsConfig, err := tlsconf.ClientAuthClient(in.ServerCN, ca, cert)
if err != nil {
return nil, errors.Wrap(err, "cannot build tls config")
}
return &TLSConnecter{in.Address, dialer, tlsConfig}, nil
}
func (c *TLSConnecter) Connect(dialCtx context.Context) (conn net.Conn, err error) {
return tls.DialWithDialer(&c.dialer, "tcp", c.Address, c.tlsConfig)
}

View File

@ -0,0 +1,84 @@
package connecter
import (
"context"
"fmt"
"github.com/problame/go-streamrpc"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/streamrpcconfig"
"github.com/zrepl/zrepl/daemon/transport"
"net"
"time"
)
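// HandshakeConnecter wraps another streamrpc.Connecter and performs the zrepl
// protocol handshake (see daemon/transport) on every freshly dialed
// connection before handing it to the RPC layer.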
type HandshakeConnecter struct {
connecter streamrpc.Connecter
}
func (c HandshakeConnecter) Connect(ctx context.Context) (net.Conn, error) {
conn, err := c.connecter.Connect(ctx)
if err != nil {
return nil, err
}
dl, ok := ctx.Deadline()
if !ok {
dl = time.Now().Add(10 * time.Second) // FIXME constant
}
if err := transport.DoHandshakeCurrentVersion(conn, dl); err != nil {
conn.Close()
return nil, err
}
return conn, nil
}
func FromConfig(g *config.Global, in config.ConnectEnum) (*ClientFactory, error) {
var (
connecter streamrpc.Connecter
errConnecter, errRPC error
connConf *streamrpc.ConnConfig
)
switch v := in.Ret.(type) {
case *config.SSHStdinserverConnect:
connecter, errConnecter = SSHStdinserverConnecterFromConfig(v)
connConf, errRPC = streamrpcconfig.FromDaemonConfig(g, v.RPC)
case *config.TCPConnect:
connecter, errConnecter = TCPConnecterFromConfig(v)
connConf, errRPC = streamrpcconfig.FromDaemonConfig(g, v.RPC)
case *config.TLSConnect:
connecter, errConnecter = TLSConnecterFromConfig(v)
connConf, errRPC = streamrpcconfig.FromDaemonConfig(g, v.RPC)
case *config.LocalConnect:
connecter, errConnecter = LocalConnecterFromConfig(v)
connConf, errRPC = streamrpcconfig.FromDaemonConfig(g, v.RPC)
default:
panic(fmt.Sprintf("implementation error: unknown connecter type %T", v))
}
if errConnecter != nil {
return nil, errConnecter
}
if errRPC != nil {
return nil, errRPC
}
config := streamrpc.ClientConfig{ConnConfig: connConf}
if err := config.Validate(); err != nil {
return nil, err
}
connecter = HandshakeConnecter{connecter}
return &ClientFactory{connecter: connecter, config: &config}, nil
}
type ClientFactory struct {
connecter streamrpc.Connecter
config *streamrpc.ClientConfig
}
func (f ClientFactory) NewClient() (*streamrpc.Client, error) {
return streamrpc.NewClient(f.connecter, f.config)
}

View File

@ -0,0 +1,136 @@
package transport
import (
"bytes"
"fmt"
"io"
"net"
"strings"
"time"
"unicode/utf8"
)
type HandshakeMessage struct {
ProtocolVersion int
Extensions []string
}
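// Encode serializes the message into the handshake banner sent over the wire.
// For illustration, ProtocolVersion=1 with no extensions encodes to
// "0000000056 ZREPL_ZFS_REPLICATION PROTOVERSION=0001 EXTENSIONS=0000\n",
// i.e. a 10-digit length prefix, a space, and the 56-byte banner itself.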
func (m *HandshakeMessage) Encode() ([]byte, error) {
if m.ProtocolVersion <= 0 || m.ProtocolVersion > 9999 {
return nil, fmt.Errorf("protocol version must be in [1, 9999]")
}
if len(m.Extensions) > 9999 {
return nil, fmt.Errorf("protocol only supports [0, 9999] extensions")
}
// EXTENSIONS is a count of subsequent \n separated lines that contain protocol extensions
var extensions strings.Builder
for i, ext := range m.Extensions {
if strings.ContainsAny(ext, "\n") {
return nil, fmt.Errorf("Extension #%d contains forbidden newline character", i)
}
if !utf8.ValidString(ext) {
return nil, fmt.Errorf("Extension #%d is not valid UTF-8", i)
}
extensions.WriteString(ext)
extensions.WriteString("\n")
}
withoutLen := fmt.Sprintf("ZREPL_ZFS_REPLICATION PROTOVERSION=%04d EXTENSIONS=%04d\n%s",
m.ProtocolVersion, len(m.Extensions), extensions.String())
withLen := fmt.Sprintf("%010d %s", len(withoutLen), withoutLen)
return []byte(withLen), nil
}
func (m *HandshakeMessage) DecodeReader(r io.Reader, maxLen int) error {
var lenAndSpace [11]byte
if _, err := io.ReadFull(r, lenAndSpace[:]); err != nil {
return err
}
if !utf8.Valid(lenAndSpace[:]) {
return fmt.Errorf("invalid start of handshake message: not valid UTF-8")
}
var followLen int
n, err := fmt.Sscanf(string(lenAndSpace[:]), "%010d ", &followLen)
if n != 1 || err != nil {
return fmt.Errorf("could not parse handshake message length")
}
if followLen > maxLen {
return fmt.Errorf("handshake message length exceeds max length (%d vs %d)",
followLen, maxLen)
}
var buf bytes.Buffer
_, err = io.Copy(&buf, io.LimitReader(r, int64(followLen)))
if err != nil {
return err
}
var (
protoVersion, extensionCount int
)
n, err = fmt.Fscanf(&buf, "ZREPL_ZFS_REPLICATION PROTOVERSION=%04d EXTENSIONS=%4d\n",
&protoVersion, &extensionCount)
if n != 2 || err != nil {
return fmt.Errorf("could not parse handshake message: %s", err)
}
if protoVersion < 1 {
return fmt.Errorf("invalid protocol version %q", protoVersion)
}
m.ProtocolVersion = protoVersion
if extensionCount < 0 {
return fmt.Errorf("invalid extension count %q", extensionCount)
}
if extensionCount == 0 {
if buf.Len() != 0 {
return fmt.Errorf("unexpected data trailing after header")
}
m.Extensions = nil
return nil
}
s := buf.String()
if strings.Count(s, "\n") != extensionCount {
return fmt.Errorf("inconsistent extension count: found %d, header says %d", len(m.Extensions), extensionCount)
}
exts := strings.Split(s, "\n")
if exts[len(exts)-1] != "" {
return fmt.Errorf("unexpected data trailing after last extension newline")
}
m.Extensions = exts[0:len(exts)-1]
return nil
}
func DoHandshakeCurrentVersion(conn net.Conn, deadline time.Time) error {
// current protocol version is hardcoded here
return DoHandshakeVersion(conn, deadline, 1)
}
func DoHandshakeVersion(conn net.Conn, deadline time.Time, version int) error {
ours := HandshakeMessage{
ProtocolVersion: version,
Extensions: nil,
}
hsb, err := ours.Encode()
if err != nil {
return fmt.Errorf("could not encode protocol banner: %s", err)
}
conn.SetDeadline(deadline)
_, err = io.Copy(conn, bytes.NewBuffer(hsb))
if err != nil {
return fmt.Errorf("could not send protocol banner: %s", err)
}
theirs := HandshakeMessage{}
if err := theirs.DecodeReader(conn, 16 * 4096); err != nil { // FIXME constant
return fmt.Errorf("could not decode protocol banner: %s", err)
}
if theirs.ProtocolVersion != ours.ProtocolVersion {
return fmt.Errorf("protocol versions do not match: ours is %d, theirs is %d",
ours.ProtocolVersion, theirs.ProtocolVersion)
}
// ignore extensions, we don't use them
return nil
}

View File

@ -0,0 +1,119 @@
package transport
import (
"bytes"
"fmt"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/zrepl/zrepl/util/socketpair"
"io"
"strings"
"testing"
"time"
)
func TestHandshakeMessage_Encode(t *testing.T) {
msg := HandshakeMessage{
ProtocolVersion: 2342,
}
encB, err := msg.Encode()
require.NoError(t, err)
enc := string(encB)
t.Logf("enc: %s", enc)
assert.False(t, strings.ContainsAny(enc[0:10], " "))
assert.True(t, enc[10] == ' ')
var (
headerlen, protoversion, extensionCount int
)
n, err := fmt.Sscanf(enc, "%010d ZREPL_ZFS_REPLICATION PROTOVERSION=%04d EXTENSIONS=%04d\n",
&headerlen, &protoversion, &extensionCount)
if n != 3 || (err != nil && err != io.EOF) {
t.Fatalf("%v %v", n, err)
}
assert.Equal(t, 2342, protoversion)
assert.Equal(t, 0, extensionCount)
assert.Equal(t, len(enc)-11, headerlen)
}
func TestHandshakeMessage_Encode_InvalidProtocolVersion(t *testing.T) {
for _, pv := range []int{-1, 0, 10000, 10001} {
t.Logf("testing invalid protocol version = %v", pv)
msg := HandshakeMessage{
ProtocolVersion: pv,
}
b, err := msg.Encode()
assert.Error(t, err)
assert.Nil(t, b)
}
}
func TestHandshakeMessage_DecodeReader(t *testing.T) {
in := HandshakeMessage{
2342,
[]string{"foo", "bar 2342"},
}
enc, err := in.Encode()
require.NoError(t, err)
out := HandshakeMessage{}
err = out.DecodeReader(bytes.NewReader([]byte(enc)), 4 * 4096)
assert.NoError(t, err)
assert.Equal(t, 2342, out.ProtocolVersion)
assert.Equal(t, 2, len(out.Extensions))
assert.Equal(t, "foo", out.Extensions[0])
assert.Equal(t, "bar 2342", out.Extensions[1])
}
func TestDoHandshakeVersion_ErrorOnDifferentVersions(t *testing.T) {
srv, client, err := socketpair.SocketPair()
if err != nil {
t.Fatal(err)
}
defer srv.Close()
defer client.Close()
srvErrCh := make(chan error)
go func() {
srvErrCh <- DoHandshakeVersion(srv, time.Now().Add(2*time.Second), 1)
}()
err = DoHandshakeVersion(client, time.Now().Add(2*time.Second), 2)
t.Log(err)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "version"))
srvErr := <-srvErrCh
t.Log(srvErr)
assert.Error(t, srvErr)
assert.True(t, strings.Contains(srvErr.Error(), "version"))
}
func TestDoHandshakeCurrentVersion(t *testing.T) {
srv, client, err := socketpair.SocketPair()
if err != nil {
t.Fatal(err)
}
defer srv.Close()
defer client.Close()
srvErrCh := make(chan error)
go func() {
srvErrCh <- DoHandshakeVersion(srv, time.Now().Add(2*time.Second), 1)
}()
err = DoHandshakeVersion(client, time.Now().Add(2*time.Second), 1)
assert.Nil(t, err)
assert.Nil(t, <-srvErrCh)
}

View File

@ -0,0 +1,147 @@
package serve
import (
"github.com/pkg/errors"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/transport"
"net"
"github.com/zrepl/zrepl/daemon/streamrpcconfig"
"github.com/problame/go-streamrpc"
"context"
"github.com/zrepl/zrepl/logger"
"github.com/zrepl/zrepl/zfs"
"time"
)
type contextKey int
const contextKeyLog contextKey = 0
type Logger = logger.Logger
func WithLogger(ctx context.Context, log Logger) context.Context {
return context.WithValue(ctx, contextKeyLog, log)
}
func getLogger(ctx context.Context) Logger {
if log, ok := ctx.Value(contextKeyLog).(Logger); ok {
return log
}
return logger.NewNullLogger()
}
type AuthenticatedConn interface {
net.Conn
// ClientIdentity must be a string that satisfies ValidateClientIdentity
ClientIdentity() string
}
// A client identity must be a single component in a ZFS filesystem path
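// For illustration (relying on zfs.NewDatasetPath semantics): "backup-host1"
// is accepted, whereas "" and "pool/backup-host1" are rejected.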
func ValidateClientIdentity(in string) (err error) {
path, err := zfs.NewDatasetPath(in)
if err != nil {
return err
}
if path.Length() != 1 {
return errors.New("client identity must be a single path comonent (not empty, no '/')")
}
return nil
}
type authConn struct {
net.Conn
clientIdentity string
}
var _ AuthenticatedConn = authConn{}
func (c authConn) ClientIdentity() string {
if err := ValidateClientIdentity(c.clientIdentity); err != nil {
panic(err)
}
return c.clientIdentity
}
// like net.Listener, but with an AuthenticatedConn instead of net.Conn
type AuthenticatedListener interface {
Addr() (net.Addr)
Accept(ctx context.Context) (AuthenticatedConn, error)
Close() error
}
type ListenerFactory interface {
Listen() (AuthenticatedListener, error)
}
type HandshakeListenerFactory struct {
lf ListenerFactory
}
func (lf HandshakeListenerFactory) Listen() (AuthenticatedListener, error) {
l, err := lf.lf.Listen()
if err != nil {
return nil, err
}
return HandshakeListener{l}, nil
}
type HandshakeListener struct {
l AuthenticatedListener
}
func (l HandshakeListener) Addr() (net.Addr) { return l.l.Addr() }
func (l HandshakeListener) Close() error { return l.l.Close() }
func (l HandshakeListener) Accept(ctx context.Context) (AuthenticatedConn, error) {
conn, err := l.l.Accept(ctx)
if err != nil {
return nil, err
}
dl, ok := ctx.Deadline()
if !ok {
dl = time.Now().Add(10*time.Second) // FIXME constant
}
if err := transport.DoHandshakeCurrentVersion(conn, dl); err != nil {
conn.Close()
return nil, err
}
return conn, nil
}
func FromConfig(g *config.Global, in config.ServeEnum) (lf ListenerFactory, conf *streamrpc.ConnConfig, _ error) {
var (
lfError, rpcErr error
)
switch v := in.Ret.(type) {
case *config.TCPServe:
lf, lfError = TCPListenerFactoryFromConfig(g, v)
conf, rpcErr = streamrpcconfig.FromDaemonConfig(g, v.RPC)
case *config.TLSServe:
lf, lfError = TLSListenerFactoryFromConfig(g, v)
conf, rpcErr = streamrpcconfig.FromDaemonConfig(g, v.RPC)
case *config.StdinserverServer:
lf, lfError = MultiStdinserverListenerFactoryFromConfig(g, v)
conf, rpcErr = streamrpcconfig.FromDaemonConfig(g, v.RPC)
case *config.LocalServe:
lf, lfError = LocalListenerFactoryFromConfig(g, v)
conf, rpcErr = streamrpcconfig.FromDaemonConfig(g, v.RPC)
default:
return nil, nil, errors.Errorf("internal error: unknown serve type %T", v)
}
if lfError != nil {
return nil, nil, lfError
}
if rpcErr != nil {
return nil, nil, rpcErr
}
lf = HandshakeListenerFactory{lf}
return lf, conf, nil
}

View File

@ -0,0 +1,187 @@
package serve
import (
"context"
"fmt"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/util/socketpair"
"net"
"sync"
)
var localListeners struct {
m map[string]*LocalListener // listenerName -> listener
init sync.Once
mtx sync.Mutex
}
func GetLocalListener(listenerName string) (*LocalListener) {
localListeners.init.Do(func() {
localListeners.m = make(map[string]*LocalListener)
})
localListeners.mtx.Lock()
defer localListeners.mtx.Unlock()
l, ok := localListeners.m[listenerName]
if !ok {
l = newLocalListener()
localListeners.m[listenerName] = l
}
return l
}
type connectRequest struct {
clientIdentity string
callback chan connectResult
}
type connectResult struct {
conn net.Conn
err error
}
type LocalListener struct {
connects chan connectRequest
}
func newLocalListener() *LocalListener {
return &LocalListener{
connects: make(chan connectRequest),
}
}
// Connect to the LocalListener from a client with identity clientIdentity
func (l *LocalListener) Connect(dialCtx context.Context, clientIdentity string) (conn net.Conn, err error) {
// place request
req := connectRequest{
clientIdentity: clientIdentity,
callback: make(chan connectResult),
}
select {
case l.connects <- req:
case <-dialCtx.Done():
return nil, dialCtx.Err()
}
// wait for listener response
select {
case connRes := <- req.callback:
conn, err = connRes.conn, connRes.err
case <-dialCtx.Done():
close(req.callback) // sending to the channel afterwards will panic, the listener has to catch this
conn, err = nil, dialCtx.Err()
}
return conn, err
}
type localAddr struct {
S string
}
func (localAddr) Network() string { return "local" }
func (a localAddr) String() string { return a.S }
func (l *LocalListener) Addr() (net.Addr) { return localAddr{"<listening>"} }
type localConn struct {
net.Conn
clientIdentity string
}
func (l localConn) ClientIdentity() string { return l.clientIdentity }
func (l *LocalListener) Accept(ctx context.Context) (AuthenticatedConn, error) {
respondToRequest := func(req connectRequest, res connectResult) (err error) {
getLogger(ctx).
WithField("res.conn", res.conn).WithField("res.err", res.err).
Debug("responding to client request")
defer func() {
if errv := recover(); errv != nil {
getLogger(ctx).WithField("recover_err", errv).
Debug("panic on send to client callback, likely a legitimate client-side timeout")
}
}()
select {
case req.callback <- res:
err = nil
default:
err = fmt.Errorf("client-provided callback did block on send")
}
close(req.callback)
return err
}
getLogger(ctx).Debug("waiting for local client connect requests")
var req connectRequest
select {
case req = <-l.connects:
case <-ctx.Done():
return nil, ctx.Err()
}
getLogger(ctx).WithField("client_identity", req.clientIdentity).Debug("got connect request")
if req.clientIdentity == "" {
res := connectResult{nil, fmt.Errorf("client identity must not be empty")}
if err := respondToRequest(req, res); err != nil {
return nil, err
}
return nil, fmt.Errorf("client connected with empty client identity")
}
getLogger(ctx).Debug("creating socketpair")
left, right, err := socketpair.SocketPair()
if err != nil {
res := connectResult{nil, fmt.Errorf("server error: %s", err)}
if respErr := respondToRequest(req, res); respErr != nil {
// returning the socketpair error properly is more important than the error sent to the client
getLogger(ctx).WithError(respErr).Error("error responding to client")
}
return nil, err
}
getLogger(ctx).Debug("responding with left side of socketpair")
res := connectResult{left, nil}
if err := respondToRequest(req, res); err != nil {
getLogger(ctx).WithError(err).Error("error responding to client")
if err := left.Close(); err != nil {
getLogger(ctx).WithError(err).Error("cannot close left side of socketpair")
}
if err := right.Close(); err != nil {
getLogger(ctx).WithError(err).Error("cannot close right side of socketpair")
}
return nil, err
}
return localConn{right, req.clientIdentity}, nil
}
func (l *LocalListener) Close() error {
// FIXME: make sure concurrent Accepts return with error, and further Accepts return that error, too
// Example impl: for each accept, do context.WithCancel, and store the cancel in a list
// When closing, set a member variable to state=closed, make sure accept will exit early
// and then call all cancels in the list
// The code path from Accept entry over check if state=closed to list entry must be protected by a mutex.
return nil
}
type LocalListenerFactory struct {
listenerName string
}
func LocalListenerFactoryFromConfig(g *config.Global, in *config.LocalServe) (f *LocalListenerFactory, err error) {
if in.ListenerName == "" {
return nil, fmt.Errorf("ListenerName must not be empty")
}
return &LocalListenerFactory{listenerName: in.ListenerName}, nil
}
func (lf *LocalListenerFactory) Listen() (AuthenticatedListener, error) {
return GetLocalListener(lf.listenerName), nil
}
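// What follows is an illustrative sketch only, not part of this commit: it
// shows the in-process rendezvous that LocalListener provides. Both sides
// share a listener name; the listener name "example" and the client identity
// "backup-host1" are made up for this sketch.
func localTransportExampleSketch(ctx context.Context) error {
l := GetLocalListener("example")
accepted := make(chan error, 1)
go func() {
conn, err := l.Accept(ctx)
if err != nil {
accepted <- err
return
}
defer conn.Close()
getLogger(ctx).WithField("client_identity", conn.ClientIdentity()).Debug("accepted local connection")
accepted <- nil
}()
conn, err := l.Connect(ctx, "backup-host1")
if err != nil {
return err
}
defer conn.Close()
return <-accepted
}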

View File

@ -0,0 +1,158 @@
package serve
import (
"github.com/problame/go-netssh"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/nethelpers"
"io"
"net"
"path"
"time"
"context"
"github.com/pkg/errors"
"sync/atomic"
)
type StdinserverListenerFactory struct {
ClientIdentities []string
Sockdir string
}
func MultiStdinserverListenerFactoryFromConfig(g *config.Global, in *config.StdinserverServer) (f *multiStdinserverListenerFactory, err error) {
for _, ci := range in.ClientIdentities {
if err := ValidateClientIdentity(ci); err != nil {
return nil, errors.Wrapf(err, "invalid client identity %q", ci)
}
}
f = &multiStdinserverListenerFactory{
ClientIdentities: in.ClientIdentities,
Sockdir: g.Serve.StdinServer.SockDir,
}
return
}
type multiStdinserverListenerFactory struct {
ClientIdentities []string
Sockdir string
}
func (f *multiStdinserverListenerFactory) Listen() (AuthenticatedListener, error) {
return multiStdinserverListenerFromClientIdentities(f.Sockdir, f.ClientIdentities)
}
type multiStdinserverAcceptRes struct {
conn AuthenticatedConn
err error
}
type MultiStdinserverListener struct {
listeners []*stdinserverListener
accepts chan multiStdinserverAcceptRes
closed int32
}
// client identities must be validated
func multiStdinserverListenerFromClientIdentities(sockdir string, cis []string) (*MultiStdinserverListener, error) {
listeners := make([]*stdinserverListener, 0, len(cis))
var err error
for _, ci := range cis {
sockpath := path.Join(sockdir, ci)
l := &stdinserverListener{clientIdentity: ci}
if err = nethelpers.PreparePrivateSockpath(sockpath); err != nil {
break
}
if l.l, err = netssh.Listen(sockpath); err != nil {
break
}
listeners = append(listeners, l)
}
if err != nil {
for _, l := range listeners {
l.Close() // FIXME error reporting?
}
return nil, err
}
return &MultiStdinserverListener{listeners: listeners}, nil
}
func (m *MultiStdinserverListener) Accept(ctx context.Context) (AuthenticatedConn, error){
if m.accepts == nil {
m.accepts = make(chan multiStdinserverAcceptRes, len(m.listeners))
for i := range m.listeners {
go func(i int) {
for atomic.LoadInt32(&m.closed) == 0 {
conn, err := m.listeners[i].Accept(context.TODO())
m.accepts <- multiStdinserverAcceptRes{conn, err}
}
}(i)
}
}
res := <- m.accepts
return res.conn, res.err
}
func (m *MultiStdinserverListener) Addr() (net.Addr) {
return netsshAddr{}
}
func (m *MultiStdinserverListener) Close() error {
atomic.StoreInt32(&m.closed, 1)
var oneErr error
for _, l := range m.listeners {
if err := l.Close(); err != nil && oneErr == nil {
oneErr = err
}
}
return oneErr
}
// a single stdinserverListener (part of MultiStdinserverListener)
type stdinserverListener struct {
l *netssh.Listener
clientIdentity string
}
func (l stdinserverListener) Addr() net.Addr {
return netsshAddr{}
}
func (l stdinserverListener) Accept(ctx context.Context) (AuthenticatedConn, error) {
c, err := l.l.Accept()
if err != nil {
return nil, err
}
return netsshConnToNetConnAdatper{c, l.clientIdentity}, nil
}
func (l stdinserverListener) Close() (err error) {
return l.l.Close()
}
type netsshAddr struct{}
func (netsshAddr) Network() string { return "netssh" }
func (netsshAddr) String() string { return "???" }
type netsshConnToNetConnAdatper struct {
io.ReadWriteCloser // works for both netssh.SSHConn and netssh.ServeConn
clientIdentity string
}
func (a netsshConnToNetConnAdatper) ClientIdentity() string { return a.clientIdentity }
func (netsshConnToNetConnAdatper) LocalAddr() net.Addr { return netsshAddr{} }
func (netsshConnToNetConnAdatper) RemoteAddr() net.Addr { return netsshAddr{} }
// FIXME log warning once!
func (netsshConnToNetConnAdatper) SetDeadline(t time.Time) error { return nil }
func (netsshConnToNetConnAdatper) SetReadDeadline(t time.Time) error { return nil }
func (netsshConnToNetConnAdatper) SetWriteDeadline(t time.Time) error { return nil }

View File

@ -0,0 +1,91 @@
package serve
import (
"github.com/zrepl/zrepl/config"
"net"
"github.com/pkg/errors"
"context"
)
type TCPListenerFactory struct {
address *net.TCPAddr
clientMap *ipMap
}
type ipMapEntry struct {
ip net.IP
ident string
}
type ipMap struct {
entries []ipMapEntry
}
func ipMapFromConfig(clients map[string]string) (*ipMap, error) {
entries := make([]ipMapEntry, 0, len(clients))
for clientIPString, clientIdent := range clients {
clientIP := net.ParseIP(clientIPString)
if clientIP == nil {
return nil, errors.Errorf("cannot parse client IP %q", clientIPString)
}
if err := ValidateClientIdentity(clientIdent); err != nil {
return nil, errors.Wrapf(err,"invalid client identity for IP %q", clientIPString)
}
entries = append(entries, ipMapEntry{clientIP, clientIdent})
}
return &ipMap{entries: entries}, nil
}
func (m *ipMap) Get(ip net.IP) (string, error) {
for _, e := range m.entries {
if e.ip.Equal(ip) {
return e.ident, nil
}
}
return "", errors.Errorf("no identity mapping for client IP %s", ip)
}
func TCPListenerFactoryFromConfig(c *config.Global, in *config.TCPServe) (*TCPListenerFactory, error) {
addr, err := net.ResolveTCPAddr("tcp", in.Listen)
if err != nil {
return nil, errors.Wrap(err, "cannot parse listen address")
}
clientMap, err := ipMapFromConfig(in.Clients)
if err != nil {
return nil, errors.Wrap(err, "cannot parse client IP map")
}
lf := &TCPListenerFactory{
address: addr,
clientMap: clientMap,
}
return lf, nil
}
func (f *TCPListenerFactory) Listen() (AuthenticatedListener, error) {
l, err := net.ListenTCP("tcp", f.address)
if err != nil {
return nil, err
}
return &TCPAuthListener{l, f.clientMap}, nil
}
type TCPAuthListener struct {
*net.TCPListener
clientMap *ipMap
}
func (f *TCPAuthListener) Accept(ctx context.Context) (AuthenticatedConn, error) {
nc, err := f.TCPListener.Accept()
if err != nil {
return nil, err
}
clientIP := nc.RemoteAddr().(*net.TCPAddr).IP
clientIdent, err := f.clientMap.Get(clientIP)
if err != nil {
getLogger(ctx).WithField("ip", clientIP).Error("client IP not in client map")
nc.Close()
return nil, err
}
return authConn{nc, clientIdent}, nil
}
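// What follows is an illustrative sketch only, not part of this commit: it
// shows how the client map resolves a peer IP to the client identity that
// TCPAuthListener attaches to accepted connections. The address and identity
// are made up for this sketch.
func clientMapExampleSketch() (string, error) {
m, err := ipMapFromConfig(map[string]string{
"192.0.2.23": "backup-host1",
})
if err != nil {
return "", err
}
// returns "backup-host1"
return m.Get(net.ParseIP("192.0.2.23"))
}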

Some files were not shown because too many files have changed in this diff