mirror of
https://github.com/zrepl/zrepl.git
synced 2025-01-14 10:19:14 +01:00
507 lines
13 KiB
Go
507 lines
13 KiB
Go
package cmd
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/spf13/cobra"
|
|
"github.com/zrepl/zrepl/jobrun"
|
|
"github.com/zrepl/zrepl/rpc"
|
|
"github.com/zrepl/zrepl/util"
|
|
"github.com/zrepl/zrepl/zfs"
|
|
)
|
|
|
|
// runArgs holds the values of the command line flags of the `run`
// subcommand. The fields are bound to their flags in init().
var runArgs struct {
	// job, if non-empty, restricts `run` to the job with this name.
	job string
	// once makes every job use jobrun.NoRepeatStrategy instead of its
	// configured repeat strategy.
	once bool
}
|
|
|
|
var RunCmd = &cobra.Command{
|
|
Use: "run",
|
|
Short: "run push & pull replication",
|
|
Run: cmdRun,
|
|
}
|
|
|
|
var PushCmd = &cobra.Command{
|
|
Use: "push",
|
|
Short: "run push job (first positional argument)",
|
|
Run: cmdPush,
|
|
}
|
|
|
|
var PullCmd = &cobra.Command{
|
|
Use: "pull",
|
|
Short: "run pull job (first positional argument)",
|
|
Run: cmdPull,
|
|
}
|
|
|
|
func init() {
|
|
RootCmd.AddCommand(RunCmd)
|
|
RunCmd.Flags().BoolVar(&runArgs.once, "once", false, "run jobs only once, regardless of configured repeat behavior")
|
|
RunCmd.Flags().StringVar(&runArgs.job, "job", "", "run only the given job")
|
|
|
|
RootCmd.AddCommand(PushCmd)
|
|
RootCmd.AddCommand(PullCmd)
|
|
}
|
|
|
|
func cmdPush(cmd *cobra.Command, args []string) {
|
|
|
|
if len(args) != 1 {
|
|
log.Printf("must specify exactly one job as positional argument")
|
|
os.Exit(1)
|
|
}
|
|
job, ok := conf.Pushs[args[0]]
|
|
if !ok {
|
|
log.Printf("could not find push job %s", args[0])
|
|
os.Exit(1)
|
|
}
|
|
if err := jobPush(job, log); err != nil {
|
|
log.Printf("error doing push: %s", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
}
|
|
|
|
func cmdPull(cmd *cobra.Command, args []string) {
|
|
|
|
if len(args) != 1 {
|
|
log.Printf("must specify exactly one job as positional argument")
|
|
os.Exit(1)
|
|
}
|
|
job, ok := conf.Pulls[args[0]]
|
|
if !ok {
|
|
log.Printf("could not find pull job %s", args[0])
|
|
os.Exit(1)
|
|
}
|
|
|
|
if err := jobPull(job, log); err != nil {
|
|
log.Printf("error doing pull: %s", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
}
|
|
|
|
func cmdRun(cmd *cobra.Command, args []string) {
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
runner.Start()
|
|
}()
|
|
|
|
jobs := make([]jobrun.Job, len(conf.Pulls)+len(conf.Pushs))
|
|
i := 0
|
|
for _, pull := range conf.Pulls {
|
|
jobs[i] = jobrun.Job{
|
|
Name: fmt.Sprintf("pull.%d", i),
|
|
RepeatStrategy: pull.RepeatStrategy,
|
|
RunFunc: func(log jobrun.Logger) error {
|
|
log.Printf("doing pull: %v", pull)
|
|
return jobPull(pull, log)
|
|
},
|
|
}
|
|
i++
|
|
}
|
|
for _, push := range conf.Pushs {
|
|
jobs[i] = jobrun.Job{
|
|
Name: fmt.Sprintf("push.%d", i),
|
|
RepeatStrategy: push.RepeatStrategy,
|
|
RunFunc: func(log jobrun.Logger) error {
|
|
log.Printf("doing push: %v", push)
|
|
return jobPush(push, log)
|
|
},
|
|
}
|
|
i++
|
|
}
|
|
|
|
for _, j := range jobs {
|
|
if runArgs.once {
|
|
j.RepeatStrategy = jobrun.NoRepeatStrategy{}
|
|
}
|
|
if runArgs.job != "" {
|
|
if runArgs.job == j.Name {
|
|
runner.AddJob(j)
|
|
break
|
|
}
|
|
continue
|
|
}
|
|
runner.AddJob(j)
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case job := <-runner.NotificationChan():
|
|
log.Printf("job %s reported error: %v\n", job.Name, job.LastError)
|
|
}
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
type localPullACL struct{}
|
|
|
|
func (a localPullACL) Filter(p *zfs.DatasetPath) (pass bool, err error) {
|
|
return true, nil
|
|
}
|
|
|
|
func jobPull(pull *Pull, log jobrun.Logger) (err error) {
|
|
|
|
if lt, ok := pull.From.Transport.(LocalTransport); ok {
|
|
|
|
lt.SetHandler(Handler{
|
|
Logger: log,
|
|
// Allow access to any dataset since we control what mapping
|
|
// is passed to the pull routine.
|
|
// All local datasets will be passed to its Map() function,
|
|
// but only those for which a mapping exists will actually be pulled.
|
|
// We can pay this small performance penalty for now.
|
|
PullACL: localPullACL{},
|
|
})
|
|
pull.From.Transport = lt
|
|
log.Printf("fixing up local transport: %#v", pull.From.Transport)
|
|
}
|
|
|
|
var remote rpc.RPCRequester
|
|
|
|
if remote, err = pull.From.Transport.Connect(log); err != nil {
|
|
return
|
|
}
|
|
|
|
defer closeRPCWithTimeout(log, remote, time.Second*10, "")
|
|
|
|
return doPull(PullContext{remote, log, pull.Mapping, pull.InitialReplPolicy})
|
|
}
|
|
|
|
func jobPush(push *Push, log jobrun.Logger) (err error) {
|
|
|
|
if _, ok := push.To.Transport.(LocalTransport); ok {
|
|
panic("no support for local pushs")
|
|
}
|
|
|
|
var remote rpc.RPCRequester
|
|
if remote, err = push.To.Transport.Connect(log); err != nil {
|
|
return err
|
|
}
|
|
|
|
defer closeRPCWithTimeout(log, remote, time.Second*10, "")
|
|
|
|
log.Printf("building handler for PullMeRequest")
|
|
handler := Handler{
|
|
Logger: log,
|
|
PullACL: push.Filter,
|
|
SinkMappingFunc: nil, // no need for that in the handler for PullMe
|
|
}
|
|
log.Printf("handler: %#v", handler)
|
|
|
|
r := rpc.PullMeRequest{
|
|
InitialReplPolicy: push.InitialReplPolicy,
|
|
}
|
|
log.Printf("doing PullMeRequest: %#v", r)
|
|
|
|
if err = remote.PullMeRequest(r, handler); err != nil {
|
|
log.Printf("PullMeRequest failed: %s", err)
|
|
return
|
|
}
|
|
|
|
log.Printf("push job finished")
|
|
return
|
|
|
|
}
|
|
|
|
func closeRPCWithTimeout(log Logger, remote rpc.RPCRequester, timeout time.Duration, goodbye string) {
|
|
log.Printf("closing rpc connection")
|
|
|
|
ch := make(chan error)
|
|
go func() {
|
|
ch <- remote.CloseRequest(rpc.CloseRequest{goodbye})
|
|
close(ch)
|
|
}()
|
|
|
|
var err error
|
|
select {
|
|
case <-time.After(timeout):
|
|
err = fmt.Errorf("timeout exceeded (%s)", timeout)
|
|
case closeRequestErr := <-ch:
|
|
err = closeRequestErr
|
|
}
|
|
|
|
if err != nil {
|
|
log.Printf("error closing connection: %s", err)
|
|
err = remote.ForceClose()
|
|
if err != nil {
|
|
log.Printf("error force-closing connection: %s", err)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
type PullContext struct {
|
|
Remote rpc.RPCRequester
|
|
Log Logger
|
|
Mapping DatasetMapping
|
|
InitialReplPolicy rpc.InitialReplPolicy
|
|
}
|
|
|
|
func doPull(pull PullContext) (err error) {
|
|
|
|
remote := pull.Remote
|
|
log := pull.Log
|
|
|
|
log.Printf("requesting remote filesystem list")
|
|
fsr := rpc.FilesystemRequest{}
|
|
var remoteFilesystems []*zfs.DatasetPath
|
|
if remoteFilesystems, err = remote.FilesystemRequest(fsr); err != nil {
|
|
return
|
|
}
|
|
|
|
log.Printf("map remote filesystems to local paths and determine order for per-filesystem sync")
|
|
type RemoteLocalMapping struct {
|
|
Remote *zfs.DatasetPath
|
|
Local *zfs.DatasetPath
|
|
}
|
|
replMapping := make(map[string]RemoteLocalMapping, len(remoteFilesystems))
|
|
localTraversal := zfs.NewDatasetPathForest()
|
|
for fs := range remoteFilesystems {
|
|
var err error
|
|
var localFs *zfs.DatasetPath
|
|
localFs, err = pull.Mapping.Map(remoteFilesystems[fs])
|
|
if err != nil {
|
|
if err != NoMatchError {
|
|
err := fmt.Errorf("error mapping %s: %s", remoteFilesystems[fs], err)
|
|
log.Printf("%s", err)
|
|
return err
|
|
}
|
|
continue
|
|
}
|
|
log.Printf("%s => %s", remoteFilesystems[fs].ToString(), localFs.ToString())
|
|
m := RemoteLocalMapping{remoteFilesystems[fs], localFs}
|
|
replMapping[m.Local.ToString()] = m
|
|
localTraversal.Add(m.Local)
|
|
}
|
|
|
|
log.Printf("build cache for already present local filesystem state")
|
|
localFilesystemState, err := zfs.ZFSListFilesystemState()
|
|
if err != nil {
|
|
log.Printf("error requesting local filesystem state: %s", err)
|
|
return err
|
|
}
|
|
|
|
log.Printf("start per-filesystem sync")
|
|
localTraversal.WalkTopDown(func(v zfs.DatasetPathVisit) bool {
|
|
|
|
if v.FilledIn {
|
|
if _, exists := localFilesystemState[v.Path.ToString()]; exists {
|
|
// No need to verify if this is a placeholder or not. It is sufficient
|
|
// to know we can add child filesystems to it
|
|
return true
|
|
}
|
|
log.Printf("creating placeholder filesystem %s", v.Path.ToString())
|
|
err = zfs.ZFSCreatePlaceholderFilesystem(v.Path)
|
|
if err != nil {
|
|
err = fmt.Errorf("aborting, cannot create placeholder filesystem %s: %s", v.Path, err)
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
m, ok := replMapping[v.Path.ToString()]
|
|
if !ok {
|
|
panic("internal inconsistency: replMapping should contain mapping for any path that was not filled in by WalkTopDown()")
|
|
}
|
|
|
|
log := func(format string, args ...interface{}) {
|
|
log.Printf("[%s => %s]: %s", m.Remote.ToString(), m.Local.ToString(), fmt.Sprintf(format, args...))
|
|
}
|
|
|
|
log("examing local filesystem state")
|
|
localState, localExists := localFilesystemState[m.Local.ToString()]
|
|
var versions []zfs.FilesystemVersion
|
|
switch {
|
|
case !localExists:
|
|
log("local filesystem does not exist")
|
|
case localState.Placeholder:
|
|
log("local filesystem is marked as placeholder")
|
|
default:
|
|
log("local filesystem exists")
|
|
log("requesting local filesystem versions")
|
|
if versions, err = zfs.ZFSListFilesystemVersions(m.Local, nil); err != nil {
|
|
log("cannot get local filesystem versions: %s", err)
|
|
return false
|
|
}
|
|
}
|
|
|
|
log("requesting remote filesystem versions")
|
|
var theirVersions []zfs.FilesystemVersion
|
|
theirVersions, err = remote.FilesystemVersionsRequest(rpc.FilesystemVersionsRequest{
|
|
Filesystem: m.Remote,
|
|
})
|
|
if err != nil {
|
|
log("error requesting remote filesystem versions: %s", err)
|
|
log("stopping replication for all filesystems mapped as children of %s", m.Local.ToString())
|
|
return false
|
|
}
|
|
|
|
log("computing diff between remote and local filesystem versions")
|
|
diff := zfs.MakeFilesystemDiff(versions, theirVersions)
|
|
log("%s", diff)
|
|
|
|
if localState.Placeholder && diff.Conflict != zfs.ConflictAllRight {
|
|
panic("internal inconsistency: local placeholder implies ConflictAllRight")
|
|
}
|
|
|
|
switch diff.Conflict {
|
|
case zfs.ConflictAllRight:
|
|
|
|
log("performing initial sync, following policy: '%s'", pull.InitialReplPolicy)
|
|
|
|
if pull.InitialReplPolicy != rpc.InitialReplPolicyMostRecent {
|
|
panic(fmt.Sprintf("policy '%s' not implemented", pull.InitialReplPolicy))
|
|
}
|
|
|
|
snapsOnly := make([]zfs.FilesystemVersion, 0, len(diff.MRCAPathRight))
|
|
for s := range diff.MRCAPathRight {
|
|
if diff.MRCAPathRight[s].Type == zfs.Snapshot {
|
|
snapsOnly = append(snapsOnly, diff.MRCAPathRight[s])
|
|
}
|
|
}
|
|
|
|
if len(snapsOnly) < 1 {
|
|
log("cannot perform initial sync: no remote snapshots. stopping...")
|
|
return false
|
|
}
|
|
|
|
r := rpc.InitialTransferRequest{
|
|
Filesystem: m.Remote,
|
|
FilesystemVersion: snapsOnly[len(snapsOnly)-1],
|
|
}
|
|
|
|
log("requesting snapshot stream for %s", r.FilesystemVersion)
|
|
|
|
var stream io.Reader
|
|
if stream, err = remote.InitialTransferRequest(r); err != nil {
|
|
log("error requesting initial transfer: %s", err)
|
|
return false
|
|
}
|
|
log("received initial transfer request response")
|
|
|
|
log("invoking zfs receive")
|
|
watcher := util.IOProgressWatcher{Reader: stream}
|
|
watcher.KickOff(1*time.Second, func(p util.IOProgress) {
|
|
log("progress on receive operation: %v bytes received", p.TotalRX)
|
|
})
|
|
|
|
recvArgs := []string{"-u"}
|
|
if localState.Placeholder {
|
|
log("receive with forced rollback to replace placeholder filesystem")
|
|
recvArgs = append(recvArgs, "-F")
|
|
}
|
|
|
|
if err = zfs.ZFSRecv(m.Local, &watcher, recvArgs...); err != nil {
|
|
log("error receiving stream: %s", err)
|
|
return false
|
|
}
|
|
log("finished receiving stream, %v bytes total", watcher.Progress().TotalRX)
|
|
|
|
log("configuring properties of received filesystem")
|
|
if err = zfs.ZFSSet(m.Local, "readonly", "on"); err != nil {
|
|
|
|
}
|
|
|
|
log("finished initial transfer")
|
|
return true
|
|
|
|
case zfs.ConflictIncremental:
|
|
|
|
if len(diff.IncrementalPath) < 2 {
|
|
log("remote and local are in sync")
|
|
return true
|
|
}
|
|
|
|
log("following incremental path from diff")
|
|
var pathRx uint64
|
|
|
|
for i := 0; i < len(diff.IncrementalPath)-1; i++ {
|
|
|
|
from, to := diff.IncrementalPath[i], diff.IncrementalPath[i+1]
|
|
|
|
log := func(format string, args ...interface{}) {
|
|
log("[%v/%v][%s => %s]: %s", i+1, len(diff.IncrementalPath)-1,
|
|
from.Name, to.Name, fmt.Sprintf(format, args...))
|
|
}
|
|
|
|
log("requesting incremental snapshot stream")
|
|
r := rpc.IncrementalTransferRequest{
|
|
Filesystem: m.Remote,
|
|
From: from,
|
|
To: to,
|
|
}
|
|
var stream io.Reader
|
|
if stream, err = remote.IncrementalTransferRequest(r); err != nil {
|
|
log("error requesting incremental snapshot stream: %s", err)
|
|
return false
|
|
}
|
|
|
|
log("invoking zfs receive")
|
|
watcher := util.IOProgressWatcher{Reader: stream}
|
|
watcher.KickOff(1*time.Second, func(p util.IOProgress) {
|
|
log("progress on receive operation: %v bytes received", p.TotalRX)
|
|
})
|
|
|
|
if err = zfs.ZFSRecv(m.Local, &watcher); err != nil {
|
|
log("error receiving stream: %s", err)
|
|
return false
|
|
}
|
|
|
|
totalRx := watcher.Progress().TotalRX
|
|
pathRx += totalRx
|
|
log("finished incremental transfer, %v bytes total", totalRx)
|
|
|
|
}
|
|
|
|
log("finished following incremental path, %v bytes total", pathRx)
|
|
return true
|
|
|
|
case zfs.ConflictNoCommonAncestor:
|
|
|
|
log("remote and local filesystem have snapshots, but no common one")
|
|
log("perform manual replication to establish a common snapshot history")
|
|
log("remote versions:")
|
|
for _, v := range diff.MRCAPathRight {
|
|
log(" %s (GUID %v)", v, v.Guid)
|
|
}
|
|
log("local versions:")
|
|
for _, v := range diff.MRCAPathLeft {
|
|
log(" %s (GUID %v)", v, v.Guid)
|
|
}
|
|
return false
|
|
|
|
case zfs.ConflictDiverged:
|
|
|
|
log("remote and local filesystem share a history but have diverged")
|
|
log("perform manual replication or delete snapshots on the receiving" +
|
|
"side to establish an incremental replication parse")
|
|
log("remote-only versions:")
|
|
for _, v := range diff.MRCAPathRight {
|
|
log(" %s (GUID %v)", v, v.Guid)
|
|
}
|
|
log("local-only versions:")
|
|
for _, v := range diff.MRCAPathLeft {
|
|
log(" %s (GUID %v)", v, v.Guid)
|
|
}
|
|
return false
|
|
|
|
}
|
|
|
|
panic("implementation error: this should not be reached")
|
|
return false
|
|
|
|
})
|
|
|
|
return
|
|
|
|
}
|