2017-09-10 16:13:05 +02:00
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"time"
|
|
|
|
|
2017-09-13 23:27:18 +02:00
|
|
|
"context"
|
2017-12-26 22:05:20 +01:00
|
|
|
"fmt"
|
2017-09-10 16:13:05 +02:00
|
|
|
"github.com/mitchellh/mapstructure"
|
|
|
|
"github.com/pkg/errors"
|
2017-09-11 15:45:10 +02:00
|
|
|
"github.com/zrepl/zrepl/util"
|
2018-06-20 20:20:37 +02:00
|
|
|
"github.com/zrepl/zrepl/cmd/replication"
|
|
|
|
"github.com/problame/go-streamrpc"
|
2017-09-10 16:13:05 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
// PullJob periodically connects to a remote zrepl instance, replicates
// the mapped filesystems to the local side, and then prunes local
// snapshots according to the configured prune policy.
type PullJob struct {
	// Name identifies the job in logs and status output.
	Name string
	// Connect produces the transport connection to the remote side.
	Connect streamrpc.Connecter
	// Interval is the time between the start of two pull attempts.
	Interval time.Duration
	// Mapping maps remote filesystems to local target datasets.
	Mapping *DatasetMapFilter
	// constructed from mapping during parsing
	pruneFilter *DatasetMapFilter
	// SnapshotPrefix restricts replication and pruning to snapshots
	// whose names carry this prefix.
	SnapshotPrefix string
	// InitialReplPolicy controls how a filesystem without common
	// snapshots is bootstrapped.
	InitialReplPolicy InitialReplPolicy
	// Prune is the policy applied to the local side after each pull.
	Prune PrunePolicy
	// Debug holds optional debugging knobs (e.g. connection dumps).
	Debug JobDebugSettings

	// task tracks the currently running activity for status reporting.
	task *Task
}
|
|
|
|
|
2017-09-17 18:20:05 +02:00
|
|
|
func parsePullJob(c JobParsingContext, name string, i map[string]interface{}) (j *PullJob, err error) {
|
2017-09-10 16:13:05 +02:00
|
|
|
|
|
|
|
var asMap struct {
|
|
|
|
Connect map[string]interface{}
|
2017-09-13 23:27:18 +02:00
|
|
|
Interval string
|
2017-09-10 16:13:05 +02:00
|
|
|
Mapping map[string]string
|
|
|
|
InitialReplPolicy string `mapstructure:"initial_repl_policy"`
|
|
|
|
Prune map[string]interface{}
|
|
|
|
SnapshotPrefix string `mapstructure:"snapshot_prefix"`
|
2017-09-11 15:45:10 +02:00
|
|
|
Debug map[string]interface{}
|
2017-09-10 16:13:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if err = mapstructure.Decode(i, &asMap); err != nil {
|
|
|
|
err = errors.Wrap(err, "mapstructure error")
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
j = &PullJob{Name: name}
|
|
|
|
|
|
|
|
j.Connect, err = parseSSHStdinserverConnecter(asMap.Connect)
|
|
|
|
if err != nil {
|
|
|
|
err = errors.Wrap(err, "cannot parse 'connect'")
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2017-10-05 15:12:50 +02:00
|
|
|
if j.Interval, err = parsePostitiveDuration(asMap.Interval); err != nil {
|
2017-09-13 23:27:18 +02:00
|
|
|
err = errors.Wrap(err, "cannot parse 'interval'")
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2017-09-10 16:13:05 +02:00
|
|
|
j.Mapping, err = parseDatasetMapFilter(asMap.Mapping, false)
|
|
|
|
if err != nil {
|
|
|
|
err = errors.Wrap(err, "cannot parse 'mapping'")
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2017-09-13 23:27:18 +02:00
|
|
|
if j.pruneFilter, err = j.Mapping.InvertedFilter(); err != nil {
|
|
|
|
err = errors.Wrap(err, "cannot automatically invert 'mapping' for prune job")
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2017-09-10 16:13:05 +02:00
|
|
|
j.InitialReplPolicy, err = parseInitialReplPolicy(asMap.InitialReplPolicy, DEFAULT_INITIAL_REPL_POLICY)
|
|
|
|
if err != nil {
|
|
|
|
err = errors.Wrap(err, "cannot parse 'initial_repl_policy'")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2017-09-13 23:27:18 +02:00
|
|
|
if j.SnapshotPrefix, err = parseSnapshotPrefix(asMap.SnapshotPrefix); err != nil {
|
2017-09-10 16:13:05 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-02-17 20:48:31 +01:00
|
|
|
if j.Prune, err = parsePrunePolicy(asMap.Prune, false); err != nil {
|
2017-09-10 16:13:05 +02:00
|
|
|
err = errors.Wrap(err, "cannot parse prune policy")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2017-09-11 15:45:10 +02:00
|
|
|
if err = mapstructure.Decode(asMap.Debug, &j.Debug); err != nil {
|
|
|
|
err = errors.Wrap(err, "cannot parse 'debug'")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-08-08 13:09:51 +02:00
|
|
|
if j.Debug.Conn.ReadDump != "" || j.Debug.Conn.WriteDump != "" {
|
|
|
|
logConnecter := logNetConnConnecter{
|
|
|
|
Connecter: j.Connect,
|
|
|
|
ReadDump: j.Debug.Conn.ReadDump,
|
|
|
|
WriteDump: j.Debug.Conn.WriteDump,
|
|
|
|
}
|
|
|
|
j.Connect = logConnecter
|
|
|
|
}
|
|
|
|
|
2017-09-10 16:13:05 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (j *PullJob) JobName() string {
|
|
|
|
return j.Name
|
|
|
|
}
|
|
|
|
|
2018-04-05 22:22:55 +02:00
|
|
|
// JobType identifies this job as a pull job.
func (j *PullJob) JobType() JobType {
	return JobTypePull
}
|
|
|
|
|
2017-09-13 23:27:18 +02:00
|
|
|
func (j *PullJob) JobStart(ctx context.Context) {
|
2017-09-11 15:45:10 +02:00
|
|
|
|
2017-09-16 21:12:26 +02:00
|
|
|
log := ctx.Value(contextKeyLog).(Logger)
|
2017-09-23 17:52:29 +02:00
|
|
|
defer log.Info("exiting")
|
2018-04-05 22:18:22 +02:00
|
|
|
j.task = NewTask("main", j, log)
|
2017-12-26 22:05:20 +01:00
|
|
|
|
|
|
|
// j.task is idle here idle here
|
2017-09-16 21:12:26 +02:00
|
|
|
|
2017-09-13 23:27:18 +02:00
|
|
|
ticker := time.NewTicker(j.Interval)
|
2017-12-26 22:05:20 +01:00
|
|
|
for {
|
2018-07-15 17:36:53 +02:00
|
|
|
begin := time.Now()
|
2017-12-26 22:05:20 +01:00
|
|
|
j.doRun(ctx)
|
2018-07-15 17:36:53 +02:00
|
|
|
duration := time.Now().Sub(begin)
|
|
|
|
if duration > j.Interval {
|
|
|
|
j.task.Log().
|
|
|
|
WithField("actual_duration", duration).
|
|
|
|
WithField("configured_interval", j.Interval).
|
|
|
|
Warn("pull run took longer than configured interval")
|
|
|
|
}
|
2017-12-26 22:05:20 +01:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
j.task.Log().WithError(ctx.Err()).Info("context")
|
|
|
|
return
|
|
|
|
case <-ticker.C:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-09-13 23:27:18 +02:00
|
|
|
|
2018-06-20 20:20:37 +02:00
|
|
|
var STREAMRPC_CONFIG = &streamrpc.ConnConfig{ // FIXME oversight and configurability
|
|
|
|
RxHeaderMaxLen: 4096,
|
|
|
|
RxStructuredMaxLen: 4096 * 4096,
|
|
|
|
RxStreamMaxChunkSize: 4096 * 4096,
|
|
|
|
TxChunkSize: 4096 * 4096,
|
2018-08-08 13:09:51 +02:00
|
|
|
RxTimeout: streamrpc.Timeout{
|
|
|
|
Progress: 10*time.Second,
|
|
|
|
},
|
|
|
|
TxTimeout: streamrpc.Timeout{
|
|
|
|
Progress: 10*time.Second,
|
|
|
|
},
|
2018-07-15 17:36:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (j *PullJob) doRun(ctx context.Context) {
|
2018-06-20 20:20:37 +02:00
|
|
|
|
2018-07-15 17:36:53 +02:00
|
|
|
j.task.Enter("run")
|
|
|
|
defer j.task.Finish()
|
|
|
|
|
|
|
|
// FIXME
|
|
|
|
clientConf := &streamrpc.ClientConfig{
|
|
|
|
ConnConfig: STREAMRPC_CONFIG,
|
2017-09-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
|
2018-08-08 13:09:51 +02:00
|
|
|
client, err := streamrpc.NewClient(j.Connect, clientConf)
|
2018-07-15 17:36:53 +02:00
|
|
|
defer client.Close()
|
|
|
|
|
2017-12-26 21:37:48 +01:00
|
|
|
j.task.Enter("pull")
|
2018-06-20 20:20:37 +02:00
|
|
|
|
2018-07-15 17:36:53 +02:00
|
|
|
sender := RemoteEndpoint{client}
|
|
|
|
|
2018-06-20 20:20:37 +02:00
|
|
|
puller, err := NewReceiverEndpoint(
|
|
|
|
j.Mapping,
|
|
|
|
NewPrefixFilter(j.SnapshotPrefix),
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
j.task.Log().WithError(err).Error("error creating receiver endpoint")
|
|
|
|
j.task.Finish()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
replicator := replication.NewIncrementalPathReplicator()
|
2018-07-15 17:36:53 +02:00
|
|
|
ctx = context.WithValue(ctx, replication.ContextKeyLog, j.task.Log())
|
|
|
|
ctx = context.WithValue(ctx, streamrpc.ContextKeyLogger, j.task.Log())
|
|
|
|
ctx, enforceDeadline := util.ContextWithOptionalDeadline(ctx)
|
|
|
|
|
|
|
|
// Try replicating each file system regardless of j.Interval
|
|
|
|
// (this does not solve the underlying problem that j.Interval is too short,
|
|
|
|
// but it covers the case of initial replication taking longer than all
|
|
|
|
// incremental replications afterwards)
|
|
|
|
allTriedOnce := make(chan struct{})
|
|
|
|
replicationBegin := time.Now()
|
|
|
|
go func() {
|
|
|
|
select {
|
|
|
|
case <-allTriedOnce:
|
|
|
|
enforceDeadline(replicationBegin.Add(j.Interval))
|
|
|
|
case <-ctx.Done():
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
replication.Replicate(ctx, replication.NewEndpointPairPull(sender, puller), replicator, allTriedOnce)
|
|
|
|
|
2018-06-20 20:20:37 +02:00
|
|
|
|
2018-07-15 17:36:53 +02:00
|
|
|
client.Close()
|
2017-12-26 21:37:48 +01:00
|
|
|
j.task.Finish()
|
2017-09-13 23:27:18 +02:00
|
|
|
|
2017-12-26 22:05:20 +01:00
|
|
|
j.task.Enter("prune")
|
2017-12-26 19:36:27 +01:00
|
|
|
pruner, err := j.Pruner(j.task, PrunePolicySideDefault, false)
|
2017-09-16 21:12:26 +02:00
|
|
|
if err != nil {
|
2017-12-26 22:05:20 +01:00
|
|
|
j.task.Log().WithError(err).Error("error creating pruner")
|
|
|
|
} else {
|
|
|
|
pruner.Run(ctx)
|
2017-09-13 23:27:18 +02:00
|
|
|
}
|
2017-12-26 22:05:20 +01:00
|
|
|
j.task.Finish()
|
2017-09-13 23:27:18 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2017-12-24 15:34:41 +01:00
|
|
|
func (j *PullJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
2017-12-26 19:36:27 +01:00
|
|
|
return &JobStatus{Tasks: []*TaskStatus{j.task.Status()}}, nil
|
2017-12-24 15:34:41 +01:00
|
|
|
}
|
|
|
|
|
2017-12-26 19:36:27 +01:00
|
|
|
func (j *PullJob) Pruner(task *Task, side PrunePolicySide, dryRun bool) (p Pruner, err error) {
|
2017-09-16 21:12:26 +02:00
|
|
|
p = Pruner{
|
2017-12-26 19:36:27 +01:00
|
|
|
task,
|
2017-09-16 21:12:26 +02:00
|
|
|
time.Now(),
|
|
|
|
dryRun,
|
|
|
|
j.pruneFilter,
|
|
|
|
j.SnapshotPrefix,
|
|
|
|
j.Prune,
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-06-20 20:20:37 +02:00
|
|
|
func closeRPCWithTimeout(task *Task, remote RemoteEndpoint, timeout time.Duration, goodbye string) {
|
2017-09-13 23:27:18 +02:00
|
|
|
|
2017-12-26 22:05:20 +01:00
|
|
|
task.Log().Info("closing rpc connection")
|
|
|
|
|
|
|
|
ch := make(chan error)
|
|
|
|
go func() {
|
2018-07-15 17:36:53 +02:00
|
|
|
remote.Close()
|
|
|
|
ch <- nil
|
2017-12-26 22:05:20 +01:00
|
|
|
close(ch)
|
|
|
|
}()
|
|
|
|
|
|
|
|
var err error
|
|
|
|
select {
|
|
|
|
case <-time.After(timeout):
|
|
|
|
err = fmt.Errorf("timeout exceeded (%s)", timeout)
|
|
|
|
case closeRequestErr := <-ch:
|
|
|
|
err = closeRequestErr
|
|
|
|
}
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
task.Log().WithError(err).Error("error closing connection")
|
|
|
|
}
|
|
|
|
return
|
2017-09-10 16:13:05 +02:00
|
|
|
}
|