Mirror of https://github.com/zrepl/zrepl.git (synced 2025-06-21 02:08:45 +02:00)

Merge branch 'problame/overlapping-dataset-hierarchy-improvements' into 'master'

closes #153

Commit 0ab62f197a
@@ -5,16 +5,18 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"time"
 
 	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/zrepl/zrepl/daemon/job"
 	"github.com/zrepl/zrepl/daemon/nethelpers"
 	"github.com/zrepl/zrepl/logger"
 	"github.com/zrepl/zrepl/version"
-	"io"
-	"net"
-	"net/http"
-	"time"
+	"github.com/zrepl/zrepl/zfs"
 )
 
 type controlJob struct {
@@ -38,6 +40,8 @@ func (j *controlJob) Name() string { return jobNameControl }
 
 func (j *controlJob) Status() *job.Status { return &job.Status{Type: job.TypeInternal} }
 
+func (j *controlJob) OwnedDatasetSubtreeRoot() (p *zfs.DatasetPath, ok bool) { return nil, false }
+
 var promControl struct {
 	requestBegin    *prometheus.CounterVec
 	requestFinished *prometheus.HistogramVec
@@ -299,6 +299,15 @@ func (j *ActiveSide) Status() *Status {
 	return &Status{Type: t, JobSpecific: s}
 }
 
+func (j *ActiveSide) OwnedDatasetSubtreeRoot() (rfs *zfs.DatasetPath, ok bool) {
+	pull, ok := j.mode.(*modePull)
+	if !ok {
+		_ = j.mode.(*modePush) // make sure we didn't introduce a new job type
+		return nil, false
+	}
+	return pull.rootFS.Copy(), true
+}
+
 func (j *ActiveSide) Run(ctx context.Context) {
 	log := GetLogger(ctx)
 	ctx = logging.WithSubsystemLoggers(ctx, log)
@@ -2,6 +2,9 @@ package job
 
 import (
 	"fmt"
+	"sort"
+	"strings"
 
 	"github.com/pkg/errors"
 	"github.com/zrepl/zrepl/config"
 )
@@ -18,6 +21,22 @@ func JobsFromConfig(c *config.Config) ([]Job, error) {
 		}
 		js[i] = j
 	}
+
+	// receiving-side root filesystems must not overlap
+	{
+		rfss := make([]string, len(js))
+		for i, j := range js {
+			jrfs, ok := j.OwnedDatasetSubtreeRoot()
+			if !ok {
+				continue
+			}
+			rfss[i] = jrfs.ToString()
+		}
+		if err := validateReceivingSidesDoNotOverlap(rfss); err != nil {
+			return nil, err
+		}
+	}
+
 	return js, nil
 }
 
@@ -74,3 +93,27 @@ func buildJob(c *config.Global, in config.JobEnum) (j Job, err error) {
 	return j, nil
 
 }
+
+func validateReceivingSidesDoNotOverlap(receivingRootFSs []string) error {
+	if len(receivingRootFSs) == 0 {
+		return nil
+	}
+	rfss := make([]string, len(receivingRootFSs))
+	copy(rfss, receivingRootFSs)
+	sort.Slice(rfss, func(i, j int) bool {
+		return strings.Compare(rfss[i], rfss[j]) == -1
+	})
+	// idea:
+	//   no path in rfss must be prefix of another
+	//
+	//   rfss is now lexicographically sorted, which means that
+	//   if i is prefix of j, i < j (in lexicographical order)
+	//   thus,
+	//   if any i is prefix of i+n (n >= 1), there is overlap
+	for i := 0; i < len(rfss)-1; i++ {
+		if strings.HasPrefix(rfss[i+1], rfss[i]) {
+			return fmt.Errorf("receiving jobs with overlapping root filesystems are forbidden")
+		}
+	}
+	return nil
+}
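The sort-then-scan idea behind validateReceivingSidesDoNotOverlap can be illustrated with a small standalone sketch. This is illustrative only and not part of the commit; the dataset names are invented for the example.

// Standalone illustration of the sorted-prefix check: after sorting,
// any prefix relation shows up between lexicographically adjacent entries.
package main

import (
	"fmt"
	"sort"
	"strings"
)

func main() {
	rfss := []string{"zroot/sink2", "zroot/sink1/foo", "zroot/sink1"}
	sort.Strings(rfss) // -> ["zroot/sink1", "zroot/sink1/foo", "zroot/sink2"]
	for i := 0; i < len(rfss)-1; i++ {
		if strings.HasPrefix(rfss[i+1], rfss[i]) {
			fmt.Printf("overlap: %q is a prefix of %q\n", rfss[i], rfss[i+1])
		}
	}
}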
daemon/job/build_jobs_test.go (new file, 42 lines)
@@ -0,0 +1,42 @@
+package job
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestValidateReceivingSidesDoNotOverlap(t *testing.T) {
+	type testCase struct {
+		err   bool
+		input []string
+	}
+	tcs := []testCase{
+		{false, nil},
+		{false, []string{}},
+		{false, []string{""}}, // not our job to determine valid paths
+		{false, []string{"a"}},
+		{false, []string{"some/path"}},
+		{false, []string{"zroot/sink1", "zroot/sink2", "zroot/sink3"}},
+		{true, []string{"zroot/b", "zroot/b"}},
+		{true, []string{"zroot/foo", "zroot/foo/bar", "zroot/baz"}},
+		{false, []string{"a/x", "b/x"}},
+		{false, []string{"a", "b"}},
+		{true, []string{"a", "a"}},
+		{true, []string{"a/x/y", "a/x"}},
+		{true, []string{"a/x", "a/x/y"}},
+		{true, []string{"a/x", "b/x", "a/x/y"}},
+		{true, []string{"a", "a/b", "a/c", "a/b"}},
+		{true, []string{"a/b", "a/c", "a/b", "a/d", "a/c"}},
+	}
+
+	for _, tc := range tcs {
+		t.Logf("input: %v", tc.input)
+		err := validateReceivingSidesDoNotOverlap(tc.input)
+		if tc.err {
+			assert.Error(t, err)
+		} else {
+			assert.NoError(t, err)
+		}
+	}
+}
@@ -4,8 +4,10 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
 
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/zrepl/zrepl/logger"
+	"github.com/zrepl/zrepl/zfs"
 )
 
 type Logger = logger.Logger
@@ -33,6 +35,9 @@ type Job interface {
 	Run(ctx context.Context)
 	Status() *Status
 	RegisterMetrics(registerer prometheus.Registerer)
+	// Jobs that return a subtree of the dataset hierarchy
+	// must return the root of that subtree as rfs and ok = true
+	OwnedDatasetSubtreeRoot() (rfs *zfs.DatasetPath, ok bool)
 }
 
 type Type string
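A hedged sketch of how a caller consumes the new comma-ok method; it mirrors the collection loop added to JobsFromConfig above and is not part of the commit (the helper name receivingRoots is made up):

// Hypothetical helper (not in the commit): gather the receiving-side roots
// claimed by jobs via the new interface method, as JobsFromConfig does
// before calling validateReceivingSidesDoNotOverlap.
func receivingRoots(jobs []Job) []string {
	var roots []string
	for _, j := range jobs {
		rfs, ok := j.OwnedDatasetSubtreeRoot()
		if !ok {
			continue // job does not own a receiving-side subtree
		}
		roots = append(roots, rfs.ToString())
	}
	return roots
}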
@@ -103,6 +103,15 @@ func (s *PassiveSide) Status() *Status {
 	return &Status{Type: s.mode.Type()} // FIXME PassiveStatus
 }
 
+func (j *PassiveSide) OwnedDatasetSubtreeRoot() (rfs *zfs.DatasetPath, ok bool) {
+	sink, ok := j.mode.(*modeSink)
+	if !ok {
+		_ = j.mode.(*modeSource) // make sure we didn't introduce a new job type
+		return nil, false
+	}
+	return sink.rootDataset.Copy(), true
+}
+
 func (*PassiveSide) RegisterMetrics(registerer prometheus.Registerer) {}
 
 func (j *PassiveSide) Run(ctx context.Context) {
|
@ -77,6 +77,10 @@ func (j *SnapJob) Status() *Status {
|
|||||||
return &Status{Type: t, JobSpecific: s}
|
return &Status{Type: t, JobSpecific: s}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (j *SnapJob) OwnedDatasetSubtreeRoot() (rfs *zfs.DatasetPath, ok bool) {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
func (j *SnapJob) Run(ctx context.Context) {
|
func (j *SnapJob) Run(ctx context.Context) {
|
||||||
log := GetLogger(ctx)
|
log := GetLogger(ctx)
|
||||||
ctx = logging.WithSubsystemLoggers(ctx, log)
|
ctx = logging.WithSubsystemLoggers(ctx, log)
|
||||||
|
@ -42,6 +42,8 @@ func (j *prometheusJob) Name() string { return jobNamePrometheus }
|
|||||||
|
|
||||||
func (j *prometheusJob) Status() *job.Status { return &job.Status{Type: job.TypeInternal} }
|
func (j *prometheusJob) Status() *job.Status { return &job.Status{Type: job.TypeInternal} }
|
||||||
|
|
||||||
|
func (j *prometheusJob) OwnedDatasetSubtreeRoot() (p *zfs.DatasetPath, ok bool) { return nil, false }
|
||||||
|
|
||||||
func (j *prometheusJob) RegisterMetrics(registerer prometheus.Registerer) {}
|
func (j *prometheusJob) RegisterMetrics(registerer prometheus.Registerer) {}
|
||||||
|
|
||||||
func (j *prometheusJob) Run(ctx context.Context) {
|
func (j *prometheusJob) Run(ctx context.Context) {
|
||||||
|
@ -108,6 +108,7 @@ The **replication cursor bookmark** ``#zrepl_replication_cursor`` is kept per fi
|
|||||||
It is a bookmark of the most recent successfully replicated snapshot to the receiving side.
|
It is a bookmark of the most recent successfully replicated snapshot to the receiving side.
|
||||||
It is is used by the :ref:`not_replicated <prune-keep-not-replicated>` keep rule to identify all snapshots that have not yet been replicated to the receiving side.
|
It is is used by the :ref:`not_replicated <prune-keep-not-replicated>` keep rule to identify all snapshots that have not yet been replicated to the receiving side.
|
||||||
Regardless of whether that keep rule is used, the bookmark ensures that replication can always continue incrementally.
|
Regardless of whether that keep rule is used, the bookmark ensures that replication can always continue incrementally.
|
||||||
|
Note that there is only one cursor bookmark per filesystem, which prohibits multiple jobs to replicate the same filesystem (:ref:`see below<jobs-multiple-jobs>`).
|
||||||
|
|
||||||
.. _replication-placeholder-property:
|
.. _replication-placeholder-property:
|
||||||
|
|
||||||
@@ -172,6 +173,50 @@ Note that you will have to trigger replication manually using the ``zrepl signal
       type: manual
   ...
 
+.. _jobs-multiple-jobs:
+
+Multiple Jobs & More than 2 Machines
+------------------------------------
+
+.. ATTENTION::
+
+    When using multiple jobs across single or multiple machines, the following rules are critical to avoid race conditions & data loss:
+
+    1. The sets of ZFS filesystems matched by the ``filesystems`` filter fields must be disjoint across all jobs configured on a machine.
+    2. The ZFS filesystem subtrees of jobs with ``root_fs`` must be disjoint.
+    3. Across all zrepl instances on all machines in the replication domain, there must be a 1:1 correspondence between active and passive jobs.
+
+    Explanations & exceptions to the above rules are detailed below.
+
+    If you would like to see improvements to multi-job setups, please `open an issue on GitHub <https://github.com/zrepl/zrepl/issues/new>`_.
+
+No Overlapping
+~~~~~~~~~~~~~~
+
+Jobs run independently of each other.
+If two jobs match the same filesystem with their ``filesystems`` filter, they will operate on that filesystem independently and potentially in parallel.
+For example, if job A prunes snapshots that job B is planning to replicate, the replication will fail because B assumed the snapshot would still be present.
+More subtle race conditions can occur with the :ref:`replication cursor bookmark <replication-cursor-bookmark>`, which currently only exists once per filesystem.
+
+N push jobs to 1 sink
+~~~~~~~~~~~~~~~~~~~~~
+
+The :ref:`sink job <job-sink>` namespaces by client identity.
+It is thus safe to push to one sink job with different client identities.
+If the push jobs have the same client identity, the filesystems matched by the push jobs must be disjoint to avoid races.
+
+N pull jobs from 1 source
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Multiple pull jobs pulling from the same source have potential for race conditions during pruning:
+each pull job prunes the source side independently, causing replication-prune and prune-prune races.
+
+There is currently no way for a pull job to filter which snapshots it should attempt to replicate.
+Thus, it is not possible to just manually assert that the prune rules of all pull jobs are disjoint to avoid replication-prune and prune-prune races.
+
+
+------------------------------------------------------------------------------
+
+
 .. _job-push:
 
@@ -217,8 +262,8 @@ Job Type ``sink``
    * - ``serve``
      - |serve-transport|
    * - ``root_fs``
-     - ZFS dataset path are received to
-       ``$root_fs/$client_identity``
+     - ZFS filesystems are received to
+       ``$root_fs/$client_identity/$source_path``
 
 Example config: :sampleconf:`/sink.yml`
 
@@ -240,8 +285,8 @@ Job Type ``pull``
    * - ``connect``
      - |connect-transport|
    * - ``root_fs``
-     - ZFS dataset path are received to
-       ``$root_fs/$client_identity``
+     - ZFS filesystems are received to
+       ``$root_fs/$source_path``
    * - ``interval``
      - | Interval at which to pull from the source job (e.g. ``10m``).
        | ``manual`` disables periodic pulling, replication then only happens on :ref:`wakeup <cli-signal-wakeup>`.
@@ -8,6 +8,7 @@ import (
 
 	"github.com/pkg/errors"
 	"github.com/zrepl/zrepl/replication/logic/pdu"
+	"github.com/zrepl/zrepl/util/chainlock"
 	"github.com/zrepl/zrepl/zfs"
 )
 
@@ -170,13 +171,19 @@ type FSMap interface { // FIXME unused
 type Receiver struct {
 	rootWithoutClientComponent *zfs.DatasetPath
 	appendClientIdentity       bool
+
+	recvParentCreationMtx *chainlock.L
 }
 
 func NewReceiver(rootDataset *zfs.DatasetPath, appendClientIdentity bool) *Receiver {
 	if rootDataset.Length() <= 0 {
 		panic(fmt.Sprintf("root dataset must not be an empty path: %v", rootDataset))
 	}
-	return &Receiver{rootWithoutClientComponent: rootDataset.Copy(), appendClientIdentity: appendClientIdentity}
+	return &Receiver{
+		rootWithoutClientComponent: rootDataset.Copy(),
+		appendClientIdentity:       appendClientIdentity,
+		recvParentCreationMtx:      chainlock.New(),
+	}
 }
 
 func TestClientIdentity(rootFS *zfs.DatasetPath, clientIdentity string) error {
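The new recvParentCreationMtx field comes from zrepl's chainlock package; the defer s.recvParentCreationMtx.Lock().Unlock() idiom in the next hunk works because Lock returns the lock itself. A minimal sketch of that pattern, as an assumption about the idea rather than the actual chainlock.L implementation:

// Minimal sketch of a "chainable" mutex: Lock returns the receiver so a
// caller can acquire and schedule the release in one line:
//     defer l.Lock().Unlock()
// Illustration only; the real zrepl/util/chainlock type may differ.
package chainlocksketch

import "sync"

type L struct{ mtx sync.Mutex }

func New() *L { return &L{} }

func (l *L) Lock() *L {
	l.mtx.Lock()
	return l
}

func (l *L) Unlock() {
	l.mtx.Unlock()
}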
@@ -323,7 +330,18 @@ func (s *Receiver) Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs
 	}
 
 	// create placeholder parent filesystems as appropriate
+	//
+	// Manipulating the ZFS dataset hierarchy must happen exclusively.
+	// TODO: Use fine-grained locking to allow separate clients / requests to pass
+	// through the following section concurrently when operating on disjoint
+	// ZFS dataset hierarchy subtrees.
 	var visitErr error
+	func() {
+		getLogger(ctx).Debug("begin aquire recvParentCreationMtx")
+		defer s.recvParentCreationMtx.Lock().Unlock()
+		getLogger(ctx).Debug("end aquire recvParentCreationMtx")
+		defer getLogger(ctx).Debug("release recvParentCreationMtx")
+
 	f := zfs.NewDatasetPathForest()
 	f.Add(lp)
 	getLogger(ctx).Debug("begin tree-walk")
@@ -332,14 +350,16 @@ func (s *Receiver) Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs
 			return false
 		}
 		ph, err := zfs.ZFSGetFilesystemPlaceholderState(v.Path)
+		getLogger(ctx).
+			WithField("fs", v.Path.ToString()).
+			WithField("placeholder_state", fmt.Sprintf("%#v", ph)).
+			WithField("err", fmt.Sprintf("%s", err)).
+			WithField("errType", fmt.Sprintf("%T", err)).
+			Debug("placeholder state for filesystem")
 		if err != nil {
 			visitErr = err
 			return false
 		}
-		getLogger(ctx).
-			WithField("fs", v.Path.ToString()).
-			WithField("placeholder_state", fmt.Sprintf("%#v", ph)).
-			Debug("placeholder state for filesystem")
 
 		if !ph.FSExists {
 			l := getLogger(ctx).WithField("placeholder_fs", v.Path)
@@ -355,8 +375,8 @@ func (s *Receiver) Receive(ctx context.Context, req *pdu.ReceiveReq, receive zfs
 			getLogger(ctx).WithField("filesystem", v.Path.ToString()).Debug("exists")
 			return true // leave this fs as is
 		})
+	}()
 	getLogger(ctx).WithField("visitErr", visitErr).Debug("complete tree-walk")
 
 	if visitErr != nil {
 		return nil, visitErr
 	}