pruner + docs: less confusing type names, some comments, better docs for keep: not_replicated

fixes https://github.com/zrepl/zrepl/issues/524
Christian Schwarz 2021-10-10 21:11:38 +02:00
parent 5104ad3d0b
commit 1f0f2f8569
3 changed files with 24 additions and 15 deletions


@@ -147,7 +147,7 @@ type alwaysUpToDateReplicationCursorHistory struct {
     target pruner.Target
 }
 
-var _ pruner.History = (*alwaysUpToDateReplicationCursorHistory)(nil)
+var _ pruner.Sender = (*alwaysUpToDateReplicationCursorHistory)(nil)
 
 func (h alwaysUpToDateReplicationCursorHistory) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
     fsvReq := &pdu.ListFilesystemVersionsReq{


@@ -19,12 +19,20 @@ import (
     "github.com/zrepl/zrepl/util/envconst"
 )
 
+// The sender in the replication setup.
+// The pruner uses the Sender to determine which of the Target's filesystems need to be pruned.
+// Also, it asks the Sender about the replication cursor of each filesystem
+// to enable the 'not_replicated' pruning rule.
+//
 // Try to keep it compatible with github.com/zrepl/zrepl/endpoint.Endpoint
-type History interface {
+type Sender interface {
     ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error)
     ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
 }
 
+// The pruning target, i.e., the side on which snapshots are destroyed.
+// This can be a replication sender or receiver.
+//
 // Try to keep it compatible with github.com/zrepl/zrepl/endpoint.Endpoint
 type Target interface {
     ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error)
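
A minimal sketch of a type that satisfies the renamed pruner.Sender interface (for example a test stub in the spirit of alwaysUpToDateReplicationCursorHistory above). Only the method signatures are taken from the interface in this diff; the import paths and the empty pdu response values are assumptions.

// Sketch: a stub implementation of pruner.Sender, e.g. for tests.
// Import paths are assumptions based on the package names visible in this diff.
package example

import (
    "context"

    "github.com/zrepl/zrepl/daemon/pruner"
    "github.com/zrepl/zrepl/replication/logic/pdu"
)

type stubSender struct{}

// Compile-time check, analogous to the assertion for
// alwaysUpToDateReplicationCursorHistory above.
var _ pruner.Sender = (*stubSender)(nil)

func (stubSender) ReplicationCursor(ctx context.Context, req *pdu.ReplicationCursorReq) (*pdu.ReplicationCursorRes, error) {
    return &pdu.ReplicationCursorRes{}, nil // empty response, for illustration only
}

func (stubSender) ListFilesystems(ctx context.Context, req *pdu.ListFilesystemReq) (*pdu.ListFilesystemRes, error) {
    return &pdu.ListFilesystemRes{}, nil // empty response, for illustration only
}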
@@ -48,7 +56,7 @@ func GetLogger(ctx context.Context) Logger {
 type args struct {
     ctx                            context.Context
     target                         Target
-    receiver                       History
+    sender                         Sender
     rules                          []pruning.KeepRule
     retryWait                      time.Duration
     considerSnapAtCursorReplicated bool
@@ -132,12 +140,12 @@ func NewPrunerFactory(in config.PruningSenderReceiver, promPruneSecs *prometheus
     return f, nil
 }
 
-func (f *PrunerFactory) BuildSenderPruner(ctx context.Context, target Target, receiver History) *Pruner {
+func (f *PrunerFactory) BuildSenderPruner(ctx context.Context, target Target, sender Sender) *Pruner {
     p := &Pruner{
         args: args{
             context.WithValue(ctx, contextKeyPruneSide, "sender"),
             target,
-            receiver,
+            sender,
             f.senderRules,
             f.retryWait,
             f.considerSnapAtCursorReplicated,
@@ -148,12 +156,12 @@ func (f *PrunerFactory) BuildSenderPruner(ctx context.Context, target Target, re
     return p
 }
 
-func (f *PrunerFactory) BuildReceiverPruner(ctx context.Context, target Target, receiver History) *Pruner {
+func (f *PrunerFactory) BuildReceiverPruner(ctx context.Context, target Target, sender Sender) *Pruner {
     p := &Pruner{
         args: args{
             context.WithValue(ctx, contextKeyPruneSide, "receiver"),
             target,
-            receiver,
+            sender,
             f.receiverRules,
             f.retryWait,
             false, // senseless here anyways
@@ -164,12 +172,12 @@ func (f *PrunerFactory) BuildReceiverPruner(ctx context.Context, target Target,
     return p
 }
 
-func (f *LocalPrunerFactory) BuildLocalPruner(ctx context.Context, target Target, receiver History) *Pruner {
+func (f *LocalPrunerFactory) BuildLocalPruner(ctx context.Context, target Target, history Sender) *Pruner {
     p := &Pruner{
         args: args{
             context.WithValue(ctx, contextKeyPruneSide, "local"),
             target,
-            receiver,
+            history,
             f.keepRules,
             f.retryWait,
             false, // considerSnapAtCursorReplicated is not relevant for local pruning
@@ -343,9 +351,9 @@ func (s snapshot) Date() time.Time { return s.date }
 
 func doOneAttempt(a *args, u updater) {
-    ctx, target, receiver := a.ctx, a.target, a.receiver
+    ctx, target, sender := a.ctx, a.target, a.sender
 
-    sfssres, err := receiver.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
+    sfssres, err := sender.ListFilesystems(ctx, &pdu.ListFilesystemReq{})
     if err != nil {
         u(func(p *Pruner) {
             p.state = PlanErr
@@ -410,7 +418,7 @@ tfss_loop:
         rcReq := &pdu.ReplicationCursorReq{
             Filesystem: tfs.Path,
         }
-        rc, err := receiver.ReplicationCursor(ctx, rcReq)
+        rc, err := sender.ReplicationCursor(ctx, rcReq)
         if err != nil {
             pfsPlanErrAndLog(err, "cannot get replication cursor bookmark")
             continue tfss_loop
@@ -456,7 +464,7 @@ tfss_loop:
             })
         }
 
         if preCursor {
-            pfsPlanErrAndLog(fmt.Errorf("replication cursor not found in prune target filesystem versions"), "")
+            pfsPlanErrAndLog(fmt.Errorf("prune target has no snapshot that corresponds to sender replication cursor bookmark"), "")
             continue tfss_loop
         }


@@ -67,8 +67,9 @@ Policy ``not_replicated``
    ...
 
 ``not_replicated`` keeps all snapshots that have not been replicated to the receiving side.
-It only makes sense to specify this rule on a sender (source or push job).
-The state required to evaluate this rule is stored in the :ref:`replication cursor bookmark <replication-cursor-and-last-received-hold>` on the sending side.
+It only makes sense to specify this rule for the ``keep_sender``.
+The reason is that, by definition, all snapshots on the receiver have already been replicated there from the sender.
+To determine whether a sender-side snapshot has already been replicated, zrepl uses the :ref:`replication cursor bookmark <replication-cursor-and-last-received-hold>`, which corresponds to the most recent successfully replicated snapshot.
 
 .. _prune-keep-retention-grid:
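
In other words, the rule keeps exactly those sender-side snapshots that are newer than the snapshot the replication cursor bookmark points to. The following self-contained sketch illustrates that selection logic with made-up types; it is not zrepl's actual pruning code.

package main

import (
    "fmt"
    "time"
)

// snap is a stand-in for a sender-side snapshot; not zrepl's actual type.
type snap struct {
    name string
    date time.Time
}

// keepNotReplicated keeps every snapshot created after the replication cursor,
// i.e. after the most recent successfully replicated snapshot.
func keepNotReplicated(snaps []snap, cursor time.Time) (keep []snap) {
    for _, s := range snaps {
        if s.date.After(cursor) {
            keep = append(keep, s)
        }
    }
    return keep
}

func main() {
    now := time.Now()
    snaps := []snap{
        {"zrepl_old", now.Add(-48 * time.Hour)},       // replicated long ago
        {"zrepl_at_cursor", now.Add(-24 * time.Hour)}, // most recently replicated
        {"zrepl_new", now.Add(-1 * time.Hour)},        // not replicated yet
    }
    cursor := now.Add(-24 * time.Hour) // cursor corresponds to "zrepl_at_cursor"
    for _, s := range keepNotReplicated(snaps, cursor) {
        fmt.Println("keep:", s.name) // prints only "zrepl_new"
    }
}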