diff --git a/Makefile b/Makefile index 5b3091e..f66192d 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ SUBPKGS += daemon/streamrpcconfig SUBPKGS += endpoint SUBPKGS += logger SUBPKGS += pruning +SUBPKGS += pruning/retentiongrid SUBPKGS += replication SUBPKGS += replication/fsrep SUBPKGS += replication/pdu diff --git a/config/retentiongrid.go b/config/retentiongrid.go index e208941..58b2ff3 100644 --- a/config/retentiongrid.go +++ b/config/retentiongrid.go @@ -13,7 +13,7 @@ type RetentionIntervalList []RetentionInterval type PruneGrid struct { Type string `yaml:"type"` Grid RetentionIntervalList `yaml:"grid"` - KeepBookmarks string `yaml:"keep_bookmarks"` + Regex string `yaml:"regex"` } type RetentionInterval struct { diff --git a/config/samples/local.yml b/config/samples/local.yml index 01d73f0..5cc6c2c 100644 --- a/config/samples/local.yml +++ b/config/samples/local.yml @@ -25,4 +25,4 @@ jobs: keep_receiver: - type: grid grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d - keep_bookmarks: all + regex: "zrepl_.*" \ No newline at end of file diff --git a/config/samples/pull.yml b/config/samples/pull.yml index 4fd162b..4eb5113 100644 --- a/config/samples/pull.yml +++ b/config/samples/pull.yml @@ -17,8 +17,8 @@ jobs: count: 10 - type: grid grid: 1x1h(keep=all) | 24x1h | 14x1d - keep_bookmarks: all + regex: "zrepl_.*" keep_receiver: - type: grid grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d - keep_bookmarks: all + regex: "zrepl_.*" \ No newline at end of file diff --git a/config/samples/pull_ssh.yml b/config/samples/pull_ssh.yml index 3e809a6..920f237 100644 --- a/config/samples/pull_ssh.yml +++ b/config/samples/pull_ssh.yml @@ -19,10 +19,10 @@ jobs: count: 10 - type: grid grid: 1x1h(keep=all) | 24x1h | 14x1d - keep_bookmarks: all + regex: "^zrepl_.*" keep_receiver: - type: prefix prefix: keep_ - type: grid grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d - keep_bookmarks: all + regex: "^zrepl_.*" \ No newline at end of file diff --git a/config/samples/push.yml 
b/config/samples/push.yml index b017dad..cf941f0 100644 --- a/config/samples/push.yml +++ b/config/samples/push.yml @@ -18,9 +18,8 @@ jobs: count: 10 - type: grid grid: 1x1h(keep=all) | 24x1h | 14x1d - keep_bookmarks: all - + regex: "^zrepl_.*" keep_receiver: - type: grid grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d - keep_bookmarks: all + regex: "^zrepl_.*" \ No newline at end of file diff --git a/pruning/keep_grid.go b/pruning/keep_grid.go new file mode 100644 index 0000000..24da21a --- /dev/null +++ b/pruning/keep_grid.go @@ -0,0 +1,102 @@ +package pruning + +import ( + "fmt" + "github.com/pkg/errors" + "github.com/zrepl/zrepl/config" + "github.com/zrepl/zrepl/pruning/retentiongrid" + "regexp" + "sort" + "time" +) + +// KeepGrid fits snapshots that match a given regex into a retentiongrid.Grid, +// uses the most recent snapshot among those that match the regex as 'now', +// and deletes all snapshots that do not fit the grid specification. +type KeepGrid struct { + retentionGrid *retentiongrid.Grid + re *regexp.Regexp +} + +func NewKeepGrid(in *config.PruneGrid) (p *KeepGrid, err error) { + + if in.Regex == "" { + return nil, fmt.Errorf("Regex must not be empty") + } + re, err := regexp.Compile(in.Regex) + if err != nil { + return nil, errors.Wrap(err, "Regex is invalid") + } + + // Assert intervals are of increasing length (not necessarily required, but indicates config mistake) + lastDuration := time.Duration(0) + for i := range in.Grid { + + if in.Grid[i].Length() < lastDuration { + // If all intervals before were keep=all, this is ok + allPrevKeepCountAll := true + for j := i - 1; allPrevKeepCountAll && j >= 0; j-- { + allPrevKeepCountAll = in.Grid[j].KeepCount() == config.RetentionGridKeepCountAll + } + if allPrevKeepCountAll { + goto isMonotonicIncrease + } + err = errors.New("retention grid interval length must be monotonically increasing") + return + } + isMonotonicIncrease: + lastDuration = in.Grid[i].Length() + + } + + retentionIntervals := 
make([]retentiongrid.Interval, len(in.Grid)) + for i := range in.Grid { + retentionIntervals[i] = &in.Grid[i] + } + + return &KeepGrid{ + retentiongrid.NewGrid(retentionIntervals), + re, + }, nil +} + +type retentionGridAdaptor struct { + Snapshot +} + +func (a retentionGridAdaptor) LessThan(b retentiongrid.Entry) bool { + return a.Date().Before(b.Date()) +} + +// KeepRule fits the snapshots whose name matches the regex into the retention grid and returns those that do not fit as the destroy list. +func (p *KeepGrid) KeepRule(snaps []Snapshot) (destroyList []Snapshot) { + + snaps = filterSnapList(snaps, func(snapshot Snapshot) bool { + return p.re.MatchString(snapshot.Name()) + }) + if len(snaps) == 0 { + return nil + } + + // Build adaptors for retention grid + adaptors := make([]retentiongrid.Entry, 0) + for i := range snaps { + adaptors = append(adaptors, retentionGridAdaptor{snaps[i]}) + } + + // determine 'now' edge + sort.SliceStable(adaptors, func(i, j int) bool { + return adaptors[i].LessThan(adaptors[j]) + }) + now := adaptors[len(adaptors)-1].Date() + + // Evaluate retention grid + _, removea := p.retentionGrid.FitEntries(now, adaptors) + + // Revert adaptors + destroyList = make([]Snapshot, len(removea)) + for i := range removea { + destroyList[i] = removea[i].(retentionGridAdaptor).Snapshot + } + return destroyList +} diff --git a/pruning/pruning.go b/pruning/pruning.go index eb575c2..2c4dec9 100644 --- a/pruning/pruning.go +++ b/pruning/pruning.go @@ -61,6 +61,8 @@ func RuleFromConfig(in config.PruningEnum) (KeepRule, error) { return NewKeepLastN(v.Count) case *config.PruneKeepRegex: return NewKeepRegex(v.Regex) + case *config.PruneGrid: + return NewKeepGrid(v) default: return nil, fmt.Errorf("unknown keep rule type %T", v) } diff --git a/pruning/retentiongrid/config_prune_grid.go b/pruning/retentiongrid/config_prune_grid.go deleted file mode 100644 index de1c6ba..0000000 --- a/pruning/retentiongrid/config_prune_grid.go +++ /dev/null @@ -1,156 +0,0 @@ -package retentiongrid - -import ( - "github.com/pkg/errors" - 
"github.com/zrepl/zrepl/config" - "github.com/zrepl/zrepl/zfs" - "math" - "sort" - "strconv" - "time" -) - -type GridPrunePolicy struct { - retentionGrid *retentionGrid - keepBookmarks int -} - -const GridPrunePolicyMaxBookmarksKeepAll = -1 - -type retentionGridAdaptor struct { - zfs.FilesystemVersion -} - -func (a retentionGridAdaptor) Date() time.Time { - return a.Creation -} - -func (a retentionGridAdaptor) LessThan(b RetentionGridEntry) bool { - return a.CreateTXG < b.(retentionGridAdaptor).CreateTXG -} - -// Prune filters snapshots with the retention grid. -// Bookmarks are deleted such that keepBookmarks are kept in the end. -// The oldest bookmarks are removed first. -func (p *GridPrunePolicy) Prune(_ *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error) { - skeep, sremove := p.pruneSnapshots(versions) - keep, remove = p.pruneBookmarks(skeep) - remove = append(remove, sremove...) - return keep, remove, nil -} - -func (p *GridPrunePolicy) pruneSnapshots(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) { - - // Build adaptors for retention grid - keep = []zfs.FilesystemVersion{} - adaptors := make([]RetentionGridEntry, 0) - for fsv := range versions { - if versions[fsv].Type != zfs.Snapshot { - keep = append(keep, versions[fsv]) - continue - } - adaptors = append(adaptors, retentionGridAdaptor{versions[fsv]}) - } - - sort.SliceStable(adaptors, func(i, j int) bool { - return adaptors[i].LessThan(adaptors[j]) - }) - now := adaptors[len(adaptors)-1].Date() - - // Evaluate retention grid - keepa, removea := p.retentionGrid.FitEntries(now, adaptors) - - // Revert adaptors - for i := range keepa { - keep = append(keep, keepa[i].(retentionGridAdaptor).FilesystemVersion) - } - remove = make([]zfs.FilesystemVersion, len(removea)) - for i := range removea { - remove[i] = removea[i].(retentionGridAdaptor).FilesystemVersion - } - return - -} - -func (p *GridPrunePolicy) pruneBookmarks(versions 
[]zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) { - - if p.keepBookmarks == GridPrunePolicyMaxBookmarksKeepAll { - return versions, []zfs.FilesystemVersion{} - } - - keep = []zfs.FilesystemVersion{} - bookmarks := make([]zfs.FilesystemVersion, 0) - for fsv := range versions { - if versions[fsv].Type != zfs.Bookmark { - keep = append(keep, versions[fsv]) - continue - } - bookmarks = append(bookmarks, versions[fsv]) - } - - if len(bookmarks) == 0 { - return keep, []zfs.FilesystemVersion{} - } - if len(bookmarks) < p.keepBookmarks { - keep = append(keep, bookmarks...) - return keep, []zfs.FilesystemVersion{} - } - - // NOTE: sorting descending by descending by createtxg <=> sorting ascending wrt creation time - sort.SliceStable(bookmarks, func(i, j int) bool { - return (bookmarks[i].CreateTXG > bookmarks[j].CreateTXG) - }) - - keep = append(keep, bookmarks[:p.keepBookmarks]...) - remove = bookmarks[p.keepBookmarks:] - - return keep, remove -} - -func ParseGridPrunePolicy(in config.PruneGrid, willSeeBookmarks bool) (p *GridPrunePolicy, err error) { - - const KeepBookmarksAllString = "all" - - // Assert intervals are of increasing length (not necessarily required, but indicates config mistake) - lastDuration := time.Duration(0) - for i := range in.Grid { - - if in.Grid[i].Length() < lastDuration { - // If all intervals before were keep=all, this is ok - allPrevKeepCountAll := true - for j := i - 1; allPrevKeepCountAll && j >= 0; j-- { - allPrevKeepCountAll = in.Grid[j].KeepCount() == config.RetentionGridKeepCountAll - } - if allPrevKeepCountAll { - goto isMonotonicIncrease - } - err = errors.New("retention grid interval length must be monotonically increasing") - return - } - isMonotonicIncrease: - lastDuration = in.Grid[i].Length() - - } - - // Parse keepBookmarks - keepBookmarks := 0 - if in.KeepBookmarks == KeepBookmarksAllString || (in.KeepBookmarks == "" && !willSeeBookmarks) { - keepBookmarks = GridPrunePolicyMaxBookmarksKeepAll - } else { - i, err 
:= strconv.ParseInt(in.KeepBookmarks, 10, 32) - if err != nil || i <= 0 || i > math.MaxInt32 { - return nil, errors.Errorf("keep_bookmarks must be positive integer or 'all'") - } - keepBookmarks = int(i) - } - - retentionIntervals := make([]RetentionInterval, len(in.Grid)) - for i := range in.Grid { - retentionIntervals[i] = &in.Grid[i] - } - - return &GridPrunePolicy{ - newRetentionGrid(retentionIntervals), - keepBookmarks, - }, nil -} diff --git a/pruning/retentiongrid/retentiongrid.go b/pruning/retentiongrid/retentiongrid.go index 4813933..78f8767 100644 --- a/pruning/retentiongrid/retentiongrid.go +++ b/pruning/retentiongrid/retentiongrid.go @@ -5,50 +5,50 @@ import ( "time" ) -type RetentionInterval interface { +type Interval interface { Length() time.Duration KeepCount() int } const RetentionGridKeepCountAll int = -1 -type retentionGrid struct { - intervals []RetentionInterval +type Grid struct { + intervals []Interval } //A point inside the grid, i.e. a thing the grid can decide to remove -type RetentionGridEntry interface { +type Entry interface { Date() time.Time - LessThan(b RetentionGridEntry) bool + LessThan(b Entry) bool } -func dateInInterval(date, startDateInterval time.Time, i RetentionInterval) bool { +func dateInInterval(date, startDateInterval time.Time, i Interval) bool { return date.After(startDateInterval) && date.Before(startDateInterval.Add(i.Length())) } -func newRetentionGrid(l []RetentionInterval) *retentionGrid { +func NewGrid(l []Interval) *Grid { // TODO Maybe check for ascending interval lengths here, although the algorithm // itself doesn't care about that. - return &retentionGrid{l} + return &Grid{l} } -// Partition a list of RetentionGridEntries into the retentionGrid, +// Partition a list of Entry values into the Grid, // relative to a given start date `now`. 
// -// The `keepCount` oldest entries per `RetentionInterval` are kept (`keep`), +// The `keepCount` oldest entries per `retentiongrid.Interval` are kept (`keep`), // the others are removed (`remove`). // // Entries that are younger than `now` are always kept. // Those that are older than the earliest beginning of an interval are removed. -func (g retentionGrid) FitEntries(now time.Time, entries []RetentionGridEntry) (keep, remove []RetentionGridEntry) { +func (g Grid) FitEntries(now time.Time, entries []Entry) (keep, remove []Entry) { type bucket struct { - entries []RetentionGridEntry + entries []Entry } buckets := make([]bucket, len(g.intervals)) - keep = make([]RetentionGridEntry, 0) - remove = make([]RetentionGridEntry, 0) + keep = make([]Entry, 0) + remove = make([]Entry, 0) oldestIntervalStart := now for i := range g.intervals { diff --git a/pruning/retentiongrid/retentiongrid_test.go b/pruning/retentiongrid/retentiongrid_test.go index 70c8ccd..3e5e52f 100644 --- a/pruning/retentiongrid/retentiongrid_test.go +++ b/pruning/retentiongrid/retentiongrid_test.go @@ -22,10 +22,10 @@ func (i *retentionIntervalStub) KeepCount() int { return i.keepCount } -func retentionGridFromString(gs string) (g *retentionGrid) { +func gridFromString(gs string) (g *Grid) { intervals := strings.Split(gs, "|") - g = &retentionGrid{ - intervals: make([]RetentionInterval, len(intervals)), + g = &Grid{ + intervals: make([]Interval, len(intervals)), } for idx, i := range intervals { comps := strings.SplitN(i, ",", 2) @@ -62,11 +62,11 @@ func (ds dummySnap) Date() time.Time { return ds.date } -func (ds dummySnap) LessThan(b RetentionGridEntry) bool { +func (ds dummySnap) LessThan(b Entry) bool { return ds.date.Before(b.(dummySnap).date) // don't have a txg here } -func validateRetentionGridFitEntries(t *testing.T, now time.Time, input, keep, remove []RetentionGridEntry) { +func validateRetentionGridFitEntries(t *testing.T, now time.Time, input, keep, remove []Entry) { snapDescr := func(d 
dummySnap) string { return fmt.Sprintf("%s@%s", d.Name, d.date.Sub(now)) @@ -101,8 +101,8 @@ func validateRetentionGridFitEntries(t *testing.T, now time.Time, input, keep, r } func TestRetentionGridFitEntriesEmptyInput(t *testing.T) { - g := retentionGridFromString("10m|10m|10m|1h") - keep, remove := g.FitEntries(time.Now(), []RetentionGridEntry{}) + g := gridFromString("10m|10m|10m|1h") + keep, remove := g.FitEntries(time.Now(), []Entry{}) assert.Empty(t, keep) assert.Empty(t, remove) } @@ -111,13 +111,13 @@ func TestRetentionGridFitEntriesIntervalBoundariesAndAlignment(t *testing.T) { // Intervals are (duration], i.e. 10min is in the first interval, not in the second - g := retentionGridFromString("10m|10m|10m") + g := gridFromString("10m|10m|10m") t.Logf("%#v\n", g) now := time.Unix(0, 0) - snaps := []RetentionGridEntry{ + snaps := []Entry{ dummySnap{"0", true, now.Add(1 * time.Minute)}, // before now dummySnap{"1", true, now}, // before now dummySnap{"2", true, now.Add(-10 * time.Minute)}, // 1st interval @@ -133,13 +133,13 @@ func TestRetentionGridFitEntriesIntervalBoundariesAndAlignment(t *testing.T) { func TestRetentionGridFitEntries(t *testing.T) { - g := retentionGridFromString("10m,-1|10m|10m,2|1h") + g := gridFromString("10m,-1|10m|10m,2|1h") t.Logf("%#v\n", g) now := time.Unix(0, 0) - snaps := []RetentionGridEntry{ + snaps := []Entry{ dummySnap{"1", true, now.Add(3 * time.Minute)}, // pre-now must always be kept dummySnap{"b1", true, now.Add(-6 * time.Minute)}, // 1st interval allows unlimited entries dummySnap{"b3", true, now.Add(-8 * time.Minute)}, // 1st interval allows unlimited entries