pruning: implement 'grid' keep rule

This commit is contained in:
Christian Schwarz 2018-09-24 17:30:03 +02:00
parent 328ac687f6
commit 9e941d5be5
11 changed files with 138 additions and 190 deletions

View File

@ -17,6 +17,7 @@ SUBPKGS += daemon/streamrpcconfig
SUBPKGS += endpoint SUBPKGS += endpoint
SUBPKGS += logger SUBPKGS += logger
SUBPKGS += pruning SUBPKGS += pruning
SUBPKGS += pruning/retentiongrid
SUBPKGS += replication SUBPKGS += replication
SUBPKGS += replication/fsrep SUBPKGS += replication/fsrep
SUBPKGS += replication/pdu SUBPKGS += replication/pdu

View File

@ -13,7 +13,7 @@ type RetentionIntervalList []RetentionInterval
type PruneGrid struct { type PruneGrid struct {
Type string `yaml:"type"` Type string `yaml:"type"`
Grid RetentionIntervalList `yaml:"grid"` Grid RetentionIntervalList `yaml:"grid"`
KeepBookmarks string `yaml:"keep_bookmarks"` Regex string `yaml:"regex"`
} }
type RetentionInterval struct { type RetentionInterval struct {

View File

@ -25,4 +25,4 @@ jobs:
keep_receiver: keep_receiver:
- type: grid - type: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
keep_bookmarks: all regex: "zrepl_.*"

View File

@ -17,8 +17,8 @@ jobs:
count: 10 count: 10
- type: grid - type: grid
grid: 1x1h(keep=all) | 24x1h | 14x1d grid: 1x1h(keep=all) | 24x1h | 14x1d
keep_bookmarks: all regex: "zrepl_.*"
keep_receiver: keep_receiver:
- type: grid - type: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
keep_bookmarks: all regex: "zrepl_.*"

View File

@ -19,10 +19,10 @@ jobs:
count: 10 count: 10
- type: grid - type: grid
grid: 1x1h(keep=all) | 24x1h | 14x1d grid: 1x1h(keep=all) | 24x1h | 14x1d
keep_bookmarks: all regex: "^zrepl_.*"
keep_receiver: keep_receiver:
- type: prefix - type: prefix
prefix: keep_ prefix: keep_
- type: grid - type: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
keep_bookmarks: all regex: "^zrepl_.*"

View File

@ -18,9 +18,8 @@ jobs:
count: 10 count: 10
- type: grid - type: grid
grid: 1x1h(keep=all) | 24x1h | 14x1d grid: 1x1h(keep=all) | 24x1h | 14x1d
keep_bookmarks: all regex: "^zrepl_.*"
keep_receiver: keep_receiver:
- type: grid - type: grid
grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d grid: 1x1h(keep=all) | 24x1h | 35x1d | 6x30d
keep_bookmarks: all regex: "^zrepl_.*"

102
pruning/keep_grid.go Normal file
View File

@ -0,0 +1,102 @@
package pruning
import (
"fmt"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/pruning/retentiongrid"
"regexp"
"sort"
"time"
)
// KeepGrid fits snapshots that match a given regex into a retentiongrid.Grid,
// uses the most recent snapshot among those that match the regex as 'now',
// and deletes all snapshots that do not fit the grid specification.
//
// Instances are created with NewKeepGrid, which validates the configuration.
type KeepGrid struct {
// retentionGrid is the grid built from the configured intervals; it decides
// which of the fitted snapshots are removed.
retentionGrid *retentiongrid.Grid
// re is matched against snapshot names to select the snapshots this rule
// considers at all.
re *regexp.Regexp
}
// NewKeepGrid builds a KeepGrid from its configuration.
//
// An error is returned if the regex is empty or does not compile, or if a
// grid interval is shorter than its predecessor while at least one of the
// intervals before it is not configured as keep=all.
func NewKeepGrid(in *config.PruneGrid) (p *KeepGrid, err error) {
	if in.Regex == "" {
		return nil, fmt.Errorf("Regex must not be empty")
	}
	re, err := regexp.Compile(in.Regex)
	if err != nil {
		return nil, errors.Wrap(err, "Regex is invalid")
	}

	// Interval lengths are expected to grow from one interval to the next.
	// The grid itself does not need this, but a shrinking interval usually
	// points at a configuration mistake.
	var prevLength time.Duration
	for idx := range in.Grid {
		length := in.Grid[idx].Length()
		if length < prevLength {
			// A shorter interval is tolerated only if every interval
			// before it keeps all of its entries.
			for j := idx - 1; j >= 0; j-- {
				if in.Grid[j].KeepCount() != config.RetentionGridKeepCountAll {
					return nil, errors.New("retention grid interval length must be monotonically increasing")
				}
			}
		}
		prevLength = length
	}

	// The grid works on the Interval interface, so hand it pointers to the
	// configured intervals.
	intervals := make([]retentiongrid.Interval, 0, len(in.Grid))
	for idx := range in.Grid {
		intervals = append(intervals, &in.Grid[idx])
	}

	return &KeepGrid{
		retentionGrid: retentiongrid.NewGrid(intervals),
		re:            re,
	}, nil
}
// retentionGridAdaptor wraps a Snapshot so that it can be handed to the
// retention grid as a retentiongrid.Entry.
type retentionGridAdaptor struct {
Snapshot
}
// LessThan reports whether a's date lies strictly before b's date.
func (a retentionGridAdaptor) LessThan(b retentiongrid.Entry) bool {
	mine, theirs := a.Date(), b.Date()
	return mine.Before(theirs)
}
// KeepRule returns the snapshots that the retention grid wants destroyed.
//
// Only the snapshots selected by filterSnapList with the rule's regex
// (matched against the snapshot name) are fitted into the grid; the date of
// the most recent one of them serves as the grid's 'now'. If no snapshot is
// selected, nil is returned.
func (p *KeepGrid) KeepRule(snaps []Snapshot) (destroyList []Snapshot) {
	snaps = filterSnapList(snaps, func(snapshot Snapshot) bool {
		return p.re.MatchString(snapshot.Name())
	})
	if len(snaps) == 0 {
		// Nothing to fit; this also guards the 'now' lookup below.
		return nil
	}

	// Build adaptors for the retention grid. The final length is known, so
	// allocate once instead of growing the slice by append.
	adaptors := make([]retentiongrid.Entry, len(snaps))
	for i := range snaps {
		adaptors[i] = retentionGridAdaptor{snaps[i]}
	}

	// Determine the 'now' edge: after sorting by date, the last entry is the
	// most recent one.
	sort.SliceStable(adaptors, func(i, j int) bool {
		return adaptors[i].LessThan(adaptors[j])
	})
	now := adaptors[len(adaptors)-1].Date()

	// Evaluate the retention grid; only the removal list is of interest.
	_, removea := p.retentionGrid.FitEntries(now, adaptors)

	// Unwrap the adaptors again.
	destroyList = make([]Snapshot, len(removea))
	for i := range removea {
		destroyList[i] = removea[i].(retentionGridAdaptor).Snapshot
	}
	return destroyList
}

View File

@ -61,6 +61,8 @@ func RuleFromConfig(in config.PruningEnum) (KeepRule, error) {
return NewKeepLastN(v.Count) return NewKeepLastN(v.Count)
case *config.PruneKeepRegex: case *config.PruneKeepRegex:
return NewKeepRegex(v.Regex) return NewKeepRegex(v.Regex)
case *config.PruneGrid:
return NewKeepGrid(v)
default: default:
return nil, fmt.Errorf("unknown keep rule type %T", v) return nil, fmt.Errorf("unknown keep rule type %T", v)
} }

View File

@ -1,156 +0,0 @@
package retentiongrid
import (
"github.com/pkg/errors"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/zfs"
"math"
"sort"
"strconv"
"time"
)
// GridPrunePolicy prunes snapshots by fitting them into a retention grid and
// additionally limits how many bookmarks are kept.
type GridPrunePolicy struct {
// retentionGrid decides which snapshots are kept and which are removed.
retentionGrid *retentionGrid
// keepBookmarks is the number of bookmarks to keep, or
// GridPrunePolicyMaxBookmarksKeepAll to keep all of them.
keepBookmarks int
}
// GridPrunePolicyMaxBookmarksKeepAll is the keepBookmarks value that disables
// bookmark pruning, i.e. all bookmarks are kept.
const GridPrunePolicyMaxBookmarksKeepAll = -1
// retentionGridAdaptor wraps a zfs.FilesystemVersion so that it can be used
// as a RetentionGridEntry.
type retentionGridAdaptor struct {
zfs.FilesystemVersion
}
// Date returns the Creation time of the wrapped filesystem version.
func (a retentionGridAdaptor) Date() time.Time {
	return a.FilesystemVersion.Creation
}
// LessThan orders entries by CreateTXG.
// It panics if b is not a retentionGridAdaptor.
func (a retentionGridAdaptor) LessThan(b RetentionGridEntry) bool {
	other := b.(retentionGridAdaptor)
	return a.CreateTXG < other.CreateTXG
}
// Prune applies the policy to versions in two passes: snapshots are pruned
// with the retention grid first, then the bookmarks among the remaining
// versions are reduced to at most keepBookmarks entries.
// The returned remove list contains the removed bookmarks followed by the
// removed snapshots. The returned error is always nil.
func (p *GridPrunePolicy) Prune(_ *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error) {
	survivors, droppedSnapshots := p.pruneSnapshots(versions)
	kept, droppedBookmarks := p.pruneBookmarks(survivors)
	return kept, append(droppedBookmarks, droppedSnapshots...), nil
}
// pruneSnapshots fits the snapshots among versions into the retention grid.
//
// Versions that are not snapshots are always put into keep. The snapshots are
// sorted with LessThan and the Date of the last one is used as the grid's
// 'now'. keep receives the non-snapshots followed by the snapshots the grid
// keeps; remove receives the snapshots the grid removes.
//
// If versions contains no snapshot, there is nothing to fit and remove is
// empty.
func (p *GridPrunePolicy) pruneSnapshots(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {

	// Build adaptors for retention grid
	keep = []zfs.FilesystemVersion{}
	adaptors := make([]RetentionGridEntry, 0, len(versions))
	for fsv := range versions {
		if versions[fsv].Type != zfs.Snapshot {
			keep = append(keep, versions[fsv])
			continue
		}
		adaptors = append(adaptors, retentionGridAdaptor{versions[fsv]})
	}

	// Without any snapshot there is no 'now' edge; indexing the last element
	// of an empty slice below would panic.
	if len(adaptors) == 0 {
		return keep, []zfs.FilesystemVersion{}
	}

	sort.SliceStable(adaptors, func(i, j int) bool {
		return adaptors[i].LessThan(adaptors[j])
	})
	now := adaptors[len(adaptors)-1].Date()

	// Evaluate retention grid
	keepa, removea := p.retentionGrid.FitEntries(now, adaptors)

	// Revert adaptors
	for i := range keepa {
		keep = append(keep, keepa[i].(retentionGridAdaptor).FilesystemVersion)
	}
	remove = make([]zfs.FilesystemVersion, len(removea))
	for i := range removea {
		remove[i] = removea[i].(retentionGridAdaptor).FilesystemVersion
	}
	return keep, remove
}
// pruneBookmarks limits the bookmarks among versions to keepBookmarks entries.
//
// Versions that are not bookmarks are always kept. If keepBookmarks is
// GridPrunePolicyMaxBookmarksKeepAll, versions is returned unchanged as keep.
// Otherwise the bookmarks with the highest CreateTXG are kept and the
// remaining bookmarks are returned in remove.
func (p *GridPrunePolicy) pruneBookmarks(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {

	if p.keepBookmarks == GridPrunePolicyMaxBookmarksKeepAll {
		return versions, []zfs.FilesystemVersion{}
	}

	// Split the input into bookmarks and everything else.
	keep = []zfs.FilesystemVersion{}
	bookmarks := make([]zfs.FilesystemVersion, 0)
	for _, version := range versions {
		if version.Type == zfs.Bookmark {
			bookmarks = append(bookmarks, version)
		} else {
			keep = append(keep, version)
		}
	}

	switch {
	case len(bookmarks) == 0:
		return keep, []zfs.FilesystemVersion{}
	case len(bookmarks) < p.keepBookmarks:
		// Fewer bookmarks than the limit: nothing has to go.
		return append(keep, bookmarks...), []zfs.FilesystemVersion{}
	}

	// Sort by CreateTXG in descending order so that the bookmarks to keep
	// form the head of the slice.
	// NOTE(review): presumably a higher CreateTXG means a more recently
	// created bookmark, i.e. the oldest bookmarks get removed - confirm.
	sort.SliceStable(bookmarks, func(i, j int) bool {
		return bookmarks[j].CreateTXG < bookmarks[i].CreateTXG
	})

	limit := p.keepBookmarks
	return append(keep, bookmarks[:limit]...), bookmarks[limit:]
}
// ParseGridPrunePolicy builds a GridPrunePolicy from its configuration.
//
// An error is returned if a grid interval is shorter than its predecessor
// while at least one of the intervals before it is not configured as
// keep=all, or if keep_bookmarks is neither 'all' nor a positive integer.
// An empty keep_bookmarks is treated like 'all' only if willSeeBookmarks is
// false.
func ParseGridPrunePolicy(in config.PruneGrid, willSeeBookmarks bool) (p *GridPrunePolicy, err error) {

	const KeepBookmarksAllString = "all"

	// Interval lengths are expected to grow from one interval to the next.
	// The grid itself does not need this, but a shrinking interval usually
	// points at a configuration mistake.
	var prevLength time.Duration
	for idx := range in.Grid {
		length := in.Grid[idx].Length()
		if length < prevLength {
			// A shorter interval is tolerated only if every interval
			// before it keeps all of its entries.
			for j := idx - 1; j >= 0; j-- {
				if in.Grid[j].KeepCount() != config.RetentionGridKeepCountAll {
					return nil, errors.New("retention grid interval length must be monotonically increasing")
				}
			}
		}
		prevLength = length
	}

	// Parse keepBookmarks
	var keepBookmarks int
	switch {
	case in.KeepBookmarks == KeepBookmarksAllString,
		in.KeepBookmarks == "" && !willSeeBookmarks:
		keepBookmarks = GridPrunePolicyMaxBookmarksKeepAll
	default:
		parsed, parseErr := strconv.ParseInt(in.KeepBookmarks, 10, 32)
		if parseErr != nil || parsed <= 0 || parsed > math.MaxInt32 {
			return nil, errors.Errorf("keep_bookmarks must be positive integer or 'all'")
		}
		keepBookmarks = int(parsed)
	}

	// The grid works on the RetentionInterval interface, so hand it pointers
	// to the configured intervals.
	intervals := make([]RetentionInterval, 0, len(in.Grid))
	for idx := range in.Grid {
		intervals = append(intervals, &in.Grid[idx])
	}

	return &GridPrunePolicy{
		retentionGrid: newRetentionGrid(intervals),
		keepBookmarks: keepBookmarks,
	}, nil
}

View File

@ -5,50 +5,50 @@ import (
"time" "time"
) )
type RetentionInterval interface { type Interval interface {
Length() time.Duration Length() time.Duration
KeepCount() int KeepCount() int
} }
const RetentionGridKeepCountAll int = -1 const RetentionGridKeepCountAll int = -1
type retentionGrid struct { type Grid struct {
intervals []RetentionInterval intervals []Interval
} }
//A point inside the grid, i.e. a thing the grid can decide to remove //A point inside the grid, i.e. a thing the grid can decide to remove
type RetentionGridEntry interface { type Entry interface {
Date() time.Time Date() time.Time
LessThan(b RetentionGridEntry) bool LessThan(b Entry) bool
} }
func dateInInterval(date, startDateInterval time.Time, i RetentionInterval) bool { func dateInInterval(date, startDateInterval time.Time, i Interval) bool {
return date.After(startDateInterval) && date.Before(startDateInterval.Add(i.Length())) return date.After(startDateInterval) && date.Before(startDateInterval.Add(i.Length()))
} }
func newRetentionGrid(l []RetentionInterval) *retentionGrid { func NewGrid(l []Interval) *Grid {
// TODO Maybe check for ascending interval lengths here, although the algorithm // TODO Maybe check for ascending interval lengths here, although the algorithm
// itself doesn't care about that. // itself doesn't care about that.
return &retentionGrid{l} return &Grid{l}
} }
// Partition a list of RetentionGridEntries into the retentionGrid, // Partition a list of RetentionGridEntries into the Grid,
// relative to a given start date `now`. // relative to a given start date `now`.
// //
// The `keepCount` oldest entries per `RetentionInterval` are kept (`keep`), // The `keepCount` oldest entries per `retentiongrid.Interval` are kept (`keep`),
// the others are removed (`remove`). // the others are removed (`remove`).
// //
// Entries that are younger than `now` are always kept. // Entries that are younger than `now` are always kept.
// Those that are older than the earliest beginning of an interval are removed. // Those that are older than the earliest beginning of an interval are removed.
func (g retentionGrid) FitEntries(now time.Time, entries []RetentionGridEntry) (keep, remove []RetentionGridEntry) { func (g Grid) FitEntries(now time.Time, entries []Entry) (keep, remove []Entry) {
type bucket struct { type bucket struct {
entries []RetentionGridEntry entries []Entry
} }
buckets := make([]bucket, len(g.intervals)) buckets := make([]bucket, len(g.intervals))
keep = make([]RetentionGridEntry, 0) keep = make([]Entry, 0)
remove = make([]RetentionGridEntry, 0) remove = make([]Entry, 0)
oldestIntervalStart := now oldestIntervalStart := now
for i := range g.intervals { for i := range g.intervals {

View File

@ -22,10 +22,10 @@ func (i *retentionIntervalStub) KeepCount() int {
return i.keepCount return i.keepCount
} }
func retentionGridFromString(gs string) (g *retentionGrid) { func gridFromString(gs string) (g *Grid) {
intervals := strings.Split(gs, "|") intervals := strings.Split(gs, "|")
g = &retentionGrid{ g = &Grid{
intervals: make([]RetentionInterval, len(intervals)), intervals: make([]Interval, len(intervals)),
} }
for idx, i := range intervals { for idx, i := range intervals {
comps := strings.SplitN(i, ",", 2) comps := strings.SplitN(i, ",", 2)
@ -62,11 +62,11 @@ func (ds dummySnap) Date() time.Time {
return ds.date return ds.date
} }
func (ds dummySnap) LessThan(b RetentionGridEntry) bool { func (ds dummySnap) LessThan(b Entry) bool {
return ds.date.Before(b.(dummySnap).date) // don't have a txg here return ds.date.Before(b.(dummySnap).date) // don't have a txg here
} }
func validateRetentionGridFitEntries(t *testing.T, now time.Time, input, keep, remove []RetentionGridEntry) { func validateRetentionGridFitEntries(t *testing.T, now time.Time, input, keep, remove []Entry) {
snapDescr := func(d dummySnap) string { snapDescr := func(d dummySnap) string {
return fmt.Sprintf("%s@%s", d.Name, d.date.Sub(now)) return fmt.Sprintf("%s@%s", d.Name, d.date.Sub(now))
@ -101,8 +101,8 @@ func validateRetentionGridFitEntries(t *testing.T, now time.Time, input, keep, r
} }
func TestRetentionGridFitEntriesEmptyInput(t *testing.T) { func TestRetentionGridFitEntriesEmptyInput(t *testing.T) {
g := retentionGridFromString("10m|10m|10m|1h") g := gridFromString("10m|10m|10m|1h")
keep, remove := g.FitEntries(time.Now(), []RetentionGridEntry{}) keep, remove := g.FitEntries(time.Now(), []Entry{})
assert.Empty(t, keep) assert.Empty(t, keep)
assert.Empty(t, remove) assert.Empty(t, remove)
} }
@ -111,13 +111,13 @@ func TestRetentionGridFitEntriesIntervalBoundariesAndAlignment(t *testing.T) {
// Intervals are (duration], i.e. 10min is in the first interval, not in the second // Intervals are (duration], i.e. 10min is in the first interval, not in the second
g := retentionGridFromString("10m|10m|10m") g := gridFromString("10m|10m|10m")
t.Logf("%#v\n", g) t.Logf("%#v\n", g)
now := time.Unix(0, 0) now := time.Unix(0, 0)
snaps := []RetentionGridEntry{ snaps := []Entry{
dummySnap{"0", true, now.Add(1 * time.Minute)}, // before now dummySnap{"0", true, now.Add(1 * time.Minute)}, // before now
dummySnap{"1", true, now}, // before now dummySnap{"1", true, now}, // before now
dummySnap{"2", true, now.Add(-10 * time.Minute)}, // 1st interval dummySnap{"2", true, now.Add(-10 * time.Minute)}, // 1st interval
@ -133,13 +133,13 @@ func TestRetentionGridFitEntriesIntervalBoundariesAndAlignment(t *testing.T) {
func TestRetentionGridFitEntries(t *testing.T) { func TestRetentionGridFitEntries(t *testing.T) {
g := retentionGridFromString("10m,-1|10m|10m,2|1h") g := gridFromString("10m,-1|10m|10m,2|1h")
t.Logf("%#v\n", g) t.Logf("%#v\n", g)
now := time.Unix(0, 0) now := time.Unix(0, 0)
snaps := []RetentionGridEntry{ snaps := []Entry{
dummySnap{"1", true, now.Add(3 * time.Minute)}, // pre-now must always be kept dummySnap{"1", true, now.Add(3 * time.Minute)}, // pre-now must always be kept
dummySnap{"b1", true, now.Add(-6 * time.Minute)}, // 1st interval allows unlimited entries dummySnap{"b1", true, now.Add(-6 * time.Minute)}, // 1st interval allows unlimited entries
dummySnap{"b3", true, now.Add(-8 * time.Minute)}, // 1st interval allows unlimited entries dummySnap{"b3", true, now.Add(-8 * time.Minute)}, // 1st interval allows unlimited entries