move retentiongrid to own package

This commit is contained in:
Anton Schirg 2018-08-26 19:20:08 +02:00
parent cd9a428841
commit add1b69809
7 changed files with 355 additions and 310 deletions

View File

@ -6,6 +6,8 @@ import (
"github.com/zrepl/yaml-config" "github.com/zrepl/yaml-config"
"io/ioutil" "io/ioutil"
"os" "os"
"regexp"
"strconv"
"time" "time"
) )
@ -102,11 +104,6 @@ type PruneKeepLastN struct {
Count int `yaml:"count"` Count int `yaml:"count"`
} }
// PruneGrid is the YAML representation of a grid pruning rule.
// Type is read from the `type` key and Grid from the `grid` key; Grid is kept
// as the raw, unparsed string here.
type PruneGrid struct {
Type string `yaml:"type"`
Grid string `yaml:"grid"`
}
type LoggingOutletEnum struct { type LoggingOutletEnum struct {
Ret interface{} Ret interface{}
} }
@ -239,3 +236,41 @@ func ParseConfig(path string) (i Config, err error) {
return return
} }
// durationStringRegex matches a decimal integer followed by exactly one unit
// suffix (s, m, h, d or w); whitespace around either part is ignored.
var durationStringRegex = regexp.MustCompile(`^\s*(\d+)\s*(s|m|h|d|w)\s*$`)

// parsePostitiveDuration parses strings such as "10m" or "2 w" into a
// time.Duration.
//
// Supported units are seconds (s), minutes (m), hours (h), days (d, 24h) and
// weeks (w, 7*24h). An error is returned if e does not match the expected
// format, if the numeric factor is not strictly positive, or if the resulting
// duration does not fit into a time.Duration.
func parsePostitiveDuration(e string) (d time.Duration, err error) {
	comps := durationStringRegex.FindStringSubmatch(e)
	if len(comps) != 3 {
		return 0, fmt.Errorf("does not match regex: %s %#v", e, comps)
	}

	durationFactor, err := strconv.ParseInt(comps[1], 10, 64)
	if err != nil {
		return 0, err
	}
	if durationFactor <= 0 {
		return 0, errors.New("duration must be positive integer")
	}

	var durationUnit time.Duration
	switch comps[2] {
	case "s":
		durationUnit = time.Second
	case "m":
		durationUnit = time.Minute
	case "h":
		durationUnit = time.Hour
	case "d":
		durationUnit = 24 * time.Hour
	case "w":
		durationUnit = 24 * 7 * time.Hour
	default:
		return 0, fmt.Errorf("contains unknown time unit '%s'", comps[2])
	}

	d = time.Duration(durationFactor) * durationUnit
	// Go integer multiplication wraps silently; dividing back detects the wrap
	// so that huge factors are rejected instead of yielding a bogus duration.
	if d/durationUnit != time.Duration(durationFactor) {
		return 0, fmt.Errorf("duration %q overflows the representable range", e)
	}
	return d, nil
}

123
cmd/config/retentiongrid.go Normal file
View File

@ -0,0 +1,123 @@
package config
import (
"fmt"
"regexp"
"strconv"
"strings"
"time"
)
// RetentionIntervalList is a list of retention intervals.
// It is decoded from its string form by its UnmarshalYAML method.
type RetentionIntervalList []RetentionInterval
// PruneGrid is the YAML representation of a grid pruning rule.
// Grid is read from the `grid` key and KeepBookmarks from the `keep_bookmarks`
// key; KeepBookmarks stays a raw string in this type.
type PruneGrid struct {
Type string `yaml:"type"`
Grid RetentionIntervalList `yaml:"grid"`
KeepBookmarks string `yaml:"keep_bookmarks"`
}
// RetentionInterval is a single interval of a retention grid: a time span
// together with the number of entries to keep within it.
type RetentionInterval struct {
length time.Duration
keepCount int
}
// Length returns the duration covered by the interval.
func (i *RetentionInterval) Length() time.Duration {
return i.length
}
// KeepCount returns the number of entries to keep in the interval.
// The parser in this file stores RetentionGridKeepCountAll here for `keep=all`.
func (i *RetentionInterval) KeepCount() int {
return i.keepCount
}
// RetentionGridKeepCountAll is the keep count stored for intervals configured
// with `keep=all`.
const RetentionGridKeepCountAll int = -1
// RetentionGrid wraps an ordered list of retention intervals.
type RetentionGrid struct {
intervals []RetentionInterval
}
// UnmarshalYAML decodes the YAML node as a plain string and parses it as a
// "|"-separated retention grid specification. On success the receiver is
// overwritten with the parsed intervals; on failure it is left untouched.
func (t *RetentionIntervalList) UnmarshalYAML(u func(interface{}, bool) error) (err error) {
	var raw string
	if decodeErr := u(&raw, true); decodeErr != nil {
		return decodeErr
	}

	parsed, parseErr := parseRetentionGridIntervalsString(raw)
	if parseErr != nil {
		return parseErr
	}

	*t = parsed
	return nil
}
// retentionStringIntervalRegex matches one grid component of the form
// "<times> x <duration> [(<parameters>)]", e.g. "24x1h" or "7 x 1d (keep=all)".
var retentionStringIntervalRegex = regexp.MustCompile(`^\s*(\d+)\s*x\s*([^\(]+)\s*(\((.*)\))?\s*$`)

// retentionIntervalKeepRegex matches the only supported interval parameter,
// `keep=<value>`. It is compiled once instead of on every parse call.
var retentionIntervalKeepRegex = regexp.MustCompile(`^\s*keep=(.+)\s*$`)

// parseRetentionGridIntervalString parses a single grid component and expands
// it into `times` identical intervals.
//
// The keep count defaults to 1. The optional parameter `keep=all` selects
// RetentionGridKeepCountAll; `keep=<n>` must be a non-negative integer.
// An error is returned if the component does not match the expected format,
// the factor is not positive, the duration is invalid, or the parameter cannot
// be interpreted.
func parseRetentionGridIntervalString(e string) (intervals []RetentionInterval, err error) {
	comps := retentionStringIntervalRegex.FindStringSubmatch(e)
	if comps == nil {
		return nil, fmt.Errorf("retention string does not match expected format")
	}

	times, err := strconv.Atoi(comps[1])
	if err != nil {
		return nil, err
	}
	if times <= 0 {
		return nil, fmt.Errorf("contains factor <= 0")
	}

	duration, err := parsePostitiveDuration(comps[2])
	if err != nil {
		return nil, err
	}

	keepCount := 1
	if comps[3] != "" {
		// Decompose key=value, comma separated
		// For now, only keep is supported
		res := retentionIntervalKeepRegex.FindStringSubmatch(comps[4])
		if len(res) != 2 {
			return nil, fmt.Errorf("interval parameter contains unknown parameters")
		}
		// The greedy capture group includes trailing whitespace; strip it so
		// that "(keep=all )" and "(keep=3 )" are accepted.
		value := strings.TrimSpace(res[1])
		if value == "all" {
			keepCount = RetentionGridKeepCountAll
		} else {
			keepCount, err = strconv.Atoi(value)
			if err != nil {
				return nil, fmt.Errorf("cannot parse keep_count value")
			}
			// A negative number must not alias the keep-all sentinel (-1).
			if keepCount < 0 {
				return nil, fmt.Errorf("keep_count value must not be negative")
			}
		}
	}

	intervals = make([]RetentionInterval, times)
	for i := range intervals {
		intervals[i] = RetentionInterval{
			length:    duration,
			keepCount: keepCount,
		}
	}
	return intervals, nil
}
// parseRetentionGridIntervalsString splits s at "|" and parses every part as a
// grid component, concatenating the expanded intervals in order.
// The first component that fails to parse aborts the whole operation; the
// error names the 1-based position of the offending component.
func parseRetentionGridIntervalsString(s string) (intervals []RetentionInterval, err error) {
	parts := strings.Split(s, "|")
	intervals = make([]RetentionInterval, 0, 7*len(parts))

	for idx := range parts {
		expanded, parseErr := parseRetentionGridIntervalString(parts[idx])
		if parseErr != nil {
			return nil, fmt.Errorf("cannot parse interval %d of %d: %s: %s", idx+1, len(parts), parseErr, strings.TrimSpace(parts[idx]))
		}
		intervals = append(intervals, expanded...)
	}

	return intervals, nil
}

View File

@ -254,41 +254,3 @@ func parseAuthenticatedChannelListenerFactory(c JobParsingContext, v map[string]
} }
} }
// durationStringRegex matches a decimal integer followed by exactly one unit
// suffix (s, m, h, d or w); whitespace around either part is ignored.
var durationStringRegex = regexp.MustCompile(`^\s*(\d+)\s*(s|m|h|d|w)\s*$`)

// parsePostitiveDuration parses strings such as "10m" or "2 w" into a
// time.Duration.
//
// Supported units are seconds (s), minutes (m), hours (h), days (d, 24h) and
// weeks (w, 7*24h). An error is returned if e does not match the expected
// format, if the numeric factor is not strictly positive, or if the resulting
// duration does not fit into a time.Duration.
func parsePostitiveDuration(e string) (d time.Duration, err error) {
	comps := durationStringRegex.FindStringSubmatch(e)
	if len(comps) != 3 {
		return 0, fmt.Errorf("does not match regex: %s %#v", e, comps)
	}

	durationFactor, err := strconv.ParseInt(comps[1], 10, 64)
	if err != nil {
		return 0, err
	}
	if durationFactor <= 0 {
		return 0, errors.New("duration must be positive integer")
	}

	var durationUnit time.Duration
	switch comps[2] {
	case "s":
		durationUnit = time.Second
	case "m":
		durationUnit = time.Minute
	case "h":
		durationUnit = time.Hour
	case "d":
		durationUnit = 24 * time.Hour
	case "w":
		durationUnit = 24 * 7 * time.Hour
	default:
		return 0, fmt.Errorf("contains unknown time unit '%s'", comps[2])
	}

	d = time.Duration(durationFactor) * durationUnit
	// Go integer multiplication wraps silently; dividing back detects the wrap
	// so that huge factors are rejected instead of yielding a bogus duration.
	if d/durationUnit != time.Duration(durationFactor) {
		return 0, fmt.Errorf("duration %q overflows the representable range", e)
	}
	return d, nil
}

View File

@ -1,245 +0,0 @@
package cmd
import (
"fmt"
"github.com/mitchellh/mapstructure"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/util"
"github.com/zrepl/zrepl/zfs"
"math"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
// GridPrunePolicy prunes filesystem versions: snapshots are fitted into
// RetentionGrid, bookmarks are limited to at most MaxBookmarks.
type GridPrunePolicy struct {
RetentionGrid *util.RetentionGrid
MaxBookmarks int
}
// GridPrunePolicyMaxBookmarksKeepAll is the MaxBookmarks value for which
// pruneBookmarks returns all versions unchanged, i.e. no bookmark is removed.
const GridPrunePolicyMaxBookmarksKeepAll = -1
// retentionGridAdaptor wraps a zfs.FilesystemVersion so that it provides the
// Date and LessThan methods used with util.RetentionGridEntry.
type retentionGridAdaptor struct {
zfs.FilesystemVersion
}
// Date returns the creation time of the wrapped filesystem version.
func (a retentionGridAdaptor) Date() time.Time {
return a.Creation
}
// LessThan orders entries by CreateTXG.
// b must be a retentionGridAdaptor; any other dynamic type makes the type
// assertion panic.
func (a retentionGridAdaptor) LessThan(b util.RetentionGridEntry) bool {
return a.CreateTXG < b.(retentionGridAdaptor).CreateTXG
}
// Prune applies the policy to versions in two stages: snapshots are filtered
// through the retention grid first, then the bookmarks among the survivors are
// reduced to at most MaxBookmarks.
// The returned remove list holds the pruned bookmarks followed by the pruned
// snapshots. The returned error is always nil.
func (p *GridPrunePolicy) Prune(_ *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error) {
	keptAfterGrid, removedSnapshots := p.pruneSnapshots(versions)
	keep, removedBookmarks := p.pruneBookmarks(keptAfterGrid)
	return keep, append(removedBookmarks, removedSnapshots...), nil
}
// pruneSnapshots partitions the snapshots in versions using the retention grid.
//
// Versions that are not snapshots are passed through into keep untouched.
// The creation date of the snapshot with the highest CreateTXG serves as the
// reference time for the grid. If versions contains no snapshot at all, there
// is nothing to fit and remove is empty.
func (p *GridPrunePolicy) pruneSnapshots(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {

	// Build adaptors for retention grid
	keep = []zfs.FilesystemVersion{}
	adaptors := make([]util.RetentionGridEntry, 0, len(versions))
	for fsv := range versions {
		if versions[fsv].Type != zfs.Snapshot {
			keep = append(keep, versions[fsv])
			continue
		}
		adaptors = append(adaptors, retentionGridAdaptor{versions[fsv]})
	}

	// Without a snapshot there is no reference time; indexing the last
	// element below would panic on the empty slice.
	if len(adaptors) == 0 {
		return keep, []zfs.FilesystemVersion{}
	}

	sort.SliceStable(adaptors, func(i, j int) bool {
		return adaptors[i].LessThan(adaptors[j])
	})
	now := adaptors[len(adaptors)-1].Date()

	// Evaluate retention grid
	keepa, removea := p.RetentionGrid.FitEntries(now, adaptors)

	// Revert adaptors
	for i := range keepa {
		keep = append(keep, keepa[i].(retentionGridAdaptor).FilesystemVersion)
	}
	remove = make([]zfs.FilesystemVersion, len(removea))
	for i := range removea {
		remove[i] = removea[i].(retentionGridAdaptor).FilesystemVersion
	}
	return keep, remove
}
// pruneBookmarks limits the bookmarks in versions to at most MaxBookmarks.
//
// Non-bookmark versions are always kept. If MaxBookmarks equals
// GridPrunePolicyMaxBookmarksKeepAll, versions is returned unchanged.
// Otherwise the bookmarks with the highest CreateTXG are kept and the
// remaining ones are returned in remove.
func (p *GridPrunePolicy) pruneBookmarks(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {
	if p.MaxBookmarks == GridPrunePolicyMaxBookmarksKeepAll {
		return versions, []zfs.FilesystemVersion{}
	}

	// Separate bookmarks from everything else
	keep = []zfs.FilesystemVersion{}
	bookmarks := make([]zfs.FilesystemVersion, 0)
	for _, v := range versions {
		if v.Type == zfs.Bookmark {
			bookmarks = append(bookmarks, v)
		} else {
			keep = append(keep, v)
		}
	}

	// Fewer bookmarks than the limit (including none at all): keep them all
	if len(bookmarks) == 0 || len(bookmarks) < p.MaxBookmarks {
		keep = append(keep, bookmarks...)
		return keep, []zfs.FilesystemVersion{}
	}

	// Sort descending by CreateTXG so that the leading MaxBookmarks entries
	// are the ones with the highest CreateTXG
	sort.SliceStable(bookmarks, func(a, b int) bool {
		return bookmarks[b].CreateTXG < bookmarks[a].CreateTXG
	})

	limit := p.MaxBookmarks
	keep = append(keep, bookmarks[:limit]...)
	remove = bookmarks[limit:]
	return keep, remove
}
// parseGridPrunePolicy builds a GridPrunePolicy from a generic config map.
//
// The map is decoded (weakly typed) into the keys `grid` and `keep_bookmarks`.
// The grid string is parsed into intervals, which must not decrease in length
// unless all preceding intervals are keep=all. keep_bookmarks must be a
// positive integer or "all"; an empty value is treated as "all" only if
// willSeeBookmarks is false.
func parseGridPrunePolicy(e map[string]interface{}, willSeeBookmarks bool) (p *GridPrunePolicy, err error) {

	const KeepBookmarksAllString = "all"

	var raw struct {
		Grid          string
		KeepBookmarks string `mapstructure:"keep_bookmarks"`
	}

	decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{Result: &raw, WeaklyTypedInput: true})
	if err != nil {
		return nil, errors.Wrap(err, "mapstructure error")
	}
	if err = decoder.Decode(e); err != nil {
		return nil, errors.Wrapf(err, "mapstructure error")
	}

	// Parse grid
	intervals, err := parseRetentionGridIntervalsString(raw.Grid)
	if err != nil {
		return nil, fmt.Errorf("cannot parse retention grid: %s", err)
	}

	// Assert intervals are of increasing length (not necessarily required, but indicates config mistake)
	previous := time.Duration(0)
	for idx := range intervals {
		current := intervals[idx].Length
		if current < previous {
			// A shorter interval is tolerated only if every interval before it is keep=all
			for j := 0; j < idx; j++ {
				if intervals[j].KeepCount != util.RetentionGridKeepCountAll {
					return nil, errors.New("retention grid interval length must be monotonically increasing")
				}
			}
		}
		previous = current
	}

	// Parse KeepBookmarks
	var keepBookmarks int
	switch {
	case raw.KeepBookmarks == KeepBookmarksAllString, raw.KeepBookmarks == "" && !willSeeBookmarks:
		keepBookmarks = GridPrunePolicyMaxBookmarksKeepAll
	default:
		n, parseErr := strconv.ParseInt(raw.KeepBookmarks, 10, 32)
		if parseErr != nil || n <= 0 || n > math.MaxInt32 {
			return nil, errors.Errorf("keep_bookmarks must be positive integer or 'all'")
		}
		keepBookmarks = int(n)
	}

	return &GridPrunePolicy{
		RetentionGrid: util.NewRetentionGrid(intervals),
		MaxBookmarks:  keepBookmarks,
	}, nil
}
// retentionStringIntervalRegex matches one grid component of the form
// "<times> x <duration> [(<parameters>)]", e.g. "24x1h" or "7 x 1d (keep=all)".
var retentionStringIntervalRegex = regexp.MustCompile(`^\s*(\d+)\s*x\s*([^\(]+)\s*(\((.*)\))?\s*$`)

// retentionIntervalKeepRegex matches the only supported interval parameter,
// `keep=<value>`. It is compiled once instead of on every parse call.
var retentionIntervalKeepRegex = regexp.MustCompile(`^\s*keep=(.+)\s*$`)

// parseRetentionGridIntervalString parses a single grid component and expands
// it into `times` identical intervals.
//
// The keep count defaults to 1. The optional parameter `keep=all` selects
// util.RetentionGridKeepCountAll; `keep=<n>` must be a non-negative integer.
// An error is returned if the component does not match the expected format,
// the factor is not positive, the duration is invalid, or the parameter cannot
// be interpreted.
func parseRetentionGridIntervalString(e string) (intervals []util.RetentionInterval, err error) {
	comps := retentionStringIntervalRegex.FindStringSubmatch(e)
	if comps == nil {
		return nil, fmt.Errorf("retention string does not match expected format")
	}

	times, err := strconv.Atoi(comps[1])
	if err != nil {
		return nil, err
	}
	if times <= 0 {
		return nil, fmt.Errorf("contains factor <= 0")
	}

	duration, err := parsePostitiveDuration(comps[2])
	if err != nil {
		return nil, err
	}

	keepCount := 1
	if comps[3] != "" {
		// Decompose key=value, comma separated
		// For now, only keep is supported
		res := retentionIntervalKeepRegex.FindStringSubmatch(comps[4])
		if len(res) != 2 {
			return nil, fmt.Errorf("interval parameter contains unknown parameters")
		}
		// The greedy capture group includes trailing whitespace; strip it so
		// that "(keep=all )" and "(keep=3 )" are accepted.
		value := strings.TrimSpace(res[1])
		if value == "all" {
			keepCount = util.RetentionGridKeepCountAll
		} else {
			keepCount, err = strconv.Atoi(value)
			if err != nil {
				return nil, fmt.Errorf("cannot parse keep_count value")
			}
			// A negative number must not alias the keep-all sentinel (-1).
			if keepCount < 0 {
				return nil, fmt.Errorf("keep_count value must not be negative")
			}
		}
	}

	intervals = make([]util.RetentionInterval, times)
	for i := range intervals {
		intervals[i] = util.RetentionInterval{
			Length:    duration,
			KeepCount: keepCount,
		}
	}
	return intervals, nil
}
// parseRetentionGridIntervalsString splits s at "|" and parses every part as a
// grid component, concatenating the expanded intervals in order.
// The first component that fails to parse aborts the whole operation; the
// error names the 1-based position of the offending component.
func parseRetentionGridIntervalsString(s string) (intervals []util.RetentionInterval, err error) {
	parts := strings.Split(s, "|")
	intervals = make([]util.RetentionInterval, 0, 7*len(parts))

	for idx := range parts {
		expanded, parseErr := parseRetentionGridIntervalString(parts[idx])
		if parseErr != nil {
			return nil, fmt.Errorf("cannot parse interval %d of %d: %s: %s", idx+1, len(parts), parseErr, strings.TrimSpace(parts[idx]))
		}
		intervals = append(intervals, expanded...)
	}

	return intervals, nil
}

View File

@ -0,0 +1,156 @@
package retentiongrid
import (
"github.com/pkg/errors"
"github.com/zrepl/zrepl/cmd/config"
"github.com/zrepl/zrepl/zfs"
"math"
"sort"
"strconv"
"time"
)
// GridPrunePolicy prunes filesystem versions: snapshots are fitted into
// retentionGrid, bookmarks are limited to at most keepBookmarks.
type GridPrunePolicy struct {
retentionGrid *retentionGrid
keepBookmarks int
}
// GridPrunePolicyMaxBookmarksKeepAll is the keepBookmarks value for which
// pruneBookmarks returns all versions unchanged, i.e. no bookmark is removed.
const GridPrunePolicyMaxBookmarksKeepAll = -1
// retentionGridAdaptor wraps a zfs.FilesystemVersion so that it provides the
// Date and LessThan methods used with RetentionGridEntry.
type retentionGridAdaptor struct {
zfs.FilesystemVersion
}
// Date returns the creation time of the wrapped filesystem version.
func (a retentionGridAdaptor) Date() time.Time {
return a.Creation
}
// LessThan orders entries by CreateTXG.
// b must be a retentionGridAdaptor; any other dynamic type makes the type
// assertion panic.
func (a retentionGridAdaptor) LessThan(b RetentionGridEntry) bool {
return a.CreateTXG < b.(retentionGridAdaptor).CreateTXG
}
// Prune applies the policy to versions in two stages: snapshots are filtered
// through the retention grid first, then the bookmarks among the survivors are
// reduced to at most keepBookmarks.
// The returned remove list holds the pruned bookmarks followed by the pruned
// snapshots. The returned error is always nil.
func (p *GridPrunePolicy) Prune(_ *zfs.DatasetPath, versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion, err error) {
	keptAfterGrid, removedSnapshots := p.pruneSnapshots(versions)
	keep, removedBookmarks := p.pruneBookmarks(keptAfterGrid)
	return keep, append(removedBookmarks, removedSnapshots...), nil
}
// pruneSnapshots partitions the snapshots in versions using the retention grid.
//
// Versions that are not snapshots are passed through into keep untouched.
// The creation date of the snapshot with the highest CreateTXG serves as the
// reference time for the grid. If versions contains no snapshot at all, there
// is nothing to fit and remove is empty.
func (p *GridPrunePolicy) pruneSnapshots(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {

	// Build adaptors for retention grid
	keep = []zfs.FilesystemVersion{}
	adaptors := make([]RetentionGridEntry, 0, len(versions))
	for fsv := range versions {
		if versions[fsv].Type != zfs.Snapshot {
			keep = append(keep, versions[fsv])
			continue
		}
		adaptors = append(adaptors, retentionGridAdaptor{versions[fsv]})
	}

	// Without a snapshot there is no reference time; indexing the last
	// element below would panic on the empty slice.
	if len(adaptors) == 0 {
		return keep, []zfs.FilesystemVersion{}
	}

	sort.SliceStable(adaptors, func(i, j int) bool {
		return adaptors[i].LessThan(adaptors[j])
	})
	now := adaptors[len(adaptors)-1].Date()

	// Evaluate retention grid
	keepa, removea := p.retentionGrid.FitEntries(now, adaptors)

	// Revert adaptors
	for i := range keepa {
		keep = append(keep, keepa[i].(retentionGridAdaptor).FilesystemVersion)
	}
	remove = make([]zfs.FilesystemVersion, len(removea))
	for i := range removea {
		remove[i] = removea[i].(retentionGridAdaptor).FilesystemVersion
	}
	return keep, remove
}
// pruneBookmarks limits the bookmarks in versions to at most keepBookmarks.
//
// Non-bookmark versions are always kept. If keepBookmarks equals
// GridPrunePolicyMaxBookmarksKeepAll, versions is returned unchanged.
// Otherwise the bookmarks with the highest CreateTXG are kept and the
// remaining ones are returned in remove.
func (p *GridPrunePolicy) pruneBookmarks(versions []zfs.FilesystemVersion) (keep, remove []zfs.FilesystemVersion) {
	if p.keepBookmarks == GridPrunePolicyMaxBookmarksKeepAll {
		return versions, []zfs.FilesystemVersion{}
	}

	// Separate bookmarks from everything else
	keep = []zfs.FilesystemVersion{}
	bookmarks := make([]zfs.FilesystemVersion, 0)
	for _, v := range versions {
		if v.Type == zfs.Bookmark {
			bookmarks = append(bookmarks, v)
		} else {
			keep = append(keep, v)
		}
	}

	// Fewer bookmarks than the limit (including none at all): keep them all
	if len(bookmarks) == 0 || len(bookmarks) < p.keepBookmarks {
		keep = append(keep, bookmarks...)
		return keep, []zfs.FilesystemVersion{}
	}

	// Sort descending by CreateTXG so that the leading keepBookmarks entries
	// are the ones with the highest CreateTXG
	sort.SliceStable(bookmarks, func(a, b int) bool {
		return bookmarks[b].CreateTXG < bookmarks[a].CreateTXG
	})

	limit := p.keepBookmarks
	keep = append(keep, bookmarks[:limit]...)
	remove = bookmarks[limit:]
	return keep, remove
}
// ParseGridPrunePolicy builds a GridPrunePolicy from an already decoded
// config.PruneGrid.
//
// The grid intervals must not decrease in length unless all preceding
// intervals are keep=all. KeepBookmarks must be a positive integer or "all";
// an empty value is treated as "all" only if willSeeBookmarks is false.
func ParseGridPrunePolicy(in config.PruneGrid, willSeeBookmarks bool) (p *GridPrunePolicy, err error) {

	const KeepBookmarksAllString = "all"

	// Assert intervals are of increasing length (not necessarily required, but indicates config mistake)
	previous := time.Duration(0)
	for idx := range in.Grid {
		current := in.Grid[idx].Length()
		if current < previous {
			// A shorter interval is tolerated only if every interval before it is keep=all
			for j := 0; j < idx; j++ {
				if in.Grid[j].KeepCount() != config.RetentionGridKeepCountAll {
					return nil, errors.New("retention grid interval length must be monotonically increasing")
				}
			}
		}
		previous = current
	}

	// Parse keepBookmarks
	var keepBookmarks int
	switch {
	case in.KeepBookmarks == KeepBookmarksAllString, in.KeepBookmarks == "" && !willSeeBookmarks:
		keepBookmarks = GridPrunePolicyMaxBookmarksKeepAll
	default:
		n, parseErr := strconv.ParseInt(in.KeepBookmarks, 10, 32)
		if parseErr != nil || n <= 0 || n > math.MaxInt32 {
			return nil, errors.Errorf("keep_bookmarks must be positive integer or 'all'")
		}
		keepBookmarks = int(n)
	}

	// Expose the config intervals through the RetentionInterval interface;
	// the pointers refer to the elements of in.Grid itself.
	retentionIntervals := make([]RetentionInterval, 0, len(in.Grid))
	for idx := range in.Grid {
		retentionIntervals = append(retentionIntervals, &in.Grid[idx])
	}

	return &GridPrunePolicy{
		retentionGrid: newRetentionGrid(retentionIntervals),
		keepBookmarks: keepBookmarks,
	}, nil
}

View File

@ -1,45 +1,46 @@
package util package retentiongrid
import ( import (
"sort" "sort"
"time" "time"
) )
type RetentionInterval struct { type RetentionInterval interface {
Length time.Duration Length() time.Duration
KeepCount int KeepCount() int
} }
const RetentionGridKeepCountAll int = -1 const RetentionGridKeepCountAll int = -1
type RetentionGrid struct { type retentionGrid struct {
intervals []RetentionInterval intervals []RetentionInterval
} }
//A point inside the grid, i.e. a thing the grid can decide to remove
type RetentionGridEntry interface { type RetentionGridEntry interface {
Date() time.Time Date() time.Time
LessThan(b RetentionGridEntry) bool LessThan(b RetentionGridEntry) bool
} }
func dateInInterval(date, startDateInterval time.Time, i RetentionInterval) bool { func dateInInterval(date, startDateInterval time.Time, i RetentionInterval) bool {
return date.After(startDateInterval) && date.Before(startDateInterval.Add(i.Length)) return date.After(startDateInterval) && date.Before(startDateInterval.Add(i.Length()))
} }
func NewRetentionGrid(l []RetentionInterval) *RetentionGrid { func newRetentionGrid(l []RetentionInterval) *retentionGrid {
// TODO Maybe check for ascending interval lengths here, although the algorithm // TODO Maybe check for ascending interval lengths here, although the algorithm
// itself doesn't care about that. // itself doesn't care about that.
return &RetentionGrid{l} return &retentionGrid{l}
} }
// Partition a list of RetentionGridEntries into the RetentionGrid, // Partition a list of RetentionGridEntries into the retentionGrid,
// relative to a given start date `now`. // relative to a given start date `now`.
// //
// The `KeepCount` oldest entries per `RetentionInterval` are kept (`keep`), // The `keepCount` oldest entries per `RetentionInterval` are kept (`keep`),
// the others are removed (`remove`). // the others are removed (`remove`).
// //
// Entries that are younger than `now` are always kept. // Entries that are younger than `now` are always kept.
// Those that are older than the earliest beginning of an interval are removed. // Those that are older than the earliest beginning of an interval are removed.
func (g RetentionGrid) FitEntries(now time.Time, entries []RetentionGridEntry) (keep, remove []RetentionGridEntry) { func (g retentionGrid) FitEntries(now time.Time, entries []RetentionGridEntry) (keep, remove []RetentionGridEntry) {
type bucket struct { type bucket struct {
entries []RetentionGridEntry entries []RetentionGridEntry
@ -51,7 +52,7 @@ func (g RetentionGrid) FitEntries(now time.Time, entries []RetentionGridEntry) (
oldestIntervalStart := now oldestIntervalStart := now
for i := range g.intervals { for i := range g.intervals {
oldestIntervalStart = oldestIntervalStart.Add(-g.intervals[i].Length) oldestIntervalStart = oldestIntervalStart.Add(-g.intervals[i].Length())
} }
for ei := 0; ei < len(entries); ei++ { for ei := 0; ei < len(entries); ei++ {
@ -69,7 +70,7 @@ func (g RetentionGrid) FitEntries(now time.Time, entries []RetentionGridEntry) (
iStartTime := now iStartTime := now
for i := 0; i < len(g.intervals); i++ { for i := 0; i < len(g.intervals); i++ {
iStartTime = iStartTime.Add(-g.intervals[i].Length) iStartTime = iStartTime.Add(-g.intervals[i].Length())
if date == iStartTime || dateInInterval(date, iStartTime, g.intervals[i]) { if date == iStartTime || dateInInterval(date, iStartTime, g.intervals[i]) {
buckets[i].entries = append(buckets[i].entries, e) buckets[i].entries = append(buckets[i].entries, e)
} }
@ -78,14 +79,14 @@ func (g RetentionGrid) FitEntries(now time.Time, entries []RetentionGridEntry) (
for bi, b := range buckets { for bi, b := range buckets {
interval := &g.intervals[bi] interval := g.intervals[bi]
sort.SliceStable(b.entries, func(i, j int) bool { sort.SliceStable(b.entries, func(i, j int) bool {
return b.entries[i].LessThan((b.entries[j])) return b.entries[i].LessThan((b.entries[j]))
}) })
i := 0 i := 0
for ; (interval.KeepCount == RetentionGridKeepCountAll || i < interval.KeepCount) && i < len(b.entries); i++ { for ; (interval.KeepCount() == RetentionGridKeepCountAll || i < interval.KeepCount()) && i < len(b.entries); i++ {
keep = append(keep, b.entries[i]) keep = append(keep, b.entries[i])
} }
for ; i < len(b.entries); i++ { for ; i < len(b.entries); i++ {

View File

@ -1,4 +1,4 @@
package util package retentiongrid
import ( import (
"fmt" "fmt"
@ -9,9 +9,22 @@ import (
"time" "time"
) )
func retentionGridFromString(gs string) (g *RetentionGrid) { type retentionIntervalStub struct {
length time.Duration
keepCount int
}
func (i *retentionIntervalStub) Length() time.Duration {
return i.length
}
func (i *retentionIntervalStub) KeepCount() int {
return i.keepCount
}
func retentionGridFromString(gs string) (g *retentionGrid) {
intervals := strings.Split(gs, "|") intervals := strings.Split(gs, "|")
g = &RetentionGrid{ g = &retentionGrid{
intervals: make([]RetentionInterval, len(intervals)), intervals: make([]RetentionInterval, len(intervals)),
} }
for idx, i := range intervals { for idx, i := range intervals {
@ -25,16 +38,16 @@ func retentionGridFromString(gs string) (g *RetentionGrid) {
} }
var err error var err error
var interval RetentionInterval var interval retentionIntervalStub
if interval.KeepCount, err = strconv.Atoi(numSnapsStr); err != nil { if interval.keepCount, err = strconv.Atoi(numSnapsStr); err != nil {
panic(err) panic(err)
} }
if interval.Length, err = time.ParseDuration(durationStr); err != nil { if interval.length, err = time.ParseDuration(durationStr); err != nil {
panic(err) panic(err)
} }
g.intervals[idx] = interval g.intervals[idx] = &interval
} }
return return
} }