snapshotting: ability to specify timestamp location != UTC (#801)

This PR adds a new field optional field `timestamp_location` that allows
the user to specify a timezone different than the default UTC for use in
the snapshot suffix.

I took @mjasnik 's PR https://github.com/zrepl/zrepl/pull/785 and
refactored+extended it as follows:
* move all formatting logic into its own package
* disallow `dense` and `human` with formats != UTC to protect users from
stupidity
* document behavior more clearly
* regression test for existing users
This commit is contained in:
Christian Schwarz 2024-10-18 15:12:41 +02:00 committed by GitHub
parent 904c1512a3
commit b9b9ad10cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 322 additions and 66 deletions

View File

@ -222,7 +222,12 @@ type SnapshottingPeriodic struct {
Prefix string `yaml:"prefix"`
Interval *PositiveDuration `yaml:"interval"`
Hooks HookList `yaml:"hooks,optional"`
TimestampFormattingSpec `yaml:",inline"`
}
type TimestampFormattingSpec struct {
TimestampFormat string `yaml:"timestamp_format,optional,default=dense"`
TimestampLocation string `yaml:"timestamp_location,optional,default=UTC"`
}
type CronSpec struct {
@ -255,7 +260,7 @@ type SnapshottingCron struct {
Prefix string `yaml:"prefix"`
Cron CronSpec `yaml:"cron"`
Hooks HookList `yaml:"hooks,optional"`
TimestampFormat string `yaml:"timestamp_format,optional,default=dense"`
TimestampFormattingSpec `yaml:",inline"`
}
type SnapshottingManual struct {

View File

@ -163,7 +163,8 @@ jobs:
assert.Equal(t, "periodic", snp.Type)
assert.Equal(t, 10*time.Minute, snp.Interval.Duration())
assert.Equal(t, "zrepl_", snp.Prefix)
assert.Equal(t, "dense", snp.TimestampFormat) // default was set correctly
assert.Equal(t, snp.TimestampFormat, "dense")
assert.Equal(t, snp.TimestampLocation, "UTC")
})
t.Run("cron", func(t *testing.T) {
@ -171,6 +172,7 @@ jobs:
snp := c.Jobs[0].Ret.(*PushJob).Snapshotting.Ret.(*SnapshottingCron)
assert.Equal(t, "cron", snp.Type)
assert.Equal(t, "zrepl_", snp.Prefix)
assert.Equal(t, "dense", snp.TimestampFormat) // default was set correctly
assert.Equal(t, snp.TimestampFormat, "dense")
assert.Equal(t, snp.TimestampLocation, "UTC")
})
}

View File

@ -10,6 +10,7 @@ import (
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/hooks"
"github.com/zrepl/zrepl/daemon/snapper/snapname"
"github.com/zrepl/zrepl/util/suspendresumesafetimer"
"github.com/zrepl/zrepl/zfs"
)
@ -20,9 +21,14 @@ func cronFromConfig(fsf zfs.DatasetFilter, in config.SnapshottingCron) (*Cron, e
if err != nil {
return nil, errors.Wrap(err, "hook config error")
}
formatter, err := snapname.New(in.Prefix, in.TimestampFormat, in.TimestampLocation)
if err != nil {
return nil, errors.Wrap(err, "build snapshot name formatter")
}
planArgs := planArgs{
prefix: in.Prefix,
timestampFormat: in.TimestampFormat,
formatter: formatter,
hooks: hooksList,
}
return &Cron{config: in, fsf: fsf, planArgs: planArgs}, nil

View File

@ -4,19 +4,18 @@ import (
"context"
"fmt"
"sort"
"strconv"
"strings"
"time"
"github.com/zrepl/zrepl/daemon/hooks"
"github.com/zrepl/zrepl/daemon/logging"
"github.com/zrepl/zrepl/daemon/snapper/snapname"
"github.com/zrepl/zrepl/util/chainlock"
"github.com/zrepl/zrepl/zfs"
)
type planArgs struct {
prefix string
timestampFormat string
formatter *snapname.Formatter
hooks *hooks.List
}
@ -60,21 +59,6 @@ type snapProgress struct {
runResults hooks.PlanReport
}
func (plan *plan) formatNow(format string) string {
now := time.Now().UTC()
switch strings.ToLower(format) {
case "dense":
format = "20060102_150405_000"
case "human":
format = "2006-01-02_15:04:05"
case "iso-8601":
format = "2006-01-02T15:04:05.000Z"
case "unix-seconds":
return strconv.FormatInt(now.Unix(), 10)
}
return now.Format(format)
}
func (plan *plan) execute(ctx context.Context, dryRun bool) (ok bool) {
hookMatchCount := make(map[hooks.Hook]int, len(*plan.args.hooks))
@ -85,8 +69,7 @@ func (plan *plan) execute(ctx context.Context, dryRun bool) (ok bool) {
anyFsHadErr := false
// TODO channel programs -> allow a little jitter?
for fs, progress := range plan.snaps {
suffix := plan.formatNow(plan.args.timestampFormat)
snapname := fmt.Sprintf("%s%s", plan.args.prefix, suffix)
snapname := plan.args.formatter.Format(time.Now())
ctx := logging.WithInjectedField(ctx, "fs", fs.ToString())
ctx = logging.WithInjectedField(ctx, "snap", snapname)

View File

@ -10,6 +10,7 @@ import (
"github.com/pkg/errors"
"github.com/zrepl/zrepl/daemon/logging/trace"
"github.com/zrepl/zrepl/daemon/snapper/snapname"
"github.com/zrepl/zrepl/config"
"github.com/zrepl/zrepl/daemon/hooks"
@ -32,12 +33,16 @@ func periodicFromConfig(g *config.Global, fsf zfs.DatasetFilter, in *config.Snap
return nil, errors.Wrap(err, "hook config error")
}
formatter, err := snapname.New(in.Prefix, in.TimestampFormat, in.TimestampLocation)
if err != nil {
return nil, errors.Wrap(err, "build snapshot name formatter")
}
args := periodicArgs{
interval: in.Interval.Duration(),
fsf: fsf,
planArgs: planArgs{
prefix: in.Prefix,
timestampFormat: in.TimestampFormat,
formatter: formatter,
hooks: hookList,
},
// ctx and log is set in Run()
@ -166,7 +171,7 @@ func periodicStateSyncUp(a periodicArgs, u updater) state {
if err != nil {
return onErr(err, u)
}
syncPoint, err := findSyncPoint(a.ctx, fss, a.planArgs.prefix, a.interval)
syncPoint, err := findSyncPoint(a.ctx, fss, a.planArgs.formatter.Prefix(), a.interval)
if err != nil {
return onErr(err, u)
}

View File

@ -0,0 +1,52 @@
package snapname
import (
"fmt"
"time"
"github.com/pkg/errors"
"github.com/zrepl/zrepl/daemon/snapper/snapname/timestamp"
"github.com/zrepl/zrepl/zfs"
)
type Formatter struct {
prefix string
timestamp *timestamp.Formatter
}
func New(prefix, tsFormat, tsLocation string) (*Formatter, error) {
timestamp, err := timestamp.New(tsFormat, tsLocation)
if err != nil {
return nil, errors.Wrap(err, "build timestamp formatter")
}
formatter := &Formatter{
prefix: prefix,
timestamp: timestamp,
}
// Best-effort check to detect whether the result would be an invalid name.
// Test two dates that in most places have will have different time zone offsets due to DST.
check := func(t time.Time) error {
testFormat := formatter.Format(t)
if err := zfs.ComponentNamecheck(testFormat); err != nil {
// testFormat last, can be quite long
return fmt.Errorf("`invalid snapshot name would result from `prefix+$timestamp`: %s: %q", err, testFormat)
}
return nil
}
if err := check(time.Date(2020, 6, 1, 0, 0, 0, 0, time.UTC)); err != nil {
return nil, err
}
if err := check(time.Date(2020, 12, 1, 0, 0, 0, 0, time.UTC)); err != nil {
return nil, err
}
return formatter, nil
}
func (f *Formatter) Format(now time.Time) string {
return f.prefix + f.timestamp.Format(now)
}
func (f *Formatter) Prefix() string {
return f.prefix
}

View File

@ -0,0 +1,93 @@
package timestamp
import (
"fmt"
"strconv"
"strings"
"time"
"github.com/pkg/errors"
)
type Formatter struct {
format func(time.Time) string
location *time.Location
}
func New(formatString string, locationString string) (*Formatter, error) {
location, err := time.LoadLocation(locationString) // no shadow
if err != nil {
return nil, errors.Wrapf(err, "load location from string %q", locationString)
}
makeFormatFunc := func(formatString string) (func(time.Time) string, error) {
// NB: we use zfs.EntityNamecheck in higher-level code to filter out all invalid characters.
// This check here is specifically so that we know for sure that the `+`=>`_` replacement
// that we do in the returned func replaces exactly the timezone offset `+` and not some other `+`.
if strings.Contains(formatString, "+") {
return nil, fmt.Errorf("character '+' is not allowed in ZFS snapshot names and has special handling")
}
return func(t time.Time) string {
res := t.Format(formatString)
// if the formatString contains a time zone specifier
// and the location would result in a positive offset to UTC
// then the result of t.Format would contain a '+' sign.
if isLocationPositiveOffsetToUTC(location) {
// the only source of `+` can be the positive time zone offset because we disallowed `+` as a character in the format string
res = strings.Replace(res, "+", "_", 1)
}
if strings.Contains(res, "+") {
panic(fmt.Sprintf("format produced a string containing illegal character '+' that wasn't the expected case of positive time zone offset: format=%q location=%q unix=%q result=%q", formatString, location, t.Unix(), res))
}
return res
}, nil
}
var formatFunc func(time.Time) string
mustUseUtcError := func() error {
return fmt.Errorf("format string requires UTC location")
}
switch strings.ToLower(formatString) {
case "dense":
if location != time.UTC {
err = mustUseUtcError()
} else {
formatFunc, err = makeFormatFunc("20060102_150405_000")
}
case "human":
if location != time.UTC {
err = mustUseUtcError()
} else {
formatFunc, err = makeFormatFunc("2006-01-02_15:04:05")
}
case "iso-8601":
formatFunc, err = makeFormatFunc("2006-01-02T15:04:05.000Z0700")
case "unix-seconds":
if location != time.UTC {
// Technically not required because unix time is by definition in UTC
// but let's make that clear to confused users...
err = mustUseUtcError()
} else {
formatFunc = func(t time.Time) string {
return strconv.FormatInt(t.Unix(), 10)
}
}
default:
formatFunc, err = makeFormatFunc(formatString)
}
if err != nil {
return nil, errors.Wrapf(err, "invalid format string %q or location %q", formatString, locationString)
}
return &Formatter{
format: formatFunc,
location: location,
}, nil
}
func isLocationPositiveOffsetToUTC(location *time.Location) bool {
_, offsetSeconds := time.Now().In(location).Zone()
return offsetSeconds > 0
}
func (f *Formatter) Format(t time.Time) string {
t = t.In(f.location)
return f.format(t)
}

View File

@ -0,0 +1,84 @@
package timestamp
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
var utc, _ = time.LoadLocation("UTC")
var berlin, _ = time.LoadLocation("Europe/Berlin")
var nyc, _ = time.LoadLocation("America/New_York")
func TestAssumptionsAboutTimePackage(t *testing.T) {
now := time.Now()
assert.Equal(t, now.In(utc).Unix(), now.In(berlin).Unix(), "unix timestamp is always in UTC")
}
func TestLegacyIso8601Format(t *testing.T) {
// Before we allowed users to specify the location of the time to be formatted,
// we always used UTC.
// At the time, the `iso-8601` format used the following format string
// "2006-01-02T15:04:05.000Z"
// That format string's `Z` was never identified by the Go time package as a time zone specifier.
// The correct way would have been
// "2006-01-02T15:04:05.000Z07"
// "2006-01-02T15:04:05.000Z0700"
// "2006-01-02T15:04:05.000Z070000"
// or any variation of how the minute / second offsets are displayed.
// However, because of the forced location UTC, it didn't matter, because both format strings
// evaluate to the same time string.
//
// This test is here to ensure that the legacy behavior is preserved for users who don't specify a non-UTC location
// (UTC location is the default location at this time)
oldFormatString := "2006-01-02T15:04:05.000Z"
now := time.Now()
oldOutput := now.In(utc).Format(oldFormatString)
currentImpl, err := New("iso-8601", "UTC")
require.NoError(t, err)
currentOutput := currentImpl.Format(now)
assert.Equal(t, oldOutput, currentOutput, "legacy behavior of iso-8601 format is preserved")
}
func TestIso8601PrintsTimeZoneOffset(t *testing.T) {
f, err := New("iso-8601", "Europe/Berlin")
require.NoError(t, err)
out := f.Format(time.Date(2024, 5, 14, 21, 16, 23, 0, time.UTC))
require.Equal(t, "2024-05-14T23:16:23.000_0200", out, "time zone offset is printed")
}
func TestBuiltinFormatsErrorOnNonUtcLocation(t *testing.T) {
var err error
expectMsg := `^.*: format string requires UTC location$`
_, err = New("dense", "Europe/Berlin")
require.Regexp(t, expectMsg, err)
_, err = New("human", "Europe/Berlin")
require.Regexp(t, expectMsg, err)
_, err = New("unix-seconds", "Europe/Berlin")
require.Regexp(t, expectMsg, err)
_, err = New("iso-8601", "Europe/Berlin")
require.NoError(t, err, "iso-8601 prints time zone, so non-UTC locations are allowed, see test TestIso8601PrintsTimeZoneOffset")
}
func TestPositiveUtcOffsetDetection(t *testing.T) {
assert.True(t, isLocationPositiveOffsetToUTC(berlin), "Berlin is UTC+1/UTC+2")
assert.False(t, isLocationPositiveOffsetToUTC(utc), "UTC is UTC+0")
assert.False(t, isLocationPositiveOffsetToUTC(nyc), "New York is UTC-5/UTC-4")
}
func TestFormatCanReplacePlusWithUnderscore(t *testing.T) {
f, err := New("2006-01-02_15:04:05-07:00:00", "Europe/Berlin")
require.NoError(t, err)
out := f.Format(time.Date(2024, 5, 14, 21, 16, 23, 0, time.UTC))
require.Equal(t, "2024-05-14_23:16:23_02:00:00", out, "+ is replaced with _ so we can use the string in ZFS snapshots")
}

View File

@ -25,8 +25,9 @@ The following snapshotting types are supported:
The ``periodic`` and ``cron`` snapshotting types share some common options and behavior:
* **Naming:** The snapshot names are composed of a user-defined ``prefix`` followed by a UTC date formatted like ``20060102_150405_000``.
We use UTC because it will avoid name conflicts when switching time zones or between summer and winter time.
* **Naming:** The snapshot names are composed of a user-defined ``prefix`` followed by a UTC date formatted like ``20060102_150405_000`` by default.
We either use UTC or timestamp with timezone information because it will avoid name conflicts when switching time zones or between summer and winter time.
Note that if timestamp with time zone information is used, the "+" of the timezone (i.e. +02:00) is replaced by "_" (i.e. _02:00) to conform to allowed characters in ZFS snapshots.
* **Hooks:** You can configure hooks to run before or after zrepl takes the snapshots. See :ref:`below <job-snapshotting-hooks>` for details.
* **Push replication:** After creating all snapshots, the snapshotter will wake up the replication part of the job, if it's a ``push`` job.
Note that snapshotting is decoupled from replication, i.e., if it is down or takes too long, snapshots will still be taken.
@ -65,6 +66,9 @@ The ``periodic`` and ``cron`` snapshotting types share some common options and b
# Timestamp format that is used as snapshot suffix.
# Can be any of "dense" (default), "human", "iso-8601", "unix-seconds" or a custom Go time format (see https://go.dev/src/time/format.go)
timestamp_format: dense
# Specifies in which time zone the snapshot suffix is generated, optional, defaults to UTC, used only if time zone information is part of timestamp_format.
# Can be "UTC" (default), "Local" (time zone information from the OS) or any of the IANA Time Zone names (see https://nodatime.org/TimeZones)
timestamp_location: UTC
hooks: ...
pruning: ...
@ -97,6 +101,9 @@ The snapshotter uses the ``prefix`` to identify which snapshots it created.
# Timestamp format that is used as snapshot suffix.
# Can be any of "dense" (default), "human", "iso-8601", "unix-seconds" or a custom Go time format (see https://go.dev/src/time/format.go)
timestamp_format: dense
# Specifies in which time zone the snapshot suffix is generated, optional, defaults to UTC, used only if time zone information is part of timestamp_format.
# Can be "UTC" (default), "Local" (time zone information from the OS) or any of the IANA Time Zone names (see https://nodatime.org/TimeZones)
timestamp_location: UTC
pruning: ...
In ``cron`` mode, the snapshotter takes snaphots at fixed points in time.
@ -104,20 +111,6 @@ See https://en.wikipedia.org/wiki/Cron for details on the syntax.
zrepl uses the ``the github.com/robfig/cron/v3`` Go package for parsing.
An optional field for "seconds" is supported to take snapshots at sub-minute frequencies.
.. _job-snapshotting-timestamp_format:
Timestamp Format
~~~~~~~~~~~~~~~~
The ``cron`` and ``periodic`` snapshotter support configuring a custom timestamp format that is used as suffix for the snapshot name.
It can be used by setting ``timestamp_format`` to any of the following values:
* ``dense`` (default) looks like ``20060102_150405_000``
* ``human`` looks like ``2006-01-02_15:04:05``
* ``iso-8601`` looks like ``2006-01-02T15:04:05.000Z``
* ``unix-seconds`` looks like ``1136214245``
* Any custom Go time format accepted by `time.Time#Format <https://go.dev/src/time/format.go>`_.
``manual`` Snapshotting
-----------------------
@ -137,6 +130,33 @@ See :ref:`this quickstart guide <quickstart-backup-to-external-disk>` for an exa
To trigger replication after taking snapshots, use the ``zrepl signal wakeup JOB`` command.
.. _job-snapshotting-timestamp_format:
Timestamp Format
----------------
The ``cron`` and ``periodic`` snapshotter support configuring a custom timestamp format that is used as suffix for the snapshot name.
By default, the current time is converted to UTC and then formatted using the ``dense`` format.
Both time zone and format can be customized by setting ``timestamp_format`` and/or ``timestamp_location``.
``timestamp_location`` is fed verbatim into `time.LoadLocation <https://pkg.go.dev/time#LoadLocation>`_.
``timestamp_format`` supports the following values (case-insensitive).
* ``dense`` => Go format string ``20060102_150405_000``
* ``human`` => Go format string ``2006-01-02_15:04:05``
* ``iso-8601`` => Go format string ``2006-01-02T15:04:05.000Z07:00``
* ``unix-seconds`` => Unix seconds since the epoch, formatted as a decimal string (`time.Time.Unix <https://pkg.go.dev/time#Time.Unix>`_)
* Any custom Go time format accepted by `time.Time#Format <https://go.dev/src/time/format.go>`_.
The ``dense``, ``human``, and ``unix-seconds`` formats require the ``timestamp_location`` to be ``UTC`` because they do not contain timezone information.
The ``+`` character is `not allowed in ZFS snapshot names <https://github.com/openzfs/zfs/blob/1d3ba0bf01020f5459b1c28db3979129088924c0/module/zcommon/zfs_namecheck.c#L334-L351>`_.
Format strings are disallowed to contain the ``+`` character (it's not a character that is interpreted by ``time.Time.Format``, so there's no reason to use it).
If a format _produces_ a ``+``, that ``+`` is replaced by an `_`` character.
.. _job-snapshotting-hooks:
Pre- and Post-Snapshot Hooks

View File

@ -55,22 +55,22 @@ var componentValidChar = regexp.MustCompile(`^[0-9a-zA-Z-_\.: ]+$`)
// [-_.: ]
func ComponentNamecheck(datasetPathComponent string) error {
if len(datasetPathComponent) == 0 {
return fmt.Errorf("path component must not be empty")
return fmt.Errorf("must not be empty")
}
if len(datasetPathComponent) > MaxDatasetNameLen {
return fmt.Errorf("path component must not be longer than %d chars", MaxDatasetNameLen)
return fmt.Errorf("must not be longer than %d chars", MaxDatasetNameLen)
}
if !(isASCII(datasetPathComponent)) {
return fmt.Errorf("path component must be ASCII")
return fmt.Errorf("must be ASCII")
}
if !componentValidChar.MatchString(datasetPathComponent) {
return fmt.Errorf("path component must only contain alphanumeric chars and any in %q", "-_.: ")
return fmt.Errorf("must only contain alphanumeric chars and any in %q", "-_.: ")
}
if datasetPathComponent == "." || datasetPathComponent == ".." {
return fmt.Errorf("path component must not be '%s'", datasetPathComponent)
return fmt.Errorf("must not be '%s'", datasetPathComponent)
}
return nil
@ -155,8 +155,9 @@ func EntityNamecheck(path string, t EntityType) (err *PathValidationError) {
if bookmarkOrSnapshotDelims == 0 {
// hot path, all but last component
if err := ComponentNamecheck(string(comp)); err != nil {
return pve(err.Error())
component := string(comp)
if err := ComponentNamecheck(component); err != nil {
return pve(fmt.Sprintf("component %q: %s", component, err.Error()))
}
continue
}
@ -172,8 +173,9 @@ func EntityNamecheck(path string, t EntityType) (err *PathValidationError) {
}
for _, comp := range subComps {
if err := ComponentNamecheck(string(comp)); err != nil {
return pve(err.Error())
component := string(comp)
if err := ComponentNamecheck(component); err != nil {
return pve(fmt.Sprintf("component %q: %s", component, err.Error()))
}
}

View File

@ -49,6 +49,10 @@ func TestEntityNamecheck(t *testing.T) {
{strings.Repeat("a", MaxDatasetNameLen-2) + "/a", EntityTypeFilesystem, true},
{strings.Repeat("a", MaxDatasetNameLen-4) + "/a@b", EntityTypeSnapshot, true},
{strings.Repeat("a", MaxDatasetNameLen) + "/a@b", EntityTypeSnapshot, false},
// + is not allowed, and particularly relevant to test here because
// common timestamp formats usually use `+` as a delimiter for numeric timezone offset
// => cf with package `timestamp_formatting`
{"foo/bar@23+42", EntityTypeSnapshot, false},
}
for idx := range tcs {