diff --git a/config/config.go b/config/config.go index 7810e15..f27ba78 100644 --- a/config/config.go +++ b/config/config.go @@ -218,11 +218,16 @@ type SnapshottingEnum struct { } type SnapshottingPeriodic struct { - Type string `yaml:"type"` - Prefix string `yaml:"prefix"` - Interval *PositiveDuration `yaml:"interval"` - Hooks HookList `yaml:"hooks,optional"` - TimestampFormat string `yaml:"timestamp_format,optional,default=dense"` + Type string `yaml:"type"` + Prefix string `yaml:"prefix"` + Interval *PositiveDuration `yaml:"interval"` + Hooks HookList `yaml:"hooks,optional"` + TimestampFormattingSpec `yaml:",inline"` +} + +type TimestampFormattingSpec struct { + TimestampFormat string `yaml:"timestamp_format,optional,default=dense"` + TimestampLocation string `yaml:"timestamp_location,optional,default=UTC"` } type CronSpec struct { @@ -251,11 +256,11 @@ func (s *CronSpec) UnmarshalYAML(unmarshal func(v interface{}, not_strict bool) } type SnapshottingCron struct { - Type string `yaml:"type"` - Prefix string `yaml:"prefix"` - Cron CronSpec `yaml:"cron"` - Hooks HookList `yaml:"hooks,optional"` - TimestampFormat string `yaml:"timestamp_format,optional,default=dense"` + Type string `yaml:"type"` + Prefix string `yaml:"prefix"` + Cron CronSpec `yaml:"cron"` + Hooks HookList `yaml:"hooks,optional"` + TimestampFormattingSpec `yaml:",inline"` } type SnapshottingManual struct { diff --git a/config/config_snapshotting_test.go b/config/config_snapshotting_test.go index dda724c..c595d87 100644 --- a/config/config_snapshotting_test.go +++ b/config/config_snapshotting_test.go @@ -163,7 +163,8 @@ jobs: assert.Equal(t, "periodic", snp.Type) assert.Equal(t, 10*time.Minute, snp.Interval.Duration()) assert.Equal(t, "zrepl_", snp.Prefix) - assert.Equal(t, "dense", snp.TimestampFormat) // default was set correctly + assert.Equal(t, snp.TimestampFormat, "dense") + assert.Equal(t, snp.TimestampLocation, "UTC") }) t.Run("cron", func(t *testing.T) { @@ -171,6 +172,7 @@ jobs: snp := c.Jobs[0].Ret.(*PushJob).Snapshotting.Ret.(*SnapshottingCron) assert.Equal(t, "cron", snp.Type) assert.Equal(t, "zrepl_", snp.Prefix) - assert.Equal(t, "dense", snp.TimestampFormat) // default was set correctly + assert.Equal(t, snp.TimestampFormat, "dense") + assert.Equal(t, snp.TimestampLocation, "UTC") }) } diff --git a/daemon/snapper/cron.go b/daemon/snapper/cron.go index 0797988..c67a033 100644 --- a/daemon/snapper/cron.go +++ b/daemon/snapper/cron.go @@ -10,6 +10,7 @@ import ( "github.com/zrepl/zrepl/config" "github.com/zrepl/zrepl/daemon/hooks" + "github.com/zrepl/zrepl/daemon/snapper/snapname" "github.com/zrepl/zrepl/util/suspendresumesafetimer" "github.com/zrepl/zrepl/zfs" ) @@ -20,10 +21,15 @@ func cronFromConfig(fsf zfs.DatasetFilter, in config.SnapshottingCron) (*Cron, e if err != nil { return nil, errors.Wrap(err, "hook config error") } + + formatter, err := snapname.New(in.Prefix, in.TimestampFormat, in.TimestampLocation) + if err != nil { + return nil, errors.Wrap(err, "build snapshot name formatter") + } + planArgs := planArgs{ - prefix: in.Prefix, - timestampFormat: in.TimestampFormat, - hooks: hooksList, + formatter: formatter, + hooks: hooksList, } return &Cron{config: in, fsf: fsf, planArgs: planArgs}, nil } diff --git a/daemon/snapper/impl.go b/daemon/snapper/impl.go index 6d2cec6..2b67f63 100644 --- a/daemon/snapper/impl.go +++ b/daemon/snapper/impl.go @@ -4,20 +4,19 @@ import ( "context" "fmt" "sort" - "strconv" "strings" "time" "github.com/zrepl/zrepl/daemon/hooks" "github.com/zrepl/zrepl/daemon/logging" + "github.com/zrepl/zrepl/daemon/snapper/snapname" "github.com/zrepl/zrepl/util/chainlock" "github.com/zrepl/zrepl/zfs" ) type planArgs struct { - prefix string - timestampFormat string - hooks *hooks.List + formatter *snapname.Formatter + hooks *hooks.List } type plan struct { @@ -60,21 +59,6 @@ type snapProgress struct { runResults hooks.PlanReport } -func (plan *plan) formatNow(format string) string { - now := time.Now().UTC() - switch strings.ToLower(format) { - case "dense": - format = "20060102_150405_000" - case "human": - format = "2006-01-02_15:04:05" - case "iso-8601": - format = "2006-01-02T15:04:05.000Z" - case "unix-seconds": - return strconv.FormatInt(now.Unix(), 10) - } - return now.Format(format) -} - func (plan *plan) execute(ctx context.Context, dryRun bool) (ok bool) { hookMatchCount := make(map[hooks.Hook]int, len(*plan.args.hooks)) @@ -85,8 +69,7 @@ func (plan *plan) execute(ctx context.Context, dryRun bool) (ok bool) { anyFsHadErr := false // TODO channel programs -> allow a little jitter? for fs, progress := range plan.snaps { - suffix := plan.formatNow(plan.args.timestampFormat) - snapname := fmt.Sprintf("%s%s", plan.args.prefix, suffix) + snapname := plan.args.formatter.Format(time.Now()) ctx := logging.WithInjectedField(ctx, "fs", fs.ToString()) ctx = logging.WithInjectedField(ctx, "snap", snapname) diff --git a/daemon/snapper/periodic.go b/daemon/snapper/periodic.go index d6068f7..612c412 100644 --- a/daemon/snapper/periodic.go +++ b/daemon/snapper/periodic.go @@ -10,6 +10,7 @@ import ( "github.com/pkg/errors" "github.com/zrepl/zrepl/daemon/logging/trace" + "github.com/zrepl/zrepl/daemon/snapper/snapname" "github.com/zrepl/zrepl/config" "github.com/zrepl/zrepl/daemon/hooks" @@ -32,13 +33,17 @@ func periodicFromConfig(g *config.Global, fsf zfs.DatasetFilter, in *config.Snap return nil, errors.Wrap(err, "hook config error") } + formatter, err := snapname.New(in.Prefix, in.TimestampFormat, in.TimestampLocation) + if err != nil { + return nil, errors.Wrap(err, "build snapshot name formatter") + } + args := periodicArgs{ interval: in.Interval.Duration(), fsf: fsf, planArgs: planArgs{ - prefix: in.Prefix, - timestampFormat: in.TimestampFormat, - hooks: hookList, + formatter: formatter, + hooks: hookList, }, // ctx and log is set in Run() } @@ -166,7 +171,7 @@ func periodicStateSyncUp(a periodicArgs, u updater) state { if err != nil { return onErr(err, u) } - syncPoint, err := findSyncPoint(a.ctx, fss, a.planArgs.prefix, a.interval) + syncPoint, err := findSyncPoint(a.ctx, fss, a.planArgs.formatter.Prefix(), a.interval) if err != nil { return onErr(err, u) } diff --git a/daemon/snapper/snapname/snapname.go b/daemon/snapper/snapname/snapname.go new file mode 100644 index 0000000..91b4831 --- /dev/null +++ b/daemon/snapper/snapname/snapname.go @@ -0,0 +1,52 @@ +package snapname + +import ( + "fmt" + "time" + + "github.com/pkg/errors" + + "github.com/zrepl/zrepl/daemon/snapper/snapname/timestamp" + "github.com/zrepl/zrepl/zfs" +) + +type Formatter struct { + prefix string + timestamp *timestamp.Formatter +} + +func New(prefix, tsFormat, tsLocation string) (*Formatter, error) { + timestamp, err := timestamp.New(tsFormat, tsLocation) + if err != nil { + return nil, errors.Wrap(err, "build timestamp formatter") + } + formatter := &Formatter{ + prefix: prefix, + timestamp: timestamp, + } + // Best-effort check to detect whether the result would be an invalid name. + // Test two dates that in most places have will have different time zone offsets due to DST. + check := func(t time.Time) error { + testFormat := formatter.Format(t) + if err := zfs.ComponentNamecheck(testFormat); err != nil { + // testFormat last, can be quite long + return fmt.Errorf("`invalid snapshot name would result from `prefix+$timestamp`: %s: %q", err, testFormat) + } + return nil + } + if err := check(time.Date(2020, 6, 1, 0, 0, 0, 0, time.UTC)); err != nil { + return nil, err + } + if err := check(time.Date(2020, 12, 1, 0, 0, 0, 0, time.UTC)); err != nil { + return nil, err + } + return formatter, nil +} + +func (f *Formatter) Format(now time.Time) string { + return f.prefix + f.timestamp.Format(now) +} + +func (f *Formatter) Prefix() string { + return f.prefix +} diff --git a/daemon/snapper/snapname/timestamp/timestamp.go b/daemon/snapper/snapname/timestamp/timestamp.go new file mode 100644 index 0000000..e70ca99 --- /dev/null +++ b/daemon/snapper/snapname/timestamp/timestamp.go @@ -0,0 +1,93 @@ +package timestamp + +import ( + "fmt" + "strconv" + "strings" + "time" + + "github.com/pkg/errors" +) + +type Formatter struct { + format func(time.Time) string + location *time.Location +} + +func New(formatString string, locationString string) (*Formatter, error) { + location, err := time.LoadLocation(locationString) // no shadow + if err != nil { + return nil, errors.Wrapf(err, "load location from string %q", locationString) + } + makeFormatFunc := func(formatString string) (func(time.Time) string, error) { + // NB: we use zfs.EntityNamecheck in higher-level code to filter out all invalid characters. + // This check here is specifically so that we know for sure that the `+`=>`_` replacement + // that we do in the returned func replaces exactly the timezone offset `+` and not some other `+`. + if strings.Contains(formatString, "+") { + return nil, fmt.Errorf("character '+' is not allowed in ZFS snapshot names and has special handling") + } + return func(t time.Time) string { + res := t.Format(formatString) + // if the formatString contains a time zone specifier + // and the location would result in a positive offset to UTC + // then the result of t.Format would contain a '+' sign. + if isLocationPositiveOffsetToUTC(location) { + // the only source of `+` can be the positive time zone offset because we disallowed `+` as a character in the format string + res = strings.Replace(res, "+", "_", 1) + } + if strings.Contains(res, "+") { + panic(fmt.Sprintf("format produced a string containing illegal character '+' that wasn't the expected case of positive time zone offset: format=%q location=%q unix=%q result=%q", formatString, location, t.Unix(), res)) + } + return res + }, nil + } + var formatFunc func(time.Time) string + mustUseUtcError := func() error { + return fmt.Errorf("format string requires UTC location") + } + switch strings.ToLower(formatString) { + case "dense": + if location != time.UTC { + err = mustUseUtcError() + } else { + formatFunc, err = makeFormatFunc("20060102_150405_000") + } + case "human": + if location != time.UTC { + err = mustUseUtcError() + } else { + formatFunc, err = makeFormatFunc("2006-01-02_15:04:05") + } + case "iso-8601": + formatFunc, err = makeFormatFunc("2006-01-02T15:04:05.000Z0700") + case "unix-seconds": + if location != time.UTC { + // Technically not required because unix time is by definition in UTC + // but let's make that clear to confused users... + err = mustUseUtcError() + } else { + formatFunc = func(t time.Time) string { + return strconv.FormatInt(t.Unix(), 10) + } + } + default: + formatFunc, err = makeFormatFunc(formatString) + } + if err != nil { + return nil, errors.Wrapf(err, "invalid format string %q or location %q", formatString, locationString) + } + return &Formatter{ + format: formatFunc, + location: location, + }, nil +} + +func isLocationPositiveOffsetToUTC(location *time.Location) bool { + _, offsetSeconds := time.Now().In(location).Zone() + return offsetSeconds > 0 +} + +func (f *Formatter) Format(t time.Time) string { + t = t.In(f.location) + return f.format(t) +} diff --git a/daemon/snapper/snapname/timestamp/timestamp_test.go b/daemon/snapper/snapname/timestamp/timestamp_test.go new file mode 100644 index 0000000..da40b02 --- /dev/null +++ b/daemon/snapper/snapname/timestamp/timestamp_test.go @@ -0,0 +1,84 @@ +package timestamp + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var utc, _ = time.LoadLocation("UTC") +var berlin, _ = time.LoadLocation("Europe/Berlin") +var nyc, _ = time.LoadLocation("America/New_York") + +func TestAssumptionsAboutTimePackage(t *testing.T) { + now := time.Now() + assert.Equal(t, now.In(utc).Unix(), now.In(berlin).Unix(), "unix timestamp is always in UTC") +} + +func TestLegacyIso8601Format(t *testing.T) { + // Before we allowed users to specify the location of the time to be formatted, + // we always used UTC. + // At the time, the `iso-8601` format used the following format string + // "2006-01-02T15:04:05.000Z" + // That format string's `Z` was never identified by the Go time package as a time zone specifier. + // The correct way would have been + // "2006-01-02T15:04:05.000Z07" + // "2006-01-02T15:04:05.000Z0700" + // "2006-01-02T15:04:05.000Z070000" + // or any variation of how the minute / second offsets are displayed. + // However, because of the forced location UTC, it didn't matter, because both format strings + // evaluate to the same time string. + // + // This test is here to ensure that the legacy behavior is preserved for users who don't specify a non-UTC location + // (UTC location is the default location at this time) + + oldFormatString := "2006-01-02T15:04:05.000Z" + now := time.Now() + oldOutput := now.In(utc).Format(oldFormatString) + + currentImpl, err := New("iso-8601", "UTC") + require.NoError(t, err) + currentOutput := currentImpl.Format(now) + + assert.Equal(t, oldOutput, currentOutput, "legacy behavior of iso-8601 format is preserved") +} + +func TestIso8601PrintsTimeZoneOffset(t *testing.T) { + f, err := New("iso-8601", "Europe/Berlin") + require.NoError(t, err) + out := f.Format(time.Date(2024, 5, 14, 21, 16, 23, 0, time.UTC)) + require.Equal(t, "2024-05-14T23:16:23.000_0200", out, "time zone offset is printed") +} + +func TestBuiltinFormatsErrorOnNonUtcLocation(t *testing.T) { + var err error + + expectMsg := `^.*: format string requires UTC location$` + + _, err = New("dense", "Europe/Berlin") + require.Regexp(t, expectMsg, err) + + _, err = New("human", "Europe/Berlin") + require.Regexp(t, expectMsg, err) + + _, err = New("unix-seconds", "Europe/Berlin") + require.Regexp(t, expectMsg, err) + + _, err = New("iso-8601", "Europe/Berlin") + require.NoError(t, err, "iso-8601 prints time zone, so non-UTC locations are allowed, see test TestIso8601PrintsTimeZoneOffset") +} + +func TestPositiveUtcOffsetDetection(t *testing.T) { + assert.True(t, isLocationPositiveOffsetToUTC(berlin), "Berlin is UTC+1/UTC+2") + assert.False(t, isLocationPositiveOffsetToUTC(utc), "UTC is UTC+0") + assert.False(t, isLocationPositiveOffsetToUTC(nyc), "New York is UTC-5/UTC-4") +} + +func TestFormatCanReplacePlusWithUnderscore(t *testing.T) { + f, err := New("2006-01-02_15:04:05-07:00:00", "Europe/Berlin") + require.NoError(t, err) + out := f.Format(time.Date(2024, 5, 14, 21, 16, 23, 0, time.UTC)) + require.Equal(t, "2024-05-14_23:16:23_02:00:00", out, "+ is replaced with _ so we can use the string in ZFS snapshots") +} diff --git a/docs/configuration/snapshotting.rst b/docs/configuration/snapshotting.rst index 30885ef..2855459 100644 --- a/docs/configuration/snapshotting.rst +++ b/docs/configuration/snapshotting.rst @@ -25,8 +25,9 @@ The following snapshotting types are supported: The ``periodic`` and ``cron`` snapshotting types share some common options and behavior: -* **Naming:** The snapshot names are composed of a user-defined ``prefix`` followed by a UTC date formatted like ``20060102_150405_000``. - We use UTC because it will avoid name conflicts when switching time zones or between summer and winter time. +* **Naming:** The snapshot names are composed of a user-defined ``prefix`` followed by a UTC date formatted like ``20060102_150405_000`` by default. + We either use UTC or timestamp with timezone information because it will avoid name conflicts when switching time zones or between summer and winter time. + Note that if timestamp with time zone information is used, the "+" of the timezone (i.e. +02:00) is replaced by "_" (i.e. _02:00) to conform to allowed characters in ZFS snapshots. * **Hooks:** You can configure hooks to run before or after zrepl takes the snapshots. See :ref:`below ` for details. * **Push replication:** After creating all snapshots, the snapshotter will wake up the replication part of the job, if it's a ``push`` job. Note that snapshotting is decoupled from replication, i.e., if it is down or takes too long, snapshots will still be taken. @@ -65,6 +66,9 @@ The ``periodic`` and ``cron`` snapshotting types share some common options and b # Timestamp format that is used as snapshot suffix. # Can be any of "dense" (default), "human", "iso-8601", "unix-seconds" or a custom Go time format (see https://go.dev/src/time/format.go) timestamp_format: dense + # Specifies in which time zone the snapshot suffix is generated, optional, defaults to UTC, used only if time zone information is part of timestamp_format. + # Can be "UTC" (default), "Local" (time zone information from the OS) or any of the IANA Time Zone names (see https://nodatime.org/TimeZones) + timestamp_location: UTC hooks: ... pruning: ... @@ -97,6 +101,9 @@ The snapshotter uses the ``prefix`` to identify which snapshots it created. # Timestamp format that is used as snapshot suffix. # Can be any of "dense" (default), "human", "iso-8601", "unix-seconds" or a custom Go time format (see https://go.dev/src/time/format.go) timestamp_format: dense + # Specifies in which time zone the snapshot suffix is generated, optional, defaults to UTC, used only if time zone information is part of timestamp_format. + # Can be "UTC" (default), "Local" (time zone information from the OS) or any of the IANA Time Zone names (see https://nodatime.org/TimeZones) + timestamp_location: UTC pruning: ... In ``cron`` mode, the snapshotter takes snaphots at fixed points in time. @@ -104,20 +111,6 @@ See https://en.wikipedia.org/wiki/Cron for details on the syntax. zrepl uses the ``the github.com/robfig/cron/v3`` Go package for parsing. An optional field for "seconds" is supported to take snapshots at sub-minute frequencies. -.. _job-snapshotting-timestamp_format: - -Timestamp Format -~~~~~~~~~~~~~~~~ - -The ``cron`` and ``periodic`` snapshotter support configuring a custom timestamp format that is used as suffix for the snapshot name. -It can be used by setting ``timestamp_format`` to any of the following values: - -* ``dense`` (default) looks like ``20060102_150405_000`` -* ``human`` looks like ``2006-01-02_15:04:05`` -* ``iso-8601`` looks like ``2006-01-02T15:04:05.000Z`` -* ``unix-seconds`` looks like ``1136214245`` -* Any custom Go time format accepted by `time.Time#Format `_. - ``manual`` Snapshotting ----------------------- @@ -137,6 +130,33 @@ See :ref:`this quickstart guide ` for an exa To trigger replication after taking snapshots, use the ``zrepl signal wakeup JOB`` command. +.. _job-snapshotting-timestamp_format: + +Timestamp Format +---------------- + +The ``cron`` and ``periodic`` snapshotter support configuring a custom timestamp format that is used as suffix for the snapshot name. + +By default, the current time is converted to UTC and then formatted using the ``dense`` format. + +Both time zone and format can be customized by setting ``timestamp_format`` and/or ``timestamp_location``. + +``timestamp_location`` is fed verbatim into `time.LoadLocation `_. + + ``timestamp_format`` supports the following values (case-insensitive). + +* ``dense`` => Go format string ``20060102_150405_000`` +* ``human`` => Go format string ``2006-01-02_15:04:05`` +* ``iso-8601`` => Go format string ``2006-01-02T15:04:05.000Z07:00`` +* ``unix-seconds`` => Unix seconds since the epoch, formatted as a decimal string (`time.Time.Unix `_) +* Any custom Go time format accepted by `time.Time#Format `_. + +The ``dense``, ``human``, and ``unix-seconds`` formats require the ``timestamp_location`` to be ``UTC`` because they do not contain timezone information. + +The ``+`` character is `not allowed in ZFS snapshot names `_. +Format strings are disallowed to contain the ``+`` character (it's not a character that is interpreted by ``time.Time.Format``, so there's no reason to use it). +If a format _produces_ a ``+``, that ``+`` is replaced by an `_`` character. + .. _job-snapshotting-hooks: Pre- and Post-Snapshot Hooks @@ -173,7 +193,7 @@ Most hook types take additional parameters, please refer to the respective subse * - ``mysql-lock-tables`` - :ref:`Details ` - Flush and read-Lock MySQL tables while taking the snapshot. - + .. _job-hook-type-command: ``command`` Hooks diff --git a/zfs/namecheck.go b/zfs/namecheck.go index c350160..fe3a5d7 100644 --- a/zfs/namecheck.go +++ b/zfs/namecheck.go @@ -55,22 +55,22 @@ var componentValidChar = regexp.MustCompile(`^[0-9a-zA-Z-_\.: ]+$`) // [-_.: ] func ComponentNamecheck(datasetPathComponent string) error { if len(datasetPathComponent) == 0 { - return fmt.Errorf("path component must not be empty") + return fmt.Errorf("must not be empty") } if len(datasetPathComponent) > MaxDatasetNameLen { - return fmt.Errorf("path component must not be longer than %d chars", MaxDatasetNameLen) + return fmt.Errorf("must not be longer than %d chars", MaxDatasetNameLen) } if !(isASCII(datasetPathComponent)) { - return fmt.Errorf("path component must be ASCII") + return fmt.Errorf("must be ASCII") } if !componentValidChar.MatchString(datasetPathComponent) { - return fmt.Errorf("path component must only contain alphanumeric chars and any in %q", "-_.: ") + return fmt.Errorf("must only contain alphanumeric chars and any in %q", "-_.: ") } if datasetPathComponent == "." || datasetPathComponent == ".." { - return fmt.Errorf("path component must not be '%s'", datasetPathComponent) + return fmt.Errorf("must not be '%s'", datasetPathComponent) } return nil @@ -155,8 +155,9 @@ func EntityNamecheck(path string, t EntityType) (err *PathValidationError) { if bookmarkOrSnapshotDelims == 0 { // hot path, all but last component - if err := ComponentNamecheck(string(comp)); err != nil { - return pve(err.Error()) + component := string(comp) + if err := ComponentNamecheck(component); err != nil { + return pve(fmt.Sprintf("component %q: %s", component, err.Error())) } continue } @@ -172,8 +173,9 @@ func EntityNamecheck(path string, t EntityType) (err *PathValidationError) { } for _, comp := range subComps { - if err := ComponentNamecheck(string(comp)); err != nil { - return pve(err.Error()) + component := string(comp) + if err := ComponentNamecheck(component); err != nil { + return pve(fmt.Sprintf("component %q: %s", component, err.Error())) } } diff --git a/zfs/namecheck_test.go b/zfs/namecheck_test.go index 13314ea..49586e4 100644 --- a/zfs/namecheck_test.go +++ b/zfs/namecheck_test.go @@ -49,6 +49,10 @@ func TestEntityNamecheck(t *testing.T) { {strings.Repeat("a", MaxDatasetNameLen-2) + "/a", EntityTypeFilesystem, true}, {strings.Repeat("a", MaxDatasetNameLen-4) + "/a@b", EntityTypeSnapshot, true}, {strings.Repeat("a", MaxDatasetNameLen) + "/a@b", EntityTypeSnapshot, false}, + // + is not allowed, and particularly relevant to test here because + // common timestamp formats usually use `+` as a delimiter for numeric timezone offset + // => cf with package `timestamp_formatting` + {"foo/bar@23+42", EntityTypeSnapshot, false}, } for idx := range tcs {