forked from extern/zfs-auto-snapshot
## was a bad idea - making // do the right thing instead.
This commit is contained in:
parent
5d95fb20bd
commit
1a98b469a2
17
Changelog
17
Changelog
@ -5,19 +5,18 @@
|
||||
- service start/stop logs stay under /var/svc/log
|
||||
- other logs saved to /export/home/zfssnap (and syslog)
|
||||
* Add a 'zfs/interval' property value 'none' which doesn't use cron
|
||||
* Add a 'zfs/fs-name' property value '##'
|
||||
* Add a cache of svcprops to the method script
|
||||
* Add a com.sun:auto-snapshot user property, com.sun:auto-snapshot:$LABEL
|
||||
takes precedence
|
||||
* Remove the seconds field of the snapshot name - we don't really need it
|
||||
* Make recursive snapshots the default for bundled '//' manifests
|
||||
* Changed the way // works with recursive snapshots
|
||||
(look for local props only when using -r)
|
||||
* Add a com.sun:auto-snapshot user property used by all instances,
|
||||
com.sun:auto-snapshot:$LABEL takes precedence
|
||||
* Remove the seconds field of the snapshot name - it's not needed
|
||||
* Changed the way // works with recursive snapshots - ignore
|
||||
snapshot-children, and instead automatically determine when we can take
|
||||
recursive snapshots based on which datasets have the zfs user properties
|
||||
* Set avoidscrub to false by default (bug was fixed in nv_94)
|
||||
* Bugfix from Dan - Volumes are datasets too
|
||||
* Automatically snapshot everything by setting com.sun:auto-snapshot=true
|
||||
on startup. (this gets done on all root pools - existing properties set
|
||||
to false will override this)
|
||||
on startup. (this gets done on all top level datasets - an existing
|
||||
property set to false on the top level dataset overrides this)
|
||||
* Check for missed snapshots on startup
|
||||
* Clean up shell style
|
||||
* Clean up preremove script
|
||||
|
@ -63,17 +63,10 @@ The properties each instance needs are:
|
||||
|
||||
# zfs set com.sun:auto-snapshot:frequent=true tank/timf
|
||||
|
||||
When the "snap-children" property is set to "true",
|
||||
only locally-set dataset properties are used to
|
||||
determine which filesystems to snapshot -
|
||||
property inheritance is not respected in this case,
|
||||
but yields better performance for large dataset trees.
|
||||
|
||||
The special filesystem name "##" is the reverse of the
|
||||
above - it snapshots all filesystems, except ones that are
|
||||
explicitly marked with a "com.sun:auto-snapshot:<label>"
|
||||
set to "false". Using this zfs/fs-name value implicitly
|
||||
turns off the "snap-children" flag.
|
||||
The "snap-children" property is ignored when using this
|
||||
fs-name value. Instead, the system automatically determines
|
||||
when it's able to take recursive, vs. non-recursive snapshots
|
||||
of the system, based on the values of the ZFS user properties.
|
||||
|
||||
zfs/interval [ hours | days | months | none]
|
||||
When set to none, we don't take automatic snapshots, but
|
||||
@ -93,7 +86,8 @@ The properties each instance needs are:
|
||||
(eg. every 10 days)
|
||||
|
||||
zfs/snapshot-children "true" if you would like to recursively take snapshots
|
||||
of all child filesystems of the specified fs-name.
|
||||
of all child filesystems of the specified fs-name.
|
||||
This value is ignored when setting zfs/fs-name='//'
|
||||
|
||||
zfs/backup [ full | incremental | none ]
|
||||
|
||||
|
@ -68,7 +68,10 @@ SEP=":"
|
||||
# whenever we have $FMRI defined. Used by the print_log and
|
||||
# print_note functions below for all output, it's defined
|
||||
# by the schedule_snapshots take_snapshots and unschedule_snapshots
|
||||
# methods.
|
||||
# methods. Note that for take_snapshot LOG gets set to
|
||||
# a file in the zfssnap role's home directory, as we don't own
|
||||
# the SMF log. Start/stop logging goes to restarter/logfile,
|
||||
# everything else goes in the zfssnap role's log.
|
||||
LOG=""
|
||||
|
||||
|
||||
@ -143,7 +146,7 @@ function schedule_snapshots {
|
||||
typeset OFFSET=0
|
||||
|
||||
case $FILESYS in
|
||||
'//' | '##')
|
||||
'//')
|
||||
;;
|
||||
*)
|
||||
# validate the filesystem
|
||||
@ -297,7 +300,7 @@ function unschedule_snapshots {
|
||||
# This function is intended to be called on service start. It checks to see
|
||||
# if the last snapshot was taken more than <frequency> <intervals> ago,
|
||||
# and if that's the case, takes a snapshot immediately.
|
||||
function check_missed_snapshots { # $INTERVAL $PERIOD $FMRI
|
||||
function check_missed_snapshots { # $INTERVAL $PERIOD $FMRI <repopulate cache>
|
||||
set -x
|
||||
|
||||
typeset INTERVAL=$1
|
||||
@ -306,12 +309,27 @@ function check_missed_snapshots { # $INTERVAL $PERIOD $FMRI
|
||||
typeset FILESYS=$fs_name
|
||||
typeset LABEL=$label
|
||||
|
||||
if [ -n $4 ] ; then
|
||||
typeset NO_CACHE_REPOPULATE=$4
|
||||
fi
|
||||
|
||||
# // is special, in that we take snapshots based on user properties
|
||||
# so here, we get those properties, and call ourselves again, with
|
||||
# those values.
|
||||
case "$FILESYS" in
|
||||
"//")
|
||||
FILESYS=$(get_snapshot_datasets $LABEL $SNAP_CHILDREN)
|
||||
;;
|
||||
"##")
|
||||
FILESYS=$(get_nonexcluded_datasets $LABEL)
|
||||
get_userprop_datasets
|
||||
export snapshot_children=false
|
||||
export fs_name="$SINGLE_LIST"
|
||||
print_note "Checking for non-recursive missed // snapshots $SINGLE_LIST"
|
||||
check_missed_snapshots $INTERVAL $PERIOD $FMRI no_repopulate_cache
|
||||
|
||||
export snapshot_children=true
|
||||
export fs_name="$RECURSIVE_LIST"
|
||||
print_note "Checking for recursive missed // snapshots $RECURSIVE_LIST"
|
||||
check_missed_snapshots $INTERVAL $PERIOD $FMRI no_repopulate_cache
|
||||
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
|
||||
@ -370,16 +388,22 @@ function check_missed_snapshots { # $INTERVAL $PERIOD $FMRI
|
||||
if [ $AGO -gt $PERIOD_S ] ; then
|
||||
print_log "Last snapshot for $FMRI taken on $LAST_SNAP_TIME_HUMAN"
|
||||
print_log "which was greater than the $PERIOD $INTERVAL schedule. Taking snapshot now."
|
||||
take_snapshot $FMRI
|
||||
take_snapshot $FMRI $NO_CACHE_REPOPULATE
|
||||
fi
|
||||
}
|
||||
|
||||
# This function actually takes the snapshot of the filesystem.
|
||||
# $1 is assumed to be a valid FMRI
|
||||
# $1 is assumed to be a valid FMRI. $2 if non-null makes us skip
|
||||
# populating the SMF property cache - used only by the special
|
||||
# // snapshot type.
|
||||
function take_snapshot {
|
||||
# want this to be global, used by check_failure
|
||||
FMRI=$1
|
||||
zfs_smf_props $FMRI
|
||||
NO_CACHE_REPOPULATE=$2
|
||||
|
||||
if [ -z "$NO_CACHE_REPOPULATE" ] ; then
|
||||
zfs_smf_props $FMRI
|
||||
fi
|
||||
|
||||
# When taking snapshots, because we're running as a role
|
||||
# and can't redirect our output through SMF, we don't have
|
||||
@ -410,22 +434,25 @@ function take_snapshot {
|
||||
# user property which specifies which datasets should be snapshotted
|
||||
# and under which "label" - a set of default service instances that
|
||||
# snapshot at defined periods (daily, weekly, monthly, every 15 mins)
|
||||
|
||||
# the "##" filesystem is also special. It takes snapshots of
|
||||
# all datasets (non-recursively) *except* those marked with the tag
|
||||
# "com.sun:auto-snapshot:${LABEL}" = false. We necessarily ignore
|
||||
# the SNAP_CHILDREN setting in this case, as that could result in
|
||||
# us inadvertently taking snapshots of a child dataset under the
|
||||
# parent's lack of "com.sun:auto-snapshot:${LABEL}" = false tag.
|
||||
|
||||
# Determine what these are, call ourselves again, then return.
|
||||
case "$FILESYS" in
|
||||
"//")
|
||||
FILESYS=$(get_snapshot_datasets $LABEL $SNAP_CHILDREN)
|
||||
;;
|
||||
"##")
|
||||
FILESYS=$(get_nonexcluded_datasets $LABEL)
|
||||
SNAP_CHILDREN=false
|
||||
;;
|
||||
# this populates two values SINGLE_LIST and RECURSIVE_LIST
|
||||
get_userprop_datasets $LABEL
|
||||
|
||||
print_note "Taking non-recursive snapshots $SINGLE_LIST"
|
||||
export snapshot_children=false
|
||||
export fs_name="$SINGLE_LIST"
|
||||
take_snapshots $FMRI no_propcache_repopulate
|
||||
single_STATE=$?
|
||||
|
||||
print_note "Taking recursive snapshots of $RECURSIVE_LIST"
|
||||
export snapshot_childrent=false
|
||||
export fs_name="$RECURSIVE_LIST"
|
||||
take_snapshots $FMRI no_propcache_repopulate
|
||||
recursive_STATE=$?
|
||||
return $single_STATE && $recursive_STATE
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ "$LABEL" != "\"\"" ] ; then
|
||||
@ -724,40 +751,6 @@ function take_backup { # filesystem backup-type label fmri
|
||||
|
||||
}
|
||||
|
||||
# Get a list of filesystem we should snapshot. If snap_children is "true"
|
||||
# then we don't list children that inherit the parent's property - we just look
|
||||
# for locally set properties, and let "zfs snapshot -r" snapshot the children.
|
||||
function get_snapshot_datasets { #LABEL #SNAP_CHILDREN
|
||||
|
||||
typeset LABEL=$1
|
||||
typeset SNAP_CHILDREN=$2
|
||||
if [ "${SNAP_CHILDREN}" = "true" ] ; then
|
||||
# FIXME this doesn't cope with the case where a dataset is
|
||||
# set locally to differing values - value for :LABEL needs to override
|
||||
typeset FS=$(zfs get -H -s local \
|
||||
-o name,value com.sun:auto-snapshot,com.sun:auto-snapshot:$LABEL \
|
||||
| grep -v 'false$' | egrep ' true '\|'true$' | awk '{print $1}' | sort)
|
||||
FS="$(narrow_recursive_filesystems $FS)"
|
||||
else
|
||||
typeset FS=$(zfs list -t filesystem,volume \
|
||||
-o name,com.sun:auto-snapshot,com.sun:auto-snapshot:$LABEL \
|
||||
| grep -v 'false$' | egrep ' true '\|'true$' | awk '{print $1}')
|
||||
fi
|
||||
echo "$FS"
|
||||
}
|
||||
|
||||
# Get a list of filesystems we should snapshot. We look for all filesystems
|
||||
# and volumes that don't have a property com.sun:auto-snapshot:$LABEL
|
||||
# or com.sun:auto-snapshot set to false. The :$LABEL specifier overrides
|
||||
# the more general property setting.
|
||||
function get_nonexcluded_datasets {
|
||||
|
||||
typeset LABEL=$1
|
||||
typeset FS=$(zfs list -H -t filesystem,volume \
|
||||
-o name,com.sun:auto-snapshot,com.sun:auto-snapshot:$LABEL \
|
||||
| egrep -v ' false -$'\|'false$' | awk '{print $1}')
|
||||
echo "$FS"
|
||||
}
|
||||
|
||||
# Given a sorted list of filesystems, determine whether any of the
|
||||
# listed filesystems are redundant
|
||||
@ -784,7 +777,66 @@ function narrow_recursive_filesystems {
|
||||
echo ${LIST} | sed -e 's#//##g'
|
||||
}
|
||||
|
||||
# Decide whether dataset $1 may be snapshotted recursively.
#
# Globals:   EXCLUDE (read) - path of a file whose lines start with the
#            name of a dataset that must not be snapshotted, followed by
#            whitespace-separated property columns.
# Arguments: $1 - dataset name to test
# Returns:   0 if neither $1 nor any dataset beneath it appears in
#            $EXCLUDE, 1 if $1 is excluded or is on the path to an
#            excluded dataset.
function can_recursive_snapshot {
	typeset ds=$1
	typeset name rest

	# Compare the first column literally and anchored at the start of the
	# name. A plain regex search for "$ds/" would also hit unrelated
	# datasets that merely contain $ds (eg. "bigtank/data/x" when asked
	# about "tank/data"), and would treat characters such as '.' in the
	# name as wildcards. Splitting with read copes with both tab- and
	# space-separated columns.
	while read -r name rest ; do
		case "$name" in
		"$ds" | "$ds"/*)
			# we can't recursively snapshot $ds because
			# it's excluded or is in the path to an excluded dataset
			return 1
			;;
		esac
	done < "$EXCLUDE"

	return 0
}
|
||||
|
||||
# Test whether dataset $1 is itself listed in the exclude file.
#
# Globals:   EXCLUDE (read) - path of a file whose lines start with the
#            name of an excluded dataset, followed by whitespace-separated
#            property columns.
# Arguments: $1 - dataset name to test
# Returns:   0 if $1 is listed in $EXCLUDE, 1 otherwise.
function is_excluded {
	typeset ds=$1
	typeset name rest

	# Exact comparison against the first column only: a substring/regex
	# search would also match longer names ending in $ds, and would depend
	# on which whitespace character separates the columns.
	while read -r name rest ; do
		if [ "$name" = "$ds" ] ; then
			return 0
		fi
	done < "$EXCLUDE"

	return 1
}
|
||||
|
||||
# This builds two lists of datasets - RECURSIVE_LIST and SINGLE_LIST
# based on the value of ZFS user properties com.sun:auto-snapshot and
# com.sun:auto-snapshot:${LABEL}, where LABEL is the first argument to
# this function.
# RECURSIVE_LIST is a list of datasets that can be snapshotted with -r
# SINGLE_LIST is a list of datasets to snapshot individually.
#
# Globals:   RECURSIVE_LIST, SINGLE_LIST (written and exported),
#            FINAL_RECURSIVE_LIST (written)
# Arguments: $1 - the snapshot label
# Uses can_recursive_snapshot and is_excluded, which read the exclude
# file through $EXCLUDE, and narrow_recursive_filesystems to trim the
# recursive list.
#
# NOTE(review): the temporary files use predictable names under /tmp;
# consider creating them with mktemp if it is available on all targets.
#
function get_userprop_datasets {

	typeset LABEL=$1
	typeset ALL=/tmp/zfs-auto-snapshot-list.$$
	typeset EXCLUDE=/tmp/zfs-auto-snapshot-exclude.$$
	typeset ds

	# Always start from empty lists: both variables are global and
	# exported, so values left by an earlier call (or inherited from
	# the environment) would otherwise be appended to and duplicated.
	RECURSIVE_LIST=""
	SINGLE_LIST=""

	zfs list -H -t filesystem,volume -o \
	    name,com.sun:auto-snapshot,com.sun:auto-snapshot:${LABEL} > "$ALL"

	# A dataset is excluded when the label-specific property (last
	# column) is false, or when it is unset ("-") and the general
	# property is false. Matching on fields rather than a regex with
	# a literal separator keeps this correct for the tab-separated
	# output of "zfs list -H".
	awk '$NF == "false" || ($NF == "-" && $(NF-1) == "false")' \
	    "$ALL" > "$EXCLUDE"

	# iterating through datasets
	for ds in $(cut -f1 "$ALL" | sort -u) ; do
		if can_recursive_snapshot $ds ; then
			print_note "OK to recursive snapshot $ds"
			RECURSIVE_LIST="${RECURSIVE_LIST} $ds"
		else
			if ! is_excluded $ds ; then
				print_note "OK to snapshot sole dataset $ds"
				SINGLE_LIST="${SINGLE_LIST} $ds"
			else
				print_note "$ds will not be snapshotted"
			fi
		fi
	done

	# word splitting of the list is intentional here: each dataset is
	# passed as a separate argument
	FINAL_RECURSIVE_LIST=$(narrow_recursive_filesystems $RECURSIVE_LIST)
	print_note "Narrowed list of datasets to recursively snapshot is"
	print_note "$FINAL_RECURSIVE_LIST"
	export RECURSIVE_LIST="$FINAL_RECURSIVE_LIST"
	export SINGLE_LIST

	rm -f "$ALL" "$EXCLUDE"
}
|
||||
|
||||
# Determine if a pool is currently being scrubbed or resilvered.
|
||||
# Return 0 if it is scrubbing/resilvering, 1 otherwise.
|
||||
|
@ -72,15 +72,17 @@ they being :
|
||||
look at the com.sun:auto-snapshot ZFS user
|
||||
properties on datasets, set to "true" if the dataset
|
||||
should have snapshots taken by this instance.
|
||||
|
||||
If set to false or unset, snapshots will not be taken
|
||||
by this instance.
|
||||
|
||||
The snapshot-children property is ignored when using
|
||||
this setting, instead the system will automatically
|
||||
determine how to take snapshots, based on which datasets
|
||||
have true, false or unset property values.
|
||||
|
||||
The special filesystem name "##" indicates we should
|
||||
snapshot all filesystems on the machine, except those
|
||||
marked with the com.sun:auto-snapshot ZFS user property
|
||||
set to false. The snapshot-children setting is ignored when
|
||||
using this fs-name value.
|
||||
|
||||
For both of the above, setting com.sun:auto-snapshot:<label>
|
||||
will override the general setting for com.sun:auto-snapshot.
|
||||
Setting com.sun:auto-snapshot:<label> will override
|
||||
the general setting for com.sun:auto-snapshot.
|
||||
|
||||
|
||||
interval : minutes | hours | days | months | none
|
||||
@ -99,7 +101,8 @@ they being :
|
||||
delete the oldest when we hit this threshold
|
||||
|
||||
snapshot-children : Whether we should recursively snapshot
|
||||
all filesystems contained within.
|
||||
all filesystems contained within. Ignored when
|
||||
using the "//" fs-name value.
|
||||
|
||||
backup : If we want to perform a "zfs send" for our backup
|
||||
we set this - either to "full" or "incremental".
|
||||
|
Loading…
Reference in New Issue
Block a user