## was a bad idea - making // do the right thing instead.

This commit is contained in:
Tim Foster 2008-08-25 14:28:33 +01:00
parent 5d95fb20bd
commit 1a98b469a2
4 changed files with 136 additions and 88 deletions

View File

@ -5,19 +5,18 @@
- service start/stop logs stay under /var/svc/log
- other logs saved to /export/home/zfssnap (and syslog)
* Add a 'zfs/interval' property value 'none' which doesn't use cron
* Add a 'zfs/fs-name' property value '##'
* Add a cache of svcprops to the method script
* Add a com.sun:auto-snapshot user property, com.sun:auto-snapshot:$LABEL
takes precedence
* Remove the seconds field of the snapshot name - we don't really need it
* Make recursive snapshots the default for bundled '//' manifests
* Changed the way // works with recursive snapshots
(look for local props only when using -r)
* Add a com.sun:auto-snapshot user property used by all instances,
com.sun:auto-snapshot:$LABEL takes precedence
* Remove the seconds field of the snapshot name - it's not needed
* Changed the way // works with recursive snapshots - ignore
snapshot-children, and instead automatically determine when we can take
recursive snapshots based on which datasets have the zfs user properties
* Set avoidscrub to false by default (bug was fixed in nv_94)
* Bugfix from Dan - Volumes are datasets too
* Automatically snapshot everything by setting com.sun:auto-snapshot=true
on startup. (this gets done on all root pools - existing properties set
to false will override this)
on startup. (this gets done on all top level datasets - an existing
property set to false on the top level dataset overrides this)
* Check for missed snapshots on startup
* Clean up shell style
* Clean up preremove script

View File

@ -63,17 +63,10 @@ The properties each instance needs are:
# zfs set com.sun:auto-snapshot:frequent=true tank/timf
When the "snap-children" property is set to "true",
only locally-set dataset properties are used to
determine which filesystems to snapshot -
property inheritance is not respected in this case,
but yields better performance for large dataset trees.
The special filesystem name "##" is the reverse of the
above - it snapshots all filesystems, except ones that are
explicitly marked with a "com.sun:auto-snapshot:<label>"
set to "false". Using this zfs/fs-name value implicitly
turns off the "snap-children" flag.
The "snap-children" property is ignored when using this
fs-name value. Instead, the system automatically determines
when it's able to take recursive, vs. non-recursive snapshots
of the system, based on the values of the ZFS user properties.
zfs/interval [ hours | days | months | none]
When set to none, we don't take automatic snapshots, but
@ -93,7 +86,8 @@ The properties each instance needs are:
(eg. every 10 days)
zfs/snapshot-children "true" if you would like to recursively take snapshots
of all child filesystems of the specified fs-name.
of all child filesystems of the specified fs-name.
This value is ignored when setting zfs/fs-name='//'
zfs/backup [ full | incremental | none ]

View File

@ -68,7 +68,10 @@ SEP=":"
# whenever we have $FMRI defined. Used by the print_log and
# print_note functions below for all output, it's defined
# by the schedule_snapshots take_snapshots and unschedule_snapshots
# methods.
# methods. Note that for take_snapshot LOG gets set to
# a file in the zfssnap role's home directory, as we don't own
# the SMF log. Start/stop logging goes to restarter/logfile,
# everything else goes in the zfssnap role's log.
LOG=""
@ -143,7 +146,7 @@ function schedule_snapshots {
typeset OFFSET=0
case $FILESYS in
'//' | '##')
'//')
;;
*)
# validate the filesystem
@ -297,7 +300,7 @@ function unschedule_snapshots {
# This function is intended to be called on service start. It checks to see
# if the last snapshot was taken more than <frequency> <intervals> ago,
# and if that's the case, takes a snapshot immediately.
function check_missed_snapshots { # $INTERVAL $PERIOD $FMRI
function check_missed_snapshots { # $INTERVAL $PERIOD $FMRI <repopulate cache>
set -x
typeset INTERVAL=$1
@ -306,12 +309,27 @@ function check_missed_snapshots { # $INTERVAL $PERIOD $FMRI
typeset FILESYS=$fs_name
typeset LABEL=$label
if [ -n $4 ] ; then
typeset NO_CACHE_REPOPULATE=$4
fi
# // is special, in that we take snapshots based on user properties
# so here, we get those properties, and call ourselves again, with
# those values.
case "$FILESYS" in
"//")
FILESYS=$(get_snapshot_datasets $LABEL $SNAP_CHILDREN)
;;
"##")
FILESYS=$(get_nonexcluded_datasets $LABEL)
get_userprop_datasets
export snapshot_children=false
export fs_name="$SINGLE_LIST"
print_note "Checking for non-recursive missed // snapshots $SINGLE_LIST"
check_missed_snapshots $INTERVAL $PERIOD $FMRI no_repopulate_cache
export snapshot_children=true
export fs_name="$RECURSIVE_LIST"
print_note "Checking for recursive missed // snapshots $RECURSIVE_LIST"
check_missed_snapshots $INTERVAL $PERIOD $FMRI no_repopulate_cache
return 0
;;
esac
@ -370,16 +388,22 @@ function check_missed_snapshots { # $INTERVAL $PERIOD $FMRI
if [ $AGO -gt $PERIOD_S ] ; then
print_log "Last snapshot for $FMRI taken on $LAST_SNAP_TIME_HUMAN"
print_log "which was greater than the $PERIOD $INTERVAL schedule. Taking snapshot now."
take_snapshot $FMRI
take_snapshot $FMRI $NO_CACHE_REPOPULATE
fi
}
# This function actually takes the snapshot of the filesystem.
# $1 is assumed to be a valid FMRI
# $1 is assumed to be a valid FMRI. $2 if non-null makes us skip
# populating the SMF property cache - used only by the special
# // snapshot type.
function take_snapshot {
# want this to be global, used by check_failure
FMRI=$1
zfs_smf_props $FMRI
NO_CACHE_REPOPULATE=$2
if [ -z "$NO_CACHE_REPOPULATE" ] ; then
zfs_smf_props $FMRI
fi
# When taking snapshots, because we're running as a role
# and can't redirect our output through SMF, we don't have
@ -410,22 +434,25 @@ function take_snapshot {
# user property which specifies which datasets should be snapshotted
# and under which "label" - a set of default service instances that
# snapshot at defined periods (daily, weekly, monthly, every 15 mins)
# the "##" filesystem is also special. It takes snapshots of
# all datasets (non-recursively) *except* those marked with the tag
# "com.sun:auto-snapshot:${LABEL}" = false. We necessarily ignore
# the SNAP_CHILDREN setting in this case, as that could result in
# us inadvertently taking snapshots of a child dataset under the
# parent's lack of "com.sun:auto-snapshot:${LABEL}" = false tag.
# Determine what these are, call ourselves again, then return.
case "$FILESYS" in
"//")
FILESYS=$(get_snapshot_datasets $LABEL $SNAP_CHILDREN)
;;
"##")
FILESYS=$(get_nonexcluded_datasets $LABEL)
SNAP_CHILDREN=false
;;
# this populates two values SINGLE_LIST and RECURSIVE_LIST
get_userprop_datasets $LABEL
print_note "Taking non-recursive snapshots $SINGLE_LIST"
export snapshot_children=false
export fs_name="$SINGLE_LIST"
take_snapshots $FMRI no_propcache_repopulate
single_STATE=$?
print_note "Taking recursive snapshots of $RECURSIVE_LIST"
export snapshot_childrent=false
export fs_name="$RECURSIVE_LIST"
take_snapshots $FMRI no_propcache_repopulate
recursive_STATE=$?
return $single_STATE && $recursive_STATE
;;
esac
if [ "$LABEL" != "\"\"" ] ; then
@ -724,40 +751,6 @@ function take_backup { # filesystem backup-type label fmri
}
# Print the datasets that should be snapshotted for a given label.
#   $1 - LABEL, suffix of the com.sun:auto-snapshot:<label> user property
#   $2 - SNAP_CHILDREN; when "true" only locally-set properties are
#        consulted (children are left to "zfs snapshot -r") and the
#        result is passed through narrow_recursive_filesystems.
#        Any other value lists every filesystem and volume whose
#        property columns show "true".
# The resulting list is written to stdout.
function get_snapshot_datasets { #LABEL #SNAP_CHILDREN
    typeset label=$1
    typeset children=$2
    typeset datasets

    case "${children}" in
    true)
        # FIXME this doesn't cope with the case where a dataset is
        # set locally to differing values - value for :LABEL needs to override
        datasets=$(zfs get -H -s local \
            -o name,value com.sun:auto-snapshot,com.sun:auto-snapshot:$label \
            | grep -v 'false$' | egrep ' true '\|'true$' | awk '{print $1}' | sort)
        # word splitting of the list is intentional here
        datasets="$(narrow_recursive_filesystems $datasets)"
        ;;
    *)
        datasets=$(zfs list -t filesystem,volume \
            -o name,com.sun:auto-snapshot,com.sun:auto-snapshot:$label \
            | grep -v 'false$' | egrep ' true '\|'true$' | awk '{print $1}')
        ;;
    esac

    echo "$datasets"
}
# Print every filesystem and volume that has not been opted out of
# snapshots for a label.
#   $1 - LABEL, suffix of the com.sun:auto-snapshot:<label> user property
# A dataset is dropped when its com.sun:auto-snapshot:<label> column is
# "false", or when com.sun:auto-snapshot is "false" and the label-specific
# column is unset ("-"); the label-specific value therefore overrides the
# general one. The surviving dataset names are written to stdout.
function get_nonexcluded_datasets {
    typeset label=$1
    typeset props="name,com.sun:auto-snapshot,com.sun:auto-snapshot:"
    typeset survivors

    survivors=$(zfs list -H -t filesystem,volume -o ${props}$label \
        | egrep -v ' false -$'\|'false$' \
        | awk '{print $1}')

    echo "$survivors"
}
# Given a sorted list of filesystems, determine whether any of the
# listed filesystems are redundant
@ -784,7 +777,66 @@ function narrow_recursive_filesystems {
echo ${LIST} | sed -e 's#//##g'
}
# Decide whether a dataset may be snapshotted with "zfs snapshot -r".
#   $1 - dataset name
#   $2 - (optional) path of the exclude list; defaults to $EXCLUDE as
#        set by the caller
# The exclude list holds one dataset per line with the dataset name as
# the first whitespace-delimited field.
# Returns 1 if the dataset itself, or any dataset beneath it, appears in
# the exclude list; returns 0 otherwise (including when there is no
# readable exclude list).
function can_recursive_snapshot {
    typeset ds=$1
    typeset exclude_file=${2:-$EXCLUDE}
    typeset name rest

    # nothing to consult means nothing is excluded
    [ -r "$exclude_file" ] || return 0

    # Compare names literally and anchored at the start of the name, so
    # that "tank/home" is not confused with "bigtank/home" and characters
    # such as "." in a dataset name are not treated as regex operators.
    while read -r name rest || [ -n "$name" ] ; do
        case "$name" in
        "$ds" | "$ds"/*)
            # we can't recursively snapshot $ds because
            # it's excluded or is in the path to an excluded dataset
            return 1
            ;;
        esac
    done < "$exclude_file"

    return 0
}
# Test whether a dataset is itself listed in the exclude list.
#   $1 - dataset name
#   $2 - (optional) path of the exclude list; defaults to $EXCLUDE as
#        set by the caller
# The exclude list holds one dataset per line with the dataset name as
# the first whitespace-delimited field.
# Returns 0 if the dataset is listed, 1 otherwise (including when there
# is no readable exclude list).
function is_excluded {
    typeset ds=$1
    typeset exclude_file=${2:-$EXCLUDE}
    typeset name rest

    [ -r "$exclude_file" ] || return 1

    # Exact, literal comparison of the name field: a dataset whose name
    # merely ends with, or pattern-matches, $ds must not count as excluded.
    while read -r name rest || [ -n "$name" ] ; do
        if [ "$name" = "$ds" ] ; then
            return 0
        fi
    done < "$exclude_file"

    return 1
}
# Build two lists of datasets from the ZFS user properties
# com.sun:auto-snapshot and com.sun:auto-snapshot:${LABEL}.
#   $1 - LABEL, the suffix of the label-specific user property
# Results are exported as:
#   RECURSIVE_LIST - datasets that can be snapshotted with -r (narrowed
#                    by narrow_recursive_filesystems)
#   SINGLE_LIST    - datasets to snapshot individually
# Both lists are rebuilt from scratch on every call.
# Returns non-zero (with both lists empty) if a temporary file cannot be
# created or "zfs list" fails.
function get_userprop_datasets {
    typeset LABEL=$1
    typeset ALL
    typeset EXCLUDE
    typeset ds

    # Start from empty lists so that a second call in the same process
    # does not append to the results of the first.
    RECURSIVE_LIST=""
    SINGLE_LIST=""
    export RECURSIVE_LIST SINGLE_LIST

    # mktemp gives unpredictable names, unlike a fixed /tmp name plus $$
    ALL=$(mktemp /tmp/zfs-auto-snapshot-list.XXXXXX) || return 1
    EXCLUDE=$(mktemp /tmp/zfs-auto-snapshot-exclude.XXXXXX)
    if [ $? -ne 0 ] ; then
        rm -f "$ALL"
        return 1
    fi

    if ! zfs list -H -t filesystem,volume -o \
        "name,com.sun:auto-snapshot,com.sun:auto-snapshot:${LABEL}" > "$ALL" ; then
        rm -f "$ALL" "$EXCLUDE"
        return 1
    fi

    # Columns are: name, general property, label-specific property.
    # A dataset is excluded when the label-specific property is "false",
    # or when the general property is "false" and the label-specific one
    # is unset ("-"). Comparing fields keeps this independent of the
    # exact column separator.
    awk '$3 == "false" || ($2 == "false" && $3 == "-")' "$ALL" > "$EXCLUDE"

    # iterating through datasets
    for ds in $(awk '{print $1}' "$ALL" | sort -u) ; do
        if can_recursive_snapshot "$ds" "$EXCLUDE" ; then
            print_note "OK to recursive snapshot $ds"
            RECURSIVE_LIST="${RECURSIVE_LIST} $ds"
        elif ! is_excluded "$ds" "$EXCLUDE" ; then
            print_note "OK to snapshot sole dataset $ds"
            SINGLE_LIST="${SINGLE_LIST} $ds"
        else
            print_note "$ds will not be snapshotted"
        fi
    done

    rm -f "$ALL" "$EXCLUDE"

    # word splitting of the list is intentional here
    FINAL_RECURSIVE_LIST=$(narrow_recursive_filesystems $RECURSIVE_LIST)
    print_note "Narrowed list of datasets to recursively snapshot is"
    print_note "$FINAL_RECURSIVE_LIST"

    export RECURSIVE_LIST="$FINAL_RECURSIVE_LIST"
    export SINGLE_LIST
}
# Determine if a pool is currently being scrubbed or resilvered.
# Return 0 if it is scrubbing/resilvering, 1 otherwise.

View File

@ -73,14 +73,16 @@ they being :
properties on datasets, set to "true" if the dataset
should have snapshots taken by this instance.
The special filesystem name "##" indicates we should
snapshot all filesystems on the machine, except those
marked with the com.sun:auto-snapshot ZFS user property
set to false. The snapshot-children setting is ignored when
using this fs-name value.
If set to false or unset, snapshots will not be taken
by this instance.
For both of the above, setting com.sun:auto-snapshot:<label>
will override the general setting for com.sun:auto-snapshot.
The snapshot-children property is ignored when using
this setting, instead the system will automatically
determine how to take snapshots, based on which datasets
have true, false or unset property values.
Setting com.sun:auto-snapshot:<label> will override
the general setting for com.sun:auto-snapshot.
interval : minutes | hours | days | months | none
@ -99,7 +101,8 @@ they being :
delete the oldest when we hit this threshold
snapshot-children : Whether we should recursively snapshot
all filesystems contained within.
all filesystems contained within. Ignored when
using the "//" fs-name value.
backup : If we want to perform a "zfs send" for our backup
we set this - either to "full" or "incremental".