com.sun:auto-snapshot support, remove install scripts, add narrow_recursive_filesystems and auto_include

This commit is contained in:
Tim Foster 2008-08-22 10:29:23 +01:00
parent 3bd3e3136f
commit 0d2d785a53
11 changed files with 138 additions and 78 deletions

View File

@ -65,7 +65,7 @@ SEP=":"
# This variable gets set to the restarter/logfile property
# whenever we have $FMRI defined. Used by the print_log and
# print_note functions below for all output, it's definied
# print_note functions below for all output, it's defined
# by the schedule_snapshots take_snapshots and unschedule_snapshots
# methods.
LOG=""
@ -90,7 +90,7 @@ function zfs_smf_props {
IFS="
"
SMF_PROPS="$(svcprop -t -p zfs $1 |\
SMF_PROPS="$(svcprop -t -p zfs -p restarter/logfile $1 |\
sed -e 's#zfs/fs-name#zfs/fs_name#g' \
-e 's#zfs/backup-lock#zfs/backup_lock#g' \
-e 's#zfs/snapshot-children#zfs/snapshot_children#g' \
@ -113,6 +113,9 @@ function schedule_snapshots {
typeset FMRI=$1
zfs_smf_props $FMRI
# functions we call need $LOG set.
export LOG=$logfile
# FIXME need work in here to actually validate the FMRI props
typeset FILESYS="$fs_name"
typeset INTERVAL="$interval"
@ -136,9 +139,7 @@ function schedule_snapshots {
typeset OFFSET=0
case $FILESYS in
"//")
;;
"##" )
'//' | '##')
;;
*)
# validate the filesystem
@ -376,8 +377,8 @@ function take_snapshot {
FMRI=$1
zfs_smf_props $FMRI
export LOG=$log
typeset DATE=$(date +%F-%H${SEP}%M${SEP}%S)
export LOG=$logfile
typeset DATE=$(date +%F-%H${SEP}%M)
typeset FILESYS="$fs_name"
typeset KEEP=$keep
typeset SNAP_CHILDREN=$snapshot_children
@ -532,7 +533,7 @@ function destroy_older_snapshots {
print_note "$snapshot being destroyed ${RECURSIVE} as per \
retention policy."
zfs destroy ${RECURSIVE} $snapshot
check_failure $? "Unable to destroy $snapshot"
check_failure $? "Unable to destroy $snapshot" "NON_FATAL"
else
# don't destroy this one
COUNTER=$(($COUNTER - 1))
@ -541,19 +542,24 @@ function destroy_older_snapshots {
}
# Given the exit status of a command (an integer, 0 if the command completed
# without errors) and an error message, log the message when the status is
# non-zero. Unless a 3rd argument is presented, the service named by $FMRI
# is then also moved into maintenance mode.
#
# Arguments:
#   $1 - exit status of the command being checked
#   $2 - error message to log on failure
#   $3 - optional; any non-empty value (callers pass "NON_FATAL") means the
#        error is only logged and the service state is left alone
#
function check_failure { # exit status, error message, [non-fatal flag]
  typeset RESULT=$1
  typeset ERR_MSG=$2
  typeset NON_FATAL=$3

  # Quoted, with a default, so a missing status is treated as success
  # instead of making the test itself print a syntax error.
  if [ "${RESULT:-0}" -ne 0 ] ; then
    print_log "Error: $ERR_MSG"
    # Only escalate when the caller did not flag the failure as non-fatal.
    if [ -z "${NON_FATAL}" ] ; then
      print_log "Moving service $FMRI to maintenance mode."
      svcadm mark maintenance "$FMRI"
    fi
  fi
}
@ -716,28 +722,58 @@ function get_snapshot_datasets { #LABEL #SNAP_CHILDREN
typeset LABEL=$1
typeset SNAP_CHILDREN=$2
if [ "${SNAP_CHILDREN}" = "true" ] ; then
typeset FS=$(zfs get -s local -o name,value com.sun:auto-snapshot:$LABEL \
| grep true$ | awk '{print $1}')
# FIXME this doesn't cope with the case where a dataset is
# set locally to differing values - value for :LABEL needs to override
typeset FS=$(zfs get -H -s local \
-o name,value com.sun:auto-snapshot,com.sun:auto-snapshot:$LABEL \
| grep -v 'false$' | egrep ' true '\|'true$' | awk '{print $1}' | sort)
FS="$(narrow_recursive_filesystems $FS)"
else
typeset FS=$(zfs list -t filesystem,volume \
-o name,com.sun:auto-snapshot:$LABEL \
| grep true$ | awk '{print $1}')
typeset FS=$(zfs list -t filesystem,volume \
-o name,com.sun:auto-snapshot,com.sun:auto-snapshot:$LABEL \
| grep -v 'false$' | egrep ' true '\|'true$' | awk '{print $1}')
fi
echo "$FS"
}
# Get a list of filesystems we should snapshot. We look for all filesystems
# and volumes that don't have a property com.sun:auto-snapshot:$LABEL
# or com.sun:auto-snapshot set to false. The :$LABEL specifier overrides
# the more general property setting.
#
# Arguments:
#   $1 - the schedule label appended to com.sun:auto-snapshot:
# Outputs:
#   the names of the datasets that are not excluded, one per line
function get_nonexcluded_datasets {
  typeset LABEL=$1
  typeset FS

  # "zfs list -H" separates its columns with a single tab, so the filter
  # splits on tabs rather than matching on spaces. The label-specific value
  # ($3) wins whenever it is set; "-" means unset, in which case the general
  # value ($2) decides.
  FS=$(zfs list -H -t filesystem,volume \
    -o name,com.sun:auto-snapshot,com.sun:auto-snapshot:$LABEL \
    | awk 'BEGIN { FS = "\t" }
      {
        setting = $3
        if (setting == "-") setting = $2
        if (setting != "false") print $1
      }')
  echo "$FS"
}
# Given a sorted list of filesystems, determine whether any of the
# listed filesystems are redundant
# eg. for tank/other tank/foo tank/other/bar tank/foo/bar
# we only need to snapshot tank/other and tank/foo
#
# Arguments:
#   $@ - dataset names, ancestors listed before their descendants
# Outputs:
#   the narrowed list on a single line, names separated by single spaces
function narrow_recursive_filesystems {
  typeset LIST=""
  typeset ds ancestor ANCESTOR_IN_LIST

  # $@ is left unquoted on purpose: a caller may hand over the whole list
  # as one whitespace-separated string.
  for ds in $@ ; do
    ANCESTOR_IN_LIST=""
    # Walk from the dataset itself up through each of its ancestors. Starting
    # at the dataset means a repeated name is dropped as well.
    ancestor=$ds
    while [ "$ancestor" != "." ] && [ "$ancestor" != "/" ] ; do
      # Compare whole, space-delimited names so that "tank/foo" is not
      # mistaken for the tail of "mytank/foo".
      case " ${LIST} " in
        *" ${ancestor} "*)
          ANCESTOR_IN_LIST=true
          break
          ;;
      esac
      ancestor=$(dirname "$ancestor")
    done
    if [ -z "${ANCESTOR_IN_LIST}" ] ; then
      LIST="${LIST:+${LIST} }${ds}"
    fi
  done
  echo "${LIST}"
}
# Determine if a pool is currently being scrubbed or resilvered.
@ -745,7 +781,7 @@ function get_nonexcluded_datasets {
# The 2nd arg is a cache of pools known to be not scrubbing during this
# invocation of the script. This does risk a scrub starting mid-way through
# the script being started and us not checking for it - but if that's just
# the script and us not checking for it - but if that's just
# happened, then restarting the scrub as a result of a snapshot being taken
# won't be too expensive.
function is_scrubbing { # POOL SCRUBLIST
@ -770,6 +806,33 @@ function is_scrubbing { # POOL SCRUBLIST
fi
}
# This function runs on startup - by default, if we're taking snapshots
# under a // schedule, and a pool has neither com.sun:auto-snapshot nor
# com.sun:auto-snapshot:$label set to true or false, then we set
# com.sun:auto-snapshot=true on that pool, causing all of its datasets
# to get included by the service.
#
# Reads the variables $fs_name and $label. Any arguments are ignored.
function auto_include {
  # FS_NAME and LABEL are assigned without typeset, as before, so that
  # they stay visible after this function returns.
  FS_NAME=$fs_name
  LABEL=$label
  typeset POOLS pool SNAPALL SNAPLABEL

  if [ "$FS_NAME" != "//" ] ; then
    return 0
  fi

  POOLS=$(zpool list -H -o name)
  for pool in $POOLS ; do
    # leave pools alone if they are reported as unavailable
    if zpool status -x "$pool" | grep "state: UNAVAIL" > /dev/null ; then
      continue
    fi

    # -H -o value prints just the property value ("-" when it is unset).
    # Without those flags the output also carries a header and the NAME,
    # PROPERTY and SOURCE columns, so it can never be compared to true/false.
    # If a value can't be read, don't risk overriding it.
    SNAPALL=$(zfs get -H -o value com.sun:auto-snapshot "$pool") || continue
    SNAPLABEL="-"
    if [ -n "$LABEL" ] ; then
      SNAPLABEL=$(zfs get -H -o value "com.sun:auto-snapshot:$LABEL" "$pool") \
        || continue
    fi

    # An explicit true or false on either property is the administrator's
    # choice; only pools with no such choice are opted in.
    case "$SNAPALL" in
      true | false) continue ;;
    esac
    case "$SNAPLABEL" in
      true | false) continue ;;
    esac
    zfs set com.sun:auto-snapshot=true "$pool"
  done
}
@ -784,12 +847,13 @@ function is_scrubbing { # POOL SCRUBLIST
if [ -n "${SMF_FMRI}" ] ; then
zfs_smf_props $SMF_FMRI
export LOG=${log}
export LOG=$logfile
fi
# $1 start | stop | an FMRI that we want to take snapshots of.
# $1 start | stop | refresh | an FMRI that we want to take snapshots of.
case "$1" in
'start')
auto_include $SMF_FMRI
schedule_snapshots $SMF_FMRI
if [ $? -eq 0 ] ; then
result=$SMF_EXIT_OK
@ -808,7 +872,6 @@ case "$1" in
result=$SMF_EXIT_ERR_FATAL
fi
;;
# the default case, we actually call from the cron job itself that's
# executing this script, and do the job of taking snapshots.
*)

View File

@ -1,4 +1,3 @@
CLASSES=none
LC_MESSAGES=en_US.UTF-8
LANG=en_US.UTF-8
TZ=Eire
@ -9,13 +8,14 @@ PKG=TIMFauto-snapshot
NAME=ZFS Automatic Snapshot Service
ARCH=all
BASEDIR=/
VERSION=0.11
VERSION=0.11ea
MAXINST=1
CATEGORY=application
DESC=Takes automatic snapshots of ZFS filesystems on a periodic basis.
PSTAMP=haiiro20080730110036
PSTAMP=haiiro20080822102134
VENDOR=Sun Microsystems, Inc.
HOTLINE=Please contact your local service provider
CLASSES=none manifest
EMAIL=tim.foster@sun.com
SUNW_PKGVERS=1.0
SUNW_PKG_ALLZONES=false

View File

@ -27,22 +27,4 @@
# a postinstall script - it should import the manifests
# and enable the service
DEFAULT=svc:/system/filesystem/zfs/auto-snapshot:default
FMRIS="svc:/system/filesystem/zfs/auto-snapshot:frequent svc:/system/filesystem/zfs/auto-snapshot:hourly svc:/system/filesystem/zfs/auto-snapshot:daily svc:/system/filesystem/zfs/auto-snapshot:weekly svc:/system/filesystem/zfs/auto-snapshot:monthly"
FILES="auto-snapshot-daily.xml auto-snapshot-monthly.xml auto-snapshot-frequent.xml auto-snapshot-weekly.xml auto-snapshot-hourly.xml zfs-auto-snapshot.xml"
/usr/sbin/svccfg import /var/svc/manifest/system/filesystem/zfs-auto-snapshot.xml
for manifest in $FILES
do
echo Importing $manifest
/usr/sbin/svccfg import /var/svc/manifest/system/filesystem/$manifest
done
for fmri in $FMRIS
do
echo Enabling $fmri
/usr/sbin/svcadm enable $fmri
done
echo "Post install script doing nathing!"

View File

@ -27,14 +27,4 @@
# a postinstall script - it should disable the services
# and delete the instances
DEFAULT=svc:/system/filesystem/zfs/auto-snapshot:default
FMRIS="svc:/system/filesystem/zfs/auto-snapshot:frequent svc:/system/filesystem/zfs/auto-snapshot:hourly svc:/system/filesystem/zfs/auto-snapshot:daily svc:/system/filesystem/zfs/auto-snapshot:weekly svc:/system/filesystem/zfs/auto-snapshot:monthly"
for fmri in $FMRIS $DEFAULT ; do
STATE=$(/usr/bin/svcs -H -o state $fmri)
if [ "$STATE" = "online" ] ; then
/usr/sbin/svcadm disable -s $fmri
fi
/usr/sbin/svccfg delete $fmri
done
echo preremove script doing nothing

View File

@ -2,24 +2,26 @@ i pkginfo
i copyright
i postinstall
i preremove
i i.manifest
i r.manifest
d none lib 0755 root bin
d none lib/svc 0755 root bin
d none lib/svc/method 0755 root bin
f none lib/svc/method/zfs-auto-snapshot 0755 root bin
d none var 0755 root sys
d none var/svc 0755 root sys
d none var/svc/manifest 0755 root sys
d none var/svc/manifest/system 0755 root sys
d none var/svc/manifest/system/filesystem 0755 root sys
f none var/svc/manifest/system/filesystem/zfs-auto-snapshot.xml 0644 root sys
f none var/svc/manifest/system/filesystem/auto-snapshot-monthly.xml 0644 root sys
f none var/svc/manifest/system/filesystem/auto-snapshot-frequent.xml 0644 root sys
f none var/svc/manifest/system/filesystem/auto-snapshot-daily.xml 0644 root sys
f none var/svc/manifest/system/filesystem/auto-snapshot-hourly.xml 0644 root sys
f none var/svc/manifest/system/filesystem/auto-snapshot-weekly.xml 0644 root sys
f manifest var/svc/manifest/system/filesystem/zfs-auto-snapshot.xml 0644 root sys
f manifest var/svc/manifest/system/filesystem/auto-snapshot-monthly.xml 0644 root sys
f manifest var/svc/manifest/system/filesystem/auto-snapshot-frequent.xml 0644 root sys
f manifest var/svc/manifest/system/filesystem/auto-snapshot-daily.xml 0644 root sys
f manifest var/svc/manifest/system/filesystem/auto-snapshot-hourly.xml 0644 root sys
f manifest var/svc/manifest/system/filesystem/auto-snapshot-weekly.xml 0644 root sys
d none usr 0755 root sys
d none usr/share 0755 root sys
d none usr/share/applications 0755 root other
f none usr/share/applications/automatic-snapshot.desktop 0644 root bin
d none usr/bin 0755 root bin
f none usr/bin/zfs-auto-snapshot-admin.sh 0755 root sys
d none lib 0755 root bin
d none lib/svc 0755 root bin
d none lib/svc/method 0755 root bin
f none lib/svc/method/zfs-auto-snapshot 0755 root bin

View File

@ -62,6 +62,9 @@ snapshots into the past.
<propval name="verbose" type="boolean" value="false"
override="true"/>
<propval name="avoidscrub" type="boolean" value="false"
override="false"/>
</property_group>
</instance>

View File

@ -62,6 +62,9 @@ com.sun:auto-snapshot:frequent=true every
<propval name="verbose" type="boolean" value="false"
override="true"/>
<propval name="avoidscrub" type="boolean" value="false"
override="false"/>
</property_group>
</instance>

View File

@ -63,7 +63,7 @@ and keeps 24 of these snapshots into the past.
override="true"/>
<propval name="avoidscrub" type="boolean" value="true"
override="true"/>
override="false"/>
</property_group>

View File

@ -63,7 +63,7 @@ and keeps 12 of these snapshots into the past.
override="true"/>
<propval name="avoidscrub" type="boolean" value="true"
override="true"/>
override="false"/>
</property_group>

View File

@ -63,7 +63,7 @@ and keeps 4 of these snapshots into the past.
override="true"/>
<propval name="avoidscrub" type="boolean" value="true"
override="true"/>
override="false"/>
</property_group>

View File

@ -59,12 +59,29 @@
<!-- the properties we expect that any instance will define
they being :
fs-name : The name of the filesystem we want to snapshot.
The special filesystem name "//" indicates we should
look at the com.sun:auto-snapshot:<label> ZFS user
property on datasets, set to "true" if the dataset
look at the com.sun:auto-snapshot ZFS user
properties on datasets, set to "true" if the dataset
should have snapshots taken by this instance.
interval : minutes | hours | days | months
The special filesystem name "##" indicates we should
snapshot all filesystems on the machine, except those
marked with the com.sun:auto-snapshot ZFS user property
set to false. The snapshot-children setting is ignored when
using this fs-name value.
For both of the above, setting com.sun:auto-snapshot:<label>
will override the general setting for com.sun:auto-snapshot.
interval : minutes | hours | days | months | none
For the interval "none" a cron job is not created for that
instance - instead the user can manually run the method
script to take snapshots defined by the rest of the properties
in the instance. The period and offset values are ignored in
this case.
period : How many (m,h,d,m) do we wait between snapshots
@ -73,14 +90,14 @@ they being :
keep : How many snapshots we should keep, otherwise, we
delete the oldest when we hit this threshold
snapshot-children : Whether we should recursively snapshot
snapshot-children : Whether we should recursively snapshot
all filesystems contained within.
backup : If we want to perform a "zfs send" for our backup
we set this - either to "full" or "incremental".
If set to "none", we don't perform backups.
backup-save-cmd : A command string to save the backup - if unset,
backup-save-cmd : A command string to save the backup - if unset,
we return an error and move the service to
maintenance.
@ -101,7 +118,7 @@ they being :
in the service printing more detail in the log
about what it's doing.
avoidscrub : Set to true by default, this determines whether
avoidscrub : Set to false by default, this determines whether
we should avoid taking snapshots on any pools that have
a scrub or resilver in progress.
More info in the bugid:
@ -123,7 +140,7 @@ they being :
<propval name="label" type="astring" value="" override="true"/>
<propval name="verbose" type="boolean" value="false" override="true"/>
<propval name="avoidscrub" type="boolean" value="true" override="true"/>
<propval name="avoidscrub" type="boolean" value="false" override="true"/>
</property_group>