diff --git a/README.zfs-auto-snapshot.txt b/README.zfs-auto-snapshot.txt index 4c14aaa..27449ce 100644 --- a/README.zfs-auto-snapshot.txt +++ b/README.zfs-auto-snapshot.txt @@ -1,14 +1,30 @@ -ZFS Automatic Snapshot SMF Service, version 0.5 -Introduction ------------ +NAME + +ZFS Automatic Snapshot SMF Service, version 0.6 + + + +DESCRIPTION This is a *prototype* of a simple SMF service which you can configure to -take automatic, scheduled snapshots of any given ZFS filesystem. +take automatic, scheduled snapshots of any given ZFS filesystem as well +as perform simple incremental or full backups of that filesystem. + +To use the service, the user must install the method script, import the default +instance, and then create instances for each ZFS filesystem that should be +managed by the service. + +Documentation for the service instance is contained in the manifest file, +zfs-auto-snapshot.xml. + +We also bundle a simple GUI application, which will query the user for the +properties required, and will proceed to build an instance manifest. This +GUI is documented as part of the installation instructions below. -Usage Instructions ------------------- + +INSTALLATION To install, as root, run the following commands: @@ -19,24 +35,42 @@ Once you have installed these, you need to create an instance of the service for each set of ZFS snapshots you want to take. The properties we need are: zfs/fs-name The name of the filesystem + zfs/interval [ hours | days | months ] + zfs/keep How many snapshots to retain. "all" keeps all snapshots. + zfs/period How often you want to take snapshots (eg. every 10 days) + zfs/snapshot-children "true" if you would like to recursively take snapshots of all child filesystems of the specified fs-name. + zfs/backup [ full | incremental | none ] -An example instance manifest is included in this archive, and the default -instance (which should be disabled) is also documented. 
+ zfs/backup-save-cmd The command string used to save the backup stream. + + zfs/backup-lock You shouldn't need to change this - but it should be + set to "unlocked" by default. We use it to indicate when + a backup is running. + + zfs/label A label that can be used to differentiate this set of + backups from others, not required. + + +An example instance manifest is included in this archive. The script "zfs-auto-snapshot-admin.sh" is a simple shell wrapper which uses zenity, a scriptable GUI framework in GNOME, to write a service manifest based on user input. + # ./zfs-auto-snapshot-admin.sh Usage: zfs-auto-snapshot-admin.sh [zfs filesystem name] + +EXAMPLES + The following shows me running it for the ZFS filesystem "tank/root_filesystem". @@ -52,11 +86,17 @@ then issue the command : You can see what work will be done by checking your crontab. + +SEE ALSO + + +More background about this service, along with implementation comments can be +found in web log posts at: + +http://blogs.sun.com/timf/entry/zfs_automatic_snapshots_prototype_1 +http://blogs.sun.com/timf/entry/zfs_automatic_snapshots_smf_service +http://blogs.sun.com/timf/entry/and_also_for_s10u2_zfs +http://blogs.sun.com/timf/entry/smf_philosophy_more_on_zfs +http://blogs.sun.com/timf/entry/zfs_automatic_snapshots_now_with + The ZFS Automatic Snapshot SMF Service is released under the terms of the CDDL. 
- -More background detail about this service can be found in blog posts at: - -http://blogs.sun.com/roller/page/timf?entry=zfs_automatic_snapshots_prototype_1 -http://blogs.sun.com/roller/page/timf?entry=zfs_automatic_snapshots_smf_service -http://blogs.sun.com/roller/page/timf?entry=and_also_for_s10u2_zfs - diff --git a/auto-snapshot-instance.xml b/auto-snapshot-instance.xml new file mode 100644 index 0000000..afb04bc --- /dev/null +++ b/auto-snapshot-instance.xml @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lib/svc/method/zfs-auto-snapshot b/lib/svc/method/zfs-auto-snapshot index 3120fde..8f75192 100755 --- a/lib/svc/method/zfs-auto-snapshot +++ b/lib/svc/method/zfs-auto-snapshot @@ -32,9 +32,10 @@ # all child datasets. More documentation available at # http://blogs.sun.com/timf # -# The service will move itself into maintenance if it's unable to take a snapshot, -# destroy a snapshot as per the snapshot retention policy, or is unable to -# create or update the cron job. +# The service will move itself into maintenance if it's unable to take a +# snapshot, destroy a snapshot as per the snapshot retention policy, unable to +# zfs send a dataset (if configured) or is unable to create or update the cron +# job. 
# @@ -61,15 +62,28 @@ result=$SMF_EXIT_OK # $1 is assumed to be a valid FMRI function schedule_snapshots { - FMRI=$1 + typeset FMRI=$1 # FIXME need work in here to actually validate the FMRI props - FILESYS=$(svcprop -p zfs/fs-name $FMRI) - INTERVAL=$(svcprop -p zfs/interval $FMRI) - PERIOD=$(svcprop -p zfs/period $FMRI) - OFFSET=$(svcprop -p zfs/offset $FMRI) - + typeset FILESYS=$(svcprop -p zfs/fs-name $FMRI) + typeset INTERVAL=$(svcprop -p zfs/interval $FMRI) + typeset PERIOD=$(svcprop -p zfs/period $FMRI) + typeset OFFSET=$(svcprop -p zfs/offset $FMRI) + typeset STATE=0 + + typeset BACKUP=$(svcprop -p zfs/backup $FMRI) + typeset BACKUP_SAVE_CMD=$(svcprop -p zfs/backup-save-cmd $FMRI) + + case $BACKUP in + 'full' | 'incremental' ) + if [ -z "${BACKUP_SAVE_CMD}" ] + then + check_failure 1 "Backup requested, but no backup command specified." + fi + ;; + esac + # for now, we're forcing the offset to be 0 seconds. - OFFSET=0 + typeset OFFSET=0 # validate the filesystem zfs list $FILESYS 2>&1 1> /dev/null @@ -105,7 +119,12 @@ function schedule_snapshots { # just live with this. # function add_cron_job { # $INTERVAL $PERIOD $OFFSET $FMRI - + + typeset INTERVAL=$1 + typeset PERIOD=$2 + typeset OFFSET=$3 + typeset FMRI=$4 + case $INTERVAL in 'minutes') TIMES=$(get_divisor 0 59 $PERIOD) @@ -145,7 +164,8 @@ function add_cron_job { # $INTERVAL $PERIOD $OFFSET $FMRI # $1 is assumed to be a valid FMRI function unschedule_snapshots { - FMRI=$1 + typeset FMRI=$1 + crontab -l | grep -v "/lib/svc/method/zfs-auto-snapshot $FMRI$" > /tmp/saved-crontab.$$ crontab /tmp/saved-crontab.$$ check_failure $? 
"Unable to unschedule snapshots for $FMRI" @@ -167,30 +187,74 @@ function unschedule_snapshots { # $1 is assumed to be a valid FMRI function take_snapshot { - FMRI=$1 + typeset FMRI=$1 - DATE=$(date +%F-%H:%M:%S) - SNAPNAME="zfs-auto-snap-${DATE}" - FILESYS=$(svcprop -p zfs/fs-name $FMRI) - KEEP=$(svcprop -p zfs/keep $FMRI) - SNAP_CHILDREN=$(svcprop -p zfs/snapshot-children $FMRI) + typeset DATE=$(date +%F-%H:%M:%S) + typeset FILESYS=$(svcprop -p zfs/fs-name $FMRI) + typeset KEEP=$(svcprop -p zfs/keep $FMRI) + typeset SNAP_CHILDREN=$(svcprop -p zfs/snapshot-children $FMRI) + + typeset BACKUP=$(svcprop -p zfs/backup $FMRI) + typeset STATE=0 + + # an identifier allows us to setup multiple snapshot schedules + # per filesystem - so we append a : token if the user has + # requested one, which then gets used in the SNAPNAME. SMF + # returns the value '""' for the empty string to differentiate + # between an unset property, and a set-but-empty property. + # Shocking, I know. + typeset LABEL="$(svcprop -p zfs/label $FMRI)" + if [ "$LABEL" != "\"\"" ] + then + LABEL=":${LABEL}" + else + LABEL="" + fi + + typeset SNAPNAME="zfs-auto-snap${LABEL}-${DATE}" - # Ok, now say cheese! It'd be nice if the child snapshotting was - # atomic, but we don't yet have that in zfs. + + # Ok, now say cheese! If we're taking recursive snapshots, + # walk through the children, destroying old ones if required. if [ "${SNAP_CHILDREN}" == "true" ] then - for child in $(zfs list -r -H -o name -t filesystem $FILESYS) + + OS=$(uname -r) + for child in $(zfs list -r -H -o name -t filesystem,volume $FILESYS) do - destroy_older_snapshots $child $KEEP - zfs snapshot $child@$SNAPNAME - check_failure $? "Unable to take snapshot $child@$SNAPNAME." + destroy_older_snapshots $child $KEEP $LABEL + if [ "${OS}" != "5.11" ] + then + # Solaris 10 doesn't have recursive snapshots, but we do + # them outside the loop otherwise. + zfs snapshot $child@$SNAPNAME + check_failure $? 
"Unable to take snapshot $child@$SNAPNAME." + fi done + # take the recursive snapshots if we're on Solaris Nevada. + if [ "${OS}" == "5.11" ] + then + zfs snapshot -r $FILESYS@$SNAPNAME + check_failure $? "Unable to take recursive snapshots $FILESYS@$SNAPNAME." + fi + else + destroy_older_snapshots $FILESYS $KEEP zfs snapshot $FILESYS@$SNAPNAME check_failure $? "Unable to take snapshot $FILESYS@$SNAPNAME." + fi + # If the user has asked for backups, go ahead and do this. + if [ "${BACKUP}" != "none" ] + then + take_backup $FILESYS $BACKUP "$LABEL" $FMRI + check_failure $? "Unable to backup filesystem $FILESYS using \ + $BACKUP backup strategy." + fi + + # finally, check our status before we return STATE=$(svcprop -p restarter/state $FMRI) if [ "${STATE}" == "maintenance" ] @@ -202,16 +266,18 @@ function take_snapshot { return $STATE } -# Given a filesystem name, and a limit of the number of snapshots we want +# Given a filesystem name, and a limit of the number of snapshots we want, +# along with the identifier for this set of snapshots, # we destroy all older snapshots of this filesystem whose names begin -# with the text "zfs-auto-snap". Note that here we destroy one more snapshot +# with the text "zfs-auto-snap". Note that here we destroy one more snapshot # than the "keep" threshold - this is because in the context of calling this # function, we're already creating one new auto-snapshot. 
# function destroy_older_snapshots { - FILESYS=$1 - COUNTER=$2 + typeset FILESYS=$1 + typeset COUNTER=$2 + typeset LABEL=$3 if [ "${COUNTER}" == "all" ] then @@ -222,7 +288,7 @@ function destroy_older_snapshots { # walk through the snapshots, newest first, destroying older ones for snapshot in $(zfs list -r -t snapshot -H -o name $FILESYS \ - | grep $FILESYS@zfs-auto-snap | sort -r) + | grep "$FILESYS@zfs-auto-snap${LABEL}" | sort -r) do if [ $COUNTER -le 0 ] then @@ -237,14 +303,15 @@ function destroy_older_snapshots { } # Given the exit status of a command, an integer, 0 if the command completed -# without errors, if the command exited with errors, then we degrade the +# without errors. If the command exited with errors, we degrade the # state of this service into maintenance mode. We also log an error message # as passed into this function. # function check_failure { # integer exit status, error message to display - RESULT=$1 - ERR_MSG=$2 + typeset RESULT=$1 + typeset ERR_MSG=$2 + if [ $RESULT -ne 0 ] then echo "Error: $ERR_MSG" @@ -261,11 +328,11 @@ function check_failure { # integer exit status, error message to display # function get_divisor { # start period, end period, width of period - START=$1 - END=$2 - WIDTH=$3 - RANGE=$START - JUMP=$(( $RANGE + $WIDTH )) + typeset START=$1 + typeset END=$2 + typeset WIDTH=$3 + typeset RANGE=$START + typeset JUMP=$(( $RANGE + $WIDTH )) while [ $JUMP -lt $END ] do @@ -277,6 +344,114 @@ function get_divisor { # start period, end period, width of period } +# Given a filesystem name, and a backup type (currently "full" or +# "incremental") along with an FMRI, we backup the filesystem - either +# from the latest snapshot that was taken, or by an incremental backup. 
+# Properties in the FMRI tell us what to do with the backup stream +# +function take_backup { # filesystem backup-type label fmri + + typeset FILESYS=$1 + typeset BACKUP=$2 + typeset LABEL=$3 + typeset FMRI=$4 + + + # obtain lock from fmri + typeset LOCK=$(svcprop -p zfs/backup-lock $FMRI) + if [ "$LOCK" != "unlocked" ] + then + # Unable to perform this backup due to an existing backup being + # executed for this dataset. This would result in moving the + # service to maintenance mode if we're doing incrementals, but + # it's not so serious for full backups. + echo "Unable to backup $FILESYS: $LOCK." + + if [ "$BACKUP" == "incremental" ] + then + echo "A lock prevented us from performing an incremental backup." + return 1 + else + echo "Full backup not completed for $FMRI on $(date)." + return 0 + fi + else + # set our lock. (this isn't atomic, unfortunately :-( ) + svccfg -s $FMRI setprop zfs/backup-lock = astring: \ + "\"$BACKUP backup in progress by PID $$\"" + svcadm refresh $FMRI + fi + + typeset BACKUP_SAVE_CMD=$(svcprop -p zfs/backup-save-cmd $FMRI \ + | sed -e 's/\\//g') + typeset SNAP_CHILDREN=$(svcprop -p zfs/snapshot-children $FMRI) + typeset BACKUP_DATASETS="" + + # Determine how many datasets we have to backup + if [ "$SNAP_CHILDREN" == "true" ] + then + BACKUP_DATASETS=$(zfs list -r -H -o name -t filesystem,volume $FILESYS) + else + # only one dataset to worry about here. + BACKUP_DATASETS=$FILESYS + fi + + # loop through the datasets, backing up each one. 
+ for dataset in $BACKUP_DATASETS + do + + # An initial check of the input parameters, to see how we should proceed + case $BACKUP in + "incremental") + # get the last two snapshots + LAST_SNAP=$(zfs list -H -o name -r -t snapshot $dataset \ + | grep "$dataset@zfs-auto-snap${LABEL}" | tail -1) + + PREV_SNAP=$(zfs list -H -o name -r -t snapshot $dataset \ + | grep "$dataset@zfs-auto-snap${LABEL}" \ + | tail -2 | head -1) + + if [ "$PREV_SNAP" == "$LAST_SNAP" ] + then + echo "Previous snap not found of $dataset, taking full backup" + BACKUP="full" + fi + ;; + "full") + LAST_SNAP=$(zfs list -H -o name -r -t snapshot $dataset \ + | grep "$dataset@zfs-auto-snap${LABEL}" | tail -1) + ;; + *) + check_failure 1 "Unknown backup type $BACKUP" + svccfg -s $FMRI setprop zfs/backup-lock = astring: "unlocked" + svcadm refresh $FMRI + return 1 + ;; + esac + + + # Now perform the backup. Note that on errors, we'll immediately mark + # the service as being in maintenance mode, however, backups will still + # be attempted for other datasets in our list. + case $BACKUP in + "incremental") + zfs send -i $PREV_SNAP $LAST_SNAP | $BACKUP_SAVE_CMD + check_failure $? "Error performing incremental backup of $dataset." + ;; + "full") + zfs send $LAST_SNAP | $BACKUP_SAVE_CMD + check_failure $? "Error performing full backup of $dataset." + ;; + esac + done + + # Now we can release our lock + svccfg -s $FMRI setprop zfs/backup-lock = astring: "unlocked" + svcadm refresh $FMRI + +} + + # Here's the beginning of the main script. As we're a method script for SMF, # we take start and stop arguments, and assume that the $SMF_FMRI value is being @@ -313,7 +488,7 @@ case "$1" in ;; # the default case, we actually call from the cron job itself that's -# executing this script. +# executing this script, and do the job of taking snapshots. *) SMF_FMRI=$1 # are we being called with the correct argument (an FMRI) ? 
diff --git a/sample-auto-snapshot-instance.xml b/sample-auto-snapshot-instance.xml index e365f3b..28a0d99 100644 --- a/sample-auto-snapshot-instance.xml +++ b/sample-auto-snapshot-instance.xml @@ -22,14 +22,14 @@ CDDL HEADER END --> - + + version='0.6'> - + + + + + + + + + diff --git a/zfs-auto-snapshot-admin.sh b/zfs-auto-snapshot-admin.sh index 61c8a56..be00017 100755 --- a/zfs-auto-snapshot-admin.sh +++ b/zfs-auto-snapshot-admin.sh @@ -33,13 +33,14 @@ # the version string, and call the appropriate "_26" versions of functions # if we need to. (zenity that ships in s10u2 is based on GNOME 2.6 and doesn't # have the same functionality as the 2.14-based zenity) +# MAIN_TITLE="Take regular ZFS snapshots" function get_interval_26 { # Get an interval for taking snapshots # zenity 2.6 doesn't support the --text option to --list - TITLE="${MAIN_TITLE}: Choose a time period for taking snapshots " + TITLE="${MAIN_TITLE}: Choose a time period for taking snapshots." INTERVAL=$(zenity --list --title="${TITLE}" \ --radiolist --column="select" \ --column="interval" x "minutes" x "hours" x "days" x "months") @@ -167,6 +168,63 @@ function get_snap_children { fi } + +function get_backup { + # decide if we want to do backup of this filesystem + TITLE="${MAIN_TITLE}: Remote backups" + TEXT="Choose a type of backup to perform for this filesystem:" + + BACKUP=$(zenity --list --title="${TITLE}" --text="${TEXT}" \ + --radiolist --column="select" \ + --column="type" x "full" x "incremental" x "none") + + if [ $? -eq 1 ] + then + exit 1; + fi + + case $BACKUP in + 'incremental' | 'full') + get_backup_command + ;; + *) + BACKUP="none" + ;; + esac + +} + +function get_backup_command { + # ask the user which backup command they want to use. + TITLE="${MAIN_TITLE}: Backup command" + TEXT="Enter a command you wish to run on the backup stream.\ + eg. 
eval cat > /net/hostname/backup.\$\$" + + BACKUP_COMMAND=$(zenity --entry --title="${TITLE}" --text="${TEXT}" \ + --entry-text="ssh timf@hostname \ +/usr/bin/pfexec /usr/sbin/zfs receive tank/backup") + if [ $? -eq 1 ] + then + exit 1; + fi + +} + + +function get_label { + # ask the user if they want to attach a label to this instance + TITLE="${MAIN_TITLE}: Label" + TEXT="Choose a label you may use to distinguish this snapshot schedule\ + from others (Alphanumeric chars only. Cancel to leave blank.)" + + LABEL=$(zenity --entry --title="${TITLE}" --text="${TEXT}" \ + --entry-text="") + if [ $? -eq 1 ] + then + LABEL="" + fi +} + function show_summary { # let's give the user a summary of what we've done: @@ -176,7 +234,11 @@ function show_summary { Interval = ${INTERVAL}\n\ Period = ${PERIOD}\n\ Keep snapshots = ${KEEP_SNAP}\n\ - Snapshot Children = ${SNAP_CHILDREN}\n\n\ + Snapshot Children = ${SNAP_CHILDREN}\n\ + Backup = ${BACKUP}\n\ + Backup command = ${BACKUP_COMMAND}\n\ + Label = ${LABEL}\n\ + \n\ Do you want to write this auto-snapshot manifest now ?" zenity --question --title="${TITLE}" --text="${TEXT}" @@ -213,6 +275,8 @@ then get_period_26 get_maxsnap_26 get_snap_children + get_backup + get_label show_summary else @@ -221,6 +285,8 @@ else get_period get_maxsnap get_snap_children + get_backup + get_label show_summary fi @@ -231,7 +297,10 @@ fi # svc:/system/filesystem/zfs/auto-snapshot:tank-tims--fs ESCAPED_NAME=$(echo $1 | sed -e 's#-#--#g' | sed -e 's#/#-#g' \ | sed -e 's#\.#-#g') - +if [ ! -z "${LABEL}" ] +then + ESCAPED_NAME="${ESCAPED_NAME},${LABEL}" +fi # Now we can build an SMF manifest to perform these actions... 
cat > auto-snapshot-instance.xml < auto-snapshot-instance.xml < + version='0.6'> - + auto-snapshot-instance.xml < + + auto-snapshot-instance.xml < + + + + + + + @@ -290,6 +372,7 @@ echo "" echo " # svccfg import auto-snapshot-instance.xml" echo "" echo "then issue the command :" -echo " # svcadm enable svc:/system/filesystem/zfs/auto-snapshot:$ESCAPED_NAME" +echo " # svcadm enable \ +svc:/system/filesystem/zfs/auto-snapshot:$ESCAPED_NAME" echo "" echo "You can see what work will be done by checking your crontab." diff --git a/zfs-auto-snapshot.xml b/zfs-auto-snapshot.xml index 6eb39d4..b1b08c8 100755 --- a/zfs-auto-snapshot.xml +++ b/zfs-auto-snapshot.xml @@ -32,7 +32,7 @@ + version='0.6'> @@ -75,6 +102,13 @@ + + + + + + +