zsync/bashclub-zsync/usr/bin/bashclub-zsync
2024-08-14 11:36:58 +00:00

343 lines
15 KiB
Bash

#!/bin/bash
#
# bashclub zfs replication script
# Author: (C) 2024 Thorsten Spille <thorsten@spille-edv.de>
#
# Global setup: script version, a fixed search PATH, the resolved paths of
# all external tools the script calls, the exit code and the debug switch.
__version__=v1.0.13
PATH="/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin"
# Script name shown in the usage text. "$0" is quoted so that a script path
# containing blanks is still reduced to its last component.
prog="$(basename -- "$0")"
# Resolve every tool once against the PATH set above. `command -v` is a shell
# builtin, so the lookup does not depend on an external `which` binary.
# A tool that is not installed yields an empty variable.
zfs=$(command -v zfs)
ssh=$(command -v ssh)
scp=$(command -v scp)
grep=$(command -v grep)
uniq=$(command -v uniq)
cut=$(command -v cut)
tail=$(command -v tail)
wc=$(command -v wc)
tr=$(command -v tr)
sed=$(command -v sed)
zfs_auto_snapshot=$(command -v zfs-auto-snapshot)
checkzfs=$(command -v checkzfs)
mv=$(command -v mv)
# Exit code of the script; set to 1 as soon as a replication step fails.
rc=0
# Empty = normal mode, "-v" = debug mode (set by option -d).
debug=
#### Default configuration file; a different one is selected with option -c
conf="/etc/bashclub/zsync.conf"
#### Built-in defaults for the settings stored in the configuration file
# Dataset on the local machine that receives the replicas
target="pool/dataset"
# ssh address of the remote machine
source="user@host"
# ssh port of the remote machine
sshport="22"
# zfs user property that marks the filesystems/volumes to replicate
tag="bashclub:zsync"
# Snapshot name filters, separated by pipes
snapshot_filter="hourly|daily|weekly|monthly"
# Minimum number of snapshots to keep for each filter
min_keep="3"
# Number of zfs snapshots to keep on the source (0 or 1 = snapshot function disabled)
zfs_auto_snapshot_keep="0"
# Label for the snapshot made via zfs-auto-snapshot before replication
zfs_auto_snapshot_label="backup"
# Any value > 0 disables checkzfs
checkzfs_disabled="0"
# Do not connect to the source host (for local replication)
checkzfs_local="0"
# Value of the checkzfs parameter "--prefix"
checkzfs_prefix="zsync"
# checkzfs maximum age of the last snapshot in minutes (comma separated => warn,crit)
checkzfs_max_age="1500,6000"
# checkzfs maximum count of snapshots per dataset (comma separated => warn,crit)
checkzfs_max_snapshot_count="150,165"
# Where the checkzfs output is moved to (0 = local machine, 1 = source machine)
checkzfs_spool="0"
# Maximum age of the checkzfs data in seconds (hourly = 3900, daily = 87000)
checkzfs_spool_maxage="87000"
# Print the help text to stderr and terminate the script.
# Globals:   prog, __version__ (read)
# Arguments: $1 - exit status passed on to `exit`
usage() {
  local status=$1
  cat << EOF >&2
$prog Version $__version__
-------------------------------------------------------------------------------
usage: $prog [-h] [-d] [-c CONFIG]
creates a mirrored replication of configured zfs filesystems / volumes
-c CONFIG Configuration file for this script
-d Debug mode
-------------------------------------------------------------------------------
(C) 2024 by Spille IT Solutions for bashclub (github.com/bashclub)
Author: Thorsten Spille <thorsten@spille-edv.de>
-------------------------------------------------------------------------------
EOF
  exit $status
}
# Evaluate the command line switches:
#   -d         turn on debug mode (debug=-v)
#   -c CONFIG  read CONFIG instead of the default configuration file
#   -h         show the usage text and exit with status 0
# Every other switch shows the usage text and exits with status 1.
while getopts "hdc:" flag; do
  case "$flag" in
    d) debug=-v ;;
    c) conf=$OPTARG ;;
    h) usage 0 ;;
    *) usage 1 ;;
  esac
done
# Drop the switches that were consumed above.
shift $(( OPTIND - 1 ))
# Write one message to stdout, prefixed with the current date and time.
# Arguments: $1 - message text; backslash escapes such as \n and \t are
#                 interpreted because the line is printed with `echo -e`
log() {
  local stamp
  stamp=$(date +'%b %d %T')
  echo -e "$stamp $1"
}
# Detect whether checkzfs is installed. Without it checkzfs_disabled is
# forced to 1 and a hint where to get it is logged.
# The lookup result is discarded here (stdout and stderr) so that the path is
# reported only once, through the timestamped log line below.
# NOTE(review): the configuration file is sourced further down in the script,
# so checkzfs_disabled still holds its built-in default at this point and the
# "disabled by configuration" branch cannot reflect the config file - confirm
# whether this check was meant to run after the config is loaded.
if command -v checkzfs > /dev/null 2>&1; then
  if [ "$checkzfs_disabled" -eq 0 ]; then
    checkzfs=$(command -v checkzfs)
    log "[INFO] installed checkzfs found: $checkzfs"
  else
    log "[INFO] checkzfs is disabled by configuration."
  fi
else
  checkzfs_disabled=1
  log "[WARN] checkzfs is disabled. for monitoring please install:\n\thttps://github.com/bashclub/check-zfs-replication\n"
fi
# Load the configuration file. If it does not exist yet, write one filled
# with the current default values, show the usage text and exit.
if [ -f "$conf" ]; then
  log "[INFO] Reading configuration $conf"
  source "$conf"
  # The file may set checkzfs_disabled=0 although no checkzfs binary was
  # found. Running "$checkzfs ..." with an empty command word would then
  # execute its first argument instead, so keep monitoring switched off.
  if [ -z "$checkzfs" ] && [ "$checkzfs_disabled" -eq 0 ]; then
    checkzfs_disabled=1
    log "[WARN] checkzfs_disabled=0 is configured in $conf, but checkzfs is not installed. checkzfs stays disabled."
  fi
else
  mkdir -p "$(dirname -- "$conf")"
  cat << EOF > "$conf"
target=$target
source=$source
sshport=$sshport
tag=$tag
snapshot_filter="$snapshot_filter"
min_keep=$min_keep
zfs_auto_snapshot_keep=$zfs_auto_snapshot_keep
zfs_auto_snapshot_label=$zfs_auto_snapshot_label
checkzfs_disabled=$checkzfs_disabled
checkzfs_local=$checkzfs_local
checkzfs_prefix=$checkzfs_prefix
checkzfs_max_age=$checkzfs_max_age
checkzfs_max_snapshot_count=$checkzfs_max_snapshot_count
checkzfs_spool=$checkzfs_spool
checkzfs_spool_maxage=$checkzfs_spool_maxage
EOF
  log "[INFO] Initial config file created. Please adjust and restart script. Exiting..."
  usage 0
fi
# Choose between local mode (empty source) and remote mode, then log the
# effective configuration. In remote mode the dump additionally lists
# source and sshport right after target.
if [[ $source == "" ]]; then
  log "[INFO] source is empty, switching to local mode."
  # NOTE(review): only ssh and the port option are cleared here. The remote
  # invocations later in the script still pass the ssh Control* options and a
  # single quoted command string, which an empty $ssh would turn into the
  # command word itself - confirm that local mode works as intended.
  ssh=
  zsync_sshport=
  remote_settings=
else
  zsync_sshport=-p$sshport
  remote_settings="\n\tsource=$source\n\tsshport=$sshport"
fi
# One message for both modes; the former local-mode copy contained "\ŧ"
# instead of "\t" in front of zfs_auto_snapshot_keep.
log "[INFO] Configuration:\n\ttarget=$target${remote_settings}\n\ttag=$tag\n\tsnapshot_filter=$snapshot_filter\n\tmin_keep=$min_keep\n\tzfs_auto_snapshot_keep=$zfs_auto_snapshot_keep\n\tzfs_auto_snapshot_label=$zfs_auto_snapshot_label\n\tcheckzfs_disabled=$checkzfs_disabled\n\tcheckzfs_local=$checkzfs_local\n\tcheckzfs_prefix=$checkzfs_prefix\n\tcheckzfs_max_age=$checkzfs_max_age\n\tcheckzfs_max_snapshot_count=$checkzfs_max_snapshot_count\n\tcheckzfs_spool=$checkzfs_spool\n\tcheckzfs_spool_maxage=$checkzfs_spool_maxage\n"
# ssh options (ControlMaster / ControlPath / ControlPersist) that are passed
# to the ssh calls of this script.
controlmaster=-oControlMaster=auto
controlpath=-oControlPath=~/.ssh/bashclub-zsync-%r@%h-%p
controlpersist=-oControlPersist=15
# Operating system id (ID= line of /etc/os-release) of both machines.
local_os_id=$($grep -E "^ID=" /etc/os-release | $cut -d'=' -f2)
remote_os_id=$($ssh $controlmaster $controlpath $controlpersist $source $zsync_sshport "grep -E \"^ID=\" /etc/os-release | cut -d'=' -f2")
# Look for AES support: FreeBSD is probed through the boot messages
# (upper case "AES"), everything else through /proc/cpuinfo (lower case "aes").
if [[ $local_os_id == "freebsd" ]]; then
  local_aes=$($grep -o AES /var/run/dmesg.boot | $uniq)
else
  local_aes=$($grep -m1 -o aes /proc/cpuinfo | $uniq)
fi
if [[ $remote_os_id == "freebsd" ]]; then
  remote_aes=$($ssh $controlmaster $controlpath $controlpersist $source $zsync_sshport "grep -o AES /var/run/dmesg.boot | uniq")
else
  remote_aes=$($ssh $controlmaster $controlpath $controlpersist $source $zsync_sshport "grep -m1 -o aes /proc/cpuinfo | uniq")
fi
# Use the AES cipher only when both sides report AES. The results are
# lower-cased before comparing, because the FreeBSD probe returns "AES" and
# would otherwise never match.
if [[ ${local_aes,,} == "aes" ]] && [[ ${remote_aes,,} == "aes" ]]; then
  if [[ $debug == "-v" ]]; then log "[DEBUG] Switching to cipher aes256-gcm@openssh.com"; fi
  sshcipher=-caes256-gcm@openssh.com
else
  if [[ $debug == "-v" ]]; then log "[DEBUG] Using default cipher chacha20-poly1305@openssh.com"; fi
  sshcipher=-cchacha20-poly1305@openssh.com
fi
# Query the replication tag of all filesystems/volumes on the source and
# build: the list of datasets to replicate (syncvols), the include/exclude
# lists for the log and the filter expression handed to checkzfs.
include_list="[INFO] Included datasets:\n"
exclude_list="[INFO] Excluded datasets:\n"
checkzfs_headline="[INFO] checkzfs filter:"
checkzfs_filter=
# Split command output on newlines only; every line is one dataset.
IFS=$'\n'
for zvol in $($ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "zfs get -H -o name,value,source -t filesystem,volume $tag"); do
  # Tab separated columns: dataset name, tag value, source of the tag value.
  name=$(echo "$zvol" | $cut -f1)
  tag_value=$(echo "$zvol" | $cut -f2)
  tag_source=$(echo "$zvol" | $cut -f3)
  # Decisions are made on the source column alone, so a dataset name (or an
  # "inherited from ..." path) containing "local" or "received" cannot be
  # mistaken for a locally set or received tag.
  if [[ $tag_value == "subvols" ]]; then
    if [[ $tag_source != "local" && $tag_source != "received" ]]; then
      # The tag is neither set here nor received: replicate the dataset.
      include_list="${include_list}\t${name}\n"
      syncvols+=("$name")
    elif [[ $tag_source == "local" ]]; then
      # The tag is set on this dataset itself: add "#name/" to the filter.
      checkzfs_filter="#$name/|$checkzfs_filter"
    fi
  elif [[ $tag_value == "all" ]]; then
    if [[ $tag_source != "received" ]]; then
      include_list="${include_list}\t${name}\n"
      syncvols+=("$name")
    fi
    if [[ $tag_source == "local" ]]; then
      checkzfs_filter="#$name|$checkzfs_filter"
    fi
  else
    exclude_list="${exclude_list}\t${name}\n"
    # if [[ $(echo $zvol | $grep "local") ]]; then
    # checkzfs_filter="!#$name|$checkzfs_filter"
    # fi
  fi
done
# Remove the trailing "|" separator. Unlike a negative-length substring
# expansion this is also valid when no filter entry was collected.
checkzfs_filter=${checkzfs_filter%|}
log "$include_list"
log "$exclude_list"
log "$checkzfs_headline\ncheckzfs_filter=$checkzfs_filter\n"
# Make sure the local target dataset exists and has
# com.sun:auto-snapshot=false set on it.
if ! $zfs list $target > /dev/null 2>&1 ; then
  if [[ $debug == "-v" ]]; then log "[DEBUG] $target does not exist. Creating..."; fi
  $zfs create -o canmount=noauto -o com.sun:auto-snapshot=false $target
else
  if [[ $debug == "-v" ]]; then log "[DEBUG] $target exists, check auto-snapshot...";fi
  # With -H the value and source columns are separated by a tab, so the
  # expected text has to contain a tab too. Compared against "false local"
  # (with a blank) the test never matched and the property was set again on
  # every run.
  if [[ $($zfs get -H -o value,source com.sun:auto-snapshot $target) != $'false\tlocal' ]]; then
    $zfs set com.sun:auto-snapshot=false $target
  fi
fi
# When more than one snapshot is to be kept, take a fresh snapshot on the
# source with zfs-auto-snapshot and add its label to the snapshot filter.
if [ $zfs_auto_snapshot_keep -gt 1 ]; then
  [[ $debug == "-v" ]] && log "[DEBUG] Running zfs-auto-snapshot"
  # Command line executed on the source; it ends quietly with status 0 when
  # zfs-auto-snapshot is not installed there.
  remote_snapshot_cmd="which zfs-auto-snapshot > /dev/null || exit 0 ; zfs-auto-snapshot --quiet --syslog --label=$zfs_auto_snapshot_label --keep=$zfs_auto_snapshot_keep //"
  $ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "$remote_snapshot_cmd"
  # Empty filter: the label becomes the filter. Otherwise append "|label".
  snapshot_filter="${snapshot_filter:+$snapshot_filter|}$zfs_auto_snapshot_label"
fi
# Replicate every selected dataset. Per dataset: an initial full send when
# the target does not exist yet, incremental sends of all newer snapshots
# that match the filter, then removal of outdated snapshots on the target.
# rc is set to 1 as soon as any step fails.
for name in "${syncvols[@]}"; do
  log "[INFO] Replicate $name"
  fstype=$($ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source zfs get -H -o value type $name)
  # Extra options for the initial receive: only filesystems get
  # "-x mountpoint" and "-o canmount=noauto".
  if [[ $fstype == "filesystem" ]]; then
    mp=-xmountpoint
    cm=-ocanmount=noauto
  else
    mp=
    cm=
  fi
  # Continue only when the source has at least one snapshot matching the filter.
  if [[ $($ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "zfs list -H -t snapshot -o name -S creation $name 2>/dev/null | grep -E \"@.*($snapshot_filter)\" | wc -l | tr -d ' '") -gt 0 ]]; then
    # Split on blanks while the target path is walked part by part.
    IFS=$' '
    if ! $zfs list -H $target/$name > /dev/null 2>&1 ; then
      if [[ $debug == "-v" ]]; then log "[DEBUG] $target/$name does not exist"; fi
      # Create every missing dataset on the path target/<first part of name>.
      prefix=""
      for part in $(echo $target/$(echo $name | $cut -d'/' -f1) | $sed "s/\// /g"); do
        if ! $zfs list $prefix$part > /dev/null 2>&1; then
          if [[ $debug == "-v" ]]; then log "[DEBUG] $prefix$part does not exist"; fi
          log "[INFO] Creating $prefix$part"
          # NOTE(review): autosnap is not assigned anywhere in the visible
          # script, so it expands to nothing unless the sourced configuration
          # defines it - confirm the intended option.
          $zfs create -o canmount=noauto $autosnap -p $prefix$part
        fi
        prefix="$prefix$part/"
      done
      if [[ $debug == "-v" ]]; then log "[DEBUG] $name - Start initial replication"; fi
      IFS=$'\n'
      # The list is sorted newest first, so "tail -1" selects the oldest
      # matching snapshot as the starting point.
      for snap in $($ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "zfs list -H -t snapshot -o name -S creation $name | grep -E \"@.*($snapshot_filter)\" | tail -1"); do
        log "[INFO] Start initial replication: $snap => $target/$(echo $name | $cut -d'/' -f1)"
        $ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "zfs send -w -p $debug $snap" | $zfs receive $mp $cm -x $tag -x com.sun:auto-snapshot $debug -dF $target/$(echo $name | $cut -d'/' -f1)
        # Every failure is logged, also when rc was already set by an earlier one.
        if [ $? -ne 0 ]; then rc=1; log "[ERROR] initial replication to $target/$name failed."; fi
      done
    fi
    if [[ $debug == "-v" ]]; then log "[DEBUG] $name - Start incremental replication"; fi
    # guid of the newest snapshot that exists on the target.
    guid=$($zfs list -H -o guid -s creation -t snapshot $target/$name | $tail -1)
    if [[ $guid != "" ]]; then
      # Name of the source snapshot with the same guid = common base.
      last=$($ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "zfs list -H -o name,guid -t snapshot $name | grep $guid | tail -1 | cut -f1")
      if [[ $last != "" ]]; then
        IFS=$'\n'
        if [[ $fstype == "filesystem" ]] && [[ $($zfs get -H -o value canmount $target/$name) != "noauto" ]]; then
          $zfs set canmount=noauto $target/$name
        fi
        # All matching source snapshots listed after the common base (oldest
        # first); at most 200 are taken per run (grep --after-context=200).
        for snap in $($ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "zfs list -H -o name,guid -s creation -t snapshot $name | grep -E \"@.*($snapshot_filter)\" | grep --after-context=200 $guid | grep -v $guid | cut -f1"); do
          log "[INFO] Replicating delta of $last => $snap to $target/$name"
          # Uses the resolved zfs path like every other local zfs call.
          $ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "zfs send -w $debug -i $last $snap" | $zfs receive -x $tag -x com.sun:auto-snapshot -F $debug $target/$name
          if [ $? -ne 0 ]; then rc=1; log "[ERROR] incremental replication to $target/$name failed."; fi
          last=$snap
        done
      else
        log "[ERROR] No matching snapshot (guid: $guid) found on source filesystem. This can be outdated snapshots on target or a previously deleted and new created dataset $name on the source filesystem."
        if [ $rc -eq 0 ]; then rc=1; fi
      fi
    else
      log "[ERROR] No snapshot found on $target/$name to add incremental snapshots to. The target dataset (with all children) needs to be deleted and recreated via replication."
      if [ $rc -eq 0 ]; then rc=1; fi
    fi
    if [[ $debug == "-v" ]]; then log "[DEBUG] $name - Start deletion of old snapshots"; fi
    # One filter entry per line. tr is used for the split because a "\n" in
    # a sed replacement is not understood by every sed implementation.
    filter=$(echo -e "$snapshot_filter" | $tr '|' '\n')
    IFS=$'\n'
    for interval in $filter ; do
      if [[ $debug == "-v" ]]; then log "[DEBUG] $name - Checking interval $interval"; fi
      # guid of the oldest source snapshot of this interval (list is newest first).
      guid=$($ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "zfs list -H -o guid,name -S creation -t snapshot $name | grep -E \"@.*$interval\" | cut -f1 | tail -1")
      if [[ -n $guid ]]; then
        # Target snapshots of this interval listed after that guid, i.e. the
        # ones that are older than anything the source still has.
        snaps_to_delete=$($zfs list -H -o name,guid -S creation -t snapshot $target/$name | $grep -E "@.*$interval" | $grep --after-context=200 $guid | $grep -v $guid | $cut -f1)
        snap_count=$($zfs list -H -o name,guid -S creation -t snapshot $target/$name | $grep -E "@.*$interval" | $wc -l | $tr -d ' ')
        for snap in $snaps_to_delete; do
          # Never go below min_keep snapshots per interval on the target.
          if [[ $snap_count -gt $min_keep ]]; then
            log "[INFO] Deleting $snap"
            if [[ $debug == "-v" ]]; then log "[DEBUG] $name - snap_count=$snap_count, min_keep=$min_keep"; fi
            $zfs destroy $debug $snap
            snap_count=$((snap_count - 1))
          else
            if [[ $debug == "-v" ]]; then log "[DEBUG] $name - Skipping deletion of $snap. snap_count=$snap_count, min_keep=$min_keep"; fi
          fi
        done
      fi
    done
  else
    if [[ $debug == "-v" ]]; then log "[DEBUG] $name - No snapshots found with filter $snapshot_filter"; fi
  fi
  log "[INFO] Replication of $name finished."
done
# Unless checkzfs is disabled: run it, collect its checkmk output in a file
# named ${checkzfs_spool_maxage}_${checkzfs_prefix} and deliver that file to
# /var/lib/check_mk_agent/spool/ on the local or on the source machine.
# Finally leave the script with the collected exit code.
if [ $checkzfs_disabled -eq 0 ]; then
  log "[INFO] Running checkzfs..."
  # The output is assembled in a private directory created by mktemp instead
  # of under a fixed, predictable name directly in /tmp. The directory is
  # removed afterwards, so nothing is left behind after the scp branch.
  if spool_tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/bashclub-zsync.XXXXXX"); then
    spool_file=$spool_tmpdir/${checkzfs_spool_maxage}_${checkzfs_prefix}
    echo "<<<local>>>" > "$spool_file"
    # Only pass --source when checkzfs may connect to the source host.
    if [[ $checkzfs_local -eq 0 ]]; then
      c_key=--source
      c_value=$source:$sshport
    fi
    if [[ $debug == "-v" ]]; then log "[DEBUG] Command:\n$checkzfs $c_key $c_value --output checkmk --threshold $checkzfs_max_age --maxsnapshots $checkzfs_max_snapshot_count --prefix $checkzfs_prefix --filter \"$checkzfs_filter\" >> $spool_file"; fi
    $checkzfs $c_key $c_value --output checkmk --threshold $checkzfs_max_age --maxsnapshots $checkzfs_max_snapshot_count --prefix $checkzfs_prefix --filter "$checkzfs_filter" >> "$spool_file"
    if [ $checkzfs_spool -eq 0 ] || [ $checkzfs_local -gt 0 ]; then
      # Spool on the local machine.
      mkdir -p /var/lib/check_mk_agent/spool/
      $mv "$spool_file" /var/lib/check_mk_agent/spool/
    else
      # Spool on the source machine.
      $ssh $controlmaster $controlpath $controlpersist $sshcipher $zsync_sshport $source "mkdir -p /var/lib/check_mk_agent/spool/"
      $scp -P ${sshport} "$spool_file" ${source}:/var/lib/check_mk_agent/spool/
    fi
    rm -rf -- "$spool_tmpdir"
  else
    log "[ERROR] Could not create a temporary directory for the checkzfs output."
    rc=1
  fi
fi
exit $rc