bundles/backup-client: rework backup generation
All checks were successful
kunsi/bundlewrap/pipeline/head This commit looks good
All checks were successful
kunsi/bundlewrap/pipeline/head This commit looks good
This commit is contained in:
parent
4e5cb69d1c
commit
14e4415e5f
6 changed files with 159 additions and 107 deletions
|
@ -1,77 +1,121 @@
|
|||
#!/bin/bash
|
||||
|
||||
statusfile=/var/tmp/backup.monitoring
|
||||
statusfile="/var/tmp/backup.monitoring"
|
||||
logdir="/var/log/backup-client"
|
||||
lock="/tmp/backup-client-is-running"
|
||||
ssh_login="${username}@${server}"
|
||||
ssh_cmnd="ssh -o IdentityFile=/etc/backup.priv -o StrictHostKeyChecking=accept-new -p ${port}"
|
||||
ssh_opts="-o IdentityFile=/etc/backup.priv -o StrictHostKeyChecking=accept-new -p ${port}"
|
||||
nodename="${node.name}"
|
||||
|
||||
<%text>
|
||||
try="${1:-<unknown>}"
|
||||
[[ -n "$DEBUG" ]] && set -x
|
||||
NL=$'\n'
|
||||
|
||||
if ! [[ -f /etc/backup.priv ]]
|
||||
then
|
||||
echo "/etc/backup.priv does not exist" | logger -t backup-client -p user.error
|
||||
echo "abort_no_key" > "$statusfile"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
run-parts --exit-on-error -- /etc/backup-pre-hooks.d
|
||||
exitcode=$?
|
||||
if [[ $exitcode != 0 ]]
|
||||
then
|
||||
echo "run-parts /etc/backup-pre-hooks.d exited $exitcode" | logger -t backup-client -p user.error
|
||||
echo "hook $exitcode" > "$statusfile"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
do_backup() {
|
||||
rsync_errorcodes_for_this_path=""
|
||||
backup_has_successfully_run="no"
|
||||
echo "==> starting backup for '$1'"
|
||||
|
||||
for try in {1..5}
|
||||
do
|
||||
echo "Backup for '$1', try $try ..." | logger -t backup-client -p user.info
|
||||
# Compress level 1 is a good compromise between speed and cpu usage.
|
||||
rsync --compress-level=1 -aAP --numeric-ids --delete --relative \
|
||||
--rsync-path="/usr/bin/rsync --fake-super" \
|
||||
-e "ssh $ssh_opts" \
|
||||
"$1" "$ssh_login":backups/
|
||||
|
||||
# Compress level 1 is a good compromise between speed and cpu usage.
|
||||
rsync --compress-level=1 -aAP --numeric-ids --delete --relative \
|
||||
--rsync-path="/usr/bin/rsync --fake-super" \
|
||||
-e "$ssh_cmnd" \
|
||||
"$1" "$ssh_login":backups/
|
||||
|
||||
# Exit code 24 means some files have vanished during rsync.
|
||||
# I don't know why, but this is very common, apparently?
|
||||
exitcode=$?
|
||||
echo "Backup for '$1' try $try exited $exitcode" | logger -t backup-client -p user.info
|
||||
if [[ $exitcode != 0 ]] && [[ $exitcode != 24 ]]
|
||||
then
|
||||
rsync_errorcodes_for_this_path+=" $exitcode"
|
||||
sleep 30
|
||||
else
|
||||
backup_has_successfully_run="yes"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ "$backup_has_successfully_run" != "yes" ]]
|
||||
# Exit code 24 means some files have vanished during rsync.
|
||||
# I don't know why, but this is very common, apparently?
|
||||
exitcode=$?
|
||||
echo "==> backup for '$1' exited $exitcode"
|
||||
if [[ $exitcode != 0 ]] && [[ $exitcode != 24 ]]
|
||||
then
|
||||
echo "Backup for '$1' did not succeed!" | logger -t backup-client -p user.error
|
||||
rsync_errors+="${NL}${1}${rsync_errorcodes_for_this_path}"
|
||||
rsync_errors+=" $1 ($exitcode)"
|
||||
fi
|
||||
}
|
||||
|
||||
rsync_errors=""
|
||||
# Exit handler for the backup run.
#
# Globals:   lock (read) - path of the lock directory created by this script
# Outputs:   writes the end-of-run marker line to stdout
#
# Removes the lock directory so the next run can acquire it, then prints the
# closing marker that pairs with the "BEGIN BACKUP RUN" line.
on_exit() {
    # `--` keeps a lock path that begins with '-' from being read as an option.
    rmdir -- "$lock"
    echo "*** END BACKUP RUN $(date '+%F %T %z') ***"
}
|
||||
|
||||
$ssh_cmnd $ssh_login "sudo /usr/local/bin/rotate-single-backup-client $nodename"
|
||||
# Ensure the log directory exists and prune old log files from it.
#
# Globals:   logdir (read) - directory that holds the backup logs
# Returns:   non-zero if the directory cannot be created or pruning fails
prepare_and_cleanup_logdir() {
    # rsync logs tend to get very large. That's why we pipe them through
    # gzip when writing. Because we're running multiple tries, we cannot
    # rely on logrotate to rotate the logs, we have to do it ourselves.
    # Of course that means we have to clean up after ourselves, too.

    # Without the directory there is nothing to prune, so stop here.
    mkdir -p -- "$logdir" || return

    # One traversal removes both plain and gzipped logs whose mtime is more
    # than 14 days in the past.
    find "$logdir" -type f -mtime +14 \( -name "*.log" -o -name "*.gz" \) -delete
}
|
||||
|
||||
# Record the outcome of this run in the monitoring status file.
#
# Globals:   statusfile (read) - path of the file to (over)write
# Arguments: $1 - status code (callers in this script pass 0 or 2)
#            $2 - free-text message describing the outcome
#
# The file is replaced with three lines, each value shell-quoted via
# printf %q:
#   status=<code>
#   msg=<message>
#   timestamp=<current time as seconds since the epoch>
save_result_for_monitoring() {
    # Keep both values function-local so they do not leak into the script's
    # global scope.
    local code=$1
    local msg=$2

    # A single redirection opens (and truncates) the status file exactly once
    # for all three lines.
    {
        printf "status=%q\n" "$code"
        printf "msg=%q\n" "$msg"
        printf "timestamp=%q\n" "$(date +%s)"
    } > "$statusfile"
}
|
||||
|
||||
if ! mkdir "$lock" >/dev/null 2>&1
|
||||
then
|
||||
save_result_for_monitoring 2 "could not get lock"
|
||||
exit 1
|
||||
fi
|
||||
trap "on_exit" EXIT
|
||||
|
||||
# redirect stdout and stderr to logfile
|
||||
prepare_and_cleanup_logdir
|
||||
logfile="$logdir/backup--$(date '+%F--%H-%M-%S')--$$.log.gz"
|
||||
echo "All log output will go to $logfile" | logger -it backup-client
|
||||
exec > >(gzip >"$logfile")
|
||||
exec 2>&1
|
||||
|
||||
# this is where the real work starts
|
||||
ts_begin=$(date +%s)
|
||||
|
||||
echo "*** BEGIN BACKUP RUN $(date '+%F %T %z') ***"
|
||||
echo "This is attempt $try"
|
||||
echo "using ssh options [$ssh_opts]"
|
||||
echo "using ssh login [$ssh_login]"
|
||||
|
||||
if ! [[ -f /etc/backup.priv ]]
|
||||
then
|
||||
save_result_for_monitoring 2 "/etc/backup.priv does not exist"
|
||||
exit 100
|
||||
fi
|
||||
|
||||
for i in /etc/backup-pre-hooks.d/*
|
||||
do
|
||||
[[ -x "$i" ]] || continue
|
||||
|
||||
echo "Running pre-hook '$i'"
|
||||
if ! $i
|
||||
then
|
||||
save_result_for_monitoring 2 "pre-hook '$i' failed to run"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
rsync_errors=""
|
||||
</%text>
|
||||
|
||||
% for path in sorted(paths):
|
||||
do_backup "${path}"
|
||||
% endfor
|
||||
|
||||
<%text>
|
||||
if [[ -n "$rsync_errors" ]]
|
||||
then
|
||||
echo "rsync_error$rsync_errors" > "$statusfile"
|
||||
else
|
||||
echo "ok" > "$statusfile"
|
||||
save_result_for_monitoring 2 "rsync failed: $rsync_errors"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ssh $ssh_opts $ssh_login "sudo /usr/local/bin/rotate-single-backup-client $nodename" </dev/null
|
||||
ssh_error=$?
|
||||
if [[ $ssh_error -ne 0 ]]
|
||||
then
|
||||
save_result_for_monitoring 2 "rotating backups failed with status code $ssh_error"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ts_end=$(date +%s)
|
||||
echo "Success"
|
||||
save_result_for_monitoring 0 "Backup finished at $(date '+%F %T %z') (took $((ts_end - ts_begin)) seconds)"
|
||||
</%text>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue