borgwrapper/nagios-plugin/check_borgwrapper

148 lines
4.7 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Nagios-compatible check plugin for borgwrapper backup status, intended for
# use with Icinga 2 and other Nagios-compatible monitoring systems.
#
import argparse
import sys
import os
import time
def parse_status_file(path):
    """Read a status file and return its contents as a dict.

    The file must hold exactly two whitespace-separated fields: a timestamp
    that ``float()`` can parse, followed by a status word.

    Args:
        path: Location of the status file.

    Returns:
        A dict with the keys ``'timestamp'`` (float) and ``'status'`` (str).

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the file does not hold exactly two fields, or the
            first field is not a number. The message names the file.
    """
    with open(path, 'r') as f:
        fields = f.read().split()

    # Validate explicitly so an empty or truncated file is reported with the
    # file name instead of a bare tuple-unpacking error.
    if len(fields) != 2:
        raise ValueError(
            'Malformed status file %s: expected "<timestamp> <status>", '
            'got %d field(s)' % (path, len(fields)))

    raw_timestamp, status_ = fields
    try:
        timestamp = float(raw_timestamp)
    except ValueError as err:
        raise ValueError(
            'Malformed status file %s: invalid timestamp %r'
            % (path, raw_timestamp)) from err

    return {'timestamp': timestamp, 'status': status_}
def get_backup_status_file(path, name):
    """Return the path of the backup status file for configuration *name*.

    The file is called ``<name>.backup`` and is located in directory *path*.
    """
    filename = '%s.backup' % name
    return os.path.join(path, filename)
def get_verify_status_file(path, name):
    """Return the path of the verify status file for configuration *name*.

    The file is called ``<name>.verify`` and is located in directory *path*.
    """
    filename = '%s.verify' % name
    return os.path.join(path, filename)
def get_seconds_readable(seconds):
    """Format a duration in seconds as ``'<h>h <m>m <s>s'``.

    Fractional seconds are truncated. Hours are not capped, so durations of
    a day or more show up as 24 hours and above.

    Negative durations are rendered as the magnitude with a leading minus
    sign (``-5`` -> ``'-0h 0m 5s'``). Feeding a negative number straight
    into ``divmod`` would floor towards negative infinity and yield a
    misleading result such as ``'-1h 59m 55s'``.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted duration string.
    """
    total = int(seconds)
    sign = '-' if total < 0 else ''
    minutes, secs = divmod(abs(total), 60)
    hours, minutes = divmod(minutes, 60)
    return '%s%dh %dm %ds' % (sign, hours, minutes, secs)
def get_status_msg(retval):
    """Return the headline text for a numeric status code.

    Codes 0, 1, 2 and 3 map to 'Backup OK', 'Backup Warning',
    'Backup Critical' and 'Unknown error' respectively.

    Raises:
        ValueError: If *retval* is not one of the four known codes.
    """
    headlines = (
        (0, 'Backup OK'),
        (1, 'Backup Warning'),
        (2, 'Backup Critical'),
        (3, 'Unknown error'),
    )
    for code, text in headlines:
        if retval == code:
            return text
    raise ValueError('Invalid retval %s' % retval)
def get_perfdata(label, value, warning, critical, minimum=0):
    """Build one performance-data item.

    The result has the form ``label=<value>s;<warning>;<critical>;<minimum>``;
    the ``s`` directly after the value is a literal unit suffix.
    """
    measurement = '%s=%ss' % (label, value)
    limits = ';'.join(str(item) for item in (warning, critical, minimum))
    return measurement + ';' + limits
def get_backup_status(status, timestamp, max_age_warning, max_age_critical):
    """Evaluate the recorded backup status against the age thresholds.

    Args:
        status: Status word, either 'OK' or 'FAILED'.
        timestamp: Time of the recorded status, compared with ``time.time()``.
        max_age_warning: Age in seconds at which an 'OK' backup is a warning.
        max_age_critical: Age in seconds at which an 'OK' backup is critical.

    Returns:
        A ``(retval, perfdata, message)`` tuple. ``retval`` is 0 while the
        backup is younger than the warning threshold, 1 while it is younger
        than the critical threshold, and 2 otherwise or when the status is
        'FAILED'.

    Raises:
        ValueError: If the warning threshold exceeds the critical threshold.
        RuntimeError: If *status* is neither 'OK' nor 'FAILED'.
    """
    now = time.time()
    age = now - timestamp

    if max_age_warning > max_age_critical:
        raise ValueError('Warning threshold is higher than critical threshold')

    if status == 'FAILED':
        # A failed run is critical no matter how recent it is.
        retval, msg = 2, 'backup failed'
    elif status == 'OK':
        msg = 'backed up %s ago' % get_seconds_readable(age)
        if timestamp > now - max_age_warning:
            retval = 0
        else:
            retval = 1 if timestamp > now - max_age_critical else 2
    else:
        raise RuntimeError('Unknown status')

    perf = get_perfdata('backup_age', int(age),
                        max_age_warning, max_age_critical)
    return retval, perf, msg
def get_verify_status(status, timestamp, max_age_warning, max_age_critical):
    """Evaluate the recorded verification status against the age thresholds.

    Args:
        status: Status word, either 'OK' or 'FAILED'.
        timestamp: Time of the recorded status, compared with ``time.time()``.
        max_age_warning: Age in seconds at which an 'OK' verification is a
            warning.
        max_age_critical: Age in seconds at which an 'OK' verification is
            critical.

    Returns:
        A ``(retval, perfdata, message)`` tuple. ``retval`` is 0 while the
        verification is younger than the warning threshold, 1 while it is
        younger than the critical threshold, and 2 otherwise or when the
        status is 'FAILED'.

    Raises:
        ValueError: If the warning threshold exceeds the critical threshold.
        RuntimeError: If *status* is neither 'OK' nor 'FAILED'.
    """
    now = time.time()
    age = now - timestamp

    if max_age_warning > max_age_critical:
        raise ValueError('Warning threshold is higher than critical threshold')

    if status == 'FAILED':
        # A failed verification is critical no matter how recent it is.
        retval, msg = 2, 'verification failed'
    elif status == 'OK':
        msg = 'verified %s ago' % get_seconds_readable(age)
        if timestamp > now - max_age_warning:
            retval = 0
        else:
            retval = 1 if timestamp > now - max_age_critical else 2
    else:
        raise RuntimeError('Unknown status')

    perf = get_perfdata('verify_age', int(age),
                        max_age_warning, max_age_critical)
    return retval, perf, msg
def main(argv=None):
    """Run the check, print one status line and return the exit code.

    The backup and verify status files for the selected configuration are
    looked up in the status directory and evaluated. The printed line has
    the form ``<headline> - <messages> | <perfdata>`` and the returned code
    is the highest (worst) code among the individual results.

    A missing backup status file counts as critical (2); a missing verify
    status file counts as a warning (1). Any exception raised while
    evaluating is reported as ``Error: <text>`` with return code 3.

    Args:
        argv: Optional list of command-line arguments. When None (the
            default) argparse reads them from ``sys.argv``.

    Returns:
        The exit code as an int (0, 1, 2 or 3).
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` and would replace the program
    # name in the usage line.
    parser = argparse.ArgumentParser(
        description='Check borgwrapper backup status')
    parser.add_argument('--backup-age-warning', type=int, default=172800,
                        help='backup age in seconds that triggers a warning '
                             '(default: %(default)s)')  # 48 hours
    parser.add_argument('--backup-age-critical', type=int, default=259200,
                        help='backup age in seconds that triggers a critical '
                             '(default: %(default)s)')  # 72 hours
    parser.add_argument('--verify-age-warning', type=int, default=5270400,
                        help='verification age in seconds that triggers a '
                             'warning (default: %(default)s)')  # 2 months
    parser.add_argument('--verify-age-critical', type=int, default=7948800,
                        help='verification age in seconds that triggers a '
                             'critical (default: %(default)s)')  # 3 months
    parser.add_argument('--status-dir', default='/var/lib/borgwrapper/status',
                        help='directory holding the status files '
                             '(default: %(default)s)')
    parser.add_argument('--config-name', default='config',
                        help='base name of the status files '
                             '(default: %(default)s)')
    args = parser.parse_args(argv)

    # Each entry is a (retval, perfdata-or-None, message) tuple.
    statuses = []
    try:
        # Get backup status
        status_file = get_backup_status_file(args.status_dir, args.config_name)
        if os.path.exists(status_file):
            status = parse_status_file(status_file)
            statuses.append(get_backup_status(
                status['status'], status['timestamp'],
                args.backup_age_warning, args.backup_age_critical))
        else:
            statuses.append((2, None, 'no backup status found'))

        # Get verification status
        status_file = get_verify_status_file(args.status_dir, args.config_name)
        if os.path.exists(status_file):
            status = parse_status_file(status_file)
            statuses.append(get_verify_status(
                status['status'], status['timestamp'],
                args.verify_age_warning, args.verify_age_critical))
        else:
            statuses.append((1, None, 'no verification status found'))

        # The worst individual result determines the overall result.
        retval = max(i[0] for i in statuses)
        status = get_status_msg(retval)
        msgs = ', '.join(i[2] for i in statuses)
        # Entries without perfdata (missing status file) are left out.
        perfdata = ' '.join(i[1] for i in statuses if i[1])
        print('%s - %s | %s' % (status, msgs, perfdata))
        return retval
    except Exception as e:
        # Top-level boundary: report any failure as "unknown" (3) instead of
        # letting a traceback escape.
        print('Error: %s' % str(e))
        return 3
if __name__ == '__main__':
    # Use main()'s return value as the exit status of the process.
    exit_status = main()
    sys.exit(exit_status)