#!/usr/bin/env python3

#
# This plugin is designed as a nagios compatible check plugin to use with
# Icinga 2 and others
#
# Origin: commit eabe2ced6e3fd384eb36fd9cc5007925c2353650
# ("Add nagios compatible check plugin", hkbakke, 2017-09-15),
# file nagios-plugin/check_borgwrapper.
#

import argparse
import os
import sys
import time


# Exit codes returned by this plugin, as used throughout the script.
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

_STATUS_MESSAGES = {
    STATE_OK: 'Backup OK',
    STATE_WARNING: 'Backup Warning',
    STATE_CRITICAL: 'Backup Critical',
    STATE_UNKNOWN: 'Unknown error',
}


def parse_status_file(path):
    """Read a status file and return its contents as a dict.

    The file is expected to hold exactly two whitespace-separated fields:
    a numeric timestamp followed by a status word.

    Returns:
        dict with 'timestamp' (float) and 'status' (str).

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the file does not contain exactly two fields or the
            timestamp is not a number.
    """
    with open(path, 'r') as f:
        fields = f.read().split()

    if len(fields) != 2:
        raise ValueError('Malformed status file %s' % path)

    timestamp, status = fields
    return {'timestamp': float(timestamp), 'status': status}


def get_backup_status_file(path, name):
    """Return the path of the backup status file for config *name*."""
    return os.path.join(path, '%s.backup' % name)


def get_verify_status_file(path, name):
    """Return the path of the verification status file for config *name*."""
    return os.path.join(path, '%s.verify' % name)


def get_seconds_readable(seconds):
    """Format a duration in seconds as '<h>h <m>m <s>s'.

    Fractional seconds are truncated. A negative duration (e.g. a status
    timestamp that lies in the future) is rendered with a leading '-'
    instead of the misleading values floor-division would produce.
    """
    total = int(seconds)
    sign = '-' if total < 0 else ''
    minutes, secs = divmod(abs(total), 60)
    hours, minutes = divmod(minutes, 60)
    return '%s%dh %dm %ds' % (sign, hours, minutes, secs)


def get_status_msg(retval):
    """Return the status text for a plugin exit code (0-3).

    Raises:
        ValueError: if *retval* is not one of the four known exit codes.
    """
    try:
        return _STATUS_MESSAGES[retval]
    except (KeyError, TypeError):
        raise ValueError('Invalid retval %s' % retval)


def get_perfdata(label, value, warning, critical, minimum=0):
    """Build one perfdata item: 'label=<value>s;<warning>;<critical>;<minimum>'.

    The value always carries the 's' (seconds) unit suffix.
    """
    return '%s=%ss;%s;%s;%s' % (label, value, warning, critical, minimum)


def _get_age_status(status, timestamp, max_age_warning, max_age_critical,
                    perf_label, ok_prefix, failed_msg):
    """Shared evaluation for backup and verification status records.

    Returns a (retval, perfdata, message) tuple.
    """
    if max_age_warning > max_age_critical:
        raise ValueError('Warning threshold is higher than critical threshold')

    age = time.time() - timestamp

    if status == 'OK':
        msg = '%s %s ago' % (ok_prefix, get_seconds_readable(age))
        if age < max_age_warning:
            retval = STATE_OK
        elif age < max_age_critical:
            retval = STATE_WARNING
        else:
            retval = STATE_CRITICAL
    elif status == 'FAILED':
        # A failed run is critical regardless of how recent it is.
        msg = failed_msg
        retval = STATE_CRITICAL
    else:
        raise RuntimeError('Unknown status')

    perf = get_perfdata(perf_label, int(age), max_age_warning, max_age_critical)
    return (retval, perf, msg)


def get_backup_status(status, timestamp, max_age_warning, max_age_critical):
    """Evaluate the last backup run.

    Args:
        status: 'OK' or 'FAILED' as read from the status file.
        timestamp: time of the run, in seconds since the epoch.
        max_age_warning: age in seconds from which the result is a warning.
        max_age_critical: age in seconds from which the result is critical.

    Returns:
        Tuple (retval, perfdata, message).

    Raises:
        ValueError: if the warning threshold exceeds the critical threshold.
        RuntimeError: if *status* is neither 'OK' nor 'FAILED'.
    """
    return _get_age_status(status, timestamp, max_age_warning,
                           max_age_critical, 'backup_age', 'backed up',
                           'backup failed')


def get_verify_status(status, timestamp, max_age_warning, max_age_critical):
    """Evaluate the last verification run.

    Args:
        status: 'OK' or 'FAILED' as read from the status file.
        timestamp: time of the run, in seconds since the epoch.
        max_age_warning: age in seconds from which the result is a warning.
        max_age_critical: age in seconds from which the result is critical.

    Returns:
        Tuple (retval, perfdata, message).

    Raises:
        ValueError: if the warning threshold exceeds the critical threshold.
        RuntimeError: if *status* is neither 'OK' nor 'FAILED'.
    """
    return _get_age_status(status, timestamp, max_age_warning,
                           max_age_critical, 'verify_age', 'verified',
                           'verification failed')


def _evaluate_status_file(path, evaluate, max_age_warning, max_age_critical,
                          missing_result):
    """Parse *path* and evaluate it, or return *missing_result* if absent."""
    try:
        record = parse_status_file(path)
    except FileNotFoundError:
        # Open-and-handle instead of exists()-then-open avoids a race with
        # the status file being replaced between the two calls.
        return missing_result

    return evaluate(record['status'], record['timestamp'],
                    max_age_warning, max_age_critical)


def main(argv=None):
    """Run the check and return the plugin exit code.

    Args:
        argv: optional list of command line arguments; defaults to
            sys.argv[1:] when None.

    Returns:
        0 (OK), 1 (warning), 2 (critical) or 3 (unknown / internal error).
    """
    # The text must be passed as description=; the first positional argument
    # of ArgumentParser is the program name shown in the usage line.
    parser = argparse.ArgumentParser(
        description='Check borgwrapper backup status')
    parser.add_argument('--backup-age-warning', type=int, default=172800)   # 48 hours
    parser.add_argument('--backup-age-critical', type=int, default=259200)  # 72 hours
    parser.add_argument('--verify-age-warning', type=int, default=5270400)  # 2 months
    parser.add_argument('--verify-age-critical', type=int, default=7948800)  # 3 months
    parser.add_argument('--status-dir', default='/var/lib/borgwrapper/status')
    parser.add_argument('--config-name', default='config')
    args = parser.parse_args(argv)

    try:
        statuses = [
            # A missing backup record is critical ...
            _evaluate_status_file(
                get_backup_status_file(args.status_dir, args.config_name),
                get_backup_status,
                args.backup_age_warning, args.backup_age_critical,
                (STATE_CRITICAL, None, 'no backup status found')),
            # ... while a missing verification record is only a warning.
            _evaluate_status_file(
                get_verify_status_file(args.status_dir, args.config_name),
                get_verify_status,
                args.verify_age_warning, args.verify_age_critical,
                (STATE_WARNING, None, 'no verification status found')),
        ]

        # The overall result is the worst of the individual results.
        retval = max(code for code, _, _ in statuses)
        msgs = ', '.join(msg for _, _, msg in statuses)
        perfdata = ' '.join(perf for _, perf, _ in statuses if perf)

        output = '%s - %s' % (get_status_msg(retval), msgs)
        if perfdata:
            # Only emit the perfdata separator when there is perfdata.
            output = '%s | %s' % (output, perfdata)
        print(output)
        return retval
    except Exception as e:
        # Top-level boundary: any failure is reported as UNKNOWN.
        print('Error: %s' % str(e))
        return STATE_UNKNOWN


if __name__ == '__main__':
    sys.exit(main())