Add nagios compatible check plugin
parent
72f722e3b3
commit
eabe2ced6e
|
|
@ -0,0 +1,147 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
#
|
||||
# This plugin is designed as a Nagios-compatible check plugin to use with
|
||||
# Icinga 2 and others
|
||||
#
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
|
||||
def parse_status_file(path):
    """Read a status file and return its contents as a dict.

    The file must hold exactly two whitespace-separated fields: a numeric
    timestamp followed by a status word.

    Args:
        path: Location of the status file.

    Returns:
        A dict with the keys 'timestamp' (float) and 'status' (str).

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the file does not contain exactly two fields, or the
            timestamp field is not a number. The message names the file so
            the problem can be located from the plugin output alone.
    """
    with open(path, 'r') as f:
        fields = f.read().split()

    # Validate explicitly; a bare tuple unpack would only report
    # "not enough values to unpack", which says nothing about the cause.
    if len(fields) != 2:
        raise ValueError(
            'Malformed status file %s: expected "<timestamp> <status>", '
            'found %d field(s)' % (path, len(fields)))

    raw_timestamp, status_ = fields
    try:
        timestamp = float(raw_timestamp)
    except ValueError:
        raise ValueError(
            'Malformed status file %s: invalid timestamp %r'
            % (path, raw_timestamp))

    return {'timestamp': timestamp, 'status': status_}
|
||||
|
||||
def get_backup_status_file(path, name):
    """Return the location of the backup status file for *name*.

    The file is called ``<name>.backup`` and sits directly inside *path*.
    """
    filename = '%s.backup' % name
    return os.path.join(path, filename)
|
||||
|
||||
def get_verify_status_file(path, name):
    """Return the location of the verification status file for *name*.

    The file is called ``<name>.verify`` and sits directly inside *path*.
    """
    filename = '%s.verify' % name
    return os.path.join(path, filename)
|
||||
|
||||
def get_seconds_readable(seconds):
    """Format a duration in seconds as '<h>h <m>m <s>s'.

    Fractional seconds are truncated. A negative duration (for example a
    timestamp that lies in the future) is rendered as the magnitude with a
    leading '-', e.g. -5 becomes '-0h 0m 5s'.

    Args:
        seconds: Duration in seconds, int or float.

    Returns:
        The formatted duration string.
    """
    # Work on the truncated magnitude: divmod() floors, so feeding it a
    # negative value would yield nonsense such as '-1h 59m 55s' for -5.
    total = int(abs(seconds))
    sign = '-' if seconds < 0 and total > 0 else ''

    m, s = divmod(total, 60)
    h, m = divmod(m, 60)
    return '%s%dh %dm %ds' % (sign, h, m, s)
|
||||
|
||||
def get_status_msg(retval):
    """Return the headline text that belongs to a plugin return code.

    Args:
        retval: Return code, one of 0, 1, 2 or 3.

    Returns:
        The matching headline string.

    Raises:
        ValueError: If *retval* is not one of the four known codes.
    """
    headlines = (
        (0, 'Backup OK'),
        (1, 'Backup Warning'),
        (2, 'Backup Critical'),
        (3, 'Unknown error'),
    )

    for code, text in headlines:
        if retval == code:
            return text

    raise ValueError('Invalid retval %s' % retval)
|
||||
|
||||
def get_perfdata(label, value, warning, critical, minimum=0):
    """Build one performance-data item with the unit suffix 's'.

    The result has the shape ``label=<value>s;<warning>;<critical>;<minimum>``.

    Args:
        label: Name of the metric.
        value: Measured value.
        warning: Warning threshold.
        critical: Critical threshold.
        minimum: Lowest possible value; defaults to 0.

    Returns:
        The formatted performance-data string.
    """
    measurement = '%s=%ss' % (label, value)
    bounds = [str(warning), str(critical), str(minimum)]
    return ';'.join([measurement] + bounds)
|
||||
|
||||
def _get_age_status(status, timestamp, max_age_warning, max_age_critical,
                    perf_label, ok_prefix, failed_msg):
    """Grade one recorded run by its status word and its age.

    Shared implementation behind get_backup_status() and
    get_verify_status(), which differ only in the texts they report.

    Args:
        status: Status word from the status file, 'OK' or 'FAILED'.
        timestamp: Time of the run, in seconds on the time.time() scale.
        max_age_warning: Age in seconds at which an 'OK' run turns WARNING.
        max_age_critical: Age in seconds at which an 'OK' run turns CRITICAL.
        perf_label: Label used for the performance-data item.
        ok_prefix: Leading words of the message for an 'OK' run.
        failed_msg: Complete message for a 'FAILED' run.

    Returns:
        A tuple ``(retval, perf, msg)``: the return code (0, 1 or 2), the
        performance-data string and the human-readable message.

    Raises:
        ValueError: If the warning threshold exceeds the critical one.
        RuntimeError: If *status* is neither 'OK' nor 'FAILED'.
    """
    current_timestamp = time.time()
    age = current_timestamp - timestamp

    if max_age_warning > max_age_critical:
        raise ValueError('Warning threshold is higher than critical threshold')

    if status == 'OK':
        msg = '%s %s ago' % (ok_prefix, get_seconds_readable(age))

        if timestamp > current_timestamp - max_age_warning:
            retval = 0
        elif timestamp > current_timestamp - max_age_critical:
            retval = 1
        else:
            retval = 2
    elif status == 'FAILED':
        # A failed run is critical no matter how recent it is.
        msg = failed_msg
        retval = 2
    else:
        raise RuntimeError('Unknown status')

    perf = get_perfdata(perf_label, int(age), max_age_warning, max_age_critical)
    return (retval, perf, msg)


def get_backup_status(status, timestamp, max_age_warning, max_age_critical):
    """Grade the most recent backup run.

    Args:
        status: Status word from the backup status file, 'OK' or 'FAILED'.
        timestamp: Time of the backup, in seconds on the time.time() scale.
        max_age_warning: Age in seconds at which an 'OK' backup turns WARNING.
        max_age_critical: Age in seconds at which an 'OK' backup turns
            CRITICAL.

    Returns:
        A tuple ``(retval, perf, msg)`` where *perf* is labelled
        'backup_age'.

    Raises:
        ValueError: If the warning threshold exceeds the critical one.
        RuntimeError: If *status* is neither 'OK' nor 'FAILED'.
    """
    return _get_age_status(status, timestamp, max_age_warning,
                           max_age_critical, 'backup_age', 'backed up',
                           'backup failed')


def get_verify_status(status, timestamp, max_age_warning, max_age_critical):
    """Grade the most recent verification run.

    Args:
        status: Status word from the verify status file, 'OK' or 'FAILED'.
        timestamp: Time of the verification, in seconds on the time.time()
            scale.
        max_age_warning: Age in seconds at which an 'OK' verification turns
            WARNING.
        max_age_critical: Age in seconds at which an 'OK' verification turns
            CRITICAL.

    Returns:
        A tuple ``(retval, perf, msg)`` where *perf* is labelled
        'verify_age'.

    Raises:
        ValueError: If the warning threshold exceeds the critical one.
        RuntimeError: If *status* is neither 'OK' nor 'FAILED'.
    """
    return _get_age_status(status, timestamp, max_age_warning,
                           max_age_critical, 'verify_age', 'verified',
                           'verification failed')
|
||||
|
||||
def main(argv=None):
    """Run the check and return the plugin exit code.

    Reads the backup and verification status files for the selected
    configuration, grades each one, prints a single summary line (with
    performance data when there is any) and returns the worst return code.

    A missing backup status file counts as 2 (critical); a missing
    verification status file counts as 1 (warning). Any exception raised
    while evaluating is reported as 'Error: ...' and yields 3.

    Args:
        argv: Argument list to parse; None (the default) makes argparse
            read sys.argv[1:].

    Returns:
        The exit code: 0, 1, 2 or 3.
    """
    # The text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog` and would end up in the usage
    # line as the program name.
    parser = argparse.ArgumentParser(
        description='Check borgwrapper backup status')
    parser.add_argument('--backup-age-warning', type=int, default=172800,  # 48 hours
                        help='backup age in seconds that triggers a warning '
                             '(default: %(default)s)')
    parser.add_argument('--backup-age-critical', type=int, default=259200,  # 72 hours
                        help='backup age in seconds that triggers a critical '
                             '(default: %(default)s)')
    parser.add_argument('--verify-age-warning', type=int, default=5270400,  # 2 months
                        help='verification age in seconds that triggers a '
                             'warning (default: %(default)s)')
    parser.add_argument('--verify-age-critical', type=int, default=7948800,  # 3 months
                        help='verification age in seconds that triggers a '
                             'critical (default: %(default)s)')
    parser.add_argument('--status-dir', default='/var/lib/borgwrapper/status',
                        help='directory holding the status files '
                             '(default: %(default)s)')
    parser.add_argument('--config-name', default='config',
                        help='base name of the status files '
                             '(default: %(default)s)')
    args = parser.parse_args(argv)

    statuses = []

    try:
        # Get backup status
        status_file = get_backup_status_file(args.status_dir, args.config_name)
        if os.path.exists(status_file):
            status = parse_status_file(status_file)
            statuses.append(get_backup_status(status['status'], status['timestamp'],
                                              args.backup_age_warning, args.backup_age_critical))
        else:
            statuses.append((2, None, 'no backup status found'))

        # Get verification status
        status_file = get_verify_status_file(args.status_dir, args.config_name)
        if os.path.exists(status_file):
            status = parse_status_file(status_file)
            statuses.append(get_verify_status(status['status'], status['timestamp'],
                                              args.verify_age_warning, args.verify_age_critical))
        else:
            statuses.append((1, None, 'no verification status found'))

        # The overall result is the most severe of the individual results.
        retval = max(code for code, _, _ in statuses)
        status = get_status_msg(retval)

        msgs = ', '.join(msg for _, _, msg in statuses)
        perfdata = ' '.join(perf for _, perf, _ in statuses if perf)

        output = '%s - %s' % (status, msgs)
        # Only add the separator when there is performance data to follow it;
        # otherwise the line would end in a dangling ' | '.
        if perfdata:
            output = '%s | %s' % (output, perfdata)
        print(output)
        return retval
    except Exception as e:
        # Top-level boundary: whatever went wrong, report it on one line and
        # hand back 3 instead of dying with a traceback.
        print('Error: %s' % str(e))
        return 3
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: hand the check result to the caller as the
    # process exit code.
    raise SystemExit(main())
|
||||
Loading…
Reference in New Issue