bundles/sshmon: use own check_cpu_stats script

Old script only checked iowait, which is not enough.
This commit is contained in:
Franzi 2021-02-06 09:38:50 +01:00
parent c185a5bacd
commit b6d23aaed4
Signed by: kunsi
GPG key ID: 12E3D2136B818350

View file

@ -1,184 +1,59 @@
#!/bin/bash
# ========================================================================================
# CPU Utilization Statistics plugin for Nagios
#
# Written by : Steve Bosek
# Patched by : Bas van der Doorn
# Release : 2.2
# Creation date : 8 September 2007
# Revision date : 23 November 2008
# Package : DTB Nagios Plugin
# Description : Nagios plugin (script) to check cpu utilization statistics.
# This script has been designed and written for Unix platforms (Linux, AIX, Solaris)
# and requires iostat as an external program. The location of iostat can easily
# be changed by editing the variable $IOSTAT at the top of the script.
# The script is used to query 4 of the key cpu statistics (user,system,iowait,idle)
# at the same time. Note though that there is only one set of warning
# and critical values, and it applies to the iowait percentage.
#
# Usage : ./check_cpu_stats.sh [-w <warn>] [-c <crit>] ( [ -i <intervals in second> ] [ -n <report number> ])
# ----------------------------------------------------------------------------------------
#
# TODO: Support for HP-UX
#
#
# ========================================================================================
#
# HISTORY :
# Release | Date | Authors | Description
# --------------+---------------+---------------+------------------------------------------
# 2.0 | 16.02.08 | Steve Bosek | Solaris support and new parameters
# | | | New Parameters : - iostat seconds intervals
# | | | - iostat report number
# 2.1 | 08.06.08 | Steve Bosek | Bug perfdata and convert comma in point for Linux result
# 2.1.1 | 20.11.08 | Bas van der Doorn | Fixed improperly terminated string
# 2.1.2 | 23.11.08 | Bas van der Doorn | Fixed linux steal reported as idle, comparisons
# 2.2 | 23.11.08 | Bas van der Doorn | Capable systems will output nice and steal data
# -----------------------------------------------------------------------------------------
#
# =========================================================================================
#!/usr/bin/env python3
# Paths to commands used in this script. These may have to be modified to match your system setup.
from re import findall
from subprocess import check_output
from sys import exit
# Location of the iostat binary. The script checks that this path is
# executable and exits with UNKNOWN if it is not.
IOSTAT=/usr/bin/iostat
try:
top_output = check_output("top -b -n1 -d1 | grep -i '^\%cpu'", shell=True).decode('UTF-8').split(': ', 2)[1].strip()
# Nagios return codes
# Exit statuses used by this plugin: 0 = OK, 1 = WARNING, 2 = CRITICAL,
# 3 = UNKNOWN (missing iostat, unknown argument, unsupported platform).
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
# Map each two-letter field label found in the top summary (e.g. 'us', 'sy',
# 'wa', 'st') to its percentage as a float.
cpu_usage = {}
# Raw string: '\.' and '\,' are invalid escape sequences in a normal string
# literal, and inside a character class neither character needs escaping.
for value, identifier in findall(r'([0-9.,]{3,5}) ([a-z]{2})', top_output):
    # Accept a decimal comma as well as a decimal point.
    cpu_usage[identifier] = float(value.replace(',', '.'))
# Default plugin parameter values, applied only when a parameter is unset or empty
: "${WARNING_THRESHOLD:=30}"
: "${CRITICAL_THRESHOLD:=100}"
: "${INTERVAL_SEC:=1}"
: "${NUM_REPORT:=3}"
# Alert messages collected by the checks below; sets, so that identical
# messages are reported only once.
warn, crit = set(), set()
# Plugin variable description
# "$0" is quoted so that a script path containing whitespace is handed to
# basename as a single argument.
PROGNAME=$(basename "$0")
# NOTE(review): the file header lists release 2.2 while this string still
# says 2.1.1 - confirm which one is intended.
RELEASE="Revision 2.1.1"
AUTHOR="(c) 2008 Steve Bosek (steve.bosek@gmail.com)"
# Emit the CPU summary values taken from top before any alert messages.
print(top_output)
# Abort with UNKNOWN when the configured iostat binary cannot be executed.
# The path is quoted so that an empty $IOSTAT (or one containing whitespace)
# fails the test instead of slipping past it.
if [ ! -x "$IOSTAT" ]; then
    echo "UNKNOWN: iostat not found or is not executable by the nagios user."
    exit $STATE_UNKNOWN
fi
# steal
# 'st' value from top: critical above 5%, warning above 2%.
steal = cpu_usage['st']
if steal > 5:
    crit.add('CPU steal is {}% (>5%)'.format(steal))
elif steal > 2:
    warn.add('CPU steal is {}% (>2%)'.format(steal))
# Functions plugin usage
# Print the release string and the author line, separated by one space.
print_release() {
    printf '%s %s\n' "$RELEASE" "$AUTHOR"
}
# iowait
# 'wa' value from top: critical above 60%, warning above 30%.
iowait = cpu_usage['wa']
if iowait > 60:
    crit.add('IOwait is {}% (>60%)'.format(iowait))
elif iowait > 30:
    warn.add('IOwait is {}% (>30%)'.format(iowait))
# Print the usage summary: program name and release, the supported options
# and two example invocations. Reads $PROGNAME and $RELEASE.
# Fixes the "Crical" typo in the -c description.
print_usage() {
    cat <<EOF

$PROGNAME $RELEASE - CPU Utilization check script for Nagios

Usage: check_cpu_stats.sh -w -c (-i -n)

 -w Warning level in % for cpu iowait
 -c Critical level in % for cpu iowait
 -i Interval in seconds for iostat (default : 1)
 -n Number report for iostat (default : 3)
 -h Show this page

Usage: $PROGNAME
Usage: $PROGNAME --help

EOF
}
# Print the usage text followed by a one-line description of the plugin
# (surrounded by blank lines), then terminate the script with status 0.
print_help() {
    print_usage
    printf '\n%s\n\n' "This plugin will check cpu utilization (user,system,iowait,idle in %)"
    exit 0
}
# Parse parameters
# Walk the command line one option at a time. Options that take a value
# (-w, -c, -i, -n) shift once to reach the value; the shift at the bottom of
# the loop then moves on to the next option.
# NOTE(review): the value is read from $1 without checking that an argument
# actually follows the option.
while [ $# -gt 0 ]; do
case "$1" in
-h | --help)
# print_help exits with status 0 itself, so the exit below is only a fallback.
print_help
exit $STATE_OK
;;
-v | --version)
print_release
exit $STATE_OK
;;
-w | --warning)
# iowait percentage from which WARNING is reported
shift
WARNING_THRESHOLD=$1
;;
-c | --critical)
# iowait percentage from which CRITICAL is reported
shift
CRITICAL_THRESHOLD=$1
;;
-i | --interval)
# interval in seconds passed to iostat
shift
INTERVAL_SEC=$1
;;
-n | --number)
# number of reports requested from iostat
shift
NUM_REPORT=$1
;;
# Anything else is rejected with the usage text and status UNKNOWN.
*) echo "Unknown argument: $1"
print_usage
exit $STATE_UNKNOWN
;;
esac
shift
done
# CPU Utilization Statistics Unix platform ( Linux, AIX, Solaris are supported )
# Runs iostat for the current platform, keeps the last report line and splits
# it into the individual CPU percentages. CPU_IOWAIT_MAJOR is the integer part
# of the iowait value and is what the thresholds are compared against.
# iostat is invoked through "$IOSTAT", i.e. the same binary whose
# executability was verified earlier, instead of whatever "iostat" is first
# in PATH.
case `uname` in
    Linux )
        # Decimal commas become points and runs of spaces become ';' so the
        # columns can be picked with cut; empty lines are dropped.
        CPU_REPORT=`"$IOSTAT" -c $INTERVAL_SEC $NUM_REPORT | sed -e 's/,/./g' | tr -s ' ' ';' | sed '/^$/d' | tail -1`
        # Number of ';' separators: 6 or more means a steal column is present.
        CPU_REPORT_SECTIONS=`echo ${CPU_REPORT} | grep ';' -o | wc -l`
        CPU_USER=`echo $CPU_REPORT | cut -d ";" -f 2`
        CPU_NICE=`echo $CPU_REPORT | cut -d ";" -f 3`
        CPU_SYSTEM=`echo $CPU_REPORT | cut -d ";" -f 4`
        CPU_IOWAIT=`echo $CPU_REPORT | cut -d ";" -f 5`
        CPU_IOWAIT_MAJOR=`echo $CPU_IOWAIT | cut -d "." -f 1`
        if [ ${CPU_REPORT_SECTIONS} -ge 6 ]; then
            CPU_STEAL=`echo $CPU_REPORT | cut -d ";" -f 6`
            CPU_IDLE=`echo $CPU_REPORT | cut -d ";" -f 7`
            NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=${CPU_IOWAIT}% idle=${CPU_IDLE}% nice=${CPU_NICE}% steal=${CPU_STEAL}% | CpuUser=${CPU_USER};CpuSystem=${CPU_SYSTEM};CpuIoWait=${CPU_IOWAIT};CpuIdle=${CPU_IDLE};CpuNice=${CPU_NICE};CpuSteal=${CPU_STEAL};$WARNING_THRESHOLD;$CRITICAL_THRESHOLD"
        else
            CPU_IDLE=`echo $CPU_REPORT | cut -d ";" -f 6`
            NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=${CPU_IOWAIT}% idle=${CPU_IDLE}% nice=${CPU_NICE}% | CpuUser=${CPU_USER};CpuSystem=${CPU_SYSTEM};CpuIoWait=${CPU_IOWAIT};CpuIdle=${CPU_IDLE};CpuNice=${CPU_NICE};$WARNING_THRESHOLD;$CRITICAL_THRESHOLD"
        fi
        ;;
    AIX )
        # Note the column order used here: idle is field 6, iowait is field 7.
        CPU_REPORT=`"$IOSTAT" -t $INTERVAL_SEC $NUM_REPORT | sed -e 's/,/./g'|tr -s ' ' ';' | tail -1`
        CPU_USER=`echo $CPU_REPORT | cut -d ";" -f 4`
        CPU_SYSTEM=`echo $CPU_REPORT | cut -d ";" -f 5`
        CPU_IOWAIT=`echo $CPU_REPORT | cut -d ";" -f 7`
        CPU_IOWAIT_MAJOR=`echo $CPU_IOWAIT | cut -d "." -f 1`
        CPU_IDLE=`echo $CPU_REPORT | cut -d ";" -f 6`
        NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=${CPU_IOWAIT}% idle=${CPU_IDLE}% | CpuUser=${CPU_USER};CpuSystem=${CPU_SYSTEM};CpuIoWait=${CPU_IOWAIT};CpuIdle=${CPU_IDLE};$WARNING_THRESHOLD;$CRITICAL_THRESHOLD"
        ;;
    SunOS )
        # The last report line is split on whitespace by awk.
        CPU_REPORT=`"$IOSTAT" -c $INTERVAL_SEC $NUM_REPORT | tail -1`
        CPU_USER=`echo $CPU_REPORT | awk '{ print $1 }'`
        CPU_SYSTEM=`echo $CPU_REPORT | awk '{ print $2 }'`
        CPU_IOWAIT=`echo $CPU_REPORT | awk '{ print $3 }'`
        CPU_IOWAIT_MAJOR=`echo $CPU_IOWAIT | cut -d "." -f 1`
        CPU_IDLE=`echo $CPU_REPORT | awk '{ print $4 }'`
        NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=${CPU_IOWAIT}% idle=${CPU_IDLE}% | CpuUser=${CPU_USER};CpuSystem=${CPU_SYSTEM};CpuIoWait=${CPU_IOWAIT};CpuIdle=${CPU_IDLE};$WARNING_THRESHOLD;$CRITICAL_THRESHOLD"
        ;;
    *)
        echo "UNKNOWN: `uname` not yet supported by this plugin. Coming soon !"
        exit $STATE_UNKNOWN
        ;;
esac
# Return
# Compare the integer part of iowait with the thresholds and exit with the
# matching Nagios state.
# If iostat produced nothing usable, CPU_IOWAIT_MAJOR is empty or not a
# number; the numeric tests would then fail with an error and fall through
# to the OK branch. Report UNKNOWN instead of a false OK.
case "$CPU_IOWAIT_MAJOR" in
    '' | *[!0-9]*)
        echo "CPU STATISTICS UNKNOWN : unable to parse iowait from iostat output"
        exit $STATE_UNKNOWN
        ;;
esac

if [ "$CPU_IOWAIT_MAJOR" -ge "$WARNING_THRESHOLD" ] && [ "$CPU_IOWAIT_MAJOR" -lt "$CRITICAL_THRESHOLD" ]; then
    echo "CPU STATISTICS WARNING : ${NAGIOS_DATA}"
    exit $STATE_WARNING
elif [ "$CPU_IOWAIT_MAJOR" -ge "$CRITICAL_THRESHOLD" ]; then
    echo "CPU STATISTICS CRITICAL : ${NAGIOS_DATA}"
    exit $STATE_CRITICAL
else
    echo "CPU STATISTICS OK : ${NAGIOS_DATA}"
    exit $STATE_OK
fi
# Total CPU usage is user + system time: critical above 90%, warning above 80%.
total_usage = cpu_usage['us'] + cpu_usage['sy']
usage_fields = (total_usage, cpu_usage['us'], cpu_usage['sy'])
if total_usage > 90:
    crit.add(
        'Total CPU usage is {:.1f}% ({}% user, {}% system, >90%)'.format(*usage_fields)
    )
elif total_usage > 80:
    # NOTE(review): this text formats its threshold differently from the
    # critical one above (" > 80%" vs ", >90%"); kept as is.
    warn.add(
        'Total CPU usage is {:.1f}% ({}% user, {}% system > 80%)'.format(*usage_fields)
    )

# Critical messages are printed first, then warnings; each group is sorted.
for line in sorted(crit) + sorted(warn):
    print(line)

# Exit status: 2 when anything is critical, 1 when there are only warnings,
# 0 otherwise.
exit(2 if crit else 1 if warn else 0)
# Any exception raised inside the try block is reported by printing its
# repr(), and the script exits with status 3.
except Exception as e:
print(repr(e))
exit(3)