bundles/nginx: add monitoring
All checks were successful
bundlewrap/pipeline/head This commit looks good
All checks were successful
bundlewrap/pipeline/head This commit looks good
This commit is contained in:
parent
65a8efc97f
commit
8cb997133a
5 changed files with 528 additions and 1 deletions
|
@ -42,6 +42,7 @@ Rule of thumb: keep ports below 10000 free for stuff that reserves ports.
|
|||
| 22030 | octoprint | OctoPrint Web Interface |
|
||||
| 22040 | miniflux | Miniflux Web Interface |
|
||||
| 22050 | radicale | radicale carddav and caldav server |
|
||||
| 22999 | nginx | stub_status |
|
||||
|
||||
## UDP
|
||||
| Port | bundle | usage |
|
||||
|
|
474
bundles/nginx/files/check_nginx_status
Normal file
474
bundles/nginx/files/check_nginx_status
Normal file
|
@ -0,0 +1,474 @@
|
|||
#!/usr/bin/env perl
|
||||
# check_nginx_status.pl
|
||||
# Author : regis.leroy at makina-corpus.com
|
||||
# Licence : GPL - http://www.fsf.org/licenses/gpl.txt
|
||||
#
|
||||
# help : ./check_nginx_status.pl -h
|
||||
#
|
||||
# issues & updates: http://github.com/regilero/check_nginx_status
|
||||
use warnings;
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
use LWP::UserAgent;
|
||||
use Time::HiRes qw(gettimeofday tv_interval);
|
||||
use Digest::MD5 qw(md5 md5_hex);
|
||||
use FindBin;
|
||||
|
||||
# ensure all outputs are in UTF-8
|
||||
binmode(STDOUT, ":utf8");
|
||||
|
||||
# Nagios specific
|
||||
use lib "/usr/lib/nagios/plugins";
|
||||
use utils qw($TIMEOUT);
|
||||
|
||||
# Globals
|
||||
my $Version='0.20';
|
||||
my $Name=$0;
|
||||
|
||||
my $o_host = undef; # hostname
|
||||
my $o_help= undef; # want some help ?
|
||||
my $o_port= undef; # port
|
||||
my $o_url = undef; # url to use, if not the default
|
||||
my $o_user= undef; # user for auth
|
||||
my $o_pass= ''; # password for auth
|
||||
my $o_realm= ''; # password for auth
|
||||
my $o_version= undef; # print version
|
||||
my $o_warn_a_level= -1; # Number of active connections that will cause a warning
|
||||
my $o_crit_a_level= -1; # Number of active connections that will cause an error
|
||||
my $o_warn_rps_level= -1; # Number of Request per second that will cause a warning
|
||||
my $o_crit_rps_level= -1; # Number of request Per second that will cause an error
|
||||
my $o_warn_cps_level= -1; # Number of Connections per second that will cause a warning
|
||||
my $o_crit_cps_level= -1; # Number of Connections per second that will cause an error
|
||||
my $o_timeout= 15; # Default 15s Timeout
|
||||
my $o_warn_thresold= undef; # warning thresolds entry
|
||||
my $o_crit_thresold= undef; # critical thresolds entry
|
||||
my $o_debug= undef; # debug mode
|
||||
my $o_servername= undef; # ServerName (host header in http request)
|
||||
my $o_https= undef; # SSL (HTTPS) mode
|
||||
my $o_disable_sslverifyhostname = 0;
|
||||
|
||||
my $TempPath = '/tmp/'; # temp path
|
||||
my $MaxTimeDif = 60*30; # Maximum uptime difference (seconds), default 30 minutes
|
||||
|
||||
my $nginx = 'NGINX'; # Could be used to store version also
|
||||
|
||||
# functions
|
||||
sub show_versioninfo { print "$Name version : $Version\n"; }
|
||||
|
||||
sub print_usage {
|
||||
print "Usage: $Name -H <host ip> [-p <port>] [-s servername] [-t <timeout>] [-w <WARN_THRESOLD> -c <CRIT_THRESOLD>] [-V] [-d] [-u <url>] [-U user -P pass -r realm]\n";
|
||||
}
|
||||
sub nagios_exit {
|
||||
my ( $nickname, $status, $message, $perfdata , $silent) = @_;
|
||||
my %STATUSCODE = (
|
||||
'OK' => 0
|
||||
, 'WARNING' => 1
|
||||
, 'CRITICAL' => 2
|
||||
, 'UNKNOWN' => 3
|
||||
, 'PENDING' => 4
|
||||
);
|
||||
if(!defined($silent)) {
|
||||
my $output = undef;
|
||||
$output .= sprintf('%1$s %2$s - %3$s', $nickname, $status, $message);
|
||||
if ($perfdata) {
|
||||
$output .= sprintf('|%1$s', $perfdata);
|
||||
}
|
||||
$output .= chr(10);
|
||||
print $output;
|
||||
}
|
||||
exit $STATUSCODE{$status};
|
||||
}
|
||||
|
||||
# Get the alarm signal
|
||||
$SIG{'ALRM'} = sub {
|
||||
nagios_exit($nginx,"CRITICAL","ERROR: Alarm signal (Nagios timeout)");
|
||||
};
|
||||
|
||||
sub help {
|
||||
print "Nginx Monitor for Nagios version ",$Version,"\n";
|
||||
print "GPL licence, (c)2012 Leroy Regis\n\n";
|
||||
print_usage();
|
||||
print <<EOT;
|
||||
-h, --help
|
||||
print this help message
|
||||
-H, --hostname=HOST
|
||||
name or IP address of host to check
|
||||
-p, --port=PORT
|
||||
Http port
|
||||
-u, --url=URL
|
||||
Specific URL to use, instead of the default "http://<hostname or IP>/nginx_status"
|
||||
-s, --servername=SERVERNAME
|
||||
ServerName, (host header of HTTP request) use it if you specified an IP in -H to match the good Virtualhost in your target
|
||||
-S, --ssl
|
||||
Wether we should use HTTPS instead of HTTP
|
||||
--disable-sslverifyhostname
|
||||
Disable SSL hostname verification
|
||||
-U, --user=user
|
||||
Username for basic auth
|
||||
-P, --pass=PASS
|
||||
Password for basic auth
|
||||
-r, --realm=REALM
|
||||
Realm for basic auth
|
||||
-d, --debug
|
||||
Debug mode (show http request response)
|
||||
-m, --maxreach=MAX
|
||||
Number of max processes reached (since last check) that should trigger an alert
|
||||
-t, --timeout=INTEGER
|
||||
timeout in seconds (Default: $o_timeout)
|
||||
-w, --warn=ACTIVE_CONN,REQ_PER_SEC,CONN_PER_SEC
|
||||
number of active connections, ReqPerSec or ConnPerSec that will cause a WARNING
|
||||
-1 for no warning
|
||||
-c, --critical=ACTIVE_CONN,REQ_PER_SEC,CONN_PER_SEC
|
||||
number of active connections, ReqPerSec or ConnPerSec that will cause a CRITICAL
|
||||
-1 for no CRITICAL
|
||||
-V, --version
|
||||
prints version number
|
||||
|
||||
Note :
|
||||
3 items can be managed on this check, this is why -w and -c parameters are using 3 values thresolds
|
||||
- ACTIVE_CONN: Number of all opened connections, including connections to backends
|
||||
- REQ_PER_SEC: Average number of request per second between this check and the previous one
|
||||
- CONN_PER_SEC: Average number of connections per second between this check and the previous one
|
||||
|
||||
Examples:
|
||||
|
||||
This one will generate WARNING and CRITICIAL alerts if you reach 10 000 or 20 000 active connection; or
|
||||
100 or 200 request per second; or 200 or 300 connections per second
|
||||
check_nginx_status.pl -H 10.0.0.10 -u /foo/nginx_status -s mydomain.example.com -t 8 -w 10000,100,200 -c 20000,200,300
|
||||
|
||||
this will generate WARNING and CRITICAL alerts only on the number of active connections (with low numbers for nginx)
|
||||
check_nginx_status.pl -H 10.0.0.10 -s mydomain.example.com -t 8 -w 10,-1,-1 -c 20,-1,-1
|
||||
|
||||
theses two equivalents will not generate any alert (if the nginx_status page is reachable) but could be used for graphics
|
||||
check_nginx_status.pl -H 10.0.0.10 -s mydomain.example.com -w -1,-1,-1 -c -1,-1,-1
|
||||
check_nginx_status.pl -H 10.0.0.10 -s mydomain.example.com
|
||||
|
||||
EOT
|
||||
}
|
||||
|
||||
sub check_options {
|
||||
Getopt::Long::Configure ("bundling");
|
||||
GetOptions(
|
||||
'h' => \$o_help, 'help' => \$o_help,
|
||||
'd' => \$o_debug, 'debug' => \$o_debug,
|
||||
'H:s' => \$o_host, 'hostname:s' => \$o_host,
|
||||
's:s' => \$o_servername, 'servername:s' => \$o_servername,
|
||||
'S:s' => \$o_https, 'ssl:s' => \$o_https,
|
||||
'u:s' => \$o_url, 'url:s' => \$o_url,
|
||||
'U:s' => \$o_user, 'user:s' => \$o_user,
|
||||
'P:s' => \$o_pass, 'pass:s' => \$o_pass,
|
||||
'r:s' => \$o_realm, 'realm:s' => \$o_realm,
|
||||
'p:i' => \$o_port, 'port:i' => \$o_port,
|
||||
'V' => \$o_version, 'version' => \$o_version,
|
||||
'w:s' => \$o_warn_thresold,'warn:s' => \$o_warn_thresold,
|
||||
'c:s' => \$o_crit_thresold,'critical:s' => \$o_crit_thresold,
|
||||
't:i' => \$o_timeout, 'timeout:i' => \$o_timeout,
|
||||
'disable-sslverifyhostname' => \$o_disable_sslverifyhostname,
|
||||
);
|
||||
|
||||
if (defined ($o_help)) {
|
||||
help();
|
||||
nagios_exit($nginx,"UNKNOWN","leaving","",1);
|
||||
}
|
||||
if (defined($o_version)) {
|
||||
show_versioninfo();
|
||||
nagios_exit($nginx,"UNKNOWN","leaving","",1);
|
||||
};
|
||||
|
||||
if (defined($o_warn_thresold)) {
|
||||
($o_warn_a_level,$o_warn_rps_level,$o_warn_cps_level) = split(',', $o_warn_thresold);
|
||||
}
|
||||
if (defined($o_crit_thresold)) {
|
||||
($o_crit_a_level,$o_crit_rps_level,$o_crit_cps_level) = split(',', $o_crit_thresold);
|
||||
}
|
||||
if (defined($o_debug)) {
|
||||
print("\nDebug thresolds: \nWarning: ($o_warn_thresold) => Active: $o_warn_a_level ReqPerSec :$o_warn_rps_level ConnPerSec: $o_warn_cps_level");
|
||||
print("\nCritical ($o_crit_thresold) => : Active: $o_crit_a_level ReqPerSec: $o_crit_rps_level ConnPerSec : $o_crit_cps_level\n");
|
||||
}
|
||||
if ((defined($o_warn_a_level) && defined($o_crit_a_level)) &&
|
||||
(($o_warn_a_level != -1) && ($o_crit_a_level != -1) && ($o_warn_a_level >= $o_crit_a_level)) ) {
|
||||
nagios_exit($nginx,"UNKNOWN","Check warning and critical values for Active Process (1st part of thresold), warning level must be < crit level!");
|
||||
}
|
||||
if ((defined($o_warn_rps_level) && defined($o_crit_rps_level)) &&
|
||||
(($o_warn_rps_level != -1) && ($o_crit_rps_level != -1) && ($o_warn_rps_level >= $o_crit_rps_level)) ) {
|
||||
nagios_exit($nginx,"UNKNOWN","Check warning and critical values for ReqPerSec (2nd part of thresold), warning level must be < crit level!");
|
||||
}
|
||||
if ((defined($o_warn_cps_level) && defined($o_crit_cps_level)) &&
|
||||
(($o_warn_cps_level != -1) && ($o_crit_cps_level != -1) && ($o_warn_cps_level >= $o_crit_cps_level)) ) {
|
||||
nagios_exit($nginx,"UNKNOWN","Check warning and critical values for ConnPerSec (3rd part of thresold), warning level must be < crit level!");
|
||||
}
|
||||
# Check compulsory attributes
|
||||
if (!defined($o_host)) {
|
||||
print_usage();
|
||||
nagios_exit($nginx,"UNKNOWN","-H host argument required");
|
||||
}
|
||||
}
|
||||
|
||||
########## MAIN ##########
|
||||
|
||||
check_options();
|
||||
|
||||
my $override_ip = $o_host;
|
||||
my $ua = LWP::UserAgent->new(
|
||||
protocols_allowed => ['http', 'https'],
|
||||
timeout => $o_timeout
|
||||
);
|
||||
|
||||
if ( $o_disable_sslverifyhostname ) {
|
||||
$ua->ssl_opts( 'verify_hostname' => 0 );
|
||||
}
|
||||
|
||||
# we need to enforce the HTTP request is made on the Nagios Host IP and
|
||||
# not on the DNS related IP for that domain
|
||||
@LWP::Protocol::http::EXTRA_SOCK_OPTS = ( PeerAddr => $override_ip );
|
||||
# this prevent used only once warning in -w mode
|
||||
my $ua_settings = @LWP::Protocol::http::EXTRA_SOCK_OPTS;
|
||||
|
||||
my $timing0 = [gettimeofday];
|
||||
my $response = undef;
|
||||
my $url = undef;
|
||||
|
||||
if (!defined($o_url)) {
|
||||
$o_url='/nginx_status';
|
||||
} else {
|
||||
# ensure we have a '/' as first char
|
||||
$o_url = '/'.$o_url unless $o_url =~ m(^/)
|
||||
}
|
||||
my $proto='http://';
|
||||
if(defined($o_https)) {
|
||||
$proto='https://';
|
||||
if (defined($o_port) && $o_port!=443) {
|
||||
if (defined ($o_debug)) {
|
||||
print "\nDEBUG: Notice: port is defined at $o_port and not 443, check you really want that in SSL mode! \n";
|
||||
}
|
||||
}
|
||||
}
|
||||
if (defined($o_servername)) {
|
||||
if (!defined($o_port)) {
|
||||
$url = $proto . $o_servername . $o_url;
|
||||
} else {
|
||||
$url = $proto . $o_servername . ':' . $o_port . $o_url;
|
||||
}
|
||||
} else {
|
||||
if (!defined($o_port)) {
|
||||
$url = $proto . $o_host . $o_url;
|
||||
} else {
|
||||
$url = $proto . $o_host . ':' . $o_port . $o_url;
|
||||
}
|
||||
}
|
||||
if (defined ($o_debug)) {
|
||||
print "\nDEBUG: HTTP url: \n";
|
||||
print $url;
|
||||
}
|
||||
|
||||
my $req = HTTP::Request->new( GET => $url );
|
||||
|
||||
if (defined($o_servername)) {
|
||||
$req->header('Host' => $o_servername);
|
||||
}
|
||||
if (defined($o_user)) {
|
||||
$req->authorization_basic($o_user, $o_pass);
|
||||
}
|
||||
|
||||
if (defined ($o_debug)) {
|
||||
print "\nDEBUG: HTTP request: \n";
|
||||
print "IP used (better if it's an IP):" . $override_ip . "\n";
|
||||
print $req->as_string;
|
||||
}
|
||||
$response = $ua->request($req);
|
||||
my $timeelapsed = tv_interval ($timing0, [gettimeofday]);
|
||||
|
||||
my $InfoData = '';
|
||||
my $PerfData = '';
|
||||
#my @Time = (localtime); # list context and not scalar as we want the brutal timestamp
|
||||
my $Time = time;
|
||||
|
||||
my $webcontent = undef;
|
||||
if ($response->is_success) {
|
||||
$webcontent=$response->decoded_content;
|
||||
if (defined ($o_debug)) {
|
||||
print "\nDEBUG: HTTP response:";
|
||||
print $response->status_line;
|
||||
print "\n".$response->header('Content-Type');
|
||||
print "\n";
|
||||
print $webcontent;
|
||||
}
|
||||
if ($response->header('Content-Type') =~ m/text\/html/) {
|
||||
nagios_exit($nginx,"CRITICAL", "We have a response page for our request, but it's an HTML page, quite certainly not the status report of nginx");
|
||||
}
|
||||
# example of response content expected:
|
||||
#Active connections: 10
|
||||
#server accepts handled requests
|
||||
#38500 38500 50690
|
||||
#Reading: 5 Writing: 5 Waiting: 0
|
||||
|
||||
# number of all open connections including connections to backends
|
||||
my $ActiveConn = 0;
|
||||
if($webcontent =~ m/Active connections: (.*?)\n/) {
|
||||
$ActiveConn = $1;
|
||||
# triming
|
||||
$ActiveConn =~ s/^\s+|\s+$//g;
|
||||
}
|
||||
|
||||
|
||||
# 3 counters with a space: accepted conn, handled conn and number of requests
|
||||
my $counters = '';
|
||||
my $AcceptedConn = 0;
|
||||
my $HandledConn = 0;
|
||||
my $NbRequests = 0;
|
||||
if($webcontent =~ m/\nserver accepts handled requests\n(.*?)\n/) {
|
||||
$counters = $1;
|
||||
# triming
|
||||
$counters =~ s/^\s+|\s+$//g;
|
||||
#splitting
|
||||
($AcceptedConn,$HandledConn,$NbRequests) = split(' ', $counters);
|
||||
# triming
|
||||
$AcceptedConn =~ s/^\s+|\s+$//g;
|
||||
$HandledConn =~ s/^\s+|\s+$//g;
|
||||
$NbRequests =~ s/^\s+|\s+$//g;
|
||||
}
|
||||
|
||||
# nginx reads request header
|
||||
my $Reading = 0;
|
||||
# nginx reads request body, processes request, or writes response to a client
|
||||
my $Writing = 0;
|
||||
# keep-alive connections, actually it is active - (reading + writing)
|
||||
my $Waiting = 0;
|
||||
if($webcontent =~ m/Reading: (.*?)Writing: (.*?)Waiting: (.*?)$/) {
|
||||
$Reading = $1;
|
||||
$Writing = $2;
|
||||
$Waiting = $3;
|
||||
# triming
|
||||
$Reading =~ s/^\s+|\s+$//g;
|
||||
$Writing =~ s/^\s+|\s+$//g;
|
||||
$Waiting =~ s/^\s+|\s+$//g;
|
||||
}
|
||||
|
||||
# Debug
|
||||
if (defined ($o_debug)) {
|
||||
print ("\nDEBUG Parse results => Active :" . $ActiveConn . "\nAcceptedConn :" . $AcceptedConn . "\nHandledConn :" . $HandledConn . "\nNbRequests :".$NbRequests . "\nReading :" .$Reading . "\nWriting :" . $Writing . "\nWaiting :" . $Waiting . "\n");
|
||||
}
|
||||
|
||||
my $TempFile = $TempPath.$o_host.'_check_nginx_status'.md5_hex($url);
|
||||
my $FH;
|
||||
|
||||
my $LastTime = 0;
|
||||
my $LastAcceptedConn = 0;
|
||||
my $LastHandledConn = 0;
|
||||
my $LastNbRequests = 0;
|
||||
if ((-e $TempFile) && (-r $TempFile) && (-w $TempFile)) {
|
||||
open ($FH, '<',$TempFile) or nagios_exit($nginx,"UNKNOWN","unable to read temporary data from :".$TempFile);
|
||||
$LastTime = <$FH>;
|
||||
$LastAcceptedConn = <$FH>;
|
||||
$LastHandledConn = <$FH>;
|
||||
$LastNbRequests = <$FH>;
|
||||
close ($FH);
|
||||
if (defined ($o_debug)) {
|
||||
print ("\nDebug: data from temporary file: $TempFile\n");
|
||||
print (" LastTime: $LastTime LastAcceptedConn: $LastAcceptedConn LastHandledConn: $LastHandledConn LastNbRequests: $LastNbRequests \n");
|
||||
}
|
||||
}
|
||||
|
||||
open ($FH, '>'.$TempFile) or nagios_exit($nginx,"UNKNOWN","unable to write temporary data in :".$TempFile);
|
||||
#print $FH (@Time),"\n";
|
||||
print $FH "$Time\n";
|
||||
print $FH "$AcceptedConn\n";
|
||||
print $FH "$HandledConn\n";
|
||||
print $FH "$NbRequests\n";
|
||||
close ($FH);
|
||||
|
||||
my $ConnPerSec = 0;
|
||||
my $ReqPerSec = 0;
|
||||
my $RequestsNew = 0;
|
||||
# by default the average
|
||||
my $ReqPerConn = 0;
|
||||
if ($AcceptedConn > 0) {
|
||||
$ReqPerConn = $NbRequests/$AcceptedConn;
|
||||
}
|
||||
my $elapsed = $Time - $LastTime ;
|
||||
if (defined ($o_debug)) {
|
||||
print ("\nDebug: pre-computation\n");
|
||||
print ("Average ReqPerconn: $ReqPerConn, Seconds elapsed Since last check: $elapsed\n");
|
||||
}
|
||||
# check only if the counters may have been incremented
|
||||
# but not if it may have been too much incremented
|
||||
# if nginx was restarted ($NbRequests is now lower than previous value), just skip
|
||||
if ( ($elapsed < $MaxTimeDif) && ($elapsed != 0) && ($NbRequests >= $LastNbRequests) ) {
|
||||
$ConnPerSec = ($AcceptedConn-$LastAcceptedConn)/$elapsed;
|
||||
$RequestsNew = $NbRequests-$LastNbRequests;
|
||||
$ReqPerSec = $RequestsNew/$elapsed;
|
||||
# get finer value
|
||||
if ( $ConnPerSec!=0 ) {
|
||||
my $ReqPerConn = $ReqPerSec/$ConnPerSec;
|
||||
} else {
|
||||
my $ReqPerConn = 0;
|
||||
}
|
||||
}
|
||||
if (defined ($o_debug)) {
|
||||
print ("\nDebug: data computed\n");
|
||||
print ("ConnPerSec: $ConnPerSec ReqPerSec: $ReqPerSec ReqPerConn: $ReqPerConn\n");
|
||||
}
|
||||
$InfoData = sprintf (" %.3f sec. response time, Active: %d (Writing: %d Reading: %d Waiting: %d)"
|
||||
. " ReqPerSec: %.3f ConnPerSec: %.3f ReqPerConn: %.3f"
|
||||
,$timeelapsed,$ActiveConn,$Writing,$Reading,$Waiting,$ReqPerSec,$ConnPerSec,$ReqPerConn);
|
||||
|
||||
# Manage warn and crit values for the perfdata
|
||||
my $p_warn_a_level = "$o_warn_a_level";
|
||||
my $p_crit_a_level = "$o_crit_a_level";
|
||||
my $p_warn_rps_level = "$o_warn_rps_level";
|
||||
my $p_crit_rps_level = "$o_crit_rps_level";
|
||||
my $p_warn_cps_level = "$o_warn_cps_level";
|
||||
my $p_crit_cps_level = "$o_crit_cps_level";
|
||||
|
||||
if ($p_warn_a_level == "-1") {
|
||||
$p_warn_a_level = "";
|
||||
}
|
||||
if ($p_crit_a_level == "-1") {
|
||||
$p_crit_a_level = "";
|
||||
}
|
||||
if ($p_warn_rps_level == "-1") {
|
||||
$p_warn_rps_level = "";
|
||||
}
|
||||
if ($p_crit_rps_level == "-1") {
|
||||
$p_crit_rps_level = "";
|
||||
}
|
||||
if ($p_warn_cps_level == "-1") {
|
||||
$p_warn_cps_level = "";
|
||||
}
|
||||
if ($p_crit_cps_level == "-1") {
|
||||
$p_crit_cps_level = "";
|
||||
}
|
||||
|
||||
$PerfData = sprintf ("Writing=%d;;;; Reading=%d;;;; Waiting=%d;;;; Active=%d;%s;%s;; "
|
||||
. "ReqPerSec=%f;%s;%s;; ConnPerSec=%f;%s;%s;; ReqPerConn=%f;;;;"
|
||||
,($Writing),($Reading),($Waiting),($ActiveConn)
|
||||
,($p_warn_a_level),($p_crit_a_level)
|
||||
,($ReqPerSec),($p_warn_rps_level),($p_crit_rps_level)
|
||||
,($ConnPerSec),($p_warn_cps_level),($p_crit_cps_level)
|
||||
,($ReqPerConn));
|
||||
# first all critical exists by priority
|
||||
if (defined($o_crit_a_level) && (-1!=$o_crit_a_level) && ($ActiveConn >= $o_crit_a_level)) {
|
||||
nagios_exit($nginx,"CRITICAL", "Active Connections are critically high " . $InfoData,$PerfData);
|
||||
}
|
||||
if (defined($o_crit_rps_level) && (-1!=$o_crit_rps_level) && ($ReqPerSec >= $o_crit_rps_level)) {
|
||||
nagios_exit($nginx,"CRITICAL", "Request per second ratios is critically high " . $InfoData,$PerfData);
|
||||
}
|
||||
if (defined($o_crit_cps_level) && (-1!=$o_crit_cps_level) && ($ConnPerSec >= $o_crit_cps_level)) {
|
||||
nagios_exit($nginx,"CRITICAL", "Connection per second ratio is critically high " . $InfoData,$PerfData);
|
||||
}
|
||||
# Then WARNING exits by priority
|
||||
if (defined($o_warn_a_level) && (-1!=$o_warn_a_level) && ($ActiveConn >= $o_warn_a_level)) {
|
||||
nagios_exit($nginx,"WARNING", "Active Connections are high " . $InfoData,$PerfData);
|
||||
}
|
||||
if (defined($o_warn_rps_level) && (-1!=$o_warn_rps_level) && ($ReqPerSec >= $o_warn_rps_level)) {
|
||||
nagios_exit($nginx,"WARNING", "Requests per second ratio is high " . $InfoData,$PerfData);
|
||||
}
|
||||
if (defined($o_warn_cps_level) && (-1!=$o_warn_cps_level) && ($ConnPerSec >= $o_warn_cps_level)) {
|
||||
nagios_exit($nginx,"WARNING", "Connection per second ratio is high " . $InfoData,$PerfData);
|
||||
}
|
||||
|
||||
nagios_exit($nginx,"OK",$InfoData,$PerfData);
|
||||
|
||||
} else {
|
||||
nagios_exit($nginx,"CRITICAL", $response->status_line);
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
server {
|
||||
listen [::1]:8080 default_server;
|
||||
listen 127.0.0.1:22999 default_server;
|
||||
server_name _;
|
||||
|
||||
stub_status;
|
||||
|
|
|
@ -25,6 +25,9 @@ files = {
|
|||
'svc_systemd:nginx:restart',
|
||||
},
|
||||
},
|
||||
'/usr/local/share/icinga/plugins/check_nginx_status': {
|
||||
'mode': '0755',
|
||||
},
|
||||
}
|
||||
|
||||
actions = {
|
||||
|
|
|
@ -11,6 +11,18 @@ defaults = {
|
|||
'nginx': {},
|
||||
},
|
||||
},
|
||||
'icinga2_api': {
|
||||
'nginx': {
|
||||
'services': {
|
||||
'NGINX PROCESS': {
|
||||
'command_on_monitored_host': '/usr/local/share/icinga/plugins/check_systemd_unit nginx',
|
||||
},
|
||||
'NGINX STATUS': {
|
||||
'command_on_monitored_host': '/usr/local/share/icinga/plugins/check_nginx_status',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
'nginx': {
|
||||
'worker_connections': 768,
|
||||
'use_ssl_for_all_connections': True,
|
||||
|
@ -70,3 +82,40 @@ def index_files(metadata):
|
|||
'vhosts': vhosts,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@metadata_reactor
|
||||
def monitoring(metadata):
|
||||
services = {}
|
||||
|
||||
for vname, vconfig in metadata.get('nginx/vhosts', {}).items():
|
||||
domain = vconfig.get('domain', vname)
|
||||
|
||||
if 'website_check_path' in vconfig and 'website_check_string' in vconfig:
|
||||
services['NGINX VHOST {} CONTENT'.format(vname)] = {
|
||||
'check_command': 'check_http_wget',
|
||||
'vars.http_wget_contains': vconfig['website_check_string'],
|
||||
'vars.http_wget_url': '{}{}'.format(domain, vconfig['website_check_path']),
|
||||
}
|
||||
|
||||
if vconfig.get('check_ssl', False):
|
||||
services['NGINX VHOST {} CERTIFICATE'.format(vname)] = {
|
||||
'check_command': 'check_vhost_https_cert_at_url',
|
||||
'vars.domain': domain,
|
||||
}
|
||||
|
||||
max_connections = metadata.get('nginx/worker_connections') * metadata.get('nginx/worker_processes')
|
||||
connections_warn = int(max_connections * 0.8)
|
||||
connections_crit = int(max_connections * 0.9)
|
||||
|
||||
services['NGINX STATUS'] = {
|
||||
'command_on_monitored_host': '/usr/local/share/icinga/plugins/check_nginx_status --warn={},-1,-1 --critical={},-1,-1 -H 127.0.0.1:22999'.format(connections_warn, connections_crit),
|
||||
}
|
||||
|
||||
return {
|
||||
'icinga2_api': {
|
||||
'nginx': {
|
||||
'services': services,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue