#!/bin/sh
# Nagios plugin: alert when the free space of a filesystem gets low.
#
# Usage: check_disk [-w WARN%] [-c CRIT%] [-p PATH]
#   -w  WARNING when the free percentage is <= WARN (default 10%)
#   -c  CRITICAL when the free percentage is <= CRIT (default 5%)
#   -p  any path on the filesystem to inspect (default /)
# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.

PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

# Defaults, overridden by the command line
warninglevel=10%
criticallevel=5%
filetest=/

# Each option consumes the next word as its value. An option given last
# without a value is ignored (the default is kept) instead of making
# `shift` fail on an empty argument list.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -w) if [ "$#" -ge 2 ]; then shift; warninglevel=$1; fi ;;
        -p) if [ "$#" -ge 2 ]; then shift; filetest=$1; fi ;;
        -c) if [ "$#" -ge 2 ]; then shift; criticallevel=$1; fi ;;
    esac
    shift
done

# is_number VALUE: succeed when VALUE is a non-empty string of digits.
is_number() {
    case "$1" in
        ''|*[!0-9]*) return 1 ;;
    esac
    return 0
}

# Thresholds may be written with or without the trailing %
warninglevelreduced=$(printf '%s' "$warninglevel" | tr -d %)
criticallevelreduced=$(printf '%s' "$criticallevel" | tr -d %)

if ! is_number "$warninglevelreduced" || ! is_number "$criticallevelreduced"
then
    echo "DISK UNKNOWN - thresholds must be integer percentages (got -w $warninglevel -c $criticallevel)"
    exit 3
fi

# -P forces the POSIX one-record-per-line layout, so a long device name
# cannot wrap the record and shift the columns read by awk.
usedpercent=$(df -P "$filetest" 2>/dev/null | awk 'NR == 2 { print $5 }' | tr -d %)
realspace=$(df -hP "$filetest" 2>/dev/null | awk 'NR == 2 { print $4 }')

if ! is_number "$usedpercent"
then
    echo "DISK UNKNOWN - cannot read disk usage of $filetest"
    exit 3
fi

# df reports the used percentage; the thresholds are about free space
valuetest=$((100 - usedpercent))
msg="- free space: $filetest $realspace ($valuetest %)"

# Critical is tested first so that it wins over warning
if [ "$valuetest" -le "$criticallevelreduced" ]
then
    echo "DISK CRITICAL $msg"
    exit 2
fi

if [ "$valuetest" -le "$warninglevelreduced" ]
then
    echo "DISK WARNING $msg"
    exit 1
fi

# All is fine
echo "DISK OK $msg"
exit 0
#!/usr/bin/python3
"""Nagios probe: report how many days remain before a domain name expires."""
import os
import re
import subprocess
import sys
from datetime import date as da

# Nagios exit codes used by this probe
OK_STATE = 0
WARNING_STATE = 1
CRITICAL_STATE = 2

# Substrings that identify the expiry line in the various whois dialects
_EXPIRY_KEYWORDS = ('Expiry', 'Expires', 'Expiration', 'paid-till')


def _registrable_domain(dom):
    """Drop the left-most label of *dom* when at least two labels follow it.

    "www.example.com" becomes "example.com"; "example.com" is unchanged.
    Only one label is removed, exactly as the original expression did.
    """
    return re.sub(r'^[^.]*\.(?=[a-zA-Z0-9_\-]*\.\w)', '', dom)


def _find_expiry(whois_text):
    """Return the value of the last "keyword: value" expiry line, or ""."""
    found = ""
    for line in whois_text.split('\n'):
        if ':' not in line:
            continue
        if any(word in line for word in _EXPIRY_KEYWORDS):
            value = line.split(':', 1)[1].strip()
            if value:
                found = value
    return found


def _normalise_expiry(raw, dom):
    """Turn a DD/MM/YYYY date of a .fr domain into YYYY/MM/DD.

    Any other value is returned untouched. The test is on the TLD itself:
    a plain substring test would also hit e.g. "frexample.com".
    """
    if dom.lower().endswith('.fr'):
        parts = raw.split('/')
        if len(parts) == 3:
            parts[0], parts[2] = parts[2], parts[0]
            return '/'.join(parts)
    return raw


def _state_for_remaining(remaining):
    """Map a number of remaining days to a Nagios state.

    More than 30 days is OK, 8 to 30 days is WARNING, 7 days or fewer
    (including already expired) is CRITICAL. Every value maps to a state;
    the boundaries 30 and 7 used to fall through and exit 0.
    """
    if remaining > 30:
        return OK_STATE
    if remaining > 7:
        return WARNING_STATE
    return CRITICAL_STATE


def check_domain(dom=""):
    """Query whois for *dom*, print the remaining days and exit.

    The process always terminates through sys.exit() with a Nagios state:
    WARNING when no domain is given, when no expiry line is found or when
    the date cannot be parsed; CRITICAL when whois fails; otherwise the
    state computed from the remaining days.
    """
    if not dom:
        sys.exit(WARNING_STATE)
    dom = _registrable_domain(dom)
    try:
        res = subprocess.check_output(["whois", dom], encoding='utf-8')
    except subprocess.CalledProcessError as e:
        # HACK: whois sometimes exits with 1 but still gives the information
        if e.returncode == 1:
            res = e.output
        else:
            print("The domain {0} seems to be dead".format(dom))
            sys.exit(CRITICAL_STATE)

    st = _find_expiry(res)
    if not st:
        # .format() belongs to the string, not to the value print() returns
        print("The probe must be checked for domain {0}".format(dom))
        sys.exit(WARNING_STATE)

    st = _normalise_expiry(st, dom)
    try:
        # date(1) copes with the many date spellings registries use
        st = subprocess.check_output(["date", "-d", st, "+%Y/%m/%d"],
                                     encoding='utf-8')
    except subprocess.CalledProcessError:
        print("The expiration date '{0}' of {1} cannot be parsed".format(st, dom))
        sys.exit(WARNING_STATE)

    year, month, day = (int(part) for part in st.split('/'))
    remaining = (da(year, month, day) - da.today()).days
    print("There are {0} days remaining until expiration of {1}".format(remaining, dom))
    sys.exit(_state_for_remaining(remaining))


if __name__ == '__main__':
    check_domain(sys.argv[1] if len(sys.argv) > 1 else "")
#!/bin/sh
# Nagios plugin: alert when a filesystem is running out of inodes.
#
# Usage: check_inode [-w WARN%] [-c CRIT%] [-p PATH]
#   -w  WARNING when inode usage is >= WARN (default 80%)
#   -c  CRITICAL when inode usage is >= CRIT (default 90%)
#   -p  any path on the filesystem to inspect (default /)
# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.

PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

# Defaults, overridden by the command line
warninglevel=80%
criticallevel=90%
filetest=/

# Each option consumes the next word as its value; an option given last
# without a value is ignored so that `shift` never runs on an empty list.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -w) if [ "$#" -ge 2 ]; then shift; warninglevel=$1; fi ;;
        -p) if [ "$#" -ge 2 ]; then shift; filetest=$1; fi ;;
        -c) if [ "$#" -ge 2 ]; then shift; criticallevel=$1; fi ;;
    esac
    shift
done

# is_number VALUE: succeed when VALUE is a non-empty string of digits.
is_number() {
    case "$1" in
        ''|*[!0-9]*) return 1 ;;
    esac
    return 0
}

# One df call only; -P keeps each filesystem on a single line so the
# fifth column is always the usage percentage.
valueinfo=$(df -iP "$filetest" 2>/dev/null | awk 'NR == 2 { print $5 }')

# Delete the % before testing
warninglevelreduced=$(printf '%s' "$warninglevel" | tr -d %)
criticallevelreduced=$(printf '%s' "$criticallevel" | tr -d %)
valuetestreduced=$(printf '%s' "$valueinfo" | tr -d %)

if ! is_number "$warninglevelreduced" || ! is_number "$criticallevelreduced"
then
    echo "INODE UNKNOWN - thresholds must be integer percentages (got -w $warninglevel -c $criticallevel)"
    exit 3
fi

# df prints "-" when the filesystem has no inode accounting, and nothing
# at all when the path is wrong: neither can be compared numerically.
if ! is_number "$valuetestreduced"
then
    echo "INODE UNKNOWN - no inode usage available for $filetest"
    exit 3
fi

# Critical is tested first so that it wins over warning
if [ "$criticallevelreduced" -le "$valuetestreduced" ]
then
    echo "INODE CRITICAL - Inode occupation = $valueinfo"
    exit 2
fi

if [ "$warninglevelreduced" -le "$valuetestreduced" ]
then
    echo "INODE WARNING - Inode occupation = $valueinfo"
    exit 1
fi

# Below both thresholds
echo "INODE OK - Inode occupation = $valueinfo"
exit 0
def end(status, message):
    """Print the Nagios result line for *status* and exit with that code.

    Any status other than OK, WARNING or CRITICAL is reported as UNKNOWN.
    """
    labels = {
        OK: "RAID OK",
        WARNING: "RAID WARNING",
        CRITICAL: "RAID CRITICAL",
    }
    if status in labels:
        print("%s: %s" % (labels[status], message))
        sys.exit(status)
    print("UNKNOWN: %s" % message)
    sys.exit(UNKNOWN)


if not os.path.exists(BIN):
    end(UNKNOWN, "Raid utility '%s' cannot be found" % BIN)

if not os.access(BIN, os.X_OK):
    end(UNKNOWN, "Raid utility '%s' is not executable" % BIN)


def find_arrays(verbosity):
    """Return the sorted device paths of all MD arrays reported by mdadm.

    Runs "mdadm --detail --scan" through sudo. When no array is listed the
    plugin exits OK through end(): a machine without MD arrays is not an
    error for this probe.
    """
    if verbosity >= 3:
        print("finding all MD arrays via: %s --detail --scan" % BIN)

    # The context manager closes the pipe and reaps the child process
    with os.popen("%s %s --detail --scan" % (SUDO, BIN)) as pipe:
        devices_output = pipe.readlines()

    raid_devices = []
    for line in devices_output:
        fields = line.split()
        # A usable line reads "ARRAY <device> ..."; skip anything shorter
        if "ARRAY" in line and len(fields) > 1:
            raid_device = fields[1]
            if verbosity >= 2:
                print("found array %s" % raid_device)
            raid_devices.append(raid_device)

    if not raid_devices:
        end(OK, "no MD raid devices found on this machine")

    return sorted(raid_devices)
# An array in one of these states is healthy and not mentioned at all
_RE_CLEAN = re.compile('^clean(, no-errors)?$')
_HEALTHY_STATES = ("active", "active, recovering")

# Transitional states (added by Yanga Yann): substring looked up in the
# mdadm state, and the status it raises an otherwise OK result to.
_TRANSIENT_STATES = (
    ("clean, recovering", OK),
    ("clean, degraded, recovering", WARNING),
    ("clean, checking", WARNING),
    ("active, checking", WARNING),
)


def _read_detail(array):
    """Return the lines printed by "mdadm --detail <array>" (run via sudo)."""
    with os.popen("%s %s --detail %s" % (SUDO, BIN, array)) as pipe:
        return pipe.readlines()


def _last_value(detailed_output, label, default):
    """Return the text after the last ':' of the last line containing *label*."""
    value = default
    for line in detailed_output:
        if label in line:
            value = line.split(":")[-1].strip()
    return value


def _rebuild_progress(detailed_output):
    """Return the stripped "Rebuild Status" line, or None when absent."""
    progress = None
    for line in detailed_output:
        if "Rebuild Status" in line:
            # stripped: a raw line would put a newline in the Nagios output
            progress = line.strip()
    return progress


def test_raid(verbosity):
    """Check every MD array of the machine.

    Returns a (status, message) tuple where status is one of the Nagios
    codes OK, WARNING, CRITICAL or UNKNOWN and message is a one-line
    summary suitable for end().
    """
    raid_devices = find_arrays(verbosity)

    status = OK
    notes = []
    arrays_not_ok = 0
    number_arrays = len(raid_devices)

    for array in raid_devices:
        if verbosity >= 2:
            print('Now testing raid device "%s"' % array)

        detailed_output = _read_detail(array)

        if verbosity >= 3:
            for line in detailed_output:
                print(line)

        state = _last_value(detailed_output, "State :", "unknown")
        if _RE_CLEAN.match(state) or state in _HEALTHY_STATES:
            continue
        if state == "dirty":
            # This happens when the array is under heavy usage but it is
            # normal and the array recovers within seconds: not counted.
            continue

        arrays_not_ok += 1
        # Looked up by label rather than by line number so that short or
        # empty mdadm output cannot raise IndexError.
        raidlevel = _last_value(detailed_output, "Raid Level :", "unknown level")
        shortname = array.split("/")[-1].upper()

        for marker, severity in _TRANSIENT_STATES:
            if marker in state:
                note = 'Array "%s" is in state "%s" (%s)' \
                       % (shortname, state, raidlevel)
                progress = _rebuild_progress(detailed_output)
                if progress:
                    note += " - %s" % progress
                notes.append(note)
                # never downgrade a status another array already raised
                if status == OK:
                    status = severity
                break
        else:
            if state == "unknown":
                notes.append('State of Raid Array "%s" is unknown' % shortname)
                # compare the running status (not the state string) with OK
                if status == OK:
                    status = UNKNOWN
            else:
                notes.append('Array %s is in state "%s" (%s)'
                             % (shortname, state, raidlevel))
                status = CRITICAL

    message = ", ".join(notes)

    if status == OK:
        if message:
            message += " - "
        message += "All arrays OK"
    elif arrays_not_ok == 1:
        message = "1 array not ok - " + message
    else:
        message = "%s arrays not ok - " % arrays_not_ok + message

    if number_arrays == 1:
        message += " [1 array checked]"
    else:
        message += " [%s arrays checked]" % number_arrays

    return status, message
def main():
    """Parse the command line, run the MD array check and exit with its result."""
    cli = OptionParser()
    cli.add_option(
        "-v", "--verbose",
        action="count",
        dest="verbosity",
        default=0,
        help="Verbose mode. Good for testing plugin. By default"
             " only one result line is printed as per Nagios standards",
    )
    cli.add_option(
        "-V", "--version",
        action="store_true",
        dest="version",
        help="Print version number and exit",
    )

    options, leftovers = cli.parse_args()

    # The plugin takes options only; any positional argument is a usage error
    if leftovers:
        cli.print_help()
        sys.exit(UNKNOWN)

    if options.version:
        print(__version__)
        sys.exit(OK)

    outcome, summary = test_raid(options.verbosity)
    end(outcome, summary)


if __name__ == "__main__":
    main()
#!/bin/bash
# Nagios plugin: alert on the percentage of memory in use.
#
# Usage: check_mem.sh -w <warnlevel> -c <critlevel>
#   warnlevel and critlevel are percentages written without the % sign.
# "Used" is total minus available, so buffers and cache count as free.
# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (bad usage or
# unreadable `free` output).

# Print the help text and leave with UNKNOWN: a mis-configured check must
# not be reported to Nagios as OK.
usage() {
    local sName="${0##*/}"
    echo -e "\n\n\t\t### $sName Version 2.1###\n"
    echo -e "# Usage:\t$sName -w <warnlevel> -c <critlevel>"
    echo -e "\t\t= warnlevel and critlevel is percentage value without %\n"
    echo -e "# EXAMPLE:\t/usr/lib64/nagios/plugins/$sName -w 80 -c 90"
    echo -e "\nCopyright (C) 2012 Lukasz Gogolin (lukasz.gogolin@gmail.com), improved by Nestor 2015\n\n"
    exit 3
}

# Succeed when $1 is an integer greater than zero
is_positive_int() {
    [[ "$1" =~ ^[0-9]+$ ]] && [ "$1" -gt 0 ]
}

if [ "$1" != "-w" ] || ! is_positive_int "$2" || [ "$3" != "-c" ] || ! is_positive_int "$4"; then
    usage
fi

# Run free once, in the C locale so that the row label is always "Mem:"
FreeOut=$(LC_ALL=C free -m)
FreeM=$(echo "$FreeOut" | grep '^Mem')

# Row layout: Mem: total used free shared <col6> <col7>
read -r _ memTotal_m _ memFree_m _ memCol6 memCol7 <<< "$FreeM"

if ! is_positive_int "$memTotal_m"; then
    echo "Memory: UNKNOWN cannot parse the output of free"
    exit 3
fi

if echo "$FreeOut" | grep -q available
then
    # recent procps: col6 is buff/cache, col7 is available
    memBuffer_cache_m=$memCol6
    memAvailable_m=$memCol7
else
    # old procps: col6 is buffers, col7 is cached
    memBuffer_cache_m=$(( memCol6 + memCol7 ))
    memAvailable_m=$(( memFree_m + memBuffer_cache_m ))
fi

memUsed_m=$(( memTotal_m - memAvailable_m ))
# integer arithmetic: the result has no fractional part to cut off
memUsedPrc=$(( (memUsed_m * 100) / memTotal_m ))

perfdata="TOTAL=$memTotal_m;;;; USED=$memUsed_m;;;; BUFFER/CACHE=$memBuffer_cache_m;;;; AVAILABLE=$memAvailable_m;;;;"
summary="Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% used"

if [ "$memUsedPrc" -ge "$4" ]; then
    echo "Memory: CRITICAL $summary!|$perfdata"
    exit 2
elif [ "$memUsedPrc" -ge "$2" ]; then
    echo "Memory: WARNING $summary!|$perfdata"
    exit 1
else
    echo "Memory: OK $summary|$perfdata"
    exit 0
fi
#!/bin/bash
# Nagios plugin: poll every /dev/sd? disk with smartctl and report
# over-temperature and SMART failures.
# man smartctl for the return codes definitions
# By Gary GABRIEL. garyg@pilotsystems.net
# 2012-10-29
#encoding: utf-8

STATUSOK="0"
WARNINGS="1"
CRITICAL="2"

CRITFLAG="0"
WARNFLAG="0"
CRITLOGS=""
WARNLOGS=""

SMARTCTL="/usr/sbin/smartctl"

if [ ! -x "$SMARTCTL" ]
then
    echo "$SMARTCTL not found or not executable. Please fix it."
    exit $WARNINGS
fi

# Maximum temperature in degrees Celsius
HOSTNAME=$(hostname)
case $HOSTNAME in
    *blue)
        MAXDEG=60;;
    *)
        MAXDEG=40;;
esac
CURDEG=0

# warn MESSAGE: record a warning. Newest entries come first, and warnings
# are only ever chained with earlier warnings, never with the critical log.
warn() {
    WARNLOGS="$1; $WARNLOGS"
    WARNFLAG="1"
}

# crit MESSAGE: record a critical alert, newest first.
crit() {
    CRITLOGS="$1; $CRITLOGS"
    CRITFLAG="1"
}

for DISK in $(awk '$4 ~ /sd[a-z]$/ { print $4 }' /proc/partitions | sort -u)
do
    STATUS=$(/usr/bin/sudo "$SMARTCTL" -a "/dev/$DISK" -s on)
    RETVAL="$?"

    CMDLINE="Please run \"sudo $SMARTCTL -a /dev/$DISK\" on $HOSTNAME."

    # Parse output for SATA drives
    CURDEG=$(echo "$STATUS" | grep "194 Temperature_Celsius" | awk '{print $10}')
    if [ -z "$CURDEG" ]
    then
        # Parse output for SCSI drives
        CURDEG=$(echo "$STATUS" | grep "Current Drive Temperature" | awk '{print $4}')
    fi

    # Some disks are not SMART-capable and report no usable temperature:
    # they are ignored altogether, return code included.
    if ! [[ "$CURDEG" =~ ^[0-9]+$ ]]
    then
        continue
    fi

    if [ "$CURDEG" -ge "$MAXDEG" ]
    then
        warn "/dev/$DISK: Temperature is above $MAXDEG"
    fi

    # The smartctl exit status is a bit mask. Bits 2 (4), 5 (32) and 6 (64)
    # are deliberately not reported: they were disabled in the original.
    if (( RETVAL & 1 )); then
        warn "/dev/$DISK: Command line did not parse. $CMDLINE"
    fi
    if (( RETVAL & 2 )); then
        warn "/dev/$DISK: Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode. $CMDLINE"
    fi
    if (( RETVAL & 8 )); then
        crit "/dev/$DISK: SMART status check returned \"DISK FAILING\". $CMDLINE"
    fi
    if (( RETVAL & 16 )); then
        warn "/dev/$DISK: We found prefail Attributes <= threshold. $CMDLINE"
    fi
    if (( RETVAL & 128 )); then
        warn "/dev/$DISK: The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored. $CMDLINE"
    fi
done

# Critical has precedence over warning
if [ "$CRITFLAG" -eq "1" ]
then
    echo "$CRITLOGS"
    exit $CRITICAL
fi

# If no critical alerts, show warnings
if [ "$WARNFLAG" -eq "1" ]
then
    echo "$WARNLOGS"
    exit $WARNINGS
fi

# else, everything is fine
echo "Everything is OK"
exit $STATUSOK
#!/bin/sh
# Nagios plugin: check the validity period of a TLS certificate with
# check_http, then report who issued it.
#
# Usage: check_ssl HOST PORT [IP]
#   HOST  name sent in SNI and in the Host header
#   PORT  TCP port to connect to
#   IP    address to connect to (defaults to HOST)
#
# `set -e` is relied upon: when check_http returns a non-zero Nagios
# status the script stops there and propagates that status.

set -e

HOST="$1"
PORT="$2"
IP="$3"

if [ -z "$HOST" ] || [ -z "$PORT" ]
then
    echo "Usage: $0 HOST PORT [IP]"
    exit 3
fi

if [ -z "$IP" ]
then
    IP="$HOST"
fi

/usr/lib/nagios/plugins/check_http -H "$HOST" -S -C 28 -p "$PORT" --sni -I "$IP"

# stdin comes from /dev/null so that s_client closes the session instead
# of waiting for input, and its diagnostics are discarded. `|| true`
# keeps `set -e` from killing the script silently when grep matches nothing.
ISSUER="$(/usr/bin/openssl s_client -showcerts -servername "$HOST" -connect "$IP":"$PORT" </dev/null 2>/dev/null | grep issuer || true)"

if [ -z "$ISSUER" ]
then
    echo "Unable to read the certificate issuer of $HOST:$PORT"
    exit 3
fi

if echo "$ISSUER" | grep -q GeoTrust
then
    echo "Certificate signed by $ISSUER, will expire on July 20th"
    exit 1
fi

echo "Issuer : $ISSUER"
class TelecasterCheck:
    """Nagios compatible Telecaster server check.

    The check verifies that every expected daemon is running and that, for
    every media format, at least one record directory has grown since the
    previous run. Directory sizes are remembered between runs in small
    state files under ``log_path``.
    """

    # Directories the recorders write to, one sub-directory per format.
    # NOTE(review): a format is taken as "writing" when ANY of these grows,
    # assuming only one of them is written at a time; confirm.
    record_paths = ['/home/telecaster/trash', '/home/telecaster/monitor']
    formats = ['mp3', 'webm']
    # Processes that must be present: name plus a substring of the arguments
    daemons = [
        {'proc': 'pipewire', 'args': ''},
        {'proc': 'Xtigervnc', 'args': ''},
        {'proc': 'gst-launch-1.0', 'args': 'lamemp3enc'},
        {'proc': 'gst-launch-1.0', 'args': 'vp8enc'},
        {'proc': 'deefuzzer', 'args': 'mp3'},
        {'proc': 'deefuzzer', 'args': 'webm'},
    ]

    log_path = '/var/log/telecaster/'

    OK_STATE = 0
    WARNING_STATE = 1
    CRITICAL_STATE = 2
    # Grace period (seconds) before a stalled format is measured again
    TIME_SLEEP = 1

    def __init__(self):
        self.message = ""
        self.is_up = True
        self.is_writing = True

    def get_pid(self, name, args=None):
        """Return the pid of the first process called *name*, or None.

        When *args* is given it must be a substring of the process
        arguments. Processes with an empty command line are ignored.
        """
        for proc in psutil.process_iter():
            try:
                if proc.name() != name:
                    continue
                cmdline = proc.cmdline()
            except psutil.Error:
                # the process vanished or is not readable: not a match
                continue
            if not cmdline:
                continue
            if not args or args in ' '.join(cmdline[1:]):
                return proc.pid
        return None

    def get_dir_size(self, path='.'):
        """Return the total size in bytes of the files below *path*.

        Entries that disappear while the tree is being walked (recordings
        may be rotated at any time) are skipped.
        """
        total = 0
        with os.scandir(path) as it:
            for entry in it:
                try:
                    if entry.is_file():
                        total += entry.stat().st_size
                    elif entry.is_dir():
                        total += self.get_dir_size(entry.path)
                except OSError:
                    continue
        return total

    def check_daemons(self):
        """Record in the message, and in ``is_up``, which daemons run."""
        for daemon in self.daemons:
            label = daemon['proc'] + " " + daemon['args']
            if not self.get_pid(daemon['proc'], args=daemon['args']):
                self.is_up = False
                self.message += label + " is OFF" + " - "
            else:
                self.message += label + " is ON" + " - "

    def _state_file(self, record_path, fmt):
        """Return the state file of one (record path, format) pair.

        One file per pair: a file shared by several record paths would
        compare the size of one directory with the size of another.
        """
        slug = record_path.strip(os.sep).replace(os.sep, '_')
        return os.path.join(self.log_path, slug + '_' + fmt + '.log')

    @staticmethod
    def _read_size(state_file):
        """Return the size stored in *state_file*, or None if unusable."""
        try:
            with open(state_file) as f:
                return int(f.read())
        except (OSError, ValueError):
            return None

    @staticmethod
    def _write_size(state_file, size):
        """Store *size* in *state_file* for the next run."""
        with open(state_file, 'w') as f:
            f.write(str(size))

    def check_writing(self):
        """Compare each format's directory sizes with the previous run.

        A format with no previous measurement yields no verdict. Once a
        format is found stalled ``is_writing`` stays False: a later format
        that is still growing does not reset it.
        """
        for fmt in self.formats:
            compared = False
            growing = False
            stalled = []
            for record_path in self.record_paths:
                directory = os.path.join(record_path, fmt)
                if not os.path.isdir(directory):
                    continue
                state_file = self._state_file(record_path, fmt)
                previous_size = self._read_size(state_file)
                size = self.get_dir_size(directory)
                self._write_size(state_file, size)
                if previous_size is None:
                    continue
                compared = True
                if size != previous_size:
                    growing = True
                else:
                    stalled.append((state_file, directory, size))

            if not compared:
                continue

            if not growing and self.is_up:
                # The daemons are alive: wait a moment and look once more
                # before declaring the format stalled.
                time.sleep(self.TIME_SLEEP)
                for state_file, directory, size in stalled:
                    new_size = self.get_dir_size(directory)
                    if new_size != size:
                        growing = True
                        self._write_size(state_file, new_size)

            if growing:
                self.message += fmt + " writing is ON - "
            else:
                self.is_writing = False
                self.message += fmt + " writing is OFF - "

    def run(self):
        """Run both checks, print the summary and exit with a Nagios state."""
        self.check_daemons()
        self.check_writing()
        print(self.message)
        if not self.is_up or not self.is_writing:
            sys.exit(self.CRITICAL_STATE)
        else:
            sys.exit(self.OK_STATE)


if __name__ == "__main__":
    check = TelecasterCheck()
    check.run()