#!/bin/bash
#set -x

# check_mailcleaner.sh
# Posted by MailCleaner Support, 2018-06-04 17:20
#
# Checks the MailCleaner overall status using SNMP values.
#
# NB: don't forget to allow the monitoring poller IP in the MailCleaner configuration
#     ( Configuration => Services => SNMP monitoring )
#
# by fabricat
#
: <<'SNMP-DOCUMENTATION'
Source: http://www.mailcleaner.org/doku.php/documentation:snmp_monitoring

Here are a few more traps provided by MailCleaner:

  extOutput.1 (1.3.6.1.4.1.2021.8.1.101.1): number of filtered messages (integer)

  extOutput.2 (1.3.6.1.4.1.2021.8.1.101.2): number of spams detected (integer)

  extOutput.3 (1.3.6.1.4.1.2021.8.1.101.3): number of bytes filtered (integer)

  extOutput.4 (1.3.6.1.4.1.2021.8.1.101.4): number of viruses detected (integer)

  extOutput.5 (1.3.6.1.4.1.2021.8.1.101.5): processes status (boolean list, e.g. |1|1|1|1|1|1|1|1).
    Definition and order of processes (0 = down, 1 = running):
        incoming MTA (critical)
        queuing MTA (critical)
        outgoing MTA (critical)
        Web GUI (not critical)
        antispam/antivirus process/filtering engine (critical)
        master database (not critical)
        slave database (critical)
        firewall (not critical)

  extOutput.6 (1.3.6.1.4.1.2021.8.1.101.6): spools status, number of messages in queues (integer list, e.g. |190|4|26)
    Definition and order of spools:
        incoming: incoming MTA. Messages can be stored here on massive attacks, or when the MailCleaner is used as an outgoing relay for your network.
        filtering: main engine spool. Messages are stored here while they are processed by the engine. Fewer than 300 messages is normal, because messages are NOT deleted here until processing is completely finished. More messages can be an indication that your system is getting a little bit busy at the time.
        outgoing: outgoing MTA. Messages are stored here when they cannot be delivered immediately (temporary failure of destination host).

  extOutput.7 (1.3.6.1.4.1.2021.8.1.101.7): system load (float list, e.g. |5.29|3.79|3.55)
    Definition and order of loads:
         5 minutes:  5 last minutes average
        10 minutes: 10 last minutes average
        15 minutes: 15 last minutes average

  extOutput.8 (1.3.6.1.4.1.2021.8.1.101.8): disk partitions usage (list of strings, e.g. |/|32%|/var|35%)

  extOutput.9 (1.3.6.1.4.1.2021.8.1.101.9): system memory usage (integer list, e.g. |2068628|177144|1951888|1936572)
    Definition and order of usages:
        total physical memory
        free physical memory
        total swap memory
        free swap memory

  extOutput.10 (1.3.6.1.4.1.2021.8.1.101.10): all daily counts (integer list)
    Definition and order of counts:
    $total_bytes|$total_msg|$total_spam|$percentspam|$total_virus|$percentvirus|$total_content|$percentcontent|$total_clean|$percentclean
        number of bytes filtered
        number of messages filtered
        number of spams detected
        spam percentage
        number of viruses detected
        viruses percentage
        number of dangerous content detected
        dangerous content percentage
        number of clean messages
        clean messages percentage

SNMP-DOCUMENTATION
# Default values (each one can be overridden by a command-line option)
MC_HOST='127.0.0.1'      # -H: MailCleaner host or IP queried over SNMP
COMMUNITY='mailcleaner'  # -C: SNMP read community
VERBOSE='0'              # 0 = summary line only, 1 = -v, 2 = -V

# snmpwalk binary used for every query
SNMPWALK='/usr/bin/snmpwalk'

# Default thresholds, one WARN / CRIT pair per check
MSG_SPAM_WARN=50;   MSG_SPAM_CRIT=70     # spam percentage          (-w / -c)
MSG_VIRUS_WARN=15;  MSG_VIRUS_CRIT=30    # virus percentage         (-r / -R)
MSG_QUEUE_WARN=50;  MSG_QUEUE_CRIT=100   # messages per mail queue  (-q / -Q)
LOAD_WARN=5;        LOAD_CRIT=10         # system load              (-l / -L)
MEM_WARN=75;        MEM_CRIT=90          # memory load percentage   (-m / -M)
SWAP_WARN=50;       SWAP_CRIT=80         # swap load percentage     (-s / -S)
DISK_WARN=80;       DISK_CRIT=90         # partition usage percent  (-d / -D)
    
# Help text printed for -h.
# The "(default: ...)" columns are expanded when this assignment runs, so the
# default values must already be set at this point. The literal must contain
# only the help text itself: every line inside the quotes is printed verbatim.
USAGE=" Usage: $0 [options...]

 Options:
    -H <string>    MailCleaner host or IP              (default: ${MC_HOST})
    -C <string>    SNMP read community                 (default: ${COMMUNITY})
    -v             Verbose output                      
    -V             Very verbose output                 
    -h             Print this help and exit            
    -w <int>       Spam warning percentage             (default: ${MSG_SPAM_WARN})
    -c <int>       Spam error percentage               (default: ${MSG_SPAM_CRIT})
    -r <int>       Virus warning percentage            (default: ${MSG_VIRUS_WARN})
    -R <int>       Virus error percentage              (default: ${MSG_VIRUS_CRIT})
    -q <int>       Mail queues warning level           (default: ${MSG_QUEUE_WARN})
    -Q <int>       Mail queues error level             (default: ${MSG_QUEUE_CRIT})
    -l <int>       System load warning level           (default: ${LOAD_WARN})
    -L <int>       System load error level             (default: ${LOAD_CRIT})
    -m <int>       Memory load warning percentage      (default: ${MEM_WARN})
    -M <int>       Memory load error percentage        (default: ${MEM_CRIT})
    -s <int>       Swap load warning percentage        (default: ${SWAP_WARN})
    -S <int>       Swap load error percentage          (default: ${SWAP_CRIT})
    -d <int>       Partitions usage warning percentage (default: ${DISK_WARN})
    -D <int>       Partitions usage error percentage   (default: ${DISK_CRIT})
"
# Getting parameters:
#
# parse_options [ARGS...]
#   Applies the command-line options to the global settings: MC_HOST (-H),
#   COMMUNITY (-C), VERBOSE (-v / -V) and the *_WARN / *_CRIT thresholds.
#   -h prints ${USAGE} and exits 3. An unknown option, an option without its
#   argument, or a threshold that is not a non-negative integer also prints
#   ${USAGE} and exits 3, instead of letting the check run with settings the
#   caller did not ask for.
parse_options() {
	# getopts tracks its position in OPTIND; a local copy starting at 1 lets the
	# function be called more than once.
	local OPT OPTARG OPTIND=1

	# The leading ':' selects getopts' silent mode: errors are reported through
	# OPT (':' = missing argument, '?' = unknown option) with the offending
	# option letter in OPTARG, so they can be handled in the case below.
	while getopts ":H:C:vVhw:c:r:R:q:Q:l:L:m:M:s:S:d:D:" OPT
	do
		# Thresholds are compared with `-ge` by the checks, which needs integers.
		case "$OPT" in
			w|c|r|R|q|Q|l|L|m|M|s|S|d|D)
				if ! [[ "$OPTARG" =~ ^[0-9]+$ ]]
				then
					echo "UNKNOWN: option -$OPT expects a non-negative integer, got '$OPTARG'"
					echo "$USAGE"
					exit 3
				fi
				;;
		esac

		case "$OPT" in
			H) MC_HOST=$OPTARG;;
			C) COMMUNITY=$OPTARG;;
			# -v never lowers a level already raised by -V
			v) if [ "${VERBOSE:-0}" -lt 1 ]; then VERBOSE="1"; fi;;
			V) VERBOSE="2";;
			h) echo "$USAGE"; exit 3;;
			w) MSG_SPAM_WARN=$OPTARG;;
			c) MSG_SPAM_CRIT=$OPTARG;;
			r) MSG_VIRUS_WARN=$OPTARG;;
			R) MSG_VIRUS_CRIT=$OPTARG;;
			q) MSG_QUEUE_WARN=$OPTARG;;
			Q) MSG_QUEUE_CRIT=$OPTARG;;
			l) LOAD_WARN=$OPTARG;;
			L) LOAD_CRIT=$OPTARG;;
			m) MEM_WARN=$OPTARG;;
			M) MEM_CRIT=$OPTARG;;
			s) SWAP_WARN=$OPTARG;;
			S) SWAP_CRIT=$OPTARG;;
			d) DISK_WARN=$OPTARG;;
			D) DISK_CRIT=$OPTARG;;
			:) echo "UNKNOWN: option -$OPTARG requires an argument"; echo "$USAGE"; exit 3;;
			*) echo "UNKNOWN: invalid option -$OPTARG"; echo "$USAGE"; exit 3;;
		esac
	done
}

parse_options "$@"
    
# Other variables

# Exit statuses returned by this script
STATE_OK=0 STATE_WARNING=1 STATE_CRITICAL=2 STATE_UNKNOWN=3

# Message accumulators filled by the checks below:
#   ISSUECRIT / ISSUEWARN - problems, each entry followed by ${SEPARATOR}
#   ISSUEOK               - healthy items, each entry followed by a literal \n
#   STATS                 - space-separated name=value pairs printed after the '|'
ISSUECRIT= ISSUEWARN= ISSUEOK= STATS=

SEPARATOR=' - '
    
# Get data from SNMP queries
#
# snmp_fetch INDEX
#   Runs ${SNMPWALK} (SNMP v2c, community ${COMMUNITY}, host ${MC_HOST}) on
#   1.3.6.1.4.1.2021.8.1.101.INDEX and prints the value on stdout. stderr is
#   captured together with stdout so that, on failure, the caller receives
#   snmpwalk's own error text. Returns snmpwalk's exit status.
snmp_fetch() {
	local raw rc
	raw=$("${SNMPWALK}" -v2c -c "${COMMUNITY}" -O qv "${MC_HOST}" "1.3.6.1.4.1.2021.8.1.101.$1" 2>&1)
	rc=$?
	# NOTE(review): snmpwalk may print string values wrapped in double quotes
	# (presumably when no MIB display hint applies - confirm on the poller).
	# The checks split on '|' and compare numbers, so a trailing quote would
	# corrupt the last field; dropping quotes is harmless when there are none.
	printf '%s\n' "${raw//\"/}"
	return "$rc"
}

# snmp_fetch_all
#   Fills DAILY_COUNTS, MSG_TOTAL, MSG_SPAM, MSG_BYTES, MSG_VIRUS, PROCS_STATUS,
#   SPOOL_STATUS, LOAD_STATUS, PART_STATUS and MEM_STATUS. The first query that
#   fails prints "CRITICAL: <snmpwalk output>" and exits with STATE_CRITICAL:
#   a value that could not be fetched must not be evaluated as if it were fine.
snmp_fetch_all() {
	local spec value
	# NAME:INDEX pairs; the daily counts are queried first, as before.
	for spec in \
		DAILY_COUNTS:10 \
		MSG_TOTAL:1 MSG_SPAM:2 MSG_BYTES:3 MSG_VIRUS:4 \
		PROCS_STATUS:5 SPOOL_STATUS:6 \
		LOAD_STATUS:7 PART_STATUS:8 MEM_STATUS:9
	do
		if ! value=$(snmp_fetch "${spec##*:}")
		then
			echo "CRITICAL: $value"
			exit "${STATE_CRITICAL:-2}"
		fi
		# Assign to the global variable named before the ':'.
		printf -v "${spec%%:*}" '%s' "$value"
	done
}

snmp_fetch_all
    
# Process some stats
STATS="${STATS} msg_tot=${MSG_TOTAL} msg_spam=${MSG_SPAM} msg_virus=${MSG_VIRUS}"

### Process data

# Queue status
#
# check_queues SPOOL_STRING
#   SPOOL_STRING is the '|'-prefixed list "|incoming|filtering|outgoing" of
#   queue sizes. Appends the queue perfdata to STATS and one message to
#   ISSUECRIT, ISSUEWARN or ISSUEOK: critical when any queue reaches
#   MSG_QUEUE_CRIT, warning when any queue reaches MSG_QUEUE_WARN.
#   A value that is missing or not an integer is reported as critical (the
#   level this script already uses for SNMP failures); otherwise the failed
#   comparison would silently fall through to OK.
check_queues() {
	# NOTE(review): snmpwalk may wrap string values in double quotes; drop them.
	local raw="${1//\"/}"
	local lead incoming filtered outgoing rest depth level="ok" MSG

	IFS='|' read -r lead incoming filtered outgoing rest <<< "$raw"

	for depth in "$incoming" "$filtered" "$outgoing"
	do
		if ! [[ "$depth" =~ ^[0-9]+$ ]]
		then
			ISSUECRIT="${ISSUECRIT}Queue count: unreadable SNMP value '${raw}'${SEPARATOR}"
			return
		fi
		if [ "$depth" -ge "$MSG_QUEUE_CRIT" ]
		then
			level="crit"
		elif [ "$level" = "ok" ] && [ "$depth" -ge "$MSG_QUEUE_WARN" ]
		then
			level="warn"
		fi
	done

	STATS="${STATS} queue_in=${incoming} queue_filter=${filtered} queue_out=${outgoing}"

	MSG="Queue count: $incoming incoming, $filtered filtered, $outgoing outgoing"
	case "$level" in
		crit) ISSUECRIT="${ISSUECRIT}${MSG}${SEPARATOR}";;
		warn) ISSUEWARN="${ISSUEWARN}${MSG}${SEPARATOR}";;
		*)    ISSUEOK="${ISSUEOK}${MSG}\n";;
	esac
}

check_queues "${SPOOL_STATUS}"
    
# Procs status
#
# check_procs PROCS_STRING
#   PROCS_STRING is the '|'-prefixed flag list "|f1|f2|...|f8"; a flag equal
#   to "1" means the process is running, anything else (including a missing
#   field) counts as down. For each process, in order, appends
#   "<name>: running\n" to ISSUEOK, or "<name> down${SEPARATOR}" to ISSUECRIT
#   or ISSUEWARN depending on the severity listed in the table below.
check_procs() {
	# NOTE(review): snmpwalk may wrap string values in double quotes; a trailing
	# quote would make the last flag read as down, so drop them.
	local raw="${1//\"/}"
	local -a flags
	local spec label idx=0

	# flags[0] is the empty text before the first '|'; flags[1..8] are the flags.
	IFS='|' read -r -a flags <<< "$raw"

	# severity:name, in the order the flags appear in PROCS_STRING
	for spec in \
		"crit:Incoming MTA" \
		"crit:Queuing MTA" \
		"crit:Outgoing MTA" \
		"warn:Web GUI" \
		"crit:Antispam/antivirus process/filtering engine" \
		"warn:Master DB" \
		"crit:Slave DB" \
		"warn:Firewall"
	do
		idx=$(( idx + 1 ))
		label=${spec#*:}
		if [ "${flags[idx]}" == "1" ]
		then
			ISSUEOK="${ISSUEOK}${label}: running\n"
		elif [ "${spec%%:*}" = "crit" ]
		then
			ISSUECRIT="${ISSUECRIT}${label} down${SEPARATOR}"
		else
			ISSUEWARN="${ISSUEWARN}${label} down${SEPARATOR}"
		fi
	done
}

check_procs "${PROCS_STATUS}"
    
# Load status
#
# check_load LOAD_STRING
#   LOAD_STRING is the '|'-prefixed list of three load averages, e.g.
#   "|5.29|3.79|3.55". Appends them to STATS and one message to ISSUECRIT,
#   ISSUEWARN or ISSUEOK. Only the integer part of each average is compared:
#   critical when any reaches LOAD_CRIT, warning when any reaches LOAD_WARN.
#   A missing or non-numeric average is reported as critical (the level this
#   script already uses for SNMP failures) instead of falling through to OK.
check_load() {
	# NOTE(review): snmpwalk may wrap string values in double quotes; drop them.
	local raw="${1//\"/}"
	local lead load05 load10 load15 rest avg whole level="ok" MSG
	# digits with an optional fraction; ',' is tolerated as decimal mark too
	local num_re='^[0-9]+([.,][0-9]+)?$'

	IFS='|' read -r lead load05 load10 load15 rest <<< "$raw"

	for avg in "$load05" "$load10" "$load15"
	do
		if ! [[ "$avg" =~ $num_re ]]
		then
			ISSUECRIT="${ISSUECRIT}System load: unreadable SNMP value '${raw}'${SEPARATOR}"
			return
		fi
		whole=${avg%%[.,]*}
		if [ "$whole" -ge "$LOAD_CRIT" ]
		then
			level="crit"
		elif [ "$level" = "ok" ] && [ "$whole" -ge "$LOAD_WARN" ]
		then
			level="warn"
		fi
	done

	STATS="${STATS} load5=${load05} load10=${load10} load15=${load15}"

	MSG="System load: $load05/$load10/$load15"
	case "$level" in
		crit) ISSUECRIT="${ISSUECRIT}${MSG}${SEPARATOR}";;
		warn) ISSUEWARN="${ISSUEWARN}${MSG}${SEPARATOR}";;
		*)    ISSUEOK="${ISSUEOK}${MSG}\n";;
	esac
}

check_load "${LOAD_STATUS}"
    
# Memory status
#
# check_memory MEM_STRING
#   MEM_STRING is the '|'-prefixed list
#   "|total_ram|free_ram|total_swap|free_swap". Computes the used percentage
#   of each as 100 - (free * 100 / total) with integer division, appends both
#   to STATS, and appends one message per resource to ISSUECRIT, ISSUEWARN or
#   ISSUEOK using MEM_CRIT / MEM_WARN and SWAP_CRIT / SWAP_WARN.
#   A host without swap (total_swap = 0) is reported as 0% swap load instead
#   of dividing by zero. Missing or non-integer values, or total_ram = 0, are
#   reported as critical (the level this script already uses for SNMP
#   failures) instead of falling through to OK.
check_memory() {
	# NOTE(review): snmpwalk may wrap string values in double quotes; drop them.
	local raw="${1//\"/}"
	local lead ram_tot ram_free swap_tot swap_free rest field
	local ram_perc swap_perc MSG

	IFS='|' read -r lead ram_tot ram_free swap_tot swap_free rest <<< "$raw"

	for field in "$ram_tot" "$ram_free" "$swap_tot" "$swap_free"
	do
		if ! [[ "$field" =~ ^[0-9]+$ ]]
		then
			ISSUECRIT="${ISSUECRIT}Memory load: unreadable SNMP value '${raw}'${SEPARATOR}"
			return
		fi
	done
	if [ "$ram_tot" -eq 0 ]
	then
		ISSUECRIT="${ISSUECRIT}Memory load: unreadable SNMP value '${raw}'${SEPARATOR}"
		return
	fi

	# 10# forces base 10 so a value with a leading zero is not read as octal.
	ram_perc=$(( 100 - 10#$ram_free * 100 / 10#$ram_tot ))
	if [ "$swap_tot" -eq 0 ]
	then
		swap_perc=0
	else
		swap_perc=$(( 100 - 10#$swap_free * 100 / 10#$swap_tot ))
	fi
	STATS="${STATS} ram=${ram_perc}% swap=${swap_perc}%"

	MSG="Memory load: ${ram_perc}%"
	if [ "$ram_perc" -ge "$MEM_CRIT" ]
	then
		ISSUECRIT="${ISSUECRIT}${MSG}${SEPARATOR}"
	elif [ "$ram_perc" -ge "$MEM_WARN" ]
	then
		ISSUEWARN="${ISSUEWARN}${MSG}${SEPARATOR}"
	else
		ISSUEOK="${ISSUEOK}${MSG}\n"
	fi

	MSG="Swap load: ${swap_perc}%"
	if [ "$swap_perc" -ge "$SWAP_CRIT" ]
	then
		ISSUECRIT="${ISSUECRIT}${MSG}${SEPARATOR}"
	elif [ "$swap_perc" -ge "$SWAP_WARN" ]
	then
		ISSUEWARN="${ISSUEWARN}${MSG}${SEPARATOR}"
	else
		ISSUEOK="${ISSUEOK}${MSG}\n"
	fi
}

check_memory "${MEM_STATUS}"
    
# Disk partitions status
#
# check_partitions PART_STRING
#   PART_STRING is the '|'-prefixed list of name/usage pairs, e.g.
#   "|/|32%|/var|35%". For each pair, appends "name=usage" to STATS and
#   "Disk name: usage" to ISSUECRIT, ISSUEWARN or ISSUEOK, comparing the
#   usage (without its trailing '%') against DISK_CRIT and DISK_WARN.
#   Processing stops at the first empty partition name.
#   The string is split once into an array: asking `cut` for field after
#   field never yields an empty field when the value contains no '|' at all
#   (cut then returns the whole line every time), which made the previous
#   loop spin forever on such input.
#   A usage that is missing or not an integer, or a value with no partition
#   at all, is reported as critical (the level this script already uses for
#   SNMP failures) instead of being treated as OK.
check_partitions() {
	# NOTE(review): snmpwalk may wrap string values in double quotes; drop them.
	local raw="${1//\"/}"
	local -a fields
	local i part_name part_perc found=0 MSG

	# fields[0] is the empty text before the first '|'; pairs start at index 1.
	IFS='|' read -r -a fields <<< "$raw"

	for (( i = 1; i < ${#fields[@]}; i += 2 ))
	do
		part_name=${fields[i]}
		if [ -z "$part_name" ]
		then
			break
		fi
		part_perc=${fields[i+1]}
		found=$(( found + 1 ))

		if ! [[ "$part_perc" =~ ^[0-9]+%?$ ]]
		then
			ISSUECRIT="${ISSUECRIT}Disk ${part_name}: unreadable usage '${part_perc}'${SEPARATOR}"
			continue
		fi
		STATS="${STATS} ${part_name}=${part_perc}"

		MSG="Disk ${part_name}: ${part_perc}"
		if [ "${part_perc%\%}" -ge "$DISK_CRIT" ]
		then
			ISSUECRIT="${ISSUECRIT}${MSG}${SEPARATOR}"
		elif [ "${part_perc%\%}" -ge "$DISK_WARN" ]
		then
			ISSUEWARN="${ISSUEWARN}${MSG}${SEPARATOR}"
		else
			ISSUEOK="${ISSUEOK}${MSG}\n"
		fi
	done

	if [ "$found" -eq 0 ]
	then
		ISSUECRIT="${ISSUECRIT}Disk usage: unreadable SNMP value '${raw}'${SEPARATOR}"
	fi
}

check_partitions "${PART_STATUS}"
    
# Spam / malicious percentage status
# DAILY_COUNTS is a '|'-separated list; its 4th, 6th and 10th fields are used
# as the spam, virus and clean percentages.
for _spec in spam_perc:4 virus_perc:6 clean_perc:10
do
	printf -v "${_spec%%:*}" '%s' "$(echo ${DAILY_COUNTS} | cut -d'|' -f "${_spec##*:}")"
done
STATS="${STATS} spam=${spam_perc}% virus=${virus_perc}% clean=${clean_perc}%"

# Same three-way classification for both percentages. Only the text before
# the first '.' takes part in the comparison; the thresholds are looked up by
# name (MSG_SPAM_CRIT / MSG_SPAM_WARN, MSG_VIRUS_CRIT / MSG_VIRUS_WARN).
for _kind in SPAM VIRUS
do
	if [ "$_kind" = "SPAM" ]
	then
		MSG="Spam load: ${spam_perc}"
		_whole=${spam_perc/.*}
	else
		MSG="Virus load: ${virus_perc}"
		_whole=${virus_perc/.*}
	fi
	_crit="MSG_${_kind}_CRIT"
	_warn="MSG_${_kind}_WARN"

	if [ "$_whole" -ge "${!_crit}" ]
	then
		ISSUECRIT="${ISSUECRIT}${MSG}${SEPARATOR}"
	elif [ "$_whole" -ge "${!_warn}" ]
	then
		ISSUEWARN="${ISSUEWARN}${MSG}${SEPARATOR}"
	else
		ISSUEOK="${ISSUEOK}${MSG}\n"
	fi
done
unset _spec _kind _whole _crit _warn
    
# Prepare output values
# The summary line is assembled first and printed in one go:
#   [CRITICAL: <issues>][WARNING: <issues>][OK] |<stats>
# RETSTATE ends up as the most severe state that has at least one message.
RETSTATE=$STATE_OK
summary=""

if [ -n "$ISSUECRIT" ]
then
	summary="CRITICAL: $ISSUECRIT"
	RETSTATE=$STATE_CRITICAL
fi

if [ -n "$ISSUEWARN" ]
then
	summary="${summary}WARNING: $ISSUEWARN"
	# never downgrade a state that is already CRITICAL
	if [ "$RETSTATE" -lt "$STATE_WARNING" ]
	then
		RETSTATE=$STATE_WARNING
	fi
fi

if [ $RETSTATE -eq $STATE_OK ]
then
	summary="${summary}OK"
fi

printf '%s |%s\n' "$summary" "$STATS"

# -v and above: list the healthy items. ISSUEOK carries literal "\n"
# separators, which echo -e turns into line breaks.
if [ -n "$ISSUEOK" ] && [ "${VERBOSE}" -ge "1" ]
then
	echo -e "\n$ISSUEOK"
fi

# -V: dump the raw values, with '|' shown as '#'.
if [ "${VERBOSE}" -ge "2" ]
then
	printf '%s\n' \
		"Raw SNMP values:" \
		" 1. number of filtered messages = ${MSG_TOTAL}" \
		" 2. number of spams detected = ${MSG_SPAM}" \
		" 3. number of bytes filtered = ${MSG_BYTES}" \
		" 4. number of viruses detected = ${MSG_VIRUS}" \
		" 5. processes status = ${PROCS_STATUS//\|/#}" \
		" 6. spools status (messages in incoming#filtering#outgoing queues) = ${SPOOL_STATUS//\|/#}" \
		" 7. system load (last 5#10#15minutes) = ${LOAD_STATUS//\|/#}" \
		" 8. disk partitions usage = ${PART_STATUS//\|/#}" \
		" 9. system memory usage in kB (tot_ram#free_ram#tot_swap#free_swap) = ${MEM_STATUS//\|/#}" \
		"10. all daily counts (bytes#msg#spam#%spam#virus#%virus#content#%content#clean#%clean) = ${DAILY_COUNTS//\|/#}"
fi

exit $RETSTATE