1
|
#!/usr/bin/perl
|
2
|
|
3
|
# Mailcleaner - SMTP Antivirus/Antispam Gateway
|
4
|
# Copyright (C) 2020 John Mertz <git@john.me.tz>
|
5
|
#
|
6
|
# This program is free software; you can redistribute it and/or modify
|
7
|
# it under the terms of the GNU General Public License as published by
|
8
|
# the Free Software Foundation; either version 2 of the License, or
|
9
|
# (at your option) any later version.
|
10
|
#
|
11
|
# This program is distributed in the hope that it will be useful,
|
12
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
# GNU General Public License for more details.
|
15
|
#
|
16
|
# You should have received a copy of the GNU General Public License
|
17
|
# along with this program; if not, write to the Free Software
|
18
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
#
|
20
|
#
|
21
|
# This script will generate outbound relaying summaries to assist in the
|
22
|
# diagnosing of outbound blacklisting or other unexpected relaying behaviour
|
23
|
#
|
24
|
# See Usage mene below or by running with the --help option
|
25
|
|
26
|
use strict;
|
27
|
use warnings;
|
28
|
use Module::Load::Conditional qw| check_install |;
|
29
|
|
30
|
my $VAR = "/var/mailcleaner";
|
31
|
my $domains_list = "$VAR/spool/tmp/mailcleaner/domains.list";
|
32
|
|
33
|
sub usage {
|
34
|
print STDERR <<EOL;
|
35
|
|
36
|
usage: $0 [--detailed] [--ids] [--debug] [<days>] [<sender>]
|
37
|
|
38
|
Options can be given in any order, however the first numeric option will be
|
39
|
treated as the <days> value and the first non-numberic option (which is not
|
40
|
listed below) will be treated as the <sender> value.
|
41
|
|
42
|
Options:
|
43
|
|
44
|
--recipients Count sender/recipient pairs, instead of just unique senders.
|
45
|
|
46
|
--ids Include the IDs for each sender or sender/recipient pair.
|
47
|
|
48
|
--debug Print the input settings that were recognized and the sender
|
49
|
regex used to STDERR.
|
50
|
|
51
|
<days> Number of days to search. In reverse, prior to today.
|
52
|
'ALL' can be used for all available.
|
53
|
Warning will be printed to STDERR for a large number of days.
|
54
|
Default: 0 (just today)
|
55
|
|
56
|
<sender> Simple filter for a specific sender or domain.
|
57
|
If the string entered matches one of your domains exactly,
|
58
|
only that domain will be searched.
|
59
|
Any string with a '\@' will be searched for as a complete
|
60
|
email address. As a side-effect, external messages from a
|
61
|
specific sender could also be searched this way.
|
62
|
Any other string will be searched as a username at any of
|
63
|
your existing domains.
|
64
|
Default: all internal addresses.
|
65
|
EOL
|
66
|
exit 0;
|
67
|
}
|
68
|
|
69
|
my ($days, $recipients, $ids, $debug, $unlisted) = (0, 0, 0, 0, 1);
|
70
|
my $filter;
|
71
|
|
72
|
# Collect arguments. Print usage if an argument is not understood or
|
73
|
if (scalar @ARGV) {
|
74
|
foreach (@ARGV) {
|
75
|
if ($_ eq '-h' || $_ eq '--help' || $_ eq '?') {
|
76
|
usage();
|
77
|
} elsif ($_ eq '--debug' && !$debug) {
|
78
|
$debug = 1;
|
79
|
} elsif ($_ eq '--recipients' && !$recipients) {
|
80
|
$recipients = 1;
|
81
|
} elsif ($_ eq '--ids' && !$ids) {
|
82
|
$ids = 1;
|
83
|
} elsif ($_ =~ /^(\d+|ALL)$/) {
|
84
|
$days = $_;
|
85
|
} elsif (!defined($filter)) {
|
86
|
$filter = $_;
|
87
|
# Ignore unlisted domains when using a filter is in use
|
88
|
$unlisted = 0;
|
89
|
} else {
|
90
|
print STDERR "Argument $_ not understood, or redundant.\n";
|
91
|
usage();
|
92
|
}
|
93
|
}
|
94
|
}
|
95
|
|
96
|
# Get the number of log days available if ALL is selected
|
97
|
if ($days eq 'ALL') {
|
98
|
$days = 1;
|
99
|
while (-e "$VAR/log/exim_stage1/mainlog.$days.gz") {
|
100
|
$days++;
|
101
|
}
|
102
|
}
|
103
|
|
104
|
# Print warning for large number of days
|
105
|
if ($days gt 30) {
|
106
|
print STDERR "Searching " . ($days+1) . " days. This can take a long time. Use Ctrl+C to exit.\n";
|
107
|
}
|
108
|
|
109
|
# Parse sender argument, if given and define a regex
|
110
|
my $regex = '';
|
111
|
|
112
|
# If sender given and contains @, search for this as complete address
|
113
|
if (defined $filter && $filter =~ /@/) {
|
114
|
$regex = "<= $filter ";
|
115
|
# Otherwise, get the domains list
|
116
|
} else {
|
117
|
my @domains;
|
118
|
open(my $df, '<', $domains_list) || die "Unable to open domain list: $domains_list\n";
|
119
|
while (<$df>) {
|
120
|
push @domains, (split ':', $_)[0];
|
121
|
# If the sender argument matches this domain exactly, set that as the regex
|
122
|
if (defined($filter) && (split ':', $_)[0] eq $filter) {
|
123
|
$regex = "<= [^\ ]*\@$filter ";
|
124
|
last;
|
125
|
}
|
126
|
}
|
127
|
close $df;
|
128
|
unless (scalar @domains) {
|
129
|
die "Didn't find any internal domains in $domains_list\n";
|
130
|
}
|
131
|
# If regex already set, move on
|
132
|
unless ($regex) {
|
133
|
# If a sender is defined without a @, search as username at all domains.
|
134
|
if (defined $filter) {
|
135
|
$regex = "<= $filter@(" . join('|', @domains) . ") ";
|
136
|
# Otherwise search any user at all domains
|
137
|
} else {
|
138
|
$regex = "<= [^\ ]*@(" . join('|', @domains) . ") ";
|
139
|
}
|
140
|
}
|
141
|
}
|
142
|
|
143
|
# If requiring more than just today and yesterday, we need the gzip module
|
144
|
if ($days gt 1) {
|
145
|
check_install( 'module' => 'PerlIO::gzip', 'version' => 0.18) || die "Searching logs from previous days requires the library PerlIO::gzip. Please install with:\n apt-get install libperlio-gzip-perl\n\n";
|
146
|
}
|
147
|
|
148
|
# Output parsed arguments and regex if requested
|
149
|
if ($debug) {
|
150
|
print STDERR "\nSearch settings:\nrecipient: $recipients, ids: $ids, days: $days, sender: " . ($filter || 'NONE') . ", unlisted: " . $unlisted . ", regex: /$regex/\n\n";
|
151
|
}
|
152
|
|
153
|
# Messages relayed by IP have a dedicated log line mentioning this.
|
154
|
my %outstanding_senders;
|
155
|
|
156
|
# If unlisted domains are allowed to relay, our regex won't include them. Collect that first to catch any senders who might be from an unlisted domain.
|
157
|
my %unlisted;
|
158
|
|
159
|
# Senders are collected on a different line than recipient.
|
160
|
# It is possible for these to be non-sequential, so we need to track which sender and ID we're waiting to complete.
|
161
|
my %outstanding_ids;
|
162
|
|
163
|
# Go through all log files requested and collect all sender, recipient and id data
|
164
|
my %results;
|
165
|
for (my $i = 0; $i <= $days; $i++) {
|
166
|
my $fh;
|
167
|
if ($i eq 0) {
|
168
|
open $fh, '<', "$VAR/log/exim_stage1/mainlog" or die $!;
|
169
|
} elsif ($i eq 1) {
|
170
|
open $fh, '<', "$VAR/log/exim_stage1/mainlog.0" or die $!;
|
171
|
} else {
|
172
|
open $fh, '<:gzip', "$VAR/log/exim_stage1/mainlog.".($i-1).".gz" or die $!;
|
173
|
}
|
174
|
while (<$fh>) {
|
175
|
# Search for initial log line to for both listed and unlisted domains
|
176
|
if ($_ =~ /Accepting authorized relaying session from [^,]*, sender ([^\ ]*)/) {
|
177
|
# Create or increment key for this sender to indicate that we're expecting <= line for that sender
|
178
|
if ($unlisted) {
|
179
|
if (!defined $outstanding_senders{$1}) {
|
180
|
$outstanding_senders{$1} = 1;
|
181
|
} else {
|
182
|
$outstanding_senders{$1}++;
|
183
|
}
|
184
|
}
|
185
|
# Search for sender log line with specified sender regex
|
186
|
} elsif ($_ =~ /$regex/) {
|
187
|
my ($date, $time, $id, $direction, $sender) = split(' ',$_);
|
188
|
# Push the id to the outstanding_ids has with sender ast the value for faster searching on key
|
189
|
$outstanding_ids{$id} = $sender;
|
190
|
# Decrement outstanding messages from this sender; remove the hash key if 0
|
191
|
$outstanding_senders{$sender}--;
|
192
|
unless ($outstanding_senders{$sender}) {
|
193
|
delete $outstanding_senders{$sender};
|
194
|
}
|
195
|
} elsif ($_ =~ / <= / && scalar(keys %outstanding_senders)) {
|
196
|
my ($date, $time, $id, $direction, $sender) = split(' ',$_);
|
197
|
foreach my $outstanding_sender (keys %outstanding_senders) {
|
198
|
if ($outstanding_sender eq $sender) {
|
199
|
# Push the id to the outstanding_ids has with sender ast the value for faster searching on key
|
200
|
$outstanding_ids{$id} = '*'.$sender;
|
201
|
# Set flag to output additional warning if unlisted domains are found
|
202
|
$unlisted = 2;
|
203
|
# Decrement outstanding messages from this sender; remove the hash key if 0
|
204
|
$outstanding_senders{$sender}--;
|
205
|
unless ($outstanding_senders{$sender}) {
|
206
|
delete $outstanding_senders{$sender};
|
207
|
}
|
208
|
}
|
209
|
last;
|
210
|
}
|
211
|
# Search for recipient log line if there are outstanding_ids ids
|
212
|
} elsif ($_ =~ / => / && scalar(keys %outstanding_ids)) {
|
213
|
my ($date, $time, $id, $direction, $recipient) = split(' ',$_);
|
214
|
# See if this recipient ID matches one in our outstanding_ids list
|
215
|
foreach my $out_id (keys %outstanding_ids) {
|
216
|
if ($id eq $out_id) {
|
217
|
# If the ID array for the sender/recipient combo is not yet defined, open array
|
218
|
unless ($results{$outstanding_ids{$out_id}}{$recipient}) {
|
219
|
@{$results{$outstanding_ids{$out_id}}{$recipient}} = ();
|
220
|
}
|
221
|
push @{$results{$outstanding_ids{$out_id}}{$recipient}}, $id;
|
222
|
# Make sure to delete from outstanding_ids so that we don't look for that ID anymore
|
223
|
delete $outstanding_ids{$id};
|
224
|
last;
|
225
|
}
|
226
|
}
|
227
|
}
|
228
|
}
|
229
|
close $fh;
|
230
|
}
|
231
|
|
232
|
# Break hash into domains
|
233
|
my %domains;
|
234
|
foreach my $sender (keys %results) {
|
235
|
my ($user, $domain) = split('@',$sender);
|
236
|
unless ($domains{$domain}) {
|
237
|
@{$domains{$domain}} = ();
|
238
|
}
|
239
|
push @{$domains{$domain}}, $sender;
|
240
|
}
|
241
|
|
242
|
# Sort domains
|
243
|
my @sorted_domains = sort {lc($a) cmp lc($b)} (keys %domains);
|
244
|
|
245
|
# Get list of senders sorted by domain, then sender
|
246
|
my @sorted;
|
247
|
foreach my $domain (@sorted_domains) {
|
248
|
my @users = sort {lc($a) cmp lc($b)} (@{$domains{$domain}});
|
249
|
foreach (@users) {
|
250
|
push @sorted, $_;
|
251
|
}
|
252
|
}
|
253
|
|
254
|
# Create nicely formatted output based on provided options
|
255
|
my $output = '';
|
256
|
# Tally for all senders
|
257
|
my $all = 0;
|
258
|
foreach my $sender (@sorted) {
|
259
|
# Tally for all messages to specific sender
|
260
|
my $total = 0;
|
261
|
# Collect all ids if --ids, but not --recipients
|
262
|
my @ids = ();
|
263
|
# Start output for individual sender
|
264
|
my $sender_out = $sender;
|
265
|
foreach my $recipient (keys %{$results{$sender}}) {
|
266
|
# Add count for this recipient to total tally.
|
267
|
$total += scalar @{$results{$sender}{$recipient}};
|
268
|
# If recipients were requested, a new line of output is required for each
|
269
|
if ($recipients) {
|
270
|
# Additionally IDs for each sender/recipient pair, if requested
|
271
|
if ($ids) {
|
272
|
$sender_out .= "\n " . $recipient . " (" . scalar(@{$results{$sender}{$recipient}}) . "): " . join(', ',@{$results{$sender}{$recipient}});
|
273
|
} else {
|
274
|
$sender_out .= "\n " . $recipient . " (" . scalar(@{$results{$sender}{$recipient}}) . ")";
|
275
|
}
|
276
|
# Otherwise, all recipients are tallied on the same line. If IDs or requested, we need to track them.
|
277
|
} elsif ($ids) {
|
278
|
foreach (@{$results{$sender}{$recipient}}) {
|
279
|
push @ids, $_;
|
280
|
}
|
281
|
}
|
282
|
}
|
283
|
# Add to overall tally
|
284
|
$all += $total;
|
285
|
# If --recipients then sender output is already generated, otherwise we need to do so
|
286
|
if (!$recipients) {
|
287
|
$sender_out .= ' (' . $total . ')';
|
288
|
# Additionally add IDs, if requested
|
289
|
if ($ids) {
|
290
|
$sender_out .= ': ' . join(', ', @ids);
|
291
|
}
|
292
|
}
|
293
|
# Append this sender to the complete output
|
294
|
$output .= $sender_out . "\n";
|
295
|
}
|
296
|
|
297
|
# Append overall count to the end
|
298
|
$output .= "Total: $all\n";
|
299
|
|
300
|
# Print results or print warning to STDERR
|
301
|
if ($output eq '') {
|
302
|
print STDERR "No results found using this criteria.\n";
|
303
|
exit 0;
|
304
|
} else {
|
305
|
print $output;
|
306
|
# If an unlisted address is found, this flag will be set
|
307
|
if ($unlisted == 2) {
|
308
|
print STDERR "Note: addresses beginning with '*' are from unlisted domains\n";
|
309
|
}
|
310
|
exit 1;
|
311
|
}
|