relay_summary.pl

MailCleaner Support, 2020-06-08 22:39

Download (11.8 KB)

 
1
#!/usr/bin/perl
2

    
3
#   Mailcleaner - SMTP Antivirus/Antispam Gateway
4
#   Copyright (C) 2020 John Mertz <git@john.me.tz>
5
#
6
#   This program is free software; you can redistribute it and/or modify
7
#   it under the terms of the GNU General Public License as published by
8
#   the Free Software Foundation; either version 2 of the License, or
9
#   (at your option) any later version.
10
#
11
#   This program is distributed in the hope that it will be useful,
12
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
#   GNU General Public License for more details.
15
#
16
#   You should have received a copy of the GNU General Public License
17
#   along with this program; if not, write to the Free Software
18
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
#
20
#
21
#   This script will generate outbound relaying summaries to assist in the
22
#   diagnosing of outbound blacklisting or other unexpected relaying behaviour
23
#
24
#   See Usage mene below or by running with the --help option
25

    
26
use strict;
27
use warnings;
28
use Module::Load::Conditional qw| check_install |;
29

    
30
my $VAR = "/var/mailcleaner";
31
my $domains_list = "$VAR/spool/tmp/mailcleaner/domains.list";
32

    
33
sub usage {
34
    print STDERR <<EOL;
35

    
36
usage: $0 [--detailed] [--ids] [--debug] [<days>] [<sender>]
37

    
38
Options can be given in any order, however the first numeric option will be
39
treated as the <days> value and the first non-numberic option (which is not
40
listed below) will be treated as the <sender> value.
41

    
42
Options:
43

    
44
--recipients    Count sender/recipient pairs, instead of just unique senders.
45

    
46
--ids           Include the IDs for each sender or sender/recipient pair.
47

    
48
--debug         Print the input settings that were recognized and the sender
49
                regex used to STDERR.
50

    
51
<days>          Number of days to search. In reverse, prior to today.
52
                'ALL' can be used for all available.
53
                Warning will be printed to STDERR for a large number of days.
54
                Default: 0 (just today)
55

    
56
<sender>        Simple filter for a specific sender or domain.
57
                If the string entered matches one of your domains exactly,
58
                only that domain will be searched.
59
                Any string with a '\@' will be searched for as a complete
60
                email address. As a side-effect, external messages from a
61
                specific sender could also be searched this way.
62
                Any other string will be searched as a username at any of
63
                your existing domains.
64
                Default: all internal addresses.
65
EOL
66
    exit 0;
67
}
68

    
69
my ($days, $recipients, $ids, $debug, $unlisted) = (0, 0, 0, 0, 1);
70
my $filter;
71

    
72
# Collect arguments. Print usage if an argument is not understood or
73
if (scalar @ARGV) {
74
    foreach (@ARGV) {
75
        if ($_ eq '-h' || $_ eq '--help' || $_ eq '?') {
76
            usage();
77
        } elsif ($_ eq '--debug' && !$debug) {
78
            $debug = 1;
79
        } elsif ($_ eq '--recipients' && !$recipients) {
80
            $recipients = 1;
81
        } elsif ($_ eq '--ids' && !$ids) {
82
            $ids = 1;
83
        } elsif ($_ =~ /^(\d+|ALL)$/) {
84
            $days = $_;
85
        } elsif (!defined($filter)) {
86
            $filter = $_;
87
            # Ignore unlisted domains when using a filter is in use
88
            $unlisted = 0;
89
        } else {
90
            print STDERR "Argument $_ not understood, or redundant.\n";
91
            usage();
92
        }
93
    }
94
}
95

    
96
# Get the number of log days available if ALL is selected
97
if ($days eq 'ALL') {
98
    $days = 1;
99
    while (-e "$VAR/log/exim_stage1/mainlog.$days.gz") {
100
        $days++;
101
    }
102
}
103

    
104
# Print warning for large number of days
105
if ($days gt 30) {
106
    print STDERR "Searching " . ($days+1) . " days. This can take a long time. Use Ctrl+C to exit.\n";
107
}
108

    
109
# Parse sender argument, if given and define a regex
110
my $regex = '';
111

    
112
# If sender given and contains @, search for this as complete address
113
if (defined $filter && $filter =~ /@/) {
114
    $regex = "<= $filter ";
115
# Otherwise, get the domains list
116
} else {
117
    my @domains;
118
    open(my $df, '<', $domains_list) || die "Unable to open domain list: $domains_list\n";
119
    while (<$df>) {
120
        push @domains, (split ':', $_)[0];
121
        # If the sender argument matches this domain exactly, set that as the regex
122
        if (defined($filter) && (split ':', $_)[0] eq $filter) {
123
            $regex = "<= [^\ ]*\@$filter ";
124
            last;
125
        }
126
    }
127
    close $df;
128
    unless (scalar @domains) {
129
        die "Didn't find any internal domains in $domains_list\n";
130
    }
131
    # If regex already set, move on
132
    unless ($regex) {
133
        # If a sender is defined without a @, search as username at all domains.
134
        if (defined $filter) {
135
            $regex = "<= $filter@(" . join('|', @domains) . ") ";
136
        # Otherwise search any user at all domains
137
        } else {
138
            $regex = "<= [^\ ]*@(" . join('|', @domains) . ") ";
139
        }
140
    }
141
}
142

    
143
# If requiring more than just today and yesterday, we need the gzip module
144
if ($days gt 1) {
145
    check_install( 'module' => 'PerlIO::gzip', 'version' => 0.18) || die "Searching logs from previous days requires the library PerlIO::gzip. Please install with:\n  apt-get install libperlio-gzip-perl\n\n";
146
}
147

    
148
# Output parsed arguments and regex if requested
149
if ($debug) {
150
    print STDERR "\nSearch settings:\nrecipient: $recipients, ids: $ids, days: $days, sender: " . ($filter || 'NONE') . ", unlisted: " . $unlisted . ", regex: /$regex/\n\n";
151
}
152

    
153
# Messages relayed by IP have a dedicated log line mentioning this.
154
my %outstanding_senders;
155

    
156
# If unlisted domains are allowed to relay, our regex won't include them. Collect that first to catch any senders who might be from an unlisted domain.
157
my %unlisted;
158

    
159
# Senders are collected on a different line than recipient.
160
# It is possible for these to be non-sequential, so we need to track which sender and ID we're waiting to complete.
161
my %outstanding_ids;
162

    
163
# Go through all log files requested and collect all sender, recipient and id data
164
my %results;
165
for (my $i = 0; $i <= $days; $i++) {
166
    my $fh;
167
    if ($i eq 0) {
168
        open $fh, '<', "$VAR/log/exim_stage1/mainlog" or die $!;
169
    } elsif ($i eq 1) {
170
        open $fh, '<', "$VAR/log/exim_stage1/mainlog.0" or die $!;
171
    } else {
172
        open $fh, '<:gzip', "$VAR/log/exim_stage1/mainlog.".($i-1).".gz" or die $!;
173
    }
174
    while (<$fh>) {
175
        # Search for initial log line to for both listed and unlisted domains
176
        if ($_ =~ /Accepting authorized relaying session from [^,]*, sender ([^\ ]*)/) {
177
            # Create or increment key for this sender to indicate that we're expecting <= line for that sender
178
            if ($unlisted) {
179
                if (!defined $outstanding_senders{$1}) {
180
                    $outstanding_senders{$1} = 1;
181
                } else {
182
                    $outstanding_senders{$1}++;
183
                }
184
            }
185
        # Search for sender log line with specified sender regex
186
        } elsif ($_ =~ /$regex/) {
187
            my ($date, $time, $id, $direction, $sender) = split(' ',$_);
188
            # Push the id to the outstanding_ids has with sender ast the value for faster searching on key
189
            $outstanding_ids{$id} = $sender;
190
            # Decrement outstanding messages from this sender; remove the hash key if 0
191
            $outstanding_senders{$sender}--;
192
            unless ($outstanding_senders{$sender}) {
193
                delete $outstanding_senders{$sender};
194
            }
195
        } elsif ($_ =~ / <= / && scalar(keys %outstanding_senders)) {
196
            my ($date, $time, $id, $direction, $sender) = split(' ',$_);
197
            foreach my $outstanding_sender (keys %outstanding_senders) {
198
                if ($outstanding_sender eq $sender) {
199
                    # Push the id to the outstanding_ids has with sender ast the value for faster searching on key
200
                    $outstanding_ids{$id} = '*'.$sender;
201
                    # Set flag to output additional warning if unlisted domains are found
202
                    $unlisted = 2;
203
                    # Decrement outstanding messages from this sender; remove the hash key if 0
204
                    $outstanding_senders{$sender}--;
205
                    unless ($outstanding_senders{$sender}) {
206
                        delete $outstanding_senders{$sender};
207
                    }
208
                }
209
                last;
210
            }
211
        # Search for recipient log line if there are outstanding_ids ids
212
        } elsif ($_ =~ / => / && scalar(keys %outstanding_ids)) {
213
            my ($date, $time, $id, $direction, $recipient) = split(' ',$_);
214
            # See if this recipient ID matches one in our outstanding_ids list
215
            foreach my $out_id (keys %outstanding_ids) {
216
                if ($id eq $out_id) {
217
                    # If the ID array for the sender/recipient combo is not yet defined, open array
218
                    unless ($results{$outstanding_ids{$out_id}}{$recipient}) {
219
                        @{$results{$outstanding_ids{$out_id}}{$recipient}} = ();
220
                    }
221
                    push @{$results{$outstanding_ids{$out_id}}{$recipient}}, $id;
222
                    # Make sure to delete from outstanding_ids so that we don't look for that ID anymore
223
                    delete $outstanding_ids{$id};
224
                    last;
225
                }
226
            }
227
        }
228
    }
229
    close $fh;
230
}
231

    
232
# Break hash into domains
233
my %domains;
234
foreach my $sender (keys %results) {
235
    my ($user, $domain) = split('@',$sender);
236
    unless ($domains{$domain}) {
237
        @{$domains{$domain}} = ();
238
    }
239
    push @{$domains{$domain}}, $sender;
240
}
241

    
242
# Sort domains
243
my @sorted_domains = sort {lc($a) cmp lc($b)} (keys %domains);
244

    
245
# Get list of senders sorted by domain, then sender
246
my @sorted;
247
foreach my $domain (@sorted_domains) {
248
    my @users = sort {lc($a) cmp lc($b)} (@{$domains{$domain}});
249
    foreach (@users) {
250
        push @sorted, $_;
251
    }
252
}
253

    
254
# Create nicely formatted output based on provided options
255
my $output = '';
256
# Tally for all senders
257
my $all = 0;
258
foreach my $sender (@sorted) {
259
    # Tally for all messages to specific sender
260
    my $total = 0;
261
    # Collect all ids if --ids, but not --recipients
262
    my @ids = ();
263
    # Start output for individual sender
264
    my $sender_out = $sender;
265
    foreach my $recipient (keys %{$results{$sender}}) {
266
        # Add count for this recipient to total tally.
267
        $total += scalar @{$results{$sender}{$recipient}};
268
        # If recipients were requested, a new line of output is required for each
269
        if ($recipients) {
270
            # Additionally IDs for each sender/recipient pair, if requested
271
            if ($ids) {
272
                $sender_out .= "\n  " . $recipient . " (" . scalar(@{$results{$sender}{$recipient}}) . "): " . join(', ',@{$results{$sender}{$recipient}});
273
            } else {
274
                $sender_out .= "\n  " . $recipient . " (" . scalar(@{$results{$sender}{$recipient}}) . ")";
275
            }
276
        # Otherwise, all recipients are tallied on the same line. If IDs or requested, we need to track them.
277
        } elsif ($ids) {
278
            foreach (@{$results{$sender}{$recipient}}) {
279
                push @ids, $_;
280
            }
281
        }
282
    }
283
    # Add to overall tally
284
    $all += $total;
285
    # If --recipients then sender output is already generated, otherwise we need to do so
286
    if (!$recipients) {
287
        $sender_out .= ' (' . $total . ')';
288
        # Additionally add IDs, if requested
289
        if ($ids) {
290
            $sender_out .= ': ' . join(', ', @ids);
291
        }
292
    }
293
    # Append this sender to the complete output
294
    $output .= $sender_out . "\n";
295
}
296

    
297
# Append overall count to the end
298
$output .= "Total: $all\n";
299

    
300
# Print results or print warning to STDERR
301
if ($output eq '') {
302
    print STDERR "No results found using this criteria.\n";
303
    exit 0;
304
} else {
305
    print $output;
306
    # If an unlisted address is found, this flag will be set
307
    if ($unlisted == 2) {
308
        print STDERR "Note: addresses beginning with '*' are from unlisted domains\n";
309
    }
310
    exit 1;
311
}