#! /usr/bin/perl -w
# vim:syntax=perl

use strict;
use lib '/usr/share/perl5';
use Lire::DlfSchema;
use Lire::Email qw/sanitize splitemailadress/;
use Lire::Program qw( :msg :dlf );
use Time::Local;

my $lines	= 0;
my $dlflines	= 0;
my $errorlines  = 0;

my $schema = eval { Lire::DlfSchema::load_schema( "email" ); };
lr_err( "error loading email schema: $@" ) if $@;
my $dlf_maker = 
  $schema->make_hashref2asciidlf_func( qw/time queueid msgid
				    from_user from_domain from_relay_host
				    from_relay_ip size delay to_user
				    to_domain to_relay_host to_relay_ip
				    stat xstat/ );

my @error_shorts =
  (
   [ qr/User unknown/i, "user" ],
   [ qr/unrouteable mail domain/i, "host" ],
   [ qr/unknown local-part/i , "user" ],
   [ qr/Relay denied/i,  "service" ],
   [ qr/Connection refused/i, "service" ],
   [ qr/retry time not reached for any host/i, "service" ],
   [ qr/relaying denied/i, "relay" ],
  );

my $qid = '';
my %msgs;

sub print_dlf
{
    my ($c, $print_only_x ) = @_;

    my $subroutine = 'print_dlf';

    if (! defined $c->{recipients}) {
        lr_warn "$subroutine would want to print dlf " .
	  "about ' " . (defined $c->{queueid} ? $c->{queueid} : '-') .
            "', but didn't find recipients, skipping";
    } else {
	foreach my $i ( 0 .. $c->{recipients} - 1 ) {
	    if ( defined $print_only_x ) {
		next unless $i == $print_only_x;
	    }

	    my %dlf = map { $_ => $c->{$_} } 
	      qw/time queueid msgid from_user from_domain
		 from_relay_host from_relay_ip size/;
	    foreach my $f ( qw/delay to_user to_domain to_relay_host
			       to_relay_ip stat xstat/ )
	    {
		$dlf{$f} = $c->{$f}[$i];
	    }
	    my $dlf = $dlf_maker->( \%dlf );
	    print join( " ", @$dlf ), "\n";
	    $dlflines++;
	}
    }
}

sub parse_exim_line {
    my ( $line ) = @_;

    my ( $year, $month, $day, $hour, $min, $sec, $pid ) = 
      $line =~ /^(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)(\s[[]\d+[]]|)/
	or die "invalid Exim line: doesn't begin with a proper timestamp\n";

    my $time = timelocal( $sec, $min, $hour, $day, $month -1, $year );
    $qid  = substr($_, 20 + length($pid), 16);

    $msgs{$qid} ||= {
		     queueid	=> $qid,
		     recipients => 0,
		     queueid	=> $qid,
		     delay	=> [],
		     stat	=> [],
		     to_relay	=> [],
		     to_domain	=> [],
		     to_user	=> [],
		     # Exim specific
		     tos	=> {},
		    };
    $msgs{$qid}{time} ||= $time;

    die "too short for an Exim line\n"
      if length $line < 38 + length ($pid);

    my $command = substr($_, 37 + length ($pid), 2);

    if ($command eq "<=") {
	parse_rcpt( $time, $line );
    } elsif (($command eq "=>") || ($command eq "->")) {
	parse_delivery( $time, $line );
    } elsif ($command eq "==") {
	parse_error( $time, 0, $line );
    } elsif ($command eq "**") {
	parse_error( $time, 1, $line );
    } elsif ( $line =~ /Completed/) {
	delete $msgs{$qid};
    }
}

sub parse_rcpt {
    my ( $time, $line ) = @_;
    local $_ = $line;	# For regex matching

    ($msgs{$qid}{size}) = /\sS=([0-9]+)/;
    my ($host) = /\sH=(\S+)/;

    my ($from) = /^.{40}(\S+)/;
    sanitize( "emailadress", $from, $from );
    my ($user,$domain) = splitemailadress( $from );
    if (defined $host) {
        my ($ip) = /\sH=\S+(?:(?=\s\()\s\S+)?\s\[([^]]*)\]/;
        $msgs{$qid}{'from_relay_ip'} = $ip;
    } else {
	$host = "localhost";
        $msgs{$qid}{'from_relay_ip'} = "127.0.0.1";
    }
    $msgs{$qid}{'from_domain'} = $domain;
    $msgs{$qid}{'from_user'} = $user;
    $msgs{$qid}{'from_relay_host'} = $host;

    ($msgs{$qid}{'msgid'}) = /\sid=(\S+)/;
    ($msgs{$qid}{'protocol'}) = /\sP=(\S+)/;
}

sub parse_delivery {
    my ( $time, $line ) = @_;
    local $_ = $line;	# For regex matching

    my ($ip, $domain, $user);

    my ($to) = /^.{40}(.*?) .=/;
    my ($host) = /\sH=(\S+)/;
    if (defined $host) {
        ($ip) = /\sH=\S+(?:(?=\s\()\s\S+)?\s\[([^]]*)\]/;
    } else {
        $host = "localhost";
	$ip   = "127.0.0.1";
    }
    sanitize( "emailadress", $to, $to );
    ($user,$domain) = splitemailadress( $to );

    # determine receipt id for this email's target
    my ($email) = "$user\@$domain";
    my $rcptindex = $msgs{$qid}{'recipients'};
    if ($msgs{$qid}{'tos'}{$email}) {
	$rcptindex = $msgs{$qid}{'tos'}{$email};
    } else {
        $msgs{$qid}{'tos'}{$email} = $rcptindex;
	$msgs{$qid}{'recipients'}++;
        $msgs{$qid}{'to_user'}[$rcptindex] = $user;
        $msgs{$qid}{'to_domain'}[$rcptindex] = $domain;
    }

    $msgs{$qid}{'to_relay_ip'}[$rcptindex] = $ip;
    $msgs{$qid}{'to_relay_host'}[$rcptindex] = $host;
    $msgs{$qid}{'stat'}[$rcptindex] = "sent";

    $msgs{$qid}{'delay'}[$rcptindex] = $time - $msgs{$qid}{'time'};

    # now print a line to DLF
    print_dlf( $msgs{$qid}, $rcptindex);
}

sub parse_error {
    my ( $time, $print_dlf, $line ) = @_;

    local $_ = $line;	# For regex matching

    my ($to) = /^.{40}(.*?)(?::| .=)/;
    sanitize( "emailadress", $to, $to );
    my( $user, $domain ) = splitemailadress( $to );

    my ($email) = "$user\@$domain";

    my $rcptindex = $msgs{$qid}{'recipients'};
    if ($msgs{$qid}{'tos'}{$email}) {
        $rcptindex = $msgs{$qid}{'tos'}{$email};
    } else {
        $msgs{$qid}{'tos'}{$email} = $rcptindex;
        $msgs{$qid}{'recipients'}++;
        $msgs{$qid}{'to_user'}[$rcptindex] = $user;
        $msgs{$qid}{'to_domain'}[$rcptindex] = $domain;
    }

    # calculate delay
    $msgs{$qid}{delay}[$rcptindex] = $time - $msgs{$qid}{time};

    # determine error
    my ( $error );
    if ( /T=(.*)$/) {
	$error = $1;
    } else {
	$error = substr $line, 40;
    }

    # shorten error messages
    my $short_error = "unknown";
    foreach my $e (@error_shorts) {
        $short_error = $e->[1] if ($error =~ /$e->[0]/);
    }

    sanitize('stat', $error, $error);
    $msgs{$qid}{'stat'}[$rcptindex] = $short_error;
    $msgs{$qid}{'xstat'}[$rcptindex] = $error;

    print_dlf( $msgs{$qid}, $rcptindex) if $print_dlf;
}

init_dlf_converter( "email" );
while ( <> ) {
    chomp;
    $lines++;

    eval {
	parse_exim_line( $_ );
    };
    if ($@) {
	lr_warn( $@ );
	lr_warn( "failed to parse '$_'. Skipping." );
	$errorlines++;
    }
}

# now print all delayed messages not yet sent
foreach $qid (keys %msgs) {
    print_dlf( $msgs{$qid} );
}

end_dlf_converter( $lines, $dlflines, $errorlines );

__END__

=pod 

=head1 NAME

exim2dlf - convert exim logfiles to dlf format

=head1 SYNOPSIS

B<exim2dlf>

=head1 DESCRIPTION

B<exim2dlf> converts a Exim logfile to DLF format.  Information on
the exim Mail Transport Agent can be found on http://www.exim.org/ .
The generic email dlf format is described in email.xml.

=head1 EXAMPLES

To process a log as produced by Exim

 $ exim2dlf < mail.log

exim2dlf will be rarely used on its own, but is more likely
called by lr_log2report:

 $ lr_log2report exim < /var/log/mail.log

=head1 SEE ALSO

exim(1), postfix2dlf(1), sendmail2dlf(1), qmail2dlf(1)

=head1 VERSION

$Id: exim2dlf.in,v 1.24 2009/01/18 10:58:51 wraay Exp $

=head1 COPYRIGHT

Copyright (C) 2000, 2001 Stichting LogReport Foundation LogReport@LogReport.org

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.

=head1 AUTHOR

Egon Willighagen <egonw@logreport.org>

=cut

# Local Variables:
# mode: cperl
# End:
