#!/usr/bin/perl -w # mailclean (version 1.1) # # Copyright (C) 2002 by John J. Chew, III # http://www.poslfit.com # # A script that displays only the essential information in an RFC 822 # (e-mail message). No weird headers, no alternate HTML content. use strict; use vars qw($gBoundary $gqEcho $opt_a $opt_f); require 'getopts.pl'; sub Usage (); sub Init (); sub Main (); sub process_headers (\%); sub process_subheaders (\%); sub Usage () { die "Usage: $0 [-a] [-f]\n" ." -a display all sections, do not omit HTML\n" ." -f insert form feed between messages\n"; } Main; sub Init () { $gBoundary = undef; &Getopts('af-:') || &Usage; } sub Main () { my (%headers, %subheaders); Init; my %entities = ( 'amp' => '&', 'nbsp' => ' ', 'reg' => '(R)', ); my $entities = join('|', keys %entities); # state - 0=init 1=main header 2=body 3=section header my $last = undef; my $state = 0; my $hstate = 0; my $ystate = 0; my $bstate = 0; # 0 at start of body, 1 later while (<>) { if (/^From[ \t]+/../^$/) { # if now in header if ($state !=1) { # start of header print "\f" if $opt_f && $state == 2; # my $from_ = $_; %headers = (); $last = undef; $state = 1; $hstate = 0; $ystate = 0; $bstate = 0; } elsif (s/^[ \t]+/ /) { die unless defined $last; chop $headers{$last}; $headers{$last} .= $_; } elsif (/^(\S+)\s*:/) { $last = "\L$1"; $headers{$last} = '' unless defined $headers{$last}; $headers{$last} .= $_; } elsif (/^$/) { process_headers %headers; $state = 2; } next; } if ((defined $gBoundary) && (/^\Q$gBoundary\E$/../^$/)) { # if now in section header if ($state != 3) { %subheaders = (); $last = undef; $state = 3; } elsif (s/^[ \t]+/ /) { die unless defined $last; chop $subheaders{$last}; $subheaders{$last} .= $_; } elsif (/^(\S+)\s*:/) { $last = "\L$1"; $subheaders{$last} = '' unless defined $subheaders{$last}; $subheaders{$last} .= $_; } elsif (/^$/) { process_subheaders %subheaders; $state = 2; } next; } if ($state == 0) { print "***Oops: unprocessed text at start of file\n"; } elsif ($state != 2) { print "***Oops: fell out of a header\n"; } if ($gqEcho) { if ($hstate == 0) { if (s/^]*>//i) { $hstate = 1; } else { $hstate = -1; } } if ($hstate == 1) { s/\n/ /; s/]*>/\n/ig; s/]*>/\n-----\n/ig; s!]*alt="([^">]*)"[^>]*>!$1!ig; s/]*>/\n\n/ig; s!]*>!!ig; $hstate = 0 if s!!!ig; s!&($entities);!$entities{$1}!ig; } if ($ystate == 0 && /^Yahoo\! Groups Links\s*$/) { $ystate = 1; next; } elsif ($ystate == 1) { if (/^<\*> [TY]/) { $ystate = 2; next; } elsif (/\S/) { $ystate = 0; } else { next; } } elsif ($ystate == 2) { if (/^ (?:http:|\S+\@\S+)/) { $ystate = 1; next; } else { $ystate = 0; } } # top-posting if ($state == 2 && $bstate == 0) { if (/^--- In \S+\@\S+, "/) { $gqEcho = 0; } else { $bstate = 1; } } print; } } process_headers %headers if $state == 1; exit 0; } sub process_headers (\%) { my $hp = shift; $gqEcho = 1; for my $i ('from', 'to', 'subject', 'date') { print $hp->{$i} if defined $hp->{$i}; } print "\n"; $gBoundary = undef; my $content_type = $hp->{'content-type'}; if (defined $content_type) { if ($content_type =~ /boundary=(\S+)/i) { $gBoundary = $1; $gBoundary =~ s/^"(.*)"/$1/; $gBoundary = '--' . $gBoundary; } } } sub process_subheaders (\%) { my $hp = shift; $gqEcho = 1; my $content_type = $hp->{'content-type'}; if (defined $content_type) { print $content_type; if ($opt_a || $content_type =~ m!text/html!i) { print "[omitted]\n"; $gqEcho = 0; } } print "\n"; }