Документ взят из кэша поисковой машины. Адрес оригинального документа : http://hea-www.harvard.edu/~garcia/parse_record.txt
Дата изменения: Wed Feb 9 00:13:41 2011
Дата индексирования: Mon Oct 1 21:25:01 2012
Кодировка:

Поисковые слова: m 103
This is a multi-part message in MIME format.
--------------040101010905050604020004
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

Mike,

I must have forgotten to attach it. Here it is.

Guenther

Michael Garcia wrote:
> Hi Guenther - I got 3 replies, one of which was yours. But I did
> not get your script. Can you try sending it again?
> Thanks - Mike
>
>> Mike,
>>
>>
>> Yes, I do. I have attached the Perl script. You need to set the
>> directories, etc for your setup. I write two Whitelist files, since the
>> spamassassin systems on cfa0 and on head are different. This version
>> works with thunderbird, so it expects a folder to be one file. xmh has
>> one email message per file, so the parser needs to be slightly
>> different. I had a version for that as well, I may still be able to
>> find it if necessary.
>>
>> Guenther
>>
>> Mike Garcia wrote:
>>> Does anybody have a script that goes through all your
>>> email (unix mail sorted into folders, ie, exmh) and
>>> makes a 'whitelisted' list of all your saved email addresses?
>>> Thanks - Mike
>> l scr
> #######################################################################
> # Dr. Michael Garcia Constellation-X SAO Science Lead #
> # Smithsonian Astrophysical Observatory FAX : (617) - 495 - 7356 #
> # Center for Astrophysics Tel : (617) - 495 - 7169 #
> # 60 Garden Street internet : garcia@cfa.harvard.edu #
> # Cambridge MA 02138 http://hea-www.harvard.edu/~garcia/ #
> #######################################################################
>

--------------040101010905050604020004
Content-Type: application/x-perl;
name="parse_record.pl"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="parse_record.pl"

#!/opt/bin/perl
#
# Author: Dr. Guenther Eichhorn
# Smithsonian Astrophysical Observatory
#
# Finds the To: and Cc: addresses and puts them in the Whitelist
# of Spamassasin. It uses *@domain for smaller domains, but
# individual addresses for larger domain like yahoo.com.
#
#
@large_isp =
(
"aol.com",
"arinc.com",
"attglobal.net ",
"bellsouth.net",
"cfa",
"cfa.harvard.edu",
"charter.net",
"comcast.net",
"compuserve.com",
"ebay.com",
"earthlink.net",
"gmail.com",
"gmx.ch",
"gmx.de",
"gmx.net",
"google.com",
"hotmail.com",
"juno.com",
"laposte.net",
"mail.ru",
"microsoft.com",
"msn.com",
"netscape.net",
"netzero.com",
"netzero.net",
"o2.pl",
"prodigy.com",
"prodigy.net",
"rocketmail.com",
"rr.com",
"sympatico.ca",
"t-online.de",
"verizon.com",
"verizon.net",
"yahoo.ca",
"yahoo.co.in",
"yahoo.co.uk",
"yahoo.com.au",
"yahoo.com.hk",
"yahoo.fr",
"yahoo.it",
"yahoo.com",
"yahoo.de",
) ;

#$DEBUG = 1 ;

$MAIL_DIR = "/home/geichhor/Mail/" ;
$spam_filter_o = "/home/geichhor/.spamassassin/user_prefs" ;
$spam_filter = "/home/spamdusr/prefs/geichhor.prefs" ;

$dest1 = "To:" ;
$dest2 = "Cc:" ;
$folder = "Sent" ;
$all = 0 ;

while ( $#ARGV >= 0 ) {
if ( $ARGV[0] eq "-all" ) {
$all = 1;
} elsif ( $ARGV[0] eq "-from" ) {
$dest1 = "From:" ;
$dest2 = "From:" ;
} elsif ( $ARGV[0] eq "-folder" ) {
shift @ARGV ;
$folder = $ARGV[0] ;
}
shift @ARGV ;
}

$rec_fld = $MAIL_DIR . $folder ;

if ( !(-r "$rec_fld") ) {
exit 1 ;
}

%addrs = () ;

@to = &parse_rec ( $rec_fld ) ;
if ( $#to >= 0 ) {
foreach $t ( @to ) {
if ( ( $t ne "\@" ) && ( $t ne "" ) && ( $t =~ /.\@./ ) ) {
$DEBUG && print "found addr $t\n" ;
$addrs{"whitelist_from $t"} = 1 ;
}
}
}

open(FILE,"$spam_filter") || die "Cannot open spam filter $spam_filter" ;

while ( ) {
chop ;
tr/[A-Z]/a-z]/;
$addrs{$_} = 1 ;
}
close ( FILE ) ;

open(FILE,"> $spam_filter") || die "Cannot open spam filter $spam_filter" ;

$DEBUG && print "write file $spamfilter\n" ;
foreach $a ( sort keys %addrs ) {
$DEBUG && print "write file $a\n" ;
$a =~ tr/[A-Z]/[a-z]/;
print FILE "$a\n" ;
}

close ( FILE ) ;
`cp $spam_filter $spam_filter_o` ;
exit ( 0 ) ;
1;

sub parse_rec {
my $file = shift ;
my ( @res, @adrs, $adr, $line ) ;
$DEBUG && print "file $file\n" ;

@res = () ;
if ( open ( RFILE, $file ) ) {
while ( ) {
if ( !(/^From g/) ) {
next ;
}
$DEBUG && print "found From $_" ;
while ( ) {
chop ;
if ( ( /^$dest1/i ) || ( /^$dest2/i ) ) {
$DEBUG && print "found To: $_\n" ;

$line = $_ ;

while ( 1 ) {
@adrs = split ( /[\s,:<>\"\']+/, $line ) ;
$DEBUG && print "#adrs $#adrs, adrs $adrs[0]\n" ;

foreach $adr ( @adrs ) {
$DEBUG && print "adr $adr\n" ;
if ( $adr =~ /\@/ ) {
$DEBUG && print "found adr $adr\n" ;
$adr =~ tr/[A-Z]/[a-z]/;
$done = 0 ;
$adr =~ s/[^a-z]*$//;

$DEBUG && print "found adr $adr\n" ;
foreach $large ( @large_isp ) {
$DEBUG && print "check large $large\n" ;
if ( $adr =~ /$large$/i ) {
push ( @res, $adr ) ;
$DEBUG && print "push1 $adr\n" ;
$done = 1 ;
last ;
}
}
$DEBUG && print "done $done\n" ;
if ( $done == 0 ) {
$adr =~ s/^[^@]+@/*@/ ;
push ( @res, $adr ) ;
$DEBUG && print "push2 $adr\n" ;
}
}
}
$line = ;
if ( $line =~ /^ / ) {
next;
} elsif ( ( $line =~ s/^$dest1/ /i ) ||
( $line =~ s/^$dest2/ /i ) ) {
next ;
} else {
last ;
}
}
}
if ( /^\s*$/ ) {
last ;
}
}
}
}
return(@res) ;
}
1;

--------------040101010905050604020004--