R
ra9ftm
This is my first Python script. I don't know whether it uses the
modules correctly, but it works fine, especially with Russian code pages
and MIME-formatted messages, including quoted-printable and
base64-encoded ones.
It would be great if anybody posted comments on this script: is
it good or bad?
import email
import mailbox
from email.Header import decode_header
from email.Header import make_header
import string
import sys
# Encoding used for everything the script prints.
outEnc = "cp866"

# Path of the mbox file to convert, taken from the command line.
# Exit with a usage message instead of an IndexError traceback when the
# argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s MBOX_FILE" % sys.argv[0])
infile = sys.argv[1]

# Marker strings that introduce the footer appended to each message.
# obrez() tries them in this order, so the longer, more specific markers
# come first and the short tilde run is the last resort.
subStrObrez = [
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~",
    """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To UNSUBSCRIBE from this forum, send an email to:""",
    "~~~~~~~~~~~~~~~~~~",
]
# Cut yahoo info at the end of message
def obrez(strMsg, markers=None):
    """Return strMsg with the trailing footer removed.

    The markers are tried in order.  For the first marker that occurs in
    strMsg, the text is cut at the LAST occurrence of that marker and
    everything before it is returned.  If no marker occurs, strMsg is
    returned unchanged.

    strMsg  -- the message body to trim.
    markers -- optional sequence of marker strings; when omitted, the
               module-level subStrObrez list is used.
    """
    if markers is None:
        markers = subStrObrez
    for marker in markers:
        # str.rfind replaces the deprecated string.rfind() module function.
        cut = strMsg.rfind(marker)
        if cut != -1:
            return strMsg[:cut]
    return strMsg
# Convert message header
def my_get_header(str):
    """Decode a raw header value (possibly RFC 2047 encoded) to text.

    Each chunk returned by decode_header() is converted to Unicode using
    its declared charset and followed by a single space, so the result
    always ends with a space when the header is non-empty.

    str -- the raw header value, or None when the header is absent.
           (The parameter name shadows the builtin; it is kept so that
           existing callers are unaffected.)

    Returns a Unicode string; an empty one when str is None.
    """
    if str is None:
        # A missing header (msg['To'] etc. returns None) would make
        # decode_header() raise.
        return u""
    text_type = type(u"")
    pieces = []
    for val, encoding in decode_header(str):
        if not isinstance(val, text_type):
            # Undecoded chunks are byte strings: decode them explicitly so
            # stray 8-bit bytes or an unknown charset name cannot raise.
            try:
                val = val.decode(encoding or "ascii", "replace")
            except LookupError:
                val = val.decode("ascii", "replace")
        pieces.append(val)
        pieces.append(u" ")
    return u"".join(pieces)
# Process the message
def _part_text(part, strip_footer):
    """Return the transfer-decoded body of one message part for printing.

    When the part declares a charset, the body is converted from that
    charset to outEnc; characters that cannot be converted are replaced
    rather than raising.  With no charset, or an unknown one, the raw
    decoded payload is returned as is.  If strip_footer is true, the
    result is passed through obrez().
    """
    payload = part.get_payload(None, True)
    if payload is None:
        return ""
    charset = part.get_content_charset()
    if charset:
        try:
            payload = payload.decode(charset, "replace").encode(outEnc, "replace")
        except LookupError:
            # Unknown charset name: fall back to the raw payload.
            pass
    if strip_footer:
        payload = obrez(payload)
    return payload


def proc(msg):
    """Print the From/To/Subject headers and the text body of msg.

    For a multipart message every text/plain part is printed with the
    footer stripped.  A single-part message is printed when it is
    text/plain (footer stripped) or text/html (printed unmodified);
    other content types produce no body output.

    Written for Python 2: print is used with a single parenthesised
    argument and the output is a byte string in outEnc.
    """
    for label, name in (('From : ', 'From'),
                        ('To : ', 'To'),
                        ('Subject: ', 'Subject')):
        raw = msg[name]
        # A missing header yields None; print an empty value instead of
        # failing while decoding it.
        if raw is None:
            value = ""
        else:
            value = my_get_header(raw).encode(outEnc, "replace")
        print(label + value)
    print("")
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() == "text/plain":
                # The original computed this value but never printed it
                # when a charset was present.
                print(_part_text(part, True))
    elif msg.get_content_type() == "text/plain":
        print(_part_text(msg, True))
    elif msg.get_content_type() == "text/html":
        print(_part_text(msg, False))
####################################################################################
# The main program
#
# Reads the mbox file named by infile and, for every message whose decoded
# Subject contains one of the tags in RubLst, prints a header block, the
# message itself (via proc) and a trailer.  Written for Python 2.

# Each entry is [subject tag, forum name used in the "SB ...@FORUM" line].
RubLst = []
RubLst.append(["[contestru]", "FOTSTR"])
RubLst.append(["[russiandx]", "FORUDX"])

f = open(infile, "rb")
try:
    # Only one mailbox reader is created; the original also built an unused
    # second reader (m1) on the same file object.
    for msg in mailbox.UnixMailbox(f, email.message_from_file):
        raw_subject = msg['Subject']
        if raw_subject is None:
            # No Subject header: it cannot match any tag.
            continue
        # Decode the subject once per message instead of once per test.
        subject = my_get_header(raw_subject)
        for rub in RubLst:
            if rub[0] in subject:
                print("SB " + rub[1] + "@FORUM < INET")
                print(subject.encode(outEnc, "replace"))
                print("")
                proc(msg)
                print("")
                print("powered by Python")
                print("/EX")
finally:
    # Close the mailbox file even if a message fails to convert.
    f.close()
modules, but it works fine, especially with Russian code pages
and MIME-formatted messages, including quoted-printable and
base64-encoded ones.
It would be great if anybody posted comments on this script: is
it good or bad?
import email
import mailbox
from email.Header import decode_header
from email.Header import make_header
import string
import sys
# Encoding used for everything the script prints.
outEnc = "cp866"

# Path of the mbox file to convert, taken from the command line.
# Exit with a usage message instead of an IndexError traceback when the
# argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s MBOX_FILE" % sys.argv[0])
infile = sys.argv[1]

# Marker strings that introduce the footer appended to each message.
# obrez() tries them in this order, so the longer, more specific markers
# come first and the short tilde run is the last resort.
subStrObrez = [
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~",
    """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To UNSUBSCRIBE from this forum, send an email to:""",
    "~~~~~~~~~~~~~~~~~~",
]
# Cut yahoo info at the end of message
def obrez(strMsg, markers=None):
    """Return strMsg with the trailing footer removed.

    The markers are tried in order.  For the first marker that occurs in
    strMsg, the text is cut at the LAST occurrence of that marker and
    everything before it is returned.  If no marker occurs, strMsg is
    returned unchanged.

    strMsg  -- the message body to trim.
    markers -- optional sequence of marker strings; when omitted, the
               module-level subStrObrez list is used.
    """
    if markers is None:
        markers = subStrObrez
    for marker in markers:
        # str.rfind replaces the deprecated string.rfind() module function.
        cut = strMsg.rfind(marker)
        if cut != -1:
            return strMsg[:cut]
    return strMsg
# Convert message header
def my_get_header(str):
    """Decode a raw header value (possibly RFC 2047 encoded) to text.

    Each chunk returned by decode_header() is converted to Unicode using
    its declared charset and followed by a single space, so the result
    always ends with a space when the header is non-empty.

    str -- the raw header value, or None when the header is absent.
           (The parameter name shadows the builtin; it is kept so that
           existing callers are unaffected.)

    Returns a Unicode string; an empty one when str is None.
    """
    if str is None:
        # A missing header (msg['To'] etc. returns None) would make
        # decode_header() raise.
        return u""
    text_type = type(u"")
    pieces = []
    for val, encoding in decode_header(str):
        if not isinstance(val, text_type):
            # Undecoded chunks are byte strings: decode them explicitly so
            # stray 8-bit bytes or an unknown charset name cannot raise.
            try:
                val = val.decode(encoding or "ascii", "replace")
            except LookupError:
                val = val.decode("ascii", "replace")
        pieces.append(val)
        pieces.append(u" ")
    return u"".join(pieces)
# Process the message
def _part_text(part, strip_footer):
    """Return the transfer-decoded body of one message part for printing.

    When the part declares a charset, the body is converted from that
    charset to outEnc; characters that cannot be converted are replaced
    rather than raising.  With no charset, or an unknown one, the raw
    decoded payload is returned as is.  If strip_footer is true, the
    result is passed through obrez().
    """
    payload = part.get_payload(None, True)
    if payload is None:
        return ""
    charset = part.get_content_charset()
    if charset:
        try:
            payload = payload.decode(charset, "replace").encode(outEnc, "replace")
        except LookupError:
            # Unknown charset name: fall back to the raw payload.
            pass
    if strip_footer:
        payload = obrez(payload)
    return payload


def proc(msg):
    """Print the From/To/Subject headers and the text body of msg.

    For a multipart message every text/plain part is printed with the
    footer stripped.  A single-part message is printed when it is
    text/plain (footer stripped) or text/html (printed unmodified);
    other content types produce no body output.

    Written for Python 2: print is used with a single parenthesised
    argument and the output is a byte string in outEnc.
    """
    for label, name in (('From : ', 'From'),
                        ('To : ', 'To'),
                        ('Subject: ', 'Subject')):
        raw = msg[name]
        # A missing header yields None; print an empty value instead of
        # failing while decoding it.
        if raw is None:
            value = ""
        else:
            value = my_get_header(raw).encode(outEnc, "replace")
        print(label + value)
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() == "text/plain":
                # The original computed this value but never printed it
                # when a charset was present.
                print(_part_text(part, True))
    elif msg.get_content_type() == "text/plain":
        print(_part_text(msg, True))
    elif msg.get_content_type() == "text/html":
        print(_part_text(msg, False))
####################################################################################
# The main program
#
# Reads the mbox file named by infile and, for every message whose decoded
# Subject contains one of the tags in RubLst, prints a header block, the
# message itself (via proc) and a trailer.  Written for Python 2.

# Each entry is [subject tag, forum name used in the "SB ...@FORUM" line].
RubLst = []
RubLst.append(["[contestru]", "FOTSTR"])
RubLst.append(["[russiandx]", "FORUDX"])

f = open(infile, "rb")
try:
    # Only one mailbox reader is created; the original also built an unused
    # second reader (m1) on the same file object.
    for msg in mailbox.UnixMailbox(f, email.message_from_file):
        raw_subject = msg['Subject']
        if raw_subject is None:
            # No Subject header: it cannot match any tag.
            continue
        # Decode the subject once per message instead of once per test.
        subject = my_get_header(raw_subject)
        for rub in RubLst:
            if rub[0] in subject:
                print("SB " + rub[1] + "@FORUM < INET")
                print(subject.encode(outEnc, "replace"))
                proc(msg)
                print("powered by Python")
                print("/EX")
finally:
    # Close the mailbox file even if a message fails to convert.
    f.close()