M
matej
Hi,
I am writing a script to convert a couple of thousand emails (in a couple
of hundred folders), and before I get to the hard part -- maintaining the
structure of folders and subfolders, and keeping a record of the status of
each message -- I would like to be sure that I have at least the maildir->mbox
conversion right. Could anybody comment on the code shown below, please?
Thanks a lot
Matěj
---------------------------------------------------------------------------------------------------------------------
#!/usr/bin/env python
"""mdir2mbx: yet another maildir -> mbox converter

Usage:
    mdir2mbx maildirName mboxName

Both arguments are required: the maildir folder to read and the mbox
file to write.

TODO:
    * convert all (or as many as possible) status flags from KMail
      to Thunderbird.
    * testing, testing, testing
"""
__version__ = "$Revision: 1.2 $"
__author__ = "Matej Cepl <[email protected]>"
__copyright__ = "(C) 2007 Matej Cepl. MIT/X11."
__date__ = "$Date: 2007/01/08 23:56:29 $"
__contributors__ = []
# The name was originally spelled with three leading underscores; the old
# spelling is kept as an alias so nothing that referenced it breaks.
___contributors__ = __contributors__
import email, email.Errors, email.Header, email.Generator, mailbox
import codecs, sys, cStringIO
class Mailbox(mailbox.UnixMailbox):
def __init__(self,filename):
mailbox.UnixMailbox.__init__(self,filename,email.message_from_file)
self.boxname=filename
self.content = ""
def add(self,msg):
fp = cStringIO.StringIO()
g = email.Generator.Generator(fp, mangle_from_=True,
maxheaderlen=65)
g.flatten(msg,unixfrom=True)
self.content += "%s\n\n" % fp.getvalue()
def write(self):
outfile=file(self.boxname,"wb")
outfile.write("%s\n" % self.content)
outfile.close()
class MyMaildir(mailbox.Maildir):
def __init__(self,dirname):
mailbox.Maildir.__init__(self,dirname,email.message_from_file)
self.dirname = dirname
self.decfunc = email.Header.decode_header
self.msg = ""
def __translateHeader(self,headerName):
header = email.Header.decode_header(self.msg[headerName])
string = header[0][0]
encoding = header[0][1]
if not(encoding):
encoding = "ascii"
outstr = string.decode(encoding,'ignore')
return outstr
def listHeaders(self):
for self.msg in self:
hdrfrom = self.__translateHeader("From")
#hdrto = self.__translateHeader("To")
hdrdate = self.__translateHeader("Date")
hdrsubject = self.__translateHeader("Subject")
print "%s;%s;%s" % (hdrfrom,hdrdate,hdrsubject)
#header =
email.Header.decode_header(self.msg["Message-Id"])[0][0]
#print "%s;%s" % (self.dirname,header)
def writeMBox(self,filename):
mbox = Mailbox(filename)
for self.msg in self:
mbox.add(self.msg)
mbox.write()
if __name__ == "__main__":
    # Both the source maildir and the destination mbox file are required;
    # exit with a usage message instead of an IndexError traceback.
    if len(sys.argv) != 3:
        sys.stderr.write("usage: %s maildirName mboxName\n" % sys.argv[0])
        sys.exit(2)
    obj = MyMaildir(sys.argv[1])
    obj.writeMBox(sys.argv[2])
I am writing a script to convert a couple of thousand emails (in a couple
of hundred folders), and before I get to the hard part -- maintaining the
structure of folders and subfolders, and keeping a record of the status of
each message -- I would like to be sure that I have at least the maildir->mbox
conversion right. Could anybody comment on the code shown below, please?
Thanks a lot
Matěj
---------------------------------------------------------------------------------------------------------------------
#!/usr/bin/env python
"""mdir2mbx: yet another maildir -> mbox converter

Usage:
    mdir2mbx maildirName mboxName

Both arguments are required: the maildir folder to read and the mbox
file to write.

TODO:
    * convert all (or as many as possible) status flags from KMail
      to Thunderbird.
    * testing, testing, testing
"""
__version__ = "$Revision: 1.2 $"
__author__ = "Matej Cepl <[email protected]>"
__copyright__ = "(C) 2007 Matej Cepl. MIT/X11."
__date__ = "$Date: 2007/01/08 23:56:29 $"
__contributors__ = []
# The name was originally spelled with three leading underscores; the old
# spelling is kept as an alias so nothing that referenced it breaks.
___contributors__ = __contributors__
import email, email.Errors, email.Header, email.Generator, mailbox
import codecs, sys, cStringIO
class Mailbox(mailbox.UnixMailbox):
def __init__(self,filename):
mailbox.UnixMailbox.__init__(self,filename,email.message_from_file)
self.boxname=filename
self.content = ""
def add(self,msg):
fp = cStringIO.StringIO()
g = email.Generator.Generator(fp, mangle_from_=True,
maxheaderlen=65)
g.flatten(msg,unixfrom=True)
self.content += "%s\n\n" % fp.getvalue()
def write(self):
outfile=file(self.boxname,"wb")
outfile.write("%s\n" % self.content)
outfile.close()
class MyMaildir(mailbox.Maildir):
def __init__(self,dirname):
mailbox.Maildir.__init__(self,dirname,email.message_from_file)
self.dirname = dirname
self.decfunc = email.Header.decode_header
self.msg = ""
def __translateHeader(self,headerName):
header = email.Header.decode_header(self.msg[headerName])
string = header[0][0]
encoding = header[0][1]
if not(encoding):
encoding = "ascii"
outstr = string.decode(encoding,'ignore')
return outstr
def listHeaders(self):
for self.msg in self:
hdrfrom = self.__translateHeader("From")
#hdrto = self.__translateHeader("To")
hdrdate = self.__translateHeader("Date")
hdrsubject = self.__translateHeader("Subject")
print "%s;%s;%s" % (hdrfrom,hdrdate,hdrsubject)
#header =
email.Header.decode_header(self.msg["Message-Id"])[0][0]
#print "%s;%s" % (self.dirname,header)
def writeMBox(self,filename):
mbox = Mailbox(filename)
for self.msg in self:
mbox.add(self.msg)
mbox.write()
if __name__ == "__main__":
    # Both the source maildir and the destination mbox file are required;
    # exit with a usage message instead of an IndexError traceback.
    if len(sys.argv) != 3:
        sys.stderr.write("usage: %s maildirName mboxName\n" % sys.argv[0])
        sys.exit(2)
    obj = MyMaildir(sys.argv[1])
    obj.writeMBox(sys.argv[2])