[ANN] libgmail 0.0.1 -- Gmail access via Python

F

Follower

Announcing initial version of a library to provide access to Gmail via
Python.

Below is the code required to access Google's Gmail service from
within Python. It is a first version so it's rough in places and will
easily break if much is changed on the Gmail servers.

If there's sufficient interest I'll endeavour to continue work on it &
find it a permanent home.

If someone wants to take this and save me the trouble of writing an
IMAP proxy for Gmail, feel free. :)

--Phil.

P.S. I'm aware this may not make it through Usenet in one piece, so
watch out for line wrappings.

File: libgmail.py

#!/usr/bin/python2.3
#
# libgmail -- Gmail access via Python
#
# Version: 0.0.1 (2 July 2004)
#
# Author: (e-mail address removed)
#
# License: GPL 2.0
#
# Requires:
# * ClientCookie <http://wwwsearch.sourceforge.net/ClientCookie/>
#
# Thanks:
# * Live HTTP Headers <http://livehttpheaders.mozdev.org/>
# * Gmail <http://gmail.google.com/>
# * Google Blogoscoped <http://blog.outer-court.com/>
# * The *first* big G. :)
#
# NOTE:
# You should ensure you are permitted to use this script before using it
# to access Google's Gmail servers.
#
import ClientCookie
import urllib
import re

URL_LOGIN = "https://www.google.com/accounts/ServiceLoginBoxAuth"
URL_GMAIL = "https://gmail.google.com/gmail"

FOLDER_INBOX = "inbox"
FOLDER_SENT = "sent"

## This class is from the ClientCookie docs.
## TODO: Do all this cleanly.
# Build an opener that *doesn't* automatically call .add_cookie_header()
# and .extract_cookies(), so we can do it manually without interference.
class NullCookieProcessor(ClientCookie.HTTPCookieProcessor):
    """
    Cookie processor whose request and response hooks do nothing.

    Each hook hands back the object it was given, untouched.
    """

    def http_request(self, request):
        """
        Return `request` exactly as received.
        """
        return request

    def http_response(self, request, response):
        """
        Return `response` exactly as received; `request` is ignored.
        """
        return response


## TODO: Do this properly.
import time
def _bakeQuickCookie(name, value, path, domain):
    """
    Kludge to work around no easy way to create Cookie with defaults.
    (Defaults taken from Usenet post by `ClientCookie` author.)

    `name`, `value`, `path`, `domain` -- Passed through to the new cookie.

    Returns a `ClientCookie.Cookie` flagged as https-only that expires
    one year after the moment of the call.
    """
    # NOTE(review): the arguments are positional.  Apart from "secure" and
    # "expires" (labelled by the original author) the labels below assume
    # the same order as the standard library's `cookielib.Cookie`
    # constructor -- confirm against the installed ClientCookie.
    expires = time.time() + (3600 * 24 * 365)
    return ClientCookie.Cookie(
        0,                        # version
        name,
        value,
        None,                     # port
        0,                        # port specified?
        domain,
        True,                     # domain specified?
        domain.startswith("."),   # domain has initial dot?
        path,
        True,                     # path specified?
        True,                     # true if must only be sent via https
        expires,                  # expires
        0,                        # discard?
        "",                       # comment
        "",                       # comment URL
        {})                       # remaining attributes



RE_COOKIE_VAL = 'cookieVal=\W*"(.+)"'
def _extractGV(pageData):
"""

var cookieVal= "xxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";

`pageData` -- HTML page with Javascript to set cookie value.
"""
gv = None

try:
gv = re.search(RE_COOKIE_VAL, pageData).group(1)
except AttributeError:
print "Error: Couldn't extract GV cookie."
raise SystemExit

return gv


RE_MAIL_DATA = "<!--(.*)-->"
def _extractMailData(pageData):
"""
"""
try:
mailData = re.search(RE_MAIL_DATA, pageData,
re.DOTALL).group(1)
except AttributeError:
print "Error: Couldn't get mail data."
raise SystemExit

return mailData


RE_SPLIT_MAIL_DATA = re.compile("D\((.*?)\);", re.DOTALL)
def _parseMailData(mailData):
"""
"""
items = (re.findall(RE_SPLIT_MAIL_DATA, mailData))

itemsDict = {}

for item in items:
item = item.strip()[1:-1]
name, value = (item.split(",", 1) + [""])[:2]
itemsDict[name.strip('"')] = value

return itemsDict


OFFSET_MSG_ID = 0
OFFSET_MSG_SUBJECT = 6
class GmailMessage:
    """
    Holds the id and subject picked out of one entry of message data.
    """

    def __init__(self, msgData):
        """
        `msgData` -- Indexable sequence; the id is read from position
                     `OFFSET_MSG_ID` and the subject from position
                     `OFFSET_MSG_SUBJECT`.
        """
        msgId = msgData[OFFSET_MSG_ID]
        msgSubject = msgData[OFFSET_MSG_SUBJECT]

        self.id = msgId
        self.subject = msgSubject

        # TODO: Populate additional fields & cache...(?)


def _parseMsgData(msgsInfo):
"""
"""
# TODO: Parse this better/safer...
msgsData = eval(msgsInfo.replace("\n",""))

msgs = [GmailMessage(msg)
for msg in msgsData]

return msgs


class GMailAccount:
    """
    One Gmail account: holds the credentials and a cookie jar, logs in, and
    fetches folder listings, quota figures and raw messages.
    """

    def __init__(self, name, pw):
        """
        `name` -- Account name, sent as the `Email` field when logging in.
        `pw` -- Password, sent as the `Passwd` field when logging in.
        """
        self.name = name
        self._pw = pw

        self._cookieJar = ClientCookie.CookieJar()
        # NOTE(review): this opener is built but never used -- every request
        # below goes through `ClientCookie.urlopen()` instead.  Left as-is
        # because switching openers would change how cookies are handled.
        self._opener = ClientCookie.build_opener(NullCookieProcessor)

        # Items parsed from the most recently fetched folder page; None
        # until `getFolderContent` has been called.
        self._items = None

    def login(self):
        """
        Sign in and collect the cookies needed by later requests.

        Posts the credentials to `URL_LOGIN`, stores the cookies from the
        response in the account's cookie jar, then adds a "GV" cookie whose
        value is extracted from the returned page.
        """
        # NOTE(review): urlencode() escapes the "+" in 'Sign+in' as %2B, so
        # the server receives a literal plus sign rather than a space --
        # confirm which one is intended before changing it.
        data = urllib.urlencode({'continue': URL_GMAIL,
                                 'service': 'mail',
                                 'Email': self.name,
                                 'Passwd': self._pw,
                                 'null': 'Sign+in'})

        # The value must not repeat the header name; the dictionary key
        # already supplies "User-Agent".
        headers = {'Host': 'www.google.com',
                   'User-Agent': 'Mozilla/5.0 (compatible;)'}

        req = ClientCookie.Request(URL_LOGIN, data=data, headers=headers)
        resp = ClientCookie.urlopen(req)
        self._cookieJar.extract_cookies(resp, req)

        pageData = resp.read()
        gv = _extractGV(pageData)

        self._cookieJar.set_cookie(
            _bakeQuickCookie(name="GV", value=gv, path="/",
                             domain=".gmail.google.com"))

    def _retrieveURL(self, url):
        """
        Request `url` with the jar's cookies attached and return the body
        of the response.
        """
        # TODO: Do extract cookies here too?
        req = ClientCookie.Request(url)
        self._cookieJar.add_cookie_header(req)
        resp = ClientCookie.urlopen(req)

        pageData = resp.read()

        return pageData

    def getFolderContent(self, folderName):
        """
        Fetch the listing for a folder and return its entries.

        `folderName` -- As set in GMail interface.

        Returns a list of `GmailMessage` objects built from the "t" item of
        the page.  The parsed items are also kept on the instance so that
        `getQuotaInfo` can reuse them.
        """
        URL_FOLDER_BASE = "https://gmail.google.com/gmail?search=%s&view=tl"

        pageData = self._retrieveURL(URL_FOLDER_BASE % folderName)

        mailData = _extractMailData(pageData)

        self._items = _parseMailData(mailData)

        msgsInfo = self._items["t"]

        return _parseMsgData(msgsInfo)

    def getQuotaInfo(self):
        """
        Return MB used, Total MB and percentage used.

        The result is a tuple of the first three comma-separated fields of
        the "qu" item, each with surrounding double quotes removed.
        """
        if not self._items:
            # TODO: Handle this better.
            # This retrieves the value if we haven't cached it yet.
            self.getFolderContent(FOLDER_INBOX)

        quotaInfo = [value.strip('"')
                     for value in self._items["qu"].split(",")]

        return tuple(quotaInfo[:3])

    def getRawMessage(self, msgId):
        """
        Return the page fetched for `msgId` from the "view=om" URL.

        `msgId` -- Identifier substituted into the `th` query parameter.
        """
        URL_BASE_RAW_MESSAGE = "https://gmail.google.com/gmail?view=om&th=%s"

        pageData = self._retrieveURL(URL_BASE_RAW_MESSAGE % msgId)

        return pageData



FOLDER_NAMES = [FOLDER_INBOX, FOLDER_SENT] # TODO: Get these on the fly.
if __name__ == "__main__":
    # Interactive demo: log in, show the quota, then repeatedly list a
    # chosen folder and print every message in it until Ctrl-C.

    # Imported here because only the interactive demo needs it.
    import getpass

    name = raw_input("GMail account name: ")
    # getpass() keeps the password from being echoed to the terminal.
    pw = getpass.getpass("Password: ")

    ga = GMailAccount(name, pw)

    print("\nPlease wait, logging in...")

    ga.login()

    print("Log in successful.\n")

    print("%s of %s used. (%s)\n" % ga.getQuotaInfo())

    while 1:
        try:
            print("Select folder to list: (Ctrl-C to exit)")
            print("(NOTE: This will display the content of *ALL* messages.)")
            for optionId, folderName in enumerate(FOLDER_NAMES):
                print(" %d. %s" % (optionId, folderName))

            choice = raw_input("Choice: ")
            try:
                optionId = int(choice)
                if optionId < 0:
                    # A negative index would silently select from the end
                    # of the list; treat it as out of range instead.
                    raise IndexError(optionId)
                folderName = FOLDER_NAMES[optionId]
            except (ValueError, IndexError):
                # Bad input should re-prompt rather than crash the demo.
                print("Invalid choice: %r\n" % (choice,))
                continue

            msgs = ga.getFolderContent(folderName)

            print("")
            for msg in msgs:
                print("================================")
                #print msg.id, msg.subject
                print(ga.getRawMessage(msg.id))
                print("================================")

            print("")
        except KeyboardInterrupt:
            print("\n\nDone.")
            break
 
S

Skip Montanaro

phil> P.S. I'm aware this may not make it through Usenet in one piece,
phil> so watch out for line wrappings.

Probably be safer to place it on a website somewhere and just post the url.

Skip
 
D

David Fraser

Follower said:
Announcing initial version of a library to provide access to Gmail via
Python.

Below is the code required to access Google's Gmail service from
within Python. It is a first version so it's rough in places and will
easily break if much is changed on the Gmail servers.

If there's sufficient interest I'll endeavour to continue work on it &
find it a permanent home.

If someone wants to take this and save me the trouble of writing a
IMAP proxy for Gmail, feel free. :)

Just looking at the Gmail Agreement...
You also agree that you will not use any robot, spider, other
automated device, or manual process to monitor, or copy any content
from the Service.

Rats. Seems like we're not even allowed to use a browser to read our
mail :)

Anyway this is cool, why not set up a sourceforge project? I am sure
others will get involved

David
 
F

Follower

Just looking at the Gmail Agreement...
[Snip]
Rats. Seems like we're not even allowed to use a browser to read our
mail :)
Yeah, but they seem not to get too upset at the moment since the
projects "POP Goes the GMail" and "GTray" don't seem to have run into
problems yet. (Apparently the author of the first even got offered a
job at Google, so the story goes--I found his blog at one stage but
can't find it again...)
Anyway this is cool, why not set up a sourceforge project?
Funny you should suggest that, since that's what I've been doing. :)
And SF did an impressive job and got everything set up in a matter of
hours.

Haven't got anything there yet, but will soon:

<http://sourceforge.net/projects/libgmail/>

A couple of comments I wrote elsewhere:

"After sleeping on it I've realised I'm actually only retrieving the
conversation threads from the inbox, not all the messages. At present
this means you'll get one message from each thread, not all messages.

It looks like for a single message 'conversation' the thread id ==
message id, but not for a multiple-message conversation.

At this stage it seems like you have to open a conversation thread to
get all the message ids, but I'll look into it."
Also, as is always the case, after not finding anything before coding
I've found a couple of (PHP) scripts with source available to do
various Gmail things:

<http://ilia.ws/archives/15_Gmail_as_an_online_backup_system.html>
<http://tvg.ca/files/gmail.phps>
<http://muadib.blogspot.com/2004/06/gmail-rss-feeds-part-2.html>

--Phil.
 
F

Follower

BTW does it require Python 2.4 alpha? It seems to use
ClientCookie...
I'm using Python 2.3 with the latest development version of
ClientCookie available on the author's site. There are apparently some
differences between that version & the 2.4 version, but I don't know
if they are significant enough to affect libgmail's operation.

So, in short, no, you don't need Python 2.4 alpha, but you will need
to install ClientCookie separately.

--Phil.

P.S. The SF CVS version of libgmail has been modified a little bit,
mostly just refactoring and preparation for feature additions... It
now uses the more correct Folder>Thread>Message model and calculates
the total number of messages in a thread.
 
A

Adrian Holovaty

Announcing initial version of a library to provide access to Gmail via
Python.

Ah! Wish I'd announced this earlier, before it'd been reinvented. :)

Here's my stab at a Python Gmail interface, which appears to do what
libgmail does (but without dependencies on any non-standard-lib modules)
and also imports and exports Gmail contacts:

http://holovaty.com/blog/archive/2004/06/18/1751
http://holovaty.com/code/gmail.py

Feature requests welcome. I was thinking about setting up a SourceForge
project, too...But we should work together.

Adrian
 
F

Fazer

Hello!

I find your library very interesting and maybe helpful in the near
future. I have a question though. Is it possible to delete an email
using this library? If not, how can it be done? This is my main
concern since I get a few hundred messages daily which are
spam and nonsense, and I wish to make a script that checks Gmail every
hour and cleans out the dirty ones. So far the spam filter thing
doesn't work on Gmail =(

Thank you.

Fazer


Announcing initial version of a library to provide access to Gmail via
Python.

Below is the code required to access Google's Gmail service from
within Python. It is a first version so it's rough in places and will
easily break if much is changed on the Gmail servers.

If there's sufficient interest I'll endeavour to continue work on it &
find it a permanent home.

If someone wants to take this and save me the trouble of writing a
IMAP proxy for Gmail, feel free. :)

--Phil.

P.S. I'm aware this may not make it through Usenet in one piece, so
watch out for line wrappings.

File: libgmail.py

#!/usr/bin/python2.3
#
# libgmail -- Gmail access via Python
#
# Version: 0.0.1 (2 July 2004)
#
# Author: (e-mail address removed)
#
# License: GPL 2.0
#
# Requires:
# * ClientCookie <http://wwwsearch.sourceforge.net/ClientCookie/>
#
# Thanks:
# * Live HTTP Headers <http://livehttpheaders.mozdev.org/>
# * Gmail <http://gmail.google.com/>
# * Google Blogoscoped <http://blog.outer-court.com/>
# * The *first* big G. :)
#
# NOTE:
# You should ensure you are permitted to use this script before using it
# to access Google's Gmail servers.
#
import ClientCookie
import urllib
import re

URL_LOGIN = "https://www.google.com/accounts/ServiceLoginBoxAuth"
URL_GMAIL = "https://gmail.google.com/gmail"

FOLDER_INBOX = "inbox"
FOLDER_SENT = "sent"

## This class is from the ClientCookie docs.
## TODO: Do all this cleanly.
# Build an opener that *doesn't* automatically call .add_cookie_header()
# and .extract_cookies(), so we can do it manually without interference.
class NullCookieProcessor(ClientCookie.HTTPCookieProcessor):
    """
    Cookie processor whose request and response hooks do nothing.

    Each hook hands back the object it was given, untouched.
    """

    def http_request(self, request):
        """
        Return `request` exactly as received.
        """
        return request

    def http_response(self, request, response):
        """
        Return `response` exactly as received; `request` is ignored.
        """
        return response


## TODO: Do this properly.
import time
def _bakeQuickCookie(name, value, path, domain):
    """
    Kludge to work around no easy way to create Cookie with defaults.
    (Defaults taken from Usenet post by `ClientCookie` author.)

    `name`, `value`, `path`, `domain` -- Passed through to the new cookie.

    Returns a `ClientCookie.Cookie` flagged as https-only that expires
    one year after the moment of the call.
    """
    # NOTE(review): the arguments are positional.  Apart from "secure" and
    # "expires" (labelled by the original author) the labels below assume
    # the same order as the standard library's `cookielib.Cookie`
    # constructor -- confirm against the installed ClientCookie.
    expires = time.time() + (3600 * 24 * 365)
    return ClientCookie.Cookie(
        0,                        # version
        name,
        value,
        None,                     # port
        0,                        # port specified?
        domain,
        True,                     # domain specified?
        domain.startswith("."),   # domain has initial dot?
        path,
        True,                     # path specified?
        True,                     # true if must only be sent via https
        expires,                  # expires
        0,                        # discard?
        "",                       # comment
        "",                       # comment URL
        {})                       # remaining attributes



RE_COOKIE_VAL = 'cookieVal=\W*"(.+)"'
def _extractGV(pageData):
"""

var cookieVal= "xxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";

`pageData` -- HTML page with Javascript to set cookie value.
"""
gv = None

try:
gv = re.search(RE_COOKIE_VAL, pageData).group(1)
except AttributeError:
print "Error: Couldn't extract GV cookie."
raise SystemExit

return gv


RE_MAIL_DATA = "<!--(.*)-->"
def _extractMailData(pageData):
"""
"""
try:
mailData = re.search(RE_MAIL_DATA, pageData,
re.DOTALL).group(1)
except AttributeError:
print "Error: Couldn't get mail data."
raise SystemExit

return mailData


RE_SPLIT_MAIL_DATA = re.compile("D\((.*?)\);", re.DOTALL)
def _parseMailData(mailData):
"""
"""
items = (re.findall(RE_SPLIT_MAIL_DATA, mailData))

itemsDict = {}

for item in items:
item = item.strip()[1:-1]
name, value = (item.split(",", 1) + [""])[:2]
itemsDict[name.strip('"')] = value

return itemsDict


OFFSET_MSG_ID = 0
OFFSET_MSG_SUBJECT = 6
class GmailMessage:
    """
    Holds the id and subject picked out of one entry of message data.
    """

    def __init__(self, msgData):
        """
        `msgData` -- Indexable sequence; the id is read from position
                     `OFFSET_MSG_ID` and the subject from position
                     `OFFSET_MSG_SUBJECT`.
        """
        msgId = msgData[OFFSET_MSG_ID]
        msgSubject = msgData[OFFSET_MSG_SUBJECT]

        self.id = msgId
        self.subject = msgSubject

        # TODO: Populate additional fields & cache...(?)


def _parseMsgData(msgsInfo):
"""
"""
# TODO: Parse this better/safer...
msgsData = eval(msgsInfo.replace("\n",""))

msgs = [GmailMessage(msg)
for msg in msgsData]

return msgs


class GMailAccount:
    """
    One Gmail account: holds the credentials and a cookie jar, logs in, and
    fetches folder listings, quota figures and raw messages.
    """

    def __init__(self, name, pw):
        """
        `name` -- Account name, sent as the `Email` field when logging in.
        `pw` -- Password, sent as the `Passwd` field when logging in.
        """
        self.name = name
        self._pw = pw

        self._cookieJar = ClientCookie.CookieJar()
        # NOTE(review): this opener is built but never used -- every request
        # below goes through `ClientCookie.urlopen()` instead.  Left as-is
        # because switching openers would change how cookies are handled.
        self._opener = ClientCookie.build_opener(NullCookieProcessor)

        # Items parsed from the most recently fetched folder page; None
        # until `getFolderContent` has been called.
        self._items = None

    def login(self):
        """
        Sign in and collect the cookies needed by later requests.

        Posts the credentials to `URL_LOGIN`, stores the cookies from the
        response in the account's cookie jar, then adds a "GV" cookie whose
        value is extracted from the returned page.
        """
        # NOTE(review): urlencode() escapes the "+" in 'Sign+in' as %2B, so
        # the server receives a literal plus sign rather than a space --
        # confirm which one is intended before changing it.
        data = urllib.urlencode({'continue': URL_GMAIL,
                                 'service': 'mail',
                                 'Email': self.name,
                                 'Passwd': self._pw,
                                 'null': 'Sign+in'})

        # The value must not repeat the header name; the dictionary key
        # already supplies "User-Agent".
        headers = {'Host': 'www.google.com',
                   'User-Agent': 'Mozilla/5.0 (compatible;)'}

        req = ClientCookie.Request(URL_LOGIN, data=data, headers=headers)
        resp = ClientCookie.urlopen(req)
        self._cookieJar.extract_cookies(resp, req)

        pageData = resp.read()
        gv = _extractGV(pageData)

        self._cookieJar.set_cookie(
            _bakeQuickCookie(name="GV", value=gv, path="/",
                             domain=".gmail.google.com"))

    def _retrieveURL(self, url):
        """
        Request `url` with the jar's cookies attached and return the body
        of the response.
        """
        # TODO: Do extract cookies here too?
        req = ClientCookie.Request(url)
        self._cookieJar.add_cookie_header(req)
        resp = ClientCookie.urlopen(req)

        pageData = resp.read()

        return pageData

    def getFolderContent(self, folderName):
        """
        Fetch the listing for a folder and return its entries.

        `folderName` -- As set in GMail interface.

        Returns a list of `GmailMessage` objects built from the "t" item of
        the page.  The parsed items are also kept on the instance so that
        `getQuotaInfo` can reuse them.
        """
        URL_FOLDER_BASE = "https://gmail.google.com/gmail?search=%s&view=tl"

        pageData = self._retrieveURL(URL_FOLDER_BASE % folderName)

        mailData = _extractMailData(pageData)

        self._items = _parseMailData(mailData)

        msgsInfo = self._items["t"]

        return _parseMsgData(msgsInfo)

    def getQuotaInfo(self):
        """
        Return MB used, Total MB and percentage used.

        The result is a tuple of the first three comma-separated fields of
        the "qu" item, each with surrounding double quotes removed.
        """
        if not self._items:
            # TODO: Handle this better.
            # This retrieves the value if we haven't cached it yet.
            self.getFolderContent(FOLDER_INBOX)

        quotaInfo = [value.strip('"')
                     for value in self._items["qu"].split(",")]

        return tuple(quotaInfo[:3])

    def getRawMessage(self, msgId):
        """
        Return the page fetched for `msgId` from the "view=om" URL.

        `msgId` -- Identifier substituted into the `th` query parameter.
        """
        URL_BASE_RAW_MESSAGE = "https://gmail.google.com/gmail?view=om&th=%s"

        pageData = self._retrieveURL(URL_BASE_RAW_MESSAGE % msgId)

        return pageData



FOLDER_NAMES = [FOLDER_INBOX, FOLDER_SENT] # TODO: Get these on the fly.
if __name__ == "__main__":
    # Interactive demo: log in, show the quota, then repeatedly list a
    # chosen folder and print every message in it until Ctrl-C.

    # Imported here because only the interactive demo needs it.
    import getpass

    name = raw_input("GMail account name: ")
    # getpass() keeps the password from being echoed to the terminal.
    pw = getpass.getpass("Password: ")

    ga = GMailAccount(name, pw)

    print("\nPlease wait, logging in...")

    ga.login()

    print("Log in successful.\n")

    print("%s of %s used. (%s)\n" % ga.getQuotaInfo())

    while 1:
        try:
            print("Select folder to list: (Ctrl-C to exit)")
            print("(NOTE: This will display the content of *ALL* messages.)")
            for optionId, folderName in enumerate(FOLDER_NAMES):
                print(" %d. %s" % (optionId, folderName))

            choice = raw_input("Choice: ")
            try:
                optionId = int(choice)
                if optionId < 0:
                    # A negative index would silently select from the end
                    # of the list; treat it as out of range instead.
                    raise IndexError(optionId)
                folderName = FOLDER_NAMES[optionId]
            except (ValueError, IndexError):
                # Bad input should re-prompt rather than crash the demo.
                print("Invalid choice: %r\n" % (choice,))
                continue

            msgs = ga.getFolderContent(folderName)

            print("")
            for msg in msgs:
                print("================================")
                #print msg.id, msg.subject
                print(ga.getRawMessage(msg.id))
                print("================================")

            print("")
        except KeyboardInterrupt:
            print("\n\nDone.")
            break
 
F

Follower

Hi Fazer,

Thanks for your feedback.

[Snip]
Is it possible to delete an email using this library?
I hope you don't mind me delaying the answer to this question until I
could answer in the affirmative. (Rather than promising vapourware...
:) )

I have just committed code into CVS to enable message and thread
trashing functionality. (It will appear in the 0.0.8 release.) The new
`GmailAccount` methods `trashMessage` and `trashThread` will move a
message or thread into the trash.

I intend to add functionality to permanently delete items & mark items
as spam in the future, but it's not there yet.

Hope this is useful.

--Phil.
 
F

Fazer

Hi Fazer,

Thanks for your feedback.

[Snip]
Is it possible to delete an email using this library?
I hope you don't mind me delaying the answer to this question until I
could answer in the affirmative. (Rather than promising vapourware...
:) )

I have just committed code into CVS to enable message and thread
trashing functionality. (It will appear in the 0.0.8 release.) The new
`GmailAccount` methods `trashMessage` and `trashThread` will move a
message or thread into the trash.

I intend to add functionality to permanently delete items & mark items
as spam in the future, but it's not there yet.

Hope this is useful.

--Phil.

Hey Phil,

I was wondering if you would also plan on writing some sort of
documentation/manual for your very nifty library? That would really
help others a lot. Do you need help with it? Maybe I can lend a hand
if you need it.

Thanks,

Fazer
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,982
Messages
2,570,190
Members
46,736
Latest member
zacharyharris

Latest Threads

Top