M
mk
(duck)
542 comp.lang.python rtfm
467 comp.lang.python shut+up
263 comp.lang.perl rtfm
45 comp.lang.perl shut+up
Code:
import urllib2
import re
import time
def fillurlfmt(args):
    """Build the query record for one (group, keyword) pair.

    Args:
        args: a 3-tuple ``(urlfmt, ggroup, gkw)``. ``urlfmt`` is a
            %-style format string that is filled with ``(gkw, ggroup)``,
            in that order.

    Returns:
        A dict with the keys ``'group'``, ``'keyword'`` and ``'url'``.
    """
    urlfmt, ggroup, gkw = args
    return {'group': ggroup, 'keyword': gkw, 'url': urlfmt % (gkw, ggroup)}
def consqurls(args):
    """Build one query record per keyword for a single newsgroup.

    Args:
        args: a 2-tuple ``(ggroup, gkeywords)`` where ``gkeywords`` is an
            iterable of search keywords.

    Returns:
        A list with one ``fillurlfmt`` result per keyword, in keyword order.
    """
    ggroup, gkeywords = args
    # NOTE(review): this URL contains a literal '...' and only one '%s',
    # while fillurlfmt supplies two values (keyword, group). It looks like
    # the posting truncated the original query URL -- TODO restore the full
    # URL (with both placeholders) before running.
    urlfmt = ('http://groups.google.com/groups/sea...'
              '_ugroup=%s&as_usubject=&as_uauthors=&safe=off')
    # A real list (not a lazy map object) so flatten_list can recurse into it.
    return [fillurlfmt((urlfmt, ggroup, gkw)) for gkw in gkeywords]
def flatten_list(x):
    """Return a flat list of the non-list elements of ``x``.

    Nested ``list`` instances are expanded recursively, depth first, and
    element order is preserved. Only ``list`` (and its subclasses) is
    expanded; tuples and other iterables are kept as single elements.
    """
    res = []
    for el in x:
        if isinstance(el, list):
            res.extend(flatten_list(el))
        else:
            res.append(el)
    return res
def ggsearch(urldict):
    """Fetch ``urldict['url']`` and return the page body with the query info.

    Args:
        urldict: a dict with at least a ``'url'`` key.

    Returns:
        A new dict holding the response body under ``'result'`` plus every
        entry of ``urldict``. Entries of ``urldict`` take precedence, so a
        ``'result'`` key in ``urldict`` would replace the fetched body.
    """
    opener = urllib2.build_opener()
    # The request is sent with a browser User-agent header.
    opener.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.20) '
        'Gecko/20081217 (CK-IBM) Firefox/2.0.0.20',
    )]
    # Short pause before every request (presumably to throttle the queries).
    time.sleep(0.1)
    urlf = opener.open(urldict['url'])
    try:
        resdict = {'result': urlf.read()}
    finally:
        # Close the response even when read() fails.
        urlf.close()
    resdict.update(urldict)
    return resdict
def extrclosure(resregexp, groupno):
    """Return a function that reduces a record's 'result' to a regex group.

    Args:
        resregexp: a compiled regular expression.
        groupno: the group to extract from the first match.

    Returns:
        A function taking a dict; it searches ``dict['result']`` with
        ``resregexp``, overwrites ``dict['result']`` in place with the
        matched group and returns the same dict. If the pattern does not
        match, ``search`` yields ``None`` and the call raises AttributeError.
    """
    def extrres(resdict):
        txtgr = resregexp.search(resdict['result'])
        resdict['result'] = txtgr.group(groupno)
        return resdict
    return extrres
def delcomma(x):
    """Strip every comma from ``x['result']`` in place and return ``x``."""
    x['result'] = x['result'].replace(',', '')
    return x
if __name__ == "__main__":
    # Count Google Groups hits for every (newsgroup, keyword) combination
    # and print one "<count> <group> <keyword>" line per combination.
    gkeywords = ['rtfm', 'shut+up']
    ggroups = ['comp.lang.python', 'comp.lang.perl']

    # One list of query records per group, flattened into a single list.
    params = [(ggroup, gkeywords) for ggroup in ggroups]
    qurls = flatten_list([consqurls(param) for param in params])

    # Fetch every result page.
    gresults = [ggsearch(qurl) for qurl in qurls]

    # Raw string: the pattern is the same as before, without relying on
    # unrecognised backslash escapes in a normal string literal.
    resre = re.compile(
        r'Results \<b\>1\</b\> - \<b\>.+?\</b\> of about \<b\>(.+?)\</b\>')
    gextrsearchresult = extrclosure(resre, 1)

    # Reduce each page to its hit count, then drop thousands separators.
    gresults = [delcomma(gextrsearchresult(res)) for res in gresults]

    for el in gresults:
        print('%s %s %s' % (el['result'], el['group'], el['keyword']))
    # Trailing blank line after the report.
    print('')
This was inspired by
http://mail.python.org/pipermail/python-list/2002-November/172466.html
Regards,
mk
542 comp.lang.python rtfm
467 comp.lang.python shut+up
263 comp.lang.perl rtfm
45 comp.lang.perl shut+up
Code:
import urllib2
import re
import time
def fillurlfmt(args):
    """Build the query record for one (group, keyword) pair.

    Args:
        args: a 3-tuple ``(urlfmt, ggroup, gkw)``. ``urlfmt`` is a
            %-style format string that is filled with ``(gkw, ggroup)``,
            in that order.

    Returns:
        A dict with the keys ``'group'``, ``'keyword'`` and ``'url'``.
    """
    urlfmt, ggroup, gkw = args
    return {'group': ggroup, 'keyword': gkw, 'url': urlfmt % (gkw, ggroup)}
def consqurls(args):
    """Build one query record per keyword for a single newsgroup.

    Args:
        args: a 2-tuple ``(ggroup, gkeywords)`` where ``gkeywords`` is an
            iterable of search keywords.

    Returns:
        A list with one ``fillurlfmt`` result per keyword, in keyword order.
    """
    ggroup, gkeywords = args
    # NOTE(review): this URL contains a literal '...' and only one '%s',
    # while fillurlfmt supplies two values (keyword, group). It looks like
    # the posting truncated the original query URL -- TODO restore the full
    # URL (with both placeholders) before running.
    urlfmt = ('http://groups.google.com/groups/sea...'
              '_ugroup=%s&as_usubject=&as_uauthors=&safe=off')
    # A real list (not a lazy map object) so flatten_list can recurse into it.
    return [fillurlfmt((urlfmt, ggroup, gkw)) for gkw in gkeywords]
def flatten_list(x):
    """Return a flat list of the non-list elements of ``x``.

    Nested ``list`` instances are expanded recursively, depth first, and
    element order is preserved. Only ``list`` (and its subclasses) is
    expanded; tuples and other iterables are kept as single elements.
    """
    res = []
    for el in x:
        if isinstance(el, list):
            res.extend(flatten_list(el))
        else:
            res.append(el)
    return res
def ggsearch(urldict):
    """Fetch ``urldict['url']`` and return the page body with the query info.

    Args:
        urldict: a dict with at least a ``'url'`` key.

    Returns:
        A new dict holding the response body under ``'result'`` plus every
        entry of ``urldict``. Entries of ``urldict`` take precedence, so a
        ``'result'`` key in ``urldict`` would replace the fetched body.
    """
    opener = urllib2.build_opener()
    # The request is sent with a browser User-agent header.
    opener.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.20) '
        'Gecko/20081217 (CK-IBM) Firefox/2.0.0.20',
    )]
    # Short pause before every request (presumably to throttle the queries).
    time.sleep(0.1)
    urlf = opener.open(urldict['url'])
    try:
        resdict = {'result': urlf.read()}
    finally:
        # Close the response even when read() fails.
        urlf.close()
    resdict.update(urldict)
    return resdict
def extrclosure(resregexp, groupno):
    """Return a function that reduces a record's 'result' to a regex group.

    Args:
        resregexp: a compiled regular expression.
        groupno: the group to extract from the first match.

    Returns:
        A function taking a dict; it searches ``dict['result']`` with
        ``resregexp``, overwrites ``dict['result']`` in place with the
        matched group and returns the same dict. If the pattern does not
        match, ``search`` yields ``None`` and the call raises AttributeError.
    """
    def extrres(resdict):
        txtgr = resregexp.search(resdict['result'])
        resdict['result'] = txtgr.group(groupno)
        return resdict
    return extrres
def delcomma(x):
    """Strip every comma from ``x['result']`` in place and return ``x``."""
    x['result'] = x['result'].replace(',', '')
    return x
if __name__ == "__main__":
    # Count Google Groups hits for every (newsgroup, keyword) combination
    # and print one "<count> <group> <keyword>" line per combination.
    gkeywords = ['rtfm', 'shut+up']
    ggroups = ['comp.lang.python', 'comp.lang.perl']

    # One list of query records per group, flattened into a single list.
    params = [(ggroup, gkeywords) for ggroup in ggroups]
    qurls = flatten_list([consqurls(param) for param in params])

    # Fetch every result page.
    gresults = [ggsearch(qurl) for qurl in qurls]

    # Raw string: the pattern is the same as before, without relying on
    # unrecognised backslash escapes in a normal string literal.
    resre = re.compile(
        r'Results \<b\>1\</b\> - \<b\>.+?\</b\> of about \<b\>(.+?)\</b\>')
    gextrsearchresult = extrclosure(resre, 1)

    # Reduce each page to its hit count, then drop thousands separators.
    gresults = [delcomma(gextrsearchresult(res)) for res in gresults]

    for el in gresults:
        print('%s %s %s' % (el['result'], el['group'], el['keyword']))
This was inspired by
http://mail.python.org/pipermail/python-list/2002-November/172466.html
Regards,
mk