G
Gonsolo
I wrote a small script to compute the H-Index of an author.
It is modeled after ActiveState's Google search recipe:
http://code.activestate.com/recipes/523047/
Example use:
hindex i daubechies
Result:
49
The script:
#!/usr/bin/python
import httplib, urllib, re, sys
from BeautifulSoup import BeautifulSoup
# Number of results requested from Google Scholar in a single query.  Only
# this one page of results is examined, so the reported h-index can never
# exceed the number of records that page contains.
RESULT_LIMIT = 100

# Browser-like User-Agent sent with the request.
# NOTE(review): presumably Scholar rejects unidentified clients -- confirm.
HEADERS = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

# Only digits are captured so that any text following the number inside the
# same element cannot make int() fail.
CITED_BY_RE = re.compile(r"Cited by (\d+)")


def parse_cited_by(text):
    """Return the citation count found in *text*, or None if there is none.

    The count is the integer following the literal "Cited by " in *text*.
    """
    match = CITED_BY_RE.search(text)
    if match is None:
        return None
    return int(match.group(1))


def h_index(cites):
    """Return the h-index for an iterable of per-paper citation counts.

    The h-index is the largest h such that at least h papers have at least
    h citations each.  An empty input yields 0.
    """
    h = 0
    for cite in sorted(cites, reverse=True):
        # Counts are in descending order, so once one paper fails to raise
        # the index no later paper can either.
        if cite <= h:
            break
        h += 1
    return h


def fetch_citation_counts(terms):
    """Query Google Scholar for *terms* and return the citation counts found.

    terms -- list of search words (e.g. the author's initial and surname).

    Returns a list of ints, one per result record that carries a
    "Cited by N" marker.  If the server does not answer with status 200 the
    program exits with an error message on stderr instead of reporting a
    bogus h-index of 0.
    """
    # urlencode() turns spaces into '+' itself; joining with '+' here would
    # send a literal, percent-encoded plus sign between the terms.
    params = urllib.urlencode({'q': " ".join(terms), 'num': RESULT_LIMIT})
    conn = httplib.HTTPConnection('scholar.google.com')
    try:
        # A GET carries no body, so the headers are passed by keyword rather
        # than after an empty dict standing in for the body.
        conn.request("GET", '/scholar?' + params, headers=HEADERS)
        resp = conn.getresponse()
        if resp.status != 200:
            sys.exit('Error: %s %s' % (resp.status, resp.reason))
        # Drop every non-ASCII byte before handing the page to the parser.
        html = resp.read().decode('ascii', 'ignore')
    finally:
        conn.close()

    soup = BeautifulSoup(html)
    counts = []
    # Each <p class="g"> element is treated as one search result record.
    for record in soup('p', {'class': 'g'}):
        count = parse_cited_by(str(record))
        if count is not None:
            counts.append(count)
    return counts


def main():
    """Print the h-index for the search terms given on the command line."""
    print(h_index(fetch_citation_counts(sys.argv[1:])))


if __name__ == "__main__":
    main()
It is modeled after ActiveState's Google search recipe:
http://code.activestate.com/recipes/523047/
Example use:
hindex i daubechies
Result:
49
The script:
#!/usr/bin/python
import httplib, urllib, re, sys
from BeautifulSoup import BeautifulSoup
# Number of results requested from Google Scholar in a single query.  Only
# this one page of results is examined, so the reported h-index can never
# exceed the number of records that page contains.
RESULT_LIMIT = 100

# Browser-like User-Agent sent with the request.
# NOTE(review): presumably Scholar rejects unidentified clients -- confirm.
HEADERS = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

# Only digits are captured so that any text following the number inside the
# same element cannot make int() fail.
CITED_BY_RE = re.compile(r"Cited by (\d+)")


def parse_cited_by(text):
    """Return the citation count found in *text*, or None if there is none.

    The count is the integer following the literal "Cited by " in *text*.
    """
    match = CITED_BY_RE.search(text)
    if match is None:
        return None
    return int(match.group(1))


def h_index(cites):
    """Return the h-index for an iterable of per-paper citation counts.

    The h-index is the largest h such that at least h papers have at least
    h citations each.  An empty input yields 0.
    """
    h = 0
    for cite in sorted(cites, reverse=True):
        # Counts are in descending order, so once one paper fails to raise
        # the index no later paper can either.
        if cite <= h:
            break
        h += 1
    return h


def fetch_citation_counts(terms):
    """Query Google Scholar for *terms* and return the citation counts found.

    terms -- list of search words (e.g. the author's initial and surname).

    Returns a list of ints, one per result record that carries a
    "Cited by N" marker.  If the server does not answer with status 200 the
    program exits with an error message on stderr instead of reporting a
    bogus h-index of 0.
    """
    # urlencode() turns spaces into '+' itself; joining with '+' here would
    # send a literal, percent-encoded plus sign between the terms.
    params = urllib.urlencode({'q': " ".join(terms), 'num': RESULT_LIMIT})
    conn = httplib.HTTPConnection('scholar.google.com')
    try:
        # A GET carries no body, so the headers are passed by keyword rather
        # than after an empty dict standing in for the body.
        conn.request("GET", '/scholar?' + params, headers=HEADERS)
        resp = conn.getresponse()
        if resp.status != 200:
            sys.exit('Error: %s %s' % (resp.status, resp.reason))
        # Drop every non-ASCII byte before handing the page to the parser.
        html = resp.read().decode('ascii', 'ignore')
    finally:
        conn.close()

    soup = BeautifulSoup(html)
    counts = []
    # Each <p class="g"> element is treated as one search result record.
    for record in soup('p', {'class': 'g'}):
        count = parse_cited_by(str(record))
        if count is not None:
            counts.append(count)
    return counts


def main():
    """Print the h-index for the search terms given on the command line."""
    print(h_index(fetch_citation_counts(sys.argv[1:])))


if __name__ == "__main__":
    main()