W
Will McGugan
Hi folks,
Has anyone seen 'Googlewhack Adventure'?
http://www.davegorman.com/googlewhack.htm
I wrote a script to generate Googlewhacks - thought I'd share it with
you. I'd better stop running it as I fear Google may ban my IP for
making 20 searches a second.
Will McGugan
import random
import urllib2
import threading
# Number of worker threads, i.e. how many searches run concurrently.
WHACKER_THREADS = 20

# Seed the shared random generator used to pick word pairs.
random.seed()

# Candidate words, one per line of word.lst. Blank lines are skipped so an
# empty string is never used as a search term.
word_file = open("word.lst")
try:
    wordlist = [line.rstrip() for line in word_file if line.strip()]
finally:
    # Close the handle explicitly instead of leaving it to the collector.
    word_file.close()

# Whacks found are appended to this file. The handle is left open on
# purpose: the worker threads write to it for as long as they run.
whacks = open("whacks.txt", "a")
class WhackerThread(threading.Thread):
    """Worker thread that tests random word pairs for Googlewhacks.

    Each pass draws two random entries from the module-level ``wordlist``
    and searches Google for the pair. A pair counts as a whack when the
    results page reports a single hit, none of the ``excluded`` markers
    appear in it, and dictionary.reference.com does not answer
    "Did You Mean" for either word. Every whack is printed and appended to
    the module-level ``whacks`` file.
    """

    # Substrings that, when present in the raw results page, make the pair
    # be reported as a probable word-list hit instead of a whack.
    excluded = "/dict .lst word.lst .txt words".split()

    # Text looked for in the results page to detect exactly one hit.
    _single_hit_marker = "Results <b>1</b> - <b>1</b> of <b>1</b>"

    # Seconds to wait after a failed request before trying another pair.
    _retry_delay = 1.0

    # Every worker appends to the same file, so writes are serialised.
    _whacks_lock = threading.Lock()

    def run(self):
        """Test random word pairs indefinitely, recording each whack found."""
        import time  # local import: only the failure path needs it

        # The opener never changes, so build it once rather than per query.
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]

        while True:
            word_a = random.choice(wordlist)
            word_b = random.choice(wordlist)
            words = word_a + " " + word_b
            try:
                is_whack, report = self._classify(opener, word_a, word_b)
            except IOError:
                # urllib2.URLError (and so HTTPError) derives from IOError.
                # One failed request must not end the worker: report it,
                # pause briefly, and move on to another pair.
                print(words + " (request failed)")
                time.sleep(self._retry_delay)
                continue
            # Single-argument print(...) behaves like the print statement.
            print(report)
            if is_whack:
                self._record(words)

    @classmethod
    def _is_excluded(cls, page):
        """Return True if *page* contains any of the ``excluded`` markers."""
        for marker in cls.excluded:
            if marker in page:
                return True
        return False

    def _classify(self, opener, word_a, word_b):
        """Look one word pair up online and decide what it is.

        Returns an ``(is_whack, report)`` tuple, where ``report`` is the
        line to print for the pair. Failed requests propagate as IOError.
        """
        from urllib import quote_plus  # local import: no file-level change

        def check_word(word):
            # Quote the word so characters such as "&" or a space cannot
            # corrupt the query string.
            url = """http://dictionary.reference.com/search?q=%s""" % quote_plus(word)
            dict_page = urllib2.urlopen(url).read()
            return "Did You Mean" not in dict_page

        words = word_a + " " + word_b
        google_url = (
            """http://www.google.com/search?hl=en&q=%s+%s&btnG=Google+Search"""
            % (quote_plus(word_a), quote_plus(word_b))
        )
        google_page = opener.open(google_url).read()

        if self._is_excluded(google_page):
            return False, words + " (probably a word list)"
        if self._single_hit_marker not in google_page:
            return False, words + "(no whack)"
        # Only single-hit pairs cost the extra dictionary lookups.
        if not check_word(word_a):
            return False, "%s (%s is not in dicionary.com)" % (words, word_a)
        if not check_word(word_b):
            return False, "%s (%s is not in dicionary.com)" % (words, word_b)
        return True, words + " WHACK!"

    def _record(self, words):
        """Append one whack to the shared ``whacks`` file and flush it."""
        self._whacks_lock.acquire()
        try:
            # One write per line, under the lock, so concurrent workers
            # cannot interleave the text and the newline of two whacks.
            whacks.write(words + "\n")
            whacks.flush()
        finally:
            self._whacks_lock.release()
# Create the whole pool of workers first, then set each one running.
Threads = []
for _ in xrange(WHACKER_THREADS):
    Threads.append(WhackerThread())
for whacker_thread in Threads:
    whacker_thread.start()
Has anyone seen 'Googlewhack Adventure'?
http://www.davegorman.com/googlewhack.htm
I wrote a script to generate Googlewhacks - thought I'd share it with
you. I'd better stop running it as I fear Google may ban my IP for
making 20 searches a second.
Will McGugan
import random
import urllib2
import threading
# Number of worker threads, i.e. how many searches run concurrently.
WHACKER_THREADS = 20

# Seed the shared random generator used to pick word pairs.
random.seed()

# Candidate words, one per line of word.lst. Blank lines are skipped so an
# empty string is never used as a search term.
word_file = open("word.lst")
try:
    wordlist = [line.rstrip() for line in word_file if line.strip()]
finally:
    # Close the handle explicitly instead of leaving it to the collector.
    word_file.close()

# Whacks found are appended to this file. The handle is left open on
# purpose: the worker threads write to it for as long as they run.
whacks = open("whacks.txt", "a")
class WhackerThread(threading.Thread):
    """Worker thread that tests random word pairs for Googlewhacks.

    Each pass draws two random entries from the module-level ``wordlist``
    and searches Google for the pair. A pair counts as a whack when the
    results page reports a single hit, none of the ``excluded`` markers
    appear in it, and dictionary.reference.com does not answer
    "Did You Mean" for either word. Every whack is printed and appended to
    the module-level ``whacks`` file.
    """

    # Substrings that, when present in the raw results page, make the pair
    # be reported as a probable word-list hit instead of a whack.
    excluded = "/dict .lst word.lst .txt words".split()

    # Text looked for in the results page to detect exactly one hit.
    _single_hit_marker = "Results <b>1</b> - <b>1</b> of <b>1</b>"

    # Seconds to wait after a failed request before trying another pair.
    _retry_delay = 1.0

    # Every worker appends to the same file, so writes are serialised.
    _whacks_lock = threading.Lock()

    def run(self):
        """Test random word pairs indefinitely, recording each whack found."""
        import time  # local import: only the failure path needs it

        # The opener never changes, so build it once rather than per query.
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]

        while True:
            word_a = random.choice(wordlist)
            word_b = random.choice(wordlist)
            words = word_a + " " + word_b
            try:
                is_whack, report = self._classify(opener, word_a, word_b)
            except IOError:
                # urllib2.URLError (and so HTTPError) derives from IOError.
                # One failed request must not end the worker: report it,
                # pause briefly, and move on to another pair.
                print(words + " (request failed)")
                time.sleep(self._retry_delay)
                continue
            # Single-argument print(...) behaves like the print statement.
            print(report)
            if is_whack:
                self._record(words)

    @classmethod
    def _is_excluded(cls, page):
        """Return True if *page* contains any of the ``excluded`` markers."""
        for marker in cls.excluded:
            if marker in page:
                return True
        return False

    def _classify(self, opener, word_a, word_b):
        """Look one word pair up online and decide what it is.

        Returns an ``(is_whack, report)`` tuple, where ``report`` is the
        line to print for the pair. Failed requests propagate as IOError.
        """
        from urllib import quote_plus  # local import: no file-level change

        def check_word(word):
            # Quote the word so characters such as "&" or a space cannot
            # corrupt the query string.
            url = """http://dictionary.reference.com/search?q=%s""" % quote_plus(word)
            dict_page = urllib2.urlopen(url).read()
            return "Did You Mean" not in dict_page

        words = word_a + " " + word_b
        google_url = (
            """http://www.google.com/search?hl=en&q=%s+%s&btnG=Google+Search"""
            % (quote_plus(word_a), quote_plus(word_b))
        )
        google_page = opener.open(google_url).read()

        if self._is_excluded(google_page):
            return False, words + " (probably a word list)"
        if self._single_hit_marker not in google_page:
            return False, words + "(no whack)"
        # Only single-hit pairs cost the extra dictionary lookups.
        if not check_word(word_a):
            return False, "%s (%s is not in dicionary.com)" % (words, word_a)
        if not check_word(word_b):
            return False, "%s (%s is not in dicionary.com)" % (words, word_b)
        return True, words + " WHACK!"

    def _record(self, words):
        """Append one whack to the shared ``whacks`` file and flush it."""
        self._whacks_lock.acquire()
        try:
            # One write per line, under the lock, so concurrent workers
            # cannot interleave the text and the newline of two whacks.
            whacks.write(words + "\n")
            whacks.flush()
        finally:
            self._whacks_lock.release()
# Create the whole pool of workers first, then set each one running.
Threads = []
for _ in xrange(WHACKER_THREADS):
    Threads.append(WhackerThread())
for whacker_thread in Threads:
    whacker_thread.start()