Googlewhacker

W

Will McGugan

Hi folks,

Has anyone seen 'Googlewhack Adventure'?

http://www.davegorman.com/googlewhack.htm

I wrote a script to generate Googlewhacks - thought I'd share it with
you. I'd better stop running it as I fear Google may ban my IP for
making 20 searches a second.


Will McGugan


import random
import urllib2
import threading

# Number of concurrent search threads to launch.
WHACKER_THREADS = 20

# Seed the generator explicitly (no argument: system time / OS randomness).
random.seed()

# Load the candidate words, one per line, and close the file as soon as it
# has been read.  Blank lines are skipped so that an empty string is never
# picked as a search word.
word_file = open("word.lst")
try:
    wordlist = [line.rstrip() for line in word_file if line.strip()]
finally:
    word_file.close()

# Confirmed whacks are appended to this file.  The handle is intentionally
# left open for the life of the program because the worker threads write to it.
whacks = open("whacks.txt", "a")


class WhackerThread(threading.Thread):
    """Worker thread that hunts for Googlewhacks until the process ends.

    Each pass picks two random entries from the module-level ``wordlist``,
    searches Google for the pair and, when the results page reports exactly
    one hit and both words pass the dictionary lookup, prints the pair and
    appends it to the module-level ``whacks`` file.
    """

    # Substrings that mark a results page as "probably a word list".
    excluded = "/dict .lst word.lst .txt words".split()

    # Serialises appends to the shared ``whacks`` file across worker threads.
    _whacks_lock = threading.Lock()

    @staticmethod
    def _is_excluded(page):
        """Return True if *page* contains any of the ``excluded`` markers."""
        for marker in WhackerThread.excluded:
            if marker in page:
                return True
        return False

    @staticmethod
    def _check_word(word):
        """Return True unless the dictionary site reports no entry for *word*.

        Fetches the dictionary.reference.com search page for *word* and looks
        for the "No entry found" marker in it.
        """
        import urllib  # local import: only needed for quote_plus

        url = ("http://dictionary.reference.com/search?q=%s"
               % urllib.quote_plus(word))
        dict_page = urllib2.urlopen(url).read()
        return "No entry found" not in dict_page

    def run(self):
        """Search random word pairs forever, reporting and recording whacks.

        Nothing here catches network errors, so a failed request propagates
        and ends this worker thread.
        """
        import urllib  # local import: only needed for quote_plus

        # The opener never changes, so build it once instead of per search.
        # It sends a browser-like User-agent header with every request.
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]

        while True:
            word_a = random.choice(wordlist)
            word_b = random.choice(wordlist)
            words = word_a + " " + word_b

            # Escape both words so that characters such as '&', '#' or spaces
            # in a word-list entry cannot corrupt the query string.
            google_url = (
                "http://www.google.com/search?hl=en&q=%s+%s&btnG=Google+Search"
                % (urllib.quote_plus(word_a), urllib.quote_plus(word_b))
            )
            google_page = opener.open(google_url).read()

            if self._is_excluded(google_page):
                print(words + " (probably a word list)")
                continue

            # Only a page reporting exactly one result can be a whack.
            if "Results <b>1</b> - <b>1</b> of <b>1</b>" not in google_page:
                print(words + "(no whack)")
                continue

            if not self._check_word(word_a):
                print("%s (%s is not in dicionary.com)" % (words, word_a))
            elif not self._check_word(word_b):
                print("%s (%s is not in dicionary.com)" % (words, word_b))
            else:
                print(words + " WHACK!")
                # Several threads share the file: hold the lock so one
                # thread's line cannot be split by another thread's write.
                self._whacks_lock.acquire()
                try:
                    whacks.write(words + "\n")
                    whacks.flush()
                finally:
                    self._whacks_lock.release()

# Create the whole pool of workers first, then set them all running.
Threads = []
for _ in range(WHACKER_THREADS):
    Threads.append(WhackerThread())

for whacker_thread in Threads:
    whacker_thread.start()
 
W

Will McGugan

Will said:
Hi folks,

Has anyone seen 'Googlewhack Adventure'?

http://www.davegorman.com/googlewhack.htm

I wrote a script to generate Googlewhacks - thought I'd share it with
you. I'd better stop running it as I fear Google may ban my IP for
making 20 searches a second.

Oops, wrong script..

Will

import random
import urllib2
import threading

# Number of concurrent search threads to launch.
WHACKER_THREADS = 20

# Seed the generator explicitly (no argument: system time / OS randomness).
random.seed()

# Load the candidate words, one per line, and close the file as soon as it
# has been read.  Blank lines are skipped so that an empty string is never
# picked as a search word.
word_file = open("word.lst")
try:
    wordlist = [line.rstrip() for line in word_file if line.strip()]
finally:
    word_file.close()

# Confirmed whacks are appended to this file.  The handle is intentionally
# left open for the life of the program because the worker threads write to it.
whacks = open("whacks.txt", "a")


class WhackerThread(threading.Thread):
    """Worker thread that hunts for Googlewhacks until the process ends.

    Each pass picks two random entries from the module-level ``wordlist``,
    searches Google for the pair and, when the results page reports exactly
    one hit and both words pass the dictionary lookup, prints the pair and
    appends it to the module-level ``whacks`` file.
    """

    # Substrings that mark a results page as "probably a word list".
    excluded = "/dict .lst word.lst .txt words".split()

    # Serialises appends to the shared ``whacks`` file across worker threads.
    _whacks_lock = threading.Lock()

    @staticmethod
    def _is_excluded(page):
        """Return True if *page* contains any of the ``excluded`` markers."""
        for marker in WhackerThread.excluded:
            if marker in page:
                return True
        return False

    @staticmethod
    def _check_word(word):
        """Return True unless the dictionary site reports no entry for *word*.

        Fetches the dictionary.reference.com search page for *word* and looks
        for the "No entry found" marker in it.
        """
        import urllib  # local import: only needed for quote_plus

        url = ("http://dictionary.reference.com/search?q=%s"
               % urllib.quote_plus(word))
        dict_page = urllib2.urlopen(url).read()
        return "No entry found" not in dict_page

    def run(self):
        """Search random word pairs forever, reporting and recording whacks.

        Nothing here catches network errors, so a failed request propagates
        and ends this worker thread.
        """
        import urllib  # local import: only needed for quote_plus

        # The opener never changes, so build it once instead of per search.
        # It sends a browser-like User-agent header with every request.
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]

        while True:
            word_a = random.choice(wordlist)
            word_b = random.choice(wordlist)
            words = word_a + " " + word_b

            # Escape both words so that characters such as '&', '#' or spaces
            # in a word-list entry cannot corrupt the query string.
            google_url = (
                "http://www.google.com/search?hl=en&q=%s+%s&btnG=Google+Search"
                % (urllib.quote_plus(word_a), urllib.quote_plus(word_b))
            )
            google_page = opener.open(google_url).read()

            if self._is_excluded(google_page):
                print(words + " (probably a word list)")
                continue

            # Only a page reporting exactly one result can be a whack.
            if "Results <b>1</b> - <b>1</b> of <b>1</b>" not in google_page:
                print(words + "(no whack)")
                continue

            if not self._check_word(word_a):
                print("%s (%s is not in dicionary.com)" % (words, word_a))
            elif not self._check_word(word_b):
                print("%s (%s is not in dicionary.com)" % (words, word_b))
            else:
                print(words + " WHACK!")
                # Several threads share the file: hold the lock so one
                # thread's line cannot be split by another thread's write.
                self._whacks_lock.acquire()
                try:
                    whacks.write(words + "\n")
                    whacks.flush()
                finally:
                    self._whacks_lock.release()

# Create the whole pool of workers first, then set them all running.
Threads = []
for _ in range(WHACKER_THREADS):
    Threads.append(WhackerThread())

for whacker_thread in Threads:
    whacker_thread.start()
 
H

Harlin Seritt

They actually won't ban your IP for this. They only limit your number
of searches per day. I discovered this once when I used
http://www.google.com as a test metric for my network monitoring
program. I do like your script though.

Regards,

Harlin
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
474,221
Messages
2,571,134
Members
47,748
Latest member
LyleMondra

Latest Threads

Top