P
Phillip B Oldham
I'm keen on learning Python, with a heavy lean towards doing things the
"pythonic" way, so I threw the following script together in a few hours
as a first attempt at programming in Python.
I'd like the community's thoughts/comments on what I've done;
improvements I can make, "don'ts" I should be avoiding, etc. I'm not
so much bothered about the resulting data - for the moment it meets my
needs. But any comment is welcome!
#!/usr/bin/env python
## Open a file containing a list of domains (1 per line),
## request and parse its whois record and push to a csv
## file.
import subprocess
import re
# Input: one domain per line.  Output: one `sep`-delimited row per domain.
# Both handles are kept as module-level names because the rest of the
# script reads `src` and writes `dest` directly.
src = open('./domains.txt')
dest = open('./whois.csv', 'w')
sep = "|"
# Column order of the output file.  "Registrant's Address" and
# "Renewal Date" had been split across lines by mail wrapping, which left
# unterminated string literals; they are single strings again here.
headers = [
    "Domain",
    "Registrant",
    "Registrant's Address",
    "Registrar",
    "Registrant Type",
    "Date Registered",
    "Renewal Date",
    "Last Updated",
    "Name Servers",
]
dest.write(sep.join(headers) + "\n")
# Lines whose stripped text starts with one of these are dropped by trim().
_TRIM_NOISE_PREFIXES = ("WHOIS", ">>>", "%")


def trim(txt):
    """Drop boilerplate lines from raw whois output.

    Blank lines and lines whose stripped text starts with ``WHOIS``,
    ``>>>`` or ``%`` are skipped.  Every kept line is prefixed with one
    space.  Scanning stops at the first remaining line that starts with
    ``--`` (checked on the unstripped line, so an indented ``--`` does
    not stop the scan).

    Returns the kept lines joined with newlines when the whole text was
    scanned, or concatenated with no separator when a ``--`` line ended
    the scan early.
    """
    kept = []
    for line in txt.split("\n"):
        stripped = line.strip()
        if not stripped or stripped.startswith(_TRIM_NOISE_PREFIXES):
            continue
        if line.startswith("--"):
            # NOTE(review): this early exit joins without a newline while
            # the normal exit below joins with one.  Kept as-is because
            # callers may rely on it -- confirm whether it is intended.
            return "".join(kept)
        kept.append(" " + line)
    return "\n".join(kept)
# A "key: value" line: at most one leading whitespace character, then at
# least one non-colon character, then a colon followed by a space.
_KEY_VALUE_LINE = re.compile(r"^\s?([^:]+): ")


def clean(txt):
    """Keep only the lines of *txt* that look like ``key: value``.

    A line is kept when it matches ``_KEY_VALUE_LINE`` at its start;
    every other line is discarded.  Returns the surviving lines joined
    with newlines (an empty string when nothing matches).
    """
    is_field = _KEY_VALUE_LINE.match
    return "\n".join(line for line in txt.split("\n") if is_field(line))
def clean_co_uk(rec):
    """Re-flow a trimmed .co.uk whois record into ``key: value`` lines.

    The record is flattened to a single line and then broken again in
    front of each thing that looks like a label (one or two words ending
    in a colon), so that a label and the text following it end up on the
    same line.

    Returns the re-flowed text.
    """
    # Rewrite this label so its colon is not treated as a field separator.
    rec = rec.replace("Company number:", "Company number -")
    # Flatten to one line.  (A separate pass collapsing blank lines first
    # is unnecessary: removing every newline gives the same result.)
    rec = rec.replace("\n", "")
    # Temporarily mark the end of every label with a newline ...
    rec = rec.replace(": ", ":\n")
    # ... so a label can be recognised as: one character that is not "(",
    # then letters/apostrophes, an optional whitespace character, optional
    # letters, then the marker.  A line break is inserted before each.
    rec = re.sub(r"([^(][a-zA-Z']+\s?[a-zA-Z]*:\n)", r"\n\g<0>", rec)
    # Put the label and its value back on one line.
    rec = rec.replace(":\n", ": ")
    # Drop a leading line that consists only of spaces.
    rec = re.sub(r"^[ ]+\n", "", rec)
    return rec
def clean_net(rec):
    """Re-flow a trimmed whois record into ``key: value`` lines.

    The record is flattened to a single line and then broken again in
    front of each thing that looks like a label (one or two words ending
    in a colon), so that a label and the text following it end up on the
    same line.

    Returns the re-flowed text.
    """
    # Flatten to one line.  (A separate pass collapsing blank lines first
    # is unnecessary: removing every newline gives the same result.)
    rec = rec.replace("\n", "")
    # Temporarily mark the end of every label with a newline ...
    rec = rec.replace(": ", ":\n")
    # ... so a label can be recognised as letters/apostrophes, an optional
    # whitespace character, optional letters, then the marker.  A line
    # break is inserted before each.
    rec = re.sub(r"([a-zA-Z']+\s?[a-zA-Z]*:\n)", r"\n\g<0>", rec)
    # Put the label and its value back on one line.
    rec = rec.replace(":\n", ": ")
    return rec
# Everything up to and including the first colon of a line (at least one
# character must precede the colon).
_LEADING_LABEL = re.compile(r"^([^:]+):")


def clean_info(rec):
    """Insert a space after the first colon of every line in *rec*.

    Lines with no colon, or with a colon as their first character, are
    left unchanged.  Returns the lines re-joined with newlines.
    """
    add_space = _LEADING_LABEL.sub
    return "\n".join(add_space(r"\g<0> ", line) for line in rec.split("\n"))
# Lower-cased whois label -> index of the output column it fills.
# Column 0 is the domain itself; the order matches the header row.
_FIELD_COLUMNS = {
    "registrant": 1,
    "registrant name": 1,
    "registrant type": 4,
    "registrant's address": 2,
    "registrant address1": 2,
    "registrar": 3,
    "sponsoring registrar": 3,
    "registered on": 5,
    "registered": 5,
    "domain registeration date": 5,
    "renewal date": 6,
    "last updated": 7,
    "domain last updated date": 7,
    "name servers": 8,
    "name server": 8,
    "nameservers": 8,
    "updated date": 7,
    "creation date": 5,
    "expiration date": 6,
    "domain expiration date": 6,
    "administrative contact": 2,
}


def record(domain, record):
    """Write one output row for *domain* to the module-level ``dest``.

    *record* maps whois labels to values.  Labels are matched
    case-insensitively against ``_FIELD_COLUMNS``; unknown labels and
    empty values are ignored.  When several labels map to the same
    column, the one iterated last wins.

    The row has nine cells joined with the module-level ``sep`` and is
    terminated by a newline.  Cell 0 is the lower-cased domain; it is
    filled in even when *record* is empty.

    (The second parameter shadows this function's own name inside the
    body; it is kept for interface compatibility.)
    """
    details = [""] * 9
    details[0] = domain.lower()
    for label, value in record.items():
        column = _FIELD_COLUMNS.get(label.lower())
        if column is not None and value != "":
            details[column] = value
    dest.write(sep.join(details) + "\n")
## Loop through domains
# Output beginning with one of these means there is nothing to parse.
_NO_SERVER_PREFIXES = ("No whois server", "This TLD has no whois server")
# Domains with these suffixes are re-flowed with clean_net().
_NET_STYLE_SUFFIXES = (".net", ".com", ".tv")

try:
    for domain in src:
        domain = domain.strip()
        if not domain:
            continue
        # universal_newlines=True makes communicate() return text rather
        # than bytes on Python 3, so the str comparisons below work there
        # too.  NOTE(review): decoding uses the locale's encoding; output
        # that cannot be decoded would raise -- confirm this is acceptable.
        rec = subprocess.Popen(
            ["whois", domain],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        ).communicate()[0]
        if rec.startswith(_NO_SERVER_PREFIXES):
            continue
        rec = trim(rec)
        # The suffixes are mutually exclusive, so at most one branch runs.
        if domain.endswith(_NET_STYLE_SUFFIXES):
            rec = clean_net(rec)
        elif domain.endswith(".co.uk"):
            rec = clean_co_uk(rec)
        elif domain.endswith(".info"):
            rec = clean_info(rec)
        rec = clean(rec)
        details = {}
        try:
            for line in rec.split("\n"):
                # Only the first two ": "-separated pieces are used; a
                # line without the separator raises ValueError here.
                key, value = line.split(": ")[:2]
                details[key.strip()] = value.strip().replace("\t", ", ")
        except ValueError:
            # A line that cannot be split (e.g. an empty record) means
            # the whole domain is skipped and no row is written.
            continue
        record(domain, details)
finally:
    ## Cleanup
    # Close both files even if a lookup or parse step raised.
    src.close()
    dest.close()
"pythonic" way, so threw the following script together in a few hours
as a first attempt at programming in Python.
I'd like the community's thoughts/comments on what I've done;
improvements I can make, "don'ts" I should be avoiding, etc. I'm not
so much bothered about the resulting data - for the moment it meets my
needs. But any comment is welcome!
#!/usr/bin/env python
## Open a file containing a list of domains (1 per line),
## request and parse its whois record and push to a csv
## file.
import subprocess
import re
# Input: one domain per line.  Output: one `sep`-delimited row per domain.
# Both handles are kept as module-level names because the rest of the
# script reads `src` and writes `dest` directly.
src = open('./domains.txt')
dest = open('./whois.csv', 'w')
sep = "|"
# Column order of the output file.  "Registrant's Address" and
# "Renewal Date" had been split across lines by mail wrapping, which left
# unterminated string literals; they are single strings again here.
headers = [
    "Domain",
    "Registrant",
    "Registrant's Address",
    "Registrar",
    "Registrant Type",
    "Date Registered",
    "Renewal Date",
    "Last Updated",
    "Name Servers",
]
dest.write(sep.join(headers) + "\n")
# Lines whose stripped text starts with one of these are dropped by trim().
_TRIM_NOISE_PREFIXES = ("WHOIS", ">>>", "%")


def trim(txt):
    """Drop boilerplate lines from raw whois output.

    Blank lines and lines whose stripped text starts with ``WHOIS``,
    ``>>>`` or ``%`` are skipped.  Every kept line is prefixed with one
    space.  Scanning stops at the first remaining line that starts with
    ``--`` (checked on the unstripped line, so an indented ``--`` does
    not stop the scan).

    Returns the kept lines joined with newlines when the whole text was
    scanned, or concatenated with no separator when a ``--`` line ended
    the scan early.
    """
    kept = []
    for line in txt.split("\n"):
        stripped = line.strip()
        if not stripped or stripped.startswith(_TRIM_NOISE_PREFIXES):
            continue
        if line.startswith("--"):
            # NOTE(review): this early exit joins without a newline while
            # the normal exit below joins with one.  Kept as-is because
            # callers may rely on it -- confirm whether it is intended.
            return "".join(kept)
        kept.append(" " + line)
    return "\n".join(kept)
# A "key: value" line: at most one leading whitespace character, then at
# least one non-colon character, then a colon followed by a space.
_KEY_VALUE_LINE = re.compile(r"^\s?([^:]+): ")


def clean(txt):
    """Keep only the lines of *txt* that look like ``key: value``.

    A line is kept when it matches ``_KEY_VALUE_LINE`` at its start;
    every other line is discarded.  Returns the surviving lines joined
    with newlines (an empty string when nothing matches).
    """
    is_field = _KEY_VALUE_LINE.match
    return "\n".join(line for line in txt.split("\n") if is_field(line))
def clean_co_uk(rec):
    """Re-flow a trimmed .co.uk whois record into ``key: value`` lines.

    The record is flattened to a single line and then broken again in
    front of each thing that looks like a label (one or two words ending
    in a colon), so that a label and the text following it end up on the
    same line.

    Returns the re-flowed text.
    """
    # Rewrite this label so its colon is not treated as a field separator.
    rec = rec.replace("Company number:", "Company number -")
    # Flatten to one line.  (A separate pass collapsing blank lines first
    # is unnecessary: removing every newline gives the same result.)
    rec = rec.replace("\n", "")
    # Temporarily mark the end of every label with a newline ...
    rec = rec.replace(": ", ":\n")
    # ... so a label can be recognised as: one character that is not "(",
    # then letters/apostrophes, an optional whitespace character, optional
    # letters, then the marker.  A line break is inserted before each.
    rec = re.sub(r"([^(][a-zA-Z']+\s?[a-zA-Z]*:\n)", r"\n\g<0>", rec)
    # Put the label and its value back on one line.
    rec = rec.replace(":\n", ": ")
    # Drop a leading line that consists only of spaces.
    rec = re.sub(r"^[ ]+\n", "", rec)
    return rec
def clean_net(rec):
    """Re-flow a trimmed whois record into ``key: value`` lines.

    The record is flattened to a single line and then broken again in
    front of each thing that looks like a label (one or two words ending
    in a colon), so that a label and the text following it end up on the
    same line.

    Returns the re-flowed text.
    """
    # Flatten to one line.  (A separate pass collapsing blank lines first
    # is unnecessary: removing every newline gives the same result.)
    rec = rec.replace("\n", "")
    # Temporarily mark the end of every label with a newline ...
    rec = rec.replace(": ", ":\n")
    # ... so a label can be recognised as letters/apostrophes, an optional
    # whitespace character, optional letters, then the marker.  A line
    # break is inserted before each.
    rec = re.sub(r"([a-zA-Z']+\s?[a-zA-Z]*:\n)", r"\n\g<0>", rec)
    # Put the label and its value back on one line.
    rec = rec.replace(":\n", ": ")
    return rec
# Everything up to and including the first colon of a line (at least one
# character must precede the colon).
_LEADING_LABEL = re.compile(r"^([^:]+):")


def clean_info(rec):
    """Insert a space after the first colon of every line in *rec*.

    Lines with no colon, or with a colon as their first character, are
    left unchanged.  Returns the lines re-joined with newlines.
    """
    add_space = _LEADING_LABEL.sub
    return "\n".join(add_space(r"\g<0> ", line) for line in rec.split("\n"))
# Lower-cased whois label -> index of the output column it fills.
# Column 0 is the domain itself; the order matches the header row.
_FIELD_COLUMNS = {
    "registrant": 1,
    "registrant name": 1,
    "registrant type": 4,
    "registrant's address": 2,
    "registrant address1": 2,
    "registrar": 3,
    "sponsoring registrar": 3,
    "registered on": 5,
    "registered": 5,
    "domain registeration date": 5,
    "renewal date": 6,
    "last updated": 7,
    "domain last updated date": 7,
    "name servers": 8,
    "name server": 8,
    "nameservers": 8,
    "updated date": 7,
    "creation date": 5,
    "expiration date": 6,
    "domain expiration date": 6,
    "administrative contact": 2,
}


def record(domain, record):
    """Write one output row for *domain* to the module-level ``dest``.

    *record* maps whois labels to values.  Labels are matched
    case-insensitively against ``_FIELD_COLUMNS``; unknown labels and
    empty values are ignored.  When several labels map to the same
    column, the one iterated last wins.

    The row has nine cells joined with the module-level ``sep`` and is
    terminated by a newline.  Cell 0 is the lower-cased domain; it is
    filled in even when *record* is empty.

    (The second parameter shadows this function's own name inside the
    body; it is kept for interface compatibility.)
    """
    details = [""] * 9
    details[0] = domain.lower()
    for label, value in record.items():
        column = _FIELD_COLUMNS.get(label.lower())
        if column is not None and value != "":
            details[column] = value
    dest.write(sep.join(details) + "\n")
## Loop through domains
# Output beginning with one of these means there is nothing to parse.
_NO_SERVER_PREFIXES = ("No whois server", "This TLD has no whois server")
# Domains with these suffixes are re-flowed with clean_net().
_NET_STYLE_SUFFIXES = (".net", ".com", ".tv")

try:
    for domain in src:
        domain = domain.strip()
        if not domain:
            continue
        # universal_newlines=True makes communicate() return text rather
        # than bytes on Python 3, so the str comparisons below work there
        # too.  NOTE(review): decoding uses the locale's encoding; output
        # that cannot be decoded would raise -- confirm this is acceptable.
        rec = subprocess.Popen(
            ["whois", domain],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        ).communicate()[0]
        if rec.startswith(_NO_SERVER_PREFIXES):
            continue
        rec = trim(rec)
        # The suffixes are mutually exclusive, so at most one branch runs.
        if domain.endswith(_NET_STYLE_SUFFIXES):
            rec = clean_net(rec)
        elif domain.endswith(".co.uk"):
            rec = clean_co_uk(rec)
        elif domain.endswith(".info"):
            rec = clean_info(rec)
        rec = clean(rec)
        details = {}
        try:
            for line in rec.split("\n"):
                # Only the first two ": "-separated pieces are used; a
                # line without the separator raises ValueError here.
                key, value = line.split(": ")[:2]
                details[key.strip()] = value.strip().replace("\t", ", ")
        except ValueError:
            # A line that cannot be split (e.g. an empty record) means
            # the whole domain is skipped and no row is written.
            continue
        record(domain, details)
finally:
    ## Cleanup
    # Close both files even if a lookup or parse step raised.
    src.close()
    dest.close()