Jay said:
Yes, I know; I did check out a couple, but I could never understand them.
They were confusing for me, and I wasn't hoping for a fully typed
tutorial, just some help with exactly what I'm trying to do, not
the whole module... but whatever. Thanks a lot for the feedback.
Well I don't want to hold this up as an example of best practice (it was
a quick hack to get some book graphics for my web site), but this
example shows you how you can extract stuff from XML, in this case
returned from Amazon's web services module.
Sorry about any wrapping that mangles the code.
regards
Steve
#!/usr/bin/python
#
# getbooks.py: download book details from Amazon.com
#
# hwBuild: database-driven web content management system
# Copyright (C) 2005 Steve Holden - (e-mail address removed)
#
# This program is free software; you can redistribute it
# and/or modify it under the terms of the GNU General
# Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
#
import urllib
import urlparse
import os
import re
from xml.parsers import expat
from config import Config
# Directory that receives the downloaded images and the cached XML response.
picindir = os.path.join(Config['datadir'], "pybooks")

# NOTE: this runs at import time and removes every entry in picindir so that
# each run starts from an empty directory.
for f in os.listdir(picindir):
    os.unlink(os.path.join(picindir, f))

# Leading run of digits in an image file name; used as the saved file's stem.
filpat = re.compile(r"\d+")
class myParser(object):
    """Expat-based scanner that downloads every ``MediumImage`` URL it meets.

    The handlers form a small state machine kept in ``self.processing``:

    * 0 -- outside any ``MediumImage`` element
    * 1 -- inside ``MediumImage`` but not inside its ``URL`` child
    * 2 -- inside ``MediumImage``/``URL``; character data is being collected

    When a ``URL`` element closes in state 2 the collected text is fetched,
    written into ``picindir`` and ``self.count`` is incremented.
    """

    def __init__(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.start_element
        self.parser.EndElementHandler = self.end_element
        self.parser.CharacterDataHandler = self.character_data
        self.processing = 0
        self.count = 0
        # Defined up front so the attribute always exists, even before the
        # first MediumImage element resets it.
        self.imgname = ""

    def parse(self, f):
        """Parse the open file object *f*; return the number of images saved."""
        self.parser.ParseFile(f)
        return self.count

    def start_element(self, name, attrs):
        """Expat start-tag handler: advance the state machine."""
        if name == "MediumImage":
            self.processing = 1
            self.imgname = ""
        if self.processing == 1 and name == "URL":
            self.processing = 2

    def end_element(self, name):
        """Expat end-tag handler: download on ``</URL>``, reset on ``</MediumImage>``."""
        if self.processing == 2 and name == "URL":
            self.processing = 1
            self._save_image(self.imgname)
        if self.processing == 1 and name == "MediumImage":
            self.processing = 0

    def character_data(self, data):
        """Expat text handler: accumulate the URL text.

        Expat may deliver one text node in several pieces, hence the append.
        """
        if self.processing == 2:
            self.imgname += data

    def _save_image(self, url):
        """Fetch *url* and store it as ``<digits>.jpg`` inside ``picindir``."""
        print("Getting: %s" % (url,))
        path = urlparse.urlparse(url)[2]
        itemno = filpat.match(os.path.basename(path))
        if itemno is None:
            # No leading digits to name the file after; skip instead of
            # failing with AttributeError on ``None.group()``.
            print("Skipping (no item number in file name): %s" % (url,))
            return
        u = urllib.urlopen(url)
        try:
            img = u.read()
        finally:
            u.close()
        with open(os.path.join(picindir, "%s.jpg" % itemno.group()), "wb") as outfile:
            outfile.write(img)
        self.count += 1
def main(search=None, max_pages=4):
    """Download images for *search* and return how many were saved.

    Result pages 1..*max_pages* are requested in turn. Each response is
    written to ``bookdata.txt`` in ``picindir`` and then parsed with
    ``myParser``. The loop stops early at the first page that yields no
    images.

    search    -- search string; falls back to ``Config['book-search']``
                 when empty or None.
    max_pages -- highest page number to request (default 4).
    """
    print("Search: %s" % (search,))
    # NOTE(review): the middle of this URL was elided ("...") by the mailing
    # list archive. The "% (term, pageNum)" below implies the original held
    # two conversion specifiers; as posted, the formatting raises TypeError.
    # Restore the full query string before running.
    url_template = "http://webservices.amazon.com/onca/...oup=Images&type=lite&Version=2004-11-10&f=xml"
    term = urllib.quote(search or Config['book-search'])
    fnam = os.path.join(picindir, "bookdata.txt")
    count = 0
    for pageNum in range(1, max_pages + 1):
        f = urllib.urlopen(url_template % (term, pageNum))
        try:
            data = f.read()
        finally:
            f.close()
        # Binary mode keeps the bytes on disk identical to what was received.
        with open(fnam, "wb") as cache:
            cache.write(data)
        with open(fnam, "rb") as cache:
            n = myParser().parse(cache)
        if n == 0:
            break
        count += n
    return count
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the configured search.
    search = sys.argv[1] if len(sys.argv) > 1 else None
    n = main(search)
    print("Pictures found: %s" % (n,))