You may want to try Expat (www.libexpat.org) or the Python wrapper for it.
You can feed it a small piece at a time, say line by line or whatever. Of
course, it all depends on what kind of parsing you have in mind.
Care to post more details?
The XML file I need to parse contains information about genes.
So the first element is a gene, and then there are a lot of sub-elements with
sub-elements of their own. I only need some of the information and want to
store it in an object called gene. Later on, this information will be printed
into a file, which in its turn will be fed into some other program.
This is an example of the XML:
<?xml version="1.0"?>
<!DOCTYPE Entrezgene-Set PUBLIC "-//NCBI//NCBI Entrezgene/EN" "NCBI_Entrezgene.dtd">
<Entrezgene-Set>
<Entrezgene>
<Entrezgene_track-info>
<Gene-track>
<Gene-track_geneid>9996</Gene-track_geneid>
<Gene-track_status value="secondary">1</Gene-track_status>
<Gene-track_current-id>
<Dbtag>
<Dbtag_db>LocusID</Dbtag_db>
<Dbtag_tag>
<Object-id>
<Object-id_id>320632</Object-id_id>
</Object-id>
</Dbtag_tag>
</Dbtag>
<Dbtag>
<Dbtag_db>GeneID</Dbtag_db>
<Dbtag_tag>
<Object-id>
<Object-id_id>320632</Object-id_id>
</Object-id>
</Dbtag_tag>
</Dbtag>
</Gene-track_current-id>
<Gene-track_create-date>
<Date>
<Date_std>
<Date-std>
<Date-std_year>2003</Date-std_year>
<Date-std_month>8</Date-std_month>
<Date-std_day>28</Date-std_day>
<Date-std_hour>21</Date-std_hour>
<Date-std_minute>39</Date-std_minute>
<Date-std_second>0</Date-std_second>
</Date-std>
</Date_std>
</Date>
</Gene-track_create-date>
<Gene-track_update-date>
<Date>
<Date_std>
<Date-std>
<Date-std_year>2005</Date-std_year>
<Date-std_month>2</Date-std_month>
<Date-std_day>17</Date-std_day>
<Date-std_hour>12</Date-std_hour>
<Date-std_minute>54</Date-std_minute>
<Date-std_second>0</Date-std_second>
</Date-std>
</Date_std>
</Date>
</Gene-track_update-date>
</Gene-track>
</Entrezgene_track-info>
<Entrezgene_type value="protein-coding">6</Entrezgene_type>
<Entrezgene_source>
<BioSource>
<BioSource_genome value="genomic">1</BioSource_genome>
<BioSource_origin value="natural">1</BioSource_origin>
<BioSource_org>
<Org-ref>
<Org-ref_taxname>Mus musculus</Org-ref_taxname>
<Org-ref_common>house mouse</Org-ref_common>
<Org-ref_db>
<Dbtag>
<Dbtag_db>taxon</Dbtag_db>
<Dbtag_tag>
<Object-id>
<Object-id_id>10090</Object-id_id>
</Object-id>
</Dbtag_tag>
</Dbtag>
</Org-ref_db>
<Org-ref_syn>
<Org-ref_syn_E>mouse</Org-ref_syn_E>
</Org-ref_syn>
<Org-ref_orgname>
<OrgName>
<OrgName_name>
<OrgName_name_binomial>
<BinomialOrgName>
<BinomialOrgName_genus>Mus</BinomialOrgName_genus>
<BinomialOrgName_species>musculus</BinomialOrgName_species>
</BinomialOrgName>
</OrgName_name_binomial>
</OrgName_name>
<OrgName_lineage>Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muridae; Murinae; Mus</OrgName_lineage>
<OrgName_gcode>1</OrgName_gcode>
<OrgName_mgcode>2</OrgName_mgcode>
<OrgName_div>ROD</OrgName_div>
</OrgName>
</Org-ref_orgname>
</Org-ref>
</BioSource_org>
</BioSource>
</Entrezgene_source>
<Entrezgene_gene>
<Gene-ref>
</Gene-ref>
</Entrezgene_gene>
<Entrezgene_gene-source>
<Gene-source>
<Gene-source_src>LocusLink</Gene-source_src>
<Gene-source_src-int>9996</Gene-source_src-int>
<Gene-source_src-str2>9996</Gene-source_src-str2>
<Gene-source_gene-display value="false"/>
<Gene-source_locus-display value="false"/>
<Gene-source_extra-terms value="false"/>
</Gene-source>
</Entrezgene_gene-source>
<Entrezgene_locus>
<Gene-commentary>
<Gene-commentary_type value="genomic">1</Gene-commentary_type>
<Gene-commentary_version>0</Gene-commentary_version>
</Gene-commentary>
</Entrezgene_locus>
<Entrezgene_unique-keys>
<Dbtag>
<Dbtag_db>LocusID</Dbtag_db>
<Dbtag_tag>
<Object-id>
<Object-id_id>9996</Object-id_id>
</Object-id>
</Dbtag_tag>
</Dbtag>
</Entrezgene_unique-keys>
<Entrezgene_xtra-index-terms>
<Entrezgene_xtra-index-terms_E>LOC320632</Entrezgene_xtra-index-terms_E>
</Entrezgene_xtra-index-terms>
</Entrezgene>
</Entrezgene-Set>