S
Simon Brooke
I was debugging a new XML generator tonight and trying to determine why
it wasn't working; and realised my dom printer does not output XML
namespace declarations.
My method to output an Element is as follows:
/**
 * Write an element and, recursively, everything beneath it.
 *
 * The start tag carries exactly the attributes reported by
 * {@code node.getAttributes( )}; nothing else is added to it. An element
 * with no children is written in the empty-tag form.
 *
 * @param node the element to write
 * @param out the stream to write it to
 * @param url the base URL handed on to the attribute and child printers
 * @param level the indentation level if pretty printing
 * @throws IOException declared for the benefit of the printers this
 *         method delegates to
 */
protected void print( Element node, PrintStream out, URL url,
        int level )
    throws IOException
{
    final int innerLevel = level + 1;

    /* Open the start tag. */
    indent( out, level );
    out.print( '<' );
    String name = node.getNodeName( );
    out.print( name );

    NamedNodeMap attributes = node.getAttributes( );
    NodeList kids = node.getChildNodes( );

    /* Every attribute goes inside the start tag, one level deeper. */
    int a = 0;
    while ( a < attributes.getLength( ) )
    {
        Attr attribute = (Attr) attributes.item( a );
        print( attribute, out, url, innerLevel );
        a++;
    }

    boolean childless = ( kids == null ) || ( kids.getLength( ) <= 0 );

    if ( childless )
    {
        /* Empty tag: the shared '>' below completes the " />". */
        out.print( " /" );
    }
    else
    {
        /* Close the start tag, then descend into the content. */
        out.print( '>' );

        int count = kids.getLength( );
        for ( int k = 0; k != count; k++ )
        {
            print( kids.item( k ), out, url, innerLevel );
        }

        /* Begin the end tag; the shared '>' below completes it. */
        indent( out, level );
        out.print( '<' );
        out.print( '/' );
        out.print( name );
    }

    out.print( '>' );
}
Performing the exact same XSL transform, the Xerces printer emits:
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF version="1.0"
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
xmlns:geourl="http://geourl.org/rss/module/"
xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rss version="0.91">
...
whereas my printer emits:
<rdf:RDF version="1.0">
<rss version="0.91">
...
The relevant part of the XSL file reads:
<xsl:template match="category">
<rdf:RDF version="1.0"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
xmlns:geourl="http://geourl.org/rss/module/"
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/">
<rss version="0.91">
...
Clearly what Xerces is emitting is right and what I am emitting is wrong,
but I'm having trouble seeing what I'm doing wrong. My method to output
an attribute node is as follows:
/**
 * Print an attribute node as <code> name="value"</code>. If url is not
 * null and the attribute is named href, link or src (case-insensitively),
 * the value is resolved against url before being printed.
 *
 * The value is first passed through cleanString( value, true ), which is
 * defined elsewhere. If it cannot be resolved as a URL, the unexpanded
 * value is printed rather than an empty string, so no data is lost.
 *
 * An attribute whose value is null is not printed; a message is written
 * to System.err instead.
 *
 * @param node the node to print
 * @param out the stream to print it on
 * @param url the base URL to use in expanding relative URLs; may be null
 * @param level the indentation level if pretty printing
 * @throws IOException declared for the benefit of the helpers this
 *         method delegates to
 */
protected void print( Attr node, PrintStream out, URL url,
        int level )
    throws IOException
{
    String name = node.getNodeName( );
    String value = node.getNodeValue( );

    if ( value == null )
    {
        /* XML does not allow unvalued attributes, so there is nothing
         * sensible to emit. */
        System.err.println( "Unvalued attribute: " + name );
        return;
    }

    value = cleanString( value, true );

    boolean isUrlAttribute = name.equalsIgnoreCase( "href" ) ||
        name.equalsIgnoreCase( "link" ) ||
        name.equalsIgnoreCase( "src" );

    if ( isUrlAttribute && ( url != null ) )
    {
        /* Change the partial URL to a full URL. */
        try
        {
            value = new URL( url, value ).toString( );
        }
        catch ( MalformedURLException m )
        {
            /* Keep the unexpanded value: printing nothing here would
             * silently turn the attribute into name="". */
            System.err.println( "Could not expand URL in attribute " +
                                name + ": " + value );
            m.printStackTrace( );
        }
    }

    /* Choose the delimiter from the text that will actually be written.
     * Prefer double quotes; switch to single quotes if the value contains
     * a double quote; if it contains both, keep double quotes and escape
     * the embedded ones so the output stays well-formed. */
    String delimiter = "\"";
    if ( value.indexOf( '"' ) > -1 )
    {
        if ( value.indexOf( '\'' ) > -1 )
        {
            value = value.replaceAll( "\"", "&quot;" );
        }
        else
        {
            delimiter = "'";
        }
    }

    indent( out, level );
    out.print( " " );
    out.print( name );
    out.print( "=" );
    out.print( delimiter );
    out.print( value );
    out.print( delimiter );
}
Neither the MalformedURLException nor the string 'Unvalued attribute'
ever appear in the log. From this it seems that neither
Node.getAttributes() nor Node.getChildNodes() return the namespace
declarations. Yet I can't see any other no-args get...() method in the
API. Reading through the Xerces XMLSerializer code makes it seem that
they are finding the namespace declarations among the attributes.
Can anyone see what I'm doing wrong? I appreciate it's probably some basic
howler, but I just can't see it myself.
it wasn't working; and realised my dom printer does not output XML
namespace declarations.
My method to output an Element is as follows:
/**
 * Write an element and, recursively, everything beneath it.
 *
 * The start tag carries exactly the attributes reported by
 * {@code node.getAttributes( )}; nothing else is added to it. An element
 * with no children is written in the empty-tag form.
 *
 * @param node the element to write
 * @param out the stream to write it to
 * @param url the base URL handed on to the attribute and child printers
 * @param level the indentation level if pretty printing
 * @throws IOException declared for the benefit of the printers this
 *         method delegates to
 */
protected void print( Element node, PrintStream out, URL url,
        int level )
    throws IOException
{
    final int innerLevel = level + 1;

    /* Open the start tag. */
    indent( out, level );
    out.print( '<' );
    String name = node.getNodeName( );
    out.print( name );

    NamedNodeMap attributes = node.getAttributes( );
    NodeList kids = node.getChildNodes( );

    /* Every attribute goes inside the start tag, one level deeper. */
    int a = 0;
    while ( a < attributes.getLength( ) )
    {
        Attr attribute = (Attr) attributes.item( a );
        print( attribute, out, url, innerLevel );
        a++;
    }

    boolean childless = ( kids == null ) || ( kids.getLength( ) <= 0 );

    if ( childless )
    {
        /* Empty tag: the shared '>' below completes the " />". */
        out.print( " /" );
    }
    else
    {
        /* Close the start tag, then descend into the content. */
        out.print( '>' );

        int count = kids.getLength( );
        for ( int k = 0; k != count; k++ )
        {
            print( kids.item( k ), out, url, innerLevel );
        }

        /* Begin the end tag; the shared '>' below completes it. */
        indent( out, level );
        out.print( '<' );
        out.print( '/' );
        out.print( name );
    }

    out.print( '>' );
}
Performing the exact same XSL transform, the Xerces printer emits:
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF version="1.0"
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
xmlns:geourl="http://geourl.org/rss/module/"
xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rss version="0.91">
...
whereas my printer emits:
<rdf:RDF version="1.0">
<rss version="0.91">
...
The relevant part of the XSL file reads:
<xsl:template match="category">
<rdf:RDF version="1.0"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
xmlns:geourl="http://geourl.org/rss/module/"
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/">
<rss version="0.91">
...
Clearly what Xerces is emitting is right and what I am emitting is wrong,
but I'm having trouble seeing what I'm doing wrong. My method to output
an attribute node is as follows:
/**
 * Print an attribute node as <code> name="value"</code>. If url is not
 * null and the attribute is named href, link or src (case-insensitively),
 * the value is resolved against url before being printed.
 *
 * The value is first passed through cleanString( value, true ), which is
 * defined elsewhere. If it cannot be resolved as a URL, the unexpanded
 * value is printed rather than an empty string, so no data is lost.
 *
 * An attribute whose value is null is not printed; a message is written
 * to System.err instead.
 *
 * @param node the node to print
 * @param out the stream to print it on
 * @param url the base URL to use in expanding relative URLs; may be null
 * @param level the indentation level if pretty printing
 * @throws IOException declared for the benefit of the helpers this
 *         method delegates to
 */
protected void print( Attr node, PrintStream out, URL url,
        int level )
    throws IOException
{
    String name = node.getNodeName( );
    String value = node.getNodeValue( );

    if ( value == null )
    {
        /* XML does not allow unvalued attributes, so there is nothing
         * sensible to emit. */
        System.err.println( "Unvalued attribute: " + name );
        return;
    }

    value = cleanString( value, true );

    boolean isUrlAttribute = name.equalsIgnoreCase( "href" ) ||
        name.equalsIgnoreCase( "link" ) ||
        name.equalsIgnoreCase( "src" );

    if ( isUrlAttribute && ( url != null ) )
    {
        /* Change the partial URL to a full URL. */
        try
        {
            value = new URL( url, value ).toString( );
        }
        catch ( MalformedURLException m )
        {
            /* Keep the unexpanded value: printing nothing here would
             * silently turn the attribute into name="". */
            System.err.println( "Could not expand URL in attribute " +
                                name + ": " + value );
            m.printStackTrace( );
        }
    }

    /* Choose the delimiter from the text that will actually be written.
     * Prefer double quotes; switch to single quotes if the value contains
     * a double quote; if it contains both, keep double quotes and escape
     * the embedded ones so the output stays well-formed. */
    String delimiter = "\"";
    if ( value.indexOf( '"' ) > -1 )
    {
        if ( value.indexOf( '\'' ) > -1 )
        {
            value = value.replaceAll( "\"", "&quot;" );
        }
        else
        {
            delimiter = "'";
        }
    }

    indent( out, level );
    out.print( " " );
    out.print( name );
    out.print( "=" );
    out.print( delimiter );
    out.print( value );
    out.print( delimiter );
}
Neither the MalformedURLException nor the string 'Unvalued attribute'
ever appear in the log. From this it seems that neither
Node.getAttributes() nor Node.getChildNodes() return the namespace
declarations. Yet I can't see any other no-args get...() method in the
API. Reading through the Xerces XMLSerializer code makes it seem that
they are finding the namespace declarations among the attributes.
Can anyone see what I'm doing wrong? I appreciate it's probably some basic
howler, but I just can't see it myself.