You are viewing a plain text version of this content. The canonical link for it is here.
Posted to j-users@xalan.apache.org by da...@egcrc.net on 2002/02/26 00:43:12 UTC

Differences between explicit and implicit parsing for docs with DTDs?

Hi,

I am having a problem with using a simple identity Transformer on a
document
which contains an externally defined DTD.

The document that I am trying to parse can be found at:

http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Save&db=PubMed&uid=11855986&dopt=XML

I am currently trying to get an InputStream from the URL via
getContent(),
and then print the content out to System.out using the default identity
Transformer.
However, when I try to do this I get an
	javax.xml.transform.TransformerException:
java.lang.NullPointerException
in the middle of the DTD which gets parsed.

If I explicitly parse the InputStream into a Document using a
DocumentBuilder,
however, and then print out the resulting Document, then the problem goes
away.
However, it looks like only the comments from the DTDs get printed.

I'm not sure what the difference beneath the hood is,
and any clarifications would be appreciated.

The code that I am using is pasted below.
The two cases can be called by executing:

	java GetURL dontParse "http://www.ncbi.nlm.nih.gov:80/"
"entrez/query.fcgi?cmd=Save&db=PubMed&uid=11855986&dopt=XML"

or:

	java GetURL parse "http://www.ncbi.nlm.nih.gov:80/"
"entrez/query.fcgi?cmd=Save&db=PubMed&uid=11855986&dopt=XML"

I am using xalan-j_2_3_1.

Thanks,
d

---GetURL.java

import java.io.InputStream;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.MalformedURLException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.dom.DOMResult;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

/**
 * Command-line utility that fetches a document from a URL and echoes it to
 * standard output through the identity {@link Transformer}.
 *
 * <p>Two modes are supported, selected by the first argument:
 * <ul>
 *   <li>{@code parse} - the stream is first parsed into a DOM {@link Document}
 *       with a {@link DocumentBuilder}, and the DOM is then transformed;</li>
 *   <li>anything else (conventionally {@code dontParse}) - the raw stream is
 *       handed to the transformer as a {@link StreamSource}.</li>
 * </ul>
 *
 * <p>Usage: {@code java GetURL [parse|dontParse] <systemRoot> <url>}. The
 * location fetched is {@code systemRoot + url}; {@code systemRoot} is also
 * used as the system identifier of the source.
 *
 * <p>The factories, builder and transformer are cached in static fields and
 * created lazily without synchronization.
 */
public class GetURL
{
    /** Mode selector, taken from the first command-line argument. */
    public static String parse;
    /** System identifier applied to every source; also the URL prefix. */
    public static String systemRoot;
    /** Part of the URL that follows {@link #systemRoot}. */
    public static String urlString;

    /** Lazily created by {@link #initTransformer()}. */
    protected static TransformerFactory transformerFactory;
    /** Identity transformer; stays null if it could not be created. */
    protected static Transformer identityTransformer;
    /** Lazily created by {@link #initDocumentBuilder()}. */
    protected static DocumentBuilderFactory documentBuilderFactory;
    /** DOM parser; stays null if it could not be created. */
    protected static DocumentBuilder documentBuilder;

    /**
     * Opens a stream on the content of the given URL.
     *
     * <p>Any failure (malformed URL or I/O error) is reported on standard
     * error and terminates the JVM with exit status 1.
     *
     * @param urlString absolute URL to open
     * @return an open stream that the caller is responsible for closing
     */
    public static InputStream getContentStream(String urlString)
    {
        try {
            // openStream() always yields an InputStream, whereas getContent()
            // may return some other object depending on the content type,
            // which made the former cast unsafe.
            return new URL(urlString).openStream();
        } catch (MalformedURLException e) {
            System.err.println(e);
            System.exit(1);
        } catch (IOException e) {
            System.err.println(e);
            System.exit(1);
        }
        // Only reached if System.exit was prevented from terminating the JVM.
        return null;
    }

    /**
     * Parses the stream into a DOM document, using {@link #systemRoot} as the
     * system identifier.
     *
     * <p>Parse and I/O errors are reported on standard error; in every failure
     * case "ERROR: Parsing unsuccessful" is printed on standard output.
     *
     * @param in stream containing the XML to parse
     * @return the parsed document, or {@code null} if no parser could be
     *         created or parsing failed
     */
    public static Document getDocumentFromStream(InputStream in)
    {
        initDocumentBuilder();
        Document doc = null;
        // initDocumentBuilder() has already reported why the builder is
        // missing; do not follow that up with a NullPointerException.
        if (documentBuilder != null) {
            try {
                doc = documentBuilder.parse(in, systemRoot);
            } catch (SAXException e) {
                System.err.println(e);
            } catch (IOException e) {
                System.err.println(e);
            }
        }
        if (doc == null) {
            System.out.println("ERROR: Parsing unsuccessful");
        }
        return doc;
    }

    /**
     * Copies the XML read from the stream to standard output with the
     * identity transformer, letting the transformer do the parsing.
     *
     * @param in stream containing the XML to print
     */
    public static void printParsedStream(InputStream in)
    {
        transformToStdout(new StreamSource(in));
    }

    /**
     * Serializes an already parsed document to standard output with the
     * identity transformer.
     *
     * @param doc document to print
     */
    public static void printDOMSource(Document doc)
    {
        transformToStdout(new DOMSource(doc));
    }

    /**
     * Runs the identity transformation from {@code source} to standard output.
     * Transformation errors are reported on standard error.
     */
    private static void transformToStdout(javax.xml.transform.Source source)
    {
        initTransformer();
        if (identityTransformer == null) {
            // Creation failure was already reported by initTransformer().
            return;
        }
        source.setSystemId(systemRoot);
        try {
            identityTransformer.transform(source, new StreamResult(System.out));
        } catch (TransformerException e) {
            System.err.println(e);
        }
    }

    /**
     * Creates the cached {@link DocumentBuilderFactory} and
     * {@link DocumentBuilder} if they do not exist yet. A configuration error
     * is reported on standard error and leaves {@link #documentBuilder} null.
     */
    public static void initDocumentBuilder() {
        if (documentBuilderFactory == null) {
            documentBuilderFactory = DocumentBuilderFactory.newInstance();
        }
        if (documentBuilder == null) {
            try {
                documentBuilder = documentBuilderFactory.newDocumentBuilder();
            } catch (ParserConfigurationException e) {
                System.err.println(e);
            }
        }
    }

    /**
     * Creates the cached {@link TransformerFactory} and identity
     * {@link Transformer} if they do not exist yet. A configuration error is
     * reported on standard error and leaves {@link #identityTransformer} null.
     */
    public static void initTransformer() {
        if (transformerFactory == null) {
            transformerFactory = TransformerFactory.newInstance();
        }
        if (identityTransformer == null) {
            try {
                identityTransformer = transformerFactory.newTransformer();
            } catch (TransformerConfigurationException e) {
                System.err.println("initTransformer: "+e);
            }
        }
    }

    /**
     * Copies the stream to standard output line by line, without any XML
     * processing. I/O errors are reported on standard error.
     *
     * <p>NOTE(review): bytes are decoded with the platform default charset.
     *
     * @param in stream to print
     */
    public static void printStream(InputStream in)
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            System.err.println(e);
        }
    }

    /** Prints the command-line synopsis on standard output. */
    public static void usage()
    {
        System.out.println("GetURL [parse|dontParse] <systemRoot> <url>");
    }

    /**
     * Entry point; see the class comment for the meaning of the arguments.
     * Exits with status 1 on wrong argument count, on a fetch failure, or
     * when explicit parsing fails.
     *
     * @param argv mode, system root and URL remainder, in that order
     */
    public static void main(String[] argv)
    {
        if (argv.length != 3) {
            usage();
            System.exit(1);
        }
        parse = argv[0];
        systemRoot = argv[1];
        urlString = argv[2];
        System.out.println("Getting content stream");
        InputStream stream = getContentStream(systemRoot+urlString);
        boolean succeeded = true;
        try {
            if (parse.equals("parse")) {
                // Parse explicitly
                System.out.println("Parsing stream to document");
                Document doc = getDocumentFromStream(stream);
                if (doc == null) {
                    // The failure has been reported; there is nothing to print.
                    succeeded = false;
                } else {
                    System.out.println("Printing document");
                    printDOMSource(doc);
                }
            } else {
                // Parse implicitly using StreamSource
                System.out.println("Printing document directly from stream");
                printParsedStream(stream);
            }
        } finally {
            // Release the network connection whatever happened above.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException e) {
                    System.err.println(e);
                }
            }
        }
        if (!succeeded) {
            System.exit(1);
        }
    }
}