You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2007/10/23 18:28:58 UTC
svn commit: r587550 [5/6] - in
/incubator/abdera/java/trunk/extensions/json/src/main: java/nu/
java/nu/validator/ java/nu/validator/htmlparser/
java/nu/validator/htmlparser/common/ java/nu/validator/htmlparser/impl/
java/nu/validator/htmlparser/sax/ ja...
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/impl/XmlLangAttributesImpl.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/impl/XmlLangAttributesImpl.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/impl/XmlLangAttributesImpl.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/impl/XmlLangAttributesImpl.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,57 @@
+package nu.validator.htmlparser.impl;
+
+
+/**
+ * Attribute list in which the attribute named <code>lang</code> is presented
+ * as belonging to the XML namespace while every other attribute is presented
+ * as being in no namespace.
+ */
+public class XmlLangAttributesImpl extends AttributesImpl {
+
+    /** Namespace URI reported for the <code>lang</code> attribute. */
+    private static final String XML_NS = "http://www.w3.org/XML/1998/namespace";
+
+    /** Name of the attribute that is reported in the XML namespace. */
+    private static final String LANG = "lang";
+
+    /**
+     * Decides whether a namespace-qualified lookup may be answered by the
+     * name-only lookup: <code>lang</code> is only reachable through the XML
+     * namespace, any other name only through the empty namespace.
+     */
+    private static boolean matchesXmlLangView(String uri, String localName) {
+        if (LANG.equals(localName)) {
+            return XML_NS.equals(uri);
+        }
+        return "".equals(uri);
+    }
+
+    /**
+     * Returns the index found by the name-only lookup when the namespace
+     * matches the name, and <code>-1</code> otherwise.
+     *
+     * @see nu.validator.htmlparser.impl.AttributesImpl#getIndex(java.lang.String, java.lang.String)
+     */
+    @Override
+    public int getIndex(String uri, String localName) {
+        return matchesXmlLangView(uri, localName) ? getIndex(localName) : -1;
+    }
+
+    /**
+     * Returns the XML namespace URI for the attribute whose qualified name
+     * is <code>lang</code>, the empty string for any other attribute, and
+     * <code>null</code> when no qualified name is available for the index.
+     *
+     * @see nu.validator.htmlparser.impl.AttributesImpl#getURI(int)
+     */
+    @Override
+    public String getURI(int index) {
+        String qName = getQName(index);
+        if (qName == null) {
+            return null;
+        }
+        return LANG.equals(qName) ? XML_NS : "";
+    }
+
+    /**
+     * Returns the value found by the name-only lookup when the namespace
+     * matches the name, and <code>null</code> otherwise.
+     *
+     * @see nu.validator.htmlparser.impl.AttributesImpl#getValue(java.lang.String, java.lang.String)
+     */
+    @Override
+    public String getValue(String uri, String localName) {
+        return matchesXmlLangView(uri, localName) ? getValue(localName) : null;
+    }
+
+    /**
+     * Returns the type found by the name-only lookup when the namespace
+     * matches the name, and <code>null</code> otherwise.
+     *
+     * @see nu.validator.htmlparser.impl.AttributesImpl#getType(java.lang.String, java.lang.String)
+     */
+    @Override
+    public String getType(String uri, String localName) {
+        return matchesXmlLangView(uri, localName) ? getType(localName) : null;
+    }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/impl/package.html
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/impl/package.html?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/impl/package.html (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/impl/package.html Tue Oct 23 09:28:51 2007
@@ -0,0 +1,30 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package contains the bulk of parser internals. Only implementors of
+additional tree builders or token handlers should look here.</p>
+</body>
+</html>
\ No newline at end of file
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,969 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import nu.validator.htmlparser.common.DoctypeExpectation;
+import nu.validator.htmlparser.common.DocumentModeHandler;
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.impl.TreeBuilder;
+import nu.validator.saxtree.Document;
+import nu.validator.saxtree.DocumentFragment;
+import nu.validator.saxtree.TreeParser;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.DTDHandler;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.ext.LexicalHandler;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * This class implements an HTML5 parser that exposes data through the SAX2
+ * interface.
+ *
+ * <p>By default, when using the constructor without arguments, this
+ * parser treats XML 1.0-incompatible infosets as fatal errors in
+ * order to adhere to the SAX2 API contract strictly. This corresponds to
+ * <code>FATAL</code> as the general XML violation policy. To make the parser
+ * support non-conforming HTML fully per the HTML 5 spec while on the other
+ * hand potentially violating the SAX2 API contract, set the general XML
+ * violation policy to <code>ALLOW</code>. Handling all input without fatal
+ * errors and without violating the SAX2 API contract is possible by setting
+ * the general XML violation policy to <code>ALTER_INFOSET</code>. <em>This
+ * makes the parser non-conforming</em> but is probably the most useful
+ * setting for most applications.
+ *
+ * <p>By default, this parser doesn't do true streaming but buffers everything
+ * first. The parser can be made truly streaming by calling
+ * <code>setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL)</code>. This
+ * has the consequence that errors that require non-streamable recovery are
+ * treated as fatal.
+ *
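+ * <p>By default, in order to make the parse events emulate the parse events
+ * for a DTDless XML document, the parser does not report the doctype through
+ * <code>LexicalHandler</code>. Doctype reporting through
+ * <code>LexicalHandler</code> can be turned on by calling
+ * <code>setReportingDoctype(true)</code>.
+ * (NOTE(review): the <code>reportingDoctype</code> field of this class is
+ * initialized to <code>true</code>, which does not match the default
+ * described here; confirm which of the two is intended.)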
+ *
+ * @version $Id: HtmlParser.java 153 2007-09-11 07:41:33Z hsivonen $
+ * @author hsivonen
+ */
+public class HtmlParser implements XMLReader {
+
+ private Tokenizer tokenizer = null;
+
+ private TreeBuilder<?> treeBuilder = null;
+
+ private SAXStreamer saxStreamer = null; // work around javac bug
+
+ private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug
+
+ private ContentHandler contentHandler = null;
+
+ private LexicalHandler lexicalHandler = null;
+
+ private DTDHandler dtdHandler = null;
+
+ private EntityResolver entityResolver = null;
+
+ private ErrorHandler errorHandler = null;
+
+ private DocumentModeHandler documentModeHandler = null;
+
+ private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+ private boolean checkingNormalization = false;
+
+ private boolean scriptingEnabled = false;
+
+ private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+ private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+ private boolean html4ModeCompatibleWithXhtml1Schemata;
+
+ private boolean mappingLangToXmlLang;
+
+ private XmlViolationPolicy xmlnsPolicy;
+
+ private XmlViolationPolicy bogusXmlnsPolicy;
+
+ private boolean reportingDoctype = true;
+
+ /**
+ * Instantiates the parser with a fatal XML violation policy.
+ *
+ * <p>Delegates to the single-argument constructor, passing
+ * <code>XmlViolationPolicy.FATAL</code>.
+ */
+ public HtmlParser() {
+ this(XmlViolationPolicy.FATAL);
+ }
+
+ /**
+ * Instantiates the parser with a specific XML violation policy.
+ *
+ * <p>The policy is applied by calling <code>setXmlPolicy</code>; no
+ * tokenizer or tree builder is created here.
+ *
+ * @param xmlPolicy the policy
+ */
+ public HtmlParser(XmlViolationPolicy xmlPolicy) {
+ setXmlPolicy(xmlPolicy);
+ }
+
+ /**
+  * Creates the tokenizer and the tree builder on first use and copies the
+  * current configuration of this parser onto them. Does nothing when the
+  * tokenizer already exists.
+  *
+  * <p>This class wraps different tree builders depending on configuration:
+  * a <code>SAXTreeBuilder</code> when the streamability violation policy is
+  * <code>ALLOW</code> and a <code>SAXStreamer</code> otherwise. This method
+  * hides that choice from the user of the class.
+  */
+ private void lazyInit() {
+     if (tokenizer != null) {
+         // Already initialized; later configuration changes are pushed by
+         // the individual setters.
+         return;
+     }
+     if (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW) {
+         saxTreeBuilder = new SAXTreeBuilder();
+         saxStreamer = null;
+         treeBuilder = saxTreeBuilder;
+     } else {
+         saxStreamer = new SAXStreamer();
+         saxTreeBuilder = null;
+         treeBuilder = saxStreamer;
+     }
+     tokenizer = new Tokenizer(treeBuilder);
+
+     // Error reporting goes to the same handler for both stages.
+     tokenizer.setErrorHandler(errorHandler);
+     treeBuilder.setErrorHandler(errorHandler);
+
+     // Tokenizer configuration.
+     // NOTE(review): namePolicy and bogusXmlnsPolicy are not forwarded
+     // here; confirm whether that is intentional.
+     tokenizer.setCheckingNormalization(checkingNormalization);
+     tokenizer.setCommentPolicy(commentPolicy);
+     tokenizer.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+     tokenizer.setContentSpacePolicy(contentSpacePolicy);
+     tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+     tokenizer.setMappingLangToXmlLang(mappingLangToXmlLang);
+     tokenizer.setXmlnsPolicy(xmlnsPolicy);
+
+     // Tree builder configuration; comments are ignored when nobody
+     // listens for lexical events.
+     treeBuilder.setDoctypeExpectation(doctypeExpectation);
+     treeBuilder.setDocumentModeHandler(documentModeHandler);
+     treeBuilder.setIgnoringComments(lexicalHandler == null);
+     treeBuilder.setScriptingEnabled(scriptingEnabled);
+     treeBuilder.setReportingDoctype(reportingDoctype);
+
+     if (saxStreamer == null) {
+         return;
+     }
+     // The streamer always gets a content handler: a no-op
+     // DefaultHandler stands in when none has been set.
+     ContentHandler streamTarget = contentHandler;
+     if (streamTarget == null) {
+         streamTarget = new DefaultHandler();
+     }
+     saxStreamer.setContentHandler(streamTarget);
+     saxStreamer.setLexicalHandler(lexicalHandler);
+ }
+
+ /**
+  * Returns the content handler stored by <code>setContentHandler</code>,
+  * or <code>null</code> if none has been set.
+  *
+  * @return the content handler, possibly <code>null</code>
+  * @see org.xml.sax.XMLReader#getContentHandler()
+  */
+ public ContentHandler getContentHandler() {
+     return this.contentHandler;
+ }
+
+ /**
+  * Returns the DTD handler stored by <code>setDTDHandler</code>, or
+  * <code>null</code> if none has been set.
+  *
+  * @return the DTD handler, possibly <code>null</code>
+  * @see org.xml.sax.XMLReader#getDTDHandler()
+  */
+ public DTDHandler getDTDHandler() {
+     return this.dtdHandler;
+ }
+
+ /**
+  * Returns the entity resolver stored by <code>setEntityResolver</code>,
+  * or <code>null</code> if none has been set.
+  *
+  * @return the entity resolver, possibly <code>null</code>
+  * @see org.xml.sax.XMLReader#getEntityResolver()
+  */
+ public EntityResolver getEntityResolver() {
+     return this.entityResolver;
+ }
+
+ /**
+  * Returns the error handler stored by <code>setErrorHandler</code>, or
+  * <code>null</code> if none has been set.
+  *
+  * @return the error handler, possibly <code>null</code>
+  * @see org.xml.sax.XMLReader#getErrorHandler()
+  */
+ public ErrorHandler getErrorHandler() {
+     return this.errorHandler;
+ }
+
+ /**
+ * Exposes the configuration of the emulated XML parser as well as
+ * boolean-valued configuration without using non-<code>XMLReader</code>
+ * getters directly.
+ *
+ * <dl>
+ * <dt><code>http://xml.org/sax/features/external-general-entities</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/external-parameter-entities</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/is-standalone</code></dt>
+ * <dd><code>true</code></dd>
+ * <dt><code>http://xml.org/sax/features/lexical-handler/parameter-entities</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/namespaces</code></dt>
+ * <dd><code>true</code></dd>
+ * <dt><code>http://xml.org/sax/features/namespace-prefixes</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/resolve-dtd-uris</code></dt>
+ * <dd><code>true</code></dd>
+ * <dt><code>http://xml.org/sax/features/string-interning</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
+ * <dd><code>isCheckingNormalization</code></dd>
+ * <dt><code>http://xml.org/sax/features/use-attributes2</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/use-locator2</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/use-entity-resolver2</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/validation</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/xmlns-uris</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://xml.org/sax/features/xml-1.1</code></dt>
+ * <dd><code>false</code></dd>
+ * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
+ * <dd><code>isHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
+ * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
+ * <dd><code>isMappingLangToXmlLang</code></dd>
+ * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
+ * <dd><code>isScriptingEnabled</code></dd>
+ * </dl>
+ *
+ * @param name
+ * feature URI string
+ * @return a value per the list above
+ * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
+ */
+ public boolean getFeature(String name) throws SAXNotRecognizedException,
+         SAXNotSupportedException {
+     // Features backed by the configuration of this parser.
+     if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
+         // the checks aren't really per XML 1.1
+         return isCheckingNormalization();
+     }
+     if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
+         return isHtml4ModeCompatibleWithXhtml1Schemata();
+     }
+     if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
+         return isMappingLangToXmlLang();
+     }
+     if ("http://validator.nu/features/scripting-enabled".equals(name)) {
+         return isScriptingEnabled();
+     }
+
+     // Fixed features that are always reported as on. For
+     // resolve-dtd-uris this is the default value; the applicable
+     // scenario never happens.
+     if ("http://xml.org/sax/features/is-standalone".equals(name)
+             || "http://xml.org/sax/features/namespaces".equals(name)
+             || "http://xml.org/sax/features/resolve-dtd-uris".equals(name)) {
+         return true;
+     }
+
+     // Fixed features that are always reported as off.
+     // XXX revisit string-interning
+     if ("http://xml.org/sax/features/external-general-entities".equals(name)
+             || "http://xml.org/sax/features/external-parameter-entities".equals(name)
+             || "http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)
+             || "http://xml.org/sax/features/namespace-prefixes".equals(name)
+             || "http://xml.org/sax/features/string-interning".equals(name)
+             || "http://xml.org/sax/features/use-attributes2".equals(name)
+             || "http://xml.org/sax/features/use-locator2".equals(name)
+             || "http://xml.org/sax/features/use-entity-resolver2".equals(name)
+             || "http://xml.org/sax/features/validation".equals(name)
+             || "http://xml.org/sax/features/xmlns-uris".equals(name)
+             || "http://xml.org/sax/features/xml-1.1".equals(name)) {
+         return false;
+     }
+
+     // Anything else, including a null name, is unknown to this parser.
+     throw new SAXNotRecognizedException();
+ }
+
+ /**
+ * Allows <code>XMLReader</code>-level access to non-boolean valued
+ * getters.
+ *
+ * <p>
+ * The properties are mapped as follows:
+ *
+ * <dl>
+ * <dt><code>http://xml.org/sax/properties/document-xml-version</code></dt>
+ * <dd><code>"1.0"</code></dd>
+ * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
+ * <dd><code>getLexicalHandler</code></dd>
+ * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
+ * <dd><code>getContentSpacePolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
+ * <dd><code>getContentNonXmlCharPolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
+ * <dd><code>getCommentPolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
+ * <dd><code>getXmlnsPolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/name-policy</code></dt>
+ * <dd><code>getNamePolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
+ * <dd><code>getStreamabilityViolationPolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
+ * <dd><code>getDocumentModeHandler</code></dd>
+ * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
+ * <dd><code>getDoctypeExpectation</code></dd>
+ * </dl>
+ *
+ * @param name
+ * property URI string
+ * @return a value per the list above
+ * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
+ */
+ public Object getProperty(String name) throws SAXNotRecognizedException,
+         SAXNotSupportedException {
+     // Standard SAX properties.
+     if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "This parser does not suppert DeclHandler.");
+     }
+     if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
+         // Emulating an XML 1.1 parser is not supported.
+         return "1.0";
+     }
+     if ("http://xml.org/sax/properties/dom-node".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "This parser does not walk the DOM.");
+     }
+     if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
+         return getLexicalHandler();
+     }
+     if ("http://xml.org/sax/properties/xml-string".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "This parser does not expose the source as a string.");
+     }
+
+     // Parser-specific properties, each mapped to its getter.
+     if ("http://validator.nu/properties/content-space-policy".equals(name)) {
+         return getContentSpacePolicy();
+     }
+     if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
+         return getContentNonXmlCharPolicy();
+     }
+     if ("http://validator.nu/properties/comment-policy".equals(name)) {
+         return getCommentPolicy();
+     }
+     if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
+         return getXmlnsPolicy();
+     }
+     if ("http://validator.nu/properties/name-policy".equals(name)) {
+         return getNamePolicy();
+     }
+     if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
+         return getStreamabilityViolationPolicy();
+     }
+     if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
+         return getDocumentModeHandler();
+     }
+     if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
+         return getDoctypeExpectation();
+     }
+     if ("http://validator.nu/properties/xml-policy".equals(name)) {
+         // xml-policy is write-only: it is recognized but cannot be read.
+         throw new SAXNotSupportedException(
+                 "Cannot get a convenience setter.");
+     }
+
+     // Anything else, including a null name, is unknown to this parser.
+     throw new SAXNotRecognizedException();
+ }
+
+ /**
+  * Parses a complete document from the given input.
+  *
+  * <p>The fragment context of the tree builder is cleared before
+  * tokenizing. When the buffering tree builder is in use, whatever
+  * document it holds afterwards is replayed to the content handler and
+  * lexical handler; this happens in a <code>finally</code> block, so it
+  * also runs when tokenization ended with an exception.
+  *
+  * @param input the input to parse
+  * @throws IOException if reading the input fails
+  * @throws SAXException if a handler or the parser reports a SAX error
+  * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
+  */
+ public void parse(InputSource input) throws IOException, SAXException {
+     lazyInit();
+     try {
+         treeBuilder.setFragmentContext(null);
+         tokenize(input);
+     } finally {
+         // Only the buffering builder has a tree to replay.
+         Document buffered = null;
+         if (saxTreeBuilder != null) {
+             buffered = saxTreeBuilder.getDocument();
+         }
+         if (buffered != null) {
+             TreeParser replayer = new TreeParser(contentHandler, lexicalHandler);
+             replayer.parse(buffered);
+         }
+     }
+ }
+
+ /**
+  * Parses a fragment.
+  *
+  * <p>The given context element name is set on the tree builder before
+  * tokenizing. When the buffering tree builder is in use, the fragment it
+  * holds afterwards is replayed to the content handler and lexical
+  * handler; this happens in a <code>finally</code> block, so it also runs
+  * when tokenization ended with an exception.
+  *
+  * @param input the input to parse
+  * @param context the name of the context element
+  * @throws IOException if reading the input fails
+  * @throws SAXException if a handler or the parser reports a SAX error
+  */
+ public void parseFragment(InputSource input, String context)
+         throws IOException, SAXException {
+     lazyInit();
+     try {
+         treeBuilder.setFragmentContext(context);
+         tokenize(input);
+     } finally {
+         if (saxTreeBuilder != null) {
+             DocumentFragment fragment = saxTreeBuilder.getDocumentFragment();
+             // Mirror parse(InputSource): skip the replay when nothing was
+             // built, so that a failure inside this finally block cannot
+             // replace the exception that ended tokenization.
+             if (fragment != null) {
+                 new TreeParser(contentHandler, lexicalHandler).parse(fragment);
+             }
+         }
+     }
+ }
+
+ /**
+  * Makes sure the input source has a stream to read from and hands it to
+  * the tokenizer.
+  *
+  * <p>When the source carries neither a byte stream nor a character
+  * stream, its system identifier is resolved: first through the entity
+  * resolver, if one is set, and otherwise (or when the resolver does not
+  * supply a stream) by opening the system identifier as a URL.
+  *
+  * @param is the input source; must not be <code>null</code>
+  * @throws IllegalArgumentException if <code>is</code> is
+  *         <code>null</code> or has no stream and no system identifier
+  * @throws SAXException if the entity resolver or the tokenizer reports
+  *         a SAX error
+  * @throws IOException if opening or reading the input fails
+  * @throws MalformedURLException if the system identifier is not a valid
+  *         URL
+  */
+ private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException {
+     if (is == null) {
+         throw new IllegalArgumentException("Null input.");
+     }
+     if (is.getByteStream() == null && is.getCharacterStream() == null) {
+         String systemId = is.getSystemId();
+         if (systemId == null) {
+             throw new IllegalArgumentException("No byte stream, no character stream nor URI.");
+         }
+         // A resolver may return null to ask the parser to open the URI
+         // itself, so the result must not be dereferenced blindly.
+         InputSource resolved = null;
+         if (entityResolver != null) {
+             resolved = entityResolver.resolveEntity(is.getPublicId(), systemId);
+         }
+         if (resolved != null
+                 && (resolved.getByteStream() != null || resolved.getCharacterStream() != null)) {
+             // One stream is enough; the resolved source is usable as is.
+             is = resolved;
+         } else {
+             // No usable resolution: open the original system identifier.
+             is = new InputSource();
+             is.setSystemId(systemId);
+             is.setByteStream(new URL(systemId).openStream());
+         }
+     }
+     tokenizer.tokenize(is);
+ }
+
+ /**
+ * @see org.xml.sax.XMLReader#parse(java.lang.String)
+ */
+ public void parse(String systemId) throws IOException, SAXException {
+ parse(new InputSource(systemId));
+ }
+
+ /**
+ * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
+ */
+ public void setContentHandler(ContentHandler handler) {
+ contentHandler = handler;
+ if (saxStreamer != null) {
+ saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler()
+ : contentHandler);
+ }
+ }
+
+ /**
+ * Sets the lexical handler.
+ * @param handler the hander.
+ */
+ public void setLexicalHandler(LexicalHandler handler) {
+ lexicalHandler = handler;
+ if (treeBuilder != null) {
+ treeBuilder.setIgnoringComments(handler == null);
+ if (saxStreamer != null) {
+ saxStreamer.setLexicalHandler(handler);
+ }
+ }
+ }
+
+ /**
+  * Stores the DTD handler so that <code>getDTDHandler</code> can return
+  * it.
+  *
+  * @param handler the DTD handler, or <code>null</code>
+  * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
+  */
+ public void setDTDHandler(DTDHandler handler) {
+     this.dtdHandler = handler;
+ }
+
+ /**
+  * Stores the entity resolver that is consulted when an input source has
+  * a system identifier but no stream.
+  *
+  * @param resolver the entity resolver, or <code>null</code>
+  * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
+  */
+ public void setEntityResolver(EntityResolver resolver) {
+     this.entityResolver = resolver;
+ }
+
+ /**
+  * Stores the error handler and, when the tokenizer already exists,
+  * passes it on to both the tokenizer and the tree builder.
+  *
+  * @param handler the error handler, or <code>null</code>
+  * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+  */
+ public void setErrorHandler(ErrorHandler handler) {
+     errorHandler = handler;
+     if (tokenizer == null) {
+         // Not initialized yet; lazyInit() applies the stored handler.
+         return;
+     }
+     tokenizer.setErrorHandler(handler);
+     treeBuilder.setErrorHandler(handler);
+ }
+
+ /**
+ * Sets a boolean feature without having to use non-<code>XMLReader</code>
+ * setters directly.
+ *
+ * <p>
+ * The supported features are:
+ *
+ * <dl>
+ * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
+ * <dd><code>setCheckingNormalization</code></dd>
+ * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
+ * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
+ * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
+ * <dd><code>setMappingLangToXmlLang</code></dd>
+ * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
+ * <dd><code>setScriptingEnabled</code></dd>
+ * </dl>
+ *
+ * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
+ */
+ public void setFeature(String name, boolean value)
+ throws SAXNotRecognizedException, SAXNotSupportedException {
+ if ("http://xml.org/sax/features/external-general-entities".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/is-standalone".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/namespaces".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/string-interning".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
+ setCheckingNormalization(value);
+ } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/use-locator2".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/validation".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) {
+ throw new SAXNotSupportedException("Cannot set " + name + ".");
+ } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
+ setHtml4ModeCompatibleWithXhtml1Schemata(value);
+ } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
+ setMappingLangToXmlLang(value);
+ } else if ("http://validator.nu/features/scripting-enabled".equals(name)) {
+ setScriptingEnabled(value);
+ } else {
+ throw new SAXNotRecognizedException();
+ }
+ }
+
+ /**
+ * Sets a non-boolean property without having to use non-<code>XMLReader</code>
+ * setters directly.
+ *
+ * <dl>
+ * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
+ * <dd><code>setLexicalHandler</code></dd>
+ * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
+ * <dd><code>setContentSpacePolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
+ * <dd><code>setContentNonXmlCharPolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
+ * <dd><code>setCommentPolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
+ * <dd><code>setXmlnsPolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/name-policy</code></dt>
+ * <dd><code>setNamePolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
+ * <dd><code>setStreamabilityViolationPolicy</code></dd>
+ * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
+ * <dd><code>setDocumentModeHandler</code></dd>
+ * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
+ * <dd><code>setDoctypeExpectation</code></dd>
+ * <dt><code>http://validator.nu/properties/xml-policy</code></dt>
+ * <dd><code>setXmlPolicy</code></dd>
+ * </dl>
+ *
+ * @see org.xml.sax.XMLReader#setProperty(java.lang.String,
+ * java.lang.Object)
+ */
+ public void setProperty(String name, Object value)
+         throws SAXNotRecognizedException, SAXNotSupportedException {
+     // A value of the wrong type is reported as SAXNotSupportedException
+     // (recognized property, unusable value) rather than surfacing as a
+     // ClassCastException. A null value is passed through to the setter.
+     if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "This parser does not suppert DeclHandler.");
+     } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
+         throw new SAXNotSupportedException(
+                 "Can't set document-xml-version.");
+     } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
+         throw new SAXNotSupportedException("Can't set dom-node.");
+     } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
+         setLexicalHandler(castPropertyValue(name, value, LexicalHandler.class));
+     } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
+         throw new SAXNotSupportedException("Can't set xml-string.");
+     } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
+         setContentSpacePolicy(castPropertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
+         setContentNonXmlCharPolicy(castPropertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
+         setCommentPolicy(castPropertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
+         setXmlnsPolicy(castPropertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/name-policy".equals(name)) {
+         setNamePolicy(castPropertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
+         setStreamabilityViolationPolicy(castPropertyValue(name, value, XmlViolationPolicy.class));
+     } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
+         setDocumentModeHandler(castPropertyValue(name, value, DocumentModeHandler.class));
+     } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
+         setDoctypeExpectation(castPropertyValue(name, value, DoctypeExpectation.class));
+     } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
+         setXmlPolicy(castPropertyValue(name, value, XmlViolationPolicy.class));
+     } else {
+         throw new SAXNotRecognizedException();
+     }
+ }
+
+ /**
+  * Casts a property value to the type its setter expects.
+  *
+  * @param name the property URI, used in the error message
+  * @param value the value supplied by the caller; may be <code>null</code>
+  * @param type the type required by the setter
+  * @return <code>value</code> cast to <code>type</code>, or
+  *         <code>null</code> when <code>value</code> is <code>null</code>
+  * @throws SAXNotSupportedException if <code>value</code> is non-null and
+  *         not an instance of <code>type</code>
+  */
+ private static <T> T castPropertyValue(String name, Object value, Class<T> type)
+         throws SAXNotSupportedException {
+     if (value != null && !type.isInstance(value)) {
+         throw new SAXNotSupportedException("The value of " + name
+                 + " must be an instance of " + type.getName() + ".");
+     }
+     return type.cast(value);
+ }
+
+ /**
+  * Reports the stored setting for checking the NFC normalization of the
+  * source.
+  *
+  * @return <code>true</code> if NFC normalization of source is being checked.
+  * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+  */
+ public boolean isCheckingNormalization() {
+     return this.checkingNormalization;
+ }
+
+ /**
+  * Turns the checking of the NFC normalization of source on or off. The
+  * setting is stored and, when the tokenizer already exists, passed on to
+  * it immediately.
+  *
+  * @param enable <code>true</code> to check normalization
+  * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+  */
+ public void setCheckingNormalization(boolean enable) {
+     checkingNormalization = enable;
+     if (tokenizer == null) {
+         return;
+     }
+     tokenizer.setCheckingNormalization(enable);
+ }
+
+ /**
+  * Sets the policy for consecutive hyphens in comments. The policy is
+  * stored and, when the tokenizer already exists, passed on to it
+  * immediately.
+  *
+  * @param commentPolicy the policy
+  * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+     this.commentPolicy = commentPolicy;
+     if (tokenizer == null) {
+         return;
+     }
+     tokenizer.setCommentPolicy(commentPolicy);
+ }
+
+ /**
+  * Chooses how non-XML characters other than white space are handled. The
+  * policy is stored on this parser and, when a tokenizer is present,
+  * forwarded to it.
+  *
+  * @param contentNonXmlCharPolicy the policy to apply
+  * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy) {
+     this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+     if (tokenizer == null) {
+         return;
+     }
+     tokenizer.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+ }
+
+ /**
+  * Chooses how non-XML white space is handled. The policy is stored on this
+  * parser and, when a tokenizer is present, forwarded to it.
+  *
+  * @param contentSpacePolicy the policy to apply
+  * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+     this.contentSpacePolicy = contentSpacePolicy;
+     if (tokenizer == null) {
+         return;
+     }
+     tokenizer.setContentSpacePolicy(contentSpacePolicy);
+ }
+
+ /**
+  * Reports whether the parser treats scripting as enabled when deciding how
+  * to handle <code>noscript</code>.
+  *
+  * @return <code>true</code> if scripting is considered enabled
+  * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+  */
+ public boolean isScriptingEnabled() {
+     return this.scriptingEnabled;
+ }
+
+ /**
+  * Chooses whether the parser treats scripting as enabled when deciding how
+  * to handle <code>noscript</code>. The flag is stored on this parser and,
+  * when a tree builder is present, forwarded to it.
+  *
+  * @param scriptingEnabled <code>true</code> to enable
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+  */
+ public void setScriptingEnabled(boolean scriptingEnabled) {
+     this.scriptingEnabled = scriptingEnabled;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setScriptingEnabled(scriptingEnabled);
+ }
+
+ /**
+  * Returns the doctype expectation currently stored on this parser.
+  *
+  * @return the doctype expectation
+  */
+ public DoctypeExpectation getDoctypeExpectation() {
+     return this.doctypeExpectation;
+ }
+
+ /**
+  * Stores the doctype expectation on this parser and, when a tree builder
+  * is present, forwards it to the tree builder.
+  *
+  * @param doctypeExpectation
+  *            the doctype expectation to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+  */
+ public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+     this.doctypeExpectation = doctypeExpectation;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setDoctypeExpectation(doctypeExpectation);
+ }
+
+ /**
+  * Returns the document mode handler currently stored on this parser.
+  *
+  * @return the document mode handler
+  */
+ public DocumentModeHandler getDocumentModeHandler() {
+     return this.documentModeHandler;
+ }
+
+ /**
+  * Stores the document mode handler on this parser and, when a tree builder
+  * is present, forwards it to the tree builder so that a handler installed
+  * after the tree builder exists still takes effect. This mirrors how the
+  * doctype expectation setter behaves.
+  *
+  * @param documentModeHandler
+  *            the document mode handler to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+  */
+ public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+     this.documentModeHandler = documentModeHandler;
+     if (treeBuilder != null) {
+         treeBuilder.setDocumentModeHandler(documentModeHandler);
+     }
+ }
+
+ /**
+  * Returns the streamability violation policy currently stored on this
+  * parser.
+  *
+  * @return the streamability violation policy
+  */
+ public XmlViolationPolicy getStreamabilityViolationPolicy() {
+     return this.streamabilityViolationPolicy;
+ }
+
+ /**
+  * Stores the streamability violation policy on this parser. Nothing is
+  * forwarded to the tokenizer or tree builder from here.
+  *
+  * @param streamabilityViolationPolicy
+  *            the streamability violation policy to set
+  */
+ public void setStreamabilityViolationPolicy(XmlViolationPolicy streamabilityViolationPolicy) {
+     this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+ }
+
+ /**
+ * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+ * the name in the value.
+ * @param html4ModeCompatibleWithXhtml1Schemata
+ */
+ public void setHtml4ModeCompatibleWithXhtml1Schemata(
+ boolean html4ModeCompatibleWithXhtml1Schemata) {
+ this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+ if (tokenizer != null) {
+ tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+ }
+ }
+
+ /**
+  * Returns the <code>Locator</code> to use during a parse. The tokenizer
+  * itself serves as the locator, so the result is whatever the tokenizer
+  * field currently holds.
+  *
+  * @return the <code>Locator</code>
+  */
+ public Locator getDocumentLocator() {
+     return this.tokenizer;
+ }
+
+ /**
+  * Reports whether the HTML 4 mode reports boolean attributes in a way that
+  * repeats the name in the value.
+  *
+  * @return <code>true</code> if boolean attributes are reported that way
+  */
+ public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+     return this.html4ModeCompatibleWithXhtml1Schemata;
+ }
+
+ /**
+  * Chooses whether <code>lang</code> is mapped to <code>xml:lang</code>.
+  * The flag is stored on this parser and, when a tokenizer is present,
+  * forwarded to it.
+  *
+  * @param mappingLangToXmlLang <code>true</code> to map the attribute
+  * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+  */
+ public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+     this.mappingLangToXmlLang = mappingLangToXmlLang;
+     if (tokenizer == null) {
+         return;
+     }
+     tokenizer.setMappingLangToXmlLang(mappingLangToXmlLang);
+ }
+
+ /**
+  * Reports whether <code>lang</code> is mapped to <code>xml:lang</code>.
+  *
+  * @return <code>true</code> if the mapping is switched on
+  */
+ public boolean isMappingLangToXmlLang() {
+     return this.mappingLangToXmlLang;
+ }
+
+ /**
+  * Chooses whether the <code>xmlns</code> attribute on the root element is
+  * passed through. <code>FATAL</code> is not an accepted value. The policy
+  * is stored on this parser and, when a tokenizer is present, forwarded to
+  * it.
+  *
+  * @param xmlnsPolicy the policy to apply; must not be <code>FATAL</code>
+  * @throws IllegalArgumentException if <code>xmlnsPolicy</code> is
+  *             <code>FATAL</code>; nothing is stored in that case
+  * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+     // Reject the unsupported value before touching any state.
+     if (XmlViolationPolicy.FATAL == xmlnsPolicy) {
+         throw new IllegalArgumentException("Can't use FATAL here.");
+     }
+     this.xmlnsPolicy = xmlnsPolicy;
+     if (tokenizer == null) {
+         return;
+     }
+     tokenizer.setXmlnsPolicy(xmlnsPolicy);
+ }
+
+ /**
+  * Returns the policy for the <code>xmlns</code> attribute currently stored
+  * on this parser.
+  *
+  * @return the xmlns policy
+  */
+ public XmlViolationPolicy getXmlnsPolicy() {
+     return this.xmlnsPolicy;
+ }
+
+ /**
+  * Returns the lexical handler currently stored on this parser.
+  *
+  * @return the lexical handler
+  */
+ public LexicalHandler getLexicalHandler() {
+     return this.lexicalHandler;
+ }
+
+ /**
+  * Returns the comment policy currently stored on this parser.
+  *
+  * @return the comment policy
+  */
+ public XmlViolationPolicy getCommentPolicy() {
+     return this.commentPolicy;
+ }
+
+ /**
+  * Returns the policy for non-XML characters in content currently stored on
+  * this parser.
+  *
+  * @return the content non-XML character policy
+  */
+ public XmlViolationPolicy getContentNonXmlCharPolicy() {
+     return this.contentNonXmlCharPolicy;
+ }
+
+ /**
+  * Returns the policy for non-XML white space in content currently stored
+  * on this parser.
+  *
+  * @return the content space policy
+  */
+ public XmlViolationPolicy getContentSpacePolicy() {
+     return this.contentSpacePolicy;
+ }
+
+ /**
+  * Stores the doctype-reporting flag on this parser and, when a tree
+  * builder is present, forwards it to the tree builder.
+  *
+  * @param reportingDoctype the flag value to set
+  * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+  */
+ public void setReportingDoctype(boolean reportingDoctype) {
+     this.reportingDoctype = reportingDoctype;
+     if (treeBuilder == null) {
+         return;
+     }
+     treeBuilder.setReportingDoctype(reportingDoctype);
+ }
+
+ /**
+  * Returns the doctype-reporting flag currently stored on this parser.
+  *
+  * @return the reportingDoctype flag
+  */
+ public boolean isReportingDoctype() {
+     return this.reportingDoctype;
+ }
+
+ /**
+  * Chooses the policy for non-NCName element and attribute names. The
+  * policy is stored on this parser and, when a tokenizer is present,
+  * forwarded to it.
+  *
+  * @param namePolicy the policy to apply
+  * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setNamePolicy(XmlViolationPolicy namePolicy) {
+     this.namePolicy = namePolicy;
+     if (tokenizer == null) {
+         return;
+     }
+     tokenizer.setNamePolicy(namePolicy);
+ }
+
+ /**
+ * This is a catch-all convenience method for setting name, xmlns, content space,
+ * content non-XML char and comment policies in one go. This does not affect the
+ * streamability policy or doctype reporting.
+ *
+ * @param xmlPolicy
+ */
+ public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+ setNamePolicy(xmlPolicy);
+ setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
+ setContentSpacePolicy(xmlPolicy);
+ setContentNonXmlCharPolicy(xmlPolicy);
+ setCommentPolicy(xmlPolicy);
+ setBogusXmlnsPolicy(xmlPolicy);
+ }
+
+ /**
+  * Returns the policy for non-NCName element and attribute names currently
+  * stored on this parser.
+  *
+  * @return the name policy
+  */
+ public XmlViolationPolicy getNamePolicy() {
+     return this.namePolicy;
+ }
+
+ /**
+  * Chooses the policy for forbidden <code>xmlns</code> attributes. The
+  * policy is stored on this parser and, when a tokenizer is present,
+  * forwarded to it.
+  *
+  * @param bogusXmlnsPolicy the policy to apply
+  * @see nu.validator.htmlparser.impl.Tokenizer#setBogusXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+  */
+ public void setBogusXmlnsPolicy(XmlViolationPolicy bogusXmlnsPolicy) {
+     this.bogusXmlnsPolicy = bogusXmlnsPolicy;
+     if (tokenizer == null) {
+         return;
+     }
+     tokenizer.setBogusXmlnsPolicy(bogusXmlnsPolicy);
+ }
+
+ /**
+  * Returns the policy for forbidden <code>xmlns</code> attributes currently
+  * stored on this parser.
+  *
+  * @return the bogus xmlns policy
+  */
+ public XmlViolationPolicy getBogusXmlnsPolicy() {
+     return this.bogusXmlnsPolicy;
+ }
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.util.Arrays;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+public class HtmlSerializer implements ContentHandler, LexicalHandler {
+
+ private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
+ "bgsound", "br", "col", "embed", "frame", "hr", "img", "input",
+ "link", "meta", "param", "spacer", "wbr" };
+
+ private static final String[] NON_ESCAPING = {"iframe",
+ "noembed",
+ "noframes",
+ "noscript",
+ "plaintext",
+ "script",
+ "style",
+ "xmp"
+ };
+
+ private static Writer wrap(OutputStream out) {
+ try {
+ return new OutputStreamWriter(out, "UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private int ignoreLevel = 0;
+
+ private int escapeLevel = 0;
+
+ private final Writer writer;
+
+ public HtmlSerializer(OutputStream out) {
+ this(wrap(out));
+ }
+
+ public HtmlSerializer(Writer out) {
+ this.writer = out;
+ }
+
+ public void characters(char[] ch, int start, int length) throws SAXException {
+ try {
+ if (escapeLevel > 0) {
+ writer.write(ch, start, length);
+ } else {
+ for (int i = start; i < start + length; i++) {
+ char c = ch[i];
+ switch (c) {
+ case '<':
+ writer.write("<");
+ break;
+ case '>':
+ writer.write(">");
+ break;
+ case '&':
+ writer.write("&");
+ break;
+ default:
+ writer.write(c);
+ break;
+ }
+ }
+ }
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void endDocument() throws SAXException {
+ try {
+ writer.flush();
+ writer.close();
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void endElement(String uri, String localName, String qName) throws SAXException {
+ if (escapeLevel > 0) {
+ escapeLevel--;
+ }
+ if (ignoreLevel > 0) {
+ ignoreLevel--;
+ } else {
+ try {
+ writer.write('<');
+ writer.write('/');
+ writer.write(localName);
+ writer.write('>');
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+ }
+
+ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+ characters(ch, start, length);
+ }
+
+ public void processingInstruction(String target, String data) throws SAXException {
+ }
+
+ public void setDocumentLocator(Locator locator) {
+ }
+
+ public void startDocument() throws SAXException {
+ try {
+ writer.write("<!DOCTYPE html>\n");
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+ if (escapeLevel > 0) {
+ escapeLevel++;
+ }
+ if (ignoreLevel > 0 || !"http://www.w3.org/1999/xhtml".equals(uri)) {
+ ignoreLevel++;
+ return;
+ }
+ try {
+ writer.write('<');
+ writer.write(localName);
+ for (int i = 0; i < atts.getLength(); i++) {
+ writer.write(' ');
+ writer.write(atts.getLocalName(i)); // XXX xml:lang
+ writer.write('=');
+ writer.write('"');
+ String val = atts.getValue(i);
+ for (int j = 0; j < val.length(); j++) {
+ char c = val.charAt(j);
+ switch (c) {
+ case '"':
+ writer.write(""");
+ break;
+ case '<':
+ writer.write("<");
+ break;
+ case '>':
+ writer.write(">");
+ break;
+ case '&':
+ writer.write("&");
+ break;
+ default:
+ writer.write(c);
+ break;
+ }
+ }
+ writer.write('"');
+ }
+// writer.write('>');
+// if (Arrays.binarySearch(VOID_ELEMENTS, localName) > -1) {
+// ignoreLevel++;
+// return;
+// }
+// Modified by James Snell on Oct 4, 2007: fix it so that void elements are
+// closed immediately
+ if (Arrays.binarySearch(VOID_ELEMENTS, localName) > -1) {
+ writer.write(' ');
+ writer.write('/');
+ ignoreLevel++;
+ }
+ writer.write('>');
+ if ("pre".equals(localName) || "textarea".equals(localName)) {
+ writer.write('\n');
+ }
+ if (escapeLevel == 0 && Arrays.binarySearch(NON_ESCAPING, localName) > -1) {
+ escapeLevel = 1;
+ }
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void comment(char[] ch, int start, int length) throws SAXException {
+ if (ignoreLevel > 0) {
+ return;
+ }
+ try {
+ writer.write("<!--");
+ writer.write(ch, start, length);
+ writer.write("-->");
+ } catch (IOException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ public void endCDATA() throws SAXException {
+ }
+
+ public void endDTD() throws SAXException {
+ }
+
+ public void endEntity(String name) throws SAXException {
+ }
+
+ public void startCDATA() throws SAXException {
+ }
+
+ public void startDTD(String name, String publicId, String systemId) throws SAXException {
+ }
+
+ public void startEntity(String name) throws SAXException {
+ }
+
+ public void startPrefixMapping(String prefix, String uri) throws SAXException {
+ }
+
+ public void endPrefixMapping(String prefix) throws SAXException {
+ }
+
+ public void skippedEntity(String name) throws SAXException {
+ }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.AttributesImpl;
+import nu.validator.htmlparser.impl.TreeBuilder;
+
+class SAXStreamer extends TreeBuilder<Attributes>{
+
+ private ContentHandler contentHandler;
+ private LexicalHandler lexicalHandler;
+ private int depth;
+
+ SAXStreamer() {
+ super(XmlViolationPolicy.FATAL, false);
+ }
+
+ @Override
+ protected void addAttributesToElement(Attributes element, Attributes attributes) throws SAXException {
+ Attributes existingAttrs = element;
+ for (int i = 0; i < attributes.getLength(); i++) {
+ String qName = attributes.getQName(i);
+ if (existingAttrs.getIndex(qName) < 0) {
+ fatal();
+ }
+ }
+ }
+
+ @Override
+ protected void appendCharacters(Attributes parent, char[] buf, int start, int length) throws SAXException {
+ contentHandler.characters(buf, start, length);
+ }
+
+ @Override
+ protected void appendChildrenToNewParent(Attributes oldParent, Attributes newParent) throws SAXException {
+ fatal();
+ }
+
+ @Override
+ protected void appendComment(Attributes parent, char[] buf, int start, int length) throws SAXException {
+ if (lexicalHandler != null) {
+ lexicalHandler.comment(buf, start, length);
+ }
+ }
+
+ @Override
+ protected void appendCommentToDocument(char[] buf, int start, int length)
+ throws SAXException {
+ if (lexicalHandler != null) {
+ if (depth == 0) {
+ lexicalHandler.comment(buf, start, length);
+ } else {
+ fatal();
+ }
+ }
+ }
+
+ @Override
+ protected Attributes createElement(String name, Attributes attributes) throws SAXException {
+ return attributes;
+ }
+
+ @Override
+ protected Attributes createHtmlElementSetAsRoot(Attributes attributes) throws SAXException {
+ return attributes;
+ }
+
+ @Override
+ protected void detachFromParent(Attributes element) throws SAXException {
+ fatal();
+ }
+
+ @Override
+ protected void detachFromParentAndAppendToNewParent(Attributes child, Attributes newParent) throws SAXException {
+ }
+
+ @Override
+ protected boolean hasChildren(Attributes element) throws SAXException {
+ return false;
+ }
+
+ @Override
+ protected void insertBefore(Attributes child, Attributes sibling, Attributes parent) throws SAXException {
+ fatal();
+ }
+
+ @Override
+ protected void insertCharactersBefore(char[] buf, int start, int length, Attributes sibling, Attributes parent) throws SAXException {
+ fatal();
+ }
+
+ @Override
+ protected Attributes parentElementFor(Attributes child) throws SAXException {
+ fatal();
+ throw new RuntimeException("Unreachable");
+ }
+
+ @Override
+ protected Attributes shallowClone(Attributes element) throws SAXException {
+ return element;
+ }
+
+ public void setContentHandler(ContentHandler handler) {
+ contentHandler = handler;
+ }
+
+ public void setLexicalHandler(LexicalHandler handler) {
+ lexicalHandler = handler;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String)
+ */
+ @Override
+ protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) throws SAXException {
+ if (lexicalHandler != null) {
+ lexicalHandler.startDTD(name, publicIdentifier, systemIdentifier);
+ lexicalHandler.endDTD();
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#bodyClosed(java.lang.Object)
+ */
+ @Override
+ protected void bodyClosed(Attributes body) throws SAXException {
+ contentHandler.endElement("http://www.w3.org/1999/xhtml", "body", "body");
+ depth--;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(java.lang.String, java.lang.Object)
+ */
+ @Override
+ protected void elementPopped(String name, Attributes node) throws SAXException {
+ contentHandler.endElement("http://www.w3.org/1999/xhtml", name, name);
+ depth--;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#elementPushed(java.lang.String, java.lang.Object)
+ */
+ @Override
+ protected void elementPushed(String name, Attributes node) throws SAXException {
+ if (depth == 0) {
+ contentHandler.startPrefixMapping("", "http://www.w3.org/1999/xhtml");
+ }
+ contentHandler.startElement("http://www.w3.org/1999/xhtml", name, name, node);
+ depth++;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#end()
+ */
+ @Override
+ protected void end() throws SAXException {
+ contentHandler.endDocument();
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#htmlClosed(java.lang.Object)
+ */
+ @Override
+ protected void htmlClosed(Attributes html) throws SAXException {
+ contentHandler.endElement("http://www.w3.org/1999/xhtml", "html", "html");
+ contentHandler.endPrefixMapping("");
+ depth--;
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+ */
+ @Override
+ protected void start(boolean fragment) throws SAXException {
+ contentHandler.setDocumentLocator(tokenizer);
+ if (fragment) {
+ depth = 1;
+ } else {
+ depth = 0;
+ contentHandler.startDocument();
+ }
+ }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.AttributesImpl;
+import nu.validator.htmlparser.impl.TreeBuilder;
+import nu.validator.saxtree.Characters;
+import nu.validator.saxtree.Comment;
+import nu.validator.saxtree.DTD;
+import nu.validator.saxtree.Document;
+import nu.validator.saxtree.DocumentFragment;
+import nu.validator.saxtree.Element;
+import nu.validator.saxtree.NodeType;
+import nu.validator.saxtree.ParentNode;
+import nu.validator.saxtree.TreeParser;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+class SAXTreeBuilder extends TreeBuilder<Element> {
+
+ private Document document;
+
+ SAXTreeBuilder() {
+ super(XmlViolationPolicy.ALLOW, false);
+ }
+
+ @Override
+ protected void appendComment(Element parent, char[] buf, int start, int length) {
+ parent.appendChild(new Comment(tokenizer, buf, start, length));
+ }
+
+ @Override
+ protected void appendCommentToDocument(char[] buf, int start, int length) {
+ document.appendChild(new Comment(tokenizer, buf, start, length));
+ }
+
+ @Override
+ protected void appendCharacters(Element parent, char[] buf, int start, int length) {
+ parent.appendChild(new Characters(tokenizer, buf, start, length));
+ }
+
+ @Override
+ protected void detachFromParent(Element element) {
+ element.detach();
+ }
+
+ @Override
+ protected boolean hasChildren(Element element) {
+ return element.getFirstChild() != null;
+ }
+
+ @Override
+ protected Element shallowClone(Element element) {
+ Element newElt = new Element(element, element.getUri(), element.getLocalName(), element.getQName(), element.getAttributes(), true, element.getPrefixMappings());
+ newElt.copyEndLocator(element);
+ return newElt;
+ }
+
+ @Override
+ protected void detachFromParentAndAppendToNewParent(Element child, Element newParent) {
+ newParent.appendChild(child);
+ }
+
+ @Override
+ protected Element createHtmlElementSetAsRoot(Attributes attributes) {
+ Element newElt = new Element(tokenizer, "http://www.w3.org/1999/xhtml", "html", "html", attributes, true, null);
+ document.appendChild(newElt);
+ return newElt;
+ }
+
+ @Override
+ protected void insertBefore(Element child, Element sibling, Element parent) {
+ parent.insertBefore(child, sibling);
+ }
+
+ @Override
+ protected Element parentElementFor(Element child) {
+ ParentNode parent = child.getParentNode();
+ if (parent == null) {
+ return null;
+ }
+ if (parent.getNodeType() == NodeType.ELEMENT) {
+ return (Element) parent;
+ }
+ return null;
+ }
+
+ @Override
+ protected void addAttributesToElement(Element element, Attributes attributes) {
+ AttributesImpl existingAttrs = (AttributesImpl) element.getAttributes();
+ for (int i = 0; i < attributes.getLength(); i++) {
+ String qName = attributes.getQName(i);
+ if (existingAttrs.getIndex(qName) < 0) {
+ existingAttrs.addAttribute(qName, attributes.getValue(i));
+ }
+ }
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String)
+ */
+ @Override
+ protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) {
+ DTD dtd = new DTD(tokenizer, name, publicIdentifier, systemIdentifier);
+ dtd.setEndLocator(tokenizer);
+ document.appendChild(dtd);
+ }
+
+ /**
+ * Returns the document.
+ *
+ * @return the document
+ */
+ Document getDocument() {
+ Document rv = document;
+ document = null;
+ return rv;
+ }
+
+ DocumentFragment getDocumentFragment() {
+ DocumentFragment rv = new DocumentFragment();
+ rv.appendChildren(document.getFirstChild());
+ document = null;
+ return rv;
+ }
+
+ /**
+ * @throws SAXException
+ * @see nu.validator.htmlparser.impl.TreeBuilder#end()
+ */
+ @Override
+ protected void end() throws SAXException {
+ document.setEndLocator(tokenizer);
+ }
+
+ /**
+ * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+ */
+ @Override
+ protected void start(boolean fragment) {
+ document = new Document(tokenizer);
+ }
+
+ @Override
+ protected void appendChildrenToNewParent(Element oldParent, Element newParent) throws SAXException {
+ newParent.appendChildren(oldParent);
+ }
+
+ @Override
+ protected Element createElement(String name, Attributes attributes) throws SAXException {
+ return new Element(tokenizer, "http://www.w3.org/1999/xhtml", name, name, attributes, true, null);
+ }
+
+ @Override
+ protected void insertCharactersBefore(char[] buf, int start, int length, Element sibling, Element parent) throws SAXException {
+ parent.insertBefore(new Characters(tokenizer, buf, start, length), sibling);
+ }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/package.html
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/package.html?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/package.html (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/htmlparser/sax/package.html Tue Oct 23 09:28:51 2007
@@ -0,0 +1,29 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head><title>Package Overview</title>
+<!--
+ Copyright (c) 2007 Henri Sivonen
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+-->
+</head>
+<body bgcolor="white">
+<p>This package provides an HTML5 parser that exposes the document through the SAX API.</p>
+</body>
+</html>
\ No newline at end of file
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/CDATA.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/CDATA.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/CDATA.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/CDATA.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Tree node for a CDATA section. Walking the node reports the section to a
+ * {@link TreeParser} as a start call followed later by an end call.
+ */
+public final class CDATA extends ParentNode {
+
+    /**
+     * Creates a CDATA node.
+     *
+     * @param locator handed unchanged to the {@code ParentNode} constructor
+     */
+    public CDATA(final Locator locator) {
+        super(locator);
+    }
+
+    /**
+     * Identifies the kind of this node.
+     *
+     * @return always {@code NodeType.CDATA}
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.CDATA;
+    }
+
+    /**
+     * Announces the start of the section by passing this node to
+     * {@code TreeParser.startCDATA}.
+     *
+     * @param treeParser the tree parser that receives the call
+     * @throws SAXException propagated from the tree parser
+     */
+    @Override
+    void visit(final TreeParser treeParser) throws SAXException {
+        treeParser.startCDATA(this);
+    }
+
+    /**
+     * Announces the end of the section by passing the inherited
+     * {@code endLocator} to {@code TreeParser.endCDATA}.
+     *
+     * @param treeParser the tree parser that receives the call
+     * @throws SAXException propagated from the tree parser
+     * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void revisit(final TreeParser treeParser) throws SAXException {
+        treeParser.endCDATA(this.endLocator);
+    }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/CharBufferNode.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/CharBufferNode.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/CharBufferNode.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/CharBufferNode.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+
+/**
+ * Base class for nodes that carry a run of characters. The requested range is
+ * copied into an array of its own when the node is constructed.
+ */
+public abstract class CharBufferNode extends Node {
+
+    /** Copy of the character range that was supplied to the constructor. */
+    protected final char[] buffer;
+
+    /**
+     * Copies {@code length} characters of {@code buf}, beginning at index
+     * {@code start}, into a newly allocated array held by this node.
+     *
+     * @param locator handed unchanged to the {@code Node} constructor
+     * @param buf the array to copy from
+     * @param start index in {@code buf} of the first character to copy
+     * @param length number of characters to copy
+     */
+    CharBufferNode(final Locator locator, final char[] buf, final int start, final int length) {
+        super(locator);
+        final char[] copy = new char[length];
+        System.arraycopy(buf, start, copy, 0, length);
+        this.buffer = copy;
+    }
+
+    /**
+     * Returns the wrapped buffer as a string.
+     *
+     * @return a new string holding every character of {@code buffer}
+     * @see java.lang.Object#toString()
+     */
+    @Override
+    public String toString() {
+        return String.valueOf(this.buffer);
+    }
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Characters.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Characters.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Characters.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Characters.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Character-data node. The characters themselves are stored by the
+ * {@link CharBufferNode} superclass.
+ */
+public final class Characters extends CharBufferNode {
+
+    /**
+     * Creates a character-data node from a range of {@code buf}.
+     *
+     * @param locator handed unchanged to the superclass constructor
+     * @param buf the array to copy from
+     * @param start index in {@code buf} of the first character
+     * @param length number of characters
+     */
+    public Characters(final Locator locator, final char[] buf, final int start, final int length) {
+        super(locator, buf, start, length);
+    }
+
+    /**
+     * Identifies the kind of this node.
+     *
+     * @return always {@code NodeType.CHARACTERS}
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.CHARACTERS;
+    }
+
+    /**
+     * Passes the whole stored buffer (offset 0, full length) together with
+     * this node to {@code TreeParser.characters}.
+     *
+     * @param treeParser the tree parser that receives the call
+     * @throws SAXException propagated from the tree parser
+     */
+    @Override
+    void visit(final TreeParser treeParser) throws SAXException {
+        final char[] chars = this.buffer;
+        treeParser.characters(chars, 0, chars.length, this);
+    }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Comment.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Comment.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Comment.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Comment.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Comment node. The comment text is stored by the {@link CharBufferNode}
+ * superclass.
+ */
+public final class Comment extends CharBufferNode {
+
+    /**
+     * Creates a comment node from a range of {@code buf}.
+     *
+     * @param locator handed unchanged to the superclass constructor
+     * @param buf the array to copy from
+     * @param start index in {@code buf} of the first character
+     * @param length number of characters
+     */
+    public Comment(final Locator locator, final char[] buf, final int start, final int length) {
+        super(locator, buf, start, length);
+    }
+
+    /**
+     * Identifies the kind of this node.
+     *
+     * @return always {@code NodeType.COMMENT}
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.COMMENT;
+    }
+
+    /**
+     * Passes the whole stored buffer (offset 0, full length) together with
+     * this node to {@code TreeParser.comment}.
+     *
+     * @param treeParser the tree parser that receives the call
+     * @throws SAXException propagated from the tree parser
+     */
+    @Override
+    void visit(final TreeParser treeParser) throws SAXException {
+        final char[] text = this.buffer;
+        treeParser.comment(text, 0, text.length, this);
+    }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/DTD.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/DTD.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/DTD.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/DTD.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+public final class DTD extends ParentNode {
+
+ private final String name;
+ private final String publicIdentifier;
+ private final String systemIdentifier;
+
+ public DTD(Locator locator, String name, String publicIdentifier, String systemIdentifier) {
+ super(locator);
+ this.name = name;
+ this.publicIdentifier = publicIdentifier;
+ this.systemIdentifier = systemIdentifier;
+ }
+
+ @Override
+ void visit(TreeParser treeParser) throws SAXException {
+ treeParser.startDTD(name, publicIdentifier, systemIdentifier, this);
+ }
+
+ /**
+ * @throws SAXException
+ * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+ */
+ @Override
+ void revisit(TreeParser treeParser) throws SAXException {
+ treeParser.endDTD(endLocator);
+ }
+
+ /**
+ * Returns the name.
+ *
+ * @return the name
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Returns the publicIdentifier.
+ *
+ * @return the publicIdentifier
+ */
+ public String getPublicIdentifier() {
+ return publicIdentifier;
+ }
+
+ /**
+ * Returns the systemIdentifier.
+ *
+ * @return the systemIdentifier
+ */
+ public String getSystemIdentifier() {
+ return systemIdentifier;
+ }
+
+ @Override
+ public NodeType getNodeType() {
+ return NodeType.DTD;
+ }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Document.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Document.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Document.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/Document.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * Tree node for a document. Walking the node reports it to a
+ * {@link TreeParser} as a start call followed later by an end call.
+ */
+public final class Document extends ParentNode {
+
+    /**
+     * Creates a document node.
+     *
+     * @param locator handed unchanged to the {@code ParentNode} constructor
+     */
+    public Document(final Locator locator) {
+        super(locator);
+    }
+
+    /**
+     * Identifies the kind of this node.
+     *
+     * @return always {@code NodeType.DOCUMENT}
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.DOCUMENT;
+    }
+
+    /**
+     * Announces the start of the document by passing this node to
+     * {@code TreeParser.startDocument}.
+     *
+     * @param treeParser the tree parser that receives the call
+     * @throws SAXException propagated from the tree parser
+     */
+    @Override
+    void visit(final TreeParser treeParser) throws SAXException {
+        treeParser.startDocument(this);
+    }
+
+    /**
+     * Announces the end of the document by passing the inherited
+     * {@code endLocator} to {@code TreeParser.endDocument}.
+     *
+     * @param treeParser the tree parser that receives the call
+     * @throws SAXException propagated from the tree parser
+     * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser)
+     */
+    @Override
+    void revisit(final TreeParser treeParser) throws SAXException {
+        treeParser.endDocument(this.endLocator);
+    }
+
+}
Added: incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/DocumentFragment.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/DocumentFragment.java?rev=587550&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/DocumentFragment.java (added)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/nu/validator/saxtree/DocumentFragment.java Tue Oct 23 09:28:51 2007
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.saxtree;
+
+import org.xml.sax.helpers.LocatorImpl;
+
+/**
+ * Tree node for a document fragment. Visiting it makes no call on the
+ * {@link TreeParser}.
+ */
+public final class DocumentFragment extends ParentNode {
+
+    /**
+     * Creates a fragment node, giving the {@code ParentNode} constructor a
+     * freshly created default {@link LocatorImpl}.
+     */
+    public DocumentFragment() {
+        super(new LocatorImpl());
+    }
+
+    /**
+     * Identifies the kind of this node.
+     *
+     * @return always {@code NodeType.DOCUMENT_FRAGMENT}
+     */
+    @Override
+    public NodeType getNodeType() {
+        return NodeType.DOCUMENT_FRAGMENT;
+    }
+
+    /**
+     * Does nothing; the body is deliberately empty.
+     *
+     * @param treeParser unused
+     */
+    @Override
+    void visit(final TreeParser treeParser) {
+        // deliberately empty
+    }
+}