You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/08/06 17:59:19 UTC

[21/33] lucenenet git commit: Lucene.Net.Benchmark: Added Sax and TagSoup to the Support folder.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/SAXParseException.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/Sax/SAXParseException.cs b/src/Lucene.Net.Benchmark/Support/Sax/SAXParseException.cs
new file mode 100644
index 0000000..b7cdf64
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/Sax/SAXParseException.cs
@@ -0,0 +1,291 @@
+// SAX exception class.
+// http://www.saxproject.org
+// No warranty; no copyright -- use this as you will.
+// $Id: SAXParseException.java,v 1.11 2004/04/21 13:05:02 dmegginson Exp $
+
+using System;
+#if FEATURE_SERIALIZABLE
+using System.Runtime.Serialization;
+#endif
+
+namespace Sax
+{
+    /// <summary>
+    /// Encapsulate an XML parse error or warning.
+    /// </summary>
+    /// <remarks>
+    /// <em>This module, both source code and documentation, is in the
+    /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+    /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+    /// for further information.
+    /// <para/>
+    /// This exception may include information for locating the error
+    /// in the original XML document, as if it came from a <see cref="ILocator"/>
+    /// object.  Note that although the application
+    /// will receive a SAXParseException as the argument to the handlers
+    /// in the <see cref="IErrorHandler"/> interface,
+    /// the application is not actually required to throw the exception;
+    /// instead, it can simply read the information in it and take a
+    /// different action.
+    /// <para/>
+    /// Since this exception is a subclass of <see cref="SAXException"/>,
+    /// it inherits the ability to wrap another exception.
+    /// </remarks>
+    /// <since>SAX 1.0</since>
+    /// <author>David Megginson</author>
+    /// <version>2.0.1 (sax2r2)</version>
+    /// <seealso cref="SAXException"/>
+    /// <seealso cref="ILocator"/>
+    /// <seealso cref="IErrorHandler"/>
+#if FEATURE_SERIALIZABLE
+    [Serializable]
+#endif
+    public class SAXParseException : SAXException
+    {
+        //////////////////////////////////////////////////////////////////////
+        // Constructors.
+        //////////////////////////////////////////////////////////////////////
+
+        /// <summary>
+        /// Construct a new exception with no message and no location information.
+        /// </summary>
+        // LUCENENET specific for serialization
+        public SAXParseException()
+            : base()
+        {
+            // No location is known, so use the documented "not available" values.
+            Init(null, null, -1, -1);
+        }
+
+        /// <summary>
+        /// Create a new <see cref="SAXParseException"/> from a message and a <see cref="ILocator"/>.
+        /// </summary>
+        /// <remarks>
+        /// This constructor is especially useful when an application is
+        /// creating its own exception from within a <see cref="IContentHandler"/>
+        /// callback.
+        /// </remarks>
+        /// <param name="message">The error or warning message.</param>
+        /// <param name="locator">The locator object for the error or warning (may be null).</param>
+        /// <seealso cref="ILocator"/>
+        public SAXParseException(string message, ILocator locator)
+            : base(message)
+        {
+            if (locator != null)
+            {
+                Init(locator.PublicId, locator.SystemId,
+                 locator.LineNumber, locator.ColumnNumber);
+            }
+            else
+            {
+                Init(null, null, -1, -1);
+            }
+        }
+
+        /// <summary>
+        /// Wrap an existing exception in a SAXParseException.
+        /// </summary>
+        /// <remarks>
+        /// This constructor is especially useful when an application is
+        /// creating its own exception from within a <see cref="IContentHandler"/>
+        /// callback, and needs to wrap an existing exception that is not a
+        /// subclass of <see cref="SAXException"/>.
+        /// </remarks>
+        /// <param name="message">The error or warning message, or null to
+        /// use the message from the embedded exception.</param>
+        /// <param name="locator">The locator object for the error or warning (may be
+        /// null).</param>
+        /// <param name="e">Any exception.</param>
+        /// <seealso cref="ILocator"/>
+        public SAXParseException(string message, ILocator locator,
+                      Exception e)
+            : base(message, e)
+        {
+            if (locator != null)
+            {
+                Init(locator.PublicId, locator.SystemId,
+                 locator.LineNumber, locator.ColumnNumber);
+            }
+            else
+            {
+                Init(null, null, -1, -1);
+            }
+        }
+
+        /// <summary>
+        /// Create a new SAXParseException.
+        /// </summary>
+        /// <remarks>
+        /// This constructor is most useful for parser writers.
+        /// <para/>
+        /// All parameters except the message are as if
+        /// they were provided by a <see cref="ILocator"/>.  For example, if the
+        /// system identifier is a URL (including relative filename), the
+        /// caller must resolve it fully before creating the exception.
+        /// </remarks>
+        /// <param name="message">The error or warning message.</param>
+        /// <param name="publicId">The public identifier of the entity that generated the error or warning.</param>
+        /// <param name="systemId">The system identifier of the entity that generated the error or warning.</param>
+        /// <param name="lineNumber">The line number of the end of the text that caused the error or warning.</param>
+        /// <param name="columnNumber">The column number of the end of the text that caused the error or warning.</param>
+        public SAXParseException(string message, string publicId, string systemId,
+                      int lineNumber, int columnNumber)
+            : base(message)
+        {
+            Init(publicId, systemId, lineNumber, columnNumber);
+        }
+
+        /// <summary>
+        /// Create a new <see cref="SAXParseException"/> with an embedded exception.
+        /// </summary>
+        /// <remarks>
+        /// This constructor is most useful for parser writers who
+        /// need to wrap an exception that is not a subclass of
+        /// <see cref="SAXException"/>.
+        /// <para/>
+        /// All parameters except the message and exception are as if
+        /// they were provided by a <see cref="ILocator"/>.  For example, if the
+        /// system identifier is a URL (including relative filename), the
+        /// caller must resolve it fully before creating the exception.
+        /// </remarks>
+        /// <param name="message">The error or warning message, or null to use the message from the embedded exception.</param>
+        /// <param name="publicId">The public identifier of the entity that generated the error or warning.</param>
+        /// <param name="systemId">The system identifier of the entity that generated the error or warning.</param>
+        /// <param name="lineNumber">The line number of the end of the text that caused the error or warning.</param>
+        /// <param name="columnNumber">The column number of the end of the text that caused the error or warning.</param>
+        /// <param name="e">Another exception to embed in this one.</param>
+        public SAXParseException(string message, string publicId, string systemId,
+                      int lineNumber, int columnNumber, Exception e)
+            : base(message, e)
+        {
+            Init(publicId, systemId, lineNumber, columnNumber);
+        }
+
+#if FEATURE_SERIALIZABLE
+        /// <summary>
+        /// Initializes a new instance of this class with serialized data, including the location information.
+        /// </summary>
+        /// <param name="info">The <see cref="SerializationInfo"/> that holds the serialized object data about the exception being thrown.</param>
+        /// <param name="context">The <see cref="StreamingContext"/> that contains contextual information about the source or destination.</param>
+        public SAXParseException(SerializationInfo info, StreamingContext context)
+            : base(info, context)
+        {
+            // Restore the location state written by GetObjectData.
+            this.publicId = info.GetString("publicId");
+            this.systemId = info.GetString("systemId");
+            this.lineNumber = info.GetInt32("lineNumber");
+            this.columnNumber = info.GetInt32("columnNumber");
+        }
+
+        /// <summary>
+        /// Adds the location information of this exception to the serialized data,
+        /// in addition to the state stored by the base class.
+        /// </summary>
+        /// <param name="info">The <see cref="SerializationInfo"/> that receives the serialized object data.</param>
+        /// <param name="context">The <see cref="StreamingContext"/> that contains contextual information about the source or destination.</param>
+        public override void GetObjectData(SerializationInfo info, StreamingContext context)
+        {
+            base.GetObjectData(info, context);
+            info.AddValue("publicId", this.publicId, typeof(string));
+            info.AddValue("systemId", this.systemId, typeof(string));
+            info.AddValue("lineNumber", this.lineNumber);
+            info.AddValue("columnNumber", this.columnNumber);
+        }
+#endif
+
+        /// <summary>
+        /// Internal initialization method.
+        /// </summary>
+        /// <param name="publicId">The public identifier of the entity which generated the exception, or null.</param>
+        /// <param name="systemId">The system identifier of the entity which generated the exception, or null.</param>
+        /// <param name="lineNumber">The line number of the error, or -1.</param>
+        /// <param name="columnNumber">The column number of the error, or -1.</param>
+        private void Init(string publicId, string systemId,
+                   int lineNumber, int columnNumber)
+        {
+            this.publicId = publicId;
+            this.systemId = systemId;
+            this.lineNumber = lineNumber;
+            this.columnNumber = columnNumber;
+        }
+
+        /// <summary>
+        /// Get the public identifier of the entity where the exception occurred.
+        /// Returns a string containing the public identifier, or null if none is available.
+        /// </summary>
+        /// <seealso cref="ILocator.PublicId"/>
+        public string PublicId
+        {
+            get { return this.publicId; }
+        }
+
+        /// <summary>
+        /// Get the system identifier of the entity where the exception occurred.
+        /// <para/>
+        /// If the system identifier is a URL, it will have been resolved fully.
+        /// <para/>
+        /// A string containing the system identifier, or null if none is available.
+        /// </summary>
+        /// <seealso cref="ILocator.SystemId"/>
+        public string SystemId
+        {
+            get { return this.systemId; }
+        }
+
+        /// <summary>
+        /// The line number of the end of the text where the exception occurred.
+        /// <para/>
+        /// The first line is line 1.
+        /// <para/>
+        /// An integer representing the line number, or -1 if none is available.
+        /// </summary>
+        /// <seealso cref="ILocator.LineNumber"/>
+        public int LineNumber
+        {
+            get { return this.lineNumber; }
+        }
+
+        /// <summary>
+        /// The column number of the end of the text where the exception occurred.
+        /// <para/>
+        /// The first column in a line is position 1.
+        /// <para/>
+        /// An integer representing the column number, or -1
+        /// if none is available.
+        /// </summary>
+        /// <seealso cref="ILocator.ColumnNumber"/>
+        public int ColumnNumber
+        {
+            get { return this.columnNumber; }
+        }
+
+
+        //////////////////////////////////////////////////////////////////////
+        // Internal state.
+        //////////////////////////////////////////////////////////////////////
+
+        /// <summary>
+        /// The public identifier, or null.
+        /// </summary>
+        /// <seealso cref="PublicId"/>
+        private string publicId;
+
+        /// <summary>
+        /// The system identifier, or null.
+        /// </summary>
+        /// <seealso cref="SystemId"/>
+        private string systemId;
+
+        /// <summary>
+        /// The line number, or -1.
+        /// </summary>
+        /// <seealso cref="LineNumber"/>
+        private int lineNumber;
+
+        /// <summary>
+        /// The column number, or -1.
+        /// </summary>
+        /// <seealso cref="ColumnNumber"/>
+        private int columnNumber;
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/XMLFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/Sax/XMLFilter.cs b/src/Lucene.Net.Benchmark/Support/Sax/XMLFilter.cs
new file mode 100644
index 0000000..f9350d3
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/Sax/XMLFilter.cs
@@ -0,0 +1,41 @@
+// XMLFilter.java - filter SAX2 events.
+// http://www.saxproject.org
+// Written by David Megginson
+// NO WARRANTY!  This class is in the Public Domain.
+// $Id: XMLFilter.java,v 1.6 2002/01/30 21:13:48 dbrownell Exp $
+
+namespace Sax
+{
+    /// <summary>
+    /// Interface for an XML filter.
+    /// </summary>
+    /// <remarks>
+    /// <em>This module, both source code and documentation, is in the
+    /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+    /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+    /// for further information.
+    /// <para/>
+    /// An XML filter is like an XML reader, except that it obtains its
+    /// events from another XML reader rather than a primary source like
+    /// an XML document or database.  Filters can modify a stream of
+    /// events as they pass on to the final application.
+    /// <para/>
+    /// The <see cref="Helpers.XMLFilter"/> helper class provides a convenient base
+    /// for creating SAX2 filters, by passing on all <see cref="IEntityResolver"/>, <see cref="IDTDHandler"/>,
+    /// <see cref="IContentHandler"/> and <see cref="IErrorHandler"/>
+    /// events automatically.
+    /// </remarks>
+    public interface IXMLFilter : IXMLReader
+    {
+        /// <summary>
+        /// Gets or sets the parent reader. The value is null if no parent has been set.
+        /// </summary>
+        /// <remarks>
+        /// This property allows the application to link or query the parent
+        /// reader (which may be another filter).  It is generally a
+        /// bad idea to perform any operations on the parent reader
+        /// directly: they should all pass through this filter.
+        /// </remarks>
+        IXMLReader Parent { get; set; }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/XMLReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/Sax/XMLReader.cs b/src/Lucene.Net.Benchmark/Support/Sax/XMLReader.cs
new file mode 100644
index 0000000..71b690f
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/Sax/XMLReader.cs
@@ -0,0 +1,305 @@
+// XMLReader.java - read an XML document.
+// http://www.saxproject.org
+// Written by David Megginson
+// NO WARRANTY!  This class is in the Public Domain.
+// $Id: XMLFilter.java,v 1.6 2002/01/30 21:13:48 dbrownell Exp $
+
+namespace Sax
+{
+    /// <summary>
+    /// Interface for reading an XML document using callbacks.
+    /// </summary>
+    /// <remarks>
+    /// <em>This module, both source code and documentation, is in the
+    /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+    /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+    /// for further information.
+    /// <para/>
+    /// An <see cref="IXMLReader"/> is the interface through which an
+    /// application drives a SAX2 parse.  It allows the application to
+    /// query and set features and properties, to register event handlers
+    /// for document processing, and to initiate a document parse.
+    /// <para/>
+    /// During a parse the reader communicates with the application through
+    /// the registered <see cref="IEntityResolver"/>,
+    /// <see cref="IDTDHandler"/>,
+    /// <see cref="IContentHandler"/> and <see cref="IErrorHandler"/>
+    /// objects.
+    /// </remarks>
+    /// <since>SAX 2.0</since>
+    /// <author>David Megginson</author>
+    /// <version>2.0.1 (sax2r2)</version>
+    /// <seealso cref="Helpers.XMLFilter"/>
+    public interface IXMLReader
+    {
+        ////////////////////////////////////////////////////////////////////
+        // Configuration.
+        ////////////////////////////////////////////////////////////////////
+
+
+        /// <summary>
+        /// Look up the value of a feature flag.
+        /// </summary>
+        /// <remarks>
+        /// The feature name is any fully-qualified URI.  It is
+        /// possible for an XMLReader to recognize a feature name but
+        /// temporarily be unable to return its value.
+        /// Some feature values may be available only in specific
+        /// contexts, such as before, during, or after a parse.
+        /// Also, some feature values may not be programmatically accessible.
+        /// (In the case of an adapter for a SAX1 parser, there is no
+        /// implementation-independent way to expose whether the underlying
+        /// parser is performing validation, expanding external entities,
+        /// and so forth.)
+        /// <para/>All XMLReaders are required to recognize the
+        /// http://xml.org/sax/features/namespaces and the
+        /// http://xml.org/sax/features/namespace-prefixes feature names.
+        /// <para/>Typical usage is something like this:
+        /// <code>
+        /// IXMLReader r = new MySAXDriver();
+        ///                         // try to activate validation
+        /// try {
+        ///    r.SetFeature("http://xml.org/sax/features/validation", true);
+        /// } catch (SAXException e) {
+        ///    Console.Error.WriteLine("Cannot activate validation.");
+        /// }
+        ///                         // register event handlers
+        /// r.ContentHandler = new MyContentHandler();
+        /// r.ErrorHandler = new MyErrorHandler();
+        ///                         // parse the first document
+        /// try {
+        ///    r.Parse("http://www.foo.com/mydoc.xml");
+        /// } catch (IOException e) {
+        ///    Console.Error.WriteLine("I/O exception reading XML document");
+        /// } catch (SAXException e) {
+        ///    Console.Error.WriteLine("XML exception reading document.");
+        /// }
+        /// </code>
+        /// <para/>Implementors are free (and encouraged) to invent their own features,
+        /// using names built on their own URIs.
+        /// </remarks>
+        /// <param name="name">The feature name, which is a fully-qualified URI.</param>
+        /// <returns>The current value of the feature (true or false).</returns>
+        /// <exception cref="SAXNotRecognizedException">If the feature
+        /// value can't be assigned or retrieved.</exception>
+        /// <exception cref="SAXNotSupportedException">When the
+        /// <see cref="IXMLReader"/> recognizes the feature name but
+        /// cannot determine its value at this time.</exception>
+        /// <seealso cref="SetFeature(string, bool)"/>
+        bool GetFeature(string name);
+
+
+        /// <summary>
+        /// Set the value of a feature flag.
+        /// <para/>
+        /// The feature name is any fully-qualified URI.  It is
+        /// possible for an XMLReader to expose a feature value but
+        /// to be unable to change the current value.
+        /// Some feature values may be immutable or mutable only
+        /// in specific contexts, such as before, during, or after
+        /// a parse.
+        /// <para/>
+        /// All XMLReaders are required to support setting
+        /// http://xml.org/sax/features/namespaces to true and
+        /// http://xml.org/sax/features/namespace-prefixes to false.
+        /// </summary>
+        /// <param name="name">The feature name, which is a fully-qualified URI.</param>
+        /// <param name="value">The requested value of the feature (true or false).</param>
+        /// <exception cref="SAXNotRecognizedException">If the feature
+        /// value can't be assigned or retrieved.</exception>
+        /// <exception cref="SAXNotSupportedException">When the
+        /// <see cref="IXMLReader"/> recognizes the feature name but
+        /// cannot set the requested value.</exception>
+        /// <seealso cref="GetFeature(string)"/>
+        void SetFeature(string name, bool value);
+
+
+        /// <summary>
+        /// Look up the value of a property.
+        /// </summary>
+        /// <remarks>
+        /// The property name is any fully-qualified URI.  It is
+        /// possible for an XMLReader to recognize a property name but
+        /// temporarily be unable to return its value.
+        /// Some property values may be available only in specific
+        /// contexts, such as before, during, or after a parse.
+        /// <para/>
+        /// <see cref="IXMLReader"/>s are not required to recognize any specific
+        /// property names, though an initial core set is documented for
+        /// SAX2.
+        /// <para/>
+        /// Implementors are free (and encouraged) to invent their own properties,
+        /// using names built on their own URIs.
+        /// </remarks>
+        /// <param name="name">The property name, which is a fully-qualified URI.</param>
+        /// <returns>The current value of the property.</returns>
+        /// <exception cref="SAXNotRecognizedException">If the property
+        /// value can't be assigned or retrieved.</exception>
+        /// <exception cref="SAXNotSupportedException">When the
+        /// <see cref="IXMLReader"/> recognizes the property name but
+        /// cannot determine its value at this time.</exception>
+        /// <seealso cref="SetProperty(string, object)"/>
+        object GetProperty(string name);
+
+
+        /// <summary>
+        /// Set the value of a property.
+        /// </summary>
+        /// <remarks>
+        /// The property name is any fully-qualified URI.  It is
+        /// possible for an <see cref="IXMLReader"/> to recognize a property name but
+        /// to be unable to change the current value.
+        /// Some property values may be immutable or mutable only
+        /// in specific contexts, such as before, during, or after
+        /// a parse.
+        /// <para/>
+        /// <see cref="IXMLReader"/>s are not required to recognize setting
+        /// any specific property names, though a core set is defined by
+        /// SAX2.
+        /// <para/>
+        /// This method is also the standard mechanism for setting
+        /// extended handlers.
+        /// </remarks>
+        /// <param name="name">The property name, which is a fully-qualified URI.</param>
+        /// <param name="value">The requested value for the property.</param>
+        /// <exception cref="SAXNotRecognizedException">If the property
+        /// value can't be assigned or retrieved.</exception>
+        /// <exception cref="SAXNotSupportedException">When the
+        /// <see cref="IXMLReader"/> recognizes the property name but
+        /// cannot set the requested value.</exception>
+        void SetProperty(string name, object value);
+
+
+
+        ////////////////////////////////////////////////////////////////////
+        // Event handlers.
+        ////////////////////////////////////////////////////////////////////
+
+
+        /// <summary>
+        /// Gets or sets an entity resolver.
+        /// </summary>
+        /// <remarks>
+        /// If the application does not register an entity resolver,
+        /// the <see cref="IXMLReader"/> will perform its own default resolution.
+        /// <para/>
+        /// Applications may register a new or different resolver in the
+        /// middle of a parse, and the SAX parser must begin using the new
+        /// resolver immediately.
+        /// </remarks>
+        IEntityResolver EntityResolver { get; set; }
+
+        /// <summary>
+        /// Gets or sets a DTD event handler.
+        /// </summary>
+        /// <remarks>
+        /// If the application does not register a DTD handler, all DTD
+        /// events reported by the SAX parser will be silently ignored.
+        /// <para/>
+        /// Applications may register a new or different handler in the
+        /// middle of a parse, and the SAX parser must begin using the new
+        /// handler immediately.
+        /// </remarks>
+        IDTDHandler DTDHandler { get; set; }
+
+        /// <summary>
+        /// Gets or sets a content event handler.
+        /// </summary>
+        /// <remarks>
+        /// <para/>If the application does not register a content handler, all
+        /// content events reported by the SAX parser will be silently
+        /// ignored.
+        /// <para/>Applications may register a new or different handler in the
+        /// middle of a parse, and the SAX parser must begin using the new
+        /// handler immediately.
+        /// </remarks>
+        IContentHandler ContentHandler { get; set; }
+
+
+        /// <summary>
+        /// Gets or sets an error event handler.
+        /// </summary>
+        /// <remarks>
+        /// If the application does not register an error handler, all
+        /// error events reported by the SAX parser will be silently
+        /// ignored; however, normal processing may not continue.  It is
+        /// highly recommended that all SAX applications implement an
+        /// error handler to avoid unexpected bugs.
+        /// <para/>
+        /// Applications may register a new or different handler in the
+        /// middle of a parse, and the SAX parser must begin using the new
+        /// handler immediately.
+        /// </remarks>
+        IErrorHandler ErrorHandler { get; set; }
+
+
+        ////////////////////////////////////////////////////////////////////
+        // Parsing.
+        ////////////////////////////////////////////////////////////////////
+
+        /// <summary>
+        /// Parse an XML document.
+        /// </summary>
+        /// <remarks>
+        /// The application can use this method to instruct the XML
+        /// reader to begin parsing an XML document from any valid input
+        /// source (a character stream, a byte stream, or a URI).
+        /// <para/>
+        /// Applications may not invoke this method while a parse is in
+        /// progress (they should create a new XMLReader instead for each
+        /// nested XML document).  Once a parse is complete, an
+        /// application may reuse the same XMLReader object, possibly with a
+        /// different input source.
+        /// Configuration of the <see cref="IXMLReader"/> object (such as handler bindings and
+        /// values established for feature flags and properties) is unchanged
+        /// by completion of a parse, unless the definition of that aspect of
+        /// the configuration explicitly specifies other behavior.
+        /// (For example, feature flags or properties exposing
+        /// characteristics of the document being parsed.)
+        /// <para/>
+        /// During the parse, the XMLReader will provide information
+        /// about the XML document through the registered event
+        /// handlers.
+        /// <para/>
+        /// This method is synchronous: it will not return until parsing
+        /// has ended.  If a client application wants to terminate
+        /// parsing early, it should throw an exception.
+        /// </remarks>
+        /// <param name="input">The input source for the top-level of the
+        /// XML document.</param>
+        /// <exception cref="SAXException">Any SAX exception, possibly
+        /// wrapping another exception.</exception>
+        /// <exception cref="System.IO.IOException">An IO exception from the parser,
+        /// possibly from a byte stream or character stream
+        /// supplied by the application.</exception>
+        /// <seealso cref="InputSource"/>
+        /// <seealso cref="Parse(string)"/>
+        /// <seealso cref="EntityResolver"/>
+        /// <seealso cref="DTDHandler"/>
+        /// <seealso cref="ContentHandler"/>
+        /// <seealso cref="ErrorHandler"/>
+        void Parse(InputSource input);
+
+
+        /// <summary>
+        /// Parse an XML document from a system identifier (URI).
+        /// </summary>
+        /// <remarks>
+        /// This method is a shortcut for the common case of reading a
+        /// document from a system identifier.  It is the exact
+        /// equivalent of the following:
+        /// <code>
+        /// Parse(new InputSource(systemId));
+        /// </code>
+        /// <para/>If the system identifier is a URL, it must be fully resolved
+        /// by the application before it is passed to the parser.
+        /// </remarks>
+        /// <param name="systemId">The system identifier (URI).</param>
+        /// <exception cref="SAXException">Any SAX exception, possibly
+        /// wrapping another exception.</exception>
+        /// <exception cref="System.IO.IOException">An IO exception from the parser,
+        /// possibly from a byte stream or character stream
+        /// supplied by the application.</exception>
+        void Parse(string systemId);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/StringExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/StringExtensions.cs b/src/Lucene.Net.Benchmark/Support/StringExtensions.cs
new file mode 100644
index 0000000..2104fdb
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/StringExtensions.cs
@@ -0,0 +1,30 @@
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Extension methods for <see cref="string"/>.
+    /// </summary>
+    public static class StringExtensions
+    {
+        /// <summary>
+        /// Returns the interned instance of <paramref name="value"/> where the
+        /// target framework supports interning.
+        /// </summary>
+        /// <remarks>
+        /// When <c>NETSTANDARD</c> is defined the string is returned unchanged.
+        /// Otherwise the result of <c>string.Intern</c> is returned.
+        /// A <c>null</c> input yields <c>null</c> on every target, so both builds behave the same way.
+        /// </remarks>
+        /// <param name="value">The string to intern; may be <c>null</c>.</param>
+        /// <returns>The interned string, or <paramref name="value"/> itself when interning is unavailable or it is <c>null</c>.</returns>
+        public static string Intern(this string value)
+        {
+#if NETSTANDARD
+            return value;
+#else
+            // string.Intern throws ArgumentNullException for null; the NETSTANDARD
+            // branch returns null instead, so guard here to keep both builds consistent.
+            return value == null ? null : string.Intern(value);
+#endif
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/AutoDetector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/AutoDetector.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/AutoDetector.cs
new file mode 100644
index 0000000..6fcb578
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/AutoDetector.cs
@@ -0,0 +1,41 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// 
+// 
+// Interface to objects that translate InputStreams to Readers by auto-detection
+
+using System.IO;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// Classes which accept a <see cref="Stream"/> and provide a <see cref="TextReader"/> which figures
+    /// out the encoding of the <see cref="Stream"/> and reads characters from it should
+    /// conform to this interface.
+    /// </summary>
+    /// <seealso cref="Stream" />
+    /// <seealso cref="TextReader" />
+    public interface IAutoDetector
+    {
+        /// <summary>
+        /// Given a <see cref="Stream"/>, return a suitable <see cref="TextReader"/> that understands
+        /// the presumed character encoding of that <see cref="Stream"/>.
+        /// If bytes are consumed from the <see cref="Stream"/> in the process, they
+        /// <i>must</i> be pushed back onto the <see cref="Stream"/> so that they can be
+        /// reinterpreted as characters.
+        /// </summary>
+        /// <param name="stream">The <see cref="Stream"/> whose character encoding is to be detected.</param>
+        /// <returns>A <see cref="TextReader"/> that reads from the <see cref="Stream"/>.</returns>
+        TextReader AutoDetectingReader(Stream stream);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/Element.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Element.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Element.cs
new file mode 100644
index 0000000..dca7eed
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Element.cs
@@ -0,0 +1,215 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+using Sax.Helpers;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// An actual element occurring in a document, as opposed to an element type.
+    /// Every <see cref="Element"/> carries its <see cref="ElementType"/>, its own
+    /// attribute set, and a link to a successor <see cref="Element"/> so that
+    /// elements can be chained into stacks and queues.
+    /// </summary>
+    /// <seealso cref="ElementType" />
+    /// <seealso cref="Sax.Helpers.Attributes" />
+    public class Element
+    {
+        private readonly Attributes _attributes; // attributes carried by this element
+        private readonly ElementType _elementType; // the type this element is an instance of
+        private bool _isPreclosed; // set once Preclose() has been called
+
+        /// <summary>
+        /// Creates an element of the given <see cref="ElementType"/>.
+        /// </summary>
+        /// <param name="type">
+        /// The element type of the new element
+        /// </param>
+        /// <param name="defaultAttributes">
+        /// <c>true</c> to start from a copy of the type's default attributes,
+        /// <c>false</c> to start with an empty attribute set
+        /// </param>
+        public Element(ElementType type, bool defaultAttributes)
+        {
+            _elementType = type;
+            _attributes = defaultAttributes
+                ? new Attributes(type.Attributes)
+                : new Attributes();
+            Next = null;
+            _isPreclosed = false;
+        }
+
+        /// <summary>
+        /// Gets the type of this element.
+        /// </summary>
+        public virtual ElementType Type
+        {
+            get
+            {
+                return _elementType;
+            }
+        }
+
+        /// <summary>
+        /// Gets the attributes of this element.
+        /// The concrete <see cref="Sax.Helpers.Attributes"/> object is returned,
+        /// so callers are able to modify the attributes through it.
+        /// </summary>
+        /// <seealso cref="Sax.Helpers.Attributes" />
+        public virtual Attributes Attributes
+        {
+            get
+            {
+                return _attributes;
+            }
+        }
+
+        /// <summary>
+        /// Gets or sets the element that follows this one in an element stack or queue.
+        /// </summary>
+        public virtual Element Next { get; set; }
+
+        /// <summary>
+        /// Gets the name of this element's type.
+        /// </summary>
+        public virtual string Name
+        {
+            get
+            {
+                return _elementType.Name;
+            }
+        }
+
+        /// <summary>
+        /// Gets the namespace name of this element's type.
+        /// </summary>
+        public virtual string Namespace
+        {
+            get
+            {
+                return _elementType.Namespace;
+            }
+        }
+
+        /// <summary>
+        /// Gets the local name of this element's type.
+        /// </summary>
+        public virtual string LocalName
+        {
+            get
+            {
+                return _elementType.LocalName;
+            }
+        }
+
+        /// <summary>
+        /// Gets the content model vector of this element's type.
+        /// </summary>
+        public virtual int Model
+        {
+            get
+            {
+                return _elementType.Model;
+            }
+        }
+
+        /// <summary>
+        /// Gets the member-of vector of this element's type.
+        /// </summary>
+        public virtual int MemberOf
+        {
+            get
+            {
+                return _elementType.MemberOf;
+            }
+        }
+
+        /// <summary>
+        /// Gets the flags vector of this element's type.
+        /// </summary>
+        public virtual int Flags
+        {
+            get
+            {
+                return _elementType.Flags;
+            }
+        }
+
+        /// <summary>
+        /// Gets the parent element type of this element's type.
+        /// </summary>
+        public virtual ElementType Parent
+        {
+            get
+            {
+                return _elementType.Parent;
+            }
+        }
+
+        /// <summary>
+        /// Gets a value indicating whether <see cref="Preclose()"/> has been
+        /// called on this element.
+        /// </summary>
+        public virtual bool IsPreclosed
+        {
+            get
+            {
+                return _isPreclosed;
+            }
+        }
+
+        /// <summary>
+        /// Convenience method: returns <c>true</c> if the type of this element
+        /// can contain the type of <paramref name="other"/>.
+        /// </summary>
+        /// <param name="other">
+        /// The element whose type is tested for containment
+        /// </param>
+        public virtual bool CanContain(Element other)
+        {
+            ElementType otherType = other._elementType;
+            return _elementType.CanContain(otherType);
+        }
+
+        /// <summary>
+        /// Stores an attribute and its value in this element by delegating to
+        /// the element type, which applies its own naming and normalization rules.
+        /// </summary>
+        /// <param name="name">
+        /// The attribute name (Qname)
+        /// </param>
+        /// <param name="type">
+        /// The attribute type
+        /// </param>
+        /// <param name="value">
+        /// The attribute value
+        /// </param>
+        public virtual void SetAttribute(string name, string type, string value)
+        {
+            _elementType.SetAttribute(_attributes, name, type, value);
+        }
+
+        /// <summary>
+        /// Makes this element anonymous by removing every attribute whose type
+        /// is <c>ID</c> or whose qualified name is <c>name</c>.
+        /// </summary>
+        public virtual void Anonymize()
+        {
+            // Walk backwards so that removing an entry never shifts an index
+            // that is still to be visited.
+            int index = _attributes.Length;
+            while (--index >= 0)
+            {
+                bool identifies = _attributes.GetType(index).Equals("ID")
+                    || _attributes.GetQName(index).Equals("name");
+                if (identifies)
+                {
+                    _attributes.RemoveAttribute(index);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Cleans the attributes of this element.
+        /// Attributes with a null or empty local name (the name was ill-formed)
+        /// or a null value (the attribute was present in the element type but
+        /// not in this actual element) are removed.
+        /// </summary>
+        public virtual void Clean()
+        {
+            // Walk backwards so that removing an entry never shifts an index
+            // that is still to be visited.
+            int index = _attributes.Length;
+            while (--index >= 0)
+            {
+                string localName = _attributes.GetLocalName(index);
+                bool hasValue = _attributes.GetValue(index) != null;
+                if (hasValue && !string.IsNullOrEmpty(localName))
+                {
+                    continue;
+                }
+                _attributes.RemoveAttribute(index);
+            }
+        }
+
+        /// <summary>
+        /// Forces this element into preclosed status, meaning that an end-tag has
+        /// been seen but the element cannot yet be closed for structural reasons.
+        /// </summary>
+        public virtual void Preclose()
+        {
+            _isPreclosed = true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/ElementType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/ElementType.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/ElementType.cs
new file mode 100644
index 0000000..6d62a2f
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/ElementType.cs
@@ -0,0 +1,269 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+using Sax.Helpers;
+using System;
+using System.Text;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// This class represents an element type in the schema.
+    /// An element type has a name, a content model vector, a member-of vector,
+    /// a flags vector, default attributes, and a schema to which it belongs.
+    /// </summary>
+    /// <seealso cref="Schema" />
+    public class ElementType
+    {
+        private readonly Attributes atts; // default attributes
+        private readonly string localName; // element type local name
+        private readonly string name; // element type name (Qname)
+        private readonly string @namespace; // element type namespace name
+        private readonly Schema schema; // schema to which this belongs
+
+        /// <summary>
+        /// Construct an <see cref="ElementType"/>; it is usually better to let
+        /// the owning <see cref="Schema"/> create element types instead.
+        /// The content model, member-of, and flags vectors are specified as ints.
+        /// </summary>
+        /// <param name="name">The element type name (Qname)</param>
+        /// <param name="model">ORed-together bits representing the content 
+        /// models allowed in the content of this element type</param>
+        /// <param name="memberOf">ORed-together bits representing the content models
+        /// to which this element type belongs</param>
+        /// <param name="flags">ORed-together bits representing the flags associated
+        /// with this element type</param>
+        /// <param name="schema">
+        /// The schema with which this element type will be associated
+        /// </param>
+        public ElementType(string name, int model, int memberOf, int flags, Schema schema)
+        {
+            this.name = name;
+            Model = model;
+            MemberOf = memberOf;
+            Flags = flags;
+            atts = new Attributes();
+            // The schema must be stored before the namespace is derived:
+            // GetNamespace falls back to schema.Uri for unprefixed names.
+            this.schema = schema;
+            // NOTE: GetNamespace and GetLocalName are virtual, so an override in a
+            // subclass runs here before that subclass's constructor body has executed.
+            @namespace = GetNamespace(name, false);
+            localName = GetLocalName(name);
+        }
+
+        /// <summary>
+        /// Gets the name of this element type.
+        /// </summary>
+        public virtual string Name
+        {
+            get { return name; }
+        }
+
+        /// <summary>
+        /// Gets the namespace name of this element type.
+        /// </summary>
+        public virtual string Namespace
+        {
+            get { return @namespace; }
+        }
+
+        /// <summary>
+        /// Gets the local name of this element type.
+        /// </summary>
+        public virtual string LocalName
+        {
+            get { return localName; }
+        }
+
+        /// <summary>
+        /// Gets or sets the content models of this element type as a vector of bits
+        /// </summary>
+        public virtual int Model { get; set; }
+
+        /// <summary>
+        /// Gets or sets the content models to which this element type belongs as a vector of bits
+        /// </summary>
+        public virtual int MemberOf { get; set; }
+
+        /// <summary>
+        /// Gets or sets the flags associated with this element type as a vector of bits
+        /// </summary>
+        public virtual int Flags { get; set; }
+
+        /// <summary>
+        /// Returns the default attributes associated with this element type.
+        /// Attributes of type CDATA that don't have default values are
+        /// typically not included.  Other attributes without default values
+        /// have an internal value of <c>null</c>.
+        /// The return value is an Attributes to allow the caller to mutate
+        /// the attributes.
+        /// </summary>
+        public virtual Attributes Attributes
+        {
+            get { return atts; }
+        }
+
+        /// <summary>
+        /// Gets or sets the parent element type of this element type.
+        /// </summary>
+        public virtual ElementType Parent { get; set; }
+
+        /// <summary>
+        /// Gets the schema which this element type is associated with.
+        /// </summary>
+        public virtual Schema Schema
+        {
+            get { return schema; }
+        }
+
+        /// <summary>
+        /// Return a namespace name from a Qname.
+        /// The attribute flag tells us whether to return an empty namespace
+        /// name if there is no prefix, or use the schema default instead.
+        /// The prefix <c>xml</c> maps to the XML namespace; any other prefix
+        /// maps to an interned <c>urn:x-prefix:</c> name built from the prefix.
+        /// </summary>
+        /// <param name="name">The Qname</param>
+        /// <param name="attribute">True if name is an attribute name</param>
+        /// <returns>The namespace name</returns>
+        public virtual string GetNamespace(string name, bool attribute)
+        {
+            int colon = name.IndexOf(':');
+            if (colon == -1)
+            {
+                return attribute ? "" : schema.Uri;
+            }
+            string prefix = name.Substring(0, colon);
+            if (prefix.Equals("xml"))
+            {
+                return "http://www.w3.org/XML/1998/namespace";
+            }
+            return string.Intern("urn:x-prefix:" + prefix);
+        }
+
+        /// <summary>
+        /// Return a local name from a Qname.
+        /// </summary>
+        /// <param name="name">The Qname</param>
+        /// <returns>
+        /// The part of <paramref name="name"/> after the first colon (interned),
+        /// or <paramref name="name"/> itself when it contains no colon
+        /// </returns>
+        public virtual string GetLocalName(string name)
+        {
+            int colon = name.IndexOf(':');
+            if (colon == -1)
+            {
+                return name;
+            }
+            return string.Intern(name.Substring(colon + 1));
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> if this element type can contain another element type.
+        /// That is, if any of the models in this element's model vector
+        /// match any of the models in the other element type's member-of
+        /// vector.
+        /// </summary>
+        /// <param name="other">The other element type</param>
+        public virtual bool CanContain(ElementType other)
+        {
+            return (Model & other.MemberOf) != 0;
+        }
+
+        /// <summary>
+        /// Sets an attribute and its value into an <see cref="Sax.Helpers.Attributes"/> object.
+        /// Attempts to set a namespace declaration (<c>xmlns</c> or <c>xmlns:*</c>) are ignored.
+        /// A new attribute with no type is given the type <c>CDATA</c>; an existing
+        /// attribute with no type supplied keeps its current type. Values of any
+        /// type other than <c>CDATA</c> are passed through <see cref="Normalize(string)"/>.
+        /// </summary>
+        /// <param name="atts">The <see cref="Sax.Helpers.Attributes"/> object</param>
+        /// <param name="name">The name (Qname) of the attribute</param>
+        /// <param name="type">The type of the attribute, or <c>null</c></param>
+        /// <param name="value">The value of the attribute</param>
+        public virtual void SetAttribute(Attributes atts, string name, string type, string value)
+        {
+            // Attribute names are identifiers, so the prefix test must be ordinal;
+            // the default StartsWith(string) overload compares using the current culture.
+            if (name.Equals("xmlns") || name.StartsWith("xmlns:", StringComparison.Ordinal))
+            {
+                return;
+            }
+
+            string ns = GetNamespace(name, true);
+            string attLocalName = GetLocalName(name);
+            int i = atts.GetIndex(name);
+            if (i == -1)
+            {
+                // Not present yet: add a new attribute.
+                name = string.Intern(name);
+                if (type == null)
+                {
+                    type = "CDATA";
+                }
+                if (!type.Equals("CDATA"))
+                {
+                    value = Normalize(value);
+                }
+                atts.AddAttribute(ns, attLocalName, name, type, value);
+            }
+            else
+            {
+                // Already present: overwrite in place, keeping the old type
+                // when the caller did not supply one.
+                if (type == null)
+                {
+                    type = atts.GetType(i);
+                }
+                if (!type.Equals("CDATA"))
+                {
+                    value = Normalize(value);
+                }
+                atts.SetAttribute(i, ns, attLocalName, name, type, value);
+            }
+        }
+
+        /// <summary>
+        /// Normalize an attribute value (ID-style): leading and trailing white space
+        /// is trimmed and every run of consecutive space characters (U+0020) is
+        /// collapsed to a single space.
+        /// CDATA-style attribute normalization is already done.
+        /// </summary>
+        /// <param name="value">The value to normalize</param>
+        /// <returns>The normalized value, or <c>null</c> if <paramref name="value"/> is <c>null</c></returns>
+        public static string Normalize(string value)
+        {
+            if (value == null)
+            {
+                return null;
+            }
+            value = value.Trim();
+            // Fast path: nothing to collapse when no two spaces are adjacent.
+            if (value.IndexOf("  ", StringComparison.Ordinal) == -1)
+            {
+                return value;
+            }
+            bool space = false; // true while the previous character was a space
+            var b = new StringBuilder(value.Length);
+            foreach (char v in value)
+            {
+                if (v == ' ')
+                {
+                    if (!space)
+                    {
+                        b.Append(v);
+                    }
+                    space = true;
+                }
+                else
+                {
+                    b.Append(v);
+                    space = false;
+                }
+            }
+            return b.ToString();
+        }
+
+        /// <summary>
+        /// Sets an attribute and its value into the default attributes of this element type.
+        /// </summary>
+        /// <param name="name">The name of the attribute</param>
+        /// <param name="type">The type of the attribute</param>
+        /// <param name="value">The value of the attribute</param>
+        public virtual void SetAttribute(string name, string type, string value)
+        {
+            SetAttribute(atts, name, type, value);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs
new file mode 100644
index 0000000..ed41f84
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs
@@ -0,0 +1,745 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// 
+// 
+
+using Sax;
+using System;
+using System.IO;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// This class implements a table-driven scanner for HTML, allowing for lots of
+    /// defects.  It implements the Scanner interface, which accepts a Reader
+    /// object to fetch characters from and a ScanHandler object to report lexical
+    /// events to.
+    /// </summary>
+    public class HTMLScanner : IScanner, ILocator
+    {
+        // Start of state table
+        private const int S_ANAME = 1;
+        private const int S_APOS = 2;
+        private const int S_AVAL = 3;
+        private const int S_BB = 4;
+        private const int S_BBC = 5;
+        private const int S_BBCD = 6;
+        private const int S_BBCDA = 7;
+        private const int S_BBCDAT = 8;
+        private const int S_BBCDATA = 9;
+        private const int S_CDATA = 10;
+        private const int S_CDATA2 = 11;
+        private const int S_CDSECT = 12;
+        private const int S_CDSECT1 = 13;
+        private const int S_CDSECT2 = 14;
+        private const int S_COM = 15;
+        private const int S_COM2 = 16;
+        private const int S_COM3 = 17;
+        private const int S_COM4 = 18;
+        private const int S_DECL = 19;
+        private const int S_DECL2 = 20;
+        private const int S_DONE = 21;
+        private const int S_EMPTYTAG = 22;
+        private const int S_ENT = 23;
+        private const int S_EQ = 24;
+        private const int S_ETAG = 25;
+        private const int S_GI = 26;
+        private const int S_NCR = 27;
+        private const int S_PCDATA = 28;
+        private const int S_PI = 29;
+        private const int S_PITARGET = 30;
+        private const int S_QUOT = 31;
+        private const int S_STAGC = 32;
+        private const int S_TAG = 33;
+        private const int S_TAGWS = 34;
+        private const int S_XNCR = 35;
+        private const int A_ADUP = 1;
+        private const int A_ADUP_SAVE = 2;
+        private const int A_ADUP_STAGC = 3;
+        private const int A_ANAME = 4;
+        private const int A_ANAME_ADUP = 5;
+        private const int A_ANAME_ADUP_STAGC = 6;
+        private const int A_AVAL = 7;
+        private const int A_AVAL_STAGC = 8;
+        private const int A_CDATA = 9;
+        private const int A_CMNT = 10;
+        private const int A_DECL = 11;
+        private const int A_EMPTYTAG = 12;
+        private const int A_ENTITY = 13;
+        private const int A_ENTITY_START = 14;
+        private const int A_ETAG = 15;
+        private const int A_GI = 16;
+        private const int A_GI_STAGC = 17;
+        private const int A_LT = 18;
+        private const int A_LT_PCDATA = 19;
+        private const int A_MINUS = 20;
+        private const int A_MINUS2 = 21;
+        private const int A_MINUS3 = 22;
+        private const int A_PCDATA = 23;
+        private const int A_PI = 24;
+        private const int A_PITARGET = 25;
+        private const int A_PITARGET_PI = 26;
+        private const int A_SAVE = 27;
+        private const int A_SKIP = 28;
+        private const int A_SP = 29;
+        private const int A_STAGC = 30;
+        private const int A_UNGET = 31;
+        private const int A_UNSAVE_PCDATA = 32;
+        private static int[] statetable = {
+        S_ANAME, '/', A_ANAME_ADUP, S_EMPTYTAG,
+        S_ANAME, '=', A_ANAME, S_AVAL,
+        S_ANAME, '>', A_ANAME_ADUP_STAGC, S_PCDATA,
+        S_ANAME, 0, A_SAVE, S_ANAME,
+        S_ANAME, -1, A_ANAME_ADUP_STAGC, S_DONE,
+        S_ANAME, ' ', A_ANAME, S_EQ,
+        S_ANAME, '\n', A_ANAME, S_EQ,
+        S_ANAME, '\t', A_ANAME, S_EQ,
+        S_APOS, '\'', A_AVAL, S_TAGWS,
+        S_APOS, 0, A_SAVE, S_APOS,
+        S_APOS, -1, A_AVAL_STAGC, S_DONE,
+        S_APOS, ' ', A_SP, S_APOS,
+        S_APOS, '\n', A_SP, S_APOS,
+        S_APOS, '\t', A_SP, S_APOS,
+        S_AVAL, '\'', A_SKIP, S_APOS,
+        S_AVAL, '"', A_SKIP, S_QUOT,
+        S_AVAL, '>', A_AVAL_STAGC, S_PCDATA,
+        S_AVAL, 0, A_SAVE, S_STAGC,
+        S_AVAL, -1, A_AVAL_STAGC, S_DONE,
+        S_AVAL, ' ', A_SKIP, S_AVAL,
+        S_AVAL, '\n', A_SKIP, S_AVAL,
+        S_AVAL, '\t', A_SKIP, S_AVAL,
+        S_BB, 'C', A_SKIP, S_BBC,
+        S_BB, 0, A_SKIP, S_DECL,
+        S_BB, -1, A_SKIP, S_DONE,
+        S_BBC, 'D', A_SKIP, S_BBCD,
+        S_BBC, 0, A_SKIP, S_DECL,
+        S_BBC, -1, A_SKIP, S_DONE,
+        S_BBCD, 'A', A_SKIP, S_BBCDA,
+        S_BBCD, 0, A_SKIP, S_DECL,
+        S_BBCD, -1, A_SKIP, S_DONE,
+        S_BBCDA, 'T', A_SKIP, S_BBCDAT,
+        S_BBCDA, 0, A_SKIP, S_DECL,
+        S_BBCDA, -1, A_SKIP, S_DONE,
+        S_BBCDAT, 'A', A_SKIP, S_BBCDATA,
+        S_BBCDAT, 0, A_SKIP, S_DECL,
+        S_BBCDAT, -1, A_SKIP, S_DONE,
+        S_BBCDATA, '[', A_SKIP, S_CDSECT,
+        S_BBCDATA, 0, A_SKIP, S_DECL,
+        S_BBCDATA, -1, A_SKIP, S_DONE,
+        S_CDATA, '<', A_SAVE, S_CDATA2,
+        S_CDATA, 0, A_SAVE, S_CDATA,
+        S_CDATA, -1, A_PCDATA, S_DONE,
+        S_CDATA2, '/', A_UNSAVE_PCDATA, S_ETAG,
+        S_CDATA2, 0, A_SAVE, S_CDATA,
+        S_CDATA2, -1, A_UNSAVE_PCDATA, S_DONE,
+        S_CDSECT, ']', A_SAVE, S_CDSECT1,
+        S_CDSECT, 0, A_SAVE, S_CDSECT,
+        S_CDSECT, -1, A_SKIP, S_DONE,
+        S_CDSECT1, ']', A_SAVE, S_CDSECT2,
+        S_CDSECT1, 0, A_SAVE, S_CDSECT,
+        S_CDSECT1, -1, A_SKIP, S_DONE,
+        S_CDSECT2, '>', A_CDATA, S_PCDATA,
+        S_CDSECT2, 0, A_SAVE, S_CDSECT,
+        S_CDSECT2, -1, A_SKIP, S_DONE,
+        S_COM, '-', A_SKIP, S_COM2,
+        S_COM, 0, A_SAVE, S_COM2,
+        S_COM, -1, A_CMNT, S_DONE,
+        S_COM2, '-', A_SKIP, S_COM3,
+        S_COM2, 0, A_SAVE, S_COM2,
+        S_COM2, -1, A_CMNT, S_DONE,
+        S_COM3, '-', A_SKIP, S_COM4,
+        S_COM3, 0, A_MINUS, S_COM2,
+        S_COM3, -1, A_CMNT, S_DONE,
+        S_COM4, '-', A_MINUS3, S_COM4,
+        S_COM4, '>', A_CMNT, S_PCDATA,
+        S_COM4, 0, A_MINUS2, S_COM2,
+        S_COM4, -1, A_CMNT, S_DONE,
+        S_DECL, '-', A_SKIP, S_COM,
+        S_DECL, '[', A_SKIP, S_BB,
+        S_DECL, '>', A_SKIP, S_PCDATA,
+        S_DECL, 0, A_SAVE, S_DECL2,
+        S_DECL, -1, A_SKIP, S_DONE,
+        S_DECL2, '>', A_DECL, S_PCDATA,
+        S_DECL2, 0, A_SAVE, S_DECL2,
+        S_DECL2, -1, A_SKIP, S_DONE,
+        S_EMPTYTAG, '>', A_EMPTYTAG, S_PCDATA,
+        S_EMPTYTAG, 0, A_SAVE, S_ANAME,
+        S_EMPTYTAG, ' ', A_SKIP, S_TAGWS,
+        S_EMPTYTAG, '\n', A_SKIP, S_TAGWS,
+        S_EMPTYTAG, '\t', A_SKIP, S_TAGWS,
+        S_ENT, 0, A_ENTITY, S_ENT,
+        S_ENT, -1, A_ENTITY, S_DONE,
+        S_EQ, '=', A_SKIP, S_AVAL,
+        S_EQ, '>', A_ADUP_STAGC, S_PCDATA,
+        S_EQ, 0, A_ADUP_SAVE, S_ANAME,
+        S_EQ, -1, A_ADUP_STAGC, S_DONE,
+        S_EQ, ' ', A_SKIP, S_EQ,
+        S_EQ, '\n', A_SKIP, S_EQ,
+        S_EQ, '\t', A_SKIP, S_EQ,
+        S_ETAG, '>', A_ETAG, S_PCDATA,
+        S_ETAG, 0, A_SAVE, S_ETAG,
+        S_ETAG, -1, A_ETAG, S_DONE,
+        S_ETAG, ' ', A_SKIP, S_ETAG,
+        S_ETAG, '\n', A_SKIP, S_ETAG,
+        S_ETAG, '\t', A_SKIP, S_ETAG,
+        S_GI, '/', A_SKIP, S_EMPTYTAG,
+        S_GI, '>', A_GI_STAGC, S_PCDATA,
+        S_GI, 0, A_SAVE, S_GI,
+        S_GI, -1, A_SKIP, S_DONE,
+        S_GI, ' ', A_GI, S_TAGWS,
+        S_GI, '\n', A_GI, S_TAGWS,
+        S_GI, '\t', A_GI, S_TAGWS,
+        S_NCR, 0, A_ENTITY, S_NCR,
+        S_NCR, -1, A_ENTITY, S_DONE,
+        S_PCDATA, '&', A_ENTITY_START, S_ENT,
+        S_PCDATA, '<', A_PCDATA, S_TAG,
+        S_PCDATA, 0, A_SAVE, S_PCDATA,
+        S_PCDATA, -1, A_PCDATA, S_DONE,
+        S_PI, '>', A_PI, S_PCDATA,
+        S_PI, 0, A_SAVE, S_PI,
+        S_PI, -1, A_PI, S_DONE,
+        S_PITARGET, '>', A_PITARGET_PI, S_PCDATA,
+        S_PITARGET, 0, A_SAVE, S_PITARGET,
+        S_PITARGET, -1, A_PITARGET_PI, S_DONE,
+        S_PITARGET, ' ', A_PITARGET, S_PI,
+        S_PITARGET, '\n', A_PITARGET, S_PI,
+        S_PITARGET, '\t', A_PITARGET, S_PI,
+        S_QUOT, '"', A_AVAL, S_TAGWS,
+        S_QUOT, 0, A_SAVE, S_QUOT,
+        S_QUOT, -1, A_AVAL_STAGC, S_DONE,
+        S_QUOT, ' ', A_SP, S_QUOT,
+        S_QUOT, '\n', A_SP, S_QUOT,
+        S_QUOT, '\t', A_SP, S_QUOT,
+        S_STAGC, '>', A_AVAL_STAGC, S_PCDATA,
+        S_STAGC, 0, A_SAVE, S_STAGC,
+        S_STAGC, -1, A_AVAL_STAGC, S_DONE,
+        S_STAGC, ' ', A_AVAL, S_TAGWS,
+        S_STAGC, '\n', A_AVAL, S_TAGWS,
+        S_STAGC, '\t', A_AVAL, S_TAGWS,
+        S_TAG, '!', A_SKIP, S_DECL,
+        S_TAG, '/', A_SKIP, S_ETAG,
+        S_TAG, '?', A_SKIP, S_PITARGET,
+        S_TAG, '<', A_SAVE, S_TAG,
+        S_TAG, 0, A_SAVE, S_GI,
+        S_TAG, -1, A_LT_PCDATA, S_DONE,
+        S_TAG, ' ', A_LT, S_PCDATA,
+        S_TAG, '\n', A_LT, S_PCDATA,
+        S_TAG, '\t', A_LT, S_PCDATA,
+        S_TAGWS, '/', A_SKIP, S_EMPTYTAG,
+        S_TAGWS, '>', A_STAGC, S_PCDATA,
+        S_TAGWS, 0, A_SAVE, S_ANAME,
+        S_TAGWS, -1, A_STAGC, S_DONE,
+        S_TAGWS, ' ', A_SKIP, S_TAGWS,
+        S_TAGWS, '\n', A_SKIP, S_TAGWS,
+        S_TAGWS, '\t', A_SKIP, S_TAGWS,
+        S_XNCR, 0, A_ENTITY, S_XNCR,
+        S_XNCR, -1, A_ENTITY, S_DONE,
+
+    };
+        private static readonly string[] debug_actionnames = { "", "A_ADUP", "A_ADUP_SAVE", "A_ADUP_STAGC", "A_ANAME", "A_ANAME_ADUP", "A_ANAME_ADUP_STAGC", "A_AVAL", "A_AVAL_STAGC", "A_CDATA", "A_CMNT", "A_DECL", "A_EMPTYTAG", "A_ENTITY", "A_ENTITY_START", "A_ETAG", "A_GI", "A_GI_STAGC", "A_LT", "A_LT_PCDATA", "A_MINUS", "A_MINUS2", "A_MINUS3", "A_PCDATA", "A_PI", "A_PITARGET", "A_PITARGET_PI", "A_SAVE", "A_SKIP", "A_SP", "A_STAGC", "A_UNGET", "A_UNSAVE_PCDATA" };
+        private static readonly string[] debug_statenames = { "", "S_ANAME", "S_APOS", "S_AVAL", "S_BB", "S_BBC", "S_BBCD", "S_BBCDA", "S_BBCDAT", "S_BBCDATA", "S_CDATA", "S_CDATA2", "S_CDSECT", "S_CDSECT1", "S_CDSECT2", "S_COM", "S_COM2", "S_COM3", "S_COM4", "S_DECL", "S_DECL2", "S_DONE", "S_EMPTYTAG", "S_ENT", "S_EQ", "S_ETAG", "S_GI", "S_NCR", "S_PCDATA", "S_PI", "S_PITARGET", "S_QUOT", "S_STAGC", "S_TAG", "S_TAGWS", "S_XNCR" };
+
+        // End of state table
+
+        private string thePublicid;         // Locator state
+        private string theSystemid;
+        private int theLastLine;
+        private int theLastColumn;
+        private int theCurrentLine;
+        private int theCurrentColumn;
+
+        int theState;                   // Current state
+        int theNextState;               // Next state
+        char[] theOutputBuffer = new char[200]; // Output buffer
+        int theSize;                    // Current buffer size
+        int[] theWinMap = {				// Windows chars map
+		0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
+        0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178};
+
+        ///<summary>
+        ///   Index into the state table for [state][input character + 2].
+        ///   The state table consists of 4-entry runs on the form
+        ///   { current state, input character, action, next state }.
+        ///   We precompute the index into the state table for all possible
+        ///   { current state, input character } and store the result in
+        ///   the statetableIndex array. Since only some input characters
+        ///   are present in the state table, we only do the computation for
+        ///   characters 0 to the highest character value in the state table.
+        ///   An input character of -2 is used to cover all other characters
+        ///   as -2 is guaranteed not to match any input character entry
+        ///   in the state table.
+        ///   <para>When doing lookups, the input character should first be tested
+        ///   to be in the range [-1 (inclusive), statetableIndexMaxChar (exclusive)].
+        ///   If it isn't, use -2 as the input character.</para>
+        ///   <para>Finally, add 2 to the input character to cover for the fact that
+        ///   arrays cannot be indexed with negative values. Then look up
+        ///   the value in the statetableIndex. If the value is -1, then
+        ///   no action or next state was found for the { state, input } that
+        ///   you had. If it isn't -1, then action = statetable[value + 2] and
+        ///   next state = statetable[value + 3]. That is, the value points
+        ///   to the start of the answer 4-tuple in the statetable.</para>
+        /// </summary>
+        static short[][] statetableIndex;
+
+        ///<summary>
+        ///   One more than the highest character value seen in the statetable,
+        ///   i.e. the exclusive upper bound for directly indexed input characters.
+        ///   See the doc comment for statetableIndex to see how this
+        ///   is used.
+        /// </summary>
+        static int statetableIndexMaxChar;
+        // Guards the one-time construction of the shared (static) lookup tables.
+        private static readonly object statetableIndexLock = new object();
+
+        /// <summary>
+        /// Creates a scanner and, on first use, builds the static
+        /// <c>statetableIndex</c> lookup table from <c>statetable</c>.
+        /// </summary>
+        /// <remarks>
+        /// The lookup tables are static, so they are computed only once and are
+        /// published only after they have been completely filled in. Rebuilding
+        /// them on every construction would let a scanner running on another
+        /// thread observe a freshly allocated, still zero-filled table.
+        /// </remarks>
+        public HTMLScanner()
+        {
+            lock (statetableIndexLock)
+            {
+                if (statetableIndex != null)
+                {
+                    // Already built by an earlier instance.
+                    return;
+                }
+
+                // Find the largest state number and input character in the table;
+                // entries are 4-tuples { state, character, action, next state }.
+                int maxState = -1;
+                int maxChar = -1;
+                for (int i = 0; i < statetable.Length; i += 4)
+                {
+                    if (statetable[i] > maxState)
+                    {
+                        maxState = statetable[i];
+                    }
+                    if (statetable[i + 1] > maxChar)
+                    {
+                        maxChar = statetable[i + 1];
+                    }
+                }
+
+                // Columns cover characters -2..maxChar, shifted by +2 to be non-negative.
+                var index = new short[maxState + 1][];
+                for (int i = 0; i <= maxState; i++)
+                {
+                    index[i] = new short[maxChar + 3];
+                }
+
+                for (int state = 0; state <= maxState; ++state)
+                {
+                    for (int ch = -2; ch <= maxChar; ++ch)
+                    {
+                        int hit = -1;
+                        int action = 0;
+                        for (int i = 0; i < statetable.Length; i += 4)
+                        {
+                            if (state != statetable[i])
+                            {
+                                // Rows of one state are contiguous: once a match has
+                                // been recorded and the state changes, stop searching.
+                                if (action != 0) break;
+                                continue;
+                            }
+                            if (statetable[i + 1] == 0)
+                            {
+                                // Character 0 is the default row; keep looking for
+                                // an exact match that may follow it.
+                                hit = i;
+                                action = statetable[i + 2];
+                            }
+                            else if (statetable[i + 1] == ch)
+                            {
+                                // Exact match wins over the default row.
+                                hit = i;
+                                action = statetable[i + 2];
+                                break;
+                            }
+                        }
+                        index[state][ch + 2] = (short)hit;
+                    }
+                }
+
+                // Publish only after the table is complete.
+                statetableIndexMaxChar = maxChar + 1;
+                statetableIndex = index;
+            }
+        }
+
+        // Locator implementation
+
+        /// <summary>
+        /// Gets the line recorded for the last marked scan position
+        /// (0 after the document locator has been reset).
+        /// </summary>
+        public virtual int LineNumber
+        {
+            get
+            {
+                return theLastLine;
+            }
+        }
+        /// <summary>
+        /// Gets the column recorded for the last marked scan position
+        /// (0 after the document locator has been reset).
+        /// </summary>
+        public virtual int ColumnNumber
+        {
+            get
+            {
+                return theLastColumn;
+            }
+        }
+        /// <summary>
+        /// Gets the public id most recently supplied to
+        /// <see cref="ResetDocumentLocator(string, string)"/>.
+        /// </summary>
+        public virtual string PublicId
+        {
+            get
+            {
+                return thePublicid;
+            }
+        }
+        /// <summary>
+        /// Gets the system id most recently supplied to
+        /// <see cref="ResetDocumentLocator(string, string)"/>.
+        /// </summary>
+        public virtual string SystemId
+        {
+            get
+            {
+                return theSystemid;
+            }
+        }
+
+
+        // Scanner implementation
+
+        /// <summary>
+        /// Resets the document locator: stores the supplied public and system ids
+        /// and sets both the current and the last-marked line/column back to zero.
+        /// </summary>
+        /// <param name="publicid">Public id</param>
+        /// <param name="systemid">System id</param>
+        public virtual void ResetDocumentLocator(string publicid, string systemid)
+        {
+            thePublicid = publicid;
+            theSystemid = systemid;
+
+            // Rewind the live position first, then the last-marked position.
+            theCurrentColumn = 0;
+            theCurrentLine = 0;
+            theLastColumn = 0;
+            theLastLine = 0;
+        }
+
+        /// <summary>
+        /// Scan HTML source, reporting lexical events.
+        /// </summary>
+        /// <remarks>
+        /// Characters are examined with <see cref="TextReader.Peek()"/> and only
+        /// consumed once the action selected from the state table has run, which
+        /// is how an action "ungets" a character: it simply leaves it in the reader.
+        /// A leading byte order mark is skipped, CR and CR LF are normalized to LF,
+        /// characters 0x80-0x9F are remapped through <c>theWinMap</c>, and other
+        /// control characters below 0x20 (except tab and LF) are dropped.
+        /// Scanning stops when the state machine reaches <c>S_DONE</c>, after which
+        /// <c>h.EOF</c> is reported.
+        /// </remarks>
+        /// <param name="r">Reader that provides characters</param>
+        /// <param name="h">ScanHandler that accepts lexical events.</param>
+        /// <exception cref="ArgumentNullException">
+        /// <paramref name="r"/> or <paramref name="h"/> is <c>null</c>.
+        /// </exception>
+        /// <exception cref="Exception">
+        /// The state table yields no action, or an unknown action, for the current
+        /// state and input character.
+        /// </exception>
+        public virtual void Scan(TextReader r, IScanHandler h)
+        {
+            if (r == null) throw new ArgumentNullException("r");
+            if (h == null) throw new ArgumentNullException("h");
+
+            theState = S_PCDATA;
+
+            int firstChar = r.Peek();   // Remove any leading BOM
+            if (firstChar == '\uFEFF') r.Read();
+
+            while (theState != S_DONE)
+            {
+                int ch = r.Peek();
+                // When true, the character currently at the head of the reader
+                // must stay there for the next iteration.
+                bool unread = false;
+
+                // Process control characters
+                if (ch >= 0x80 && ch <= 0x9F) ch = theWinMap[ch - 0x80];
+
+                if (ch == '\r')
+                {
+                    r.Read();
+                    ch = r.Peek();      // expect LF next
+                    if (ch != '\n')
+                    {
+                        // Lone CR: treat it as LF and leave the following
+                        // character in the reader.
+                        unread = true;
+                        ch = '\n';
+                    }
+                }
+
+                if (ch == '\n')
+                {
+                    theCurrentLine++;
+                    theCurrentColumn = 0;
+                }
+                else
+                {
+                    theCurrentColumn++;
+                }
+
+                if (!(ch >= 0x20 || ch == '\n' || ch == '\t' || ch == -1))
+                {
+                    // Ignored control character. It was only peeked, so it has to be
+                    // consumed here; otherwise the next Peek() returns the very same
+                    // character and the loop never terminates.
+                    r.Read();
+                    continue;
+                }
+
+                // Search state table
+                int adjCh = (ch >= -1 && ch < statetableIndexMaxChar) ? ch : -2;
+                int statetableRow = statetableIndex[theState][adjCh + 2];
+                int action = 0;
+                if (statetableRow != -1)
+                {
+                    action = statetable[statetableRow + 2];
+                    theNextState = statetable[statetableRow + 3];
+                }
+
+                switch (action)
+                {
+                    case 0:
+                        throw new Exception(
+                            "HTMLScanner can't cope with " + (int)ch + " in state " +
+                            (int)theState);
+                    case A_ADUP:
+                        h.Adup(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_ADUP_SAVE:
+                        h.Adup(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        Save(ch, h);
+                        break;
+                    case A_ADUP_STAGC:
+                        h.Adup(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        h.STagC(theOutputBuffer, 0, theSize);
+                        break;
+                    case A_ANAME:
+                        h.Aname(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_ANAME_ADUP:
+                        h.Aname(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        h.Adup(theOutputBuffer, 0, theSize);
+                        break;
+                    case A_ANAME_ADUP_STAGC:
+                        h.Aname(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        h.Adup(theOutputBuffer, 0, theSize);
+                        h.STagC(theOutputBuffer, 0, theSize);
+                        break;
+                    case A_AVAL:
+                        h.Aval(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_AVAL_STAGC:
+                        h.Aval(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        h.STagC(theOutputBuffer, 0, theSize);
+                        break;
+                    case A_CDATA:
+                        Mark();
+                        // suppress the final "]]" in the buffer
+                        if (theSize > 1) theSize -= 2;
+                        h.PCDATA(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_ENTITY_START:
+                        h.PCDATA(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        Save(ch, h);
+                        break;
+                    case A_ENTITY:
+                        Mark();
+                        char ch1 = (char)ch;
+                        // While the character can still belong to the entity
+                        // reference, keep collecting it.
+                        if (theState == S_ENT && ch1 == '#')
+                        {
+                            theNextState = S_NCR;
+                            Save(ch, h);
+                            break;
+                        }
+                        else if (theState == S_NCR && (ch1 == 'x' || ch1 == 'X'))
+                        {
+                            theNextState = S_XNCR;
+                            Save(ch, h);
+                            break;
+                        }
+                        else if (theState == S_ENT && char.IsLetterOrDigit(ch1))
+                        {
+                            Save(ch, h);
+                            break;
+                        }
+                        else if (theState == S_NCR && char.IsDigit(ch1))
+                        {
+                            Save(ch, h);
+                            break;
+                        }
+                        else if (theState == S_XNCR && (char.IsDigit(ch1) || "abcdefABCDEF".IndexOf(ch1) != -1))
+                        {
+                            Save(ch, h);
+                            break;
+                        }
+
+                        // The whole entity reference has been collected
+                        // (offset 1 skips the first buffered character).
+                        h.Entity(theOutputBuffer, 1, theSize - 1);
+                        int ent = h.GetEntity();
+                        if (ent != 0)
+                        {
+                            theSize = 0;
+                            if (ent >= 0x80 && ent <= 0x9F)
+                            {
+                                ent = theWinMap[ent - 0x80];
+                            }
+                            if (ent < 0x20)
+                            {
+                                // Control becomes space
+                                ent = 0x20;
+                            }
+                            else if (ent >= 0xD800 && ent <= 0xDFFF)
+                            {
+                                // Surrogates get dropped
+                                ent = 0;
+                            }
+                            else if (ent <= 0xFFFF)
+                            {
+                                // BMP character
+                                Save(ent, h);
+                            }
+                            else
+                            {
+                                // Astral converted to two surrogates
+                                ent -= 0x10000;
+                                Save((ent >> 10) + 0xD800, h);
+                                Save((ent & 0x3FF) + 0xDC00, h);
+                            }
+                            if (ch != ';')
+                            {
+                                // The terminator is not part of the entity; rescan it.
+                                unread = true;
+                                theCurrentColumn--;
+                            }
+                        }
+                        else
+                        {
+                            // Handler reported no entity value: leave the buffer as it
+                            // is and rescan the terminating character.
+                            unread = true;
+                            theCurrentColumn--;
+                        }
+                        theNextState = S_PCDATA;
+                        break;
+                    case A_ETAG:
+                        h.ETag(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_DECL:
+                        h.Decl(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_GI:
+                        h.GI(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_GI_STAGC:
+                        h.GI(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        h.STagC(theOutputBuffer, 0, theSize);
+                        break;
+                    case A_LT:
+                        Mark();
+                        Save('<', h);
+                        Save(ch, h);
+                        break;
+                    case A_LT_PCDATA:
+                        Mark();
+                        Save('<', h);
+                        h.PCDATA(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_PCDATA:
+                        Mark();
+                        h.PCDATA(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_CMNT:
+                        Mark();
+                        h.Cmnt(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_MINUS3:
+                        Save('-', h);
+                        Save(' ', h);
+                        break;
+                    case A_MINUS2:
+                        Save('-', h);
+                        Save(' ', h);
+                        // C# has no implicit case fall-through, so the A_MINUS
+                        // work is repeated inline here.
+                        Save('-', h);
+                        Save(ch, h);
+                        break;
+                    case A_MINUS:
+                        Save('-', h);
+                        Save(ch, h);
+                        break;
+                    case A_PI:
+                        Mark();
+                        h.PI(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_PITARGET:
+                        h.PITarget(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_PITARGET_PI:
+                        h.PITarget(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        h.PI(theOutputBuffer, 0, theSize);
+                        break;
+                    case A_SAVE:
+                        Save(ch, h);
+                        break;
+                    case A_SKIP:
+                        break;
+                    case A_SP:
+                        Save(' ', h);
+                        break;
+                    case A_STAGC:
+                        h.STagC(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    case A_EMPTYTAG:
+                        Mark();
+                        if (theSize > 0) h.GI(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        h.STagE(theOutputBuffer, 0, theSize);
+                        break;
+                    case A_UNGET:
+                        unread = true;
+                        theCurrentColumn--;
+                        break;
+                    case A_UNSAVE_PCDATA:
+                        if (theSize > 0) theSize--;
+                        h.PCDATA(theOutputBuffer, 0, theSize);
+                        theSize = 0;
+                        break;
+                    default:
+                        throw new Exception("Can't process state " + action);
+                }
+                if (!unread)
+                {
+                    // The peeked character has been handled; consume it.
+                    r.Read();
+                }
+                theState = theNextState;
+            }
+            h.EOF(theOutputBuffer, 0, 0);
+        }
+
+        /// <summary>
+        /// Remembers the current scan position (line and column) as the last
+        /// "point of interest" - e.g. the start of a tag, CDATA section or
+        /// processing instruction - so it can be reported through the locator
+        /// properties.
+        /// </summary>
+        private void Mark()
+        {
+            theLastLine = theCurrentLine;
+            theLastColumn = theCurrentColumn;
+        }
+
+        /// <summary>
+        /// A callback for the ScanHandler that allows it to force the lexer
+        /// state to CDATA content (no markup is recognized except the end of
+        /// the element). It does so by setting the next state to <c>S_CDATA</c>.
+        /// </summary>
+        public virtual void StartCDATA()
+        {
+            theNextState = S_CDATA;
+        }
+
+        /// <summary>
+        /// Appends one character to the output buffer, making room first when
+        /// fewer than 20 free slots remain.
+        /// </summary>
+        /// <param name="ch">Character to store; it is narrowed to <c>char</c>.</param>
+        /// <param name="h">Handler that receives a PCDATA chunk if the buffer is flushed.</param>
+        private void Save(int ch, IScanHandler h)
+        {
+            bool nearlyFull = theSize >= theOutputBuffer.Length - 20;
+            if (nearlyFull)
+            {
+                bool inCharacterData = theState == S_PCDATA || theState == S_CDATA;
+                if (!inCharacterData)
+                {
+                    // Inside markup the text cannot be emitted piecemeal:
+                    // double the buffer and carry the contents over.
+                    char[] grown = new char[theOutputBuffer.Length * 2];
+                    Array.Copy(theOutputBuffer, 0, grown, 0, theSize + 1);
+                    theOutputBuffer = grown;
+                }
+                else
+                {
+                    // Character data can be reported in pieces:
+                    // hand over what has been collected and start afresh.
+                    h.PCDATA(theOutputBuffer, 0, theSize);
+                    theSize = 0;
+                }
+            }
+
+            theOutputBuffer[theSize++] = (char)ch;
+        }
+
+        /**
+        Test procedure.  Reads HTML from the standard input and writes
+        PYX to the standard output.
+        */
+
+        //	public static void main(string[] argv) {
+        //		IScanner s = new HTMLScanner();
+        //		TextReader r = new StreamReader(System.in, "UTF-8");
+        //		TextWriter w = new StreamWriter(System.out, "UTF-8");
+        //		PYXWriter pw = new PYXWriter(w);
+        //		s.scan(r, pw);
+        //		w.close();
+        //		}
+
+
+        /// <summary>
+        /// Formats a character code for debug output: LF becomes the two
+        /// characters <c>\n</c>, any other value below 32 is shown as
+        /// <c>0x</c> followed by its upper-case hexadecimal form, and everything
+        /// else is shown as the character wrapped in single quotes.
+        /// </summary>
+        /// <param name="value">Character code to format.</param>
+        /// <returns>A printable representation of <paramref name="value"/>.</returns>
+        private static string NiceChar(int value)
+        {
+            if (value == '\n')
+            {
+                return "\\n";
+            }
+
+            return value < 32
+                ? "0x" + value.ToString("X")
+                : "'" + ((char)value) + "'";
+        }
+    }
+}