You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/08/06 17:59:16 UTC
[18/33] lucenenet git commit: Lucene.Net.Benchmark: Added Sax and
TagSoup to the Support folder.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/XMLReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/XMLReader.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/XMLReader.cs
new file mode 100644
index 0000000..443348d
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/XMLReader.cs
@@ -0,0 +1,1567 @@
+// XMLWriter.java - serialize an XML document.
+// Written by David Megginson, david@megginson.com
+// and placed by him into the public domain.
+// Extensively modified by John Cowan for TagSoup.
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+
+using Sax;
+using Sax.Ext;
+using Sax.Helpers;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+
+namespace TagSoup
+{
+ /// <summary>
+ /// Filter to write an XML document from a SAX event stream.
+ /// </summary>
+ /// <remarks>
+ /// This class can be used by itself or as part of a SAX event
+ /// stream: it takes as input a series of SAX2 ContentHandler
+ /// events and uses the information in those events to write
+ /// an XML document. Since this class is a filter, it can also
+ /// pass the events on down a filter chain for further processing
+ /// (you can use the XMLWriter to take a snapshot of the current
+ /// state at any point in a filter chain), and it can be
+ /// used directly as a ContentHandler for a SAX2 XMLReader.
+ /// <para>
+ /// The client creates a document by invoking the methods for
+ /// standard SAX2 events, always beginning with the
+ /// <see cref="StartDocument()" /> method and ending with
+ /// the <see cref="EndDocument()" /> method. There are convenience
+ /// methods provided so that clients to not have to create empty
+ /// attribute lists or provide empty strings as parameters; for
+ /// example, the method invocation
+ /// </para>
+ /// <code>
+ /// w.StartElement("foo");
+ /// </code>
+ /// <para>is equivalent to the regular SAX2 ContentHandler method</para>
+ /// <code>
+ /// w.StartElement("", "foo", "", new Attributes());
+ /// </code>
+ /// <para>
+ /// Except that it is more efficient because it does not allocate
+ /// a new empty attribute list each time. The following code will send
+ /// a simple XML document to standard output:
+ /// </para>
+ /// <code>
+ /// XMLWriter w = new XMLWriter();
+ /// w.StartDocument();
+ /// w.StartElement("greeting");
+ /// w.Characters("Hello, world!");
+ /// w.EndElement("greeting");
+ /// w.EndDocument();
+ /// </code>
+ /// <para>The resulting document will look like this:</para>
+ /// <code>
+ /// <?xml version="1.0" standalone="yes"?>
+ /// <greeting>Hello, world!</greeting>
+ /// </code>
+ /// <para>
+ /// In fact, there is an even simpler convenience method,
+ /// <see cref="DataElement(string, string)"/>, designed for writing elements that
+ /// contain only character data, so the code to generate the
+ /// document could be shortened to
+ /// </para>
+ /// <code>
+ /// XMLWriter w = new XMLWriter();
+ /// w.StartDocument();
+ /// w.DataElement("greeting", "Hello, world!");
+ /// w.EndDocument();
+ /// </code>
+ /// <h2>Whitespace</h2>
+ /// <para>
+ /// According to the XML Recommendation, <em>all</em> whitespace
+ /// in an XML document is potentially significant to an application,
+ /// so this class never adds newlines or indentation. If you
+ /// insert three elements in a row, as in
+ /// </para>
+ /// <code>
+ /// w.DataElement("item", "1");
+ /// w.DataElement("item", "2");
+ /// w.DataElement("item", "3");
+ /// </code>
+ /// <para>you will end up with</para>
+ /// <code>
+ /// <item>1</item><item>3</item><item>3</item>
+ /// </code>
+ /// <para>
+ /// You need to invoke one of the <c>Characters</c> methods
+ /// explicitly to add newlines or indentation. Alternatively, you
+ /// can use <see cref="com.megginson.sax.DataWriter DataWriter" />, which
+ /// is derived from this class -- it is optimized for writing
+ /// purely data-oriented (or field-oriented) XML, and does automatic
+ /// linebreaks and indentation (but does not support mixed content
+ /// properly).
+ /// </para>
+ /// <h2>Namespace Support</h2>
+ /// <para>
+ /// The writer contains extensive support for XML Namespaces, so that
+ /// a client application does not have to keep track of prefixes and
+ /// supply <c>xmlns</c> attributes. By default, the XML writer will
+ /// generate Namespace declarations in the form _NS1, _NS2, etc., wherever
+ /// they are needed, as in the following example:
+ /// </para>
+ /// <code>
+ /// w.StartDocument();
+ /// w.EmptyElement("http://www.foo.com/ns/", "foo");
+ /// w.EndDocument();
+ /// </code>
+ /// <para>The resulting document will look like this:</para>
+ /// <code>
+ /// <?xml version="1.0" standalone="yes"?>
+ /// <_NS1:foo xmlns:_NS1="http://www.foo.com/ns/"/>
+ /// </code>
+ /// <para>
+ /// In many cases, document authors will prefer to choose their
+ /// own prefixes rather than using the (ugly) default names. The
+ /// XML writer allows two methods for selecting prefixes:
+ /// </para>
+ /// <list type="number">
+ /// <item><description>the qualified name</description></item>
+ /// <item><description>the <see cref="Prefix" /> property.</description></item>
+ /// </list>
+ /// <para>
+ /// Whenever the XML writer finds a new Namespace URI, it checks
+ /// to see if a qualified (prefixed) name is also available; if so
+ /// it attempts to use the name's prefix (as long as the prefix is
+ /// not already in use for another Namespace URI).
+ /// </para>
+ /// <para>
+ /// Before writing a document, the client can also pre-map a prefix
+ /// to a Namespace URI with the setPrefix method:
+ /// </para>
+ /// <code>
+ /// w.SetPrefix("http://www.foo.com/ns/", "foo");
+ /// w.StartDocument();
+ /// w.EmptyElement("http://www.foo.com/ns/", "foo");
+ /// w.EndDocument();
+ /// </code>
+ /// <para>The resulting document will look like this:</para>
+ /// <code>
+ /// <?xml version="1.0" standalone="yes"?>
+ /// <foo:foo xmlns:foo="http://www.foo.com/ns/"/>
+ /// </code>
+ /// <para>The default Namespace simply uses an empty string as the prefix:</para>
+ /// <code>
+ /// w.SetPrefix("http://www.foo.com/ns/", "");
+ /// w.StartDocument();
+ /// w.EmptyElement("http://www.foo.com/ns/", "foo");
+ /// w.EndDocument();
+ /// </code>
+ /// <para>The resulting document will look like this:</para>
+ /// <code>
+ /// <?xml version="1.0" standalone="yes"?>
+ /// <foo xmlns="http://www.foo.com/ns/"/>
+ /// </code>
+ /// <para>
+ /// By default, the XML writer will not declare a Namespace until
+ /// it is actually used. Sometimes, this approach will create
+ /// a large number of Namespace declarations, as in the following
+ /// example:
+ /// </para>
+ /// <code>
+ /// <xml version="1.0" standalone="yes"?>
+ /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+ /// <rdf:Description about="http://www.foo.com/ids/books/12345">
+ /// <dc:title xmlns:dc="http://www.purl.org/dc/">A Dark Night</dc:title>
+ /// <dc:creator xmlns:dc="http://www.purl.org/dc/">Jane Smith</dc:title>
+ /// <dc:date xmlns:dc="http://www.purl.org/dc/">2000-09-09</dc:title>
+ /// </rdf:Description>
+ /// </rdf:RDF>
+ /// </code>
+ /// <para>
+ /// The "rdf" prefix is declared only once, because the RDF Namespace
+ /// is used by the root element and can be inherited by all of its
+ /// descendants; the "dc" prefix, on the other hand, is declared three
+ /// times, because no higher element uses the Namespace. To solve this
+ /// problem, you can instruct the XML writer to predeclare Namespaces
+ /// on the root element even if they are not used there:
+ /// </para>
+ /// <code>
+ /// w.ForceNSDecl("http://www.purl.org/dc/");
+ /// </code>
+ /// <para>
+ /// Now, the "dc" prefix will be declared on the root element even
+ /// though it's not needed there, and can be inherited by its
+ /// descendants:
+ /// </para>
+ /// <code>
+ /// <xml version="1.0" standalone="yes"?>
+ /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ /// xmlns:dc="http://www.purl.org/dc/">
+ /// <rdf:Description about="http://www.foo.com/ids/books/12345">
+ /// <dc:title>A Dark Night</dc:title>
+ /// <dc:creator>Jane Smith</dc:title>
+ /// <dc:date>2000-09-09</dc:title>
+ /// </rdf:Description>
+ /// </rdf:RDF>
+ /// </code>
+ /// <para>
+ /// This approach is also useful for declaring Namespace prefixes
+ /// that be used by qualified names appearing in attribute values or
+ /// character data.
+ /// </para>
+ /// </remarks>
+ /// <author>David Megginson, david@megginson.com</author>
+ /// <version>.2</version>
+ /// <seealso cref="IXMLFilter" />
+ /// <seealso cref="IContentHandler" />
+ public class XMLWriter : XMLFilter, ILexicalHandler
+ {
+ ////////////////////////////////////////////////////////////////////
+ // Constructors.
+ ////////////////////////////////////////////////////////////////////
+
+ /// <summary>
+ /// Create a new XML writer.
+ /// <para>Write to standard output.</para>
+ /// </summary>
+ public XMLWriter()
+ {
+ Init(null);
+ }
+
+ /// <summary>
+ /// Create a new XML writer.
+ /// <para>Write to the writer provided.</para>
+ /// </summary>
+ /// <param name="writer">
+ /// The output destination, or null to use standard
+ /// output.
+ /// </param>
+ public XMLWriter(TextWriter writer)
+ {
+ Init(writer);
+ }
+
+ /// <summary>
+ /// Create a new XML writer.
+ /// <para>Use the specified XML reader as the parent.</para>
+ /// </summary>
+ /// <param name="xmlreader">
+ /// The parent in the filter chain, or null
+ /// for no parent.
+ /// </param>
+ public XMLWriter(IXMLReader xmlreader) : base(xmlreader)
+ {
+ Init(null);
+ }
+
+ /// <summary>
+ /// Create a new XML writer.
+ /// <para>
+ /// Use the specified XML reader as the parent, and write
+ /// to the specified writer.
+ /// </para>
+ /// </summary>
+ /// <param name="xmlreader">
+ /// The parent in the filter chain, or null
+ /// for no parent.
+ /// </param>
+ /// <param name="writer">
+ /// The output destination, or null to use standard
+ /// output.
+ /// </param>
+ public XMLWriter(IXMLReader xmlreader, TextWriter writer) : base(xmlreader)
+ {
+ Init(writer);
+ }
+
+ public virtual void EndCDATA()
+ {
+ }
+
+ public virtual void EndDTD()
+ {
+ }
+
+ public virtual void EndEntity(string name)
+ {
+ }
+
+ public virtual void StartCDATA()
+ {
+ }
+
+ public virtual void StartDTD(string name, string publicid, string systemid)
+ {
+ if (name == null)
+ {
+ return; // can't cope
+ }
+ if (hasOutputDTD)
+ {
+ return; // only one DTD
+ }
+ hasOutputDTD = true;
+ Write("<!DOCTYPE ");
+ Write(name);
+ if (systemid == null)
+ {
+ systemid = "";
+ }
+ if (overrideSystem != null)
+ {
+ systemid = overrideSystem;
+ }
+ char sysquote = (systemid.IndexOf('"') != -1) ? '\'' : '"';
+ if (overridePublic != null)
+ {
+ publicid = overridePublic;
+ }
+ if (!(publicid == null || "".Equals(publicid)))
+ {
+ char pubquote = (publicid.IndexOf('"') != -1) ? '\'' : '"';
+ Write(" PUBLIC ");
+ Write(pubquote);
+ Write(publicid);
+ Write(pubquote);
+ Write(' ');
+ }
+ else
+ {
+ Write(" SYSTEM ");
+ }
+ Write(sysquote);
+ Write(systemid);
+ Write(sysquote);
+ Write(">\n");
+ }
+
+ public virtual void StartEntity(string name)
+ {
+ }
+
+ /// <summary>
+ /// Internal initialization method.
+ /// <para>All of the public constructors invoke this method.</para>
+ /// </summary>
+ /// <param name="writer">
+ /// The output destination, or null to use
+ /// standard output.
+ /// </param>
+ private void Init(TextWriter writer)
+ {
+ SetOutput(writer);
+ nsSupport = new NamespaceSupport();
+ prefixTable = new Hashtable();
+ forcedDeclTable = new Hashtable();
+ doneDeclTable = new Hashtable();
+ outputProperties = new Dictionary<string, string>();
+ }
+
+ /// <summary>
+ /// Reset the writer.
+ /// <para>
+ /// This method is especially useful if the writer throws an
+ /// exception before it is finished, and you want to reuse the
+ /// writer for a new document. It is usually a good idea to
+ /// invoke <see cref="Flush" /> before resetting the writer,
+ /// to make sure that no output is lost.
+ /// </para>
+ /// <para>
+ /// This method is invoked automatically by the
+ /// <see cref="StartDocument" /> method before writing
+ /// a new document.
+ /// </para>
+ /// <para>
+ /// <strong>Note:</strong> this method will <em>not</em>
+ /// clear the prefix or URI information in the writer or
+ /// the selected output writer.
+ /// </para>
+ /// </summary>
+ /// <seealso cref="Flush" />
+ public virtual void Reset()
+ {
+ elementLevel = 0;
+ prefixCounter = 0;
+ nsSupport.Reset();
+ }
+
+ /// <summary>
+ /// Flush the output.
+ /// <para>
+ /// This method flushes the output stream. It is especially useful
+ /// when you need to make certain that the entire document has
+ /// been written to output but do not want to close the output
+ /// stream.
+ /// </para>
+ /// <para>
+ /// This method is invoked automatically by the
+ /// <see cref="EndDocument" /> method after writing a
+ /// document.
+ /// </para>
+ /// </summary>
+ /// <seealso cref="Reset" />
+ public virtual void Flush()
+ {
+ output.Flush();
+ }
+
+ /// <summary>
+ /// Set a new output destination for the document.
+ /// </summary>
+ /// <param name="writer">
+ /// The output destination, or null to use
+ /// standard output.
+ /// </param>
+ /// <seealso cref="Flush" />
+ public virtual void SetOutput(TextWriter writer)
+ {
+ if (writer == null)
+ {
+ output = new StreamWriter(Console.OpenStandardOutput());
+ }
+ else
+ {
+ output = writer;
+ }
+ }
+
+ /// <summary>
+ /// Specify a preferred prefix for a Namespace URI.
+ /// <para>
+ /// Note that this method does not actually force the Namespace
+ /// to be declared; to do that, use the <see cref="ForceNSDecl(string)" />
+ /// method as well.
+ /// </para>
+ /// </summary>
+ /// <param name="uri">
+ /// The Namespace URI.
+ /// </param>
+ /// <param name="prefix">
+ /// The preferred prefix, or "" to select
+ /// the default Namespace.
+ /// </param>
+ /// <seealso cref="GetPrefix" />
+ /// <seealso cref="ForceNSDecl(string)" />
+ /// <seealso cref="ForceNSDecl(string,string)" />
+ public virtual void SetPrefix(string uri, string prefix)
+ {
+ prefixTable[uri] = prefix;
+ }
+
+ /// <summary>
+ /// Get the current or preferred prefix for a Namespace URI.
+ /// </summary>
+ /// <param name="uri">The Namespace URI.</param>
+ /// <returns>The preferred prefix, or "" for the default Namespace.</returns>
+ /// <seealso cref="SetPrefix" />
+ public virtual string GetPrefix(string uri)
+ {
+ return (string)(prefixTable.ContainsKey(uri) ? prefixTable[uri] : string.Empty);
+ }
+
+ /// <summary>
+ /// Force a Namespace to be declared on the root element.
+ /// <para>
+ /// By default, the XMLWriter will declare only the Namespaces
+ /// needed for an element; as a result, a Namespace may be
+ /// declared many places in a document if it is not used on the
+ /// root element.
+ /// </para>
+ /// <para>
+ /// This method forces a Namespace to be declared on the root
+ /// element even if it is not used there, and reduces the number
+ /// of xmlns attributes in the document.
+ /// </para>
+ /// </summary>
+ /// <param name="uri">
+ /// The Namespace URI to declare.
+ /// </param>
+ /// <seealso cref="ForceNSDecl(string,string)" />
+ /// <seealso cref="SetPrefix" />
+ public virtual void ForceNSDecl(string uri)
+ {
+ forcedDeclTable[uri] = true;
+ }
+
+ /// <summary>
+ /// Force a Namespace declaration with a preferred prefix.
+ /// <para>
+ /// This is a convenience method that invokes <see cref="SetPrefix" />
+ /// then <see cref="ForceNSDecl(string)" />.
+ /// </para>
+ /// </summary>
+ /// <param name="uri">
+ /// The Namespace URI to declare on the root element.
+ /// </param>
+ /// <param name="prefix">
+ /// The preferred prefix for the Namespace, or ""
+ /// for the default Namespace.
+ /// </param>
+ /// <seealso cref="SetPrefix" />
+ /// <seealso cref="ForceNSDecl(string)" />
+ public virtual void ForceNSDecl(string uri, string prefix)
+ {
+ SetPrefix(uri, prefix);
+ ForceNSDecl(uri);
+ }
+
+ ////////////////////////////////////////////////////////////////////
+ // Methods from Sax.5IContentHandler.
+ ////////////////////////////////////////////////////////////////////
+
+ /// <summary>
+ /// Write the XML declaration at the beginning of the document.
+ /// Pass the event on down the filter chain for further processing.
+ /// </summary>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the XML declaration, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="IContentHandler.StartDocument" />
+ public override void StartDocument()
+ {
+ Reset();
+ if (!("yes".Equals(outputProperties[OMIT_XML_DECLARATION] ?? "no")))
+ {
+ Write("<?xml");
+ if (version == null)
+ {
+ Write(" version=\"1.0\"");
+ }
+ else
+ {
+ Write(" version=\"");
+ Write(version);
+ Write("\"");
+ }
+ if (false == string.IsNullOrEmpty(outputEncoding))
+ {
+ Write(" encoding=\"");
+ Write(outputEncoding);
+ Write("\"");
+ }
+ if (standalone == null)
+ {
+ Write(" standalone=\"yes\"?>\n");
+ }
+ else
+ {
+ Write(" standalone=\"");
+ Write(standalone);
+ Write("\"");
+ }
+ }
+ base.StartDocument();
+ }
+
+ /// <summary>
+ /// Write a newline at the end of the document.
+ /// Pass the event on down the filter chain for further processing.
+ /// </summary>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the newline, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="IContentHandler.EndDocument" />
+ public override void EndDocument()
+ {
+ Write('\n');
+ base.EndDocument();
+ try
+ {
+ Flush();
+ }
+ catch (IOException e)
+ {
+ throw new SAXException(e.Message, e);
+ }
+ }
+
+ /// <summary>
+ /// Write a start tag.
+ /// Pass the event on down the filter chain for further processing.
+ /// </summary>
+ /// <param name="uri">
+ /// The Namespace URI, or the empty string if none
+ /// is available.
+ /// </param>
+ /// <param name="localName">
+ /// The element's local (unprefixed) name (required).
+ /// </param>
+ /// <param name="qName">
+ /// The element's qualified (prefixed) name, or the
+ /// empty string is none is available. This method will
+ /// use the qName as a template for generating a prefix
+ /// if necessary, but it is not guaranteed to use the
+ /// same qName.
+ /// </param>
+ /// <param name="atts">
+ /// The element's attribute list (must not be null).
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the start tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="IContentHandler.StartElement" />
+ public override void StartElement(string uri, string localName, string qName, IAttributes atts)
+ {
+ elementLevel++;
+ nsSupport.PushContext();
+ if (forceDTD && !hasOutputDTD)
+ {
+ StartDTD(localName ?? qName, "", "");
+ }
+ Write('<');
+ WriteName(uri, localName, qName, true);
+ WriteAttributes(atts);
+ if (elementLevel == 1)
+ {
+ ForceNSDecls();
+ }
+ WriteNSDecls();
+ Write('>');
+ // System.out.println("%%%% startElement [" + qName + "] htmlMode = " + htmlMode);
+ if (htmlMode && (qName.Equals("script") || qName.Equals("style")))
+ {
+ cdataElement = true;
+ // System.out.println("%%%% CDATA element");
+ }
+ base.StartElement(uri, localName, qName, atts);
+ }
+
+ /// <summary>
+ /// Write an end tag.
+ /// Pass the event on down the filter chain for further processing.
+ /// </summary>
+ /// <param name="uri">
+ /// The Namespace URI, or the empty string if none
+ /// is available.
+ /// </param>
+ /// <param name="localName">
+ /// The element's local (unprefixed) name (required).
+ /// </param>
+ /// <param name="qName">
+ /// The element's qualified (prefixed) name, or the
+ /// empty string is none is available. This method will
+ /// use the qName as a template for generating a prefix
+ /// if necessary, but it is not guaranteed to use the
+ /// same qName.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the end tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="IContentHandler.EndElement" />
+ public override void EndElement(string uri, string localName, string qName)
+ {
+ if (
+ !(htmlMode && (uri.Equals("http://www.w3.org/1999/xhtml") || uri.Equals(""))
+ && (qName.Equals("area") || qName.Equals("base") || qName.Equals("basefont") || qName.Equals("br")
+ || qName.Equals("col") || qName.Equals("frame") || qName.Equals("hr") || qName.Equals("img")
+ || qName.Equals("input") || qName.Equals("isindex") || qName.Equals("link") || qName.Equals("meta")
+ || qName.Equals("param"))))
+ {
+ Write("</");
+ WriteName(uri, localName, qName, true);
+ Write('>');
+ }
+ if (elementLevel == 1)
+ {
+ Write('\n');
+ }
+ cdataElement = false;
+ base.EndElement(uri, localName, qName);
+ nsSupport.PopContext();
+ elementLevel--;
+ }
+
+ /// <summary>
+ /// Write character data.
+ /// Pass the event on down the filter chain for further processing.
+ /// </summary>
+ /// <param name="ch">
+ /// The array of characters to write.
+ /// </param>
+ /// <param name="start">
+ /// The starting position in the array.
+ /// </param>
+ /// <param name="length">
+ /// The number of characters to write.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the characters, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="IContentHandler.Characters" />
+ public override void Characters(char[] ch, int start, int length)
+ {
+ if (!cdataElement)
+ {
+ WriteEsc(ch, start, length, false);
+ }
+ else
+ {
+ for (int i = start; i < start + length; i++)
+ {
+ Write(ch[i]);
+ }
+ }
+ base.Characters(ch, start, length);
+ }
+
+ /// <summary>
+ /// Write ignorable whitespace.
+ /// Pass the event on down the filter chain for further processing.
+ /// </summary>
+ /// <param name="ch">
+ /// The array of characters to write.
+ /// </param>
+ /// <param name="start">
+ /// The starting position in the array.
+ /// </param>
+ /// <param name="length">
+ /// The number of characters to write.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the whitespace, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="IContentHandler.IgnorableWhitespace" />
+ public override void IgnorableWhitespace(char[] ch, int start, int length)
+ {
+ WriteEsc(ch, start, length, false);
+ base.IgnorableWhitespace(ch, start, length);
+ }
+
+ /// <summary>
+ /// Write a processing instruction.
+ /// Pass the event on down the filter chain for further processing.
+ /// </summary>
+ /// <param name="target">
+ /// The PI target.
+ /// </param>
+ /// <param name="data">
+ /// The PI data.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the PI, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="IContentHandler.ProcessingInstruction" />
+ public override void ProcessingInstruction(string target, string data)
+ {
+ Write("<?");
+ Write(target);
+ Write(' ');
+ Write(data);
+ Write("?>");
+ if (elementLevel < 1)
+ {
+ Write('\n');
+ }
+ base.ProcessingInstruction(target, data);
+ }
+
+ /// <summary>
+ /// Write an empty element.
+ /// This method writes an empty element tag rather than a start tag
+ /// followed by an end tag. Both a <see cref="StartElement" />
+ /// and an <see cref="EndElement(string,string,string)" /> event will
+ /// be passed on down the filter chain.
+ /// </summary>
+ /// <param name="uri">
+ /// The element's Namespace URI, or the empty string
+ /// if the element has no Namespace or if Namespace
+ /// processing is not being performed.
+ /// </param>
+ /// <param name="localName">
+ /// The element's local name (without prefix). This
+ /// parameter must be provided.
+ /// </param>
+ /// <param name="qName">
+ /// The element's qualified name (with prefix), or
+ /// the empty string if none is available. This parameter
+ /// is strictly advisory: the writer may or may not use
+ /// the prefix attached.
+ /// </param>
+ /// <param name="atts">
+ /// The element's attribute list.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the empty tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="StartElement" />
+ /// <seealso cref="EndElement(string,string,string) " />
+ public virtual void EmptyElement(string uri, string localName, string qName, IAttributes atts)
+ {
+ nsSupport.PushContext();
+ Write('<');
+ WriteName(uri, localName, qName, true);
+ WriteAttributes(atts);
+ if (elementLevel == 1)
+ {
+ ForceNSDecls();
+ }
+ WriteNSDecls();
+ Write("/>");
+ base.StartElement(uri, localName, qName, atts);
+ base.EndElement(uri, localName, qName);
+ }
+
+ /// <summary>
+ /// Start a new element without a qname or attributes.
+ /// <para>
+ /// This method will provide a default empty attribute
+ /// list and an empty string for the qualified name.
+ /// It invokes <see cref="StartElement(string, string, string, IAttributes)"/>
+ /// directly.
+ /// </para>
+ /// </summary>
+ /// <param name="uri">
+ /// The element's Namespace URI.
+ /// </param>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the start tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="StartElement(string, string, string, IAttributes)" />
+ public virtual void StartElement(string uri, string localName)
+ {
+ StartElement(uri, localName, "", EMPTY_ATTS);
+ }
+
+ /// <summary>
+ /// Start a new element without a qname, attributes or a Namespace URI.
+ /// <para>
+ /// This method will provide an empty string for the
+ /// Namespace URI, and empty string for the qualified name,
+ /// and a default empty attribute list. It invokes
+ /// #startElement(string, string, string, Attributes)}
+ /// directly.
+ /// </para>
+ /// </summary>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the start tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="StartElement(string, string, string, IAttributes)" />
+ public virtual void StartElement(string localName)
+ {
+ StartElement("", localName, "", EMPTY_ATTS);
+ }
+
+ /// <summary>
+ /// End an element without a qname.
+ /// <para>
+ /// This method will supply an empty string for the qName.
+ /// It invokes <see cref="EndElement(string, string, string)" />
+ /// directly.
+ /// </para>
+ /// </summary>
+ /// <param name="uri">
+ /// The element's Namespace URI.
+ /// </param>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the end tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="EndElement(string, string, string)" />
+ public virtual void EndElement(string uri, string localName)
+ {
+ EndElement(uri, localName, "");
+ }
+
+ /// <summary>
+ /// End an element without a Namespace URI or qname.
+ /// <para>
+ /// This method will supply an empty string for the qName
+ /// and an empty string for the Namespace URI.
+ /// It invokes <see cref="EndElement(string, string, string)" />
+ /// directly.
+ /// </para>
+ /// </summary>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the end tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="EndElement(string, string, string)" />
+ public virtual void EndElement(string localName)
+ {
+ EndElement("", localName, "");
+ }
+
+ /// <summary>
+ /// Add an empty element without a qname or attributes.
+ /// <para>
+ /// This method will supply an empty string for the qname
+ /// and an empty attribute list. It invokes
+ /// <see cref="EmptyElement(string, string, string, IAttributes)" />
+ /// directly.
+ /// </para>
+ /// </summary>
+ /// <param name="uri">
+ /// The element's Namespace URI.
+ /// </param>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the empty tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="EmptyElement(string, string, string, IAttributes)" />
+ public virtual void EmptyElement(string uri, string localName)
+ {
+ EmptyElement(uri, localName, "", EMPTY_ATTS);
+ }
+
+ /// <summary>
+ /// Add an empty element without a Namespace URI, qname or attributes.
+ /// <para>
+ /// This method will supply an empty string for the qname,
+ /// and empty string for the Namespace URI, and an empty
+ /// attribute list. It invokes
+ /// <see cref="EmptyElement(string, string, string, IAttributes)" />
+ /// directly.
+ /// </para>
+ /// </summary>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the empty tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="EmptyElement(string, string, string, IAttributes)" />
+ public virtual void EmptyElement(string localName)
+ {
+ EmptyElement("", localName, "", EMPTY_ATTS);
+ }
+
+ /// <summary>
+ /// Write an element with character data content.
+ /// <para>
+ /// This is a convenience method to write a complete element
+ /// with character data content, including the start tag
+ /// and end tag.
+ /// </para>
+ /// <para>
+ /// This method invokes
+ /// <see cref="StartElement(string, string, string, IAttributes)" />,
+ /// followed by
+ /// <see cref="Characters(string)" />, followed by
+ /// <see cref="EndElement(string, string, string)" />.
+ /// </para>
+ /// </summary>
+ /// <param name="uri">
+ /// The element's Namespace URI.
+ /// </param>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <param name="qName">
+ /// The element's default qualified name.
+ /// </param>
+ /// <param name="atts">
+ /// The element's attributes.
+ /// </param>
+ /// <param name="content">
+ /// The character data content.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the empty tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="StartElement(string, string, string, IAttributes)" />
+ /// <seealso cref="Characters(string)" />
+ /// <seealso cref="EndElement(string, string, string)" />
+ public virtual void DataElement(string uri, string localName, string qName, IAttributes atts, string content)
+ {
+ StartElement(uri, localName, qName, atts);
+ Characters(content);
+ EndElement(uri, localName, qName);
+ }
+
+ /// <summary>
+ /// Write an element with character data content but no attributes.
+ /// <para>
+ /// This is a convenience method to write a complete element
+ /// with character data content, including the start tag
+ /// and end tag. This method provides an empty string
+ /// for the qname and an empty attribute list.
+ /// </para>
+ /// <para>
+ /// This method invokes
+ /// <see cref="StartElement(string, string, string, IAttributes)" />,
+ /// followed by
+ /// <see cref="Characters(string)" />, followed by
+ /// <see cref="EndElement(string, string, string)" />.
+ /// </para>
+ /// </summary>
+ /// <param name="uri">
+ /// The element's Namespace URI.
+ /// </param>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <param name="content">
+ /// The character data content.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the empty tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="StartElement(string, string, string, IAttributes)" />
+ /// <seealso cref="Characters(string)" />
+ /// <seealso cref="EndElement(string, string, string)" />
+ public virtual void DataElement(string uri, string localName, string content)
+ {
+ DataElement(uri, localName, "", EMPTY_ATTS, content);
+ }
+
+ /// <summary>
+ /// Write an element with character data content but no attributes or Namespace URI.
+ /// <para>
+ /// This is a convenience method to write a complete element
+ /// with character data content, including the start tag
+ /// and end tag. The method provides an empty string for the
+ /// Namespace URI, and empty string for the qualified name,
+ /// and an empty attribute list.
+ /// </para>
+ /// <para>
+ /// This method invokes
+ /// <see cref="StartElement(string, string, string, IAttributes)" />,
+ /// followed by
+ /// <see cref="Characters(string)" />, followed by
+ /// <see cref="EndElement(string, string, string)" />.
+ /// </para>
+ /// </summary>
+ /// <param name="localName">
+ /// The element's local name.
+ /// </param>
+ /// <param name="content">
+ /// The character data content.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the empty tag, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="StartElement(string, string, string, IAttributes)" />
+ /// <seealso cref="Characters(string)" />
+ /// <seealso cref="EndElement(string, string, string)" />
+ public virtual void DataElement(string localName, string content)
+ {
+ DataElement("", localName, "", EMPTY_ATTS, content);
+ }
+
+ /// <summary>
+ /// Write a string of character data, with XML escaping.
+ /// <para>
+ /// This is a convenience method that takes an XML
+ /// string, converts it to a character array, then invokes
+ /// <see cref="Characters(char[], int, int)" />.
+ /// </para>
+ /// </summary>
+ /// <param name="data">
+ /// The character data.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error
+ /// writing the string, or if a handler further down
+ /// the filter chain raises an exception.
+ /// </exception>
+ /// <seealso cref="Characters(char[], int, int)" />
+ public virtual void Characters(string data)
+ {
+ char[] ch = data.ToCharArray();
+ Characters(ch, 0, ch.Length);
+ }
+
+ /// <summary>
+ /// Force all Namespaces to be declared.
+ /// This method is used on the root element to ensure that
+ /// the predeclared Namespaces all appear.
+ /// </summary>
+ private void ForceNSDecls()
+ {
+ foreach (string prefix in forcedDeclTable.Keys)
+ {
+ DoPrefix(prefix, null, true);
+ }
+ }
+
+ /// <summary>
+ /// Determine the prefix for an element or attribute name.
+ /// TODO: this method probably needs some cleanup.
+ /// </summary>
+ /// <param name="uri">
+ /// The Namespace URI.
+ /// </param>
+ /// <param name="qName">
+ /// The qualified name (optional); this will be used
+ /// to indicate the preferred prefix if none is currently
+ /// bound.
+ /// </param>
+ /// <param name="isElement">
+ /// true if this is an element name, false
+ /// if it is an attribute name (which cannot use the
+ /// default Namespace).
+ /// </param>
+ private string DoPrefix(string uri, string qName, bool isElement)
+ {
+ string defaultNS = nsSupport.GetUri("");
+ if ("".Equals(uri))
+ {
+ if (isElement && defaultNS != null)
+ {
+ nsSupport.DeclarePrefix("", "");
+ }
+ return null;
+ }
+ string prefix;
+ if (isElement && defaultNS != null && uri.Equals(defaultNS))
+ {
+ prefix = "";
+ }
+ else
+ {
+ prefix = nsSupport.GetPrefix(uri);
+ }
+ if (prefix != null)
+ {
+ return prefix;
+ }
+ bool containsPrefix = doneDeclTable.ContainsKey(uri);
+ prefix = (string)(containsPrefix ? doneDeclTable[uri] : null);
+ if (containsPrefix && ((!isElement || defaultNS != null) && "".Equals(prefix) || nsSupport.GetUri(prefix) != null))
+ {
+ prefix = null;
+ }
+ if (prefix == null)
+ {
+ containsPrefix = prefixTable.ContainsKey(uri);
+ prefix = (string)(containsPrefix ? prefixTable[uri] : null);
+ if (containsPrefix
+ && ((!isElement || defaultNS != null) && "".Equals(prefix) || nsSupport.GetUri(prefix) != null))
+ {
+ prefix = null;
+ }
+ }
+ if (prefix == null && qName != null && !"".Equals(qName))
+ {
+ int i = qName.IndexOf(':');
+ if (i == -1)
+ {
+ if (isElement && defaultNS == null)
+ {
+ prefix = "";
+ }
+ }
+ else
+ {
+ prefix = qName.Substring(0, i);
+ }
+ }
+ for (; prefix == null || nsSupport.GetUri(prefix) != null; prefix = "__NS" + ++prefixCounter)
+ {
+ }
+ nsSupport.DeclarePrefix(prefix, uri);
+ doneDeclTable[uri] = prefix;
+ return prefix;
+ }
+
+ /// <summary>
+ /// Write a raw character.
+ /// </summary>
+ /// <param name="c">
+ /// The character to write.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error writing
+ /// the character, this method will throw an IOException
+ /// wrapped in a SAXException.
+ /// </exception>
+ private void Write(char c)
+ {
+ try
+ {
+ output.Write(c);
+ }
+ catch (IOException e)
+ {
+ throw new SAXException(e.ToString(), e);
+ }
+ }
+
+ /// <summary>
+ /// Write a raw string.
+ /// </summary>
+ /// <param name="s"></param>
+ /// <exception cref="SAXException">
+ /// If there is an error writing the string,
+ /// this method will throw an IOException wrapped in a SAXException
+ /// </exception>
+ private void Write(string s)
+ {
+ try
+ {
+ output.Write(s);
+ }
+ catch (IOException e)
+ {
+ throw new SAXException(e.ToString(), e);
+ }
+ }
+
+ /// <summary>
+ /// Write out an attribute list, escaping values.
+ /// The names will have prefixes added to them.
+ /// </summary>
+ /// <param name="atts">
+ /// The attribute list to write.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error writing
+ /// the attribute list, this method will throw an
+ /// IOException wrapped in a SAXException.
+ /// </exception>
+ private void WriteAttributes(IAttributes atts)
+ {
+ int len = atts.Length;
+ for (int i = 0; i < len; i++)
+ {
+ char[] ch = atts.GetValue(i).ToCharArray();
+ Write(' ');
+ WriteName(atts.GetURI(i), atts.GetLocalName(i), atts.GetQName(i), false);
+ if (htmlMode && BoolAttribute(atts.GetLocalName(i), atts.GetQName(i), atts.GetValue(i)))
+ {
+ break;
+ }
+ Write("=\"");
+ WriteEsc(ch, 0, ch.Length, true);
+ Write('"');
+ }
+ }
+
+ // Return true if the attribute is an HTML bool from the above list.
+ private bool BoolAttribute(string localName, string qName, string value)
+ {
+ string name = localName;
+ if (name == null)
+ {
+ int i = qName.IndexOf(':');
+ if (i != -1)
+ {
+ name = qName.Substring(i + 1, qName.Length);
+ }
+ }
+ if (!name.Equals(value))
+ {
+ return false;
+ }
+ for (int j = 0; j < _bools.Length; j++)
+ {
+ if (name.Equals(_bools[j]))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// <summary>
+ /// Write an array of data characters with escaping.
+ /// </summary>
+ /// <param name="ch">
+ /// The array of characters.
+ /// </param>
+ /// <param name="start">
+ /// The starting position.
+ /// </param>
+ /// <param name="length">
+ /// The number of characters to use.
+ /// </param>
+ /// <param name="isAttVal">
+ /// true if this is an attribute value literal.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// If there is an error writing
+ /// the characters, this method will throw an
+ /// IOException wrapped in a SAXException.
+ /// </exception>
+ private void WriteEsc(char[] ch, int start, int length, bool isAttVal)
+ {
+ for (int i = start; i < start + length; i++)
+ {
+ switch (ch[i])
+ {
+ case '&':
+ Write("&");
+ break;
+ case '<':
+ Write("<");
+ break;
+ case '>':
+ Write(">");
+ break;
+ case '\"':
+ if (isAttVal)
+ {
+ Write(""");
+ }
+ else
+ {
+ Write('\"');
+ }
+ break;
+ default:
+ if (!unicodeMode && ch[i] > '\u007f')
+ {
+ Write("&#");
+ Write(((int)ch[i]).ToString(CultureInfo.InvariantCulture));
+ Write(';');
+ }
+ else
+ {
+ Write(ch[i]);
+ }
+ break;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Write out the list of Namespace declarations.
+ /// </summary>
+ /// <exception cref="SAXException">
+ /// This method will throw
+ /// an IOException wrapped in a SAXException if
+ /// there is an error writing the Namespace
+ /// declarations.
+ /// </exception>
+ private void WriteNSDecls()
+ {
+ IEnumerable prefixes = nsSupport.GetDeclaredPrefixes();
+ foreach (string prefix in prefixes)
+ {
+ string uri = nsSupport.GetUri(prefix);
+ if (uri == null)
+ {
+ uri = "";
+ }
+ char[] ch = uri.ToCharArray();
+ Write(' ');
+ if ("".Equals(prefix))
+ {
+ Write("xmlns=\"");
+ }
+ else
+ {
+ Write("xmlns:");
+ Write(prefix);
+ Write("=\"");
+ }
+ WriteEsc(ch, 0, ch.Length, true);
+ Write('\"');
+ }
+ }
+
+ /// <summary>
+ /// Write an element or attribute name.
+ /// </summary>
+ /// <param name="uri">
+ /// The Namespace URI.
+ /// </param>
+ /// <param name="localName">
+ /// The local name.
+ /// </param>
+ /// <param name="qName">
+ /// The prefixed name, if available, or the empty string.
+ /// </param>
+ /// <param name="isElement">
+ /// true if this is an element name, false if it
+ /// is an attribute name.
+ /// </param>
+ /// <exception cref="SAXException">
+ /// This method will throw an
+ /// IOException wrapped in a SAXException if there is
+ /// an error writing the name.
+ /// </exception>
+ private void WriteName(string uri, string localName, string qName, bool isElement)
+ {
+ string prefix = DoPrefix(uri, qName, isElement);
+ if (prefix != null && !"".Equals(prefix))
+ {
+ Write(prefix);
+ Write(':');
+ }
+ if (localName != null && !"".Equals(localName))
+ {
+ Write(localName);
+ }
+ else
+ {
+ int i = qName.IndexOf(':');
+ Write(qName.Substring(i + 1, qName.Length - (i + 1)));
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////
+ // Default LexicalHandler implementation
+ ////////////////////////////////////////////////////////////////////
+
+ public virtual void Comment(char[] ch, int start, int length)
+ {
+ Write("<!--");
+ for (int i = start; i < start + length; i++)
+ {
+ Write(ch[i]);
+ if (ch[i] == '-' && i + 1 <= start + length && ch[i + 1] == '-')
+ {
+ Write(' ');
+ }
+ }
+ Write("-->");
+ }
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Output properties
+ ////////////////////////////////////////////////////////////////////
+
+ public virtual string GetOutputProperty(string key)
+ {
+ return outputProperties[key];
+ }
+
+ public virtual void SetOutputProperty(string key, string value)
+ {
+ outputProperties[key] = value;
+ // System.out.println("%%%% key = [" + key + "] value = [" + value +"]");
+ if (key.Equals(ENCODING))
+ {
+ outputEncoding = value;
+ unicodeMode = value.Substring(0, 3).Equals("utf", StringComparison.OrdinalIgnoreCase);
+ // System.out.println("%%%% unicodeMode = " + unicodeMode);
+ }
+ else if (key.Equals(METHOD))
+ {
+ htmlMode = value.Equals("html");
+ }
+ else if (key.Equals(DOCTYPE_PUBLIC))
+ {
+ overridePublic = value;
+ forceDTD = true;
+ }
+ else if (key.Equals(DOCTYPE_SYSTEM))
+ {
+ overrideSystem = value;
+ forceDTD = true;
+ }
+ else if (key.Equals(VERSION))
+ {
+ version = value;
+ }
+ else if (key.Equals(STANDALONE))
+ {
+ standalone = value;
+ }
+ // System.out.println("%%%% htmlMode = " + htmlMode);
+ }
+
+ ////////////////////////////////////////////////////////////////////
+ // Constants.
+ ////////////////////////////////////////////////////////////////////
+
+ private readonly IAttributes EMPTY_ATTS = new Attributes();
+ public const string CDATA_SECTION_ELEMENTS =
+ "cdata-section-elements";
+ public const string DOCTYPE_PUBLIC = "doctype-public";
+ public const string DOCTYPE_SYSTEM = "doctype-system";
+ public const string ENCODING = "encoding";
+ public const string INDENT = "indent"; // currently ignored
+ public const string MEDIA_TYPE = "media-type"; // currently ignored
+ public const string METHOD = "method"; // currently html or xml
+ public const string OMIT_XML_DECLARATION = "omit-xml-declaration";
+ public const string STANDALONE = "standalone"; // currently ignored
+ public const string VERSION = "version";
+
+ ////////////////////////////////////////////////////////////////////
+ // Internal state.
+ ////////////////////////////////////////////////////////////////////
+
+
+ private readonly string[] _bools = {
+ "checked",
+ "compact",
+ "declare",
+ "defer",
+ "disabled",
+ "ismap",
+ "multiple",
+ "nohref",
+ "noresize",
+ "noshade",
+ "nowrap",
+ "readonly",
+ "selected"
+ };
+
+ private Hashtable prefixTable;
+ private Hashtable forcedDeclTable;
+ private Hashtable doneDeclTable;
+ private int elementLevel = 0;
+ private TextWriter output;
+ private NamespaceSupport nsSupport;
+ private int prefixCounter = 0;
+ private IDictionary<string, string> outputProperties;
+ private bool unicodeMode = false;
+ private string outputEncoding = "";
+ private bool htmlMode = false;
+ private bool forceDTD = false;
+ private bool hasOutputDTD = false;
+ private string overridePublic = null;
+ private string overrideSystem = null;
+ private string version = null;
+ private string standalone = null;
+ private bool cdataElement = false;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/definitions/html.stml
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/definitions/html.stml b/src/Lucene.Net.Benchmark/Support/TagSoup/definitions/html.stml
new file mode 100644
index 0000000..4cab973
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/definitions/html.stml
@@ -0,0 +1,249 @@
+<!--
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-->
+
+<statetable xmlns='http://www.ccil.org/~cowan/XML/tagsoup/stml'
+ version='1.0'>
+
+ <symbol id='EOF'/>
+ <symbol id='S'/>
+ <symbol id='default'/>
+
+ <action id='A_ADUP'/>
+ <action id='A_ADUP_SAVE'/>
+ <action id='A_ADUP_STAGC'/>
+ <action id='A_ANAME'/>
+ <action id='A_ANAME_ADUP_STAGC'/>
+ <action id='A_AVAL'/>
+ <action id='A_AVAL_STAGC'/>
+ <action id='A_CDATA'/>
+ <action id='A_CMNT'/>
+ <action id='A_DECL'/>
+ <action id='A_ENTITY'/>
+ <action id='A_ENTITY_START'/>
+ <action id='A_ETAG'/>
+ <action id='A_EMPTYTAG'/>
+ <action id='A_ANAME_ADUP'/>
+ <action id='A_GI'/>
+ <action id='A_GI_STAGC'/>
+ <action id='A_LT'/>
+ <action id='A_LT_PCDATA'/>
+ <action id='A_MINUS'/>
+ <action id='A_MINUS2'/>
+ <action id='A_MINUS3'/>
+ <action id='A_PCDATA'/>
+ <action id='A_PI'/>
+ <action id='A_PITARGET'/>
+ <action id='A_PITARGET_PI'/>
+ <action id='A_SAVE'/>
+ <action id='A_SKIP'/>
+ <action id='A_SP'/>
+ <action id='A_STAGC'/>
+ <action id='A_UNGET'/>
+ <action id='A_UNSAVE_PCDATA'/>
+
+ <state id='S_ANAME'>
+ <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
+ <tr char='=' action='A_ANAME' newstate='S_AVAL'/>
+ <tr char='>' action='A_ANAME_ADUP_STAGC' newstate='S_PCDATA'/>
+ <tr char='/' action='A_ANAME_ADUP' newstate='S_EMPTYTAG'/>
+ <tr symbol='EOF' action='A_ANAME_ADUP_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_ANAME' newstate='S_EQ'/>
+ </state>
+ <state id='S_APOS'>
+ <tr symbol='default' action='A_SAVE' newstate='S_APOS'/>
+ <tr char=''' action='A_AVAL' newstate='S_TAGWS'/>
+ <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SP' newstate='S_APOS'/>
+ </state>
+ <state id='S_AVAL'>
+ <tr symbol='default' action='A_SAVE' newstate='S_STAGC'/>
+ <tr char='"' action='A_SKIP' newstate='S_QUOT'/>
+ <tr char=''' action='A_SKIP' newstate='S_APOS'/>
+ <tr char='>' action='A_AVAL_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SKIP' newstate='S_AVAL'/>
+ </state>
+ <state id='S_CDATA'>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDATA'/>
+ <tr char='<' action='A_SAVE' newstate='S_CDATA2'/>
+ <tr symbol='EOF' action='A_PCDATA' newstate='S_DONE'/>
+ </state>
+ <state id='S_CDATA2'>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDATA'/>
+ <tr char='/' action='A_UNSAVE_PCDATA' newstate='S_ETAG'/>
+ <tr symbol='EOF' action='A_UNSAVE_PCDATA' newstate='S_DONE'/>
+ </state>
+ <state id='S_COM'>
+ <tr symbol='default' action='A_SAVE' newstate='S_COM2'/>
+ <tr char='-' action='A_SKIP' newstate='S_COM2'/>
+ <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
+ </state>
+ <state id='S_COM2'>
+ <tr symbol='default' action='A_SAVE' newstate='S_COM2'/>
+ <tr char='-' action='A_SKIP' newstate='S_COM3'/>
+ <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
+ </state>
+ <state id='S_COM3'>
+ <tr symbol='default' action='A_MINUS' newstate='S_COM2'/>
+ <tr char='-' action='A_SKIP' newstate='S_COM4'/>
+ <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
+ </state>
+ <state id='S_COM4'>
+ <tr symbol='default' action='A_MINUS2' newstate='S_COM2'/>
+ <tr char='-' action='A_MINUS3' newstate='S_COM4'/>
+ <tr char='>' action='A_CMNT' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_CMNT' newstate='S_DONE'/>
+ </state>
+ <state id='S_DECL'>
+ <tr symbol='default' action='A_SAVE' newstate='S_DECL2'/>
+ <tr char='-' action='A_SKIP' newstate='S_COM'/>
+ <tr char='[' action='A_SKIP' newstate='S_BB'/>
+ <tr char='>' action='A_SKIP' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_DECL2'>
+ <tr symbol='default' action='A_SAVE' newstate='S_DECL2'/>
+ <tr char='>' action='A_DECL' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_ENT'>
+ <tr symbol='default' action='A_ENTITY' newstate='S_ENT'/>
+ <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
+ </state>
+ <state id='S_EQ'>
+ <tr symbol='default' action='A_ADUP_SAVE' newstate='S_ANAME'/>
+ <tr char='=' action='A_SKIP' newstate='S_AVAL'/>
+ <tr char='>' action='A_ADUP_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_ADUP_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SKIP' newstate='S_EQ'/>
+ </state>
+ <state id='S_ETAG'>
+ <tr symbol='default' action='A_SAVE' newstate='S_ETAG'/>
+ <tr char='>' action='A_ETAG' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_ETAG' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SKIP' newstate='S_ETAG'/>
+ </state>
+ <state id='S_GI'>
+ <tr symbol='default' action='A_SAVE' newstate='S_GI'/>
+ <tr char='/' action='A_SKIP' newstate='S_EMPTYTAG'/>
+ <tr char='>' action='A_GI_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ <tr symbol='S' action='A_GI' newstate='S_TAGWS'/>
+ </state>
+ <state id='S_NCR'>
+ <tr symbol='default' action='A_ENTITY' newstate='S_NCR'/>
+ <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
+ </state>
+ <state id='S_XNCR'>
+ <tr symbol='default' action='A_ENTITY' newstate='S_XNCR'/>
+ <tr symbol='EOF' action='A_ENTITY' newstate='S_DONE'/>
+ </state>
+ <state id='S_PCDATA'>
+ <tr symbol='default' action='A_SAVE' newstate='S_PCDATA'/>
+ <tr char='&' action='A_ENTITY_START' newstate='S_ENT'/>
+ <tr char='<' action='A_PCDATA' newstate='S_TAG'/>
+ <tr symbol='EOF' action='A_PCDATA' newstate='S_DONE'/>
+ </state>
+ <state id='S_PI'>
+ <tr symbol='default' action='A_SAVE' newstate='S_PI'/>
+ <tr char='>' action='A_PI' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_PI' newstate='S_DONE'/>
+ </state>
+ <state id='S_PITARGET'>
+ <tr symbol='default' action='A_SAVE' newstate='S_PITARGET'/>
+ <tr char='>' action='A_PITARGET_PI' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_PITARGET_PI' newstate='S_DONE'/>
+ <tr symbol='S' action='A_PITARGET' newstate='S_PI'/>
+ </state>
+ <state id='S_QUOT'>
+ <tr symbol='default' action='A_SAVE' newstate='S_QUOT'/>
+ <tr char='"' action='A_AVAL' newstate='S_TAGWS'/>
+ <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SP' newstate='S_QUOT'/>
+ </state>
+ <state id='S_STAGC'>
+ <tr symbol='default' action='A_SAVE' newstate='S_STAGC'/>
+ <tr char='>' action='A_AVAL_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_AVAL_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_AVAL' newstate='S_TAGWS'/>
+ </state>
+ <state id='S_TAG'>
+ <tr symbol='default' action='A_SAVE' newstate='S_GI'/>
+ <tr char='!' action='A_SKIP' newstate='S_DECL'/>
+ <tr char='/' action='A_SKIP' newstate='S_ETAG'/>
+ <tr char='?' action='A_SKIP' newstate='S_PITARGET'/>
+ <tr char='<' action='A_SAVE' newstate='S_TAG'/>
+ <tr symbol='EOF' action='A_LT_PCDATA' newstate='S_DONE'/>
+ <tr symbol='S' action='A_LT' newstate='S_PCDATA'/>
+ </state>
+ <state id='S_TAGWS'>
+ <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
+ <tr char='/' action='A_SKIP' newstate='S_EMPTYTAG'/>
+ <tr char='>' action='A_STAGC' newstate='S_PCDATA'/>
+ <tr symbol='EOF' action='A_STAGC' newstate='S_DONE'/>
+ <tr symbol='S' action='A_SKIP' newstate='S_TAGWS'/>
+ </state>
+ <state id='S_EMPTYTAG'>
+ <tr symbol='S' action='A_SKIP' newstate='S_TAGWS'/>
+ <tr symbol='default' action='A_SAVE' newstate='S_ANAME'/>
+ <tr char='>' action='A_EMPTYTAG' newstate='S_PCDATA'/>
+ </state>
+ <state id='S_BB'>
+ <tr char='C' action='A_SKIP' newstate='S_BBC'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBC'>
+ <tr char='D' action='A_SKIP' newstate='S_BBCD'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBCD'>
+ <tr char='A' action='A_SKIP' newstate='S_BBCDA'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBCDA'>
+ <tr char='T' action='A_SKIP' newstate='S_BBCDAT'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBCDAT'>
+ <tr char='A' action='A_SKIP' newstate='S_BBCDATA'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_BBCDATA'>
+ <tr char='[' action='A_SKIP' newstate='S_CDSECT'/>
+ <tr symbol='default' action='A_SKIP' newstate='S_DECL'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_CDSECT'>
+ <tr char=']' action='A_SAVE' newstate='S_CDSECT1'/>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_CDSECT1'>
+ <tr char=']' action='A_SAVE' newstate='S_CDSECT2'/>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_CDSECT2'>
+ <tr char='>' action='A_CDATA' newstate='S_PCDATA'/>
+ <tr symbol='default' action='A_SAVE' newstate='S_CDSECT'/>
+ <tr symbol='EOF' action='A_SKIP' newstate='S_DONE'/>
+ </state>
+ <state id='S_DONE'/>
+</statetable>