You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/08/06 17:59:19 UTC
[21/33] lucenenet git commit: Lucene.Net.Benchmark: Added Sax and
TagSoup to the Support folder.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/SAXParseException.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/Sax/SAXParseException.cs b/src/Lucene.Net.Benchmark/Support/Sax/SAXParseException.cs
new file mode 100644
index 0000000..b7cdf64
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/Sax/SAXParseException.cs
@@ -0,0 +1,269 @@
+// SAX exception class.
+// http://www.saxproject.org
+// No warranty; no copyright -- use this as you will.
+// $Id: SAXParseException.java,v 1.11 2004/04/21 13:05:02 dmegginson Exp $
+
+using System;
+#if FEATURE_SERIALIZABLE
+using System.Runtime.Serialization;
+#endif
+
+namespace Sax
+{
+    /// <summary>
+    /// Encapsulate an XML parse error or warning.
+    /// </summary>
+    /// <remarks>
+    /// <em>This module, both source code and documentation, is in the
+    /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+    /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+    /// for further information.
+    /// <para/>
+    /// This exception may include information for locating the error
+    /// in the original XML document, as if it came from a <see cref="ILocator"/>
+    /// object. Note that although the application
+    /// will receive a SAXParseException as the argument to the handlers
+    /// in the <see cref="IErrorHandler"/> interface,
+    /// the application is not actually required to throw the exception;
+    /// instead, it can simply read the information in it and take a
+    /// different action.
+    /// <para/>
+    /// Since this exception is a subclass of <see cref="SAXException"/>,
+    /// it inherits the ability to wrap another exception.
+    /// </remarks>
+    /// <since>SAX 1.0</since>
+    /// <author>David Megginson</author>
+    /// <version>2.0.1 (sax2r2)</version>
+    /// <seealso cref="SAXException"/>
+    /// <seealso cref="ILocator"/>
+    /// <seealso cref="IErrorHandler"/>
+#if FEATURE_SERIALIZABLE
+    [Serializable]
+#endif
+    public class SAXParseException : SAXException
+    {
+        //////////////////////////////////////////////////////////////////////
+        // Constructors.
+        //////////////////////////////////////////////////////////////////////
+
+        /// <summary>
+        /// Construct a new exception with no message and no location information.
+        /// </summary>
+        /// <remarks>
+        /// The identifiers are null and the line and column numbers are -1,
+        /// exactly as when a null <see cref="ILocator"/> is passed to one of the
+        /// other constructors.
+        /// </remarks>
+        // LUCENENET specific for serialization
+        public SAXParseException()
+            : base()
+        {
+            // Without this the int fields would stay at 0, which contradicts the
+            // documented "-1 if none is available" contract of LineNumber/ColumnNumber.
+            Init(null, null, -1, -1);
+        }
+
+        /// <summary>
+        /// Create a new <see cref="SAXParseException"/> from a message and a <see cref="ILocator"/>.
+        /// </summary>
+        /// <remarks>
+        /// This constructor is especially useful when an application is
+        /// creating its own exception from within a <see cref="IContentHandler"/>
+        /// callback.
+        /// </remarks>
+        /// <param name="message">The error or warning message.</param>
+        /// <param name="locator">The locator object for the error or warning (may be null).</param>
+        /// <seealso cref="ILocator"/>
+        public SAXParseException(string message, ILocator locator)
+            : base(message)
+        {
+            Init(locator);
+        }
+
+        /// <summary>
+        /// Wrap an existing exception in a SAXParseException.
+        /// </summary>
+        /// <remarks>
+        /// This constructor is especially useful when an application is
+        /// creating its own exception from within a <see cref="IContentHandler"/>
+        /// callback, and needs to wrap an existing exception that is not a
+        /// subclass of <see cref="SAXException"/>.
+        /// </remarks>
+        /// <param name="message">The error or warning message, or null to
+        /// use the message from the embedded exception.</param>
+        /// <param name="locator">The locator object for the error or warning (may be
+        /// null).</param>
+        /// <param name="e">Any exception.</param>
+        /// <seealso cref="ILocator"/>
+        public SAXParseException(string message, ILocator locator,
+            Exception e)
+            : base(message, e)
+        {
+            Init(locator);
+        }
+
+        /// <summary>
+        /// Create a new SAXParseException.
+        /// </summary>
+        /// <remarks>
+        /// This constructor is most useful for parser writers.
+        /// <para/>
+        /// All parameters except the message are as if
+        /// they were provided by a <see cref="ILocator"/>. For example, if the
+        /// system identifier is a URL (including relative filename), the
+        /// caller must resolve it fully before creating the exception.
+        /// </remarks>
+        /// <param name="message">The error or warning message.</param>
+        /// <param name="publicId">The public identifier of the entity that generated the error or warning.</param>
+        /// <param name="systemId">The system identifier of the entity that generated the error or warning.</param>
+        /// <param name="lineNumber">The line number of the end of the text that caused the error or warning.</param>
+        /// <param name="columnNumber">The column number of the end of the text that caused the error or warning.</param>
+        public SAXParseException(string message, string publicId, string systemId,
+            int lineNumber, int columnNumber)
+            : base(message)
+        {
+            Init(publicId, systemId, lineNumber, columnNumber);
+        }
+
+        /// <summary>
+        /// Create a new <see cref="SAXParseException"/> with an embedded exception.
+        /// </summary>
+        /// <remarks>
+        /// This constructor is most useful for parser writers who
+        /// need to wrap an exception that is not a subclass of
+        /// <see cref="SAXException"/>.
+        /// <para/>
+        /// All parameters except the message and exception are as if
+        /// they were provided by a <see cref="ILocator"/>. For example, if the
+        /// system identifier is a URL (including relative filename), the
+        /// caller must resolve it fully before creating the exception.
+        /// </remarks>
+        /// <param name="message">The error or warning message, or null to use the message from the embedded exception.</param>
+        /// <param name="publicId">The public identifier of the entity that generated the error or warning.</param>
+        /// <param name="systemId">The system identifier of the entity that generated the error or warning.</param>
+        /// <param name="lineNumber">The line number of the end of the text that caused the error or warning.</param>
+        /// <param name="columnNumber">The column number of the end of the text that caused the error or warning.</param>
+        /// <param name="e">Another exception to embed in this one.</param>
+        public SAXParseException(string message, string publicId, string systemId,
+            int lineNumber, int columnNumber, Exception e)
+            : base(message, e)
+        {
+            Init(publicId, systemId, lineNumber, columnNumber);
+        }
+
+#if FEATURE_SERIALIZABLE
+        // Entry names under which the location data is stored in a SerializationInfo.
+        private const string PublicIdEntry = "publicId";
+        private const string SystemIdEntry = "systemId";
+        private const string LineNumberEntry = "lineNumber";
+        private const string ColumnNumberEntry = "columnNumber";
+
+        /// <summary>
+        /// Initializes a new instance of this class with serialized data.
+        /// </summary>
+        /// <remarks>
+        /// Restores the location information written by
+        /// <see cref="GetObjectData(SerializationInfo, StreamingContext)"/>.
+        /// </remarks>
+        /// <param name="info">The <see cref="SerializationInfo"/> that holds the serialized object data about the exception being thrown.</param>
+        /// <param name="context">The <see cref="StreamingContext"/> that contains contextual information about the source or destination.</param>
+        public SAXParseException(SerializationInfo info, StreamingContext context)
+            : base(info, context)
+        {
+            Init(
+                info.GetString(PublicIdEntry),
+                info.GetString(SystemIdEntry),
+                info.GetInt32(LineNumberEntry),
+                info.GetInt32(ColumnNumberEntry));
+        }
+
+        /// <summary>
+        /// Adds the location information of this exception to the serialized data,
+        /// after the base class has stored its own state.
+        /// </summary>
+        /// <param name="info">The <see cref="SerializationInfo"/> to populate.</param>
+        /// <param name="context">The destination for this serialization.</param>
+        public override void GetObjectData(SerializationInfo info, StreamingContext context)
+        {
+            base.GetObjectData(info, context);
+            info.AddValue(PublicIdEntry, this.publicId, typeof(string));
+            info.AddValue(SystemIdEntry, this.systemId, typeof(string));
+            info.AddValue(LineNumberEntry, this.lineNumber);
+            info.AddValue(ColumnNumberEntry, this.columnNumber);
+        }
+#endif
+
+        /// <summary>
+        /// Internal initialization from a locator: copies its four location
+        /// values, or records "unknown" (null / -1) when the locator is null.
+        /// </summary>
+        /// <param name="locator">The locator to copy the location from, or null.</param>
+        private void Init(ILocator locator)
+        {
+            if (locator == null)
+            {
+                Init(null, null, -1, -1);
+            }
+            else
+            {
+                Init(locator.PublicId, locator.SystemId,
+                    locator.LineNumber, locator.ColumnNumber);
+            }
+        }
+
+        /// <summary>
+        /// Internal initialization method.
+        /// </summary>
+        /// <param name="publicId">The public identifier of the entity which generated the exception, or null.</param>
+        /// <param name="systemId">The system identifier of the entity which generated the exception, or null.</param>
+        /// <param name="lineNumber">The line number of the error, or -1.</param>
+        /// <param name="columnNumber">The column number of the error, or -1.</param>
+        private void Init(string publicId, string systemId,
+            int lineNumber, int columnNumber)
+        {
+            this.publicId = publicId;
+            this.systemId = systemId;
+            this.lineNumber = lineNumber;
+            this.columnNumber = columnNumber;
+        }
+
+        /// <summary>
+        /// Get the public identifier of the entity where the exception occurred.
+        /// Returns a string containing the public identifier, or null if none is available.
+        /// </summary>
+        /// <seealso cref="ILocator.PublicId"/>
+        public string PublicId
+        {
+            get { return this.publicId; }
+        }
+
+        /// <summary>
+        /// Get the system identifier of the entity where the exception occurred.
+        /// <para/>
+        /// If the system identifier is a URL, it will have been resolved fully.
+        /// <para/>
+        /// A string containing the system identifier, or null if none is available.
+        /// </summary>
+        /// <seealso cref="ILocator.SystemId"/>
+        public string SystemId
+        {
+            get { return this.systemId; }
+        }
+
+        /// <summary>
+        /// The line number of the end of the text where the exception occurred.
+        /// <para/>
+        /// The first line is line 1.
+        /// <para/>
+        /// An integer representing the line number, or -1 if none is available.
+        /// </summary>
+        /// <seealso cref="ILocator.LineNumber"/>
+        public int LineNumber
+        {
+            get { return this.lineNumber; }
+        }
+
+        /// <summary>
+        /// The column number of the end of the text where the exception occurred.
+        /// <para/>
+        /// The first column in a line is position 1.
+        /// <para/>
+        /// An integer representing the column number, or -1
+        /// if none is available.
+        /// </summary>
+        /// <seealso cref="ILocator.ColumnNumber"/>
+        public int ColumnNumber
+        {
+            get { return this.columnNumber; }
+        }
+
+
+        //////////////////////////////////////////////////////////////////////
+        // Internal state.
+        //////////////////////////////////////////////////////////////////////
+
+        /// <summary>
+        /// The public identifier, or null.
+        /// </summary>
+        /// <seealso cref="PublicId"/>
+        private string publicId;
+
+        /// <summary>
+        /// The system identifier, or null.
+        /// </summary>
+        /// <seealso cref="SystemId"/>
+        private string systemId;
+
+        /// <summary>
+        /// The line number, or -1.
+        /// </summary>
+        /// <seealso cref="LineNumber"/>
+        private int lineNumber;
+
+        /// <summary>
+        /// The column number, or -1.
+        /// </summary>
+        /// <seealso cref="ColumnNumber"/>
+        private int columnNumber;
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/XMLFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/Sax/XMLFilter.cs b/src/Lucene.Net.Benchmark/Support/Sax/XMLFilter.cs
new file mode 100644
index 0000000..f9350d3
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/Sax/XMLFilter.cs
@@ -0,0 +1,41 @@
+// XMLFilter.java - filter SAX2 events.
+// http://www.saxproject.org
+// Written by David Megginson
+// NO WARRANTY! This class is in the Public Domain.
+// $Id: XMLFilter.java,v 1.6 2002/01/30 21:13:48 dbrownell Exp $
+
+namespace Sax
+{
+ /// <summary>
+ /// Interface for an XML filter.
+ /// </summary>
+ /// <remarks>
+ /// <em>This module, both source code and documentation, is in the
+ /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ /// for further information.
+ /// <para/>
+ /// An XML filter is like an XML reader, except that it obtains its
+ /// events from another XML reader rather than a primary source like
+ /// an XML document or database. Filters can modify a stream of
+ /// events as they pass on to the final application.
+ /// <para/>
+ /// The XMLFilterImpl helper class provides a convenient base
+ /// for creating SAX2 filters, by passing on all <see cref="IEntityResolver"/>, <see cref="IDTDHandler"/>,
+ /// <see cref="IContentHandler"/> and <see cref="IErrorHandler"/>
+ /// events automatically.
+ /// </remarks>
+ public interface IXMLFilter : IXMLReader
+ {
+ /// <summary>
+ /// Gets or sets the parent reader. Returns the parent reader, or null if none has been set.
+ /// </summary>
+ /// <remarks>
+ /// This property allows the application to link or query the parent
+ /// reader (which may be another filter). It is generally a
+ /// bad idea to perform any operations on the parent reader
+ /// directly: they should all pass through this filter.
+ /// </remarks>
+ IXMLReader Parent { get; set; }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/Sax/XMLReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/Sax/XMLReader.cs b/src/Lucene.Net.Benchmark/Support/Sax/XMLReader.cs
new file mode 100644
index 0000000..71b690f
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/Sax/XMLReader.cs
@@ -0,0 +1,305 @@
+// XMLReader.java - read an XML document.
+// http://www.saxproject.org
+// Written by David Megginson
+// NO WARRANTY! This class is in the Public Domain.
+// $Id: XMLFilter.java,v 1.6 2002/01/30 21:13:48 dbrownell Exp $
+
+namespace Sax
+{
+ /// <summary>
+ /// Interface for reading an XML document using callbacks.
+ /// </summary>
+ /// <remarks>
+ /// <em>This module, both source code and documentation, is in the
+ /// Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
+ /// See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
+ /// for further information.
+ /// <para/>
+ /// Through this interface an application can query and set
+ /// feature flags and properties, register the
+ /// <see cref="IEntityResolver"/>, <see cref="IDTDHandler"/>,
+ /// <see cref="IContentHandler"/> and <see cref="IErrorHandler"/>
+ /// event handlers, and start a parse of an XML document.
+ /// <para/>
+ /// Parsing is synchronous: the <c>Parse</c> methods do not return
+ /// until parsing has ended, and information about the document is
+ /// delivered through the registered event handlers while the
+ /// parse is running.
+ /// </remarks>
+ /// <since>SAX 2.0</since>
+ /// <author>David Megginson</author>
+ /// <version>2.0.1 (sax2r2)</version>
+ /// <seealso cref="Helpers.XMLFilter"/>
+ public interface IXMLReader
+ {
+ ////////////////////////////////////////////////////////////////////
+ // Configuration.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /// <summary>
+ /// Look up the value of a feature flag.
+ /// </summary>
+ /// <remarks>
+ /// The feature name is any fully-qualified URI. It is
+ /// possible for an XMLReader to recognize a feature name but
+ /// temporarily be unable to return its value.
+ /// Some feature values may be available only in specific
+ /// contexts, such as before, during, or after a parse.
+ /// Also, some feature values may not be programmatically accessible.
+ /// (In the case of an adapter for a SAX1 Parser, there is no
+ /// implementation-independent way to expose whether the underlying
+ /// parser is performing validation, expanding external entities,
+ /// and so forth.)
+ /// <para/>All XMLReaders are required to recognize the
+ /// http://xml.org/sax/features/namespaces and the
+ /// http://xml.org/sax/features/namespace-prefixes feature names.
+ /// <para/>Typical usage is something like this:
+ /// <code>
+ /// IXMLReader r = new MySAXDriver();
+ /// // try to activate validation
+ /// try {
+ /// r.SetFeature("http://xml.org/sax/features/validation", true);
+ /// } catch (SAXException e) {
+ /// Console.Error.WriteLine("Cannot activate validation.");
+ /// }
+ /// // register event handlers
+ /// r.ContentHandler = new MyContentHandler();
+ /// r.ErrorHandler = new MyErrorHandler();
+ /// // parse the first document
+ /// try {
+ /// r.Parse("http://www.foo.com/mydoc.xml");
+ /// } catch (IOException e) {
+ /// Console.Error.WriteLine("I/O exception reading XML document");
+ /// } catch (SAXException e) {
+ /// Console.Error.WriteLine("XML exception reading document.");
+ /// }
+ /// </code>
+ /// <para/>Implementors are free (and encouraged) to invent their own features,
+ /// using names built on their own URIs.
+ /// </remarks>
+ /// <param name="name">The feature name, which is a fully-qualified URI.</param>
+ /// <returns>The current value of the feature (true or false).</returns>
+ /// <exception cref="SAXNotRecognizedException">If the feature
+ /// value can't be assigned or retrieved.</exception>
+ /// <exception cref="SAXNotSupportedException">When the
+ /// <see cref="IXMLReader"/> recognizes the feature name but
+ /// cannot determine its value at this time.</exception>
+ /// <seealso cref="SetFeature(string, bool)"/>
+ bool GetFeature(string name);
+
+
+ /// <summary>
+ /// Set the value of a feature flag.
+ /// <para/>
+ /// The feature name is any fully-qualified URI. It is
+ /// possible for an XMLReader to expose a feature value but
+ /// to be unable to change the current value.
+ /// Some feature values may be immutable or mutable only
+ /// in specific contexts, such as before, during, or after
+ /// a parse.
+ /// <para/>
+ /// All XMLReaders are required to support setting
+ /// http://xml.org/sax/features/namespaces to true and
+ /// http://xml.org/sax/features/namespace-prefixes to false.
+ /// </summary>
+ /// <param name="name">The feature name, which is a fully-qualified URI.</param>
+ /// <param name="value">The requested value of the feature (true or false).</param>
+ /// <exception cref="SAXNotRecognizedException">If the feature
+ /// value can't be assigned or retrieved.</exception>
+ /// <exception cref="SAXNotSupportedException">When the
+ /// <see cref="IXMLReader"/> recognizes the feature name but
+ /// cannot set the requested value.</exception>
+ /// <seealso cref="GetFeature(string)"/>
+ void SetFeature(string name, bool value);
+
+
+ /// <summary>
+ /// Look up the value of a property.
+ /// </summary>
+ /// <remarks>
+ /// The property name is any fully-qualified URI. It is
+ /// possible for an XMLReader to recognize a property name but
+ /// temporarily be unable to return its value.
+ /// Some property values may be available only in specific
+ /// contexts, such as before, during, or after a parse.
+ /// <para/>
+ /// <see cref="IXMLReader"/>s are not required to recognize any specific
+ /// property names, though an initial core set is documented for
+ /// SAX2.
+ /// <para/>
+ /// Implementors are free (and encouraged) to invent their own properties,
+ /// using names built on their own URIs.
+ /// </remarks>
+ /// <param name="name">The property name, which is a fully-qualified URI.</param>
+ /// <returns>The current value of the property.</returns>
+ /// <exception cref="SAXNotRecognizedException">If the property
+ /// value can't be assigned or retrieved.</exception>
+ /// <exception cref="SAXNotSupportedException">When the
+ /// <see cref="IXMLReader"/> recognizes the property name but
+ /// cannot determine its value at this time.</exception>
+ /// <seealso cref="SetProperty(string, object)"/>
+ object GetProperty(string name);
+
+
+ /// <summary>
+ /// Set the value of a property.
+ /// </summary>
+ /// <remarks>
+ /// The property name is any fully-qualified URI. It is
+ /// possible for an <see cref="IXMLReader"/> to recognize a property name but
+ /// to be unable to change the current value.
+ /// Some property values may be immutable or mutable only
+ /// in specific contexts, such as before, during, or after
+ /// a parse.
+ /// <para/>
+ /// <see cref="IXMLReader"/>s are not required to recognize setting
+ /// any specific property names, though a core set is defined by
+ /// SAX2.
+ /// <para/>
+ /// This method is also the standard mechanism for setting
+ /// extended handlers.
+ /// </remarks>
+ /// <param name="name">The property name, which is a fully-qualified URI.</param>
+ /// <param name="value">The requested value for the property.</param>
+ /// <exception cref="SAXNotRecognizedException">If the property
+ /// value can't be assigned or retrieved.</exception>
+ /// <exception cref="SAXNotSupportedException">When the
+ /// <see cref="IXMLReader"/> recognizes the property name but
+ /// cannot set the requested value.</exception>
+ void SetProperty(string name, object value);
+
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Event handlers.
+ ////////////////////////////////////////////////////////////////////
+
+
+ /// <summary>
+ /// Gets or sets an entity resolver.
+ /// </summary>
+ /// <remarks>
+ /// If the application does not register an entity resolver,
+ /// the <see cref="IXMLReader"/> will perform its own default resolution.
+ /// <para/>
+ /// Applications may register a new or different resolver in the
+ /// middle of a parse, and the SAX parser must begin using the new
+ /// resolver immediately.
+ /// </remarks>
+ IEntityResolver EntityResolver { get; set; }
+
+ /// <summary>
+ /// Gets or sets a DTD event handler.
+ /// </summary>
+ /// <remarks>
+ /// If the application does not register a DTD handler, all DTD
+ /// events reported by the SAX parser will be silently ignored.
+ /// <para/>
+ /// Applications may register a new or different handler in the
+ /// middle of a parse, and the SAX parser must begin using the new
+ /// handler immediately.
+ /// </remarks>
+ IDTDHandler DTDHandler { get; set; }
+
+ /// <summary>
+ /// Gets or sets a content event handler.
+ /// </summary>
+ /// <remarks>
+ /// <para/>If the application does not register a content handler, all
+ /// content events reported by the SAX parser will be silently
+ /// ignored.
+ /// <para/>Applications may register a new or different handler in the
+ /// middle of a parse, and the SAX parser must begin using the new
+ /// handler immediately.
+ /// </remarks>
+ IContentHandler ContentHandler { get; set; }
+
+
+ /// <summary>
+ /// Gets or sets an error event handler.
+ /// </summary>
+ /// <remarks>
+ /// If the application does not register an error handler, all
+ /// error events reported by the SAX parser will be silently
+ /// ignored; however, normal processing may not continue. It is
+ /// highly recommended that all SAX applications implement an
+ /// error handler to avoid unexpected bugs.
+ /// <para/>
+ /// Applications may register a new or different handler in the
+ /// middle of a parse, and the SAX parser must begin using the new
+ /// handler immediately.
+ /// </remarks>
+ IErrorHandler ErrorHandler { get; set; }
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Parsing.
+ ////////////////////////////////////////////////////////////////////
+
+ /// <summary>
+ /// Parse an XML document.
+ /// </summary>
+ /// <remarks>
+ /// The application can use this method to instruct the XML
+ /// reader to begin parsing an XML document from any valid input
+ /// source (a character stream, a byte stream, or a URI).
+ /// <para/>
+ /// Applications may not invoke this method while a parse is in
+ /// progress (they should create a new XMLReader instead for each
+ /// nested XML document). Once a parse is complete, an
+ /// application may reuse the same XMLReader object, possibly with a
+ /// different input source.
+ /// Configuration of the <see cref="IXMLReader"/> object (such as handler bindings and
+ /// values established for feature flags and properties) is unchanged
+ /// by completion of a parse, unless the definition of that aspect of
+ /// the configuration explicitly specifies other behavior.
+ /// (For example, feature flags or properties exposing
+ /// characteristics of the document being parsed.)
+ /// <para/>
+ /// During the parse, the XMLReader will provide information
+ /// about the XML document through the registered event
+ /// handlers.
+ /// <para/>
+ /// This method is synchronous: it will not return until parsing
+ /// has ended. If a client application wants to terminate
+ /// parsing early, it should throw an exception.
+ /// </remarks>
+ /// <param name="input">The input source for the top-level of the
+ /// XML document.</param>
+ /// <exception cref="SAXException">Any SAX exception, possibly
+ /// wrapping another exception.</exception>
+ /// <exception cref="System.IO.IOException">An IO exception from the parser,
+ /// possibly from a byte stream or character stream
+ /// supplied by the application.</exception>
+ /// <seealso cref="InputSource"/>
+ /// <seealso cref="Parse(string)"/>
+ /// <seealso cref="EntityResolver"/>
+ /// <seealso cref="DTDHandler"/>
+ /// <seealso cref="ContentHandler"/>
+ /// <seealso cref="ErrorHandler"/>
+ void Parse(InputSource input);
+
+
+ /// <summary>
+ /// Parse an XML document from a system identifier (URI).
+ /// </summary>
+ /// <remarks>
+ /// This method is a shortcut for the common case of reading a
+ /// document from a system identifier. It is the exact
+ /// equivalent of the following:
+ /// <code>
+ /// Parse(new InputSource(systemId));
+ /// </code>
+ /// <para/>If the system identifier is a URL, it must be fully resolved
+ /// by the application before it is passed to the parser.
+ /// </remarks>
+ /// <param name="systemId">The system identifier (URI).</param>
+ /// <exception cref="SAXException">Any SAX exception, possibly
+ /// wrapping another exception.</exception>
+ /// <exception cref="System.IO.IOException">An IO exception from the parser,
+ /// possibly from a byte stream or character stream
+ /// supplied by the application.</exception>
+ void Parse(string systemId);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/StringExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/StringExtensions.cs b/src/Lucene.Net.Benchmark/Support/StringExtensions.cs
new file mode 100644
index 0000000..2104fdb
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/StringExtensions.cs
@@ -0,0 +1,14 @@
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Extension methods for <see cref="string"/>.
+    /// </summary>
+    public static class StringExtensions
+    {
+        /// <summary>
+        /// Returns the interned instance of <paramref name="value"/> on targets where
+        /// this build calls <c>string.Intern</c>; on NETSTANDARD builds the value is
+        /// returned unchanged.
+        /// </summary>
+        /// <param name="value">The string to intern (may be null).</param>
+        /// <returns>
+        /// A string equal to <paramref name="value"/>, or null when
+        /// <paramref name="value"/> is null.
+        /// </returns>
+        public static string Intern(this string value)
+        {
+#if NETSTANDARD
+            return value;
+#else
+            // string.Intern throws ArgumentNullException for null, whereas the
+            // NETSTANDARD branch hands null straight back. Return null here too so
+            // the method behaves the same on every build target.
+            return value == null ? null : string.Intern(value);
+#endif
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/AutoDetector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/AutoDetector.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/AutoDetector.cs
new file mode 100644
index 0000000..6fcb578
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/AutoDetector.cs
@@ -0,0 +1,41 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+// Interface to objects that translate InputStreams to Readers by auto-detection
+
+using System.IO;
+
+namespace TagSoup
+{
+ /// <summary>
+ /// Classes which accept a <see cref="Stream"/> and provide a <see cref="TextReader"/> which figures
+ /// out the encoding of the <see cref="Stream"/> and reads characters from it should
+ /// conform to this interface.
+ /// </summary>
+ /// <seealso cref="Stream" />
+ /// <seealso cref="TextReader" />
+ public interface IAutoDetector
+ {
+ /// <summary>
+ /// Given a <see cref="Stream"/>, return a suitable <see cref="TextReader"/> that understands
+ /// the presumed character encoding of that <see cref="Stream"/>.
+ /// If bytes are consumed from the <see cref="Stream"/> in the process, they
+ /// <i>must</i> be pushed back onto the <see cref="Stream"/> so that they can be
+ /// reinterpreted as characters.
+ /// </summary>
+ /// <param name="stream">The <see cref="Stream"/> whose encoding is to be detected.</param>
+ /// <returns>A <see cref="TextReader"/> that reads from the <see cref="Stream"/>.</returns>
+ TextReader AutoDetectingReader(Stream stream);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/Element.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Element.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Element.cs
new file mode 100644
index 0000000..dca7eed
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Element.cs
@@ -0,0 +1,215 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+using Sax.Helpers;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// The internal representation of an actual element (not an element type).
+    /// An Element has an element type, attributes, and a successor Element
+    /// for use in constructing stacks and queues of Elements.
+    /// </summary>
+    /// <seealso cref="ElementType" />
+    /// <seealso cref="Sax.Helpers.Attributes" />
+    public class Element
+    {
+        private readonly Attributes _atts; // attributes of element
+        private readonly ElementType _type; // type of element
+        private bool _preclosed; // this element has been preclosed
+
+        /// <summary>
+        /// Return an Element from a specified ElementType.
+        /// </summary>
+        /// <param name="type">
+        /// The element type of the newly constructed element
+        /// </param>
+        /// <param name="defaultAttributes">
+        /// True if default attributes are wanted: the new element then starts with
+        /// a copy of the type's attributes, otherwise with an empty attribute list
+        /// </param>
+        public Element(ElementType type, bool defaultAttributes)
+        {
+            _type = type;
+            _atts = defaultAttributes
+                ? new Attributes(type.Attributes)
+                : new Attributes();
+
+            // Next and _preclosed are left at their defaults (null and false).
+            // Next is virtual, so assigning it here would run a subclass's setter
+            // override before that subclass's own constructor has executed.
+        }
+
+        /// <summary>
+        /// Gets the element type.
+        /// </summary>
+        public virtual ElementType Type
+        {
+            get { return _type; }
+        }
+
+        /// <summary>
+        /// Gets the attributes as an Attributes object.
+        /// Returning an Attributes makes the attributes mutable.
+        /// </summary>
+        /// <seealso cref="Sax.Helpers.Attributes" />
+        public virtual Attributes Attributes
+        {
+            get { return _atts; }
+        }
+
+        /// <summary>
+        /// Gets or sets the next element in an element stack or queue.
+        /// </summary>
+        public virtual Element Next { get; set; }
+
+        /// <summary>
+        /// Gets the name of the element's type.
+        /// </summary>
+        public virtual string Name
+        {
+            get { return _type.Name; }
+        }
+
+        /// <summary>
+        /// Gets the namespace name of the element's type.
+        /// </summary>
+        public virtual string Namespace
+        {
+            get { return _type.Namespace; }
+        }
+
+        /// <summary>
+        /// Gets the local name of the element's type.
+        /// </summary>
+        public virtual string LocalName
+        {
+            get { return _type.LocalName; }
+        }
+
+        /// <summary>
+        /// Gets the content model vector of the element's type.
+        /// </summary>
+        public virtual int Model
+        {
+            get { return _type.Model; }
+        }
+
+        /// <summary>
+        /// Gets the member-of vector of the element's type.
+        /// </summary>
+        public virtual int MemberOf
+        {
+            get { return _type.MemberOf; }
+        }
+
+        /// <summary>
+        /// Gets the flags vector of the element's type.
+        /// </summary>
+        public virtual int Flags
+        {
+            get { return _type.Flags; }
+        }
+
+        /// <summary>
+        /// Gets the parent element type of the element's type.
+        /// </summary>
+        public virtual ElementType Parent
+        {
+            get { return _type.Parent; }
+        }
+
+        /// <summary>
+        /// Return true if this element has been preclosed.
+        /// </summary>
+        public virtual bool IsPreclosed
+        {
+            get { return _preclosed; }
+        }
+
+        /// <summary>
+        /// Return true if the type of this element can contain the type of
+        /// another element.
+        /// Convenience method.
+        /// </summary>
+        /// <param name="other">
+        /// The other element
+        /// </param>
+        public virtual bool CanContain(Element other)
+        {
+            return _type.CanContain(other._type);
+        }
+
+        /// <summary>
+        /// Set an attribute and its value into this element.
+        /// </summary>
+        /// <param name="name">
+        /// The attribute name (Qname)
+        /// </param>
+        /// <param name="type">
+        /// The attribute type
+        /// </param>
+        /// <param name="value">
+        /// The attribute value
+        /// </param>
+        public virtual void SetAttribute(string name, string type, string value)
+        {
+            _type.SetAttribute(_atts, name, type, value);
+        }
+
+        /// <summary>
+        /// Make this element anonymous.
+        /// Remove every attribute whose type is <c>ID</c> or whose
+        /// qualified name is <c>name</c> from the element's attributes.
+        /// </summary>
+        public virtual void Anonymize()
+        {
+            // Walk backwards so that removing an entry does not shift the
+            // indexes that are still to be visited.
+            for (int i = _atts.Length - 1; i >= 0; i--)
+            {
+                if (_atts.GetType(i).Equals("ID") || _atts.GetQName(i).Equals("name"))
+                {
+                    _atts.RemoveAttribute(i);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Clean the attributes of this element.
+        /// Attributes with a null or empty local name (the name was ill-formed)
+        /// or null value (the attribute was present in the element type but
+        /// not in this actual element) are removed.
+        /// </summary>
+        public virtual void Clean()
+        {
+            // Walk backwards so that removing an entry does not shift the
+            // indexes that are still to be visited.
+            for (int i = _atts.Length - 1; i >= 0; i--)
+            {
+                string name = _atts.GetLocalName(i);
+                if (_atts.GetValue(i) == null || string.IsNullOrEmpty(name))
+                {
+                    _atts.RemoveAttribute(i);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Force this element to preclosed status, meaning that an end-tag has
+        /// been seen but the element cannot yet be closed for structural reasons.
+        /// </summary>
+        public virtual void Preclose()
+        {
+            _preclosed = true;
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/ElementType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/ElementType.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/ElementType.cs
new file mode 100644
index 0000000..6d62a2f
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/ElementType.cs
@@ -0,0 +1,269 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+using Sax.Helpers;
+using System;
+using System.Text;
+
+namespace TagSoup
+{
+ /// <summary>
+ /// This class represents an element type in the schema.
+ /// An element type has a name, a content model vector, a member-of vector,
+ /// a flags vector, default attributes, and a schema to which it belongs.
+ /// </summary>
+ /// <seealso cref="Schema" />
+ public class ElementType
+ {
+     private readonly Attributes atts; // default attributes
+     private readonly string localName; // element type local name
+     private readonly string name; // element type name (Qname)
+     private readonly string @namespace; // element type namespace name
+     private readonly Schema schema; // schema to which this belongs
+
+     /// <summary>
+     /// Construct an <see cref="ElementType"/>; it is usually better to let the
+     /// schema the element type belongs to create it instead.
+     /// The content model, member-of, and flags vectors are specified as ints.
+     /// </summary>
+     /// <param name="name">The element type name (Qname)</param>
+     /// <param name="model">ORed-together bits representing the content
+     /// models allowed in the content of this element type</param>
+     /// <param name="memberOf">ORed-together bits representing the content models
+     /// to which this element type belongs</param>
+     /// <param name="flags">ORed-together bits representing the flags associated
+     /// with this element type</param>
+     /// <param name="schema">
+     /// The schema with which this element type will be associated
+     /// </param>
+     public ElementType(string name, int model, int memberOf, int flags, Schema schema)
+     {
+         this.name = name;
+         Model = model;
+         MemberOf = memberOf;
+         Flags = flags;
+         atts = new Attributes();
+
+         // The schema has to be stored before the namespace is resolved:
+         // an unprefixed element name resolves to the schema's URI.
+         this.schema = schema;
+         @namespace = GetNamespace(name, false);
+         localName = GetLocalName(name);
+     }
+
+     /// <summary>
+     /// Gets the name (Qname) of this element type.
+     /// </summary>
+     public virtual string Name
+     {
+         get { return name; }
+     }
+
+     /// <summary>
+     /// Gets the namespace name of this element type.
+     /// </summary>
+     public virtual string Namespace
+     {
+         get { return @namespace; }
+     }
+
+     /// <summary>
+     /// Gets the local name of this element type.
+     /// </summary>
+     public virtual string LocalName
+     {
+         get { return localName; }
+     }
+
+     /// <summary>
+     /// Gets or sets the content models of this element type as a vector of bits
+     /// </summary>
+     public virtual int Model { get; set; }
+
+     /// <summary>
+     /// Gets or sets the content models to which this element type belongs as a vector of bits
+     /// </summary>
+     public virtual int MemberOf { get; set; }
+
+     /// <summary>
+     /// Gets or sets the flags associated with this element type as a vector of bits
+     /// </summary>
+     public virtual int Flags { get; set; }
+
+     /// <summary>
+     /// Returns the default attributes associated with this element type.
+     /// Attributes of type CDATA that don't have default values are
+     /// typically not included. Other attributes without default values
+     /// have an internal value of <c>null</c>.
+     /// The return value is an Attributes to allow the caller to mutate
+     /// the attributes.
+     /// </summary>
+     public virtual Attributes Attributes
+     {
+         get { return atts; }
+     }
+
+     /// <summary>
+     /// Gets or sets the parent element type of this element type.
+     /// </summary>
+     public virtual ElementType Parent { get; set; }
+
+     /// <summary>
+     /// Gets the schema which this element type is associated with.
+     /// </summary>
+     public virtual Schema Schema
+     {
+         get { return schema; }
+     }
+
+     /// <summary>
+     /// Return a namespace name from a Qname.
+     /// The attribute flag tells us whether to return an empty namespace
+     /// name if there is no prefix, or use the schema default instead.
+     /// </summary>
+     /// <param name="name">The Qname</param>
+     /// <param name="attribute">True if name is an attribute name</param>
+     /// <returns>
+     /// The XML namespace URI for the <c>xml</c> prefix, an interned
+     /// <c>urn:x-prefix:</c> name for any other prefix, and for an unprefixed
+     /// name either the empty string (attributes) or the schema URI (elements).
+     /// </returns>
+     public virtual string GetNamespace(string name, bool attribute)
+     {
+         int colon = name.IndexOf(':');
+         if (colon == -1)
+         {
+             return attribute ? "" : schema.Uri;
+         }
+
+         string prefix = name.Substring(0, colon);
+         if (prefix.Equals("xml"))
+         {
+             return "http://www.w3.org/XML/1998/namespace";
+         }
+         return string.Intern("urn:x-prefix:" + prefix);
+     }
+
+     /// <summary>
+     /// Return a local name from a Qname.
+     /// </summary>
+     /// <param name="name">The Qname</param>
+     /// <returns>
+     /// The part after the first colon (interned), or <paramref name="name"/>
+     /// itself when it contains no colon.
+     /// </returns>
+     public virtual string GetLocalName(string name)
+     {
+         int colon = name.IndexOf(':');
+         if (colon == -1)
+         {
+             return name;
+         }
+         return string.Intern(name.Substring(colon + 1));
+     }
+
+     /// <summary>
+     /// Returns <c>true</c> if this element type can contain another element type.
+     /// That is, if any of the models in this element's model vector
+     /// match any of the models in the other element type's member-of
+     /// vector.
+     /// </summary>
+     /// <param name="other">The other element type</param>
+     public virtual bool CanContain(ElementType other)
+     {
+         return (Model & other.MemberOf) != 0;
+     }
+
+     /// <summary>
+     /// Sets an attribute and its value into an <see cref="Sax.IAttributes"/> object.
+     /// Attempts to set a namespace declaration are ignored.
+     /// </summary>
+     /// <param name="atts">The <see cref="Sax.Helpers.Attributes"/> object</param>
+     /// <param name="name">The name (Qname) of the attribute</param>
+     /// <param name="type">The type of the attribute; when <c>null</c>, a new
+     /// attribute is typed <c>CDATA</c> and an existing one keeps its current type</param>
+     /// <param name="value">The value of the attribute; it is normalized with
+     /// <see cref="Normalize(string)"/> unless the type is <c>CDATA</c></param>
+     public virtual void SetAttribute(Attributes atts, string name, string type, string value)
+     {
+         // XML names are not linguistic text, so the prefix test must be ordinal;
+         // the culture-sensitive overload can give culture-dependent results.
+         if (name.Equals("xmlns") || name.StartsWith("xmlns:", StringComparison.Ordinal))
+         {
+             return;
+         }
+
+         string ns = GetNamespace(name, true);
+         string local = GetLocalName(name);
+         int i = atts.GetIndex(name);
+         bool isNew = i == -1;
+
+         if (isNew)
+         {
+             name = string.Intern(name);
+             if (type == null)
+             {
+                 type = "CDATA";
+             }
+         }
+         else if (type == null)
+         {
+             type = atts.GetType(i);
+         }
+
+         if (!type.Equals("CDATA"))
+         {
+             value = Normalize(value);
+         }
+
+         if (isNew)
+         {
+             atts.AddAttribute(ns, local, name, type, value);
+         }
+         else
+         {
+             atts.SetAttribute(i, ns, local, name, type, value);
+         }
+     }
+
+     /// <summary>
+     /// Normalize an attribute value (ID-style): leading and trailing white space
+     /// is trimmed and every run of space characters is collapsed to one space.
+     /// CDATA-style attribute normalization is already done.
+     /// </summary>
+     /// <param name="value">The value to normalize</param>
+     /// <returns>The normalized value, or <c>null</c> when
+     /// <paramref name="value"/> is <c>null</c></returns>
+     public static string Normalize(string value)
+     {
+         if (value == null)
+         {
+             return null;
+         }
+         value = value.Trim();
+         if (value.IndexOf(" ", StringComparison.Ordinal) == -1)
+         {
+             // No interior spaces, so there is nothing to collapse.
+             return value;
+         }
+         bool space = false;
+         var b = new StringBuilder(value.Length);
+         foreach (char v in value)
+         {
+             if (v == ' ')
+             {
+                 // Keep only the first space of a run.
+                 if (!space)
+                 {
+                     b.Append(v);
+                 }
+                 space = true;
+             }
+             else
+             {
+                 b.Append(v);
+                 space = false;
+             }
+         }
+         return b.ToString();
+     }
+
+     /// <summary>
+     /// Sets an attribute and its value into this element type.
+     /// </summary>
+     /// <param name="name">The name of the attribute</param>
+     /// <param name="type">The type of the attribute</param>
+     /// <param name="value">The value of the attribute</param>
+     public virtual void SetAttribute(string name, string type, string value)
+     {
+         SetAttribute(atts, name, type, value);
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs
new file mode 100644
index 0000000..ed41f84
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs
@@ -0,0 +1,745 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0. You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+//
+
+using Sax;
+using System;
+using System.IO;
+
+namespace TagSoup
+{
+ /// <summary>
+ /// This class implements a table-driven scanner for HTML, allowing for lots of
+ /// defects. It implements the Scanner interface, which accepts a Reader
+ /// object to fetch characters from and a ScanHandler object to report lexical
+ /// events to.
+ /// </summary>
+ public class HTMLScanner : IScanner, ILocator
+ {
+ // Start of state table
+ private const int S_ANAME = 1;
+ private const int S_APOS = 2;
+ private const int S_AVAL = 3;
+ private const int S_BB = 4;
+ private const int S_BBC = 5;
+ private const int S_BBCD = 6;
+ private const int S_BBCDA = 7;
+ private const int S_BBCDAT = 8;
+ private const int S_BBCDATA = 9;
+ private const int S_CDATA = 10;
+ private const int S_CDATA2 = 11;
+ private const int S_CDSECT = 12;
+ private const int S_CDSECT1 = 13;
+ private const int S_CDSECT2 = 14;
+ private const int S_COM = 15;
+ private const int S_COM2 = 16;
+ private const int S_COM3 = 17;
+ private const int S_COM4 = 18;
+ private const int S_DECL = 19;
+ private const int S_DECL2 = 20;
+ private const int S_DONE = 21;
+ private const int S_EMPTYTAG = 22;
+ private const int S_ENT = 23;
+ private const int S_EQ = 24;
+ private const int S_ETAG = 25;
+ private const int S_GI = 26;
+ private const int S_NCR = 27;
+ private const int S_PCDATA = 28;
+ private const int S_PI = 29;
+ private const int S_PITARGET = 30;
+ private const int S_QUOT = 31;
+ private const int S_STAGC = 32;
+ private const int S_TAG = 33;
+ private const int S_TAGWS = 34;
+ private const int S_XNCR = 35;
+ private const int A_ADUP = 1;
+ private const int A_ADUP_SAVE = 2;
+ private const int A_ADUP_STAGC = 3;
+ private const int A_ANAME = 4;
+ private const int A_ANAME_ADUP = 5;
+ private const int A_ANAME_ADUP_STAGC = 6;
+ private const int A_AVAL = 7;
+ private const int A_AVAL_STAGC = 8;
+ private const int A_CDATA = 9;
+ private const int A_CMNT = 10;
+ private const int A_DECL = 11;
+ private const int A_EMPTYTAG = 12;
+ private const int A_ENTITY = 13;
+ private const int A_ENTITY_START = 14;
+ private const int A_ETAG = 15;
+ private const int A_GI = 16;
+ private const int A_GI_STAGC = 17;
+ private const int A_LT = 18;
+ private const int A_LT_PCDATA = 19;
+ private const int A_MINUS = 20;
+ private const int A_MINUS2 = 21;
+ private const int A_MINUS3 = 22;
+ private const int A_PCDATA = 23;
+ private const int A_PI = 24;
+ private const int A_PITARGET = 25;
+ private const int A_PITARGET_PI = 26;
+ private const int A_SAVE = 27;
+ private const int A_SKIP = 28;
+ private const int A_SP = 29;
+ private const int A_STAGC = 30;
+ private const int A_UNGET = 31;
+ private const int A_UNSAVE_PCDATA = 32;
+ private static int[] statetable = {
+ S_ANAME, '/', A_ANAME_ADUP, S_EMPTYTAG,
+ S_ANAME, '=', A_ANAME, S_AVAL,
+ S_ANAME, '>', A_ANAME_ADUP_STAGC, S_PCDATA,
+ S_ANAME, 0, A_SAVE, S_ANAME,
+ S_ANAME, -1, A_ANAME_ADUP_STAGC, S_DONE,
+ S_ANAME, ' ', A_ANAME, S_EQ,
+ S_ANAME, '\n', A_ANAME, S_EQ,
+ S_ANAME, '\t', A_ANAME, S_EQ,
+ S_APOS, '\'', A_AVAL, S_TAGWS,
+ S_APOS, 0, A_SAVE, S_APOS,
+ S_APOS, -1, A_AVAL_STAGC, S_DONE,
+ S_APOS, ' ', A_SP, S_APOS,
+ S_APOS, '\n', A_SP, S_APOS,
+ S_APOS, '\t', A_SP, S_APOS,
+ S_AVAL, '\'', A_SKIP, S_APOS,
+ S_AVAL, '"', A_SKIP, S_QUOT,
+ S_AVAL, '>', A_AVAL_STAGC, S_PCDATA,
+ S_AVAL, 0, A_SAVE, S_STAGC,
+ S_AVAL, -1, A_AVAL_STAGC, S_DONE,
+ S_AVAL, ' ', A_SKIP, S_AVAL,
+ S_AVAL, '\n', A_SKIP, S_AVAL,
+ S_AVAL, '\t', A_SKIP, S_AVAL,
+ S_BB, 'C', A_SKIP, S_BBC,
+ S_BB, 0, A_SKIP, S_DECL,
+ S_BB, -1, A_SKIP, S_DONE,
+ S_BBC, 'D', A_SKIP, S_BBCD,
+ S_BBC, 0, A_SKIP, S_DECL,
+ S_BBC, -1, A_SKIP, S_DONE,
+ S_BBCD, 'A', A_SKIP, S_BBCDA,
+ S_BBCD, 0, A_SKIP, S_DECL,
+ S_BBCD, -1, A_SKIP, S_DONE,
+ S_BBCDA, 'T', A_SKIP, S_BBCDAT,
+ S_BBCDA, 0, A_SKIP, S_DECL,
+ S_BBCDA, -1, A_SKIP, S_DONE,
+ S_BBCDAT, 'A', A_SKIP, S_BBCDATA,
+ S_BBCDAT, 0, A_SKIP, S_DECL,
+ S_BBCDAT, -1, A_SKIP, S_DONE,
+ S_BBCDATA, '[', A_SKIP, S_CDSECT,
+ S_BBCDATA, 0, A_SKIP, S_DECL,
+ S_BBCDATA, -1, A_SKIP, S_DONE,
+ S_CDATA, '<', A_SAVE, S_CDATA2,
+ S_CDATA, 0, A_SAVE, S_CDATA,
+ S_CDATA, -1, A_PCDATA, S_DONE,
+ S_CDATA2, '/', A_UNSAVE_PCDATA, S_ETAG,
+ S_CDATA2, 0, A_SAVE, S_CDATA,
+ S_CDATA2, -1, A_UNSAVE_PCDATA, S_DONE,
+ S_CDSECT, ']', A_SAVE, S_CDSECT1,
+ S_CDSECT, 0, A_SAVE, S_CDSECT,
+ S_CDSECT, -1, A_SKIP, S_DONE,
+ S_CDSECT1, ']', A_SAVE, S_CDSECT2,
+ S_CDSECT1, 0, A_SAVE, S_CDSECT,
+ S_CDSECT1, -1, A_SKIP, S_DONE,
+ S_CDSECT2, '>', A_CDATA, S_PCDATA,
+ S_CDSECT2, 0, A_SAVE, S_CDSECT,
+ S_CDSECT2, -1, A_SKIP, S_DONE,
+ S_COM, '-', A_SKIP, S_COM2,
+ S_COM, 0, A_SAVE, S_COM2,
+ S_COM, -1, A_CMNT, S_DONE,
+ S_COM2, '-', A_SKIP, S_COM3,
+ S_COM2, 0, A_SAVE, S_COM2,
+ S_COM2, -1, A_CMNT, S_DONE,
+ S_COM3, '-', A_SKIP, S_COM4,
+ S_COM3, 0, A_MINUS, S_COM2,
+ S_COM3, -1, A_CMNT, S_DONE,
+ S_COM4, '-', A_MINUS3, S_COM4,
+ S_COM4, '>', A_CMNT, S_PCDATA,
+ S_COM4, 0, A_MINUS2, S_COM2,
+ S_COM4, -1, A_CMNT, S_DONE,
+ S_DECL, '-', A_SKIP, S_COM,
+ S_DECL, '[', A_SKIP, S_BB,
+ S_DECL, '>', A_SKIP, S_PCDATA,
+ S_DECL, 0, A_SAVE, S_DECL2,
+ S_DECL, -1, A_SKIP, S_DONE,
+ S_DECL2, '>', A_DECL, S_PCDATA,
+ S_DECL2, 0, A_SAVE, S_DECL2,
+ S_DECL2, -1, A_SKIP, S_DONE,
+ S_EMPTYTAG, '>', A_EMPTYTAG, S_PCDATA,
+ S_EMPTYTAG, 0, A_SAVE, S_ANAME,
+ S_EMPTYTAG, ' ', A_SKIP, S_TAGWS,
+ S_EMPTYTAG, '\n', A_SKIP, S_TAGWS,
+ S_EMPTYTAG, '\t', A_SKIP, S_TAGWS,
+ S_ENT, 0, A_ENTITY, S_ENT,
+ S_ENT, -1, A_ENTITY, S_DONE,
+ S_EQ, '=', A_SKIP, S_AVAL,
+ S_EQ, '>', A_ADUP_STAGC, S_PCDATA,
+ S_EQ, 0, A_ADUP_SAVE, S_ANAME,
+ S_EQ, -1, A_ADUP_STAGC, S_DONE,
+ S_EQ, ' ', A_SKIP, S_EQ,
+ S_EQ, '\n', A_SKIP, S_EQ,
+ S_EQ, '\t', A_SKIP, S_EQ,
+ S_ETAG, '>', A_ETAG, S_PCDATA,
+ S_ETAG, 0, A_SAVE, S_ETAG,
+ S_ETAG, -1, A_ETAG, S_DONE,
+ S_ETAG, ' ', A_SKIP, S_ETAG,
+ S_ETAG, '\n', A_SKIP, S_ETAG,
+ S_ETAG, '\t', A_SKIP, S_ETAG,
+ S_GI, '/', A_SKIP, S_EMPTYTAG,
+ S_GI, '>', A_GI_STAGC, S_PCDATA,
+ S_GI, 0, A_SAVE, S_GI,
+ S_GI, -1, A_SKIP, S_DONE,
+ S_GI, ' ', A_GI, S_TAGWS,
+ S_GI, '\n', A_GI, S_TAGWS,
+ S_GI, '\t', A_GI, S_TAGWS,
+ S_NCR, 0, A_ENTITY, S_NCR,
+ S_NCR, -1, A_ENTITY, S_DONE,
+ S_PCDATA, '&', A_ENTITY_START, S_ENT,
+ S_PCDATA, '<', A_PCDATA, S_TAG,
+ S_PCDATA, 0, A_SAVE, S_PCDATA,
+ S_PCDATA, -1, A_PCDATA, S_DONE,
+ S_PI, '>', A_PI, S_PCDATA,
+ S_PI, 0, A_SAVE, S_PI,
+ S_PI, -1, A_PI, S_DONE,
+ S_PITARGET, '>', A_PITARGET_PI, S_PCDATA,
+ S_PITARGET, 0, A_SAVE, S_PITARGET,
+ S_PITARGET, -1, A_PITARGET_PI, S_DONE,
+ S_PITARGET, ' ', A_PITARGET, S_PI,
+ S_PITARGET, '\n', A_PITARGET, S_PI,
+ S_PITARGET, '\t', A_PITARGET, S_PI,
+ S_QUOT, '"', A_AVAL, S_TAGWS,
+ S_QUOT, 0, A_SAVE, S_QUOT,
+ S_QUOT, -1, A_AVAL_STAGC, S_DONE,
+ S_QUOT, ' ', A_SP, S_QUOT,
+ S_QUOT, '\n', A_SP, S_QUOT,
+ S_QUOT, '\t', A_SP, S_QUOT,
+ S_STAGC, '>', A_AVAL_STAGC, S_PCDATA,
+ S_STAGC, 0, A_SAVE, S_STAGC,
+ S_STAGC, -1, A_AVAL_STAGC, S_DONE,
+ S_STAGC, ' ', A_AVAL, S_TAGWS,
+ S_STAGC, '\n', A_AVAL, S_TAGWS,
+ S_STAGC, '\t', A_AVAL, S_TAGWS,
+ S_TAG, '!', A_SKIP, S_DECL,
+ S_TAG, '/', A_SKIP, S_ETAG,
+ S_TAG, '?', A_SKIP, S_PITARGET,
+ S_TAG, '<', A_SAVE, S_TAG,
+ S_TAG, 0, A_SAVE, S_GI,
+ S_TAG, -1, A_LT_PCDATA, S_DONE,
+ S_TAG, ' ', A_LT, S_PCDATA,
+ S_TAG, '\n', A_LT, S_PCDATA,
+ S_TAG, '\t', A_LT, S_PCDATA,
+ S_TAGWS, '/', A_SKIP, S_EMPTYTAG,
+ S_TAGWS, '>', A_STAGC, S_PCDATA,
+ S_TAGWS, 0, A_SAVE, S_ANAME,
+ S_TAGWS, -1, A_STAGC, S_DONE,
+ S_TAGWS, ' ', A_SKIP, S_TAGWS,
+ S_TAGWS, '\n', A_SKIP, S_TAGWS,
+ S_TAGWS, '\t', A_SKIP, S_TAGWS,
+ S_XNCR, 0, A_ENTITY, S_XNCR,
+ S_XNCR, -1, A_ENTITY, S_DONE,
+
+ };
+ private static readonly string[] debug_actionnames = { "", "A_ADUP", "A_ADUP_SAVE", "A_ADUP_STAGC", "A_ANAME", "A_ANAME_ADUP", "A_ANAME_ADUP_STAGC", "A_AVAL", "A_AVAL_STAGC", "A_CDATA", "A_CMNT", "A_DECL", "A_EMPTYTAG", "A_ENTITY", "A_ENTITY_START", "A_ETAG", "A_GI", "A_GI_STAGC", "A_LT", "A_LT_PCDATA", "A_MINUS", "A_MINUS2", "A_MINUS3", "A_PCDATA", "A_PI", "A_PITARGET", "A_PITARGET_PI", "A_SAVE", "A_SKIP", "A_SP", "A_STAGC", "A_UNGET", "A_UNSAVE_PCDATA" };
+ private static readonly string[] debug_statenames = { "", "S_ANAME", "S_APOS", "S_AVAL", "S_BB", "S_BBC", "S_BBCD", "S_BBCDA", "S_BBCDAT", "S_BBCDATA", "S_CDATA", "S_CDATA2", "S_CDSECT", "S_CDSECT1", "S_CDSECT2", "S_COM", "S_COM2", "S_COM3", "S_COM4", "S_DECL", "S_DECL2", "S_DONE", "S_EMPTYTAG", "S_ENT", "S_EQ", "S_ETAG", "S_GI", "S_NCR", "S_PCDATA", "S_PI", "S_PITARGET", "S_QUOT", "S_STAGC", "S_TAG", "S_TAGWS", "S_XNCR" };
+
+ // End of state table
+
+ private string thePublicid; // Locator state
+ private string theSystemid; // system id, set by ResetDocumentLocator
+ private int theLastLine; // line captured by the most recent Mark()
+ private int theLastColumn; // column captured by the most recent Mark()
+ private int theCurrentLine; // incremented each time a line feed is scanned
+ private int theCurrentColumn; // reset to 0 on a line feed, otherwise incremented per character
+
+ int theState; // Current state
+ int theNextState; // Next state
+ char[] theOutputBuffer = new char[200]; // Output buffer
+ int theSize; // Current buffer size
+ int[] theWinMap = { // Windows chars map
+ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178};
+
+ /// <summary>
+ /// Index into the state table for [state][input character + 2].
+ /// The state table consists of 4-entry runs of the form
+ /// { current state, input character, action, next state }.
+ /// We precompute the index into the state table for all possible
+ /// { current state, input character } and store the result in
+ /// the statetableIndex array. Since only some input characters
+ /// are present in the state table, we only do the computation for
+ /// characters 0 to the highest character value in the state table.
+ /// An input character of -2 is used to cover all other characters
+ /// as -2 is guaranteed not to match any input character entry
+ /// in the state table.
+ /// <para/>
+ /// When doing lookups, the input character should first be tested
+ /// to be in the range [-1 (inclusive), statetableIndexMaxChar (exclusive)].
+ /// If it isn't, use -2 as the input character.
+ /// <para/>
+ /// Finally, add 2 to the input character to cover for the fact that
+ /// arrays cannot be indexed with negative values. Then look up
+ /// the value in the statetableIndex. If the value is -1, then
+ /// no action or next state was found for the { state, input } that
+ /// you had. If it isn't -1, then action = statetable[value + 2] and
+ /// next state = statetable[value + 3]. That is, the value points
+ /// to the start of the answer 4-tuple in the statetable.
+ /// </summary>
+ static short[][] statetableIndex;
+
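+ /// <summary>
+ /// One more than the highest character value seen in the statetable,
+ /// i.e. the exclusive upper bound for characters that have their own
+ /// column in statetableIndex.
+ /// See the doc comment for statetableIndex to see how this
+ /// is used.
+ /// </summary>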
+ static int statetableIndexMaxChar;
+ /// <summary>
+ /// Builds the shared lookup tables (statetableIndex and statetableIndexMaxChar)
+ /// from the state table. This runs once per type rather than once per instance:
+ /// the tables are static, and rebuilding them in the instance constructor would
+ /// replace them with a partially filled array while another scanner may be
+ /// reading them.
+ /// </summary>
+ static HTMLScanner()
+ {
+     // Find the largest state number and the largest input character that
+     // appear in the 4-entry rows of the state table.
+     int maxState = -1;
+     int maxChar = -1;
+     for (int row = 0; row < statetable.Length; row += 4)
+     {
+         if (statetable[row] > maxState)
+         {
+             maxState = statetable[row];
+         }
+         if (statetable[row + 1] > maxChar)
+         {
+             maxChar = statetable[row + 1];
+         }
+     }
+
+     // Columns cover the characters -2 .. maxChar, shifted by 2 to be non-negative.
+     short[][] index = new short[maxState + 1][];
+     for (int state = 0; state <= maxState; state++)
+     {
+         index[state] = new short[maxChar + 3];
+     }
+
+     for (int state = 0; state <= maxState; ++state)
+     {
+         for (int ch = -2; ch <= maxChar; ++ch)
+         {
+             int hit = -1;
+             int action = 0;
+             for (int row = 0; row < statetable.Length; row += 4)
+             {
+                 if (state != statetable[row])
+                 {
+                     // Rows of one state are contiguous: once a candidate has
+                     // been found, leaving the state's rows ends the search.
+                     if (action != 0)
+                     {
+                         break;
+                     }
+                     continue;
+                 }
+                 if (statetable[row + 1] == 0)
+                 {
+                     // Default row for this state; keep looking for an exact match.
+                     hit = row;
+                     action = statetable[row + 2];
+                 }
+                 else if (statetable[row + 1] == ch)
+                 {
+                     // Exact match on the input character wins.
+                     hit = row;
+                     action = statetable[row + 2];
+                     break;
+                 }
+             }
+             index[state][ch + 2] = (short)hit;
+         }
+     }
+
+     // Publish the fully built tables only at the end.
+     statetableIndexMaxChar = maxChar + 1;
+     statetableIndex = index;
+ }
+
+ /// <summary>
+ /// Creates a new scanner. The shared lookup tables are prepared by the
+ /// static constructor, so there is no per-instance setup to do here.
+ /// </summary>
+ public HTMLScanner()
+ {
+ }
+
+ // Locator implementation
+
+ /// <summary>
+ /// Gets the line number captured by the most recent Mark() call.
+ /// </summary>
+ public virtual int LineNumber
+ {
+     get
+     {
+         return this.theLastLine;
+     }
+ }
+
+ /// <summary>
+ /// Gets the column number captured by the most recent Mark() call.
+ /// </summary>
+ public virtual int ColumnNumber
+ {
+     get
+     {
+         return this.theLastColumn;
+     }
+ }
+
+ /// <summary>
+ /// Gets the public id last supplied to ResetDocumentLocator.
+ /// </summary>
+ public virtual string PublicId
+ {
+     get
+     {
+         return this.thePublicid;
+     }
+ }
+
+ /// <summary>
+ /// Gets the system id last supplied to ResetDocumentLocator.
+ /// </summary>
+ public virtual string SystemId
+ {
+     get
+     {
+         return this.theSystemid;
+     }
+ }
+
+
+ // Scanner implementation
+
+ /// <summary>
+ /// Reset document locator, supplying publicid and systemid.
+ /// </summary>
+ /// <param name="publicid">Public id</param>
+ /// <param name="systemid">System id</param>
+ public virtual void ResetDocumentLocator(string publicid, string systemid)
+ {
+     // Remember the identifiers exposed through PublicId and SystemId.
+     thePublicid = publicid;
+     theSystemid = systemid;
+
+     // Rewind every position counter to the start of the document.
+     theCurrentColumn = 0;
+     theCurrentLine = 0;
+     theLastColumn = 0;
+     theLastLine = 0;
+ }
+
+ /// <summary>
+ /// Scan HTML source, reporting lexical events.
+ /// </summary>
+ /// <param name="r">Reader that provides characters</param>
+ /// <param name="h">ScanHandler that accepts lexical events.</param>
+ public virtual void Scan(TextReader r, IScanHandler h)
+ {
+     theState = S_PCDATA;
+
+     // Remove any leading BOM
+     int firstChar = r.Peek();
+     if (firstChar == '\uFEFF')
+     {
+         r.Read();
+     }
+
+     while (theState != S_DONE)
+     {
+         // The character is only peeked here. It is consumed at the bottom of the
+         // loop unless an action sets "unread" to have it scanned again.
+         int ch = r.Peek();
+         bool unread = false;
+
+         // Process control characters
+         if (ch >= 0x80 && ch <= 0x9F)
+         {
+             ch = theWinMap[ch - 0x80];
+         }
+
+         if (ch == '\r')
+         {
+             r.Read();
+             ch = r.Peek(); // expect LF next
+             if (ch != '\n')
+             {
+                 // Lone CR: treat it as a line feed and leave the following
+                 // character in the reader.
+                 unread = true;
+                 ch = '\n';
+             }
+         }
+
+         if (ch == '\n')
+         {
+             theCurrentLine++;
+             theCurrentColumn = 0;
+         }
+         else
+         {
+             theCurrentColumn++;
+         }
+
+         if (!(ch >= 0x20 || ch == '\n' || ch == '\t' || ch == -1))
+         {
+             // Ignored control character. It has only been peeked so far, so it
+             // must be consumed here; otherwise the same character would be
+             // peeked again forever.
+             r.Read();
+             continue;
+         }
+
+         // Search state table
+         int adjCh = (ch >= -1 && ch < statetableIndexMaxChar) ? ch : -2;
+         int statetableRow = statetableIndex[theState][adjCh + 2];
+         int action = 0;
+         if (statetableRow != -1)
+         {
+             action = statetable[statetableRow + 2];
+             theNextState = statetable[statetableRow + 3];
+         }
+
+         switch (action)
+         {
+             case 0:
+                 throw new Exception(
+                     "HTMLScanner can't cope with " + (int)ch + " in state " +
+                     (int)theState);
+             case A_ADUP:
+                 h.Adup(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_ADUP_SAVE:
+                 h.Adup(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 Save(ch, h);
+                 break;
+             case A_ADUP_STAGC:
+                 h.Adup(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 h.STagC(theOutputBuffer, 0, theSize);
+                 break;
+             case A_ANAME:
+                 h.Aname(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_ANAME_ADUP:
+                 h.Aname(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 h.Adup(theOutputBuffer, 0, theSize);
+                 break;
+             case A_ANAME_ADUP_STAGC:
+                 h.Aname(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 h.Adup(theOutputBuffer, 0, theSize);
+                 h.STagC(theOutputBuffer, 0, theSize);
+                 break;
+             case A_AVAL:
+                 h.Aval(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_AVAL_STAGC:
+                 h.Aval(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 h.STagC(theOutputBuffer, 0, theSize);
+                 break;
+             case A_CDATA:
+                 Mark();
+                 // suppress the final "]]" in the buffer
+                 if (theSize > 1)
+                 {
+                     theSize -= 2;
+                 }
+                 h.PCDATA(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_ENTITY_START:
+                 h.PCDATA(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 Save(ch, h);
+                 break;
+             case A_ENTITY:
+                 Mark();
+                 char ch1 = (char)ch;
+                 if (theState == S_ENT && ch1 == '#')
+                 {
+                     theNextState = S_NCR;
+                     Save(ch, h);
+                     break;
+                 }
+                 else if (theState == S_NCR && (ch1 == 'x' || ch1 == 'X'))
+                 {
+                     theNextState = S_XNCR;
+                     Save(ch, h);
+                     break;
+                 }
+                 else if (theState == S_ENT && char.IsLetterOrDigit(ch1))
+                 {
+                     Save(ch, h);
+                     break;
+                 }
+                 else if (theState == S_NCR && char.IsDigit(ch1))
+                 {
+                     Save(ch, h);
+                     break;
+                 }
+                 else if (theState == S_XNCR && (char.IsDigit(ch1) || "abcdefABCDEF".IndexOf(ch1) != -1))
+                 {
+                     Save(ch, h);
+                     break;
+                 }
+
+                 // The whole entity reference has been collected
+                 h.Entity(theOutputBuffer, 1, theSize - 1);
+                 int ent = h.GetEntity();
+                 if (ent != 0)
+                 {
+                     theSize = 0;
+                     if (ent >= 0x80 && ent <= 0x9F)
+                     {
+                         ent = theWinMap[ent - 0x80];
+                     }
+                     if (ent < 0x20)
+                     {
+                         // Control becomes space
+                         ent = 0x20;
+                     }
+                     else if (ent >= 0xD800 && ent <= 0xDFFF)
+                     {
+                         // Surrogates get dropped
+                         ent = 0;
+                     }
+                     else if (ent <= 0xFFFF)
+                     {
+                         // BMP character
+                         Save(ent, h);
+                     }
+                     else
+                     {
+                         // Astral converted to two surrogates
+                         ent -= 0x10000;
+                         Save((ent >> 10) + 0xD800, h);
+                         Save((ent & 0x3FF) + 0xDC00, h);
+                     }
+                     if (ch != ';')
+                     {
+                         // The terminator is not part of the reference: scan it again.
+                         unread = true;
+                         theCurrentColumn--;
+                     }
+                 }
+                 else
+                 {
+                     unread = true;
+                     theCurrentColumn--;
+                 }
+                 theNextState = S_PCDATA;
+                 break;
+             case A_ETAG:
+                 h.ETag(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_DECL:
+                 h.Decl(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_GI:
+                 h.GI(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_GI_STAGC:
+                 h.GI(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 h.STagC(theOutputBuffer, 0, theSize);
+                 break;
+             case A_LT:
+                 Mark();
+                 Save('<', h);
+                 Save(ch, h);
+                 break;
+             case A_LT_PCDATA:
+                 Mark();
+                 Save('<', h);
+                 h.PCDATA(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_PCDATA:
+                 Mark();
+                 h.PCDATA(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_CMNT:
+                 Mark();
+                 h.Cmnt(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_MINUS3:
+                 Save('-', h);
+                 Save(' ', h);
+                 break;
+             case A_MINUS2:
+                 Save('-', h);
+                 Save(' ', h);
+                 // Same two saves as A_MINUS, repeated here because C# has no
+                 // implicit fall-through between non-empty cases.
+                 Save('-', h);
+                 Save(ch, h);
+                 break;
+             case A_MINUS:
+                 Save('-', h);
+                 Save(ch, h);
+                 break;
+             case A_PI:
+                 Mark();
+                 h.PI(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_PITARGET:
+                 h.PITarget(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_PITARGET_PI:
+                 h.PITarget(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 h.PI(theOutputBuffer, 0, theSize);
+                 break;
+             case A_SAVE:
+                 Save(ch, h);
+                 break;
+             case A_SKIP:
+                 break;
+             case A_SP:
+                 Save(' ', h);
+                 break;
+             case A_STAGC:
+                 h.STagC(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             case A_EMPTYTAG:
+                 Mark();
+                 if (theSize > 0)
+                 {
+                     h.GI(theOutputBuffer, 0, theSize);
+                 }
+                 theSize = 0;
+                 h.STagE(theOutputBuffer, 0, theSize);
+                 break;
+             case A_UNGET:
+                 unread = true;
+                 theCurrentColumn--;
+                 break;
+             case A_UNSAVE_PCDATA:
+                 if (theSize > 0)
+                 {
+                     theSize--;
+                 }
+                 h.PCDATA(theOutputBuffer, 0, theSize);
+                 theSize = 0;
+                 break;
+             default:
+                 throw new Exception("Can't process state " + action);
+         }
+
+         // Consume the peeked character unless the action asked to see it again.
+         if (!unread)
+         {
+             r.Read();
+         }
+         theState = theNextState;
+     }
+     h.EOF(theOutputBuffer, 0, 0);
+ }
+
+ /// <summary>
+ /// Mark the current scan position as a "point of interest" - start of a tag,
+ /// cdata, processing instruction etc.
+ /// </summary>
+ private void Mark()
+ {
+     // Snapshot the running position; LineNumber and ColumnNumber report it.
+     this.theLastLine = this.theCurrentLine;
+     this.theLastColumn = this.theCurrentColumn;
+ }
+
+ /// <summary>
+ /// A callback for the ScanHandler that allows it to force
+ /// the lexer state to CDATA content (no markup is recognized except
+ /// the end of element.
+ /// </summary>
+ public virtual void StartCDATA()
+ {
+     // Only the pending state is changed; Scan switches to it once the
+     // current action has finished.
+     this.theNextState = S_CDATA;
+ }
+
+ // Appends one character to the output buffer, first making room when fewer
+ // than 20 free slots remain.
+ private void Save(int ch, IScanHandler h)
+ {
+     bool nearlyFull = theSize >= theOutputBuffer.Length - 20;
+     if (nearlyFull)
+     {
+         bool inCharacterData = theState == S_PCDATA || theState == S_CDATA;
+         if (!inCharacterData)
+         {
+             // Inside markup the text cannot be handed over in pieces,
+             // so double the buffer instead.
+             char[] grown = new char[theOutputBuffer.Length * 2];
+             Array.Copy(theOutputBuffer, 0, grown, 0, theSize + 1);
+             theOutputBuffer = grown;
+         }
+         else
+         {
+             // Character data may be reported in chunks: flush what has been
+             // collected and start over at the beginning of the buffer.
+             h.PCDATA(theOutputBuffer, 0, theSize);
+             theSize = 0;
+         }
+     }
+     theOutputBuffer[theSize++] = (char)ch;
+ }
+
+ /**
+ Test procedure. Reads HTML from the standard input and writes
+ PYX to the standard output.
+ */
+
+ // public static void main(string[] argv) {
+ // IScanner s = new HTMLScanner();
+ // TextReader r = new StreamReader(System.in, "UTF-8");
+ // TextWriter w = new StreamWriter(System.out, "UTF-8");
+ // PYXWriter pw = new PYXWriter(w);
+ // s.scan(r, pw);
+ // w.close();
+ // }
+
+
+ // Formats a character code for debug output: "\n" for a line feed, an
+ // upper-case hexadecimal form for other values below 32, and the quoted
+ // character otherwise.
+ private static string NiceChar(int value)
+ {
+     if (value == '\n')
+     {
+         return "\\n";
+     }
+
+     return value < 32
+         ? "0x" + value.ToString("X")
+         : "'" + ((char)value) + "'";
+ }
+ }
+}