Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/01/30 13:39:24 UTC

[1/5] lucenenet git commit: More porting work

Repository: lucenenet
Updated Branches:
  refs/heads/master 0fae0c4eb -> 69f29113e


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index 4fe2822..f0392f9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -1,558 +1,549 @@
 using System;
+using System.IO;
 using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
+using Version = Lucene.Net.Util.Version;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
-	/// <seealso cref="java.io.Reader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
-	/// (with behaviour identical to <seealso cref="String#split(String)"/>),
-	/// and that combines the functionality of
-	/// <seealso cref="LetterTokenizer"/>,
-	/// <seealso cref="LowerCaseTokenizer"/>,
-	/// <seealso cref="WhitespaceTokenizer"/>,
-	/// <seealso cref="StopFilter"/> into a single efficient
-	/// multi-purpose class.
-	/// <para>
-	/// If you are unsure how exactly a regular expression should look like, consider 
-	/// prototyping by simply trying various expressions on some test texts via
-	/// <seealso cref="String#split(String)"/>. Once you are satisfied, give that regex to 
-	/// PatternAnalyzer. Also see <a target="_blank" 
-	/// href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
-	/// </para>
-	/// <para>
-	/// This class can be considerably faster than the "normal" Lucene tokenizers. 
-	/// It can also serve as a building block in a compound Lucene
-	/// <seealso cref="org.apache.lucene.analysis.TokenFilter"/> chain. For example as in this 
-	/// stemming example:
-	/// <pre>
-	/// PatternAnalyzer pat = ...
-	/// TokenStream tokenStream = new SnowballFilter(
-	///     pat.tokenStream("content", "James is running round in the woods"), 
-	///     "English"));
-	/// </pre>
-	/// </para>
-	/// </summary>
-	/// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. 
-	[Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
-	public sealed class PatternAnalyzer : Analyzer
-	{
-
-	  /// <summary>
-	  /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
-	  public static readonly Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
-
-	  /// <summary>
-	  /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
-	  public static readonly Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
-
-	  private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("a", "about", "above", "across", "adj", "after", "afterwards", "again", "against", "albeit", "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "around", "as", "at", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "co", "could", "down", "during", "each", "eg", "either", "else", "elsewhere", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "first", "for", "former", "formerly", "from", "further", "had", "has", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "i", "ie", "if", "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last", "latter", "latterly", "least", "less", "ltd", "many", "may", "me", "meanwhile", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "namely", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps", "rather", "s", "same", "seem", "seemed", "seeming", "seems", "several", "she", "should", "since", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "t", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefor", "therein", "thereupon", "these", "they", "this", "those", "though", "through", "throughout", "thru", "thus", "to", "together", "too", "toward", "towards", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereafter", "whereas", "whereat", "whereby", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "with", "within", "without", "would", "xsubj", "xcal", "xauthor", "xother ", "xnote", "yet", "you", "your", "yours", "yourself", "yourselves"), true));
-
-	  /// <summary>
-	  /// A lower-casing word analyzer with English stop words (can be shared
-	  /// freely across threads without harm); global per class loader.
-	  /// </summary>
-	  public static readonly PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-
-	  /// <summary>
-	  /// A lower-casing word analyzer with <b>extended </b> English stop words
-	  /// (can be shared freely across threads without harm); global per class
-	  /// loader. The stop words are borrowed from
-	  /// http://thomas.loc.gov/home/stopwords.html, see
-	  /// http://thomas.loc.gov/home/all.about.inquery.html
-	  /// </summary>
-	  public static readonly PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
-
-	  private readonly Pattern pattern;
-	  private readonly bool toLowerCase;
-	  private readonly CharArraySet stopWords;
-
-	  private readonly Version matchVersion;
-
-	  /// <summary>
-	  /// Constructs a new instance with the given parameters.
-	  /// </summary>
-	  /// <param name="matchVersion"> currently does nothing </param>
-	  /// <param name="pattern">
-	  ///            a regular expression delimiting tokens </param>
-	  /// <param name="toLowerCase">
-	  ///            if <code>true</code> returns tokens after applying
-	  ///            String.toLowerCase() </param>
-	  /// <param name="stopWords">
-	  ///            if non-null, ignores all tokens that are contained in the
-	  ///            given stop set (after previously having applied toLowerCase()
-	  ///            if applicable). For example, created via
-	  ///            <seealso cref="StopFilter#makeStopSet(Version, String[])"/>and/or
-	  ///            <seealso cref="WordlistLoader"/>as in
-	  ///            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
-	  ///            or <a href="http://www.unine.ch/info/clef/">other stop words
-	  ///            lists </a>. </param>
-	  public PatternAnalyzer(Version matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords)
-	  {
-		if (pattern == null)
-		{
-		  throw new System.ArgumentException("pattern must not be null");
-		}
-
-		if (eqPattern(NON_WORD_PATTERN, pattern))
-		{
-			pattern = NON_WORD_PATTERN;
-		}
-		else if (eqPattern(WHITESPACE_PATTERN, pattern))
-		{
-			pattern = WHITESPACE_PATTERN;
-		}
-
-		if (stopWords != null && stopWords.size() == 0)
-		{
-			stopWords = null;
-		}
-
-		this.pattern = pattern;
-		this.toLowerCase = toLowerCase;
-		this.stopWords = stopWords;
-		this.matchVersion = matchVersion;
-	  }
-
-	  /// <summary>
-	  /// Creates a token stream that tokenizes the given string into token terms
-	  /// (aka words).
-	  /// </summary>
-	  /// <param name="fieldName">
-	  ///            the name of the field to tokenize (currently ignored). </param>
-	  /// <param name="reader">
-	  ///            reader (e.g. charfilter) of the original text. can be null. </param>
-	  /// <param name="text">
-	  ///            the string to tokenize </param>
-	  /// <returns> a new token stream </returns>
-	  public TokenStreamComponents createComponents(string fieldName, Reader reader, string text)
-	  {
-		// Ideally the Analyzer superclass should have a method with the same signature, 
-		// with a default impl that simply delegates to the StringReader flavour. 
-		if (reader == null)
-		{
-		  reader = new FastStringReader(text);
-		}
-
-		if (pattern == NON_WORD_PATTERN) // fast path
-		{
-		  return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
-		} // fast path
-		else if (pattern == WHITESPACE_PATTERN)
-		{
-		  return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
-		}
-
-		Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
-		TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
-		return new TokenStreamComponents(tokenizer, result);
-	  }
-
-	  /// <summary>
-	  /// Creates a token stream that tokenizes all the text in the given Reader;
-	  /// This implementation forwards to <code>tokenStream(String, Reader, String)</code> and is
-	  /// less efficient than <code>tokenStream(String, Reader, String)</code>.
-	  /// </summary>
-	  /// <param name="fieldName">
-	  ///            the name of the field to tokenize (currently ignored). </param>
-	  /// <param name="reader">
-	  ///            the reader delivering the text </param>
-	  /// <returns> a new token stream </returns>
-	  public override TokenStreamComponents createComponents(string fieldName, Reader reader)
-	  {
-		return createComponents(fieldName, reader, null);
-	  }
-
-	  /// <summary>
-	  /// Indicates whether some other object is "equal to" this one.
-	  /// </summary>
-	  /// <param name="other">
-	  ///            the reference object with which to compare. </param>
-	  /// <returns> true if equal, false otherwise </returns>
-	  public override bool Equals(object other)
-	  {
-		if (this == other)
-		{
-			return true;
-		}
-		if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER)
-		{
-			return false;
-		}
-		if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER)
-		{
-			return false;
-		}
-
-		if (other is PatternAnalyzer)
-		{
-		  PatternAnalyzer p2 = (PatternAnalyzer) other;
-		  return toLowerCase == p2.toLowerCase && eqPattern(pattern, p2.pattern) && eq(stopWords, p2.stopWords);
-		}
-		return false;
-	  }
-
-	  /// <summary>
-	  /// Returns a hash code value for the object.
-	  /// </summary>
-	  /// <returns> the hash code. </returns>
-	  public override int GetHashCode()
-	  {
-		if (this == DEFAULT_ANALYZER) // fast path
-		{
-			return -1218418418;
-		}
-		if (this == EXTENDED_ANALYZER) // fast path
-		{
-			return 1303507063;
-		}
-
-		int h = 1;
-		h = 31 * h + pattern.pattern().GetHashCode();
-		h = 31 * h + pattern.flags();
-		h = 31 * h + (toLowerCase ? 1231 : 1237);
-		h = 31 * h + (stopWords != null ? stopWords.GetHashCode() : 0);
-		return h;
-	  }
-
-	  /// <summary>
-	  /// equality where o1 and/or o2 can be null </summary>
-	  private static bool eq(object o1, object o2)
-	  {
-		return (o1 == o2) || (o1 != null ? o1.Equals(o2) : false);
-	  }
-
-	  /// <summary>
-	  /// assumes p1 and p2 are not null </summary>
-	  private static bool eqPattern(Pattern p1, Pattern p2)
-	  {
-		return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().Equals(p2.pattern()));
-	  }
-
-	  /// <summary>
-	  /// Reads until end-of-stream and returns all read chars, finally closes the stream.
-	  /// </summary>
-	  /// <param name="input"> the input stream </param>
-	  /// <exception cref="IOException"> if an I/O error occurs while reading the stream </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private static String toString(java.io.Reader input) throws java.io.IOException
-	  private static string ToString(Reader input)
-	  {
-		if (input is FastStringReader) // fast path
-		{
-		  return ((FastStringReader) input).String;
-		}
-
-		try
-		{
-		  int len = 256;
-		  char[] buffer = new char[len];
-		  char[] output = new char[len];
-
-		  len = 0;
-		  int n;
-		  while ((n = input.read(buffer)) >= 0)
-		  {
-			if (len + n > output.Length) // grow capacity
-			{
-			  char[] tmp = new char[Math.Max(output.Length << 1, len + n)];
-			  Array.Copy(output, 0, tmp, 0, len);
-			  Array.Copy(buffer, 0, tmp, len, n);
-			  buffer = output; // use larger buffer for future larger bulk reads
-			  output = tmp;
-			}
-			else
-			{
-			  Array.Copy(buffer, 0, output, len, n);
-			}
-			len += n;
-		  }
-
-		  return new string(output, 0, len);
-		}
-		finally
-		{
-		  input.close();
-		}
-	  }
-
-
-	  ///////////////////////////////////////////////////////////////////////////////
-	  // Nested classes:
-	  ///////////////////////////////////////////////////////////////////////////////
-	  /// <summary>
-	  /// The work horse; performance isn't fantastic, but it's not nearly as bad
-	  /// as one might think - kudos to the Sun regex developers.
-	  /// </summary>
-	  private sealed class PatternTokenizer : Tokenizer
-	  {
-
-		internal readonly Pattern pattern;
-		internal string str;
-		internal readonly bool toLowerCase;
-		internal Matcher matcher;
-		internal int pos = 0;
-		internal bool initialized = false;
-		internal static readonly Locale locale = Locale.Default;
-		internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-		internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-
-		public PatternTokenizer(Reader input, Pattern pattern, bool toLowerCase) : base(input)
-		{
-		  this.pattern = pattern;
-		  this.matcher = pattern.matcher("");
-		  this.toLowerCase = toLowerCase;
-		}
-
-		public override bool incrementToken()
-		{
-		  if (!initialized)
-		  {
-			throw new System.InvalidOperationException("Consumer did not call reset().");
-		  }
-		  if (matcher == null)
-		  {
-			  return false;
-		  }
-		  clearAttributes();
-		  while (true) // loop takes care of leading and trailing boundary cases
-		  {
-			int start = pos;
-			int end_Renamed;
-			bool isMatch = matcher.find();
-			if (isMatch)
-			{
-			  end_Renamed = matcher.start();
-			  pos = matcher.end();
-			}
-			else
-			{
-			  end_Renamed = str.Length;
-			  matcher = null; // we're finished
-			}
-
-			if (start != end_Renamed) // non-empty match (header/trailer)
-			{
-			  string text = str.Substring(start, end_Renamed - start);
-			  if (toLowerCase)
-			  {
-				  text = text.ToLower(locale);
-			  }
-			  termAtt.setEmpty().append(text);
-			  offsetAtt.setOffset(correctOffset(start), correctOffset(end_Renamed));
-			  return true;
-			}
-			if (!isMatch)
-			{
-				return false;
-			}
-		  }
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
-		public override void end()
-		{
-		  base.end();
-		  // set final offset
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int finalOffset = correctOffset(str.length());
-		  int finalOffset = correctOffset(str.Length);
-		  this.offsetAtt.setOffset(finalOffset, finalOffset);
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-		public override void close()
-		{
-		  base.close();
-		  this.initialized = false;
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-		public override void reset()
-		{
-		  base.reset();
-		  this.str = PatternAnalyzer.ToString(input);
-		  this.matcher = pattern.matcher(this.str);
-		  this.pos = 0;
-		  this.initialized = true;
-		}
-	  }
-
-
-	  ///////////////////////////////////////////////////////////////////////////////
-	  // Nested classes:
-	  ///////////////////////////////////////////////////////////////////////////////
-	  /// <summary>
-	  /// Special-case class for best performance in common cases; this class is
-	  /// otherwise unnecessary.
-	  /// </summary>
-	  private sealed class FastStringTokenizer : Tokenizer
-	  {
-
-		internal string str;
-		internal int pos;
-		internal readonly bool isLetter;
-		internal readonly bool toLowerCase;
-		internal readonly CharArraySet stopWords;
-		internal static readonly Locale locale = Locale.Default;
-		internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-		internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-
-		public FastStringTokenizer(Reader input, bool isLetter, bool toLowerCase, CharArraySet stopWords) : base(input)
-		{
-		  this.isLetter = isLetter;
-		  this.toLowerCase = toLowerCase;
-		  this.stopWords = stopWords;
-		}
-
-		public override bool incrementToken()
-		{
-		  if (str == null)
-		  {
-			throw new System.InvalidOperationException("Consumer did not call reset().");
-		  }
-		  clearAttributes();
-		  // cache loop instance vars (performance)
-		  string s = str;
-		  int len = s.Length;
-		  int i = pos;
-		  bool letter = isLetter;
-
-		  int start = 0;
-		  string text;
-		  do
-		  {
-			// find beginning of token
-			text = null;
-			while (i < len && !isTokenChar(s[i], letter))
-			{
-			  i++;
-			}
-
-			if (i < len) // found beginning; now find end of token
-			{
-			  start = i;
-			  while (i < len && isTokenChar(s[i], letter))
-			  {
-				i++;
-			  }
-
-			  text = s.Substring(start, i - start);
-			  if (toLowerCase)
-			  {
-				  text = text.ToLower(locale);
-			  }
-	//          if (toLowerCase) {            
-	////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
-	////            see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
-	//            text = s.substring(start, i).toLowerCase(); 
-	////            char[] chars = new char[i-start];
-	////            for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
-	////            text = new String(chars);
-	//          } else {
-	//            text = s.substring(start, i);
-	//          }
-			}
-		  } while (text != null && isStopWord(text));
-
-		  pos = i;
-		  if (text == null)
-		  {
-			return false;
-		  }
-		  termAtt.setEmpty().append(text);
-		  offsetAtt.setOffset(correctOffset(start), correctOffset(i));
-		  return true;
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
-		public override void end()
-		{
-		  base.end();
-		  // set final offset
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int finalOffset = str.length();
-		  int finalOffset = str.Length;
-		  this.offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
-		}
-
-		internal bool isTokenChar(char c, bool isLetter)
-		{
-		  return isLetter ? char.IsLetter(c) :!char.IsWhiteSpace(c);
-		}
-
-		internal bool isStopWord(string text)
-		{
-		  return stopWords != null && stopWords.contains(text);
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-		public override void close()
-		{
-		  base.close();
-		  this.str = null;
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-		public override void reset()
-		{
-		  base.reset();
-		  this.str = PatternAnalyzer.ToString(input);
-		  this.pos = 0;
-		}
-	  }
-
-
-	  ///////////////////////////////////////////////////////////////////////////////
-	  // Nested classes:
-	  ///////////////////////////////////////////////////////////////////////////////
-	  /// <summary>
-	  /// A StringReader that exposes it's contained string for fast direct access.
-	  /// Might make sense to generalize this to CharSequence and make it public?
-	  /// </summary>
-	  internal sealed class FastStringReader : StringReader
-	  {
-
-		internal readonly string s;
-
-		internal FastStringReader(string s) : base(s)
-		{
-		  this.s = s;
-		}
-
-		internal string String
-		{
-			get
-			{
-			  return s;
-			}
-		}
-	  }
-
-	}
-
+    /// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
+    /// <seealso cref="TextReader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
+    /// (with behaviour identical to <seealso cref="String#split(String)"/>),
+    /// and that combines the functionality of
+    /// <seealso cref="LetterTokenizer"/>,
+    /// <seealso cref="LowerCaseTokenizer"/>,
+    /// <seealso cref="WhitespaceTokenizer"/>,
+    /// <seealso cref="StopFilter"/> into a single efficient
+    /// multi-purpose class.
+    /// <para>
+    /// If you are unsure exactly what a regular expression should look like, consider 
+    /// prototyping by simply trying various expressions on some test texts via
+    /// <seealso cref="String#split(String)"/>. Once you are satisfied, give that regex to 
+    /// PatternAnalyzer. Also see <a target="_blank" 
+    /// href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
+    /// </para>
+    /// <para>
+    /// This class can be considerably faster than the "normal" Lucene tokenizers. 
+    /// It can also serve as a building block in a compound Lucene
+    /// <seealso cref="TokenFilter"/> chain. For example as in this 
+    /// stemming example:
+    /// <pre>
+    /// PatternAnalyzer pat = ...
+    /// TokenStream tokenStream = new SnowballFilter(
+    ///     pat.tokenStream("content", "James is running round in the woods"), 
+    ///     "English");
+    /// </pre>
+    /// </para>
+    /// </summary>
+    /// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. 
+    [Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
+    public sealed class PatternAnalyzer : Analyzer
+    {
+
+        /// <summary>
+        /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
+        public static readonly Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
+
+        /// <summary>
+        /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
+        public static readonly Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
+
+        private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("a", "about", "above", "across", "adj", "after", "afterwards", "again", "against", "albeit", "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "around", "as", "at", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "co", "could", "down", "during", "each", "eg", "either", "else", "elsewhere", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "first", "for", "former", "formerly", "from", "further", "had", "has", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "i", "ie", "if", "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last", "latter", "latterly", "least", "less", "ltd", "many", "may", "me", "meanwhile", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "namely", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps", "rather", "s", "same", "seem", "seemed", "seeming", "seems", "several", "she", "should", "since", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "t", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefor", "therein", "thereupon", "these", "they", "this", "those", "though", "through", "throughout", "thru", "thus", "to", "together", "too", "toward", "towards", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereafter", "whereas", "whereat", "whereby", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "with", "within", "without", "would", "xsubj", "xcal", "xauthor", "xother ", "xnote", "yet", "you", "your", "yours", "yourself", "yourselves"), true));
+
+        /// <summary>
+        /// A lower-casing word analyzer with English stop words (can be shared
+        /// freely across threads without harm); global per class loader.
+        /// </summary>
+        public static readonly PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+
+        /// <summary>
+        /// A lower-casing word analyzer with <b>extended </b> English stop words
+        /// (can be shared freely across threads without harm); global per class
+        /// loader. The stop words are borrowed from
+        /// http://thomas.loc.gov/home/stopwords.html, see
+        /// http://thomas.loc.gov/home/all.about.inquery.html
+        /// </summary>
+        public static readonly PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
+
+        private readonly Pattern pattern;
+        private readonly bool toLowerCase;
+        private readonly CharArraySet stopWords;
+
+        private readonly Version matchVersion;
+
+        /// <summary>
+        /// Constructs a new instance with the given parameters.
+        /// </summary>
+        /// <param name="matchVersion"> currently does nothing </param>
+        /// <param name="pattern">
+        ///            a regular expression delimiting tokens </param>
+        /// <param name="toLowerCase">
+        ///            if <code>true</code> returns tokens after applying
+        ///            String.toLowerCase() </param>
+        /// <param name="stopWords">
+        ///            if non-null, ignores all tokens that are contained in the
+        ///            given stop set (after previously having applied toLowerCase()
+        ///            if applicable). For example, created via
+        ///            <seealso cref="StopFilter#makeStopSet(Version, String[])"/>and/or
+        ///            <seealso cref="WordlistLoader"/>as in
+        ///            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
+        ///            or <a href="http://www.unine.ch/info/clef/">other stop words
+        ///            lists </a>. </param>
+        public PatternAnalyzer(Version matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords)
+        {
+            if (pattern == null)
+            {
+                throw new System.ArgumentException("pattern must not be null");
+            }
+
+            if (eqPattern(NON_WORD_PATTERN, pattern))
+            {
+                pattern = NON_WORD_PATTERN;
+            }
+            else if (eqPattern(WHITESPACE_PATTERN, pattern))
+            {
+                pattern = WHITESPACE_PATTERN;
+            }
+
+            if (stopWords != null && stopWords.Size == 0)
+            {
+                stopWords = null;
+            }
+
+            this.pattern = pattern;
+            this.toLowerCase = toLowerCase;
+            this.stopWords = stopWords;
+            this.matchVersion = matchVersion;
+        }
+
+        /// <summary>
+        /// Creates a token stream that tokenizes the given string into token terms
+        /// (aka words).
+        /// </summary>
+        /// <param name="fieldName">
+        ///            the name of the field to tokenize (currently ignored). </param>
+        /// <param name="reader">
+        ///            reader (e.g. charfilter) of the original text. can be null. </param>
+        /// <param name="text">
+        ///            the string to tokenize </param>
+        /// <returns> a new token stream </returns>
+        public TokenStreamComponents createComponents(string fieldName, TextReader reader, string text)
+        {
+            // Ideally the Analyzer superclass should have a method with the same signature, 
+            // with a default impl that simply delegates to the StringReader flavour. 
+            if (reader == null)
+            {
+                reader = new FastStringReader(text);
+            }
+
+            if (pattern == NON_WORD_PATTERN) // fast path
+            {
+                return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
+            } // fast path
+            else if (pattern == WHITESPACE_PATTERN)
+            {
+                return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
+            }
+
+            Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
+            TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
+            return new TokenStreamComponents(tokenizer, result);
+        }
+
+        /// <summary>
+        /// Creates a token stream that tokenizes all the text in the given TextReader;
+        /// this implementation forwards to <code>createComponents(string, TextReader, string)</code> and is
+        /// less efficient than passing the original string to that overload directly.
+        /// </summary>
+        /// <param name="fieldName">
+        ///            the name of the field to tokenize (currently ignored). </param>
+        /// <param name="reader">
+        ///            the reader delivering the text </param>
+        /// <returns> a new token stream </returns>
+        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            return createComponents(fieldName, reader, null);
+        }
+
+        /// <summary>
+        /// Indicates whether some other object is "equal to" this one.
+        /// </summary>
+        /// <param name="other">
+        ///            the reference object with which to compare. </param>
+        /// <returns> true if equal, false otherwise </returns>
+        public override bool Equals(object other)
+        {
+            if (this == other)
+            {
+                return true;
+            }
+            if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER)
+            {
+                return false;
+            }
+            if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER)
+            {
+                return false;
+            }
+
+            var p2 = other as PatternAnalyzer;
+            if (p2 != null)
+            {
+                return toLowerCase == p2.toLowerCase && eqPattern(pattern, p2.pattern) && eq(stopWords, p2.stopWords);
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Returns a hash code value for the object.
+        /// </summary>
+        /// <returns> the hash code. </returns>
+        public override int GetHashCode()
+        {
+            if (this == DEFAULT_ANALYZER) // fast path
+            {
+                return -1218418418;
+            }
+            if (this == EXTENDED_ANALYZER) // fast path
+            {
+                return 1303507063;
+            }
+
+            int h = 1;
+            h = 31 * h + pattern.pattern().GetHashCode();
+            h = 31 * h + pattern.flags();
+            h = 31 * h + (toLowerCase ? 1231 : 1237);
+            h = 31 * h + (stopWords != null ? stopWords.GetHashCode() : 0);
+            return h;
+        }
+
+        /// <summary>
+        /// equality where o1 and/or o2 can be null </summary>
+        private static bool eq(object o1, object o2)
+        {
+            return (o1 == o2) || (o1 != null ? o1.Equals(o2) : false);
+        }
+
+        /// <summary>
+        /// assumes p1 and p2 are not null </summary>
+        private static bool eqPattern(Pattern p1, Pattern p2)
+        {
+            return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().Equals(p2.pattern()));
+        }
+
+        /// <summary>
+        /// Reads until end-of-stream and returns all read chars, finally closes the stream.
+        /// </summary>
+        /// <param name="input"> the input stream </param>
+        /// <exception cref="IOException"> if an I/O error occurs while reading the stream </exception>
+        private static string ToString(TextReader input)
+        {
+            var reader = input as FastStringReader;
+            if (reader != null) // fast path
+            {
+                return reader.String;
+            }
+
+            try
+            {
+                int len = 256;
+                char[] buffer = new char[len];
+                char[] output = new char[len];
+
+                len = 0;
+                int n;
+                while ((n = input.Read(buffer, 0, buffer.Length)) > 0) // TextReader.Read returns 0 at end of stream
+                {
+                    if (len + n > output.Length) // grow capacity
+                    {
+                        char[] tmp = new char[Math.Max(output.Length << 1, len + n)];
+                        Array.Copy(output, 0, tmp, 0, len);
+                        Array.Copy(buffer, 0, tmp, len, n);
+                        buffer = output; // use larger buffer for future larger bulk reads
+                        output = tmp;
+                    }
+                    else
+                    {
+                        Array.Copy(buffer, 0, output, len, n);
+                    }
+                    len += n;
+                }
+
+                return new string(output, 0, len);
+            }
+            finally
+            {
+                input.Dispose();
+            }
+        }
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /// <summary>
+        /// The work horse; performance isn't fantastic, but it's not nearly as bad
+        /// as one might think - kudos to the Sun regex developers.
+        /// </summary>
+        private sealed class PatternTokenizer : Tokenizer
+        {
+            private readonly Pattern pattern;
+            private string str;
+            private readonly bool toLowerCase;
+            private Matcher matcher;
+            private int pos = 0;
+            private bool initialized = false;
+            private static readonly Locale locale = Locale.Default;
+            private readonly ICharTermAttribute termAtt;
+            private readonly IOffsetAttribute offsetAtt;
+
+            public PatternTokenizer(TextReader input, Pattern pattern, bool toLowerCase)
+                : base(input)
+            {
+                termAtt = AddAttribute<ICharTermAttribute>();
+                offsetAtt = AddAttribute<IOffsetAttribute>();
+                this.pattern = pattern;
+                this.matcher = pattern.matcher("");
+                this.toLowerCase = toLowerCase;
+            }
+
+            public override bool IncrementToken()
+            {
+                if (!initialized)
+                {
+                    throw new System.InvalidOperationException("Consumer did not call reset().");
+                }
+                if (matcher == null)
+                {
+                    return false;
+                }
+                ClearAttributes();
+                while (true) // loop takes care of leading and trailing boundary cases
+                {
+                    int start = pos;
+                    int end;
+                    bool isMatch = matcher.find();
+                    if (isMatch)
+                    {
+                        end = matcher.start();
+                        pos = matcher.end();
+                    }
+                    else
+                    {
+                        end = str.Length;
+                        matcher = null; // we're finished
+                    }
+
+                    if (start != end) // non-empty match (header/trailer)
+                    {
+                        string text = str.Substring(start, end - start);
+                        if (toLowerCase)
+                        {
+                            text = text.ToLower(locale);
+                        }
+                        termAtt.SetEmpty().Append(text);
+                        offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
+                        return true;
+                    }
+                    if (!isMatch)
+                    {
+                        return false;
+                    }
+                }
+            }
+
+            public override void End()
+            {
+                base.End();
+                // set final offset
+                int finalOffset = CorrectOffset(str.Length);
+                this.offsetAtt.SetOffset(finalOffset, finalOffset);
+            }
+
+            public override void Dispose()
+            {
+                base.Dispose();
+                this.initialized = false;
+            }
+
+            public override void Reset()
+            {
+                base.Reset();
+                this.str = PatternAnalyzer.ToString(input);
+                this.matcher = pattern.matcher(this.str);
+                this.pos = 0;
+                this.initialized = true;
+            }
+        }
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /// <summary>
+        /// Special-case class for best performance in common cases; this class is
+        /// otherwise unnecessary.
+        /// </summary>
+        private sealed class FastStringTokenizer : Tokenizer
+        {
+            private string str;
+            private int pos;
+            private readonly bool isLetter;
+            private readonly bool toLowerCase;
+            private readonly CharArraySet stopWords;
+            private static readonly Locale locale = Locale.Default;
+            private readonly ICharTermAttribute termAtt;
+            private readonly IOffsetAttribute offsetAtt;
+
+            public FastStringTokenizer(TextReader input, bool isLetter, bool toLowerCase, CharArraySet stopWords)
+                : base(input)
+            {
+                termAtt = AddAttribute<ICharTermAttribute>();
+                offsetAtt = AddAttribute<IOffsetAttribute>();
+
+                this.isLetter = isLetter;
+                this.toLowerCase = toLowerCase;
+                this.stopWords = stopWords;
+            }
+
+            public override bool IncrementToken()
+            {
+                if (str == null)
+                {
+                    throw new System.InvalidOperationException("Consumer did not call reset().");
+                }
+                ClearAttributes();
+                // cache loop instance vars (performance)
+                string s = str;
+                int len = s.Length;
+                int i = pos;
+                bool letter = isLetter;
+
+                int start = 0;
+                string text;
+                do
+                {
+                    // find beginning of token
+                    text = null;
+                    while (i < len && !isTokenChar(s[i], letter))
+                    {
+                        i++;
+                    }
+
+                    if (i < len) // found beginning; now find end of token
+                    {
+                        start = i;
+                        while (i < len && isTokenChar(s[i], letter))
+                        {
+                            i++;
+                        }
+
+                        text = s.Substring(start, i - start);
+                        if (toLowerCase)
+                        {
+                            text = text.ToLower(locale);
+                        }
+                        //          if (toLowerCase) {            
+                        ////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
+                        ////            see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
+                        //            text = s.substring(start, i).toLowerCase(); 
+                        ////            char[] chars = new char[i-start];
+                        ////            for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
+                        ////            text = new String(chars);
+                        //          } else {
+                        //            text = s.substring(start, i);
+                        //          }
+                    }
+                } while (text != null && isStopWord(text));
+
+                pos = i;
+                if (text == null)
+                {
+                    return false;
+                }
+                termAtt.SetEmpty().Append(text);
+                offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(i));
+                return true;
+            }
+
+            public override void End()
+            {
+                base.End();
+                // set final offset
+                int finalOffset = str.Length;
+                this.offsetAtt.SetOffset(CorrectOffset(finalOffset), CorrectOffset(finalOffset));
+            }
+
+            private bool isTokenChar(char c, bool isLetter)
+            {
+                return isLetter ? char.IsLetter(c) : !char.IsWhiteSpace(c);
+            }
+
+            private bool isStopWord(string text)
+            {
+                return stopWords != null && stopWords.Contains(text);
+            }
+
+            public override void Dispose()
+            {
+                base.Dispose();
+                this.str = null;
+            }
+
+            public override void Reset()
+            {
+                base.Reset();
+                this.str = PatternAnalyzer.ToString(input);
+                this.pos = 0;
+            }
+        }
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /// <summary>
+        /// A StringReader that exposes its contained string for fast direct access.
+        /// Might make sense to generalize this to CharSequence and make it public?
+        /// </summary>
+        internal sealed class FastStringReader : StringReader
+        {
+
+            internal readonly string s;
+
+            internal FastStringReader(string s)
+                : base(s)
+            {
+                this.s = s;
+            }
+
+            internal string String
+            {
+                get
+                {
+                    return s;
+                }
+            }
+        }
+
+    }
 }
\ No newline at end of file

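For readers following the port, a rough C# usage sketch of the analyzer above (illustrative only, not part of this commit): it assumes the Java-style Pattern/Version shims referenced by the ported file resolve, and that the port exposes the usual Lucene.NET token stream consumer protocol with the member casing shown in the diff.

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Tokenattributes;
    using Version = Lucene.Net.Util.Version;

    // Hypothetical consumer: tokenize a string on whitespace (fast path),
    // lowercase the terms, and drop English stop words.
    var analyzer = new PatternAnalyzer(Version.LUCENE_CURRENT,
        PatternAnalyzer.WHITESPACE_PATTERN, true,
        StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    TokenStream ts = analyzer.TokenStream("content",
        new StringReader("James is running round in the woods"));
    var termAtt = ts.AddAttribute<ICharTermAttribute>();
    ts.Reset();
    while (ts.IncrementToken())
    {
        Console.WriteLine(termAtt.ToString()); // james, running, round, woods
    }
    ts.End();
    ts.Dispose();
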
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
index 3886da0..56a2331 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -1,62 +1,60 @@
-using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Tokenattributes;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-
-	/// <summary>
-	/// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
-	/// that matches the provided pattern is marked as a keyword by setting
-	/// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
-	/// </summary>
-	public sealed class PatternKeywordMarkerFilter : KeywordMarkerFilter
-	{
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly Matcher matcher;
-
-	  /// <summary>
-	  /// Create a new <seealso cref="PatternKeywordMarkerFilter"/>, that marks the current
-	  /// token as a keyword if the tokens term buffer matches the provided
-	  /// <seealso cref="Pattern"/> via the <seealso cref="KeywordAttribute"/>.
-	  /// </summary>
-	  /// <param name="in">
-	  ///          TokenStream to filter </param>
-	  /// <param name="pattern">
-	  ///          the pattern to apply to the incoming term buffer
-	  ///  </param>
-	  public PatternKeywordMarkerFilter(TokenStream @in, Pattern pattern) : base(@in)
-	  {
-		this.matcher = pattern.matcher("");
-	  }
-
-	  protected internal override bool Keyword
-	  {
-		  get
-		  {
-			matcher.reset(termAtt);
-			return matcher.matches();
-		  }
-	  }
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
+    /// that matches the provided pattern is marked as a keyword by setting
+    /// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+    /// </summary>
+    public sealed class PatternKeywordMarkerFilter : KeywordMarkerFilter
+    {
+        private readonly ICharTermAttribute termAtt;
+        private readonly Matcher matcher;
+
+        /// <summary>
+        /// Create a new <seealso cref="PatternKeywordMarkerFilter"/>, that marks the current
+        /// token as a keyword if the token's term buffer matches the provided
+        /// <seealso cref="Pattern"/> via the <seealso cref="KeywordAttribute"/>.
+        /// </summary>
+        /// <param name="in">
+        ///          TokenStream to filter </param>
+        /// <param name="pattern">
+        ///          the pattern to apply to the incoming term buffer
+        ///  </param>
+        public PatternKeywordMarkerFilter(TokenStream @in, Pattern pattern)
+            : base(@in)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>();
+
+            this.matcher = pattern.matcher("");
+        }
+
+        protected internal override bool Keyword
+        {
+            get
+            {
+                matcher.reset(termAtt);
+                return matcher.matches();
+            }
+        }
+
+    }
 }
\ No newline at end of file

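The filter above is normally placed in front of a stemmer, so that terms matching the pattern keep their surface form. A minimal sketch of that chain (again illustrative, not part of this commit; it assumes the Java-style Pattern shim plus ported WhitespaceTokenizer and PorterStemFilter classes):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.En;
    using Lucene.Net.Analysis.Miscellaneous;
    using Version = Lucene.Net.Util.Version;

    // Hypothetical chain: terms ending in "net" are marked as keywords, so
    // the Porter stemmer (which honors KeywordAttribute) leaves them intact.
    TextReader reader = new StringReader("running woods lucenenet");
    TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*net"));
    stream = new PorterStemFilter(stream);
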
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
index f61b230..52c8ded 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
@@ -1,93 +1,94 @@
 using System.Collections.Generic;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
 
 
-	/// <summary>
-	/// This analyzer is used to facilitate scenarios where different
-	/// fields require different analysis techniques.  Use the Map
-	/// argument in <seealso cref="#PerFieldAnalyzerWrapper(Analyzer, java.util.Map)"/>
-	/// to add non-default analyzers for fields.
-	/// 
-	/// <para>Example usage:
-	/// 
-	/// <pre class="prettyprint">
-	/// {@code
-	/// Map<String,Analyzer> analyzerPerField = new HashMap<>();
-	/// analyzerPerField.put("firstname", new KeywordAnalyzer());
-	/// analyzerPerField.put("lastname", new KeywordAnalyzer());
-	/// 
-	/// PerFieldAnalyzerWrapper aWrapper =
-	///   new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);
-	/// }
-	/// </pre>
-	/// 
-	/// </para>
-	/// <para>In this example, StandardAnalyzer will be used for all fields except "firstname"
-	/// and "lastname", for which KeywordAnalyzer will be used.
-	/// 
-	/// </para>
-	/// <para>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
-	/// and query parsing.
-	/// </para>
-	/// </summary>
-	public sealed class PerFieldAnalyzerWrapper : AnalyzerWrapper
-	{
-	  private readonly Analyzer defaultAnalyzer;
-	  private readonly IDictionary<string, Analyzer> fieldAnalyzers;
+    /// <summary>
+    /// This analyzer is used to facilitate scenarios where different
+    /// fields require different analysis techniques.  Use the Map
+    /// argument in <seealso cref="PerFieldAnalyzerWrapper(Analyzer, IDictionary{string, Analyzer})"/>
+    /// to add non-default analyzers for fields.
+    /// 
+    /// <para>Example usage:
+    /// 
+    /// <code>
+    /// IDictionary&lt;string, Analyzer&gt; analyzerPerField = new Dictionary&lt;string, Analyzer&gt;();
+    /// analyzerPerField["firstname"] = new KeywordAnalyzer();
+    /// analyzerPerField["lastname"] = new KeywordAnalyzer();
+    /// 
+    /// PerFieldAnalyzerWrapper aWrapper =
+    ///   new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);
+    /// </code>
+    /// 
+    /// </para>
+    /// <para>In this example, StandardAnalyzer will be used for all fields except "firstname"
+    /// and "lastname", for which KeywordAnalyzer will be used.
+    /// 
+    /// </para>
+    /// <para>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+    /// and query parsing.
+    /// </para>
+    /// </summary>
+    public sealed class PerFieldAnalyzerWrapper : AnalyzerWrapper
+    {
+        private readonly Analyzer defaultAnalyzer;
+        private readonly IDictionary<string, Analyzer> fieldAnalyzers;
 
-	  /// <summary>
-	  /// Constructs with default analyzer.
-	  /// </summary>
-	  /// <param name="defaultAnalyzer"> Any fields not specifically
-	  /// defined to use a different analyzer will use the one provided here. </param>
-	  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) : this(defaultAnalyzer, null)
-	  {
-	  }
+        /// <summary>
+        /// Constructs with default analyzer.
+        /// </summary>
+        /// <param name="defaultAnalyzer"> Any fields not specifically
+        /// defined to use a different analyzer will use the one provided here. </param>
+        public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer)
+            : this(defaultAnalyzer, null)
+        {
+        }
 
-	  /// <summary>
-	  /// Constructs with default analyzer and a map of analyzers to use for 
-	  /// specific fields.
-	  /// </summary>
-	  /// <param name="defaultAnalyzer"> Any fields not specifically
-	  /// defined to use a different analyzer will use the one provided here. </param>
-	  /// <param name="fieldAnalyzers"> a Map (String field name to the Analyzer) to be 
-	  /// used for those fields  </param>
-	  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IDictionary<string, Analyzer> fieldAnalyzers) : base(PER_FIELD_REUSE_STRATEGY)
-	  {
-		this.defaultAnalyzer = defaultAnalyzer;
-		this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : System.Linq.Enumerable.Empty<string, Analyzer>();
-	  }
+        /// <summary>
+        /// Constructs with default analyzer and a map of analyzers to use for 
+        /// specific fields.
+        /// </summary>
+        /// <param name="defaultAnalyzer"> Any fields not specifically
+        /// defined to use a different analyzer will use the one provided here. </param>
+        /// <param name="fieldAnalyzers"> a Map (String field name to the Analyzer) to be 
+        /// used for those fields  </param>
+        public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IDictionary<string, Analyzer> fieldAnalyzers)
+            : base(PER_FIELD_REUSE_STRATEGY)
+        {
+            this.defaultAnalyzer = defaultAnalyzer;
+            this.fieldAnalyzers = fieldAnalyzers ?? new Dictionary<string, Analyzer>();
+        }
 
-	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
-	  {
-		Analyzer analyzer = fieldAnalyzers[fieldName];
-		return (analyzer != null) ? analyzer : defaultAnalyzer;
-	  }
-
-	  public override string ToString()
-	  {
-		return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
-	  }
-	}
+        protected override Analyzer GetWrappedAnalyzer(string fieldName)
+        {
+            Analyzer analyzer;
+            fieldAnalyzers.TryGetValue(fieldName, out analyzer);
+            return analyzer ?? defaultAnalyzer;
+        }
 
+        public override string ToString()
+        {
+            return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
+        }
+    }
 }
\ No newline at end of file
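
For reference, a minimal usage sketch of the ported wrapper (KeywordAnalyzer, StandardAnalyzer and Version.LUCENE_48 are assumed from the surrounding Lucene.Net namespaces, not part of this diff):

    using System.Collections.Generic;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Standard;
    using Version = Lucene.Net.Util.Version;

    // Route "firstname"/"lastname" through KeywordAnalyzer; all other
    // fields fall back to the default StandardAnalyzer.
    var analyzerPerField = new Dictionary<string, Analyzer>
    {
        { "firstname", new KeywordAnalyzer() },
        { "lastname", new KeywordAnalyzer() }
    };
    var aWrapper = new PerFieldAnalyzerWrapper(
        new StandardAnalyzer(Version.LUCENE_48), analyzerPerField);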

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
index de8b8ba..86f9548 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
@@ -1,4 +1,7 @@
-namespace org.apache.lucene.analysis.miscellaneous
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Miscellaneous;
+
+namespace org.apache.lucene.analysis.miscellaneous
 {
 
 	/*
@@ -28,7 +31,7 @@
 	public class PrefixAndSuffixAwareTokenFilter : TokenStream
 	{
 
-	  private PrefixAwareTokenFilter suffix;
+	  private readonly PrefixAwareTokenFilter suffix;
 
 	  public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
 	  {
@@ -68,29 +71,24 @@
 
 	  public virtual Token updateInputToken(Token inputToken, Token lastPrefixToken)
 	  {
-		inputToken.setOffset(lastPrefixToken.endOffset() + inputToken.startOffset(), lastPrefixToken.endOffset() + inputToken.endOffset());
+		inputToken.SetOffset(lastPrefixToken.EndOffset() + inputToken.StartOffset(), lastPrefixToken.EndOffset() + inputToken.EndOffset());
 		return inputToken;
 	  }
 
 	  public virtual Token updateSuffixToken(Token suffixToken, Token lastInputToken)
 	  {
-		suffixToken.setOffset(lastInputToken.endOffset() + suffixToken.startOffset(), lastInputToken.endOffset() + suffixToken.endOffset());
+		suffixToken.SetOffset(lastInputToken.EndOffset() + suffixToken.StartOffset(), lastInputToken.EndOffset() + suffixToken.EndOffset());
 		return suffixToken;
 	  }
 
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
+	  public override bool IncrementToken()
 	  {
-		return suffix.incrementToken();
+		return suffix.IncrementToken();
 	  }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
+	  public override void Reset()
 	  {
-		suffix.reset();
+		suffix.Reset();
 	  }
 
 

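A quick wiring sketch for the filter above, where input is any TokenStream to wrap (SingleTokenTokenStream is the companion class from the same miscellaneous package in Java Lucene; its availability at this point of the port is an assumption):

    // Bracket an input stream with one sentinel token on each side; the
    // filter shifts the input/suffix offsets past the prefix automatically.
    TokenStream prefix = new SingleTokenTokenStream(new Token("_start_", 0, 0));
    TokenStream suffix = new SingleTokenTokenStream(new Token("_end_", 0, 0));
    TokenStream wrapped = new PrefixAndSuffixAwareTokenFilter(prefix, input, suffix);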
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
index 7835e7a..13fd361 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
@@ -1,246 +1,224 @@
-namespace org.apache.lucene.analysis.miscellaneous
-{
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-	using BytesRef = org.apache.lucene.util.BytesRef;
-
-
-	/// <summary>
-	/// Joins two token streams and leaves the last token of the first stream available
-	/// to be used when updating the token values in the second stream based on that token.
-	/// 
-	/// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
-	/// <p/>
-	/// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
-	/// the ones located in org.apache.lucene.analysis.tokenattributes. 
-	/// </summary>
-	public class PrefixAwareTokenFilter : TokenStream
-	{
-
-	  private TokenStream prefix;
-	  private TokenStream suffix;
-
-	  private CharTermAttribute termAtt;
-	  private PositionIncrementAttribute posIncrAtt;
-	  private PayloadAttribute payloadAtt;
-	  private OffsetAttribute offsetAtt;
-	  private TypeAttribute typeAtt;
-	  private FlagsAttribute flagsAtt;
-
-	  private CharTermAttribute p_termAtt;
-	  private PositionIncrementAttribute p_posIncrAtt;
-	  private PayloadAttribute p_payloadAtt;
-	  private OffsetAttribute p_offsetAtt;
-	  private TypeAttribute p_typeAtt;
-	  private FlagsAttribute p_flagsAtt;
-
-	  public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
-	  {
-		this.suffix = suffix;
-		this.prefix = prefix;
-		prefixExhausted = false;
-
-		termAtt = addAttribute(typeof(CharTermAttribute));
-		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
-		payloadAtt = addAttribute(typeof(PayloadAttribute));
-		offsetAtt = addAttribute(typeof(OffsetAttribute));
-		typeAtt = addAttribute(typeof(TypeAttribute));
-		flagsAtt = addAttribute(typeof(FlagsAttribute));
-
-		p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
-		p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
-		p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
-		p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
-		p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
-		p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
-	  }
-
-	  private Token previousPrefixToken = new Token();
-	  private Token reusableToken = new Token();
-
-	  private bool prefixExhausted;
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (!prefixExhausted)
-		{
-		  Token nextToken = getNextPrefixInputToken(reusableToken);
-		  if (nextToken == null)
-		  {
-			prefixExhausted = true;
-		  }
-		  else
-		  {
-			previousPrefixToken.reinit(nextToken);
-			// Make it a deep copy
-			BytesRef p = previousPrefixToken.Payload;
-			if (p != null)
-			{
-			  previousPrefixToken.Payload = p.clone();
-			}
-			CurrentToken = nextToken;
-			return true;
-		  }
-		}
-
-		Token nextToken = getNextSuffixInputToken(reusableToken);
-		if (nextToken == null)
-		{
-		  return false;
-		}
-
-		nextToken = updateSuffixToken(nextToken, previousPrefixToken);
-		CurrentToken = nextToken;
-		return true;
-	  }
-
-	  private Token CurrentToken
-	  {
-		  set
-		  {
-			if (value == null)
-			{
-				return;
-			}
-			clearAttributes();
-			termAtt.copyBuffer(value.buffer(), 0, value.length());
-			posIncrAtt.PositionIncrement = value.PositionIncrement;
-			flagsAtt.Flags = value.Flags;
-			offsetAtt.setOffset(value.startOffset(), value.endOffset());
-			typeAtt.Type = value.type();
-			payloadAtt.Payload = value.Payload;
-		  }
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private org.apache.lucene.analysis.Token getNextPrefixInputToken(org.apache.lucene.analysis.Token token) throws java.io.IOException
-	  private Token getNextPrefixInputToken(Token token)
-	  {
-		if (!prefix.incrementToken())
-		{
-			return null;
-		}
-		token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
-		token.PositionIncrement = p_posIncrAtt.PositionIncrement;
-		token.Flags = p_flagsAtt.Flags;
-		token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
-		token.Type = p_typeAtt.type();
-		token.Payload = p_payloadAtt.Payload;
-		return token;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private org.apache.lucene.analysis.Token getNextSuffixInputToken(org.apache.lucene.analysis.Token token) throws java.io.IOException
-	  private Token getNextSuffixInputToken(Token token)
-	  {
-		if (!suffix.incrementToken())
-		{
-			return null;
-		}
-		token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
-		token.PositionIncrement = posIncrAtt.PositionIncrement;
-		token.Flags = flagsAtt.Flags;
-		token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
-		token.Type = typeAtt.type();
-		token.Payload = payloadAtt.Payload;
-		return token;
-	  }
-
-	  /// <summary>
-	  /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
-	  /// </summary>
-	  /// <param name="suffixToken"> a token from the suffix stream </param>
-	  /// <param name="lastPrefixToken"> the last token from the prefix stream </param>
-	  /// <returns> consumer token </returns>
-	  public virtual Token updateSuffixToken(Token suffixToken, Token lastPrefixToken)
-	  {
-		suffixToken.setOffset(lastPrefixToken.endOffset() + suffixToken.startOffset(), lastPrefixToken.endOffset() + suffixToken.endOffset());
-		return suffixToken;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void end() throws java.io.IOException
-	  public override void end()
-	  {
-		prefix.end();
-		suffix.end();
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-	  public override void close()
-	  {
-		prefix.close();
-		suffix.close();
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		if (prefix != null)
-		{
-		  prefixExhausted = false;
-		  prefix.reset();
-		}
-		if (suffix != null)
-		{
-		  suffix.reset();
-		}
-
-
-	  }
-
-	  public virtual TokenStream Prefix
-	  {
-		  get
-		  {
-			return prefix;
-		  }
-		  set
-		  {
-			this.prefix = value;
-		  }
-	  }
-
-
-	  public virtual TokenStream Suffix
-	  {
-		  get
-		  {
-			return suffix;
-		  }
-		  set
-		  {
-			this.suffix = value;
-		  }
-	  }
-
-	}
+namespace Lucene.Net.Analysis.Miscellaneous
+{
 
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Joins two token streams and leaves the last token of the first stream available
+    /// to be used when updating the token values in the second stream based on that token.
+    /// 
+    /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+    /// <p/>
+    /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+    /// the ones located in Lucene.Net.Analysis.Tokenattributes. 
+    /// </summary>
+    public class PrefixAwareTokenFilter : TokenStream
+    {
+
+        private TokenStream prefix;
+        private TokenStream suffix;
+
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posIncrAtt;
+        private readonly IPayloadAttribute payloadAtt;
+        private readonly IOffsetAttribute offsetAtt;
+        private readonly ITypeAttribute typeAtt;
+        private readonly IFlagsAttribute flagsAtt;
+
+        private readonly ICharTermAttribute p_termAtt;
+        private readonly IPositionIncrementAttribute p_posIncrAtt;
+        private readonly IPayloadAttribute p_payloadAtt;
+        private readonly IOffsetAttribute p_offsetAtt;
+        private readonly ITypeAttribute p_typeAtt;
+        private readonly IFlagsAttribute p_flagsAtt;
+
+        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
+            : base(suffix)
+        {
+            this.suffix = suffix;
+            this.prefix = prefix;
+            prefixExhausted = false;
+
+            termAtt = AddAttribute<ICharTermAttribute>();
+            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+            payloadAtt = AddAttribute<IPayloadAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            typeAtt = AddAttribute<ITypeAttribute>();
+            flagsAtt = AddAttribute<IFlagsAttribute>();
+
+            p_termAtt = prefix.AddAttribute<ICharTermAttribute>();
+            p_posIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
+            p_payloadAtt = prefix.AddAttribute<IPayloadAttribute>();
+            p_offsetAtt = prefix.AddAttribute<IOffsetAttribute>();
+            p_typeAtt = prefix.AddAttribute<ITypeAttribute>();
+            p_flagsAtt = prefix.AddAttribute<IFlagsAttribute>();
+        }
+
+        private readonly Token previousPrefixToken = new Token();
+        private readonly Token reusableToken = new Token();
+
+        private bool prefixExhausted;
+
+        public override bool IncrementToken()
+        {
+            if (!prefixExhausted)
+            {
+                Token nextToken = getNextPrefixInputToken(reusableToken);
+                if (nextToken == null)
+                {
+                    prefixExhausted = true;
+                }
+                else
+                {
+                    previousPrefixToken.Reinit(nextToken);
+                    // Make it a deep copy
+                    BytesRef p = previousPrefixToken.Payload;
+                    if (p != null)
+                    {
+                        previousPrefixToken.Payload = p.Clone();
+                    }
+                    CurrentToken = nextToken;
+                    return true;
+                }
+            }
+
+            Token nextToken = getNextSuffixInputToken(reusableToken);
+            if (nextToken == null)
+            {
+                return false;
+            }
+
+            nextToken = updateSuffixToken(nextToken, previousPrefixToken);
+            CurrentToken = nextToken;
+            return true;
+        }
+
+        private Token CurrentToken
+        {
+            set
+            {
+                if (value == null)
+                {
+                    return;
+                }
+                ClearAttributes();
+                termAtt.CopyBuffer(value.Buffer(), 0, value.Length);
+                posIncrAtt.PositionIncrement = value.PositionIncrement;
+                flagsAtt.Flags = value.Flags;
+                offsetAtt.SetOffset(value.StartOffset(), value.EndOffset());
+                typeAtt.Type = value.Type;
+                payloadAtt.Payload = value.Payload;
+            }
+        }
+
+        private Token getNextPrefixInputToken(Token token)
+        {
+            if (!prefix.IncrementToken())
+            {
+                return null;
+            }
+            token.CopyBuffer(p_termAtt.Buffer(), 0, p_termAtt.Length);
+            token.PositionIncrement = p_posIncrAtt.PositionIncrement;
+            token.Flags = p_flagsAtt.Flags;
+            token.SetOffset(p_offsetAtt.StartOffset(), p_offsetAtt.EndOffset());
+            token.Type = p_typeAtt.Type;
+            token.Payload = p_payloadAtt.Payload;
+            return token;
+        }
+
+        private Token getNextSuffixInputToken(Token token)
+        {
+            if (!suffix.IncrementToken())
+            {
+                return null;
+            }
+            token.CopyBuffer(termAtt.Buffer(), 0, termAtt.Length);
+            token.PositionIncrement = posIncrAtt.PositionIncrement;
+            token.Flags = flagsAtt.Flags;
+            token.SetOffset(offsetAtt.StartOffset(), offsetAtt.EndOffset());
+            token.Type = typeAtt.Type;
+            token.Payload = payloadAtt.Payload;
+            return token;
+        }
+
+        /// <summary>
+        /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+        /// </summary>
+        /// <param name="suffixToken"> a token from the suffix stream </param>
+        /// <param name="lastPrefixToken"> the last token from the prefix stream </param>
+        /// <returns> consumer token </returns>
+        public virtual Token updateSuffixToken(Token suffixToken, Token lastPrefixToken)
+        {
+            suffixToken.SetOffset(lastPrefixToken.EndOffset() + suffixToken.StartOffset(), lastPrefixToken.EndOffset() + suffixToken.EndOffset());
+            return suffixToken;
+        }
+
+        public override void End()
+        {
+            prefix.End();
+            suffix.End();
+        }
+
+        public override void Dispose()
+        {
+            prefix.Dispose();
+            suffix.Dispose();
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            if (prefix != null)
+            {
+                prefixExhausted = false;
+                prefix.Reset();
+            }
+            if (suffix != null)
+            {
+                suffix.Reset();
+            }
+        }
+
+        public virtual TokenStream Prefix
+        {
+            get
+            {
+                return prefix;
+            }
+            set
+            {
+                this.prefix = value;
+            }
+        }
+
+
+        public virtual TokenStream Suffix
+        {
+            get
+            {
+                return suffix;
+            }
+            set
+            {
+                this.suffix = value;
+            }
+        }
+    }
 }
\ No newline at end of file
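
A minimal consumption sketch for the ported filter (prefixStream and suffixStream are placeholder TokenStreams; the attribute accessors follow the PascalCase API used in the diff above):

    TokenStream joined = new PrefixAwareTokenFilter(prefixStream, suffixStream);
    var termAtt = joined.AddAttribute<ICharTermAttribute>();
    var offsetAtt = joined.AddAttribute<IOffsetAttribute>();
    joined.Reset();
    while (joined.IncrementToken())
    {
        // Suffix-side offsets arrive shifted by the last prefix token's
        // end offset (see updateSuffixToken above).
        Console.WriteLine("{0} [{1},{2}]", termAtt,
            offsetAtt.StartOffset(), offsetAtt.EndOffset());
    }
    joined.End();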

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
index 59b8dcb..85b997e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Util;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Util;
 
 namespace org.apache.lucene.analysis.ngram
 {
@@ -20,7 +21,7 @@ namespace org.apache.lucene.analysis.ngram
 	 * limitations under the License.
 	 */
 
-	using CodepointCountFilter = org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
+	using CodepointCountFilter = Lucene.Net.Analysis.Miscellaneous.CodepointCountFilter;
 	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
index af4c555..5fe93c3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -1,7 +1,11 @@
 using System;
 using System.Collections.Generic;
+using System.IO;
 using Lucene.Net.Analysis.Core;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
 using org.apache.lucene.analysis.util;
+using Version = Lucene.Net.Util.Version;
 
 namespace Lucene.Net.Analysis.Util
 {
@@ -44,7 +48,7 @@ namespace Lucene.Net.Analysis.Util
 
 	  /// <summary>
 	  /// the luceneVersion arg </summary>
-	  protected internal readonly Lucene.Net.Util.Version? luceneMatchVersion;
+	  protected internal readonly Lucene.Net.Util.Version luceneMatchVersion;
 
         /// <summary>
 	  /// Initialize this factory via a set of key-value pairs.
@@ -75,7 +79,6 @@ namespace Lucene.Net.Analysis.Util
 	  {
 		if (luceneMatchVersion == null)
 		{
-//JAVA TO C# CONVERTER WARNING: The .NET Type.FullName property will not always yield results identical to the Java Class.getName method:
 		  throw new System.ArgumentException("Configuration Error: Factory '" + this.GetType().FullName + "' needs a 'luceneMatchVersion' parameter");
 		}
 	  }
@@ -278,8 +281,6 @@ namespace Lucene.Net.Analysis.Util
 	  /// Returns as <seealso cref="CharArraySet"/> from wordFiles, which
 	  /// can be a comma-separated list of filenames
 	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws java.io.IOException
 	  protected internal CharArraySet GetWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
 	  {
 		assureMatchVersion();
@@ -292,8 +293,8 @@ namespace Lucene.Net.Analysis.Util
 		  words = new CharArraySet(luceneMatchVersion, files.Count * 10, ignoreCase);
 		  foreach (string file in files)
 		  {
-			IList<string> wlist = getLines(loader, file.Trim());
-			words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, ignoreCase));
+			var wlist = getLines(loader, file.Trim());
+			words.AddAll(StopFilter.MakeStopSet(luceneMatchVersion, wlist, ignoreCase));
 		  }
 		}
 		return words;
@@ -324,7 +325,7 @@ namespace Lucene.Net.Analysis.Util
 		  foreach (string file in files)
 		  {
-			InputStream stream = null;
-			Reader reader = null;
+			Stream stream = null;
+			TextReader reader = null;
 			try
 			{
 			  stream = loader.openResource(file.Trim());
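
For context, GetWordSet above is what concrete factories call to resolve a comma-separated word-file list; a sketch of the call site (the loader instance and file names are placeholders):

    // Inside a factory subclass, once a ResourceLoader is available:
    CharArraySet stopWords = GetWordSet(loader, "stopwords.txt, extra.txt", true);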

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index e6d7cac..02d5ac8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Util
 	  /// <param name="ignoreCase">
 	  ///          <code>false</code> if and only if the set should be case sensitive
 	  ///          otherwise <code>true</code>. </param>
-	  public CharArraySet(Version matchVersion, int startSize, bool ignoreCase) : this(new CharArrayMap<>(matchVersion, startSize, ignoreCase))
+	  public CharArraySet(Lucene.Net.Util.Version matchVersion, int startSize, bool ignoreCase) : this(new CharArrayMap<object>(matchVersion, startSize, ignoreCase))
 	  {
 	  }
 
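The corrected constructor in use (LUCENE_48 as the match version and the ignore-case lookup behaviour are assumptions for illustration):

    var set = new CharArraySet(Lucene.Net.Util.Version.LUCENE_48, 16, true);
    set.Add("foo");
    bool found = set.Contains("FOO"); // true, since ignoreCase == true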

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
index d5eb9fd..a52cc83 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
@@ -44,7 +44,7 @@ namespace Lucene.Net.Analysis.Util
         }
 
         protected internal readonly Version version;
-        private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+        private readonly IPositionIncrementAttribute posIncrAtt;
         private bool enablePositionIncrements; // no init needed, as ctor enforces setting value!
         private bool first = true;
         private int skippedPositions;
@@ -56,9 +56,10 @@ namespace Lucene.Net.Analysis.Util
         /// <param name="input">                    the input to consume </param>
         /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4 
         [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4")]
-        public FilteringTokenFilter(Version version, bool enablePositionIncrements, TokenStream input)
+        public FilteringTokenFilter(Lucene.Net.Util.Version version, bool enablePositionIncrements, TokenStream input)
             : this(version, input)
         {
+            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
             CheckPositionIncrement(version, enablePositionIncrements);
             this.enablePositionIncrements = enablePositionIncrements;
         }
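
For orientation, a subclass of FilteringTokenFilter only has to implement the accept hook; a minimal sketch (the Accept() name mirrors Java's accept() and is assumed for the port):

    public sealed class LongTokenFilter : FilteringTokenFilter
    {
        private readonly ICharTermAttribute termAtt;

        public LongTokenFilter(Lucene.Net.Util.Version version, TokenStream input)
            : base(version, input)
        {
            termAtt = AddAttribute<ICharTermAttribute>();
        }

        // Keep only terms longer than three characters; position increments
        // of dropped tokens are handled by the base class.
        protected override bool Accept()
        {
            return termAtt.Length > 3;
        }
    }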


[5/5] lucenenet git commit: More porting work

Posted by sy...@apache.org.
More porting work


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/69f29113
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/69f29113
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/69f29113

Branch: refs/heads/master
Commit: 69f29113e1244e3c3ae0d916b18769873ebaa62e
Parents: 0fae0c4
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Fri Jan 30 14:36:15 2015 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Fri Jan 30 14:39:00 2015 +0200

----------------------------------------------------------------------
 .../Miscellaneous/ASCIIFoldingFilter.cs         | 4160 +++++++++---------
 .../Miscellaneous/ASCIIFoldingFilterFactory.cs  |  103 +-
 .../Miscellaneous/CapitalizationFilter.cs       |  387 +-
 .../CapitalizationFilterFactory.cs              |    1 -
 .../Miscellaneous/CodepointCountFilter.cs       |  143 +-
 .../CodepointCountFilterFactory.cs              |    3 +-
 .../Miscellaneous/HyphenatedWordsFilter.cs      |    1 -
 .../HyphenatedWordsFilterFactory.cs             |    1 -
 .../Miscellaneous/KeepWordFilterFactory.cs      |    2 -
 .../Miscellaneous/KeywordRepeatFilter.cs        |  130 +-
 .../Miscellaneous/KeywordRepeatFilterFactory.cs |    1 -
 .../Analysis/Miscellaneous/LengthFilter.cs      |    2 +-
 .../Miscellaneous/LengthFilterFactory.cs        |  114 +-
 .../Miscellaneous/LimitTokenCountAnalyzer.cs    |  121 +-
 .../Miscellaneous/LimitTokenCountFilter.cs      |  194 +-
 .../LimitTokenCountFilterFactory.cs             |  120 +-
 .../Miscellaneous/LimitTokenPositionFilter.cs   |  215 +-
 .../LimitTokenPositionFilterFactory.cs          |  120 +-
 .../Lucene47WordDelimiterFilter.cs              | 1191 ++---
 .../Analysis/Miscellaneous/PatternAnalyzer.cs   | 1089 +++--
 .../Miscellaneous/PatternKeywordMarkerFilter.cs |  114 +-
 .../Miscellaneous/PerFieldAnalyzerWrapper.cs    |  163 +-
 .../PrefixAndSuffixAwareTokenFilter.cs          |   22 +-
 .../Miscellaneous/PrefixAwareTokenFilter.cs     |  464 +-
 .../Analysis/Ngram/NGramTokenFilter.cs          |    5 +-
 .../Analysis/Util/AbstractAnalysisFactory.cs    |   15 +-
 .../Analysis/Util/CharArraySet.cs               |    2 +-
 .../Analysis/Util/FilteringTokenFilter.cs       |    5 +-
 28 files changed, 4419 insertions(+), 4469 deletions(-)
----------------------------------------------------------------------



[2/5] lucenenet git commit: More porting work

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
index 950d0ca..db8c586 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -1,6 +1,4 @@
-using System;
-using System.Text;
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -16,598 +14,607 @@ using System.Text;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-using Lucene.Net.Analysis;
+using System;
+using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
+using org.apache.lucene.analysis.miscellaneous;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/// <summary>
-	/// Old Broken version of <seealso cref="WordDelimiterFilter"/>
-	/// </summary>
-	[Obsolete]
-	public sealed class Lucene47WordDelimiterFilter : TokenFilter
-	{
-		private bool InstanceFieldsInitialized = false;
-
-		private void InitializeInstanceFields()
-		{
-			concat = new WordDelimiterConcatenation(this);
-			concatAll = new WordDelimiterConcatenation(this);
-		}
-
-
-	  public const int LOWER = 0x01;
-	  public const int UPPER = 0x02;
-	  public const int DIGIT = 0x04;
-	  public const int SUBWORD_DELIM = 0x08;
-
-	  // combinations: for testing, not for setting bits
-	  public const int ALPHA = 0x03;
-	  public const int ALPHANUM = 0x07;
-
-	  /// <summary>
-	  /// Causes parts of words to be generated:
-	  /// <p/>
-	  /// "PowerShot" => "Power" "Shot"
-	  /// </summary>
-	  public const int GENERATE_WORD_PARTS = 1;
-
-	  /// <summary>
-	  /// Causes number subwords to be generated:
-	  /// <p/>
-	  /// "500-42" => "500" "42"
-	  /// </summary>
-	  public const int GENERATE_NUMBER_PARTS = 2;
-
-	  /// <summary>
-	  /// Causes maximum runs of word parts to be catenated:
-	  /// <p/>
-	  /// "wi-fi" => "wifi"
-	  /// </summary>
-	  public const int CATENATE_WORDS = 4;
-
-	  /// <summary>
-	  /// Causes maximum runs of word parts to be catenated:
-	  /// <p/>
-	  /// "wi-fi" => "wifi"
-	  /// </summary>
-	  public const int CATENATE_NUMBERS = 8;
-
-	  /// <summary>
-	  /// Causes all subword parts to be catenated:
-	  /// <p/>
-	  /// "wi-fi-4000" => "wifi4000"
-	  /// </summary>
-	  public const int CATENATE_ALL = 16;
-
-	  /// <summary>
-	  /// Causes original words are preserved and added to the subword list (Defaults to false)
-	  /// <p/>
-	  /// "500-42" => "500" "42" "500-42"
-	  /// </summary>
-	  public const int PRESERVE_ORIGINAL = 32;
-
-	  /// <summary>
-	  /// If not set, causes case changes to be ignored (subwords will only be generated
-	  /// given SUBWORD_DELIM tokens)
-	  /// </summary>
-	  public const int SPLIT_ON_CASE_CHANGE = 64;
-
-	  /// <summary>
-	  /// If not set, causes numeric changes to be ignored (subwords will only be generated
-	  /// given SUBWORD_DELIM tokens).
-	  /// </summary>
-	  public const int SPLIT_ON_NUMERICS = 128;
-
-	  /// <summary>
-	  /// Causes trailing "'s" to be removed for each subword
-	  /// <p/>
-	  /// "O'Neil's" => "O", "Neil"
-	  /// </summary>
-	  public const int STEM_ENGLISH_POSSESSIVE = 256;
-
-	  /// <summary>
-	  /// If not null is the set of tokens to protect from being delimited
-	  /// 
-	  /// </summary>
-	  internal readonly CharArraySet protWords;
-
-	  private readonly int flags;
-
-	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
-	  private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute));
-	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
-	  private readonly TypeAttribute typeAttribute = addAttribute(typeof(TypeAttribute));
-
-	  // used for iterating word delimiter breaks
-	  private readonly WordDelimiterIterator iterator;
-
-	  // used for concatenating runs of similar typed subwords (word,number)
-	  private WordDelimiterConcatenation concat;
-	  // number of subwords last output by concat.
-	  private int lastConcatCount = 0;
-
-	  // used for catenate all
-	  private WordDelimiterConcatenation concatAll;
-
-	  // used for accumulating position increment gaps
-	  private int accumPosInc = 0;
-
-	  private char[] savedBuffer = new char[1024];
-	  private int savedStartOffset;
-	  private int savedEndOffset;
-	  private string savedType;
-	  private bool hasSavedState = false;
-	  // if length by start + end offsets doesn't match the term text then assume
-	  // this is a synonym and don't adjust the offsets.
-	  private bool hasIllegalOffsets = false;
-
-	  // for a run of the same subword type within a word, have we output anything?
-	  private bool hasOutputToken = false;
-	  // when preserve original is on, have we output any token following it?
-	  // this token must have posInc=0!
-	  private bool hasOutputFollowingOriginal = false;
-
-	  /// <summary>
-	  /// Creates a new WordDelimiterFilter
-	  /// </summary>
-	  /// <param name="in"> TokenStream to be filtered </param>
-	  /// <param name="charTypeTable"> table containing character types </param>
-	  /// <param name="configurationFlags"> Flags configuring the filter </param>
-	  /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
-	  public Lucene47WordDelimiterFilter(TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords) : base(@in)
-	  {
-		  if (!InstanceFieldsInitialized)
-		  {
-			  InitializeInstanceFields();
-			  InstanceFieldsInitialized = true;
-		  }
-		this.flags = configurationFlags;
-		this.protWords = protWords;
-		this.iterator = new WordDelimiterIterator(charTypeTable, has(SPLIT_ON_CASE_CHANGE), has(SPLIT_ON_NUMERICS), has(STEM_ENGLISH_POSSESSIVE));
-	  }
-
-	  /// <summary>
-	  /// Creates a new WordDelimiterFilter using <seealso cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
-	  /// as its charTypeTable
-	  /// </summary>
-	  /// <param name="in"> TokenStream to be filtered </param>
-	  /// <param name="configurationFlags"> Flags configuring the filter </param>
-	  /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
-	  public Lucene47WordDelimiterFilter(TokenStream @in, int configurationFlags, CharArraySet protWords) : this(@in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
-	  {
-		  if (!InstanceFieldsInitialized)
-		  {
-			  InitializeInstanceFields();
-			  InstanceFieldsInitialized = true;
-		  }
-	  }
-
-	  public override bool IncrementToken()
-	  {
-		while (true)
-		{
-		  if (!hasSavedState)
-		  {
-			// process a new input word
-			if (!input.IncrementToken())
-			{
-			  return false;
-			}
-
-			int termLength = termAttribute.Length();
-			char[] termBuffer = termAttribute.buffer();
-
-			accumPosInc += posIncAttribute.PositionIncrement;
-
-			iterator.setText(termBuffer, termLength);
-			iterator.next();
-
-			// word of no delimiters, or protected word: just return it
-			if ((iterator.current == 0 && iterator.end == termLength) || (protWords != null && protWords.contains(termBuffer, 0, termLength)))
-			{
-			  posIncAttribute.PositionIncrement = accumPosInc;
-			  accumPosInc = 0;
-			  return true;
-			}
-
-			// word of simply delimiters
-			if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL))
-			{
-			  // if the posInc is 1, simply ignore it in the accumulation
-			  if (posIncAttribute.PositionIncrement == 1)
-			  {
-				accumPosInc--;
-			  }
-			  continue;
-			}
-
-			saveState();
-
-			hasOutputToken = false;
-			hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL);
-			lastConcatCount = 0;
-
-			if (has(PRESERVE_ORIGINAL))
-			{
-			  posIncAttribute.PositionIncrement = accumPosInc;
-			  accumPosInc = 0;
-			  return true;
-			}
-		  }
-
-		  // at the end of the string, output any concatenations
-		  if (iterator.end == WordDelimiterIterator.DONE)
-		  {
-			if (!concat.Empty)
-			{
-			  if (flushConcatenation(concat))
-			  {
-				return true;
-			  }
-			}
-
-			if (!concatAll.Empty)
-			{
-			  // only if we haven't output this same combo above!
-			  if (concatAll.subwordCount > lastConcatCount)
-			  {
-				concatAll.writeAndClear();
-				return true;
-			  }
-			  concatAll.clear();
-			}
-
-			// no saved concatenations, on to the next input word
-			hasSavedState = false;
-			continue;
-		  }
-
-		  // word surrounded by delimiters: always output
-		  if (iterator.SingleWord)
-		  {
-			generatePart(true);
-			iterator.next();
-			return true;
-		  }
-
-		  int wordType = iterator.type();
-
-		  // do we already have queued up incompatible concatenations?
-		  if (!concat.Empty && (concat.type & wordType) == 0)
-		  {
-			if (flushConcatenation(concat))
-			{
-			  hasOutputToken = false;
-			  return true;
-			}
-			hasOutputToken = false;
-		  }
-
-		  // add subwords depending upon options
-		  if (shouldConcatenate(wordType))
-		  {
-			if (concat.Empty)
-			{
-			  concat.type = wordType;
-			}
-			concatenate(concat);
-		  }
-
-		  // add all subwords (catenateAll)
-		  if (has(CATENATE_ALL))
-		  {
-			concatenate(concatAll);
-		  }
-
-		  // if we should output the word or number part
-		  if (shouldGenerateParts(wordType))
-		  {
-			generatePart(false);
-			iterator.next();
-			return true;
-		  }
-
-		  iterator.next();
-		}
-	  }
-
-	  /// <summary>
-	  /// {@inheritDoc}
-	  /// </summary>
-	  public override void Reset()
-	  {
-		base.Reset();
-		hasSavedState = false;
-		concat.clear();
-		concatAll.clear();
-		accumPosInc = 0;
-	  }
-
-	  // ================================================= Helper Methods ================================================
-
-	  /// <summary>
-	  /// Saves the existing attribute states
-	  /// </summary>
-	  private void saveState()
-	  {
-		// otherwise, we have delimiters, save state
-		savedStartOffset = offsetAttribute.startOffset();
-		savedEndOffset = offsetAttribute.endOffset();
-		// if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
-		hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
-		savedType = typeAttribute.type();
-
-		if (savedBuffer.Length < termAttribute.length())
-		{
-		  savedBuffer = new char[ArrayUtil.Oversize(termAttribute.Length(), RamUsageEstimator.NUM_BYTES_CHAR)];
-		}
-
-		Array.Copy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
-		iterator.text = savedBuffer;
-
-		hasSavedState = true;
-	  }
-
-	  /// <summary>
-	  /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
-	  /// </summary>
-	  /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
-	  /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
-	  private bool flushConcatenation(WordDelimiterConcatenation concatenation)
-	  {
-		lastConcatCount = concatenation.subwordCount;
-		if (concatenation.subwordCount != 1 || !shouldGenerateParts(concatenation.type))
-		{
-		  concatenation.writeAndClear();
-		  return true;
-		}
-		concatenation.clear();
-		return false;
-	  }
-
-	  /// <summary>
-	  /// Determines whether to concatenate a word or number if the current word is the given type
-	  /// </summary>
-	  /// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
-	  /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
-	  private bool shouldConcatenate(int wordType)
-	  {
-		return (has(CATENATE_WORDS) && isAlpha(wordType)) || (has(CATENATE_NUMBERS) && isDigit(wordType));
-	  }
-
-	  /// <summary>
-	  /// Determines whether a word/number part should be generated for a word of the given type
-	  /// </summary>
-	  /// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
-	  /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
-	  private bool shouldGenerateParts(int wordType)
-	  {
-		return (has(GENERATE_WORD_PARTS) && isAlpha(wordType)) || (has(GENERATE_NUMBER_PARTS) && isDigit(wordType));
-	  }
-
-	  /// <summary>
-	  /// Concatenates the saved buffer to the given WordDelimiterConcatenation
-	  /// </summary>
-	  /// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
-	  private void concatenate(WordDelimiterConcatenation concatenation)
-	  {
-		if (concatenation.Empty)
-		{
-		  concatenation.startOffset = savedStartOffset + iterator.current;
-		}
-		concatenation.append(savedBuffer, iterator.current, iterator.end - iterator.current);
-		concatenation.endOffset = savedStartOffset + iterator.end;
-	  }
-
-	  /// <summary>
-	  /// Generates a word/number part, updating the appropriate attributes
-	  /// </summary>
-	  /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
-	  private void generatePart(bool isSingleWord)
-	  {
-		clearAttributes();
-		termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
-
-		int startOffset = savedStartOffset + iterator.current;
-		int endOffset = savedStartOffset + iterator.end;
-
-		if (hasIllegalOffsets)
-		{
-		  // historically this filter did this regardless for 'isSingleWord', 
-		  // but we must do a sanity check:
-		  if (isSingleWord && startOffset <= savedEndOffset)
-		  {
-			offsetAttribute.setOffset(startOffset, savedEndOffset);
-		  }
-		  else
-		  {
-			offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
-		  }
-		}
-		else
-		{
-		  offsetAttribute.setOffset(startOffset, endOffset);
-		}
-		posIncAttribute.PositionIncrement = position(false);
-		typeAttribute.Type = savedType;
-	  }
-
-	  /// <summary>
-	  /// Get the position increment gap for a subword or concatenation
-	  /// </summary>
-	  /// <param name="inject"> true if this token wants to be injected </param>
-	  /// <returns> position increment gap </returns>
-	  private int position(bool inject)
-	  {
-		int posInc = accumPosInc;
-
-		if (hasOutputToken)
-		{
-		  accumPosInc = 0;
-		  return inject ? 0 : Math.Max(1, posInc);
-		}
-
-		hasOutputToken = true;
-
-		if (!hasOutputFollowingOriginal)
-		{
-		  // the first token following the original is 0 regardless
-		  hasOutputFollowingOriginal = true;
-		  return 0;
-		}
-		// clear the accumulated position increment
-		accumPosInc = 0;
-		return Math.Max(1, posInc);
-	  }
-
-	  /// <summary>
-	  /// Checks if the given word type includes <seealso cref="#ALPHA"/>
-	  /// </summary>
-	  /// <param name="type"> Word type to check </param>
-	  /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
-	  internal static bool isAlpha(int type)
-	  {
-		return (type & ALPHA) != 0;
-	  }
-
-	  /// <summary>
-	  /// Checks if the given word type includes <seealso cref="#DIGIT"/>
-	  /// </summary>
-	  /// <param name="type"> Word type to check </param>
-	  /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
-	  internal static bool isDigit(int type)
-	  {
-		return (type & DIGIT) != 0;
-	  }
-
-	  /// <summary>
-	  /// Checks if the given word type includes <seealso cref="#SUBWORD_DELIM"/>
-	  /// </summary>
-	  /// <param name="type"> Word type to check </param>
-	  /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
-	  internal static bool isSubwordDelim(int type)
-	  {
-		return (type & SUBWORD_DELIM) != 0;
-	  }
-
-	  /// <summary>
-	  /// Checks if the given word type includes <seealso cref="#UPPER"/>
-	  /// </summary>
-	  /// <param name="type"> Word type to check </param>
-	  /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
-	  internal static bool isUpper(int type)
-	  {
-		return (type & UPPER) != 0;
-	  }
-
-	  /// <summary>
-	  /// Determines whether the given flag is set
-	  /// </summary>
-	  /// <param name="flag"> Flag to see if set </param>
-	  /// <returns> {@code true} if flag is set </returns>
-	  private bool has(int flag)
-	  {
-		return (flags & flag) != 0;
-	  }
-
-	  // ================================================= Inner Classes =================================================
-
-	  /// <summary>
-	  /// A WDF concatenated 'run'
-	  /// </summary>
-	  internal sealed class WordDelimiterConcatenation
-	  {
-		  private readonly Lucene47WordDelimiterFilter outerInstance;
-
-		  public WordDelimiterConcatenation(Lucene47WordDelimiterFilter outerInstance)
-		  {
-			  this.outerInstance = outerInstance;
-		  }
-
-		internal readonly StringBuilder buffer = new StringBuilder();
-		internal int startOffset;
-		internal int endOffset;
-		internal int type;
-		internal int subwordCount;
-
-		/// <summary>
-		/// Appends the given text of the given length, to the concetenation at the given offset
-		/// </summary>
-		/// <param name="text"> Text to append </param>
-		/// <param name="offset"> Offset in the concetenation to add the text </param>
-		/// <param name="length"> Length of the text to append </param>
-		internal void append(char[] text, int offset, int length)
-		{
-		  buffer.Append(text, offset, length);
-		  subwordCount++;
-		}
-
-		/// <summary>
-		/// Writes the concatenation to the attributes
-		/// </summary>
-		private void Write()
-		{
-		  ClearAttributes();
-		  if (outerInstance.termAttribute.length() < buffer.Length)
-		  {
-			outerInstance.termAttribute.resizeBuffer(buffer.Length);
-		  }
-		  char[] termbuffer = outerInstance.termAttribute.buffer();
-
-		  buffer.getChars(0, buffer.Length, termbuffer, 0);
-		  outerInstance.termAttribute.Length = buffer.Length;
-
-		  if (outerInstance.hasIllegalOffsets)
-		  {
-			outerInstance.offsetAttribute.setOffset(outerInstance.savedStartOffset, outerInstance.savedEndOffset);
-		  }
-		  else
-		  {
-			outerInstance.offsetAttribute.setOffset(startOffset, endOffset);
-		  }
-		  outerInstance.posIncAttribute.PositionIncrement = outerInstance.position(true);
-		  outerInstance.typeAttribute.Type = outerInstance.savedType;
-		  outerInstance.accumPosInc = 0;
-		}
-
-		/// <summary>
-		/// Determines if the concatenation is empty
-		/// </summary>
-		/// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
-		internal bool Empty
-		{
-			get
-			{
-			  return buffer.Length == 0;
-			}
-		}
-
-		/// <summary>
-		/// Clears the concatenation and resets its state
-		/// </summary>
-		internal void clear()
-		{
-		  buffer.Length = 0;
-		  startOffset = endOffset = type = subwordCount = 0;
-		}
-
-		/// <summary>
-		/// Convenience method for the common scenario of having to write the concetenation and then clearing its state
-		/// </summary>
-		internal void writeAndClear()
-		{
-		  write();
-		  clear();
-		}
-	  }
-	  // questions:
-	  // negative numbers?  -42 indexed as just 42?
-	  // dollar sign?  $42
-	  // percent sign?  33%
-	  // downsides:  if source text is "powershot" then a query of "PowerShot" won't match!
-	}
-
+    /// <summary>
+    /// Old Broken version of <seealso cref="WordDelimiterFilter"/>
+    /// </summary>
+    [Obsolete]
+    public sealed class Lucene47WordDelimiterFilter : TokenFilter
+    {
+        private bool InstanceFieldsInitialized = false;
+
+        private void InitializeInstanceFields()
+        {
+            concat = new WordDelimiterConcatenation(this);
+            concatAll = new WordDelimiterConcatenation(this);
+        }
+
+
+        public const int LOWER = 0x01;
+        public const int UPPER = 0x02;
+        public const int DIGIT = 0x04;
+        public const int SUBWORD_DELIM = 0x08;
+
+        // combinations: for testing, not for setting bits
+        public const int ALPHA = 0x03;
+        public const int ALPHANUM = 0x07;
+
+        /// <summary>
+        /// Causes parts of words to be generated:
+        /// <p/>
+        /// "PowerShot" => "Power" "Shot"
+        /// </summary>
+        public const int GENERATE_WORD_PARTS = 1;
+
+        /// <summary>
+        /// Causes number subwords to be generated:
+        /// <p/>
+        /// "500-42" => "500" "42"
+        /// </summary>
+        public const int GENERATE_NUMBER_PARTS = 2;
+
+        /// <summary>
+        /// Causes maximum runs of word parts to be catenated:
+        /// <p/>
+        /// "wi-fi" => "wifi"
+        /// </summary>
+        public const int CATENATE_WORDS = 4;
+
+        /// <summary>
+        /// Causes maximum runs of word parts to be catenated:
+        /// <p/>
+        /// "wi-fi" => "wifi"
+        /// </summary>
+        public const int CATENATE_NUMBERS = 8;
+
+        /// <summary>
+        /// Causes all subword parts to be catenated:
+        /// <p/>
+        /// "wi-fi-4000" => "wifi4000"
+        /// </summary>
+        public const int CATENATE_ALL = 16;
+
+        /// <summary>
+        /// Causes original words to be preserved and added to the subword list (Defaults to false)
+        /// <p/>
+        /// "500-42" => "500" "42" "500-42"
+        /// </summary>
+        public const int PRESERVE_ORIGINAL = 32;
+
+        /// <summary>
+        /// If not set, causes case changes to be ignored (subwords will only be generated
+        /// given SUBWORD_DELIM tokens)
+        /// </summary>
+        public const int SPLIT_ON_CASE_CHANGE = 64;
+
+        /// <summary>
+        /// If not set, causes numeric changes to be ignored (subwords will only be generated
+        /// given SUBWORD_DELIM tokens).
+        /// </summary>
+        public const int SPLIT_ON_NUMERICS = 128;
+
+        /// <summary>
+        /// Causes trailing "'s" to be removed for each subword
+        /// <p/>
+        /// "O'Neil's" => "O", "Neil"
+        /// </summary>
+        public const int STEM_ENGLISH_POSSESSIVE = 256;
+
+        /// <summary>
+        /// If not null, the set of tokens to protect from being delimited
+        /// 
+        /// </summary>
+        internal readonly CharArraySet protWords;
+
+        private readonly int flags;
+
+        private readonly ICharTermAttribute termAttribute;
+        private readonly IOffsetAttribute offsetAttribute;
+        private readonly IPositionIncrementAttribute posIncAttribute;
+        private readonly ITypeAttribute typeAttribute;
+
+        // used for iterating word delimiter breaks
+        private readonly WordDelimiterIterator iterator;
+
+        // used for concatenating runs of similar typed subwords (word,number)
+        private WordDelimiterConcatenation concat;
+        // number of subwords last output by concat.
+        private int lastConcatCount = 0;
+
+        // used for catenate all
+        private WordDelimiterConcatenation concatAll;
+
+        // used for accumulating position increment gaps
+        private int accumPosInc = 0;
+
+        private char[] savedBuffer = new char[1024];
+        private int savedStartOffset;
+        private int savedEndOffset;
+        private string savedType;
+        private bool hasSavedState = false;
+        // if length by start + end offsets doesn't match the term text then assume
+        // this is a synonym and don't adjust the offsets.
+        private bool hasIllegalOffsets = false;
+
+        // for a run of the same subword type within a word, have we output anything?
+        private bool hasOutputToken = false;
+        // when preserve original is on, have we output any token following it?
+        // this token must have posInc=0!
+        private bool hasOutputFollowingOriginal = false;
+
+        /// <summary>
+        /// Creates a new WordDelimiterFilter
+        /// </summary>
+        /// <param name="in"> TokenStream to be filtered </param>
+        /// <param name="charTypeTable"> table containing character types </param>
+        /// <param name="configurationFlags"> Flags configuring the filter </param>
+        /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
+        public Lucene47WordDelimiterFilter(TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords)
+            : base(@in)
+        {
+            termAttribute = AddAttribute<ICharTermAttribute>();
+            offsetAttribute = AddAttribute<IOffsetAttribute>();
+            posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
+            typeAttribute = AddAttribute<ITypeAttribute>();
+
+            if (!InstanceFieldsInitialized)
+            {
+                InitializeInstanceFields();
+                InstanceFieldsInitialized = true;
+            }
+            this.flags = configurationFlags;
+            this.protWords = protWords;
+            this.iterator = new WordDelimiterIterator(charTypeTable, has(SPLIT_ON_CASE_CHANGE), has(SPLIT_ON_NUMERICS), has(STEM_ENGLISH_POSSESSIVE));
+        }
+
+        /// <summary>
+        /// Creates a new WordDelimiterFilter using <seealso cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
+        /// as its charTypeTable
+        /// </summary>
+        /// <param name="in"> TokenStream to be filtered </param>
+        /// <param name="configurationFlags"> Flags configuring the filter </param>
+        /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
+        public Lucene47WordDelimiterFilter(TokenStream @in, int configurationFlags, CharArraySet protWords)
+            : this(@in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
+        {
+            if (!InstanceFieldsInitialized)
+            {
+                InitializeInstanceFields();
+                InstanceFieldsInitialized = true;
+            }
+        }
+
+        public override bool IncrementToken()
+        {
+            while (true)
+            {
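+                // Each pass either pulls a fresh token from the input (when there is no
+                // saved state) or emits one more subword/concatenation from the saved word.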
+                if (!hasSavedState)
+                {
+                    // process a new input word
+                    if (!input.IncrementToken())
+                    {
+                        return false;
+                    }
+
+                    int termLength = termAttribute.Length;
+                    char[] termBuffer = termAttribute.Buffer();
+
+                    accumPosInc += posIncAttribute.PositionIncrement;
+
+                    iterator.setText(termBuffer, termLength);
+                    iterator.next();
+
+                    // word of no delimiters, or protected word: just return it
+                    if ((iterator.current == 0 && iterator.end == termLength) || (protWords != null && protWords.Contains(termBuffer, 0, termLength)))
+                    {
+                        posIncAttribute.PositionIncrement = accumPosInc;
+                        accumPosInc = 0;
+                        return true;
+                    }
+
+                    // word consisting solely of delimiters
+                    if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL))
+                    {
+                        // if the posInc is 1, simply ignore it in the accumulation
+                        if (posIncAttribute.PositionIncrement == 1)
+                        {
+                            accumPosInc--;
+                        }
+                        continue;
+                    }
+
+                    SaveState();
+
+                    hasOutputToken = false;
+                    hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL);
+                    lastConcatCount = 0;
+
+                    if (has(PRESERVE_ORIGINAL))
+                    {
+                        posIncAttribute.PositionIncrement = accumPosInc;
+                        accumPosInc = 0;
+                        return true;
+                    }
+                }
+
+                // at the end of the string, output any concatenations
+                if (iterator.end == WordDelimiterIterator.DONE)
+                {
+                    if (!concat.Empty)
+                    {
+                        if (FlushConcatenation(concat))
+                        {
+                            return true;
+                        }
+                    }
+
+                    if (!concatAll.Empty)
+                    {
+                        // only if we haven't output this same combo above!
+                        if (concatAll.subwordCount > lastConcatCount)
+                        {
+                            concatAll.writeAndClear();
+                            return true;
+                        }
+                        concatAll.clear();
+                    }
+
+                    // no saved concatenations, on to the next input word
+                    hasSavedState = false;
+                    continue;
+                }
+
+                // word surrounded by delimiters: always output
+                if (iterator.SingleWord)
+                {
+                    GeneratePart(true);
+                    iterator.next();
+                    return true;
+                }
+
+                int wordType = iterator.type();
+
+                // do we already have queued up incompatible concatenations?
+                if (!concat.Empty && (concat.type & wordType) == 0)
+                {
+                    if (FlushConcatenation(concat))
+                    {
+                        hasOutputToken = false;
+                        return true;
+                    }
+                    hasOutputToken = false;
+                }
+
+                // add subwords depending upon options
+                if (ShouldConcatenate(wordType))
+                {
+                    if (concat.Empty)
+                    {
+                        concat.type = wordType;
+                    }
+                    concatenate(concat);
+                }
+
+                // add all subwords (catenateAll)
+                if (has(CATENATE_ALL))
+                {
+                    concatenate(concatAll);
+                }
+
+                // if we should output the word or number part
+                if (ShouldGenerateParts(wordType))
+                {
+                    GeneratePart(false);
+                    iterator.next();
+                    return true;
+                }
+
+                iterator.next();
+            }
+        }
+
+        /// <summary>
+        /// {@inheritDoc}
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+            hasSavedState = false;
+            concat.clear();
+            concatAll.clear();
+            accumPosInc = 0;
+        }
+
+        // ================================================= Helper Methods ================================================
+
+        /// <summary>
+        /// Saves the existing attribute states
+        /// </summary>
+        private void SaveState()
+        {
+            // otherwise, we have delimiters, save state
+            savedStartOffset = offsetAttribute.StartOffset();
+            savedEndOffset = offsetAttribute.EndOffset();
+            // if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
+            hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.Length);
+            savedType = typeAttribute.Type;
+
+            if (savedBuffer.Length < termAttribute.Length)
+            {
+                savedBuffer = new char[ArrayUtil.Oversize(termAttribute.Length, RamUsageEstimator.NUM_BYTES_CHAR)];
+            }
+
+            Array.Copy(termAttribute.Buffer(), 0, savedBuffer, 0, termAttribute.Length);
+            iterator.text = savedBuffer;
+
+            hasSavedState = true;
+        }
+
+        /// <summary>
+        /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
+        /// </summary>
+        /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
+        /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
+        private bool FlushConcatenation(WordDelimiterConcatenation concatenation)
+        {
+            lastConcatCount = concatenation.subwordCount;
+            if (concatenation.subwordCount != 1 || !ShouldGenerateParts(concatenation.type))
+            {
+                concatenation.writeAndClear();
+                return true;
+            }
+            concatenation.clear();
+            return false;
+        }
+
+        /// <summary>
+        /// Determines whether to concatenate a word or number if the current word is the given type
+        /// </summary>
+        /// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
+        /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
+        private bool ShouldConcatenate(int wordType)
+        {
+            return (has(CATENATE_WORDS) && isAlpha(wordType)) || (has(CATENATE_NUMBERS) && isDigit(wordType));
+        }
+
+        /// <summary>
+        /// Determines whether a word/number part should be generated for a word of the given type
+        /// </summary>
+        /// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
+        /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
+        private bool ShouldGenerateParts(int wordType)
+        {
+            return (has(GENERATE_WORD_PARTS) && isAlpha(wordType)) || (has(GENERATE_NUMBER_PARTS) && isDigit(wordType));
+        }
+
+        /// <summary>
+        /// Concatenates the saved buffer to the given WordDelimiterConcatenation
+        /// </summary>
+        /// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
+        private void concatenate(WordDelimiterConcatenation concatenation)
+        {
+            if (concatenation.Empty)
+            {
+                concatenation.startOffset = savedStartOffset + iterator.current;
+            }
+            concatenation.append(savedBuffer, iterator.current, iterator.end - iterator.current);
+            concatenation.endOffset = savedStartOffset + iterator.end;
+        }
+
+        /// <summary>
+        /// Generates a word/number part, updating the appropriate attributes
+        /// </summary>
+        /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
+        private void GeneratePart(bool isSingleWord)
+        {
+            ClearAttributes();
+            termAttribute.CopyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
+
+            int startOffset = savedStartOffset + iterator.current;
+            int endOffset = savedStartOffset + iterator.end;
+
+            if (hasIllegalOffsets)
+            {
+                // historically this filter did this regardless of 'isSingleWord',
+                // but we must do a sanity check:
+                if (isSingleWord && startOffset <= savedEndOffset)
+                {
+                    offsetAttribute.SetOffset(startOffset, savedEndOffset);
+                }
+                else
+                {
+                    offsetAttribute.SetOffset(savedStartOffset, savedEndOffset);
+                }
+            }
+            else
+            {
+                offsetAttribute.SetOffset(startOffset, endOffset);
+            }
+            posIncAttribute.PositionIncrement = position(false);
+            typeAttribute.Type = savedType;
+        }
+
+        /// <summary>
+        /// Get the position increment gap for a subword or concatenation
+        /// </summary>
+        /// <param name="inject"> true if this token wants to be injected </param>
+        /// <returns> position increment gap </returns>
+        private int position(bool inject)
+        {
+            int posInc = accumPosInc;
+
+            if (hasOutputToken)
+            {
+                accumPosInc = 0;
+                return inject ? 0 : Math.Max(1, posInc);
+            }
+
+            hasOutputToken = true;
+
+            if (!hasOutputFollowingOriginal)
+            {
+                // the first token following the original is 0 regardless
+                hasOutputFollowingOriginal = true;
+                return 0;
+            }
+            // clear the accumulated position increment
+            accumPosInc = 0;
+            return Math.Max(1, posInc);
+        }
+
+        /// <summary>
+        /// Checks if the given word type includes <seealso cref="#ALPHA"/>
+        /// </summary>
+        /// <param name="type"> Word type to check </param>
+        /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
+        internal static bool isAlpha(int type)
+        {
+            return (type & ALPHA) != 0;
+        }
+
+        /// <summary>
+        /// Checks if the given word type includes <seealso cref="#DIGIT"/>
+        /// </summary>
+        /// <param name="type"> Word type to check </param>
+        /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
+        internal static bool isDigit(int type)
+        {
+            return (type & DIGIT) != 0;
+        }
+
+        /// <summary>
+        /// Checks if the given word type includes <seealso cref="#SUBWORD_DELIM"/>
+        /// </summary>
+        /// <param name="type"> Word type to check </param>
+        /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
+        internal static bool isSubwordDelim(int type)
+        {
+            return (type & SUBWORD_DELIM) != 0;
+        }
+
+        /// <summary>
+        /// Checks if the given word type includes <seealso cref="#UPPER"/>
+        /// </summary>
+        /// <param name="type"> Word type to check </param>
+        /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
+        internal static bool isUpper(int type)
+        {
+            return (type & UPPER) != 0;
+        }
+
+        /// <summary>
+        /// Determines whether the given flag is set
+        /// </summary>
+        /// <param name="flag"> Flag to see if set </param>
+        /// <returns> {@code true} if flag is set </returns>
+        private bool has(int flag)
+        {
+            return (flags & flag) != 0;
+        }
+
+        // ================================================= Inner Classes =================================================
+
+        /// <summary>
+        /// A WDF concatenated 'run'
+        /// </summary>
+        internal sealed class WordDelimiterConcatenation
+        {
+            private readonly Lucene47WordDelimiterFilter outerInstance;
+
+            public WordDelimiterConcatenation(Lucene47WordDelimiterFilter outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            internal readonly StringBuilder buffer = new StringBuilder();
+            internal int startOffset;
+            internal int endOffset;
+            internal int type;
+            internal int subwordCount;
+
+            /// <summary>
+            /// Appends the given text of the given length to the concatenation
+            /// </summary>
+            /// <param name="text"> Text to append </param>
+            /// <param name="offset"> Offset in the text at which to start appending </param>
+            /// <param name="length"> Length of the text to append </param>
+            internal void append(char[] text, int offset, int length)
+            {
+                buffer.Append(text, offset, length);
+                subwordCount++;
+            }
+
+            /// <summary>
+            /// Writes the concatenation to the attributes
+            /// </summary>
+            private void Write()
+            {
+                outerInstance.ClearAttributes(); // C# nested classes have no implicit outer instance
+                if (outerInstance.termAttribute.Length < buffer.Length)
+                {
+                    outerInstance.termAttribute.ResizeBuffer(buffer.Length);
+                }
+                var termbuffer = outerInstance.termAttribute.Buffer();
+
+                buffer.GetChars(0, buffer.Length, termbuffer, 0);
+                outerInstance.termAttribute.Length = buffer.Length;
+
+                if (outerInstance.hasIllegalOffsets)
+                {
+                    outerInstance.offsetAttribute.SetOffset(outerInstance.savedStartOffset, outerInstance.savedEndOffset);
+                }
+                else
+                {
+                    outerInstance.offsetAttribute.SetOffset(startOffset, endOffset);
+                }
+                outerInstance.posIncAttribute.PositionIncrement = outerInstance.position(true);
+                outerInstance.typeAttribute.Type = outerInstance.savedType;
+                outerInstance.accumPosInc = 0;
+            }
+
+            /// <summary>
+            /// Determines if the concatenation is empty
+            /// </summary>
+            /// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
+            internal bool Empty
+            {
+                get
+                {
+                    return buffer.Length == 0;
+                }
+            }
+
+            /// <summary>
+            /// Clears the concatenation and resets its state
+            /// </summary>
+            internal void clear()
+            {
+                buffer.Length = 0;
+                startOffset = endOffset = type = subwordCount = 0;
+            }
+
+            /// <summary>
+            /// Convenience method for the common scenario of having to write the concatenation and then clear its state
+            /// </summary>
+            internal void writeAndClear()
+            {
+                Write();
+                clear();
+            }
+        }
+        // questions:
+        // negative numbers?  -42 indexed as just 42?
+        // dollar sign?  $42
+        // percent sign?  33%
+        // downsides:  if source text is "powershot" then a query of "PowerShot" won't match!
+    }
 }
\ No newline at end of file
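
A quick usage sketch for the filter above. Hedged: the tokenizer and Version
names are assumptions based on this port's conventions, not part of the diff.

    // Split on delimiters, and also emit the fully catenated form.
    int flags = Lucene47WordDelimiterFilter.GENERATE_WORD_PARTS
              | Lucene47WordDelimiterFilter.GENERATE_NUMBER_PARTS
              | Lucene47WordDelimiterFilter.CATENATE_ALL;
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_47, reader); // assumed tokenizer
    ts = new Lucene47WordDelimiterFilter(ts, flags, null); // null = no protected words
    // "wi-fi-4000" => "wi", "fi", "4000", "wifi4000"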


[3/5] lucenenet git commit: More porting work

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
index d431ee7..8d0f085 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
@@ -4,60 +4,59 @@ using Lucene.Net.Analysis.Util;
 namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Factory for <seealso cref="ASCIIFoldingFilter"/>.
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false"/&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// </summary>
-	public class ASCIIFoldingFilterFactory : TokenFilterFactory, MultiTermAwareComponent
-	{
-	  private readonly bool preserveOriginal;
+    /// Factory for <seealso cref="ASCIIFoldingFilter"/>.
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class ASCIIFoldingFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+    {
+        private readonly bool preserveOriginal;
 
-	  /// <summary>
-	  /// Creates a new ASCIIFoldingFilterFactory </summary>
-	  public ASCIIFoldingFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		preserveOriginal = getBoolean(args, "preserveOriginal", false);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
-
-	  public override TokenStream Create(TokenStream input)
-	  {
-		return new ASCIIFoldingFilter(input, preserveOriginal);
-	  }
-
-	  public virtual AbstractAnalysisFactory MultiTermComponent
-	  {
-		  get
-		  {
-			return this;
-		  }
-	  }
-	}
+        /// <summary>
+        /// Creates a new ASCIIFoldingFilterFactory </summary>
+        public ASCIIFoldingFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            preserveOriginal = getBoolean(args, "preserveOriginal", false);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
 
+        public override TokenStream Create(TokenStream input)
+        {
+            return new ASCIIFoldingFilter(input, preserveOriginal);
+        }
 
+        public virtual AbstractAnalysisFactory MultiTermComponent
+        {
+            get
+            {
+                return this;
+            }
+        }
+    }
 }
\ No newline at end of file
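
A hedged sketch of driving the factory directly (the args-dictionary ctor and
Create are shown above; the tokenizer and Version names are assumptions):

    var args = new Dictionary<string, string> { { "preserveOriginal", "true" } };
    var factory = new ASCIIFoldingFilterFactory(args); // consumes its args, throws on leftovers
    TokenStream ts = factory.Create(new WhitespaceTokenizer(Version.LUCENE_47, reader));
    // "café" folds to "cafe"; with preserveOriginal=true the accented original is kept too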

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
index 9264435..e369452 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
@@ -6,198 +6,201 @@ using Lucene.Net.Analysis.Util;
 namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// A filter to apply normal capitalization rules to Tokens.  It will make the first letter
-	/// capital and the rest lower case.
-	/// <p/>
-	/// This filter is particularly useful to build nice looking facet parameters.  This filter
-	/// is not appropriate if you intend to use a prefix query.
-	/// </summary>
-	public sealed class CapitalizationFilter : TokenFilter
-	{
-	  public static readonly int DEFAULT_MAX_WORD_COUNT = int.MaxValue;
-	  public static readonly int DEFAULT_MAX_TOKEN_LENGTH = int.MaxValue;
-
-	  private readonly bool onlyFirstWord;
-	  private readonly CharArraySet keep;
-	  private readonly bool forceFirstLetter;
-	  private readonly ICollection<char[]> okPrefix;
-
-	  private readonly int minWordLength;
-	  private readonly int maxWordCount;
-	  private readonly int maxTokenLength;
-
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-
-	  /// <summary>
-	  /// Creates a CapitalizationFilter with the default parameters.
-	  /// <para>
-	  /// Calls {@link #CapitalizationFilter(TokenStream, boolean, CharArraySet, boolean, Collection, int, int, int)
-	  ///   CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)}
-	  /// </para>
-	  /// </summary>
-	  public CapitalizationFilter(TokenStream @in) : this(@in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates a CapitalizationFilter with the specified parameters. </summary>
-	  /// <param name="in"> input tokenstream </param>
-	  /// <param name="onlyFirstWord"> should each word be capitalized or all of the words? </param>
-	  /// <param name="keep"> a keep word list.  Each word that should be kept separated by whitespace. </param>
-	  /// <param name="forceFirstLetter"> Force the first letter to be capitalized even if it is in the keep list. </param>
-	  /// <param name="okPrefix"> do not change word capitalization if a word begins with something in this list. </param>
-	  /// <param name="minWordLength"> how long the word needs to be to get capitalization applied.  If the
-	  ///                      minWordLength is 3, "and" > "And" but "or" stays "or". </param>
-	  /// <param name="maxWordCount"> if the token contains more then maxWordCount words, the capitalization is
-	  ///                     assumed to be correct. </param>
-	  /// <param name="maxTokenLength"> ??? </param>
-	  public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength) : base(@in)
-	  {
-		this.onlyFirstWord = onlyFirstWord;
-		this.keep = keep;
-		this.forceFirstLetter = forceFirstLetter;
-		this.okPrefix = okPrefix;
-		this.minWordLength = minWordLength;
-		this.maxWordCount = maxWordCount;
-		this.maxTokenLength = maxTokenLength;
-	  }
-
-	  public override bool IncrementToken()
-	  {
-		if (!input.IncrementToken())
-		{
-			return false;
-		}
-
-		char[] termBuffer = termAtt.Buffer();
-		int termBufferLength = termAtt.Length;
-		char[] backup = null;
-
-		if (maxWordCount < DEFAULT_MAX_WORD_COUNT)
-		{
-		  //make a backup in case we exceed the word count
-		  backup = new char[termBufferLength];
-		  Array.Copy(termBuffer, 0, backup, 0, termBufferLength);
-		}
-
-		if (termBufferLength < maxTokenLength)
-		{
-		  int wordCount = 0;
-
-		  int lastWordStart = 0;
-		  for (int i = 0; i < termBufferLength; i++)
-		  {
-			char c = termBuffer[i];
-			if (c <= ' ' || c == '.')
-			{
-			  int len = i - lastWordStart;
-			  if (len > 0)
-			  {
-				ProcessWord(termBuffer, lastWordStart, len, wordCount++);
-				lastWordStart = i + 1;
-				i++;
-			  }
-			}
-		  }
-
-		  // process the last word
-		  if (lastWordStart < termBufferLength)
-		  {
-			ProcessWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
-		  }
-
-		  if (wordCount > maxWordCount)
-		  {
-			termAtt.CopyBuffer(backup, 0, termBufferLength);
-		  }
-		}
-
-		return true;
-	  }
-
-	  private void ProcessWord(char[] buffer, int offset, int length, int wordCount)
-	  {
-		if (length < 1)
-		{
-		  return;
-		}
-
-		if (onlyFirstWord && wordCount > 0)
-		{
-		  for (int i = 0; i < length; i++)
-		  {
-			buffer[offset + i] = char.ToLower(buffer[offset + i]);
-
-		  }
-		  return;
-		}
-
-		if (keep != null && keep.Contains(buffer, offset, length))
-		{
-		  if (wordCount == 0 && forceFirstLetter)
-		  {
-			buffer[offset] = char.ToUpper(buffer[offset]);
-		  }
-		  return;
-		}
-
-		if (length < minWordLength)
-		{
-		  return;
-		}
-
-		if (okPrefix != null)
-		{
-		  foreach (char[] prefix in okPrefix)
-		  {
-			if (length >= prefix.Length) //don't bother checking if the buffer length is less than the prefix
-			{
-			  bool match = true;
-			  for (int i = 0; i < prefix.Length; i++)
-			  {
-				if (prefix[i] != buffer[offset + i])
-				{
-				  match = false;
-				  break;
-				}
-			  }
-			  if (match == true)
-			  {
-				return;
-			  }
-			}
-		  }
-		}
-
-		// We know it has at least one character
-		/*char[] chars = w.toCharArray();
-		StringBuilder word = new StringBuilder( w.length() );
-		word.append( Character.toUpperCase( chars[0] ) );*/
-		buffer[offset] = char.ToUpper(buffer[offset]);
-
-		for (int i = 1; i < length; i++)
-		{
-		  buffer[offset + i] = char.ToLower(buffer[offset + i]);
-		}
-		//return word.toString();
-	  }
-	}
+    /// A filter to apply normal capitalization rules to Tokens.  It will make the first letter
+    /// capital and the rest lower case.
+    /// <p/>
+    /// This filter is particularly useful to build nice looking facet parameters.  This filter
+    /// is not appropriate if you intend to use a prefix query.
+    /// </summary>
+    public sealed class CapitalizationFilter : TokenFilter
+    {
+        public static readonly int DEFAULT_MAX_WORD_COUNT = int.MaxValue;
+        public static readonly int DEFAULT_MAX_TOKEN_LENGTH = int.MaxValue;
+
+        private readonly bool onlyFirstWord;
+        private readonly CharArraySet keep;
+        private readonly bool forceFirstLetter;
+        private readonly ICollection<char[]> okPrefix;
+
+        private readonly int minWordLength;
+        private readonly int maxWordCount;
+        private readonly int maxTokenLength;
+
+        private readonly ICharTermAttribute termAtt;
+
+        /// <summary>
+        /// Creates a CapitalizationFilter with the default parameters.
+        /// <para>
+        /// Calls {@link #CapitalizationFilter(TokenStream, boolean, CharArraySet, boolean, Collection, int, int, int)
+        ///   CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)}
+        /// </para>
+        /// </summary>
+        public CapitalizationFilter(TokenStream @in)
+            : this(@in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)
+        {
+        }
+
+        /// <summary>
+        /// Creates a CapitalizationFilter with the specified parameters. </summary>
+        /// <param name="in"> input tokenstream </param>
+        /// <param name="onlyFirstWord"> should each word be capitalized or all of the words? </param>
+        /// <param name="keep"> a keep word list.  Each word that should be kept separated by whitespace. </param>
+        /// <param name="forceFirstLetter"> Force the first letter to be capitalized even if it is in the keep list. </param>
+        /// <param name="okPrefix"> do not change word capitalization if a word begins with something in this list. </param>
+        /// <param name="minWordLength"> how long the word needs to be to get capitalization applied.  If the
+        ///                      minWordLength is 3, "and" > "And" but "or" stays "or". </param>
+        /// <param name="maxWordCount"> if the token contains more then maxWordCount words, the capitalization is
+        ///                     assumed to be correct. </param>
+        /// <param name="maxTokenLength"> ??? </param>
+        public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength)
+            : base(@in)
+        {
+            this.onlyFirstWord = onlyFirstWord;
+            this.keep = keep;
+            this.forceFirstLetter = forceFirstLetter;
+            this.okPrefix = okPrefix;
+            this.minWordLength = minWordLength;
+            this.maxWordCount = maxWordCount;
+            this.maxTokenLength = maxTokenLength;
+            // add the attribute here so both ctor paths initialize termAtt
+            termAtt = AddAttribute<ICharTermAttribute>();
+        }
+
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            char[] termBuffer = termAtt.Buffer();
+            int termBufferLength = termAtt.Length;
+            char[] backup = null;
+
+            if (maxWordCount < DEFAULT_MAX_WORD_COUNT)
+            {
+                //make a backup in case we exceed the word count
+                backup = new char[termBufferLength];
+                Array.Copy(termBuffer, 0, backup, 0, termBufferLength);
+            }
+
+            if (termBufferLength < maxTokenLength)
+            {
+                int wordCount = 0;
+
+                int lastWordStart = 0;
+                for (int i = 0; i < termBufferLength; i++)
+                {
+                    char c = termBuffer[i];
+                    if (c <= ' ' || c == '.')
+                    {
+                        int len = i - lastWordStart;
+                        if (len > 0)
+                        {
+                            ProcessWord(termBuffer, lastWordStart, len, wordCount++);
+                            lastWordStart = i + 1;
+                            i++;
+                        }
+                    }
+                }
+
+                // process the last word
+                if (lastWordStart < termBufferLength)
+                {
+                    ProcessWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
+                }
+
+                if (wordCount > maxWordCount)
+                {
+                    termAtt.CopyBuffer(backup, 0, termBufferLength);
+                }
+            }
+
+            return true;
+        }
+
+        private void ProcessWord(char[] buffer, int offset, int length, int wordCount)
+        {
+            if (length < 1)
+            {
+                return;
+            }
+
+            if (onlyFirstWord && wordCount > 0)
+            {
+                for (int i = 0; i < length; i++)
+                {
+                    buffer[offset + i] = char.ToLower(buffer[offset + i]);
+                }
+                return;
+            }
+
+            if (keep != null && keep.Contains(buffer, offset, length))
+            {
+                if (wordCount == 0 && forceFirstLetter)
+                {
+                    buffer[offset] = char.ToUpper(buffer[offset]);
+                }
+                return;
+            }
+
+            if (length < minWordLength)
+            {
+                return;
+            }
+
+            if (okPrefix != null)
+            {
+                foreach (char[] prefix in okPrefix)
+                {
+                    if (length >= prefix.Length) //don't bother checking if the buffer length is less than the prefix
+                    {
+                        bool match = true;
+                        for (int i = 0; i < prefix.Length; i++)
+                        {
+                            if (prefix[i] != buffer[offset + i])
+                            {
+                                match = false;
+                                break;
+                            }
+                        }
+                        if (match)
+                        {
+                            return;
+                        }
+                    }
+                }
+            }
+
+            // We know it has at least one character
+            /*char[] chars = w.toCharArray();
+            StringBuilder word = new StringBuilder( w.length() );
+            word.append( Character.toUpperCase( chars[0] ) );*/
+            buffer[offset] = char.ToUpper(buffer[offset]);
+
+            for (int i = 1; i < length; i++)
+            {
+                buffer[offset + i] = char.ToLower(buffer[offset + i]);
+            }
+            //return word.toString();
+        }
+    }
 
 }
\ No newline at end of file
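
A hedged usage sketch of CapitalizationFilter with the defaults shown above
(KeywordTokenizer is an assumption; it emits the whole input as a single token):

    TokenStream ts = new KeywordTokenizer(reader);
    ts = new CapitalizationFilter(ts); // defaults: onlyFirstWord=true, forceFirstLetter=true
    // "big APPLE" => "Big apple" (first word capitalized, later words lowercased)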

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
index fccc4db..4bb4255 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
@@ -1,7 +1,6 @@
 using System.Collections.Generic;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
-using org.apache.lucene.analysis.miscellaneous;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
index b410fe9..6c99fcb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
@@ -1,82 +1,77 @@
-using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Removes words that are too long or too short from the stream.
+    /// <para>
+    /// Note: Length is calculated as the number of Unicode codepoints.
+    /// </para>
+    /// </summary>
+    public sealed class CodepointCountFilter : FilteringTokenFilter
+    {
 
-	using FilteringTokenFilter = FilteringTokenFilter;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using Version = org.apache.lucene.util.Version;
+        private readonly int min;
+        private readonly int max;
 
-	/// <summary>
-	/// Removes words that are too long or too short from the stream.
-	/// <para>
-	/// Note: Length is calculated as the number of Unicode codepoints.
-	/// </para>
-	/// </summary>
-	public sealed class CodepointCountFilter : FilteringTokenFilter
-	{
+        private readonly ICharTermAttribute termAtt;
 
-	  private readonly int min;
-	  private readonly int max;
-
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-
-	  /// <summary>
-	  /// Create a new <seealso cref="CodepointCountFilter"/>. This will filter out tokens whose
-	  /// <seealso cref="CharTermAttribute"/> is either too short (<seealso cref="Character#codePointCount(char[], int, int)"/>
-	  /// &lt; min) or too long (<seealso cref="Character#codePointCount(char[], int, int)"/> &gt; max). </summary>
-	  /// <param name="version"> the Lucene match version </param>
-	  /// <param name="in">      the <seealso cref="TokenStream"/> to consume </param>
-	  /// <param name="min">     the minimum length </param>
-	  /// <param name="max">     the maximum length </param>
-	  public CodepointCountFilter(Version version, TokenStream @in, int min, int max) : base(version, @in)
-	  {
-		this.min = min;
-		this.max = max;
-	  }
-
-	  public override bool accept()
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int max32 = termAtt.length();
-		int max32 = termAtt.length();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int min32 = max32 >> 1;
-		int min32 = max32 >> 1;
-		if (min32 >= min && max32 <= max)
-		{
-		  // definitely within range
-		  return true;
-		}
-		else if (min32 > max || max32 < min)
-		{
-		  // definitely not
-		  return false;
-		}
-		else
-		{
-		  // we must count to be sure
-		  int len = char.codePointCount(termAtt.buffer(), 0, termAtt.length());
-		  return (len >= min && len <= max);
-		}
-	  }
-	}
+        /// <summary>
+        /// Create a new <seealso cref="CodepointCountFilter"/>. This will filter out tokens whose
+        /// <seealso cref="CharTermAttribute"/> is either too short (<seealso cref="Character#CodePointCount(char[], int, int)"/>
+        /// &lt; min) or too long (<seealso cref="Character#CodePointCount(char[], int, int)"/> &gt; max). </summary>
+        /// <param name="version"> the Lucene match version </param>
+        /// <param name="in">      the <seealso cref="TokenStream"/> to consume </param>
+        /// <param name="min">     the minimum length </param>
+        /// <param name="max">     the maximum length </param>
+        public CodepointCountFilter(Version version, TokenStream @in, int min, int max)
+            : base(version, @in)
+        {
+            this.min = min;
+            this.max = max;
+            termAtt = AddAttribute<ICharTermAttribute>();
+        }
 
+        protected internal override bool Accept()
+        {
+            int max32 = termAtt.Length;
+            int min32 = max32 >> 1;
+            if (min32 >= min && max32 <= max)
+            {
+                // definitely within range
+                return true;
+            }
+            else if (min32 > max || max32 < min)
+            {
+                // definitely not
+                return false;
+            }
+            else
+            {
+                // we must count to be sure
+                int len = Character.CodePointCount(termAtt.Buffer(), 0, termAtt.Length);
+                return (len >= min && len <= max);
+            }
+        }
+    }
 }
\ No newline at end of file
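
Note on the Accept() fast path above: a term of N UTF-16 chars holds at most N
codepoints and at least N/2 (every codepoint is one or two chars), so the exact
count is only computed when [N/2, N] straddles [min, max]. A hedged usage sketch
(tokenizer and Version names assumed):

    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_47, reader);
    ts = new CodepointCountFilter(Version.LUCENE_47, ts, 2, 10); // keep 2..10 codepoints
    // "a" is dropped, "hello" passes; a surrogate pair counts as one codepoint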

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
index bb37bd1..f6283bb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
@@ -1,6 +1,5 @@
 using System.Collections.Generic;
 using Lucene.Net.Analysis.Util;
-using org.apache.lucene.analysis.miscellaneous;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {
@@ -50,7 +49,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
 		}
 	  }
 
-	  public override CodepointCountFilter Create(TokenStream input)
+	  public override TokenStream Create(TokenStream input)
 	  {
 		return new CodepointCountFilter(luceneMatchVersion, input, min, max);
 	  }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
index 022ee31..d423316 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
@@ -155,5 +155,4 @@ namespace Lucene.Net.Analysis.Miscellaneous
             hyphenated.Length = 0;
         }
     }
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
index b274564..c281cbb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
@@ -48,5 +48,4 @@ namespace Lucene.Net.Analysis.Miscellaneous
 		return new HyphenatedWordsFilter(input);
 	  }
 	}
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
index 266f4b9..219fd2b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
@@ -1,7 +1,5 @@
 using System.Collections.Generic;
 using Lucene.Net.Analysis.Util;
-using org.apache.lucene.analysis.miscellaneous;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
index f584199..ae69f03 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
@@ -1,75 +1,71 @@
-namespace org.apache.lucene.analysis.miscellaneous
-{
-
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+using Lucene.Net.Analysis.Tokenattributes;
 
+namespace Lucene.Net.Analysis.Miscellaneous
+{
 
-	/// <summary>
-	/// This TokenFilter emits each incoming token twice once as keyword and once non-keyword, in other words once with
-	/// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> set to <code>true</code> and once set to <code>false</code>.
-	/// This is useful if used with a stem filter that respects the <seealso cref="KeywordAttribute"/> to index the stemmed and the
-	/// un-stemmed version of a term into the same field.
-	/// </summary>
-	public sealed class KeywordRepeatFilter : TokenFilter
-	{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// This TokenFilter emits each incoming token twice, once as a keyword and once as a non-keyword: in other words, once with
+    /// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> set to <code>true</code> and once set to <code>false</code>.
+    /// This is useful if used with a stem filter that respects the <seealso cref="KeywordAttribute"/> to index the stemmed and the
+    /// un-stemmed version of a term into the same field.
+    /// </summary>
+    public sealed class KeywordRepeatFilter : TokenFilter
+    {
 
-	  private readonly KeywordAttribute keywordAttribute = addAttribute(typeof(KeywordAttribute));
-	  private readonly PositionIncrementAttribute posIncAttr = addAttribute(typeof(PositionIncrementAttribute));
-	  private State state;
+        private readonly IKeywordAttribute keywordAttribute;
+        private readonly IPositionIncrementAttribute posIncAttr;
+        private State state;
 
-	  /// <summary>
-	  /// Construct a token stream filtering the given input.
-	  /// </summary>
-	  public KeywordRepeatFilter(TokenStream input) : base(input)
-	  {
-	  }
+        /// <summary>
+        /// Construct a token stream filtering the given input.
+        /// </summary>
+        public KeywordRepeatFilter(TokenStream input)
+            : base(input)
+        {
+            keywordAttribute = AddAttribute<IKeywordAttribute>();
+            posIncAttr = AddAttribute<IPositionIncrementAttribute>();
+        }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (state != null)
-		{
-		  restoreState(state);
-		  posIncAttr.PositionIncrement = 0;
-		  keywordAttribute.Keyword = false;
-		  state = null;
-		  return true;
-		}
-		if (input.incrementToken())
-		{
-		  state = captureState();
-		  keywordAttribute.Keyword = true;
-		  return true;
-		}
-		return false;
-	  }
+        public override bool IncrementToken()
+        {
+            if (state != null)
+            {
+                RestoreState(state);
+                posIncAttr.PositionIncrement = 0;
+                keywordAttribute.Keyword = false;
+                state = null;
+                return true;
+            }
+            if (input.IncrementToken())
+            {
+                state = CaptureState();
+                keywordAttribute.Keyword = true;
+                return true;
+            }
+            return false;
+        }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		state = null;
-	  }
-	}
+        public override void Reset()
+        {
+            base.Reset();
+            state = null;
+        }
+    }
 
 }
\ No newline at end of file
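
A hedged sketch of the intended pairing with a KeywordAttribute-aware stemmer
(PorterStemFilter here is an assumption; any stemmer honoring the attribute works):

    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_47, reader);
    ts = new KeywordRepeatFilter(ts); // each token emitted twice: keyword, then non-keyword
    ts = new PorterStemFilter(ts);    // leaves the keyword copy alone, stems the other copy
    // "running" => "running" (original) + "run" (stemmed, posInc=0, same position)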

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
index 1e97350..1947ca2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
@@ -1,6 +1,5 @@
 using System.Collections.Generic;
 using Lucene.Net.Analysis.Util;
-using org.apache.lucene.analysis.miscellaneous;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
index e0ba510..556fed2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
@@ -73,7 +73,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
 		this.max = max;
 	  }
 
-	  public override bool Accept()
+        protected internal override bool Accept()
 	  {
 		int len = termAtt.Length;
 		return (len >= min && len <= max);

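The Accept() hook above is invoked once per token by the filtering base class; a
short construction sketch (the ctor shape mirrors the call in LengthFilterFactory
below, and Version.LUCENE_CURRENT plus the input stream are assumed placeholders):

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Miscellaneous;
    using Version = Lucene.Net.Util.Version;

    public static class LengthFilterExample
    {
        public static TokenStream KeepMediumTokens(TokenStream input)
        {
            // Keeps tokens whose length lies in [2, 10]; 'true' preserves position increments.
            return new LengthFilter(Version.LUCENE_CURRENT, true, input, 2, 10);
        }
    }
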
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
index afdc961..a5466f8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
@@ -1,68 +1,62 @@
 using System.Collections.Generic;
-using Lucene.Net.Analysis.Miscellaneous;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory for <seealso cref="LengthFilter"/>. 
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.LengthFilterFactory" min="0" max="1" /&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class LengthFilterFactory : TokenFilterFactory
+    {
+        internal readonly int min;
+        internal readonly int max;
+        internal readonly bool enablePositionIncrements;
+        public const string MIN_KEY = "min";
+        public const string MAX_KEY = "max";
 
-	using TokenFilterFactory = TokenFilterFactory;
+        /// <summary>
+        /// Creates a new LengthFilterFactory </summary>
+        public LengthFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            min = requireInt(args, MIN_KEY);
+            max = requireInt(args, MAX_KEY);
+            enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
 
-	/// <summary>
-	/// Factory for <seealso cref="LengthFilter"/>. 
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.LengthFilterFactory" min="0" max="1" /&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// </summary>
-	public class LengthFilterFactory : TokenFilterFactory
-	{
-	  internal readonly int min;
-	  internal readonly int max;
-	  internal readonly bool enablePositionIncrements;
-	  public const string MIN_KEY = "min";
-	  public const string MAX_KEY = "max";
-
-	  /// <summary>
-	  /// Creates a new LengthFilterFactory </summary>
-	  public LengthFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		min = requireInt(args, MIN_KEY);
-		max = requireInt(args, MAX_KEY);
-		enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
-
-	  public override LengthFilter create(TokenStream input)
-	  {
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @SuppressWarnings("deprecation") final LengthFilter filter = new LengthFilter(luceneMatchVersion, enablePositionIncrements, input,min,max);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-		  LengthFilter filter = new LengthFilter(luceneMatchVersion, enablePositionIncrements, input,min,max);
-		return filter;
-	  }
-	}
+        public override TokenStream Create(TokenStream input)
+        {
+            var filter = new LengthFilter(luceneMatchVersion, enablePositionIncrements, input, min, max);
+            return filter;
+        }
+    }
 
 }
\ No newline at end of file

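The factory consumes its recognized keys from the args dictionary and rejects any
leftovers, so programmatic use looks like this sketch (the luceneMatchVersion
entry is an assumption about what the TokenFilterFactory base parses):

    using System.Collections.Generic;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Miscellaneous;

    public static class LengthFilterFactoryExample
    {
        public static TokenStream Wrap(TokenStream input)
        {
            var args = new Dictionary<string, string>
            {
                { "luceneMatchVersion", "LUCENE_CURRENT" }, // assumed base-factory key
                { "min", "2" },
                { "max", "10" }
            };
            var factory = new LengthFilterFactory(args);    // any unread key throws ArgumentException
            return factory.Create(input);
        }
    }
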
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
index 58e9d60..585accd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
@@ -1,68 +1,70 @@
-namespace org.apache.lucene.analysis.miscellaneous
-{
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+namespace Lucene.Net.Analysis.Miscellaneous
+{
 
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
-	/// <summary>
-	/// This Analyzer limits the number of tokens while indexing. It is
-	/// a replacement for the maximum field length setting inside <seealso cref="org.apache.lucene.index.IndexWriter"/>. </summary>
-	/// <seealso cref= LimitTokenCountFilter </seealso>
-	public sealed class LimitTokenCountAnalyzer : AnalyzerWrapper
-	{
-	  private readonly Analyzer @delegate;
-	  private readonly int maxTokenCount;
-	  private readonly bool consumeAllTokens;
 
-	  /// <summary>
-	  /// Build an analyzer that limits the maximum number of tokens per field.
-	  /// This analyzer will not consume any tokens beyond the maxTokenCount limit
-	  /// </summary>
-	  /// <seealso cref= #LimitTokenCountAnalyzer(Analyzer,int,boolean) </seealso>
-	  public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount) : this(@delegate, maxTokenCount, false)
-	  {
-	  }
-	  /// <summary>
-	  /// Build an analyzer that limits the maximum number of tokens per field. </summary>
-	  /// <param name="delegate"> the analyzer to wrap </param>
-	  /// <param name="maxTokenCount"> max number of tokens to produce </param>
-	  /// <param name="consumeAllTokens"> whether all tokens from the delegate should be consumed even if maxTokenCount is reached. </param>
-	  public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount, bool consumeAllTokens) : base(@delegate.ReuseStrategy)
-	  {
-		this.@delegate = @delegate;
-		this.maxTokenCount = maxTokenCount;
-		this.consumeAllTokens = consumeAllTokens;
-	  }
+    /// <summary>
+    /// This Analyzer limits the number of tokens while indexing. It is
+    /// a replacement for the maximum field length setting inside <seealso cref="org.apache.lucene.index.IndexWriter"/>. </summary>
+    /// <seealso cref= LimitTokenCountFilter </seealso>
+    public sealed class LimitTokenCountAnalyzer : AnalyzerWrapper
+    {
+        private readonly Analyzer @delegate;
+        private readonly int maxTokenCount;
+        private readonly bool consumeAllTokens;
 
-	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
-	  {
-		return @delegate;
-	  }
+        /// <summary>
+        /// Build an analyzer that limits the maximum number of tokens per field.
+        /// This analyzer will not consume any tokens beyond the maxTokenCount limit
+        /// </summary>
+        /// <seealso cref= #LimitTokenCountAnalyzer(Analyzer,int,boolean) </seealso>
+        public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount)
+            : this(@delegate, maxTokenCount, false)
+        {
+        }
+        /// <summary>
+        /// Build an analyzer that limits the maximum number of tokens per field. </summary>
+        /// <param name="delegate"> the analyzer to wrap </param>
+        /// <param name="maxTokenCount"> max number of tokens to produce </param>
+        /// <param name="consumeAllTokens"> whether all tokens from the delegate should be consumed even if maxTokenCount is reached. </param>
+        public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount, bool consumeAllTokens)
+            : base(@delegate.ReuseStrategy)
+        {
+            this.@delegate = @delegate;
+            this.maxTokenCount = maxTokenCount;
+            this.consumeAllTokens = consumeAllTokens;
+        }
 
-	  protected internal override TokenStreamComponents wrapComponents(string fieldName, TokenStreamComponents components)
-	  {
-		return new TokenStreamComponents(components.Tokenizer, new LimitTokenCountFilter(components.TokenStream, maxTokenCount, consumeAllTokens));
-	  }
+        protected override Analyzer GetWrappedAnalyzer(string fieldName)
+        {
+            return @delegate;
+        }
 
-	  public override string ToString()
-	  {
-		return "LimitTokenCountAnalyzer(" + @delegate.ToString() + ", maxTokenCount=" + maxTokenCount + ", consumeAllTokens=" + consumeAllTokens + ")";
-	  }
-	}
+        protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
+        {
+            return new TokenStreamComponents(components.Tokenizer, new LimitTokenCountFilter(components.TokenStream, maxTokenCount, consumeAllTokens));
+        }
 
+        public override string ToString()
+        {
+            return "LimitTokenCountAnalyzer(" + @delegate.ToString() + ", maxTokenCount=" + maxTokenCount + ", consumeAllTokens=" + consumeAllTokens + ")";
+        }
+    }
 }
\ No newline at end of file

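A sketch of wrapping an existing analyzer (StandardAnalyzer is assumed to be
ported; any Analyzer can be wrapped the same way):

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Standard;
    using Version = Lucene.Net.Util.Version;

    public static class LimitedAnalyzerExample
    {
        public static Analyzer Build()
        {
            Analyzer inner = new StandardAnalyzer(Version.LUCENE_CURRENT);
            // Index at most 100 tokens per field; tokens past the limit are not even consumed.
            return new LimitTokenCountAnalyzer(inner, 100);
        }
    }
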
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
index 4b60687..18252e1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
@@ -1,109 +1,107 @@
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
 
-	/// <summary>
-	/// This TokenFilter limits the number of tokens while indexing. It is
-	/// a replacement for the maximum field length setting inside <seealso cref="org.apache.lucene.index.IndexWriter"/>.
-	/// <para>
-	/// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
-	/// once the limit has been reached, which can result in {@code reset()} being 
-	/// called prior to {@code incrementToken()} returning {@code false}.  For most 
-	/// {@code TokenStream} implementations this should be acceptable, and faster 
-	/// then consuming the full stream. If you are wrapping a {@code TokenStream} 
-	/// which requires that the full stream of tokens be exhausted in order to 
-	/// function properly, use the 
-	/// <seealso cref="#LimitTokenCountFilter(TokenStream,int,boolean) consumeAllTokens"/> 
-	/// option.
-	/// </para>
-	/// </summary>
-	public sealed class LimitTokenCountFilter : TokenFilter
-	{
+    /// <summary>
+    /// This TokenFilter limits the number of tokens while indexing. It is
+    /// a replacement for the maximum field length setting inside <seealso cref="org.apache.lucene.index.IndexWriter"/>.
+    /// <para>
+    /// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
+    /// once the limit has been reached, which can result in {@code reset()} being 
+    /// called prior to {@code incrementToken()} returning {@code false}.  For most 
+    /// {@code TokenStream} implementations this should be acceptable, and faster 
+    /// than consuming the full stream. If you are wrapping a {@code TokenStream} 
+    /// which requires that the full stream of tokens be exhausted in order to 
+    /// function properly, use the 
+    /// <seealso cref="#LimitTokenCountFilter(TokenStream,int,boolean) consumeAllTokens"/> 
+    /// option.
+    /// </para>
+    /// </summary>
+    public sealed class LimitTokenCountFilter : TokenFilter
+    {
 
-	  private readonly int maxTokenCount;
-	  private readonly bool consumeAllTokens;
-	  private int tokenCount = 0;
-	  private bool exhausted = false;
+        private readonly int maxTokenCount;
+        private readonly bool consumeAllTokens;
+        private int tokenCount = 0;
+        private bool exhausted = false;
 
-	  /// <summary>
-	  /// Build a filter that only accepts tokens up to a maximum number.
-	  /// This filter will not consume any tokens beyond the maxTokenCount limit
-	  /// </summary>
-	  /// <seealso cref= #LimitTokenCountFilter(TokenStream,int,boolean) </seealso>
-	  public LimitTokenCountFilter(TokenStream @in, int maxTokenCount) : this(@in, maxTokenCount, false)
-	  {
-	  }
+        /// <summary>
+        /// Build a filter that only accepts tokens up to a maximum number.
+        /// This filter will not consume any tokens beyond the maxTokenCount limit
+        /// </summary>
+        /// <seealso cref= #LimitTokenCountFilter(TokenStream,int,boolean) </seealso>
+        public LimitTokenCountFilter(TokenStream @in, int maxTokenCount)
+            : this(@in, maxTokenCount, false)
+        {
+        }
 
-	  /// <summary>
-	  /// Build an filter that limits the maximum number of tokens per field. </summary>
-	  /// <param name="in"> the stream to wrap </param>
-	  /// <param name="maxTokenCount"> max number of tokens to produce </param>
-	  /// <param name="consumeAllTokens"> whether all tokens from the input must be consumed even if maxTokenCount is reached. </param>
-	  public LimitTokenCountFilter(TokenStream @in, int maxTokenCount, bool consumeAllTokens) : base(@in)
-	  {
-		if (maxTokenCount < 1)
-		{
-		  throw new System.ArgumentException("maxTokenCount must be greater than zero");
-		}
-		this.maxTokenCount = maxTokenCount;
-		this.consumeAllTokens = consumeAllTokens;
-	  }
+        /// <summary>
+        /// Build a filter that limits the maximum number of tokens per field. </summary>
+        /// <param name="in"> the stream to wrap </param>
+        /// <param name="maxTokenCount"> max number of tokens to produce </param>
+        /// <param name="consumeAllTokens"> whether all tokens from the input must be consumed even if maxTokenCount is reached. </param>
+        public LimitTokenCountFilter(TokenStream @in, int maxTokenCount, bool consumeAllTokens)
+            : base(@in)
+        {
+            if (maxTokenCount < 1)
+            {
+                throw new System.ArgumentException("maxTokenCount must be greater than zero");
+            }
+            this.maxTokenCount = maxTokenCount;
+            this.consumeAllTokens = consumeAllTokens;
+        }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (exhausted)
-		{
-		  return false;
-		}
-		else if (tokenCount < maxTokenCount)
-		{
-		  if (input.incrementToken())
-		  {
-			tokenCount++;
-			return true;
-		  }
-		  else
-		  {
-			exhausted = true;
-			return false;
-		  }
-		}
-		else
-		{
-		  while (consumeAllTokens && input.incrementToken()) // NOOP
-		  {
-		  }
-		  return false;
-		}
-	  }
+        public override bool IncrementToken()
+        {
+            if (exhausted)
+            {
+                return false;
+            }
+            else if (tokenCount < maxTokenCount)
+            {
+                if (input.IncrementToken())
+                {
+                    tokenCount++;
+                    return true;
+                }
+                else
+                {
+                    exhausted = true;
+                    return false;
+                }
+            }
+            else
+            {
+                while (consumeAllTokens && input.IncrementToken()) // NOOP
+                {
+                }
+                return false;
+            }
+        }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		tokenCount = 0;
-		exhausted = false;
-	  }
-	}
+        public override void Reset()
+        {
+            base.Reset();
+            tokenCount = 0;
+            exhausted = false;
+        }
+    }
 
 }
\ No newline at end of file

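A short usage sketch showing the difference the consumeAllTokens flag makes
(every name other than the filter itself is a placeholder):

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Miscellaneous;

    public static class LimitCountExample
    {
        public static TokenStream FirstTen(TokenStream input)
        {
            // Stops after 10 tokens and leaves the rest of the stream unread.
            return new LimitTokenCountFilter(input, 10);
        }

        public static TokenStream FirstTenDrained(TokenStream input)
        {
            // Also stops after 10 tokens, but drains the wrapped stream to its end,
            // for inputs that must see all tokens before Reset() (e.g. teeing filters).
            return new LimitTokenCountFilter(input, 10, true);
        }
    }
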
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
index ac55037..199b757 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
@@ -1,67 +1,63 @@
 using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using TokenFilterFactory = TokenFilterFactory;
-
-	/// <summary>
-	/// Factory for <seealso cref="LimitTokenCountFilter"/>. 
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10" consumeAllTokens="false" /&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// <para>
-	/// The {@code consumeAllTokens} property is optional and defaults to {@code false}.  
-	/// See <seealso cref="LimitTokenCountFilter"/> for an explanation of it's use.
-	/// </para>
-	/// </summary>
-	public class LimitTokenCountFilterFactory : TokenFilterFactory
-	{
-
-	  public const string MAX_TOKEN_COUNT_KEY = "maxTokenCount";
-	  public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
-	  internal readonly int maxTokenCount;
-	  internal readonly bool consumeAllTokens;
-
-	  /// <summary>
-	  /// Creates a new LimitTokenCountFilterFactory </summary>
-	  public LimitTokenCountFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		maxTokenCount = requireInt(args, MAX_TOKEN_COUNT_KEY);
-		consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
-
-	  public override TokenStream create(TokenStream input)
-	  {
-		return new LimitTokenCountFilter(input, maxTokenCount, consumeAllTokens);
-	  }
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory for <seealso cref="LimitTokenCountFilter"/>. 
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10" consumeAllTokens="false" /&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// <para>
+    /// The {@code consumeAllTokens} property is optional and defaults to {@code false}.  
+    /// See <seealso cref="LimitTokenCountFilter"/> for an explanation of it's use.
+    /// </para>
+    /// </summary>
+    public class LimitTokenCountFilterFactory : TokenFilterFactory
+    {
+
+        public const string MAX_TOKEN_COUNT_KEY = "maxTokenCount";
+        public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
+        internal readonly int maxTokenCount;
+        internal readonly bool consumeAllTokens;
+
+        /// <summary>
+        /// Creates a new LimitTokenCountFilterFactory </summary>
+        public LimitTokenCountFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            maxTokenCount = requireInt(args, MAX_TOKEN_COUNT_KEY);
+            consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            return new LimitTokenCountFilter(input, maxTokenCount, consumeAllTokens);
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
index 931e492..6bdf9ad 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
@@ -1,116 +1,113 @@
-namespace org.apache.lucene.analysis.miscellaneous
-{
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+using Lucene.Net.Analysis.Tokenattributes;
 
-	/// <summary>
-	/// This TokenFilter limits its emitted tokens to those with positions that
-	/// are not greater than the configured limit.
-	/// <para>
-	/// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
-	/// once the limit has been exceeded, which can result in {@code reset()} being 
-	/// called prior to {@code incrementToken()} returning {@code false}.  For most 
-	/// {@code TokenStream} implementations this should be acceptable, and faster 
-	/// then consuming the full stream. If you are wrapping a {@code TokenStream}
-	/// which requires that the full stream of tokens be exhausted in order to 
-	/// function properly, use the 
-	/// <seealso cref="#LimitTokenPositionFilter(TokenStream,int,boolean) consumeAllTokens"/>
-	/// option.
-	/// </para>
-	/// </summary>
-	public sealed class LimitTokenPositionFilter : TokenFilter
-	{
-
-	  private readonly int maxTokenPosition;
-	  private readonly bool consumeAllTokens;
-	  private int tokenPosition = 0;
-	  private bool exhausted = false;
-	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// This TokenFilter limits its emitted tokens to those with positions that
+    /// are not greater than the configured limit.
+    /// <para>
+    /// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
+    /// once the limit has been exceeded, which can result in {@code reset()} being 
+    /// called prior to {@code incrementToken()} returning {@code false}.  For most 
+    /// {@code TokenStream} implementations this should be acceptable, and faster 
+    /// than consuming the full stream. If you are wrapping a {@code TokenStream}
+    /// which requires that the full stream of tokens be exhausted in order to 
+    /// function properly, use the 
+    /// <seealso cref="#LimitTokenPositionFilter(TokenStream,int,boolean) consumeAllTokens"/>
+    /// option.
+    /// </para>
+    /// </summary>
+    public sealed class LimitTokenPositionFilter : TokenFilter
+    {
 
-	  /// <summary>
-	  /// Build a filter that only accepts tokens up to and including the given maximum position.
-	  /// This filter will not consume any tokens with position greater than the maxTokenPosition limit.
-	  /// </summary>
-	  /// <param name="in"> the stream to wrap </param>
-	  /// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1)
-	  /// </param>
-	  /// <seealso cref= #LimitTokenPositionFilter(TokenStream,int,boolean) </seealso>
-	  public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition) : this(@in, maxTokenPosition, false)
-	  {
-	  }
+        private readonly int maxTokenPosition;
+        private readonly bool consumeAllTokens;
+        private int tokenPosition = 0;
+        private bool exhausted = false;
+        private readonly IPositionIncrementAttribute posIncAtt;
 
-	  /// <summary>
-	  /// Build a filter that limits the maximum position of tokens to emit.
-	  /// </summary>
-	  /// <param name="in"> the stream to wrap </param>
-	  /// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1) </param>
-	  /// <param name="consumeAllTokens"> whether all tokens from the wrapped input stream must be consumed
-	  ///                         even if maxTokenPosition is exceeded. </param>
-	  public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition, bool consumeAllTokens) : base(@in)
-	  {
-		if (maxTokenPosition < 1)
-		{
-		  throw new System.ArgumentException("maxTokenPosition must be greater than zero");
-		}
-		this.maxTokenPosition = maxTokenPosition;
-		this.consumeAllTokens = consumeAllTokens;
-	  }
+        /// <summary>
+        /// Build a filter that only accepts tokens up to and including the given maximum position.
+        /// This filter will not consume any tokens with position greater than the maxTokenPosition limit.
+        /// </summary>
+        /// <param name="in"> the stream to wrap </param>
+        /// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1)
+        /// </param>
+        /// <seealso cref= #LimitTokenPositionFilter(TokenStream,int,boolean) </seealso>
+        public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition)
+            : this(@in, maxTokenPosition, false)
+        {
+        }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (exhausted)
-		{
-		  return false;
-		}
-		if (input.incrementToken())
-		{
-		  tokenPosition += posIncAtt.PositionIncrement;
-		  if (tokenPosition <= maxTokenPosition)
-		  {
-			return true;
-		  }
-		  else
-		  {
-			while (consumeAllTokens && input.incrementToken()) // NOOP
-			{
-			}
-			exhausted = true;
-			return false;
-		  }
-		}
-		else
-		{
-		  exhausted = true;
-		  return false;
-		}
-	  }
+        /// <summary>
+        /// Build a filter that limits the maximum position of tokens to emit.
+        /// </summary>
+        /// <param name="in"> the stream to wrap </param>
+        /// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1) </param>
+        /// <param name="consumeAllTokens"> whether all tokens from the wrapped input stream must be consumed
+        ///                         even if maxTokenPosition is exceeded. </param>
+        public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition, bool consumeAllTokens)
+            : base(@in)
+        {
+            if (maxTokenPosition < 1)
+            {
+                throw new System.ArgumentException("maxTokenPosition must be greater than zero");
+            }
+            this.maxTokenPosition = maxTokenPosition;
+            this.consumeAllTokens = consumeAllTokens;
+            posIncAtt = AddAttribute<IPositionIncrementAttribute>(); // moved from the delegating ctor so both ctors initialize it
+        }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		tokenPosition = 0;
-		exhausted = false;
-	  }
-	}
+        public override bool IncrementToken()
+        {
+            if (exhausted)
+            {
+                return false;
+            }
+            if (input.IncrementToken())
+            {
+                tokenPosition += posIncAtt.PositionIncrement;
+                if (tokenPosition <= maxTokenPosition)
+                {
+                    return true;
+                }
+                else
+                {
+                    while (consumeAllTokens && input.IncrementToken()) // NOOP
+                    {
+                    }
+                    exhausted = true;
+                    return false;
+                }
+            }
+            else
+            {
+                exhausted = true;
+                return false;
+            }
+        }
 
+        public override void Reset()
+        {
+            base.Reset();
+            tokenPosition = 0;
+            exhausted = false;
+        }
+    }
 }
\ No newline at end of file

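A sketch contrasting this with the count-based filter above: the limit here is on
token position, so stacked tokens (position increment 0) at an allowed position
still come through:

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Miscellaneous;

    public static class LimitPositionExample
    {
        public static TokenStream FirstThreePositions(TokenStream input)
        {
            // Emits only tokens at positions 1..3 (the first token has position 1);
            // a synonym stacked at position 3 is still emitted.
            return new LimitTokenPositionFilter(input, 3);
        }
    }
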
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
index 69877e8..44b5cfe 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
@@ -1,66 +1,62 @@
 using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using TokenFilterFactory = TokenFilterFactory;
-
-	/// <summary>
-	/// Factory for <seealso cref="LimitTokenPositionFilter"/>. 
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_limit_pos" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.LimitTokenPositionFilterFactory" maxTokenPosition="3" consumeAllTokens="false" /&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// <para>
-	/// The {@code consumeAllTokens} property is optional and defaults to {@code false}.  
-	/// See <seealso cref="LimitTokenPositionFilter"/> for an explanation of its use.
-	/// </para>
-	/// </summary>
-	public class LimitTokenPositionFilterFactory : TokenFilterFactory
-	{
-
-	  public const string MAX_TOKEN_POSITION_KEY = "maxTokenPosition";
-	  public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
-	  internal readonly int maxTokenPosition;
-	  internal readonly bool consumeAllTokens;
-
-	  /// <summary>
-	  /// Creates a new LimitTokenPositionFilterFactory </summary>
-	  public LimitTokenPositionFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		maxTokenPosition = requireInt(args, MAX_TOKEN_POSITION_KEY);
-		consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
-
-	  public override TokenStream create(TokenStream input)
-	  {
-		return new LimitTokenPositionFilter(input, maxTokenPosition, consumeAllTokens);
-	  }
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory for <seealso cref="LimitTokenPositionFilter"/>. 
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_limit_pos" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.LimitTokenPositionFilterFactory" maxTokenPosition="3" consumeAllTokens="false" /&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// <para>
+    /// The {@code consumeAllTokens} property is optional and defaults to {@code false}.  
+    /// See <seealso cref="LimitTokenPositionFilter"/> for an explanation of its use.
+    /// </para>
+    /// </summary>
+    public class LimitTokenPositionFilterFactory : TokenFilterFactory
+    {
+
+        public const string MAX_TOKEN_POSITION_KEY = "maxTokenPosition";
+        public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
+        internal readonly int maxTokenPosition;
+        internal readonly bool consumeAllTokens;
+
+        /// <summary>
+        /// Creates a new LimitTokenPositionFilterFactory </summary>
+        public LimitTokenPositionFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            maxTokenPosition = requireInt(args, MAX_TOKEN_POSITION_KEY);
+            consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            return new LimitTokenPositionFilter(input, maxTokenPosition, consumeAllTokens);
+        }
+    }
 }
\ No newline at end of file


[4/5] lucenenet git commit: More porting work

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
index f170ebf..18df853 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
@@ -5,2094 +5,2098 @@ using Lucene.Net.Util;
 namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// This class converts alphabetic, numeric, and symbolic Unicode characters
-	/// which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
-	/// block) into their ASCII equivalents, if one exists.
-	/// 
-	/// Characters from the following Unicode blocks are converted; however, only
-	/// those characters with reasonable ASCII alternatives are converted:
-	/// 
-	/// <ul>
-	///   <li>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a>
-	///   <li>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a>
-	///   <li>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a>
-	///   <li>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a>
-	///   <li>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a>
-	///   <li>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a>
-	///   <li>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a>
-	///   <li>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a>
-	///   <li>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a>
-	///   <li>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a>
-	///   <li>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a>
-	///   <li>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a>
-	///   <li>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a>
-	///   <li>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a>
-	///   <li>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a>
-	///   <li>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a>
-	/// </ul>
-	///  
-	/// See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a>
-	/// 
-	/// For example, '&agrave;' will be replaced by 'a'.
-	/// </summary>
-	public sealed class ASCIIFoldingFilter : TokenFilter
-	{
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly PositionIncrementAttribute posIncAttr = addAttribute(typeof(PositionIncrementAttribute));
-	  private readonly bool preserveOriginal;
-	  private char[] output = new char[512];
-	  private int outputPos;
-	  private State state;
+    /// This class converts alphabetic, numeric, and symbolic Unicode characters
+    /// which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
+    /// block) into their ASCII equivalents, if one exists.
+    /// 
+    /// Characters from the following Unicode blocks are converted; however, only
+    /// those characters with reasonable ASCII alternatives are converted:
+    /// 
+    /// <ul>
+    ///   <li>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a>
+    ///   <li>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a>
+    ///   <li>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a>
+    ///   <li>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a>
+    ///   <li>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a>
+    ///   <li>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a>
+    ///   <li>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a>
+    ///   <li>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a>
+    ///   <li>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a>
+    ///   <li>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a>
+    ///   <li>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a>
+    ///   <li>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a>
+    ///   <li>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a>
+    ///   <li>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a>
+    ///   <li>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a>
+    ///   <li>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a>
+    /// </ul>
+    ///  
+    /// See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a>
+    /// 
+    /// For example, '&agrave;' will be replaced by 'a'.
+    /// </summary>
+    public sealed class ASCIIFoldingFilter : TokenFilter
+    {
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posIncAttr;
+        private readonly bool preserveOriginal;
+        private char[] output = new char[512];
+        private int outputPos;
+        private State state;
 
-	  public ASCIIFoldingFilter(TokenStream input) : this(input, false)
-	  {
-	  }
+        public ASCIIFoldingFilter(TokenStream input)
+            : this(input, false)
+        {
+        }
 
-	  /// <summary>
-	  /// Create a new <seealso cref="ASCIIFoldingFilter"/>.
-	  /// </summary>
-	  /// <param name="input">
-	  ///          TokenStream to filter </param>
-	  /// <param name="preserveOriginal">
-	  ///          should the original tokens be kept on the input stream with a 0 position increment
-	  ///          from the folded tokens?
-	  ///  </param>
-	  public ASCIIFoldingFilter(TokenStream input, bool preserveOriginal) : base(input)
-	  {
-		this.preserveOriginal = preserveOriginal;
-	  }
+        /// <summary>
+        /// Create a new <seealso cref="ASCIIFoldingFilter"/>.
+        /// </summary>
+        /// <param name="input">
+        ///          TokenStream to filter </param>
+        /// <param name="preserveOriginal">
+        ///          should the original tokens be kept on the input stream with a 0 position increment
+        ///          from the folded tokens?
+        ///  </param>
+        public ASCIIFoldingFilter(TokenStream input, bool preserveOriginal)
+            : base(input)
+        {
+            this.preserveOriginal = preserveOriginal;
+            termAtt = AddAttribute<ICharTermAttribute>();             // moved from the delegating ctor
+            posIncAttr = AddAttribute<IPositionIncrementAttribute>(); // so both ctors initialize them
+        }
 
-	  /// <summary>
-	  /// Does the filter preserve the original tokens?
-	  /// </summary>
-	  public bool PreserveOriginal
-	  {
-		  get
-		  {
-			return preserveOriginal;
-		  }
-	  }
+        /// <summary>
+        /// Does the filter preserve the original tokens?
+        /// </summary>
+        public bool PreserveOriginal
+        {
+            get
+            {
+                return preserveOriginal;
+            }
+        }
 
-	  public override bool IncrementToken()
-	  {
-		if (state != null)
-		{
-		  Debug.Assert(preserveOriginal, "state should only be captured if preserveOriginal is true");
-		  RestoreState(state);
-		  posIncAttr.PositionIncrement = 0;
-		  state = null;
-		  return true;
-		}
-		if (input.IncrementToken())
-		{
-		  char[] buffer = termAtt.Buffer();
-		  int length = termAtt.Length;
+        public override bool IncrementToken()
+        {
+            if (state != null)
+            {
+                Debug.Assert(preserveOriginal, "state should only be captured if preserveOriginal is true");
+                RestoreState(state);
+                posIncAttr.PositionIncrement = 0;
+                state = null;
+                return true;
+            }
+            if (input.IncrementToken())
+            {
+                char[] buffer = termAtt.Buffer();
+                int length = termAtt.Length;
 
-		  // If no characters actually require rewriting then we
-		  // just return token as-is:
-		  for (int i = 0 ; i < length ; ++i)
-		  {
-			char c = buffer[i];
-			if (c >= '\u0080')
-			{
-			  FoldToASCII(buffer, length);
-			  termAtt.CopyBuffer(output, 0, outputPos);
-			  break;
-			}
-		  }
-		  return true;
-		}
-		else
-		{
-		  return false;
-		}
-	  }
+                // If no characters actually require rewriting then we
+                // just return token as-is:
+                for (int i = 0; i < length; ++i)
+                {
+                    char c = buffer[i];
+                    if (c >= '\u0080')
+                    {
+                        FoldToASCII(buffer, length);
+                        termAtt.CopyBuffer(output, 0, outputPos);
+                        break;
+                    }
+                }
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
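
For reference, a hand-written sketch of the consumption loop this override
participates in, under the usual TokenStream contract (not code from this
patch):

    // Sketch: drain any TokenStream and print each term with its increment.
    static void Dump(TokenStream stream)
    {
        var term = stream.GetAttribute<ICharTermAttribute>();
        var posInc = stream.GetAttribute<IPositionIncrementAttribute>();
        stream.Reset();
        while (stream.IncrementToken())
        {
            // The folded token arrives first; with preserveOriginal the
            // restored original follows with PositionIncrement == 0.
            Console.WriteLine("{0} (+{1})", term, posInc.PositionIncrement);
        }
        stream.End();
    }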
 
-	  public override void Reset()
-	  {
-		base.Reset();
-		state = null;
-	  }
+        public override void Reset()
+        {
+            base.Reset();
+            state = null;
+        }
 
-	  /// <summary>
-	  /// Converts characters above ASCII to their ASCII equivalents.  For example,
-	  /// accents are removed from accented characters. </summary>
-	  /// <param name="input"> The string to fold </param>
-	  /// <param name="length"> The number of characters in the input string </param>
-	  public void FoldToASCII(char[] input, int length)
-	  {
-		if (preserveOriginal)
-		{
-		  state = CaptureState();
-		}
-		// Worst-case length required:
-		int maxSizeNeeded = 4 * length;
-		if (output.Length < maxSizeNeeded)
-		{
-		  output = new char[ArrayUtil.Oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
-		}
+        /// <summary>
+        /// Converts characters above ASCII to their ASCII equivalents.  For example,
+        /// accents are removed from accented characters. </summary>
+        /// <param name="input"> The string to fold </param>
+        /// <param name="length"> The number of characters in the input string </param>
+        public void FoldToASCII(char[] input, int length)
+        {
+            if (preserveOriginal)
+            {
+                state = CaptureState();
+            }
+            // Worst-case length required:
+            int maxSizeNeeded = 4 * length;
+            if (output.Length < maxSizeNeeded)
+            {
+                output = new char[ArrayUtil.Oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
+            }
 
-		outputPos = FoldToASCII(input, 0, output, 0, length);
-	  }
+            outputPos = FoldToASCII(input, 0, output, 0, length);
+        }
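
The 4x head-room covers the longest replacement sequences, e.g. the
parenthesized letters, where '⒜' folds to "(a)". A small sketch against the
static overload declared below (buffer names are hypothetical):

    char[] input = "⒜Æ".ToCharArray();
    char[] folded = new char[input.Length * 4]; // worst-case output size
    int len = ASCIIFoldingFilter.FoldToASCII(input, 0, folded, 0, input.Length);
    // new string(folded, 0, len) == "(a)AE"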
 
-	  /// <summary>
-	  /// Converts characters above ASCII to their ASCII equivalents.  For example,
-	  /// accents are removed from accented characters. </summary>
-	  /// <param name="input">     The characters to fold </param>
-	  /// <param name="inputPos">  Index of the first character to fold </param>
-	  /// <param name="output">    The result of the folding. Should be of size >= {@code length * 4}. </param>
-	  /// <param name="outputPos"> Index of output where to put the result of the folding </param>
-	  /// <param name="length">    The number of characters to fold </param>
-	  /// <returns> length of output
-	  /// @lucene.internal </returns>
-	  public static int FoldToASCII(char[] input, int inputPos, char[] output, int outputPos, int length)
-	  {
-		int end = inputPos + length;
-		for (int pos = inputPos; pos < end ; ++pos)
-		{
-		  char c = input[pos];
+        /// <summary>
+        /// Converts characters above ASCII to their ASCII equivalents.  For example,
+        /// accents are removed from accented characters. </summary>
+        /// <param name="input">     The characters to fold </param>
+        /// <param name="inputPos">  Index of the first character to fold </param>
+        /// <param name="output">    The result of the folding. Should be of size >= {@code length * 4}. </param>
+        /// <param name="outputPos"> Index of output where to put the result of the folding </param>
+        /// <param name="length">    The number of characters to fold </param>
+        /// <returns> length of output
+        /// @lucene.internal </returns>
+        public static int FoldToASCII(char[] input, int inputPos, char[] output, int outputPos, int length)
+        {
+            int end = inputPos + length;
+            for (int pos = inputPos; pos < end; ++pos)
+            {
+                char c = input[pos];
 
-		  // Quick test: if it's not in range then just keep current character
-		  if (c < '\u0080')
-		  {
-			output[outputPos++] = c;
-		  }
-		  else
-		  {
-			switch (c)
-			{
-			  case '\u00C0': // À  [LATIN CAPITAL LETTER A WITH GRAVE]
-			  case '\u00C1': // Á  [LATIN CAPITAL LETTER A WITH ACUTE]
-			  case '\u00C2': // Â  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
-			  case '\u00C3': // Ã  [LATIN CAPITAL LETTER A WITH TILDE]
-			  case '\u00C4': // Ä  [LATIN CAPITAL LETTER A WITH DIAERESIS]
-			  case '\u00C5': // Å  [LATIN CAPITAL LETTER A WITH RING ABOVE]
-			  case '\u0100': // Ā  [LATIN CAPITAL LETTER A WITH MACRON]
-			  case '\u0102': // Ă  [LATIN CAPITAL LETTER A WITH BREVE]
-			  case '\u0104': // Ą  [LATIN CAPITAL LETTER A WITH OGONEK]
-			  case '\u018F': // Ə  http://en.wikipedia.org/wiki/Schwa  [LATIN CAPITAL LETTER SCHWA]
-			  case '\u01CD': // Ǎ  [LATIN CAPITAL LETTER A WITH CARON]
-			  case '\u01DE': // Ǟ  [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
-			  case '\u01E0': // Ǡ  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
-			  case '\u01FA': // Ǻ  [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
-			  case '\u0200': // Ȁ  [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
-			  case '\u0202': // Ȃ  [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
-			  case '\u0226': // Ȧ  [LATIN CAPITAL LETTER A WITH DOT ABOVE]
-			  case '\u023A': // Ⱥ  [LATIN CAPITAL LETTER A WITH STROKE]
-			  case '\u1D00': // ᴀ  [LATIN LETTER SMALL CAPITAL A]
-			  case '\u1E00': // Ḁ  [LATIN CAPITAL LETTER A WITH RING BELOW]
-			  case '\u1EA0': // Ạ  [LATIN CAPITAL LETTER A WITH DOT BELOW]
-			  case '\u1EA2': // Ả  [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
-			  case '\u1EA4': // Ấ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
-			  case '\u1EA6': // Ầ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
-			  case '\u1EA8': // Ẩ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
-			  case '\u1EAA': // Ẫ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
-			  case '\u1EAC': // Ậ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
-			  case '\u1EAE': // Ắ  [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
-			  case '\u1EB0': // Ằ  [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
-			  case '\u1EB2': // Ẳ  [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
-			  case '\u1EB4': // Ẵ  [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
-			  case '\u1EB6': // Ặ  [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
-			  case '\u24B6': // Ⓐ  [CIRCLED LATIN CAPITAL LETTER A]
-			  case '\uFF21': // A  [FULLWIDTH LATIN CAPITAL LETTER A]
-				output[outputPos++] = 'A';
-				break;
-			  case '\u00E0': // à  [LATIN SMALL LETTER A WITH GRAVE]
-			  case '\u00E1': // á  [LATIN SMALL LETTER A WITH ACUTE]
-			  case '\u00E2': // â  [LATIN SMALL LETTER A WITH CIRCUMFLEX]
-			  case '\u00E3': // ã  [LATIN SMALL LETTER A WITH TILDE]
-			  case '\u00E4': // ä  [LATIN SMALL LETTER A WITH DIAERESIS]
-			  case '\u00E5': // å  [LATIN SMALL LETTER A WITH RING ABOVE]
-			  case '\u0101': // ā  [LATIN SMALL LETTER A WITH MACRON]
-			  case '\u0103': // ă  [LATIN SMALL LETTER A WITH BREVE]
-			  case '\u0105': // ą  [LATIN SMALL LETTER A WITH OGONEK]
-			  case '\u01CE': // ǎ  [LATIN SMALL LETTER A WITH CARON]
-			  case '\u01DF': // ǟ  [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
-			  case '\u01E1': // ǡ  [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
-			  case '\u01FB': // ǻ  [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
-			  case '\u0201': // ȁ  [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
-			  case '\u0203': // ȃ  [LATIN SMALL LETTER A WITH INVERTED BREVE]
-			  case '\u0227': // ȧ  [LATIN SMALL LETTER A WITH DOT ABOVE]
-			  case '\u0250': // ɐ  [LATIN SMALL LETTER TURNED A]
-			  case '\u0259': // ə  [LATIN SMALL LETTER SCHWA]
-			  case '\u025A': // ɚ  [LATIN SMALL LETTER SCHWA WITH HOOK]
-			  case '\u1D8F': // ᶏ  [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
-			  case '\u1D95': // ᶕ  [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
-			  case '\u1E01': // ḁ  [LATIN SMALL LETTER A WITH RING BELOW]
-			  case '\u1E9A': // ẚ  [LATIN SMALL LETTER A WITH RIGHT HALF RING]
-			  case '\u1EA1': // ạ  [LATIN SMALL LETTER A WITH DOT BELOW]
-			  case '\u1EA3': // ả  [LATIN SMALL LETTER A WITH HOOK ABOVE]
-			  case '\u1EA5': // ấ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
-			  case '\u1EA7': // ầ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
-			  case '\u1EA9': // ẩ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
-			  case '\u1EAB': // ẫ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
-			  case '\u1EAD': // ậ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
-			  case '\u1EAF': // ắ  [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
-			  case '\u1EB1': // ằ  [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
-			  case '\u1EB3': // ẳ  [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
-			  case '\u1EB5': // ẵ  [LATIN SMALL LETTER A WITH BREVE AND TILDE]
-			  case '\u1EB7': // ặ  [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
-			  case '\u2090': // ₐ  [LATIN SUBSCRIPT SMALL LETTER A]
-			  case '\u2094': // ₔ  [LATIN SUBSCRIPT SMALL LETTER SCHWA]
-			  case '\u24D0': // ⓐ  [CIRCLED LATIN SMALL LETTER A]
-			  case '\u2C65': // ⱥ  [LATIN SMALL LETTER A WITH STROKE]
-			  case '\u2C6F': // Ɐ  [LATIN CAPITAL LETTER TURNED A]
-			  case '\uFF41': // a  [FULLWIDTH LATIN SMALL LETTER A]
-				output[outputPos++] = 'a';
-				break;
-			  case '\uA732': // Ꜳ  [LATIN CAPITAL LETTER AA]
-				output[outputPos++] = 'A';
-				output[outputPos++] = 'A';
-				break;
-			  case '\u00C6': // Æ  [LATIN CAPITAL LETTER AE]
-			  case '\u01E2': // Ǣ  [LATIN CAPITAL LETTER AE WITH MACRON]
-			  case '\u01FC': // Ǽ  [LATIN CAPITAL LETTER AE WITH ACUTE]
-			  case '\u1D01': // ᴁ  [LATIN LETTER SMALL CAPITAL AE]
-				output[outputPos++] = 'A';
-				output[outputPos++] = 'E';
-				break;
-			  case '\uA734': // Ꜵ  [LATIN CAPITAL LETTER AO]
-				output[outputPos++] = 'A';
-				output[outputPos++] = 'O';
-				break;
-			  case '\uA736': // Ꜷ  [LATIN CAPITAL LETTER AU]
-				output[outputPos++] = 'A';
-				output[outputPos++] = 'U';
-				break;
-			  case '\uA738': // Ꜹ  [LATIN CAPITAL LETTER AV]
-			  case '\uA73A': // Ꜻ  [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
-				output[outputPos++] = 'A';
-				output[outputPos++] = 'V';
-				break;
-			  case '\uA73C': // Ꜽ  [LATIN CAPITAL LETTER AY]
-				output[outputPos++] = 'A';
-				output[outputPos++] = 'Y';
-				break;
-			  case '\u249C': // ⒜  [PARENTHESIZED LATIN SMALL LETTER A]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'a';
-				output[outputPos++] = ')';
-				break;
-			  case '\uA733': // ꜳ  [LATIN SMALL LETTER AA]
-				output[outputPos++] = 'a';
-				output[outputPos++] = 'a';
-				break;
-			  case '\u00E6': // æ  [LATIN SMALL LETTER AE]
-			  case '\u01E3': // ǣ  [LATIN SMALL LETTER AE WITH MACRON]
-			  case '\u01FD': // ǽ  [LATIN SMALL LETTER AE WITH ACUTE]
-			  case '\u1D02': // ᴂ  [LATIN SMALL LETTER TURNED AE]
-				output[outputPos++] = 'a';
-				output[outputPos++] = 'e';
-				break;
-			  case '\uA735': // ꜵ  [LATIN SMALL LETTER AO]
-				output[outputPos++] = 'a';
-				output[outputPos++] = 'o';
-				break;
-			  case '\uA737': // ꜷ  [LATIN SMALL LETTER AU]
-				output[outputPos++] = 'a';
-				output[outputPos++] = 'u';
-				break;
-			  case '\uA739': // ꜹ  [LATIN SMALL LETTER AV]
-			  case '\uA73B': // ꜻ  [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
-				output[outputPos++] = 'a';
-				output[outputPos++] = 'v';
-				break;
-			  case '\uA73D': // ꜽ  [LATIN SMALL LETTER AY]
-				output[outputPos++] = 'a';
-				output[outputPos++] = 'y';
-				break;
-			  case '\u0181': // Ɓ  [LATIN CAPITAL LETTER B WITH HOOK]
-			  case '\u0182': // Ƃ  [LATIN CAPITAL LETTER B WITH TOPBAR]
-			  case '\u0243': // Ƀ  [LATIN CAPITAL LETTER B WITH STROKE]
-			  case '\u0299': // ʙ  [LATIN LETTER SMALL CAPITAL B]
-			  case '\u1D03': // ᴃ  [LATIN LETTER SMALL CAPITAL BARRED B]
-			  case '\u1E02': // Ḃ  [LATIN CAPITAL LETTER B WITH DOT ABOVE]
-			  case '\u1E04': // Ḅ  [LATIN CAPITAL LETTER B WITH DOT BELOW]
-			  case '\u1E06': // Ḇ  [LATIN CAPITAL LETTER B WITH LINE BELOW]
-			  case '\u24B7': // Ⓑ  [CIRCLED LATIN CAPITAL LETTER B]
-			  case '\uFF22': // B  [FULLWIDTH LATIN CAPITAL LETTER B]
-				output[outputPos++] = 'B';
-				break;
-			  case '\u0180': // ƀ  [LATIN SMALL LETTER B WITH STROKE]
-			  case '\u0183': // ƃ  [LATIN SMALL LETTER B WITH TOPBAR]
-			  case '\u0253': // ɓ  [LATIN SMALL LETTER B WITH HOOK]
-			  case '\u1D6C': // ᵬ  [LATIN SMALL LETTER B WITH MIDDLE TILDE]
-			  case '\u1D80': // ᶀ  [LATIN SMALL LETTER B WITH PALATAL HOOK]
-			  case '\u1E03': // ḃ  [LATIN SMALL LETTER B WITH DOT ABOVE]
-			  case '\u1E05': // ḅ  [LATIN SMALL LETTER B WITH DOT BELOW]
-			  case '\u1E07': // ḇ  [LATIN SMALL LETTER B WITH LINE BELOW]
-			  case '\u24D1': // ⓑ  [CIRCLED LATIN SMALL LETTER B]
-			  case '\uFF42': // b  [FULLWIDTH LATIN SMALL LETTER B]
-				output[outputPos++] = 'b';
-				break;
-			  case '\u249D': // ⒝  [PARENTHESIZED LATIN SMALL LETTER B]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'b';
-				output[outputPos++] = ')';
-				break;
-			  case '\u00C7': // Ç  [LATIN CAPITAL LETTER C WITH CEDILLA]
-			  case '\u0106': // Ć  [LATIN CAPITAL LETTER C WITH ACUTE]
-			  case '\u0108': // Ĉ  [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
-			  case '\u010A': // Ċ  [LATIN CAPITAL LETTER C WITH DOT ABOVE]
-			  case '\u010C': // Č  [LATIN CAPITAL LETTER C WITH CARON]
-			  case '\u0187': // Ƈ  [LATIN CAPITAL LETTER C WITH HOOK]
-			  case '\u023B': // Ȼ  [LATIN CAPITAL LETTER C WITH STROKE]
-			  case '\u0297': // ʗ  [LATIN LETTER STRETCHED C]
-			  case '\u1D04': // ᴄ  [LATIN LETTER SMALL CAPITAL C]
-			  case '\u1E08': // Ḉ  [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
-			  case '\u24B8': // Ⓒ  [CIRCLED LATIN CAPITAL LETTER C]
-			  case '\uFF23': // C  [FULLWIDTH LATIN CAPITAL LETTER C]
-				output[outputPos++] = 'C';
-				break;
-			  case '\u00E7': // ç  [LATIN SMALL LETTER C WITH CEDILLA]
-			  case '\u0107': // ć  [LATIN SMALL LETTER C WITH ACUTE]
-			  case '\u0109': // ĉ  [LATIN SMALL LETTER C WITH CIRCUMFLEX]
-			  case '\u010B': // ċ  [LATIN SMALL LETTER C WITH DOT ABOVE]
-			  case '\u010D': // č  [LATIN SMALL LETTER C WITH CARON]
-			  case '\u0188': // ƈ  [LATIN SMALL LETTER C WITH HOOK]
-			  case '\u023C': // ȼ  [LATIN SMALL LETTER C WITH STROKE]
-			  case '\u0255': // ɕ  [LATIN SMALL LETTER C WITH CURL]
-			  case '\u1E09': // ḉ  [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
-			  case '\u2184': // ↄ  [LATIN SMALL LETTER REVERSED C]
-			  case '\u24D2': // ⓒ  [CIRCLED LATIN SMALL LETTER C]
-			  case '\uA73E': // Ꜿ  [LATIN CAPITAL LETTER REVERSED C WITH DOT]
-			  case '\uA73F': // ꜿ  [LATIN SMALL LETTER REVERSED C WITH DOT]
-			  case '\uFF43': // c  [FULLWIDTH LATIN SMALL LETTER C]
-				output[outputPos++] = 'c';
-				break;
-			  case '\u249E': // ⒞  [PARENTHESIZED LATIN SMALL LETTER C]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'c';
-				output[outputPos++] = ')';
-				break;
-			  case '\u00D0': // Ð  [LATIN CAPITAL LETTER ETH]
-			  case '\u010E': // Ď  [LATIN CAPITAL LETTER D WITH CARON]
-			  case '\u0110': // Đ  [LATIN CAPITAL LETTER D WITH STROKE]
-			  case '\u0189': // Ɖ  [LATIN CAPITAL LETTER AFRICAN D]
-			  case '\u018A': // Ɗ  [LATIN CAPITAL LETTER D WITH HOOK]
-			  case '\u018B': // Ƌ  [LATIN CAPITAL LETTER D WITH TOPBAR]
-			  case '\u1D05': // ᴅ  [LATIN LETTER SMALL CAPITAL D]
-			  case '\u1D06': // ᴆ  [LATIN LETTER SMALL CAPITAL ETH]
-			  case '\u1E0A': // Ḋ  [LATIN CAPITAL LETTER D WITH DOT ABOVE]
-			  case '\u1E0C': // Ḍ  [LATIN CAPITAL LETTER D WITH DOT BELOW]
-			  case '\u1E0E': // Ḏ  [LATIN CAPITAL LETTER D WITH LINE BELOW]
-			  case '\u1E10': // Ḑ  [LATIN CAPITAL LETTER D WITH CEDILLA]
-			  case '\u1E12': // Ḓ  [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
-			  case '\u24B9': // Ⓓ  [CIRCLED LATIN CAPITAL LETTER D]
-			  case '\uA779': // Ꝺ  [LATIN CAPITAL LETTER INSULAR D]
-			  case '\uFF24': // D  [FULLWIDTH LATIN CAPITAL LETTER D]
-				output[outputPos++] = 'D';
-				break;
-			  case '\u00F0': // ð  [LATIN SMALL LETTER ETH]
-			  case '\u010F': // ď  [LATIN SMALL LETTER D WITH CARON]
-			  case '\u0111': // đ  [LATIN SMALL LETTER D WITH STROKE]
-			  case '\u018C': // ƌ  [LATIN SMALL LETTER D WITH TOPBAR]
-			  case '\u0221': // ȡ  [LATIN SMALL LETTER D WITH CURL]
-			  case '\u0256': // ɖ  [LATIN SMALL LETTER D WITH TAIL]
-			  case '\u0257': // ɗ  [LATIN SMALL LETTER D WITH HOOK]
-			  case '\u1D6D': // ᵭ  [LATIN SMALL LETTER D WITH MIDDLE TILDE]
-			  case '\u1D81': // ᶁ  [LATIN SMALL LETTER D WITH PALATAL HOOK]
-			  case '\u1D91': // ᶑ  [LATIN SMALL LETTER D WITH HOOK AND TAIL]
-			  case '\u1E0B': // ḋ  [LATIN SMALL LETTER D WITH DOT ABOVE]
-			  case '\u1E0D': // ḍ  [LATIN SMALL LETTER D WITH DOT BELOW]
-			  case '\u1E0F': // ḏ  [LATIN SMALL LETTER D WITH LINE BELOW]
-			  case '\u1E11': // ḑ  [LATIN SMALL LETTER D WITH CEDILLA]
-			  case '\u1E13': // ḓ  [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
-			  case '\u24D3': // ⓓ  [CIRCLED LATIN SMALL LETTER D]
-			  case '\uA77A': // ꝺ  [LATIN SMALL LETTER INSULAR D]
-			  case '\uFF44': // d  [FULLWIDTH LATIN SMALL LETTER D]
-				output[outputPos++] = 'd';
-				break;
-			  case '\u01C4': // DŽ  [LATIN CAPITAL LETTER DZ WITH CARON]
-			  case '\u01F1': // DZ  [LATIN CAPITAL LETTER DZ]
-				output[outputPos++] = 'D';
-				output[outputPos++] = 'Z';
-				break;
-			  case '\u01C5': // Dž  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
-			  case '\u01F2': // Dz  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
-				output[outputPos++] = 'D';
-				output[outputPos++] = 'z';
-				break;
-			  case '\u249F': // ⒟  [PARENTHESIZED LATIN SMALL LETTER D]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'd';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0238': // ȸ  [LATIN SMALL LETTER DB DIGRAPH]
-				output[outputPos++] = 'd';
-				output[outputPos++] = 'b';
-				break;
-			  case '\u01C6': // dž  [LATIN SMALL LETTER DZ WITH CARON]
-			  case '\u01F3': // dz  [LATIN SMALL LETTER DZ]
-			  case '\u02A3': // ʣ  [LATIN SMALL LETTER DZ DIGRAPH]
-			  case '\u02A5': // ʥ  [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
-				output[outputPos++] = 'd';
-				output[outputPos++] = 'z';
-				break;
-			  case '\u00C8': // È  [LATIN CAPITAL LETTER E WITH GRAVE]
-			  case '\u00C9': // É  [LATIN CAPITAL LETTER E WITH ACUTE]
-			  case '\u00CA': // Ê  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
-			  case '\u00CB': // Ë  [LATIN CAPITAL LETTER E WITH DIAERESIS]
-			  case '\u0112': // Ē  [LATIN CAPITAL LETTER E WITH MACRON]
-			  case '\u0114': // Ĕ  [LATIN CAPITAL LETTER E WITH BREVE]
-			  case '\u0116': // Ė  [LATIN CAPITAL LETTER E WITH DOT ABOVE]
-			  case '\u0118': // Ę  [LATIN CAPITAL LETTER E WITH OGONEK]
-			  case '\u011A': // Ě  [LATIN CAPITAL LETTER E WITH CARON]
-			  case '\u018E': // Ǝ  [LATIN CAPITAL LETTER REVERSED E]
-			  case '\u0190': // Ɛ  [LATIN CAPITAL LETTER OPEN E]
-			  case '\u0204': // Ȅ  [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
-			  case '\u0206': // Ȇ  [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
-			  case '\u0228': // Ȩ  [LATIN CAPITAL LETTER E WITH CEDILLA]
-			  case '\u0246': // Ɇ  [LATIN CAPITAL LETTER E WITH STROKE]
-			  case '\u1D07': // ᴇ  [LATIN LETTER SMALL CAPITAL E]
-			  case '\u1E14': // Ḕ  [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
-			  case '\u1E16': // Ḗ  [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
-			  case '\u1E18': // Ḙ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
-			  case '\u1E1A': // Ḛ  [LATIN CAPITAL LETTER E WITH TILDE BELOW]
-			  case '\u1E1C': // Ḝ  [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
-			  case '\u1EB8': // Ẹ  [LATIN CAPITAL LETTER E WITH DOT BELOW]
-			  case '\u1EBA': // Ẻ  [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
-			  case '\u1EBC': // Ẽ  [LATIN CAPITAL LETTER E WITH TILDE]
-			  case '\u1EBE': // Ế  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
-			  case '\u1EC0': // Ề  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
-			  case '\u1EC2': // Ể  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
-			  case '\u1EC4': // Ễ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
-			  case '\u1EC6': // Ệ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
-			  case '\u24BA': // Ⓔ  [CIRCLED LATIN CAPITAL LETTER E]
-			  case '\u2C7B': // ⱻ  [LATIN LETTER SMALL CAPITAL TURNED E]
-			  case '\uFF25': // E  [FULLWIDTH LATIN CAPITAL LETTER E]
-				output[outputPos++] = 'E';
-				break;
-			  case '\u00E8': // è  [LATIN SMALL LETTER E WITH GRAVE]
-			  case '\u00E9': // é  [LATIN SMALL LETTER E WITH ACUTE]
-			  case '\u00EA': // ê  [LATIN SMALL LETTER E WITH CIRCUMFLEX]
-			  case '\u00EB': // ë  [LATIN SMALL LETTER E WITH DIAERESIS]
-			  case '\u0113': // ē  [LATIN SMALL LETTER E WITH MACRON]
-			  case '\u0115': // ĕ  [LATIN SMALL LETTER E WITH BREVE]
-			  case '\u0117': // ė  [LATIN SMALL LETTER E WITH DOT ABOVE]
-			  case '\u0119': // ę  [LATIN SMALL LETTER E WITH OGONEK]
-			  case '\u011B': // ě  [LATIN SMALL LETTER E WITH CARON]
-			  case '\u01DD': // ǝ  [LATIN SMALL LETTER TURNED E]
-			  case '\u0205': // ȅ  [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
-			  case '\u0207': // ȇ  [LATIN SMALL LETTER E WITH INVERTED BREVE]
-			  case '\u0229': // ȩ  [LATIN SMALL LETTER E WITH CEDILLA]
-			  case '\u0247': // ɇ  [LATIN SMALL LETTER E WITH STROKE]
-			  case '\u0258': // ɘ  [LATIN SMALL LETTER REVERSED E]
-			  case '\u025B': // ɛ  [LATIN SMALL LETTER OPEN E]
-			  case '\u025C': // ɜ  [LATIN SMALL LETTER REVERSED OPEN E]
-			  case '\u025D': // ɝ  [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
-			  case '\u025E': // ɞ  [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
-			  case '\u029A': // ʚ  [LATIN SMALL LETTER CLOSED OPEN E]
-			  case '\u1D08': // ᴈ  [LATIN SMALL LETTER TURNED OPEN E]
-			  case '\u1D92': // ᶒ  [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
-			  case '\u1D93': // ᶓ  [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
-			  case '\u1D94': // ᶔ  [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
-			  case '\u1E15': // ḕ  [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
-			  case '\u1E17': // ḗ  [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
-			  case '\u1E19': // ḙ  [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
-			  case '\u1E1B': // ḛ  [LATIN SMALL LETTER E WITH TILDE BELOW]
-			  case '\u1E1D': // ḝ  [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
-			  case '\u1EB9': // ẹ  [LATIN SMALL LETTER E WITH DOT BELOW]
-			  case '\u1EBB': // ẻ  [LATIN SMALL LETTER E WITH HOOK ABOVE]
-			  case '\u1EBD': // ẽ  [LATIN SMALL LETTER E WITH TILDE]
-			  case '\u1EBF': // ế  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
-			  case '\u1EC1': // ề  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
-			  case '\u1EC3': // ể  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
-			  case '\u1EC5': // ễ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
-			  case '\u1EC7': // ệ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
-			  case '\u2091': // ₑ  [LATIN SUBSCRIPT SMALL LETTER E]
-			  case '\u24D4': // ⓔ  [CIRCLED LATIN SMALL LETTER E]
-			  case '\u2C78': // ⱸ  [LATIN SMALL LETTER E WITH NOTCH]
-			  case '\uFF45': // e  [FULLWIDTH LATIN SMALL LETTER E]
-				output[outputPos++] = 'e';
-				break;
-			  case '\u24A0': // ⒠  [PARENTHESIZED LATIN SMALL LETTER E]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'e';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0191': // Ƒ  [LATIN CAPITAL LETTER F WITH HOOK]
-			  case '\u1E1E': // Ḟ  [LATIN CAPITAL LETTER F WITH DOT ABOVE]
-			  case '\u24BB': // Ⓕ  [CIRCLED LATIN CAPITAL LETTER F]
-			  case '\uA730': // ꜰ  [LATIN LETTER SMALL CAPITAL F]
-			  case '\uA77B': // Ꝼ  [LATIN CAPITAL LETTER INSULAR F]
-			  case '\uA7FB': // ꟻ  [LATIN EPIGRAPHIC LETTER REVERSED F]
-			  case '\uFF26': // F  [FULLWIDTH LATIN CAPITAL LETTER F]
-				output[outputPos++] = 'F';
-				break;
-			  case '\u0192': // ƒ  [LATIN SMALL LETTER F WITH HOOK]
-			  case '\u1D6E': // ᵮ  [LATIN SMALL LETTER F WITH MIDDLE TILDE]
-			  case '\u1D82': // ᶂ  [LATIN SMALL LETTER F WITH PALATAL HOOK]
-			  case '\u1E1F': // ḟ  [LATIN SMALL LETTER F WITH DOT ABOVE]
-			  case '\u1E9B': // ẛ  [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
-			  case '\u24D5': // ⓕ  [CIRCLED LATIN SMALL LETTER F]
-			  case '\uA77C': // ꝼ  [LATIN SMALL LETTER INSULAR F]
-			  case '\uFF46': // f  [FULLWIDTH LATIN SMALL LETTER F]
-				output[outputPos++] = 'f';
-				break;
-			  case '\u24A1': // ⒡  [PARENTHESIZED LATIN SMALL LETTER F]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'f';
-				output[outputPos++] = ')';
-				break;
-			  case '\uFB00': // ff  [LATIN SMALL LIGATURE FF]
-				output[outputPos++] = 'f';
-				output[outputPos++] = 'f';
-				break;
-			  case '\uFB03': // ffi  [LATIN SMALL LIGATURE FFI]
-				output[outputPos++] = 'f';
-				output[outputPos++] = 'f';
-				output[outputPos++] = 'i';
-				break;
-			  case '\uFB04': // ffl  [LATIN SMALL LIGATURE FFL]
-				output[outputPos++] = 'f';
-				output[outputPos++] = 'f';
-				output[outputPos++] = 'l';
-				break;
-			  case '\uFB01': // fi  [LATIN SMALL LIGATURE FI]
-				output[outputPos++] = 'f';
-				output[outputPos++] = 'i';
-				break;
-			  case '\uFB02': // fl  [LATIN SMALL LIGATURE FL]
-				output[outputPos++] = 'f';
-				output[outputPos++] = 'l';
-				break;
-			  case '\u011C': // Ĝ  [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
-			  case '\u011E': // Ğ  [LATIN CAPITAL LETTER G WITH BREVE]
-			  case '\u0120': // Ġ  [LATIN CAPITAL LETTER G WITH DOT ABOVE]
-			  case '\u0122': // Ģ  [LATIN CAPITAL LETTER G WITH CEDILLA]
-			  case '\u0193': // Ɠ  [LATIN CAPITAL LETTER G WITH HOOK]
-			  case '\u01E4': // Ǥ  [LATIN CAPITAL LETTER G WITH STROKE]
-			  case '\u01E5': // ǥ  [LATIN SMALL LETTER G WITH STROKE]
-			  case '\u01E6': // Ǧ  [LATIN CAPITAL LETTER G WITH CARON]
-			  case '\u01E7': // ǧ  [LATIN SMALL LETTER G WITH CARON]
-			  case '\u01F4': // Ǵ  [LATIN CAPITAL LETTER G WITH ACUTE]
-			  case '\u0262': // ɢ  [LATIN LETTER SMALL CAPITAL G]
-			  case '\u029B': // ʛ  [LATIN LETTER SMALL CAPITAL G WITH HOOK]
-			  case '\u1E20': // Ḡ  [LATIN CAPITAL LETTER G WITH MACRON]
-			  case '\u24BC': // Ⓖ  [CIRCLED LATIN CAPITAL LETTER G]
-			  case '\uA77D': // Ᵹ  [LATIN CAPITAL LETTER INSULAR G]
-			  case '\uA77E': // Ꝿ  [LATIN CAPITAL LETTER TURNED INSULAR G]
-			  case '\uFF27': // G  [FULLWIDTH LATIN CAPITAL LETTER G]
-				output[outputPos++] = 'G';
-				break;
-			  case '\u011D': // ĝ  [LATIN SMALL LETTER G WITH CIRCUMFLEX]
-			  case '\u011F': // ğ  [LATIN SMALL LETTER G WITH BREVE]
-			  case '\u0121': // ġ  [LATIN SMALL LETTER G WITH DOT ABOVE]
-			  case '\u0123': // ģ  [LATIN SMALL LETTER G WITH CEDILLA]
-			  case '\u01F5': // ǵ  [LATIN SMALL LETTER G WITH ACUTE]
-			  case '\u0260': // ɠ  [LATIN SMALL LETTER G WITH HOOK]
-			  case '\u0261': // ɡ  [LATIN SMALL LETTER SCRIPT G]
-			  case '\u1D77': // ᵷ  [LATIN SMALL LETTER TURNED G]
-			  case '\u1D79': // ᵹ  [LATIN SMALL LETTER INSULAR G]
-			  case '\u1D83': // ᶃ  [LATIN SMALL LETTER G WITH PALATAL HOOK]
-			  case '\u1E21': // ḡ  [LATIN SMALL LETTER G WITH MACRON]
-			  case '\u24D6': // ⓖ  [CIRCLED LATIN SMALL LETTER G]
-			  case '\uA77F': // ꝿ  [LATIN SMALL LETTER TURNED INSULAR G]
-			  case '\uFF47': // g  [FULLWIDTH LATIN SMALL LETTER G]
-				output[outputPos++] = 'g';
-				break;
-			  case '\u24A2': // ⒢  [PARENTHESIZED LATIN SMALL LETTER G]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'g';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0124': // Ĥ  [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
-			  case '\u0126': // Ħ  [LATIN CAPITAL LETTER H WITH STROKE]
-			  case '\u021E': // Ȟ  [LATIN CAPITAL LETTER H WITH CARON]
-			  case '\u029C': // ʜ  [LATIN LETTER SMALL CAPITAL H]
-			  case '\u1E22': // Ḣ  [LATIN CAPITAL LETTER H WITH DOT ABOVE]
-			  case '\u1E24': // Ḥ  [LATIN CAPITAL LETTER H WITH DOT BELOW]
-			  case '\u1E26': // Ḧ  [LATIN CAPITAL LETTER H WITH DIAERESIS]
-			  case '\u1E28': // Ḩ  [LATIN CAPITAL LETTER H WITH CEDILLA]
-			  case '\u1E2A': // Ḫ  [LATIN CAPITAL LETTER H WITH BREVE BELOW]
-			  case '\u24BD': // Ⓗ  [CIRCLED LATIN CAPITAL LETTER H]
-			  case '\u2C67': // Ⱨ  [LATIN CAPITAL LETTER H WITH DESCENDER]
-			  case '\u2C75': // Ⱶ  [LATIN CAPITAL LETTER HALF H]
-			  case '\uFF28': // H  [FULLWIDTH LATIN CAPITAL LETTER H]
-				output[outputPos++] = 'H';
-				break;
-			  case '\u0125': // ĥ  [LATIN SMALL LETTER H WITH CIRCUMFLEX]
-			  case '\u0127': // ħ  [LATIN SMALL LETTER H WITH STROKE]
-			  case '\u021F': // ȟ  [LATIN SMALL LETTER H WITH CARON]
-			  case '\u0265': // ɥ  [LATIN SMALL LETTER TURNED H]
-			  case '\u0266': // ɦ  [LATIN SMALL LETTER H WITH HOOK]
-			  case '\u02AE': // ʮ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
-			  case '\u02AF': // ʯ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
-			  case '\u1E23': // ḣ  [LATIN SMALL LETTER H WITH DOT ABOVE]
-			  case '\u1E25': // ḥ  [LATIN SMALL LETTER H WITH DOT BELOW]
-			  case '\u1E27': // ḧ  [LATIN SMALL LETTER H WITH DIAERESIS]
-			  case '\u1E29': // ḩ  [LATIN SMALL LETTER H WITH CEDILLA]
-			  case '\u1E2B': // ḫ  [LATIN SMALL LETTER H WITH BREVE BELOW]
-			  case '\u1E96': // ẖ  [LATIN SMALL LETTER H WITH LINE BELOW]
-			  case '\u24D7': // ⓗ  [CIRCLED LATIN SMALL LETTER H]
-			  case '\u2C68': // ⱨ  [LATIN SMALL LETTER H WITH DESCENDER]
-			  case '\u2C76': // ⱶ  [LATIN SMALL LETTER HALF H]
-			  case '\uFF48': // h  [FULLWIDTH LATIN SMALL LETTER H]
-				output[outputPos++] = 'h';
-				break;
-			  case '\u01F6': // Ƕ  http://en.wikipedia.org/wiki/Hwair  [LATIN CAPITAL LETTER HWAIR]
-				output[outputPos++] = 'H';
-				output[outputPos++] = 'V';
-				break;
-			  case '\u24A3': // ⒣  [PARENTHESIZED LATIN SMALL LETTER H]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'h';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0195': // ƕ  [LATIN SMALL LETTER HV]
-				output[outputPos++] = 'h';
-				output[outputPos++] = 'v';
-				break;
-			  case '\u00CC': // Ì  [LATIN CAPITAL LETTER I WITH GRAVE]
-			  case '\u00CD': // Í  [LATIN CAPITAL LETTER I WITH ACUTE]
-			  case '\u00CE': // Î  [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
-			  case '\u00CF': // Ï  [LATIN CAPITAL LETTER I WITH DIAERESIS]
-			  case '\u0128': // Ĩ  [LATIN CAPITAL LETTER I WITH TILDE]
-			  case '\u012A': // Ī  [LATIN CAPITAL LETTER I WITH MACRON]
-			  case '\u012C': // Ĭ  [LATIN CAPITAL LETTER I WITH BREVE]
-			  case '\u012E': // Į  [LATIN CAPITAL LETTER I WITH OGONEK]
-			  case '\u0130': // İ  [LATIN CAPITAL LETTER I WITH DOT ABOVE]
-			  case '\u0196': // Ɩ  [LATIN CAPITAL LETTER IOTA]
-			  case '\u0197': // Ɨ  [LATIN CAPITAL LETTER I WITH STROKE]
-			  case '\u01CF': // Ǐ  [LATIN CAPITAL LETTER I WITH CARON]
-			  case '\u0208': // Ȉ  [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
-			  case '\u020A': // Ȋ  [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
-			  case '\u026A': // ɪ  [LATIN LETTER SMALL CAPITAL I]
-			  case '\u1D7B': // ᵻ  [LATIN SMALL CAPITAL LETTER I WITH STROKE]
-			  case '\u1E2C': // Ḭ  [LATIN CAPITAL LETTER I WITH TILDE BELOW]
-			  case '\u1E2E': // Ḯ  [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
-			  case '\u1EC8': // Ỉ  [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
-			  case '\u1ECA': // Ị  [LATIN CAPITAL LETTER I WITH DOT BELOW]
-			  case '\u24BE': // Ⓘ  [CIRCLED LATIN CAPITAL LETTER I]
-			  case '\uA7FE': // ꟾ  [LATIN EPIGRAPHIC LETTER I LONGA]
-			  case '\uFF29': // I  [FULLWIDTH LATIN CAPITAL LETTER I]
-				output[outputPos++] = 'I';
-				break;
-			  case '\u00EC': // ì  [LATIN SMALL LETTER I WITH GRAVE]
-			  case '\u00ED': // í  [LATIN SMALL LETTER I WITH ACUTE]
-			  case '\u00EE': // î  [LATIN SMALL LETTER I WITH CIRCUMFLEX]
-			  case '\u00EF': // ï  [LATIN SMALL LETTER I WITH DIAERESIS]
-			  case '\u0129': // ĩ  [LATIN SMALL LETTER I WITH TILDE]
-			  case '\u012B': // ī  [LATIN SMALL LETTER I WITH MACRON]
-			  case '\u012D': // ĭ  [LATIN SMALL LETTER I WITH BREVE]
-			  case '\u012F': // į  [LATIN SMALL LETTER I WITH OGONEK]
-			  case '\u0131': // ı  [LATIN SMALL LETTER DOTLESS I]
-			  case '\u01D0': // ǐ  [LATIN SMALL LETTER I WITH CARON]
-			  case '\u0209': // ȉ  [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
-			  case '\u020B': // ȋ  [LATIN SMALL LETTER I WITH INVERTED BREVE]
-			  case '\u0268': // ɨ  [LATIN SMALL LETTER I WITH STROKE]
-			  case '\u1D09': // ᴉ  [LATIN SMALL LETTER TURNED I]
-			  case '\u1D62': // ᵢ  [LATIN SUBSCRIPT SMALL LETTER I]
-			  case '\u1D7C': // ᵼ  [LATIN SMALL LETTER IOTA WITH STROKE]
-			  case '\u1D96': // ᶖ  [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
-			  case '\u1E2D': // ḭ  [LATIN SMALL LETTER I WITH TILDE BELOW]
-			  case '\u1E2F': // ḯ  [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
-			  case '\u1EC9': // ỉ  [LATIN SMALL LETTER I WITH HOOK ABOVE]
-			  case '\u1ECB': // ị  [LATIN SMALL LETTER I WITH DOT BELOW]
-			  case '\u2071': // ⁱ  [SUPERSCRIPT LATIN SMALL LETTER I]
-			  case '\u24D8': // ⓘ  [CIRCLED LATIN SMALL LETTER I]
-			  case '\uFF49': // i  [FULLWIDTH LATIN SMALL LETTER I]
-				output[outputPos++] = 'i';
-				break;
-			  case '\u0132': // IJ  [LATIN CAPITAL LIGATURE IJ]
-				output[outputPos++] = 'I';
-				output[outputPos++] = 'J';
-				break;
-			  case '\u24A4': // ⒤  [PARENTHESIZED LATIN SMALL LETTER I]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'i';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0133': // ij  [LATIN SMALL LIGATURE IJ]
-				output[outputPos++] = 'i';
-				output[outputPos++] = 'j';
-				break;
-			  case '\u0134': // Ĵ  [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
-			  case '\u0248': // Ɉ  [LATIN CAPITAL LETTER J WITH STROKE]
-			  case '\u1D0A': // ᴊ  [LATIN LETTER SMALL CAPITAL J]
-			  case '\u24BF': // Ⓙ  [CIRCLED LATIN CAPITAL LETTER J]
-			  case '\uFF2A': // J  [FULLWIDTH LATIN CAPITAL LETTER J]
-				output[outputPos++] = 'J';
-				break;
-			  case '\u0135': // ĵ  [LATIN SMALL LETTER J WITH CIRCUMFLEX]
-			  case '\u01F0': // ǰ  [LATIN SMALL LETTER J WITH CARON]
-			  case '\u0237': // ȷ  [LATIN SMALL LETTER DOTLESS J]
-			  case '\u0249': // ɉ  [LATIN SMALL LETTER J WITH STROKE]
-			  case '\u025F': // ɟ  [LATIN SMALL LETTER DOTLESS J WITH STROKE]
-			  case '\u0284': // ʄ  [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
-			  case '\u029D': // ʝ  [LATIN SMALL LETTER J WITH CROSSED-TAIL]
-			  case '\u24D9': // ⓙ  [CIRCLED LATIN SMALL LETTER J]
-			  case '\u2C7C': // ⱼ  [LATIN SUBSCRIPT SMALL LETTER J]
-			  case '\uFF4A': // j  [FULLWIDTH LATIN SMALL LETTER J]
-				output[outputPos++] = 'j';
-				break;
-			  case '\u24A5': // ⒥  [PARENTHESIZED LATIN SMALL LETTER J]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'j';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0136': // Ķ  [LATIN CAPITAL LETTER K WITH CEDILLA]
-			  case '\u0198': // Ƙ  [LATIN CAPITAL LETTER K WITH HOOK]
-			  case '\u01E8': // Ǩ  [LATIN CAPITAL LETTER K WITH CARON]
-			  case '\u1D0B': // ᴋ  [LATIN LETTER SMALL CAPITAL K]
-			  case '\u1E30': // Ḱ  [LATIN CAPITAL LETTER K WITH ACUTE]
-			  case '\u1E32': // Ḳ  [LATIN CAPITAL LETTER K WITH DOT BELOW]
-			  case '\u1E34': // Ḵ  [LATIN CAPITAL LETTER K WITH LINE BELOW]
-			  case '\u24C0': // Ⓚ  [CIRCLED LATIN CAPITAL LETTER K]
-			  case '\u2C69': // Ⱪ  [LATIN CAPITAL LETTER K WITH DESCENDER]
-			  case '\uA740': // Ꝁ  [LATIN CAPITAL LETTER K WITH STROKE]
-			  case '\uA742': // Ꝃ  [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
-			  case '\uA744': // Ꝅ  [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
-			  case '\uFF2B': // K  [FULLWIDTH LATIN CAPITAL LETTER K]
-				output[outputPos++] = 'K';
-				break;
-			  case '\u0137': // ķ  [LATIN SMALL LETTER K WITH CEDILLA]
-			  case '\u0199': // ƙ  [LATIN SMALL LETTER K WITH HOOK]
-			  case '\u01E9': // ǩ  [LATIN SMALL LETTER K WITH CARON]
-			  case '\u029E': // ʞ  [LATIN SMALL LETTER TURNED K]
-			  case '\u1D84': // ᶄ  [LATIN SMALL LETTER K WITH PALATAL HOOK]
-			  case '\u1E31': // ḱ  [LATIN SMALL LETTER K WITH ACUTE]
-			  case '\u1E33': // ḳ  [LATIN SMALL LETTER K WITH DOT BELOW]
-			  case '\u1E35': // ḵ  [LATIN SMALL LETTER K WITH LINE BELOW]
-			  case '\u24DA': // ⓚ  [CIRCLED LATIN SMALL LETTER K]
-			  case '\u2C6A': // ⱪ  [LATIN SMALL LETTER K WITH DESCENDER]
-			  case '\uA741': // ꝁ  [LATIN SMALL LETTER K WITH STROKE]
-			  case '\uA743': // ꝃ  [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
-			  case '\uA745': // ꝅ  [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
-			  case '\uFF4B': // k  [FULLWIDTH LATIN SMALL LETTER K]
-				output[outputPos++] = 'k';
-				break;
-			  case '\u24A6': // ⒦  [PARENTHESIZED LATIN SMALL LETTER K]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'k';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0139': // Ĺ  [LATIN CAPITAL LETTER L WITH ACUTE]
-			  case '\u013B': // Ļ  [LATIN CAPITAL LETTER L WITH CEDILLA]
-			  case '\u013D': // Ľ  [LATIN CAPITAL LETTER L WITH CARON]
-			  case '\u013F': // Ŀ  [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
-			  case '\u0141': // Ł  [LATIN CAPITAL LETTER L WITH STROKE]
-			  case '\u023D': // Ƚ  [LATIN CAPITAL LETTER L WITH BAR]
-			  case '\u029F': // ʟ  [LATIN LETTER SMALL CAPITAL L]
-			  case '\u1D0C': // ᴌ  [LATIN LETTER SMALL CAPITAL L WITH STROKE]
-			  case '\u1E36': // Ḷ  [LATIN CAPITAL LETTER L WITH DOT BELOW]
-			  case '\u1E38': // Ḹ  [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
-			  case '\u1E3A': // Ḻ  [LATIN CAPITAL LETTER L WITH LINE BELOW]
-			  case '\u1E3C': // Ḽ  [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
-			  case '\u24C1': // Ⓛ  [CIRCLED LATIN CAPITAL LETTER L]
-			  case '\u2C60': // Ⱡ  [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
-			  case '\u2C62': // Ɫ  [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
-			  case '\uA746': // Ꝇ  [LATIN CAPITAL LETTER BROKEN L]
-			  case '\uA748': // Ꝉ  [LATIN CAPITAL LETTER L WITH HIGH STROKE]
-			  case '\uA780': // Ꞁ  [LATIN CAPITAL LETTER TURNED L]
-			  case '\uFF2C': // L  [FULLWIDTH LATIN CAPITAL LETTER L]
-				output[outputPos++] = 'L';
-				break;
-			  case '\u013A': // ĺ  [LATIN SMALL LETTER L WITH ACUTE]
-			  case '\u013C': // ļ  [LATIN SMALL LETTER L WITH CEDILLA]
-			  case '\u013E': // ľ  [LATIN SMALL LETTER L WITH CARON]
-			  case '\u0140': // ŀ  [LATIN SMALL LETTER L WITH MIDDLE DOT]
-			  case '\u0142': // ł  [LATIN SMALL LETTER L WITH STROKE]
-			  case '\u019A': // ƚ  [LATIN SMALL LETTER L WITH BAR]
-			  case '\u0234': // ȴ  [LATIN SMALL LETTER L WITH CURL]
-			  case '\u026B': // ɫ  [LATIN SMALL LETTER L WITH MIDDLE TILDE]
-			  case '\u026C': // ɬ  [LATIN SMALL LETTER L WITH BELT]
-			  case '\u026D': // ɭ  [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
-			  case '\u1D85': // ᶅ  [LATIN SMALL LETTER L WITH PALATAL HOOK]
-			  case '\u1E37': // ḷ  [LATIN SMALL LETTER L WITH DOT BELOW]
-			  case '\u1E39': // ḹ  [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
-			  case '\u1E3B': // ḻ  [LATIN SMALL LETTER L WITH LINE BELOW]
-			  case '\u1E3D': // ḽ  [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
-			  case '\u24DB': // ⓛ  [CIRCLED LATIN SMALL LETTER L]
-			  case '\u2C61': // ⱡ  [LATIN SMALL LETTER L WITH DOUBLE BAR]
-			  case '\uA747': // ꝇ  [LATIN SMALL LETTER BROKEN L]
-			  case '\uA749': // ꝉ  [LATIN SMALL LETTER L WITH HIGH STROKE]
-			  case '\uA781': // ꞁ  [LATIN SMALL LETTER TURNED L]
-			  case '\uFF4C': // l  [FULLWIDTH LATIN SMALL LETTER L]
-				output[outputPos++] = 'l';
-				break;
-			  case '\u01C7': // LJ  [LATIN CAPITAL LETTER LJ]
-				output[outputPos++] = 'L';
-				output[outputPos++] = 'J';
-				break;
-			  case '\u1EFA': // Ỻ  [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
-				output[outputPos++] = 'L';
-				output[outputPos++] = 'L';
-				break;
-			  case '\u01C8': // Lj  [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
-				output[outputPos++] = 'L';
-				output[outputPos++] = 'j';
-				break;
-			  case '\u24A7': // ⒧  [PARENTHESIZED LATIN SMALL LETTER L]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'l';
-				output[outputPos++] = ')';
-				break;
-			  case '\u01C9': // lj  [LATIN SMALL LETTER LJ]
-				output[outputPos++] = 'l';
-				output[outputPos++] = 'j';
-				break;
-			  case '\u1EFB': // ỻ  [LATIN SMALL LETTER MIDDLE-WELSH LL]
-				output[outputPos++] = 'l';
-				output[outputPos++] = 'l';
-				break;
-			  case '\u02AA': // ʪ  [LATIN SMALL LETTER LS DIGRAPH]
-				output[outputPos++] = 'l';
-				output[outputPos++] = 's';
-				break;
-			  case '\u02AB': // ʫ  [LATIN SMALL LETTER LZ DIGRAPH]
-				output[outputPos++] = 'l';
-				output[outputPos++] = 'z';
-				break;
-			  case '\u019C': // Ɯ  [LATIN CAPITAL LETTER TURNED M]
-			  case '\u1D0D': // ᴍ  [LATIN LETTER SMALL CAPITAL M]
-			  case '\u1E3E': // Ḿ  [LATIN CAPITAL LETTER M WITH ACUTE]
-			  case '\u1E40': // Ṁ  [LATIN CAPITAL LETTER M WITH DOT ABOVE]
-			  case '\u1E42': // Ṃ  [LATIN CAPITAL LETTER M WITH DOT BELOW]
-			  case '\u24C2': // Ⓜ  [CIRCLED LATIN CAPITAL LETTER M]
-			  case '\u2C6E': // Ɱ  [LATIN CAPITAL LETTER M WITH HOOK]
-			  case '\uA7FD': // ꟽ  [LATIN EPIGRAPHIC LETTER INVERTED M]
-			  case '\uA7FF': // ꟿ  [LATIN EPIGRAPHIC LETTER ARCHAIC M]
-			  case '\uFF2D': // M  [FULLWIDTH LATIN CAPITAL LETTER M]
-				output[outputPos++] = 'M';
-				break;
-			  case '\u026F': // ɯ  [LATIN SMALL LETTER TURNED M]
-			  case '\u0270': // ɰ  [LATIN SMALL LETTER TURNED M WITH LONG LEG]
-			  case '\u0271': // ɱ  [LATIN SMALL LETTER M WITH HOOK]
-			  case '\u1D6F': // ᵯ  [LATIN SMALL LETTER M WITH MIDDLE TILDE]
-			  case '\u1D86': // ᶆ  [LATIN SMALL LETTER M WITH PALATAL HOOK]
-			  case '\u1E3F': // ḿ  [LATIN SMALL LETTER M WITH ACUTE]
-			  case '\u1E41': // ṁ  [LATIN SMALL LETTER M WITH DOT ABOVE]
-			  case '\u1E43': // ṃ  [LATIN SMALL LETTER M WITH DOT BELOW]
-			  case '\u24DC': // ⓜ  [CIRCLED LATIN SMALL LETTER M]
-			  case '\uFF4D': // m  [FULLWIDTH LATIN SMALL LETTER M]
-				output[outputPos++] = 'm';
-				break;
-			  case '\u24A8': // ⒨  [PARENTHESIZED LATIN SMALL LETTER M]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'm';
-				output[outputPos++] = ')';
-				break;
-			  case '\u00D1': // Ñ  [LATIN CAPITAL LETTER N WITH TILDE]
-			  case '\u0143': // Ń  [LATIN CAPITAL LETTER N WITH ACUTE]
-			  case '\u0145': // Ņ  [LATIN CAPITAL LETTER N WITH CEDILLA]
-			  case '\u0147': // Ň  [LATIN CAPITAL LETTER N WITH CARON]
-			  case '\u014A': // Ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN CAPITAL LETTER ENG]
-			  case '\u019D': // Ɲ  [LATIN CAPITAL LETTER N WITH LEFT HOOK]
-			  case '\u01F8': // Ǹ  [LATIN CAPITAL LETTER N WITH GRAVE]
-			  case '\u0220': // Ƞ  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
-			  case '\u0274': // ɴ  [LATIN LETTER SMALL CAPITAL N]
-			  case '\u1D0E': // ᴎ  [LATIN LETTER SMALL CAPITAL REVERSED N]
-			  case '\u1E44': // Ṅ  [LATIN CAPITAL LETTER N WITH DOT ABOVE]
-			  case '\u1E46': // Ṇ  [LATIN CAPITAL LETTER N WITH DOT BELOW]
-			  case '\u1E48': // Ṉ  [LATIN CAPITAL LETTER N WITH LINE BELOW]
-			  case '\u1E4A': // Ṋ  [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
-			  case '\u24C3': // Ⓝ  [CIRCLED LATIN CAPITAL LETTER N]
-			  case '\uFF2E': // N  [FULLWIDTH LATIN CAPITAL LETTER N]
-				output[outputPos++] = 'N';
-				break;
-			  case '\u00F1': // ñ  [LATIN SMALL LETTER N WITH TILDE]
-			  case '\u0144': // ń  [LATIN SMALL LETTER N WITH ACUTE]
-			  case '\u0146': // ņ  [LATIN SMALL LETTER N WITH CEDILLA]
-			  case '\u0148': // ň  [LATIN SMALL LETTER N WITH CARON]
-			  case '\u0149': // ʼn  [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
-			  case '\u014B': // ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN SMALL LETTER ENG]
-			  case '\u019E': // ƞ  [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
-			  case '\u01F9': // ǹ  [LATIN SMALL LETTER N WITH GRAVE]
-			  case '\u0235': // ȵ  [LATIN SMALL LETTER N WITH CURL]
-			  case '\u0272': // ɲ  [LATIN SMALL LETTER N WITH LEFT HOOK]
-			  case '\u0273': // ɳ  [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
-			  case '\u1D70': // ᵰ  [LATIN SMALL LETTER N WITH MIDDLE TILDE]
-			  case '\u1D87': // ᶇ  [LATIN SMALL LETTER N WITH PALATAL HOOK]
-			  case '\u1E45': // ṅ  [LATIN SMALL LETTER N WITH DOT ABOVE]
-			  case '\u1E47': // ṇ  [LATIN SMALL LETTER N WITH DOT BELOW]
-			  case '\u1E49': // ṉ  [LATIN SMALL LETTER N WITH LINE BELOW]
-			  case '\u1E4B': // ṋ  [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
-			  case '\u207F': // ⁿ  [SUPERSCRIPT LATIN SMALL LETTER N]
-			  case '\u24DD': // ⓝ  [CIRCLED LATIN SMALL LETTER N]
-			  case '\uFF4E': // n  [FULLWIDTH LATIN SMALL LETTER N]
-				output[outputPos++] = 'n';
-				break;
-			  case '\u01CA': // NJ  [LATIN CAPITAL LETTER NJ]
-				output[outputPos++] = 'N';
-				output[outputPos++] = 'J';
-				break;
-			  case '\u01CB': // Nj  [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
-				output[outputPos++] = 'N';
-				output[outputPos++] = 'j';
-				break;
-			  case '\u24A9': // ⒩  [PARENTHESIZED LATIN SMALL LETTER N]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'n';
-				output[outputPos++] = ')';
-				break;
-			  case '\u01CC': // nj  [LATIN SMALL LETTER NJ]
-				output[outputPos++] = 'n';
-				output[outputPos++] = 'j';
-				break;
-			  case '\u00D2': // Ò  [LATIN CAPITAL LETTER O WITH GRAVE]
-			  case '\u00D3': // Ó  [LATIN CAPITAL LETTER O WITH ACUTE]
-			  case '\u00D4': // Ô  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
-			  case '\u00D5': // Õ  [LATIN CAPITAL LETTER O WITH TILDE]
-			  case '\u00D6': // Ö  [LATIN CAPITAL LETTER O WITH DIAERESIS]
-			  case '\u00D8': // Ø  [LATIN CAPITAL LETTER O WITH STROKE]
-			  case '\u014C': // Ō  [LATIN CAPITAL LETTER O WITH MACRON]
-			  case '\u014E': // Ŏ  [LATIN CAPITAL LETTER O WITH BREVE]
-			  case '\u0150': // Ő  [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
-			  case '\u0186': // Ɔ  [LATIN CAPITAL LETTER OPEN O]
-			  case '\u019F': // Ɵ  [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
-			  case '\u01A0': // Ơ  [LATIN CAPITAL LETTER O WITH HORN]
-			  case '\u01D1': // Ǒ  [LATIN CAPITAL LETTER O WITH CARON]
-			  case '\u01EA': // Ǫ  [LATIN CAPITAL LETTER O WITH OGONEK]
-			  case '\u01EC': // Ǭ  [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
-			  case '\u01FE': // Ǿ  [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
-			  case '\u020C': // Ȍ  [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
-			  case '\u020E': // Ȏ  [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
-			  case '\u022A': // Ȫ  [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
-			  case '\u022C': // Ȭ  [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
-			  case '\u022E': // Ȯ  [LATIN CAPITAL LETTER O WITH DOT ABOVE]
-			  case '\u0230': // Ȱ  [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
-			  case '\u1D0F': // ᴏ  [LATIN LETTER SMALL CAPITAL O]
-			  case '\u1D10': // ᴐ  [LATIN LETTER SMALL CAPITAL OPEN O]
-			  case '\u1E4C': // Ṍ  [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
-			  case '\u1E4E': // Ṏ  [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
-			  case '\u1E50': // Ṑ  [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
-			  case '\u1E52': // Ṓ  [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
-			  case '\u1ECC': // Ọ  [LATIN CAPITAL LETTER O WITH DOT BELOW]
-			  case '\u1ECE': // Ỏ  [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
-			  case '\u1ED0': // Ố  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
-			  case '\u1ED2': // Ồ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
-			  case '\u1ED4': // Ổ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
-			  case '\u1ED6': // Ỗ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
-			  case '\u1ED8': // Ộ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
-			  case '\u1EDA': // Ớ  [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
-			  case '\u1EDC': // Ờ  [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
-			  case '\u1EDE': // Ở  [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
-			  case '\u1EE0': // Ỡ  [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
-			  case '\u1EE2': // Ợ  [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
-			  case '\u24C4': // Ⓞ  [CIRCLED LATIN CAPITAL LETTER O]
-			  case '\uA74A': // Ꝋ  [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
-			  case '\uA74C': // Ꝍ  [LATIN CAPITAL LETTER O WITH LOOP]
-			  case '\uFF2F': // O  [FULLWIDTH LATIN CAPITAL LETTER O]
-				output[outputPos++] = 'O';
-				break;
-			  case '\u00F2': // ò  [LATIN SMALL LETTER O WITH GRAVE]
-			  case '\u00F3': // ó  [LATIN SMALL LETTER O WITH ACUTE]
-			  case '\u00F4': // ô  [LATIN SMALL LETTER O WITH CIRCUMFLEX]
-			  case '\u00F5': // õ  [LATIN SMALL LETTER O WITH TILDE]
-			  case '\u00F6': // ö  [LATIN SMALL LETTER O WITH DIAERESIS]
-			  case '\u00F8': // ø  [LATIN SMALL LETTER O WITH STROKE]
-			  case '\u014D': // ō  [LATIN SMALL LETTER O WITH MACRON]
-			  case '\u014F': // ŏ  [LATIN SMALL LETTER O WITH BREVE]
-			  case '\u0151': // ő  [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
-			  case '\u01A1': // ơ  [LATIN SMALL LETTER O WITH HORN]
-			  case '\u01D2': // ǒ  [LATIN SMALL LETTER O WITH CARON]
-			  case '\u01EB': // ǫ  [LATIN SMALL LETTER O WITH OGONEK]
-			  case '\u01ED': // ǭ  [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
-			  case '\u01FF': // ǿ  [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
-			  case '\u020D': // ȍ  [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
-			  case '\u020F': // ȏ  [LATIN SMALL LETTER O WITH INVERTED BREVE]
-			  case '\u022B': // ȫ  [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
-			  case '\u022D': // ȭ  [LATIN SMALL LETTER O WITH TILDE AND MACRON]
-			  case '\u022F': // ȯ  [LATIN SMALL LETTER O WITH DOT ABOVE]
-			  case '\u0231': // ȱ  [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
-			  case '\u0254': // ɔ  [LATIN SMALL LETTER OPEN O]
-			  case '\u0275': // ɵ  [LATIN SMALL LETTER BARRED O]
-			  case '\u1D16': // ᴖ  [LATIN SMALL LETTER TOP HALF O]
-			  case '\u1D17': // ᴗ  [LATIN SMALL LETTER BOTTOM HALF O]
-			  case '\u1D97': // ᶗ  [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
-			  case '\u1E4D': // ṍ  [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
-			  case '\u1E4F': // ṏ  [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
-			  case '\u1E51': // ṑ  [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
-			  case '\u1E53': // ṓ  [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
-			  case '\u1ECD': // ọ  [LATIN SMALL LETTER O WITH DOT BELOW]
-			  case '\u1ECF': // ỏ  [LATIN SMALL LETTER O WITH HOOK ABOVE]
-			  case '\u1ED1': // ố  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
-			  case '\u1ED3': // ồ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
-			  case '\u1ED5': // ổ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
-			  case '\u1ED7': // ỗ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
-			  case '\u1ED9': // ộ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
-			  case '\u1EDB': // ớ  [LATIN SMALL LETTER O WITH HORN AND ACUTE]
-			  case '\u1EDD': // ờ  [LATIN SMALL LETTER O WITH HORN AND GRAVE]
-			  case '\u1EDF': // ở  [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
-			  case '\u1EE1': // ỡ  [LATIN SMALL LETTER O WITH HORN AND TILDE]
-			  case '\u1EE3': // ợ  [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
-			  case '\u2092': // ₒ  [LATIN SUBSCRIPT SMALL LETTER O]
-			  case '\u24DE': // ⓞ  [CIRCLED LATIN SMALL LETTER O]
-			  case '\u2C7A': // ⱺ  [LATIN SMALL LETTER O WITH LOW RING INSIDE]
-			  case '\uA74B': // ꝋ  [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
-			  case '\uA74D': // ꝍ  [LATIN SMALL LETTER O WITH LOOP]
-			  case '\uFF4F': // o  [FULLWIDTH LATIN SMALL LETTER O]
-				output[outputPos++] = 'o';
-				break;
-			  case '\u0152': // Œ  [LATIN CAPITAL LIGATURE OE]
-			  case '\u0276': // ɶ  [LATIN LETTER SMALL CAPITAL OE]
-				output[outputPos++] = 'O';
-				output[outputPos++] = 'E';
-				break;
-			  case '\uA74E': // Ꝏ  [LATIN CAPITAL LETTER OO]
-				output[outputPos++] = 'O';
-				output[outputPos++] = 'O';
-				break;
-			  case '\u0222': // Ȣ  http://en.wikipedia.org/wiki/OU  [LATIN CAPITAL LETTER OU]
-			  case '\u1D15': // ᴕ  [LATIN LETTER SMALL CAPITAL OU]
-				output[outputPos++] = 'O';
-				output[outputPos++] = 'U';
-				break;
-			  case '\u24AA': // ⒪  [PARENTHESIZED LATIN SMALL LETTER O]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'o';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0153': // œ  [LATIN SMALL LIGATURE OE]
-			  case '\u1D14': // ᴔ  [LATIN SMALL LETTER TURNED OE]
-				output[outputPos++] = 'o';
-				output[outputPos++] = 'e';
-				break;
-			  case '\uA74F': // ꝏ  [LATIN SMALL LETTER OO]
-				output[outputPos++] = 'o';
-				output[outputPos++] = 'o';
-				break;
-			  case '\u0223': // ȣ  http://en.wikipedia.org/wiki/OU  [LATIN SMALL LETTER OU]
-				output[outputPos++] = 'o';
-				output[outputPos++] = 'u';
-				break;
-			  case '\u01A4': // Ƥ  [LATIN CAPITAL LETTER P WITH HOOK]
-			  case '\u1D18': // ᴘ  [LATIN LETTER SMALL CAPITAL P]
-			  case '\u1E54': // Ṕ  [LATIN CAPITAL LETTER P WITH ACUTE]
-			  case '\u1E56': // Ṗ  [LATIN CAPITAL LETTER P WITH DOT ABOVE]
-			  case '\u24C5': // Ⓟ  [CIRCLED LATIN CAPITAL LETTER P]
-			  case '\u2C63': // Ᵽ  [LATIN CAPITAL LETTER P WITH STROKE]
-			  case '\uA750': // Ꝑ  [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
-			  case '\uA752': // Ꝓ  [LATIN CAPITAL LETTER P WITH FLOURISH]
-			  case '\uA754': // Ꝕ  [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
-			  case '\uFF30': // P  [FULLWIDTH LATIN CAPITAL LETTER P]
-				output[outputPos++] = 'P';
-				break;
-			  case '\u01A5': // ƥ  [LATIN SMALL LETTER P WITH HOOK]
-			  case '\u1D71': // ᵱ  [LATIN SMALL LETTER P WITH MIDDLE TILDE]
-			  case '\u1D7D': // ᵽ  [LATIN SMALL LETTER P WITH STROKE]
-			  case '\u1D88': // ᶈ  [LATIN SMALL LETTER P WITH PALATAL HOOK]
-			  case '\u1E55': // ṕ  [LATIN SMALL LETTER P WITH ACUTE]
-			  case '\u1E57': // ṗ  [LATIN SMALL LETTER P WITH DOT ABOVE]
-			  case '\u24DF': // ⓟ  [CIRCLED LATIN SMALL LETTER P]
-			  case '\uA751': // ꝑ  [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
-			  case '\uA753': // ꝓ  [LATIN SMALL LETTER P WITH FLOURISH]
-			  case '\uA755': // ꝕ  [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
-			  case '\uA7FC': // ꟼ  [LATIN EPIGRAPHIC LETTER REVERSED P]
-			  case '\uFF50': // p  [FULLWIDTH LATIN SMALL LETTER P]
-				output[outputPos++] = 'p';
-				break;
-			  case '\u24AB': // ⒫  [PARENTHESIZED LATIN SMALL LETTER P]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'p';
-				output[outputPos++] = ')';
-				break;
-			  case '\u024A': // Ɋ  [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
-			  case '\u24C6': // Ⓠ  [CIRCLED LATIN CAPITAL LETTER Q]
-			  case '\uA756': // Ꝗ  [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
-			  case '\uA758': // Ꝙ  [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
-			  case '\uFF31': // Q  [FULLWIDTH LATIN CAPITAL LETTER Q]
-				output[outputPos++] = 'Q';
-				break;
-			  case '\u0138': // ĸ  http://en.wikipedia.org/wiki/Kra_(letter)  [LATIN SMALL LETTER KRA]
-			  case '\u024B': // ɋ  [LATIN SMALL LETTER Q WITH HOOK TAIL]
-			  case '\u02A0': // ʠ  [LATIN SMALL LETTER Q WITH HOOK]
-			  case '\u24E0': // ⓠ  [CIRCLED LATIN SMALL LETTER Q]
-			  case '\uA757': // ꝗ  [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
-			  case '\uA759': // ꝙ  [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
-			  case '\uFF51': // q  [FULLWIDTH LATIN SMALL LETTER Q]
-				output[outputPos++] = 'q';
-				break;
-			  case '\u24AC': // ⒬  [PARENTHESIZED LATIN SMALL LETTER Q]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'q';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0239': // ȹ  [LATIN SMALL LETTER QP DIGRAPH]
-				output[outputPos++] = 'q';
-				output[outputPos++] = 'p';
-				break;
-			  case '\u0154': // Ŕ  [LATIN CAPITAL LETTER R WITH ACUTE]
-			  case '\u0156': // Ŗ  [LATIN CAPITAL LETTER R WITH CEDILLA]
-			  case '\u0158': // Ř  [LATIN CAPITAL LETTER R WITH CARON]
-			  case '\u0210': // Ȑ  [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
-			  case '\u0212': // Ȓ  [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
-			  case '\u024C': // Ɍ  [LATIN CAPITAL LETTER R WITH STROKE]
-			  case '\u0280': // ʀ  [LATIN LETTER SMALL CAPITAL R]
-			  case '\u0281': // ʁ  [LATIN LETTER SMALL CAPITAL INVERTED R]
-			  case '\u1D19': // ᴙ  [LATIN LETTER SMALL CAPITAL REVERSED R]
-			  case '\u1D1A': // ᴚ  [LATIN LETTER SMALL CAPITAL TURNED R]
-			  case '\u1E58': // Ṙ  [LATIN CAPITAL LETTER R WITH DOT ABOVE]
-			  case '\u1E5A': // Ṛ  [LATIN CAPITAL LETTER R WITH DOT BELOW]
-			  case '\u1E5C': // Ṝ  [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
-			  case '\u1E5E': // Ṟ  [LATIN CAPITAL LETTER R WITH LINE BELOW]
-			  case '\u24C7': // Ⓡ  [CIRCLED LATIN CAPITAL LETTER R]
-			  case '\u2C64': // Ɽ  [LATIN CAPITAL LETTER R WITH TAIL]
-			  case '\uA75A': // Ꝛ  [LATIN CAPITAL LETTER R ROTUNDA]
-			  case '\uA782': // Ꞃ  [LATIN CAPITAL LETTER INSULAR R]
-			  case '\uFF32': // R  [FULLWIDTH LATIN CAPITAL LETTER R]
-				output[outputPos++] = 'R';
-				break;
-			  case '\u0155': // ŕ  [LATIN SMALL LETTER R WITH ACUTE]
-			  case '\u0157': // ŗ  [LATIN SMALL LETTER R WITH CEDILLA]
-			  case '\u0159': // ř  [LATIN SMALL LETTER R WITH CARON]
-			  case '\u0211': // ȑ  [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
-			  case '\u0213': // ȓ  [LATIN SMALL LETTER R WITH INVERTED BREVE]
-			  case '\u024D': // ɍ  [LATIN SMALL LETTER R WITH STROKE]
-			  case '\u027C': // ɼ  [LATIN SMALL LETTER R WITH LONG LEG]
-			  case '\u027D': // ɽ  [LATIN SMALL LETTER R WITH TAIL]
-			  case '\u027E': // ɾ  [LATIN SMALL LETTER R WITH FISHHOOK]
-			  case '\u027F': // ɿ  [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
-			  case '\u1D63': // ᵣ  [LATIN SUBSCRIPT SMALL LETTER R]
-			  case '\u1D72': // ᵲ  [LATIN SMALL LETTER R WITH MIDDLE TILDE]
-			  case '\u1D73': // ᵳ  [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
-			  case '\u1D89': // ᶉ  [LATIN SMALL LETTER R WITH PALATAL HOOK]
-			  case '\u1E59': // ṙ  [LATIN SMALL LETTER R WITH DOT ABOVE]
-			  case '\u1E5B': // ṛ  [LATIN SMALL LETTER R WITH DOT BELOW]
-			  case '\u1E5D': // ṝ  [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
-			  case '\u1E5F': // ṟ  [LATIN SMALL LETTER R WITH LINE BELOW]
-			  case '\u24E1': // ⓡ  [CIRCLED LATIN SMALL LETTER R]
-			  case '\uA75B': // ꝛ  [LATIN SMALL LETTER R ROTUNDA]
-			  case '\uA783': // ꞃ  [LATIN SMALL LETTER INSULAR R]
-			  case '\uFF52': // r  [FULLWIDTH LATIN SMALL LETTER R]
-				output[outputPos++] = 'r';
-				break;
-			  case '\u24AD': // ⒭  [PARENTHESIZED LATIN SMALL LETTER R]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'r';
-				output[outputPos++] = ')';
-				break;
-			  case '\u015A': // Ś  [LATIN CAPITAL LETTER S WITH ACUTE]
-			  case '\u015C': // Ŝ  [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
-			  case '\u015E': // Ş  [LATIN CAPITAL LETTER S WITH CEDILLA]
-			  case '\u0160': // Š  [LATIN CAPITAL LETTER S WITH CARON]
-			  case '\u0218': // Ș  [LATIN CAPITAL LETTER S WITH COMMA BELOW]
-			  case '\u1E60': // Ṡ  [LATIN CAPITAL LETTER S WITH DOT ABOVE]
-			  case '\u1E62': // Ṣ  [LATIN CAPITAL LETTER S WITH DOT BELOW]
-			  case '\u1E64': // Ṥ  [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
-			  case '\u1E66': // Ṧ  [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
-			  case '\u1E68': // Ṩ  [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
-			  case '\u24C8': // Ⓢ  [CIRCLED LATIN CAPITAL LETTER S]
-			  case '\uA731': // ꜱ  [LATIN LETTER SMALL CAPITAL S]
-			  case '\uA785': // ꞅ  [LATIN SMALL LETTER INSULAR S]
-			  case '\uFF33': // S  [FULLWIDTH LATIN CAPITAL LETTER S]
-				output[outputPos++] = 'S';
-				break;
-			  case '\u015B': // ś  [LATIN SMALL LETTER S WITH ACUTE]
-			  case '\u015D': // ŝ  [LATIN SMALL LETTER S WITH CIRCUMFLEX]
-			  case '\u015F': // ş  [LATIN SMALL LETTER S WITH CEDILLA]
-			  case '\u0161': // š  [LATIN SMALL LETTER S WITH CARON]
-			  case '\u017F': // ſ  http://en.wikipedia.org/wiki/Long_S  [LATIN SMALL LETTER LONG S]
-			  case '\u0219': // ș  [LATIN SMALL LETTER S WITH COMMA BELOW]
-			  case '\u023F': // ȿ  [LATIN SMALL LETTER S WITH SWASH TAIL]
-			  case '\u0282': // ʂ  [LATIN SMALL LETTER S WITH HOOK]
-			  case '\u1D74': // ᵴ  [LATIN SMALL LETTER S WITH MIDDLE TILDE]
-			  case '\u1D8A': // ᶊ  [LATIN SMALL LETTER S WITH PALATAL HOOK]
-			  case '\u1E61': // ṡ  [LATIN SMALL LETTER S WITH DOT ABOVE]
-			  case '\u1E63': // ṣ  [LATIN SMALL LETTER S WITH DOT BELOW]
-			  case '\u1E65': // ṥ  [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
-			  case '\u1E67': // ṧ  [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
-			  case '\u1E69': // ṩ  [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
-			  case '\u1E9C': // ẜ  [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
-			  case '\u1E9D': // ẝ  [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
-			  case '\u24E2': // ⓢ  [CIRCLED LATIN SMALL LETTER S]
-			  case '\uA784': // Ꞅ  [LATIN CAPITAL LETTER INSULAR S]
-			  case '\uFF53': // s  [FULLWIDTH LATIN SMALL LETTER S]
-				output[outputPos++] = 's';
-				break;
-			  case '\u1E9E': // ẞ  [LATIN CAPITAL LETTER SHARP S]
-				output[outputPos++] = 'S';
-				output[outputPos++] = 'S';
-				break;
-			  case '\u24AE': // ⒮  [PARENTHESIZED LATIN SMALL LETTER S]
-				output[outputPos++] = '(';
-				output[outputPos++] = 's';
-				output[outputPos++] = ')';
-				break;
-			  case '\u00DF': // ß  [LATIN SMALL LETTER SHARP S]
-				output[outputPos++] = 's';
-				output[outputPos++] = 's';
-				break;
-			  case '\uFB06': // st  [LATIN SMALL LIGATURE ST]
-				output[outputPos++] = 's';
-				output[outputPos++] = 't';
-				break;
-			  case '\u0162': // Ţ  [LATIN CAPITAL LETTER T WITH CEDILLA]
-			  case '\u0164': // Ť  [LATIN CAPITAL LETTER T WITH CARON]
-			  case '\u0166': // Ŧ  [LATIN CAPITAL LETTER T WITH STROKE]
-			  case '\u01AC': // Ƭ  [LATIN CAPITAL LETTER T WITH HOOK]
-			  case '\u01AE': // Ʈ  [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
-			  case '\u021A': // Ț  [LATIN CAPITAL LETTER T WITH COMMA BELOW]
-			  case '\u023E': // Ⱦ  [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
-			  case '\u1D1B': // ᴛ  [LATIN LETTER SMALL CAPITAL T]
-			  case '\u1E6A': // Ṫ  [LATIN CAPITAL LETTER T WITH DOT ABOVE]
-			  case '\u1E6C': // Ṭ  [LATIN CAPITAL LETTER T WITH DOT BELOW]
-			  case '\u1E6E': // Ṯ  [LATIN CAPITAL LETTER T WITH LINE BELOW]
-			  case '\u1E70': // Ṱ  [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
-			  case '\u24C9': // Ⓣ  [CIRCLED LATIN CAPITAL LETTER T]
-			  case '\uA786': // Ꞇ  [LATIN CAPITAL LETTER INSULAR T]
-			  case '\uFF34': // T  [FULLWIDTH LATIN CAPITAL LETTER T]
-				output[outputPos++] = 'T';
-				break;
-			  case '\u0163': // ţ  [LATIN SMALL LETTER T WITH CEDILLA]
-			  case '\u0165': // ť  [LATIN SMALL LETTER T WITH CARON]
-			  case '\u0167': // ŧ  [LATIN SMALL LETTER T WITH STROKE]
-			  case '\u01AB': // ƫ  [LATIN SMALL LETTER T WITH PALATAL HOOK]
-			  case '\u01AD': // ƭ  [LATIN SMALL LETTER T WITH HOOK]
-			  case '\u021B': // ț  [LATIN SMALL LETTER T WITH COMMA BELOW]
-			  case '\u0236': // ȶ  [LATIN SMALL LETTER T WITH CURL]
-			  case '\u0287': // ʇ  [LATIN SMALL LETTER TURNED T]
-			  case '\u0288': // ʈ  [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
-			  case '\u1D75': // ᵵ  [LATIN SMALL LETTER T WITH MIDDLE TILDE]
-			  case '\u1E6B': // ṫ  [LATIN SMALL LETTER T WITH DOT ABOVE]
-			  case '\u1E6D': // ṭ  [LATIN SMALL LETTER T WITH DOT BELOW]
-			  case '\u1E6F': // ṯ  [LATIN SMALL LETTER T WITH LINE BELOW]
-			  case '\u1E71': // ṱ  [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
-			  case '\u1E97': // ẗ  [LATIN SMALL LETTER T WITH DIAERESIS]
-			  case '\u24E3': // ⓣ  [CIRCLED LATIN SMALL LETTER T]
-			  case '\u2C66': // ⱦ  [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
-			  case '\uFF54': // t  [FULLWIDTH LATIN SMALL LETTER T]
-				output[outputPos++] = 't';
-				break;
-			  case '\u00DE': // Þ  [LATIN CAPITAL LETTER THORN]
-			  case '\uA766': // Ꝧ  [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
-				output[outputPos++] = 'T';
-				output[outputPos++] = 'H';
-				break;
-			  case '\uA728': // Ꜩ  [LATIN CAPITAL LETTER TZ]
-				output[outputPos++] = 'T';
-				output[outputPos++] = 'Z';
-				break;
-			  case '\u24AF': // ⒯  [PARENTHESIZED LATIN SMALL LETTER T]
-				output[outputPos++] = '(';
-				output[outputPos++] = 't';
-				output[outputPos++] = ')';
-				break;
-			  case '\u02A8': // ʨ  [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
-				output[outputPos++] = 't';
-				output[outputPos++] = 'c';
-				break;
-			  case '\u00FE': // þ  [LATIN SMALL LETTER THORN]
-			  case '\u1D7A': // ᵺ  [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
-			  case '\uA767': // ꝧ  [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
-				output[outputPos++] = 't';
-				output[outputPos++] = 'h';
-				break;
-			  case '\u02A6': // ʦ  [LATIN SMALL LETTER TS DIGRAPH]
-				output[outputPos++] = 't';
-				output[outputPos++] = 's';
-				break;
-			  case '\uA729': // ꜩ  [LATIN SMALL LETTER TZ]
-				output[outputPos++] = 't';
-				output[outputPos++] = 'z';
-				break;
-			  case '\u00D9': // Ù  [LATIN CAPITAL LETTER U WITH GRAVE]
-			  case '\u00DA': // Ú  [LATIN CAPITAL LETTER U WITH ACUTE]
-			  case '\u00DB': // Û  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
-			  case '\u00DC': // Ü  [LATIN CAPITAL LETTER U WITH DIAERESIS]
-			  case '\u0168': // Ũ  [LATIN CAPITAL LETTER U WITH TILDE]
-			  case '\u016A': // Ū  [LATIN CAPITAL LETTER U WITH MACRON]
-			  case '\u016C': // Ŭ  [LATIN CAPITAL LETTER U WITH BREVE]
-			  case '\u016E': // Ů  [LATIN CAPITAL LETTER U WITH RING ABOVE]
-			  case '\u0170': // Ű  [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
-			  case '\u0172': // Ų  [LATIN CAPITAL LETTER U WITH OGONEK]
-			  case '\u01AF': // Ư  [LATIN CAPITAL LETTER U WITH HORN]
-			  case '\u01D3': // Ǔ  [LATIN CAPITAL LETTER U WITH CARON]
-			  case '\u01D5': // Ǖ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
-			  case '\u01D7': // Ǘ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
-			  case '\u01D9': // Ǚ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
-			  case '\u01DB': // Ǜ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
-			  case '\u0214': // Ȕ  [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
-			  case '\u0216': // Ȗ  [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
-			  case '\u0244': // Ʉ  [LATIN CAPITAL LETTER U BAR]
-			  case '\u1D1C': // ᴜ  [LATIN LETTER SMALL CAPITAL U]
-			  case '\u1D7E': // ᵾ  [LATIN SMALL CAPITAL LETTER U WITH STROKE]
-			  case '\u1E72': // Ṳ  [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
-			  case '\u1E74': // Ṵ  [LATIN CAPITAL LETTER U WITH TILDE BELOW]
-			  case '\u1E76': // Ṷ  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
-			  case '\u1E78': // Ṹ  [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
-			  case '\u1E7A': // Ṻ  [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
-			  case '\u1EE4': // Ụ  [LATIN CAPITAL LETTER U WITH DOT BELOW]
-			  case '\u1EE6': // Ủ  [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
-			  case '\u1EE8': // Ứ  [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
-			  case '\u1EEA': // Ừ  [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
-			  case '\u1EEC': // Ử  [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
-			  case '\u1EEE': // Ữ  [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
-			  case '\u1EF0': // Ự  [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
-			  case '\u24CA': // Ⓤ  [CIRCLED LATIN CAPITAL LETTER U]
-			  case '\uFF35': // U  [FULLWIDTH LATIN CAPITAL LETTER U]
-				output[outputPos++] = 'U';
-				break;
-			  case '\u00F9': // ù  [LATIN SMALL LETTER U WITH GRAVE]
-			  case '\u00FA': // ú  [LATIN SMALL LETTER U WITH ACUTE]
-			  case '\u00FB': // û  [LATIN SMALL LETTER U WITH CIRCUMFLEX]
-			  case '\u00FC': // ü  [LATIN SMALL LETTER U WITH DIAERESIS]
-			  case '\u0169': // ũ  [LATIN SMALL LETTER U WITH TILDE]
-			  case '\u016B': // ū  [LATIN SMALL LETTER U WITH MACRON]
-			  case '\u016D': // ŭ  [LATIN SMALL LETTER U WITH BREVE]
-			  case '\u016F': // ů  [LATIN SMALL LETTER U WITH RING ABOVE]
-			  case '\u0171': // ű  [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
-			  case '\u0173': // ų  [LATIN SMALL LETTER U WITH OGONEK]
-			  case '\u01B0': // ư  [LATIN SMALL LETTER U WITH HORN]
-			  case '\u01D4': // ǔ  [LATIN SMALL LETTER U WITH CARON]
-			  case '\u01D6': // ǖ  [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
-			  case '\u01D8': // ǘ  [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
-			  case '\u01DA': // ǚ  [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
-			  case '\u01DC': // ǜ  [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
-			  case '\u0215': // ȕ  [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
-			  case '\u0217': // ȗ  [LATIN SMALL LETTER U WITH INVERTED BREVE]
-			  case '\u0289': // ʉ  [LATIN SMALL LETTER U BAR]
-			  case '\u1D64': // ᵤ  [LATIN SUBSCRIPT SMALL LETTER U]
-			  case '\u1D99': // ᶙ  [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
-			  case '\u1E73': // ṳ  [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
-			  case '\u1E75': // ṵ  [LATIN SMALL LETTER U WITH TILDE BELOW]
-			  case '\u1E77': // ṷ  [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
-			  case '\u1E79': // ṹ  [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
-			  case '\u1E7B': // ṻ  [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
-			  case '\u1EE5': // ụ  [LATIN SMALL LETTER U WITH DOT BELOW]
-			  case '\u1EE7': // ủ  [LATIN SMALL LETTER U WITH HOOK ABOVE]
-			  case '\u1EE9': // ứ  [LATIN SMALL LETTER U WITH HORN AND ACUTE]
-			  case '\u1EEB': // ừ  [LATIN SMALL LETTER U WITH HORN AND GRAVE]
-			  case '\u1EED': // ử  [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
-			  case '\u1EEF': // ữ  [LATIN SMALL LETTER U WITH HORN AND TILDE]
-			  case '\u1EF1': // ự  [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
-			  case '\u24E4': // ⓤ  [CIRCLED LATIN SMALL LETTER U]
-			  case '\uFF55': // u  [FULLWIDTH LATIN SMALL LETTER U]
-				output[outputPos++] = 'u';
-				break;
-			  case '\u24B0': // ⒰  [PARENTHESIZED LATIN SMALL LETTER U]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'u';
-				output[outputPos++] = ')';
-				break;
-			  case '\u1D6B': // ᵫ  [LATIN SMALL LETTER UE]
-				output[outputPos++] = 'u';
-				output[outputPos++] = 'e';
-				break;
-			  case '\u01B2': // Ʋ  [LATIN CAPITAL LETTER V WITH HOOK]
-			  case '\u0245': // Ʌ  [LATIN CAPITAL LETTER TURNED V]
-			  case '\u1D20': // ᴠ  [LATIN LETTER SMALL CAPITAL V]
-			  case '\u1E7C': // Ṽ  [LATIN CAPITAL LETTER V WITH TILDE]
-			  case '\u1E7E': // Ṿ  [LATIN CAPITAL LETTER V WITH DOT BELOW]
-			  case '\u1EFC': // Ỽ  [LATIN CAPITAL LETTER MIDDLE-WELSH V]
-			  case '\u24CB': // Ⓥ  [CIRCLED LATIN CAPITAL LETTER V]
-			  case '\uA75E': // Ꝟ  [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
-			  case '\uA768': // Ꝩ  [LATIN CAPITAL LETTER VEND]
-			  case '\uFF36': // V  [FULLWIDTH LATIN CAPITAL LETTER V]
-				output[outputPos++] = 'V';
-				break;
-			  case '\u028B': // ʋ  [LATIN SMALL LETTER V WITH HOOK]
-			  case '\u028C': // ʌ  [LATIN SMALL LETTER TURNED V]
-			  case '\u1D65': // ᵥ  [LATIN SUBSCRIPT SMALL LETTER V]
-			  case '\u1D8C': // ᶌ  [LATIN SMALL LETTER V WITH PALATAL HOOK]
-			  case '\u1E7D': // ṽ  [LATIN SMALL LETTER V WITH TILDE]
-			  case '\u1E7F': // ṿ  [LATIN SMALL LETTER V WITH DOT BELOW]
-			  case '\u24E5': // ⓥ  [CIRCLED LATIN SMALL LETTER V]
-			  case '\u2C71': // ⱱ  [LATIN SMALL LETTER V WITH RIGHT HOOK]
-			  case '\u2C74': // ⱴ  [LATIN SMALL LETTER V WITH CURL]
-			  case '\uA75F': // ꝟ  [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
-			  case '\uFF56': // v  [FULLWIDTH LATIN SMALL LETTER V]
-				output[outputPos++] = 'v';
-				break;
-			  case '\uA760': // Ꝡ  [LATIN CAPITAL LETTER VY]
-				output[outputPos++] = 'V';
-				output[outputPos++] = 'Y';
-				break;
-			  case '\u24B1': // ⒱  [PARENTHESIZED LATIN SMALL LETTER V]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'v';
-				output[outputPos++] = ')';
-				break;
-			  case '\uA761': // ꝡ  [LATIN SMALL LETTER VY]
-				output[outputPos++] = 'v';
-				output[outputPos++] = 'y';
-				break;
-			  case '\u0174': // Ŵ  [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
-			  case '\u01F7': // Ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN CAPITAL LETTER WYNN]
-			  case '\u1D21': // ᴡ  [LATIN LETTER SMALL CAPITAL W]
-			  case '\u1E80': // Ẁ  [LATIN CAPITAL LETTER W WITH GRAVE]
-			  case '\u1E82': // Ẃ  [LATIN CAPITAL LETTER W WITH ACUTE]
-			  case '\u1E84': // Ẅ  [LATIN CAPITAL LETTER W WITH DIAERESIS]
-			  case '\u1E86': // Ẇ  [LATIN CAPITAL LETTER W WITH DOT ABOVE]
-			  case '\u1E88': // Ẉ  [LATIN CAPITAL LETTER W WITH DOT BELOW]
-			  case '\u24CC': // Ⓦ  [CIRCLED LATIN CAPITAL LETTER W]
-			  case '\u2C72': // Ⱳ  [LATIN CAPITAL LETTER W WITH HOOK]
-			  case '\uFF37': // W  [FULLWIDTH LATIN CAPITAL LETTER W]
-				output[outputPos++] = 'W';
-				break;
-			  case '\u0175': // ŵ  [LATIN SMALL LETTER W WITH CIRCUMFLEX]
-			  case '\u01BF': // ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN LETTER WYNN]
-			  case '\u028D': // ʍ  [LATIN SMALL LETTER TURNED W]
-			  case '\u1E81': // ẁ  [LATIN SMALL LETTER W WITH GRAVE]
-			  case '\u1E83': // ẃ  [LATIN SMALL LETTER W WITH ACUTE]
-			  case '\u1E85': // ẅ  [LATIN SMALL LETTER W WITH DIAERESIS]
-			  case '\u1E87': // ẇ  [LATIN SMALL LETTER W WITH DOT ABOVE]
-			  case '\u1E89': // ẉ  [LATIN SMALL LETTER W WITH DOT BELOW]
-			  case '\u1E98': // ẘ  [LATIN SMALL LETTER W WITH RING ABOVE]
-			  case '\u24E6': // ⓦ  [CIRCLED LATIN SMALL LETTER W]
-			  case '\u2C73': // ⱳ  [LATIN SMALL LETTER W WITH HOOK]
-			  case '\uFF57': // w  [FULLWIDTH LATIN SMALL LETTER W]
-				output[outputPos++] = 'w';
-				break;
-			  case '\u24B2': // ⒲  [PARENTHESIZED LATIN SMALL LETTER W]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'w';
-				output[outputPos++] = ')';
-				break;
-			  case '\u1E8A': // Ẋ  [LATIN CAPITAL LETTER X WITH DOT ABOVE]
-			  case '\u1E8C': // Ẍ  [LATIN CAPITAL LETTER X WITH DIAERESIS]
-			  case '\u24CD': // Ⓧ  [CIRCLED LATIN CAPITAL LETTER X]
-			  case '\uFF38': // X  [FULLWIDTH LATIN CAPITAL LETTER X]
-				output[outputPos++] = 'X';
-				break;
-			  case '\u1D8D': // ᶍ  [LATIN SMALL LETTER X WITH PALATAL HOOK]
-			  case '\u1E8B': // ẋ  [LATIN SMALL LETTER X WITH DOT ABOVE]
-			  case '\u1E8D': // ẍ  [LATIN SMALL LETTER X WITH DIAERESIS]
-			  case '\u2093': // ₓ  [LATIN SUBSCRIPT SMALL LETTER X]
-			  case '\u24E7': // ⓧ  [CIRCLED LATIN SMALL LETTER X]
-			  case '\uFF58': // x  [FULLWIDTH LATIN SMALL LETTER X]
-				output[outputPos++] = 'x';
-				break;
-			  case '\u24B3': // ⒳  [PARENTHESIZED LATIN SMALL LETTER X]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'x';
-				output[outputPos++] = ')';
-				break;
-			  case '\u00DD': // Ý  [LATIN CAPITAL LETTER Y WITH ACUTE]
-			  case '\u0176': // Ŷ  [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
-			  case '\u0178': // Ÿ  [LATIN CAPITAL LETTER Y WITH DIAERESIS]
-			  case '\u01B3': // Ƴ  [LATIN CAPITAL LETTER Y WITH HOOK]
-			  case '\u0232': // Ȳ  [LATIN CAPITAL LETTER Y WITH MACRON]
-			  case '\u024E': // Ɏ  [LATIN CAPITAL LETTER Y WITH STROKE]
-			  case '\u028F': // ʏ  [LATIN LETTER SMALL CAPITAL Y]
-			  case '\u1E8E': // Ẏ  [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
-			  case '\u1EF2': // Ỳ  [LATIN CAPITAL LETTER Y WITH GRAVE]
-			  case '\u1EF4': // Ỵ  [LATIN CAPITAL LETTER Y WITH DOT BELOW]
-			  case '\u1EF6': // Ỷ  [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
-			  case '\u1EF8': // Ỹ  [LATIN CAPITAL LETTER Y WITH TILDE]
-			  case '\u1EFE': // Ỿ  [LATIN CAPITAL LETTER Y WITH LOOP]
-			  case '\u24CE': // Ⓨ  [CIRCLED LATIN CAPITAL LETTER Y]
-			  case '\uFF39': // Y  [FULLWIDTH LATIN CAPITAL LETTER Y]
-				output[outputPos++] = 'Y';
-				break;
-			  case '\u00FD': // ý  [LATIN SMALL LETTER Y WITH ACUTE]
-			  case '\u00FF': // ÿ  [LATIN SMALL LETTER Y WITH DIAERESIS]
-			  case '\u0177': // ŷ  [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
-			  case '\u01B4': // ƴ  [LATIN SMALL LETTER Y WITH HOOK]
-			  case '\u0233': // ȳ  [LATIN SMALL LETTER Y WITH MACRON]
-			  case '\u024F': // ɏ  [LATIN SMALL LETTER Y WITH STROKE]
-			  case '\u028E': // ʎ  [LATIN SMALL LETTER TURNED Y]
-			  case '\u1E8F': // ẏ  [LATIN SMALL LETTER Y WITH DOT ABOVE]
-			  case '\u1E99': // ẙ  [LATIN SMALL LETTER Y WITH RING ABOVE]
-			  case '\u1EF3': // ỳ  [LATIN SMALL LETTER Y WITH GRAVE]
-			  case '\u1EF5': // ỵ  [LATIN SMALL LETTER Y WITH DOT BELOW]
-			  case '\u1EF7': // ỷ  [LATIN SMALL LETTER Y WITH HOOK ABOVE]
-			  case '\u1EF9': // ỹ  [LATIN SMALL LETTER Y WITH TILDE]
-			  case '\u1EFF': // ỿ  [LATIN SMALL LETTER Y WITH LOOP]
-			  case '\u24E8': // ⓨ  [CIRCLED LATIN SMALL LETTER Y]
-			  case '\uFF59': // y  [FULLWIDTH LATIN SMALL LETTER Y]
-				output[outputPos++] = 'y';
-				break;
-			  case '\u24B4': // ⒴  [PARENTHESIZED LATIN SMALL LETTER Y]
-				output[outputPos++] = '(';
-				output[outputPos++] = 'y';
-				output[outputPos++] = ')';
-				break;
-			  case '\u0179': // Ź  [LATIN CAPITAL LETTER Z WITH ACUTE]
-			  case '\u017B': // Ż  [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
-			  case '\u017D': // Ž  [LATIN CAPITAL LETTER Z WITH CARON]
-			  case '\u01B5': // Ƶ  [LATIN CAPITAL LETTER Z WITH STROKE]
-			  case '\u021C': // Ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN CAPITAL LETTER YOGH]

<TRUNCATED>
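
Note: the removed lines above are the P-through-Z stretch of the character-folding switch ported from Lucene's ASCIIFoldingFilter. Each case rewrites one accented or typographic Latin variant into its plain ASCII form, appending one to four chars to output (for example ẞ becomes "SS" and ⒫ becomes "(p)", which is why callers size the output buffer at four times the input length). Below is a minimal sketch of driving that table end-to-end. It assumes the port keeps Java Lucene's static foldToASCII helper as ASCIIFoldingFilter.FoldToASCII(char[] input, int inputPos, char[] output, int outputPos, int length) returning the folded length; the exact namespace and signature in this branch may still differ while porting is in progress.

    using System;
    using Lucene.Net.Analysis.Miscellaneous;

    public static class FoldingDemo
    {
        public static void Main()
        {
            // Mixed accented/typographic input; Ṕ, é, Ꝗ and ü all have
            // entries in the folding switch above or its earlier sections.
            char[] input = "Ṕéter Ꝗüick".ToCharArray();

            // A single input char can expand to up to four output chars
            // (e.g. the parenthesized letters), so allocate 4x the input.
            char[] output = new char[input.Length * 4];

            // Assumed call shape, mirroring Java's foldToASCII; verify
            // against the ported ASCIIFoldingFilter before relying on it.
            int folded = ASCIIFoldingFilter.FoldToASCII(input, 0, output, 0, input.Length);

            Console.WriteLine(new string(output, 0, folded)); // "Peter Quick"
        }
    }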