You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/02/09 10:36:03 UTC

svn commit: r1068809 [20/36] - in /lucene/dev/branches/docvalues: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/.idea/copyright/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/queryparser/ dev-tools...

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java Wed Feb  9 09:35:27 2011
@@ -22,10 +22,9 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.util.Version;
@@ -42,14 +41,10 @@ import org.apache.lucene.util.Version;
  *         increments are preserved
  * </ul>
  */
-public final class StopFilter extends TokenFilter {
+public final class StopFilter extends FilteringTokenFilter {
 
   private final CharArraySet stopWords;
-  private boolean enablePositionIncrements = true;
-
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-
 
   /**
    * Construct a token stream filtering the given input. If
@@ -75,7 +70,7 @@ public final class StopFilter extends To
    */
   public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
   {
-    super(input);
+    super(true, input);
     this.stopWords = stopWords instanceof CharArraySet ? (CharArraySet) stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase);
   }
   
@@ -157,48 +152,8 @@ public final class StopFilter extends To
    * Returns the next input Token whose term() is not a stop word.
    */
   @Override
-  public final boolean incrementToken() throws IOException {
-    // return the first non-stop word found
-    int skippedPositions = 0;
-    while (input.incrementToken()) {
-      if (!stopWords.contains(termAtt.buffer(), 0, termAtt.length())) {
-        if (enablePositionIncrements) {
-          posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
-        }
-        return true;
-      }
-      skippedPositions += posIncrAtt.getPositionIncrement();
-    }
-    // reached EOS -- return false
-    return false;
-  }
-
-  /**
-   * @see #setEnablePositionIncrements(boolean)
-   */
-  public boolean getEnablePositionIncrements() {
-    return enablePositionIncrements;
+  protected boolean accept() throws IOException {
+    return !stopWords.contains(termAtt.buffer(), 0, termAtt.length());
   }
 
-  /**
-   * If <code>true</code>, this StopFilter will preserve
-   * positions of the incoming tokens (ie, accumulate and
-   * set position increments of the removed stop tokens).
-   * Generally, <code>true</code> is best as it does not
-   * lose information (positions of the original tokens)
-   * during indexing.
-   *
-   * Default is true.
-   * 
-   * <p> When set, when a token is stopped
-   * (omitted), the position increment of the following
-   * token is incremented.
-   *
-   * <p> <b>NOTE</b>: be sure to also
-   * set {@link QueryParser#setEnablePositionIncrements} if
-   * you use QueryParser to create queries.
-   */
-  public void setEnablePositionIncrements(boolean enable) {
-    this.enablePositionIncrements = enable;
-  }
 }

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java Wed Feb  9 09:35:27 2011
@@ -86,7 +86,7 @@ public final class CzechAnalyzer extends
   private final Set<?> stemExclusionTable;
 
   /**
-   * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
+   * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
    *
    * @param matchVersion Lucene version to match See
    *          {@link <a href="#version">above</a>}

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java Wed Feb  9 09:35:27 2011
@@ -32,6 +32,7 @@ public class PersianCharFilter extends C
     super(in);
   }
   
+  @Override
   public int read(char[] cbuf, int off, int len) throws IOException {
     final int charsRead = super.read(cbuf, off, len);
     if (charsRead > 0) {

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java Wed Feb  9 09:35:27 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 
@@ -30,22 +31,19 @@ import org.apache.lucene.analysis.util.C
  * 
  * @since solr 1.3
  */
-public final class KeepWordFilter extends TokenFilter {
+public final class KeepWordFilter extends FilteringTokenFilter {
   private final CharArraySet words;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /** The words set passed to this constructor will be directly used by this filter
    * and should not be modified, */
-  public KeepWordFilter(TokenStream in, CharArraySet words) {
-    super(in);
+  public KeepWordFilter(boolean enablePositionIncrements, TokenStream in, CharArraySet words) {
+    super(enablePositionIncrements, in);
     this.words = words;
   }
 
   @Override
-  public boolean incrementToken() throws IOException {
-    while (input.incrementToken()) {
-      if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true;
-    }
-    return false;
+  public boolean accept() throws IOException {
+    return words.contains(termAtt.buffer(), 0, termAtt.length());
   }
 }

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java Wed Feb  9 09:35:27 2011
@@ -74,10 +74,12 @@ public final class KeywordMarkerFilter e
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0,
-          termAtt.length()));
+      if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) { 
+        keywordAttr.setKeyword(true);
+      }
       return true;
-    } else
+    } else {
       return false;
+    }
   }
 }

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java Wed Feb  9 09:35:27 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
@@ -29,7 +30,7 @@ import org.apache.lucene.analysis.tokena
  * Note: Length is calculated as the number of UTF-16 code units.
  * </p>
  */
-public final class LengthFilter extends TokenFilter {
+public final class LengthFilter extends FilteringTokenFilter {
 
   private final int min;
   private final int max;
@@ -40,27 +41,15 @@ public final class LengthFilter extends 
    * Build a filter that removes words that are too long or too
    * short from the text.
    */
-  public LengthFilter(TokenStream in, int min, int max)
-  {
-    super(in);
+  public LengthFilter(boolean enablePositionIncrements, TokenStream in, int min, int max) {
+    super(enablePositionIncrements, in);
     this.min = min;
     this.max = max;
   }
   
-  /**
-   * Returns the next input Token whose term() is the right len
-   */
   @Override
-  public final boolean incrementToken() throws IOException {
-    // return the first non-stop word found
-    while (input.incrementToken()) {
-      int len = termAtt.length();
-      if (len >= min && len <= max) {
-          return true;
-      }
-      // note: else we ignore it but should we index each part of it?
-    }
-    // reached EOS -- return false
-    return false;
+  public boolean accept() throws IOException {
+    final int len = termAtt.length();
+    return (len >= min && len <= max);
   }
 }

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Wed Feb  9 09:35:27 2011
@@ -109,7 +109,7 @@ public final class DutchAnalyzer extends
   private final Version matchVersion;
 
   /**
-   * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}) 
+   * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}) 
    * and a few default entries for the stem exclusion table.
    * 
    */

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java Wed Feb  9 09:35:27 2011
@@ -113,6 +113,7 @@ public class PatternReplaceCharFilter ex
     }
   }
 
+  @Override
   public int read() throws IOException {
     while( prepareReplaceBlock() ){
       return replaceBlockBuffer.charAt( replaceBlockBufferOffset++ );
@@ -120,6 +121,7 @@ public class PatternReplaceCharFilter ex
     return -1;
   }
 
+  @Override
   public int read(char[] cbuf, int off, int len) throws IOException {
     char[] tmp = new char[len];
     int l = input.read(tmp, 0, len);

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java Wed Feb  9 09:35:27 2011
@@ -1,10 +1,5 @@
 package org.apache.lucene.analysis.pt;
 
-import java.util.Arrays;
-
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -31,89 +26,14 @@ import org.apache.lucene.util.Version;
  * which is just the plural reduction step of the RSLP
  * algorithm from <i>A Stemming Algorithmm for the Portuguese Language</i>,
  * Orengo et al.
+ * @see RSLPStemmerBase
  */
-public class PortugueseMinimalStemmer {
+public class PortugueseMinimalStemmer extends RSLPStemmerBase {
   
-  private static final CharArraySet excIS = new CharArraySet(Version.LUCENE_31,
-      Arrays.asList("lápis", "cais", "mais", "crúcis", "biquínis", "pois", 
-          "depois","dois","leis"),
-      false);
-  
-  private static final CharArraySet excS = new CharArraySet(Version.LUCENE_31,
-      Arrays.asList("aliás", "pires", "lápis", "cais", "mais", "mas", "menos",
-          "férias", "fezes", "pêsames", "crúcis", "gás", "atrás", "moisés",
-          "através", "convés", "ês", "país", "após", "ambas", "ambos",
-          "messias", "depois"), 
-      false);
+  private static final Step pluralStep = 
+    parse(PortugueseMinimalStemmer.class, "portuguese.rslp").get("Plural");
   
   public int stem(char s[], int len) {
-    if (len < 3 || s[len-1] != 's')
-      return len;
-    
-    if (s[len-2] == 'n') {
-      len--;
-      s[len-1] = 'm';
-      return len;
-    }
-    
-    if (len >= 6 && s[len-3] == 'õ' && s[len-2] == 'e') {
-      len--;
-      s[len-2] = 'ã';
-      s[len-1] = 'o';
-      return len;
-    }
-      
-    if (len >= 4 && s[len-3] == 'ã' && s[len-2] == 'e')
-      if (!(len == 4 && s[0] == 'm')) {
-        len--;
-        s[len-1] = 'o';
-        return len;
-      }
-    
-    if (len >= 4 && s[len-2] == 'i') {
-      if (s[len-3] == 'a')
-        if (!(len == 4 && (s[0] == 'c' || s[0] == 'm'))) {
-          len--;
-          s[len-1] = 'l';
-          return len;
-        }
-   
-      if (len >= 5 && s[len-3] == 'é') {
-        len--;
-        s[len-2] = 'e';
-        s[len-1] = 'l';
-        return len;
-      }
-    
-      if (len >= 5 && s[len-3] == 'e') {
-        len--;
-        s[len-1] = 'l';
-        return len;
-      }
-    
-      if (len >= 5 && s[len-3] == 'ó') {
-        len--;
-        s[len-2] = 'o';
-        s[len-1] = 'l';
-        return len;
-      }
-  
-      if (!excIS.contains(s, 0, len)) {
-        s[len-1] = 'l';
-        return len;
-      }
-    }
-    
-    if (len >= 6 && s[len-3] == 'l' && s[len-2] == 'e')
-      return len - 2;
-    
-    if (len >= 6 && s[len-3] == 'r' && s[len-2] == 'e')
-      if (!(len == 7 && s[0] == 'á' && s[1] == 'r' && s[2] == 'v' && s[3] == 'o'))
-        return len - 2;
-      
-    if (excS.contains(s, 0, len))
-      return len;
-    else
-      return len-1;
+    return pluralStep.apply(s, len);
   }
 }

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Wed Feb  9 09:35:27 2011
@@ -15,8 +15,8 @@
  */
 
 // Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Saturday, December 4, 2010 12:34:19 PM UTC
-// generated on Sunday, December 5, 2010 12:24:12 AM UTC
+// file version from Wednesday, January 5, 2011 12:34:09 PM UTC
+// generated on Thursday, January 6, 2011 5:09:41 AM UTC
 // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
 
 ASCIITLD = "." (
@@ -306,6 +306,7 @@ ASCIITLD = "." (
 	| [xX][nN]--[pP]1[aA][iI]
 	| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
 	| [xX][nN]--[wW][gG][bB][hH]1[cC]
+	| [xX][nN]--[wW][gG][bB][lL]6[aA]
 	| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
 	| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
 	| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Wed Feb  9 09:35:27 2011
@@ -34,14 +34,6 @@ import org.apache.lucene.util.Version;
  * Unicode Text Segmentation algorithm, as specified in 
  * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
  * <p/>
- * <b>WARNING</b>: Because JFlex does not support Unicode supplementary 
- * characters (characters above the Basic Multilingual Plane, which contains
- * those up to and including U+FFFF), this scanner will not recognize them
- * properly.  If you need to be able to process text containing supplementary 
- * characters, consider using the ICU4J-backed implementation in modules/analysis/icu  
- * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer)
- * instead of this class, since the ICU4J-backed implementation does not have
- * this limitation.
  * <p>Many applications have specific tokenizer needs.  If this tokenizer does
  * not suit your application, please consider copying this source code
  * directory to your project and maintaining your own grammar-based tokenizer.

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java Wed Feb  9 09:35:27 2011
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 12/4/10 7:24 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/6/11 12:09 AM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -35,14 +35,6 @@ import org.apache.lucene.analysis.tokena
  *   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  * </ul>
- * <b>WARNING</b>: Because JFlex does not support Unicode supplementary 
- * characters (characters above the Basic Multilingual Plane, which contains
- * those up to and including U+FFFF), this scanner will not recognize them
- * properly.  If you need to be able to process text containing supplementary 
- * characters, consider using the ICU4J-backed implementation in modules/analysis/icu  
- * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer)
- * instead of this class, since the ICU4J-backed implementation does not have
- * this limitation.
  */
 
 public final class StandardTokenizerImpl implements StandardTokenizerInterface {
@@ -70,115 +62,138 @@ public final class StandardTokenizerImpl
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\47\0\1\7\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6"+
-    "\5\0\32\1\4\0\1\10\1\0\32\1\57\0\1\1\2\0\1\2"+
-    "\7\0\1\1\1\0\1\5\2\0\1\1\5\0\27\1\1\0\37\1"+
-    "\1\0\u01ca\1\4\0\14\1\16\0\5\1\7\0\1\1\1\0\1\1"+
-    "\21\0\160\2\5\1\1\0\2\1\2\0\4\1\1\6\7\0\1\1"+
-    "\1\5\3\1\1\0\1\1\1\0\24\1\1\0\123\1\1\0\213\1"+
-    "\1\0\7\2\236\1\11\0\46\1\2\0\1\1\7\0\47\1\1\0"+
-    "\1\6\7\0\55\2\1\0\1\2\1\0\2\2\1\0\2\2\1\0"+
-    "\1\2\10\0\33\1\5\0\4\1\1\5\13\0\4\2\10\0\2\6"+
-    "\2\0\13\2\5\0\53\1\25\2\12\3\1\0\1\3\1\6\1\0"+
-    "\2\1\1\2\143\1\1\0\1\1\10\2\1\0\6\2\2\1\2\2"+
-    "\1\0\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2\1\1"+
-    "\1\2\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3\41\1"+
-    "\11\2\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2\1\1"+
-    "\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\244\0\4\2"+
-    "\66\1\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0\12\3"+
-    "\1\0\7\1\1\0\7\1\1\0\3\2\1\0\10\1\2\0\2\1"+
-    "\2\0\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0\1\2"+
-    "\1\1\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2\4\0"+
-    "\2\1\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2\1\0"+
-    "\6\1\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1\1\0"+
-    "\2\1\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2\2\0"+
-    "\3\2\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3\2\2"+
-    "\3\1\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0\26\1"+
-    "\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\10\2"+
-    "\1\0\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2\2\0"+
-    "\12\3\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0"+
-    "\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0"+
-    "\2\2\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2"+
-    "\2\0\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0"+
-    "\3\1\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0"+
-    "\2\1\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0"+
-    "\4\2\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2\1\0"+
-    "\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\3\0"+
-    "\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0\2\1"+
-    "\6\0\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0"+
-    "\3\1\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2\1\1"+
-    "\7\2\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1\1\0"+
-    "\2\1\2\2\2\0\12\3\1\0\2\1\17\0\2\2\1\0\10\1"+
-    "\1\0\3\1\1\0\51\1\2\0\1\1\7\2\1\0\3\2\1\0"+
-    "\4\2\1\1\10\0\1\2\10\0\2\1\2\2\2\0\12\3\12\0"+
-    "\6\1\2\0\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0"+
-    "\1\1\2\0\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0"+
-    "\10\2\22\0\2\2\15\0\60\11\1\12\2\11\7\12\5\0\7\11"+
-    "\10\12\1\0\12\3\47\0\2\11\1\0\1\11\2\0\2\11\1\0"+
-    "\1\11\2\0\1\11\6\0\4\11\1\0\7\11\1\0\3\11\1\0"+
-    "\1\11\1\0\1\11\2\0\2\11\1\0\4\11\1\12\2\11\6\12"+
-    "\1\0\2\12\1\11\2\0\5\11\1\0\1\11\1\0\6\12\2\0"+
-    "\12\3\2\0\2\11\42\0\1\1\27\0\2\2\6\0\12\3\13\0"+
-    "\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1"+
-    "\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0\1\2"+
-    "\71\0\53\11\24\12\1\11\12\3\6\0\6\11\4\12\4\11\3\12"+
-    "\1\11\3\12\2\11\7\12\3\11\4\12\15\11\14\12\1\11\1\12"+
-    "\12\3\4\12\2\11\46\1\12\0\53\1\1\0\1\1\3\0\u0149\1"+
-    "\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1"+
-    "\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1"+
-    "\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1"+
-    "\2\0\3\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1"+
-    "\1\0\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0\4\1"+
-    "\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1\1\0"+
-    "\3\1\1\0\2\2\14\0\64\11\40\12\3\0\1\11\4\0\1\11"+
-    "\1\12\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1\10\0"+
-    "\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2\4\0"+
-    "\14\2\12\0\12\3\36\11\2\0\5\11\13\0\54\11\4\0\21\12"+
-    "\7\11\2\12\6\0\12\3\1\11\3\0\2\11\40\0\27\1\5\2"+
-    "\4\0\65\11\12\12\1\0\35\12\2\0\1\2\12\3\6\0\12\3"+
-    "\6\0\16\11\122\0\5\2\57\1\21\2\7\1\4\0\12\3\21\0"+
-    "\11\2\14\0\3\2\36\1\12\2\3\0\2\1\12\3\6\0\46\1"+
-    "\16\2\14\0\44\1\24\2\10\0\12\3\3\0\3\1\12\3\44\1"+
-    "\122\0\3\2\1\0\25\2\4\1\1\2\4\1\1\2\15\0\300\1"+
-    "\47\2\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1"+
-    "\2\0\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1"+
-    "\2\0\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1"+
-    "\3\0\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1"+
-    "\17\0\4\2\10\0\2\7\12\0\1\7\2\0\1\5\2\0\5\2"+
-    "\20\0\2\10\3\0\1\6\17\0\1\10\13\0\5\2\5\0\6\2"+
-    "\1\0\1\1\15\0\1\1\20\0\15\1\63\0\41\2\21\0\1\1"+
-    "\4\0\1\1\2\0\12\1\1\0\1\1\3\0\5\1\6\0\1\1"+
-    "\1\0\1\1\1\0\1\1\1\0\4\1\1\0\13\1\2\0\4\1"+
-    "\5\0\5\1\4\0\1\1\21\0\51\1\u032d\0\64\1\u0716\0\57\1"+
-    "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\16\0\46\1\12\0"+
-    "\66\1\11\0\1\1\17\0\1\2\27\1\11\0\7\1\1\0\7\1"+
-    "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
-    "\1\0\7\1\1\0\40\2\57\0\1\1\120\0\32\13\1\0\131\13"+
-    "\14\0\326\13\57\0\1\1\1\0\1\13\31\0\11\13\6\2\1\0"+
-    "\5\4\2\0\3\13\1\1\1\1\4\0\126\14\2\0\2\2\2\4"+
-    "\3\14\133\4\1\0\4\4\5\0\51\1\3\0\136\1\21\0\33\1"+
-    "\65\0\20\4\320\0\57\4\1\0\130\4\250\0\u19b6\13\112\0\u51cc\13"+
-    "\64\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\3\2\1"+
-    "\24\0\57\1\4\2\11\0\2\2\1\0\31\1\10\0\120\1\2\2"+
-    "\45\0\11\1\2\0\147\1\2\0\4\1\1\0\2\1\16\0\12\1"+
-    "\120\0\10\1\1\2\3\1\1\2\4\1\1\2\27\1\5\2\30\0"+
-    "\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0\22\2\6\1"+
-    "\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1\15\2\14\0"+
-    "\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3\46\0\51\1"+
-    "\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3\6\0\33\11"+
-    "\1\12\4\0\60\11\1\12\1\11\3\12\2\11\2\12\5\11\2\12"+
-    "\1\11\1\12\1\11\30\0\5\11\41\0\6\1\2\0\6\1\2\0"+
-    "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+
-    "\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u012e\13"+
-    "\2\0\76\13\2\0\152\13\46\0\7\1\14\0\5\1\5\0\1\1"+
-    "\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1\1\0\2\1"+
-    "\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0\66\1"+
-    "\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6\13\0\7\2"+
-    "\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0\1\6\1\5"+
-    "\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7\4\0\1\6"+
-    "\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1\4\0\1\10"+
-    "\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1\2\0\6\1"+
-    "\2\0\6\1\2\0\3\1\34\0\3\2\4\0";
+    "\47\0\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137"+
+    "\5\0\32\132\4\0\1\141\1\0\32\132\57\0\1\132\2\0\1\133"+
+    "\7\0\1\132\1\0\1\136\2\0\1\132\5\0\27\132\1\0\37\132"+
+    "\1\0\u01ca\132\4\0\14\132\16\0\5\132\7\0\1\132\1\0\1\132"+
+    "\21\0\160\133\5\132\1\0\2\132\2\0\4\132\1\137\7\0\1\132"+
+    "\1\136\3\132\1\0\1\132\1\0\24\132\1\0\123\132\1\0\213\132"+
+    "\1\0\7\133\236\132\11\0\46\132\2\0\1\132\7\0\47\132\1\0"+
+    "\1\137\7\0\55\133\1\0\1\133\1\0\2\133\1\0\2\133\1\0"+
+    "\1\133\10\0\33\132\5\0\4\132\1\136\13\0\4\133\10\0\2\137"+
+    "\2\0\13\133\5\0\53\132\25\133\12\134\1\0\1\134\1\137\1\0"+
+    "\2\132\1\133\143\132\1\0\1\132\7\133\1\133\1\0\6\133\2\132"+
+    "\2\133\1\0\4\133\2\132\12\134\3\132\2\0\1\132\17\0\1\133"+
+    "\1\132\1\133\36\132\33\133\2\0\131\132\13\133\1\132\16\0\12\134"+
+    "\41\132\11\133\2\132\2\0\1\137\1\0\1\132\5\0\26\132\4\133"+
+    "\1\132\11\133\1\132\3\133\1\132\5\133\22\0\31\132\3\133\244\0"+
+    "\4\133\66\132\3\133\1\132\22\133\1\132\7\133\12\132\2\133\2\0"+
+    "\12\134\1\0\7\132\1\0\7\132\1\0\3\133\1\0\10\132\2\0"+
+    "\2\132\2\0\26\132\1\0\7\132\1\0\1\132\3\0\4\132\2\0"+
+    "\1\133\1\132\7\133\2\0\2\133\2\0\3\133\1\132\10\0\1\133"+
+    "\4\0\2\132\1\0\3\132\2\133\2\0\12\134\2\132\17\0\3\133"+
+    "\1\0\6\132\4\0\2\132\2\0\26\132\1\0\7\132\1\0\2\132"+
+    "\1\0\2\132\1\0\2\132\2\0\1\133\1\0\5\133\4\0\2\133"+
+    "\2\0\3\133\3\0\1\133\7\0\4\132\1\0\1\132\7\0\12\134"+
+    "\2\133\3\132\1\133\13\0\3\133\1\0\11\132\1\0\3\132\1\0"+
+    "\26\132\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132"+
+    "\10\133\1\0\3\133\1\0\3\133\2\0\1\132\17\0\2\132\2\133"+
+    "\2\0\12\134\21\0\3\133\1\0\10\132\2\0\2\132\2\0\26\132"+
+    "\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132\7\133"+
+    "\2\0\2\133\2\0\3\133\10\0\2\133\4\0\2\132\1\0\3\132"+
+    "\2\133\2\0\12\134\1\0\1\132\20\0\1\133\1\132\1\0\6\132"+
+    "\3\0\3\132\1\0\4\132\3\0\2\132\1\0\1\132\1\0\2\132"+
+    "\3\0\2\132\3\0\3\132\3\0\14\132\4\0\5\133\3\0\3\133"+
+    "\1\0\4\133\2\0\1\132\6\0\1\133\16\0\12\134\21\0\3\133"+
+    "\1\0\10\132\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132"+
+    "\3\0\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\1\0"+
+    "\2\132\6\0\2\132\2\133\2\0\12\134\22\0\2\133\1\0\10\132"+
+    "\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132\2\0\1\133"+
+    "\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\7\0\1\132"+
+    "\1\0\2\132\2\133\2\0\12\134\1\0\2\132\17\0\2\133\1\0"+
+    "\10\132\1\0\3\132\1\0\51\132\2\0\1\132\7\133\1\0\3\133"+
+    "\1\0\4\133\1\132\10\0\1\133\10\0\2\132\2\133\2\0\12\134"+
+    "\12\0\6\132\2\0\2\133\1\0\22\132\3\0\30\132\1\0\11\132"+
+    "\1\0\1\132\2\0\7\132\3\0\1\133\4\0\6\133\1\0\1\133"+
+    "\1\0\10\133\22\0\2\133\15\0\60\142\1\143\2\142\7\143\5\0"+
+    "\7\142\10\143\1\0\12\134\47\0\2\142\1\0\1\142\2\0\2\142"+
+    "\1\0\1\142\2\0\1\142\6\0\4\142\1\0\7\142\1\0\3\142"+
+    "\1\0\1\142\1\0\1\142\2\0\2\142\1\0\4\142\1\143\2\142"+
+    "\6\143\1\0\2\143\1\142\2\0\5\142\1\0\1\142\1\0\6\143"+
+    "\2\0\12\134\2\0\2\142\42\0\1\132\27\0\2\133\6\0\12\134"+
+    "\13\0\1\133\1\0\1\133\1\0\1\133\4\0\2\133\10\132\1\0"+
+    "\44\132\4\0\24\133\1\0\2\133\5\132\13\133\1\0\44\133\11\0"+
+    "\1\133\71\0\53\142\24\143\1\142\12\134\6\0\6\142\4\143\4\142"+
+    "\3\143\1\142\3\143\2\142\7\143\3\142\4\143\15\142\14\143\1\142"+
+    "\1\143\12\134\4\143\2\142\46\132\12\0\53\132\1\0\1\132\3\0"+
+    "\u0149\132\1\0\4\132\2\0\7\132\1\0\1\132\1\0\4\132\2\0"+
+    "\51\132\1\0\4\132\2\0\41\132\1\0\4\132\2\0\7\132\1\0"+
+    "\1\132\1\0\4\132\2\0\17\132\1\0\71\132\1\0\4\132\2\0"+
+    "\103\132\2\0\3\133\40\0\20\132\20\0\125\132\14\0\u026c\132\2\0"+
+    "\21\132\1\0\32\132\5\0\113\132\3\0\3\132\17\0\15\132\1\0"+
+    "\4\132\3\133\13\0\22\132\3\133\13\0\22\132\2\133\14\0\15\132"+
+    "\1\0\3\132\1\0\2\133\14\0\64\142\2\143\36\143\3\0\1\142"+
+    "\4\0\1\142\1\143\2\0\12\134\41\0\3\133\2\0\12\134\6\0"+
+    "\130\132\10\0\51\132\1\133\1\132\5\0\106\132\12\0\35\132\3\0"+
+    "\14\133\4\0\14\133\12\0\12\134\36\142\2\0\5\142\13\0\54\142"+
+    "\4\0\21\143\7\142\2\143\6\0\12\134\1\142\3\0\2\142\40\0"+
+    "\27\132\5\133\4\0\65\142\12\143\1\0\35\143\2\0\1\133\12\134"+
+    "\6\0\12\134\6\0\16\142\122\0\5\133\57\132\21\133\7\132\4\0"+
+    "\12\134\21\0\11\133\14\0\3\133\36\132\12\133\3\0\2\132\12\134"+
+    "\6\0\46\132\16\133\14\0\44\132\24\133\10\0\12\134\3\0\3\132"+
+    "\12\134\44\132\122\0\3\133\1\0\25\133\4\132\1\133\4\132\1\133"+
+    "\15\0\300\132\47\133\25\0\4\133\u0116\132\2\0\6\132\2\0\46\132"+
+    "\2\0\6\132\2\0\10\132\1\0\1\132\1\0\1\132\1\0\1\132"+
+    "\1\0\37\132\2\0\65\132\1\0\7\132\1\0\1\132\3\0\3\132"+
+    "\1\0\7\132\3\0\4\132\2\0\6\132\4\0\15\132\5\0\3\132"+
+    "\1\0\7\132\17\0\2\133\2\133\10\0\2\140\12\0\1\140\2\0"+
+    "\1\136\2\0\5\133\20\0\2\141\3\0\1\137\17\0\1\141\13\0"+
+    "\5\133\5\0\6\133\1\0\1\132\15\0\1\132\20\0\15\132\63\0"+
+    "\41\133\21\0\1\132\4\0\1\132\2\0\12\132\1\0\1\132\3\0"+
+    "\5\132\6\0\1\132\1\0\1\132\1\0\1\132\1\0\4\132\1\0"+
+    "\13\132\2\0\4\132\5\0\5\132\4\0\1\132\21\0\51\132\u032d\0"+
+    "\64\132\u0716\0\57\132\1\0\57\132\1\0\205\132\6\0\4\132\3\133"+
+    "\16\0\46\132\12\0\66\132\11\0\1\132\17\0\1\133\27\132\11\0"+
+    "\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0"+
+    "\7\132\1\0\7\132\1\0\7\132\1\0\40\133\57\0\1\132\120\0"+
+    "\32\144\1\0\131\144\14\0\326\144\57\0\1\132\1\0\1\144\31\0"+
+    "\11\144\6\133\1\0\5\135\2\0\3\144\1\132\1\132\4\0\126\145"+
+    "\2\0\2\133\2\135\3\145\133\135\1\0\4\135\5\0\51\132\3\0"+
+    "\136\132\21\0\33\132\65\0\20\135\320\0\57\135\1\0\130\135\250\0"+
+    "\u19b6\144\112\0\u51cc\144\64\0\u048d\132\103\0\56\132\2\0\u010d\132\3\0"+
+    "\20\132\12\134\2\132\24\0\57\132\4\133\11\0\2\133\1\0\31\132"+
+    "\10\0\120\132\2\133\45\0\11\132\2\0\147\132\2\0\4\132\1\0"+
+    "\2\132\16\0\12\132\120\0\10\132\1\133\3\132\1\133\4\132\1\133"+
+    "\27\132\5\133\30\0\64\132\14\0\2\133\62\132\21\133\13\0\12\134"+
+    "\6\0\22\133\6\132\3\0\1\132\4\0\12\134\34\132\10\133\2\0"+
+    "\27\132\15\133\14\0\35\132\3\0\4\133\57\132\16\133\16\0\1\132"+
+    "\12\134\46\0\51\132\16\133\11\0\3\132\1\133\10\132\2\133\2\0"+
+    "\12\134\6\0\33\142\1\143\4\0\60\142\1\143\1\142\3\143\2\142"+
+    "\2\143\5\142\2\143\1\142\1\143\1\142\30\0\5\142\41\0\6\132"+
+    "\2\0\6\132\2\0\6\132\11\0\7\132\1\0\7\132\221\0\43\132"+
+    "\10\133\1\0\2\133\2\0\12\134\6\0\u2ba4\132\14\0\27\132\4\0"+
+    "\61\132\4\0\1\31\1\25\1\46\1\43\1\13\3\0\1\7\1\5"+
+    "\2\0\1\3\1\1\14\0\1\11\21\0\1\112\7\0\1\65\1\17"+
+    "\6\0\1\130\3\0\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
+    "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
+    "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
+    "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
+    "\1\120\1\120\1\120\1\120\1\121\1\120\1\120\1\120\1\125\1\123"+
+    "\17\0\1\114\u02c1\0\1\70\277\0\1\113\1\71\1\2\3\124\2\35"+
+    "\1\124\1\35\2\124\1\14\21\124\2\60\7\73\1\72\7\73\7\52"+
+    "\1\15\1\52\1\75\2\45\1\44\1\75\1\45\1\44\10\75\2\63"+
+    "\5\61\2\54\5\61\1\6\10\37\5\21\3\27\12\106\20\27\3\42"+
+    "\32\30\1\26\2\24\2\110\1\111\2\110\2\111\2\110\1\111\3\24"+
+    "\1\16\2\24\12\64\1\74\1\41\1\34\1\64\6\41\1\34\66\41"+
+    "\5\115\6\103\1\51\4\103\2\51\10\103\1\51\7\100\1\12\2\100"+
+    "\32\103\1\12\4\100\1\12\5\102\1\101\1\102\3\101\7\102\1\101"+
+    "\23\102\5\67\3\102\6\67\2\67\6\66\10\66\2\100\7\66\36\100"+
+    "\4\66\102\100\15\115\1\77\2\115\1\131\3\117\1\115\2\117\5\115"+
+    "\4\117\4\116\1\115\3\116\1\115\5\116\26\56\4\23\1\105\2\104"+
+    "\4\122\1\104\2\122\3\76\33\122\35\55\3\122\35\126\3\122\6\126"+
+    "\2\33\31\126\1\33\17\126\6\122\4\22\1\10\37\22\1\10\4\22"+
+    "\25\62\1\127\11\62\21\55\5\62\1\57\12\40\13\62\4\55\1\50"+
+    "\6\55\12\122\17\55\1\47\3\53\15\20\11\36\1\32\24\36\2\20"+
+    "\11\36\1\32\31\36\1\32\4\20\4\36\2\32\2\107\1\4\5\107"+
+    "\52\4\u1900\0\u012e\144\2\0\76\144\2\0\152\144\46\0\7\132\14\0"+
+    "\5\132\5\0\1\132\1\133\12\132\1\0\15\132\1\0\5\132\1\0"+
+    "\1\132\1\0\2\132\1\0\2\132\1\0\154\132\41\0\u016b\132\22\0"+
+    "\100\132\2\0\66\132\50\0\14\132\4\0\20\133\1\137\2\0\1\136"+
+    "\1\137\13\0\7\133\14\0\2\141\30\0\3\141\1\137\1\0\1\140"+
+    "\1\0\1\137\1\136\32\0\5\132\1\0\207\132\2\0\1\133\7\0"+
+    "\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137\5\0"+
+    "\32\132\4\0\1\141\1\0\32\132\13\0\70\135\2\133\37\132\3\0"+
+    "\6\132\2\0\6\132\2\0\6\132\2\0\3\132\34\0\3\133\4\0";
 
   /** 
    * Translates characters to character classes
@@ -191,11 +206,11 @@ public final class StandardTokenizerImpl
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\1\0\1\1\1\2\1\3\1\2\1\1\1\4\1\5"+
-    "\1\6\1\2\1\0\1\2\1\0\1\3\2\0";
+    "\1\0\23\1\1\2\1\3\1\2\1\1\1\4\1\5"+
+    "\1\6\15\0\1\2\1\0\1\2\10\0\1\3\61\0";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[16];
+    int [] result = new int[101];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -220,11 +235,22 @@ public final class StandardTokenizerImpl
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\15\0\32\0\47\0\64\0\101\0\116\0\15"+
-    "\0\15\0\133\0\150\0\165\0\202\0\217\0\101\0\234";
+    "\0\0\0\146\0\314\0\u0132\0\u0198\0\u01fe\0\u0264\0\u02ca"+
+    "\0\u0330\0\u0396\0\u03fc\0\u0462\0\u04c8\0\u052e\0\u0594\0\u05fa"+
+    "\0\u0660\0\u06c6\0\u072c\0\u0792\0\u07f8\0\u085e\0\u08c4\0\u092a"+
+    "\0\u0990\0\146\0\146\0\314\0\u0132\0\u0198\0\u01fe\0\u0264"+
+    "\0\u09f6\0\u0a5c\0\u0ac2\0\u0b28\0\u0462\0\u0b8e\0\u0bf4\0\u0c5a"+
+    "\0\u0cc0\0\u0d26\0\u0d8c\0\u0df2\0\u0330\0\u0396\0\u0e58\0\u0ebe"+
+    "\0\u0f24\0\u0f8a\0\u0ff0\0\u1056\0\u10bc\0\u1122\0\u1188\0\u11ee"+
+    "\0\u1254\0\u12ba\0\u1320\0\u1386\0\u13ec\0\u1452\0\u14b8\0\u092a"+
+    "\0\u151e\0\u1584\0\u15ea\0\u1650\0\u16b6\0\u171c\0\u1782\0\u17e8"+
+    "\0\u184e\0\u18b4\0\u191a\0\u1980\0\u19e6\0\u1a4c\0\u1ab2\0\u1b18"+
+    "\0\u1b7e\0\u1be4\0\u1c4a\0\u1cb0\0\u1d16\0\u1d7c\0\u1de2\0\u1e48"+
+    "\0\u1eae\0\u1f14\0\u1f7a\0\u1fe0\0\u2046\0\u20ac\0\u2112\0\u2178"+
+    "\0\u21de\0\u2244\0\u22aa\0\u2310\0\u2376";
 
   private static int [] zzUnpackRowMap() {
-    int [] result = new int[16];
+    int [] result = new int[101];
     int offset = 0;
     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
     return result;
@@ -247,21 +273,280 @@ public final class StandardTokenizerImpl
   private static final int [] ZZ_TRANS = zzUnpackTrans();
 
   private static final String ZZ_TRANS_PACKED_0 =
-    "\1\2\1\3\1\2\1\4\1\5\3\2\1\6\2\7"+
-    "\1\10\1\11\16\0\2\3\1\12\1\0\1\13\1\0"+
-    "\1\13\1\14\1\0\1\3\3\0\1\3\2\4\2\0"+
-    "\2\15\1\16\1\0\1\4\4\0\1\5\1\0\1\5"+
-    "\3\0\1\14\1\0\1\5\3\0\1\3\1\17\1\4"+
-    "\1\5\3\0\1\17\1\0\1\17\13\0\2\7\3\0"+
-    "\1\3\2\12\2\0\2\20\1\14\1\0\1\12\3\0"+
-    "\1\3\1\13\7\0\1\13\3\0\1\3\1\14\1\12"+
-    "\1\5\3\0\1\14\1\0\1\14\4\0\1\15\1\4"+
-    "\6\0\1\15\3\0\1\3\1\16\1\4\1\5\3\0"+
-    "\1\16\1\0\1\16\4\0\1\20\1\12\6\0\1\20"+
-    "\2\0";
+    "\1\2\1\3\1\2\1\4\1\2\1\5\1\2\1\6"+
+    "\1\2\1\7\1\2\1\10\3\2\1\11\5\2\1\12"+
+    "\3\2\1\13\11\2\1\14\2\2\1\15\43\2\1\16"+
+    "\1\2\1\17\3\2\1\20\1\21\1\2\1\22\1\2"+
+    "\1\23\2\2\1\24\1\2\1\25\1\2\1\26\1\27"+
+    "\3\2\1\30\2\31\1\32\1\33\150\0\1\25\11\0"+
+    "\1\25\20\0\1\25\22\0\1\25\10\0\3\25\17\0"+
+    "\1\25\10\0\1\25\23\0\1\25\1\0\1\25\1\0"+
+    "\1\25\1\0\1\25\1\0\1\25\1\0\3\25\1\0"+
+    "\5\25\1\0\3\25\1\0\11\25\1\0\2\25\1\0"+
+    "\16\25\1\0\2\25\1\0\21\25\1\0\1\25\1\0"+
+    "\3\25\2\0\1\25\1\0\1\25\1\0\2\25\1\0"+
+    "\1\25\16\0\1\25\3\0\1\25\5\0\2\25\3\0"+
+    "\1\25\13\0\1\25\1\0\1\25\4\0\2\25\4\0"+
+    "\1\25\1\0\1\25\3\0\2\25\1\0\1\25\5\0"+
+    "\3\25\1\0\1\25\15\0\1\25\10\0\1\25\23\0"+
+    "\1\25\3\0\1\25\1\0\1\25\1\0\1\25\1\0"+
+    "\3\25\2\0\4\25\1\0\3\25\2\0\3\25\1\0"+
+    "\4\25\1\0\2\25\2\0\3\25\1\0\11\25\1\0"+
+    "\2\25\1\0\16\25\1\0\2\25\1\0\1\25\1\0"+
+    "\3\25\2\0\1\25\1\0\1\25\1\0\2\25\1\0"+
+    "\1\25\16\0\1\25\3\0\1\25\3\0\1\25\1\0"+
+    "\3\25\2\0\1\25\1\0\2\25\1\0\3\25\3\0"+
+    "\2\25\1\0\1\25\1\0\2\25\1\0\2\25\3\0"+
+    "\2\25\1\0\1\25\1\0\1\25\1\0\2\25\1\0"+
+    "\2\25\1\0\2\25\1\0\5\25\1\0\5\25\1\0"+
+    "\2\25\1\0\2\25\1\0\1\25\1\0\3\25\4\0"+
+    "\1\25\4\0\1\25\30\0\3\25\5\0\1\25\1\0"+
+    "\1\25\1\0\1\25\4\0\1\25\14\0\1\25\5\0"+
+    "\1\25\11\0\2\25\12\0\1\26\1\0\2\25\12\0"+
+    "\1\25\23\0\1\25\1\0\1\26\7\0\2\25\2\0"+
+    "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
+    "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
+    "\4\25\1\0\5\25\1\26\1\0\1\25\1\0\1\25"+
+    "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\1\25"+
+    "\2\0\1\25\16\0\1\25\3\0\1\25\5\0\2\25"+
+    "\3\0\1\25\4\0\3\25\4\0\1\25\1\0\1\25"+
+    "\2\0\1\25\1\0\2\25\4\0\1\25\1\0\1\25"+
+    "\3\0\2\25\1\0\1\25\5\0\3\25\1\0\1\25"+
+    "\10\0\1\25\1\0\2\26\1\0\1\25\10\0\1\25"+
+    "\23\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
+    "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
+    "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
+    "\1\0\3\25\3\0\2\25\4\0\3\25\1\0\1\25"+
+    "\10\0\1\25\1\0\2\25\20\0\1\25\11\0\2\25"+
+    "\17\0\1\25\6\0\2\25\4\0\1\25\5\0\1\25"+
+    "\2\0\1\25\5\0\3\25\1\0\1\25\15\0\1\25"+
+    "\10\0\1\25\23\0\1\25\3\0\1\25\5\0\1\25"+
+    "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
+    "\7\0\1\25\2\0\1\25\5\0\1\25\2\0\1\25"+
+    "\1\0\1\25\105\0\1\33\21\0\1\27\34\0\1\32"+
+    "\3\0\1\32\3\0\1\32\1\0\3\32\2\0\1\32"+
+    "\2\0\1\32\1\0\3\32\3\0\2\32\1\0\1\32"+
+    "\1\0\2\32\1\0\2\32\3\0\2\32\1\0\1\32"+
+    "\3\0\2\32\1\0\2\32\1\0\2\32\1\0\5\32"+
+    "\1\0\5\32\2\0\1\32\1\0\2\32\1\0\1\32"+
+    "\1\0\3\32\4\0\1\32\4\0\1\32\16\0\1\32"+
+    "\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+
+    "\1\0\3\32\1\0\5\32\1\0\3\32\1\0\11\32"+
+    "\1\0\2\32\1\0\16\32\1\0\2\32\1\0\21\32"+
+    "\1\0\1\32\1\0\3\32\2\0\1\32\1\0\1\32"+
+    "\1\0\2\32\1\0\1\32\16\0\1\32\1\0\1\32"+
+    "\1\0\1\32\3\0\1\32\1\0\3\32\1\0\2\32"+
+    "\1\0\2\32\1\0\3\32\1\0\11\32\1\0\2\32"+
+    "\1\0\16\32\1\0\2\32\1\0\21\32\1\0\1\32"+
+    "\1\0\3\32\2\0\1\32\1\0\1\32\1\0\2\32"+
+    "\1\0\1\32\16\0\1\32\11\0\1\32\20\0\1\32"+
+    "\33\0\1\32\21\0\1\32\10\0\1\32\23\0\1\32"+
+    "\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+
+    "\1\0\3\32\1\0\5\32\1\0\3\32\1\0\6\32"+
+    "\1\0\2\32\1\0\2\32\1\0\10\32\1\0\5\32"+
+    "\1\0\2\32\1\0\21\32\1\0\1\32\1\0\3\32"+
+    "\2\0\1\32\1\0\1\32\1\0\2\32\1\0\1\32"+
+    "\145\0\1\33\15\0\1\34\1\0\1\35\1\0\1\36"+
+    "\1\0\1\37\1\0\1\40\1\0\1\41\3\0\1\42"+
+    "\5\0\1\43\3\0\1\44\11\0\1\45\2\0\1\46"+
+    "\16\0\1\47\2\0\1\50\41\0\2\25\1\51\1\0"+
+    "\1\52\1\0\1\52\1\53\1\0\1\25\3\0\1\34"+
+    "\1\0\1\35\1\0\1\36\1\0\1\37\1\0\1\40"+
+    "\1\0\1\54\3\0\1\55\5\0\1\56\3\0\1\57"+
+    "\11\0\1\45\2\0\1\60\16\0\1\61\2\0\1\62"+
+    "\41\0\1\25\2\26\2\0\2\63\1\64\1\0\1\26"+
+    "\15\0\1\65\15\0\1\66\14\0\1\67\16\0\1\70"+
+    "\2\0\1\71\21\0\1\72\20\0\1\27\1\0\1\27"+
+    "\3\0\1\53\1\0\1\27\3\0\1\34\1\0\1\35"+
+    "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\73"+
+    "\3\0\1\55\5\0\1\56\3\0\1\74\11\0\1\45"+
+    "\2\0\1\75\16\0\1\76\2\0\1\77\21\0\1\72"+
+    "\17\0\1\25\1\100\1\26\1\27\3\0\1\100\1\0"+
+    "\1\100\144\0\2\31\4\0\1\25\11\0\3\25\5\0"+
+    "\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+
+    "\1\25\1\0\2\25\4\0\1\25\5\0\1\25\3\0"+
+    "\1\25\4\0\5\25\10\0\1\51\1\0\2\25\1\0"+
+    "\1\25\10\0\1\25\23\0\1\25\1\0\1\51\7\0"+
+    "\2\25\2\0\5\25\2\0\2\25\4\0\6\25\1\0"+
+    "\2\25\4\0\5\25\1\0\5\25\1\0\2\25\1\0"+
+    "\3\25\1\0\4\25\1\0\5\25\1\51\1\0\1\25"+
+    "\1\0\1\25\1\0\3\25\2\0\1\25\1\0\1\25"+
+    "\1\0\1\25\2\0\1\25\16\0\1\25\3\0\1\25"+
+    "\5\0\2\25\3\0\1\25\4\0\3\25\4\0\1\25"+
+    "\1\0\1\25\2\0\1\25\1\0\2\25\4\0\1\25"+
+    "\1\0\1\25\3\0\2\25\1\0\1\25\5\0\3\25"+
+    "\1\0\1\25\10\0\1\25\1\0\2\51\1\0\1\25"+
+    "\10\0\1\25\23\0\1\25\3\0\1\25\6\0\2\25"+
+    "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
+    "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
+    "\1\0\1\25\1\0\3\25\1\0\1\25\1\0\2\25"+
+    "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
+    "\20\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
+    "\5\0\3\25\1\0\1\25\5\0\3\25\5\0\1\25"+
+    "\2\0\2\25\4\0\1\25\2\0\1\25\1\0\1\25"+
+    "\102\0\2\25\6\0\1\25\55\0\1\25\3\0\1\25"+
+    "\2\0\1\25\3\0\1\25\5\0\1\25\7\0\1\25"+
+    "\4\0\2\25\3\0\2\25\1\0\1\25\4\0\1\25"+
+    "\1\0\1\25\2\0\2\25\1\0\3\25\1\0\1\25"+
+    "\2\0\4\25\2\0\1\25\40\0\1\34\1\0\1\35"+
+    "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\101"+
+    "\3\0\1\42\5\0\1\43\3\0\1\102\11\0\1\45"+
+    "\2\0\1\103\16\0\1\104\2\0\1\105\41\0\1\25"+
+    "\2\51\2\0\2\106\1\53\1\0\1\51\3\0\1\34"+
+    "\1\0\1\35\1\0\1\36\1\0\1\37\1\0\1\40"+
+    "\1\0\1\107\3\0\1\110\5\0\1\111\3\0\1\112"+
+    "\11\0\1\45\2\0\1\113\16\0\1\114\2\0\1\115"+
+    "\41\0\1\25\1\52\7\0\1\52\3\0\1\34\1\0"+
+    "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
+    "\1\116\3\0\1\42\5\0\1\43\3\0\1\117\11\0"+
+    "\1\45\2\0\1\120\16\0\1\121\2\0\1\122\21\0"+
+    "\1\72\17\0\1\25\1\53\1\51\1\27\3\0\1\53"+
+    "\1\0\1\53\4\0\1\26\11\0\3\25\5\0\1\25"+
+    "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\26"+
+    "\1\0\2\26\4\0\1\25\5\0\1\25\3\0\1\26"+
+    "\4\0\1\26\2\25\2\26\10\0\1\26\1\0\2\25"+
+    "\1\0\1\26\10\0\1\25\23\0\1\25\3\0\1\25"+
+    "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
+    "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
+    "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\26"+
+    "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
+    "\1\0\2\25\20\0\1\25\3\0\1\25\5\0\1\25"+
+    "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
+    "\2\26\5\0\1\25\2\0\1\25\1\26\4\0\1\25"+
+    "\2\0\1\25\1\0\1\25\102\0\2\26\6\0\1\26"+
+    "\55\0\1\26\3\0\1\26\2\0\1\26\3\0\1\26"+
+    "\5\0\1\26\7\0\1\26\4\0\2\26\3\0\2\26"+
+    "\1\0\1\26\4\0\1\26\1\0\1\26\2\0\2\26"+
+    "\1\0\3\26\1\0\1\26\2\0\4\26\2\0\1\26"+
+    "\52\0\1\123\3\0\1\124\5\0\1\125\3\0\1\126"+
+    "\14\0\1\127\16\0\1\130\2\0\1\131\42\0\1\63"+
+    "\1\26\6\0\1\63\3\0\1\34\1\0\1\35\1\0"+
+    "\1\36\1\0\1\37\1\0\1\40\1\0\1\132\3\0"+
+    "\1\55\5\0\1\56\3\0\1\133\11\0\1\45\2\0"+
+    "\1\134\16\0\1\135\2\0\1\136\21\0\1\72\17\0"+
+    "\1\25\1\64\1\26\1\27\3\0\1\64\1\0\1\64"+
+    "\4\0\1\27\37\0\1\27\1\0\2\27\16\0\1\27"+
+    "\4\0\1\27\2\0\2\27\15\0\1\27\131\0\1\27"+
+    "\152\0\2\27\11\0\1\27\114\0\2\27\6\0\1\27"+
+    "\55\0\1\27\3\0\1\27\2\0\1\27\3\0\1\27"+
+    "\5\0\1\27\7\0\1\27\4\0\2\27\3\0\2\27"+
+    "\1\0\1\27\4\0\1\27\1\0\1\27\2\0\2\27"+
+    "\1\0\3\27\1\0\1\27\2\0\4\27\2\0\1\27"+
+    "\152\0\1\27\34\0\1\100\11\0\3\25\5\0\1\25"+
+    "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\100"+
+    "\1\0\2\100\4\0\1\25\5\0\1\25\3\0\1\100"+
+    "\4\0\1\100\2\25\2\100\10\0\1\26\1\0\2\25"+
+    "\1\0\1\100\10\0\1\25\23\0\1\25\3\0\1\25"+
+    "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
+    "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
+    "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\100"+
+    "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
+    "\1\0\2\25\20\0\1\25\3\0\1\25\5\0\1\25"+
+    "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
+    "\2\100\5\0\1\25\2\0\1\25\1\100\4\0\1\25"+
+    "\2\0\1\25\1\0\1\25\102\0\2\100\6\0\1\100"+
+    "\55\0\1\100\3\0\1\100\2\0\1\100\3\0\1\100"+
+    "\5\0\1\100\7\0\1\100\4\0\2\100\3\0\2\100"+
+    "\1\0\1\100\4\0\1\100\1\0\1\100\2\0\2\100"+
+    "\1\0\3\100\1\0\1\100\2\0\4\100\2\0\1\100"+
+    "\41\0\1\51\11\0\3\25\5\0\1\25\1\0\1\25"+
+    "\1\0\1\25\4\0\1\25\4\0\1\51\1\0\2\51"+
+    "\4\0\1\25\5\0\1\25\3\0\1\51\4\0\1\51"+
+    "\2\25\2\51\10\0\1\51\1\0\2\25\1\0\1\51"+
+    "\10\0\1\25\23\0\1\25\3\0\1\25\6\0\2\25"+
+    "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
+    "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
+    "\1\0\1\25\1\0\3\25\1\0\1\51\1\0\2\25"+
+    "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
+    "\20\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
+    "\5\0\3\25\1\0\1\25\5\0\1\25\2\51\5\0"+
+    "\1\25\2\0\1\25\1\51\4\0\1\25\2\0\1\25"+
+    "\1\0\1\25\102\0\2\51\6\0\1\51\55\0\1\51"+
+    "\3\0\1\51\2\0\1\51\3\0\1\51\5\0\1\51"+
+    "\7\0\1\51\4\0\2\51\3\0\2\51\1\0\1\51"+
+    "\4\0\1\51\1\0\1\51\2\0\2\51\1\0\3\51"+
+    "\1\0\1\51\2\0\4\51\2\0\1\51\52\0\1\137"+
+    "\3\0\1\140\5\0\1\141\3\0\1\142\14\0\1\143"+
+    "\16\0\1\144\2\0\1\145\42\0\1\106\1\51\6\0"+
+    "\1\106\4\0\1\52\11\0\3\25\5\0\1\25\1\0"+
+    "\1\25\1\0\1\25\4\0\1\25\4\0\1\52\1\0"+
+    "\2\52\4\0\1\25\5\0\1\25\3\0\1\52\4\0"+
+    "\1\52\2\25\2\52\12\0\2\25\1\0\1\52\10\0"+
+    "\1\25\23\0\1\25\11\0\2\25\2\0\5\25\2\0"+
+    "\2\25\4\0\6\25\1\0\2\25\4\0\5\25\1\0"+
+    "\5\25\1\0\2\25\1\0\3\25\1\0\4\25\1\0"+
+    "\5\25\2\0\1\25\1\0\1\25\1\0\3\25\2\0"+
+    "\1\25\1\0\1\25\1\0\1\25\2\0\1\25\16\0"+
+    "\1\25\3\0\1\25\5\0\2\25\3\0\1\25\4\0"+
+    "\3\25\4\0\1\25\1\0\1\25\2\0\1\25\1\0"+
+    "\2\25\4\0\1\25\1\0\1\25\3\0\2\25\1\0"+
+    "\1\25\5\0\3\25\1\0\1\25\10\0\1\25\4\0"+
+    "\1\25\10\0\1\25\23\0\1\25\3\0\1\25\6\0"+
+    "\2\25\5\0\1\25\1\0\1\25\1\0\1\25\1\0"+
+    "\11\25\2\0\1\25\4\0\1\25\4\0\6\25\2\0"+
+    "\1\25\1\0\1\25\1\0\3\25\1\0\1\52\1\0"+
+    "\2\25\4\0\3\25\1\0\1\25\10\0\1\25\1\0"+
+    "\2\25\20\0\1\25\3\0\1\25\5\0\1\25\32\0"+
+    "\15\25\5\0\3\25\1\0\1\25\5\0\1\25\2\52"+
+    "\5\0\1\25\2\0\1\25\1\52\4\0\1\25\2\0"+
+    "\1\25\1\0\1\25\102\0\2\52\6\0\1\52\55\0"+
+    "\1\52\3\0\1\52\2\0\1\52\3\0\1\52\5\0"+
+    "\1\52\7\0\1\52\4\0\2\52\3\0\2\52\1\0"+
+    "\1\52\4\0\1\52\1\0\1\52\2\0\2\52\1\0"+
+    "\3\52\1\0\1\52\2\0\4\52\2\0\1\52\41\0"+
+    "\1\53\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+
+    "\1\25\4\0\1\25\4\0\1\53\1\0\2\53\4\0"+
+    "\1\25\5\0\1\25\3\0\1\53\4\0\1\53\2\25"+
+    "\2\53\10\0\1\51\1\0\2\25\1\0\1\53\10\0"+
+    "\1\25\23\0\1\25\3\0\1\25\6\0\2\25\5\0"+
+    "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
+    "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
+    "\1\25\1\0\3\25\1\0\1\53\1\0\2\25\4\0"+
+    "\3\25\1\0\1\25\10\0\1\25\1\0\2\25\20\0"+
+    "\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+
+    "\3\25\1\0\1\25\5\0\1\25\2\53\5\0\1\25"+
+    "\2\0\1\25\1\53\4\0\1\25\2\0\1\25\1\0"+
+    "\1\25\102\0\2\53\6\0\1\53\55\0\1\53\3\0"+
+    "\1\53\2\0\1\53\3\0\1\53\5\0\1\53\7\0"+
+    "\1\53\4\0\2\53\3\0\2\53\1\0\1\53\4\0"+
+    "\1\53\1\0\1\53\2\0\2\53\1\0\3\53\1\0"+
+    "\1\53\2\0\4\53\2\0\1\53\41\0\1\63\37\0"+
+    "\1\63\1\0\2\63\16\0\1\63\4\0\1\63\2\0"+
+    "\2\63\10\0\1\26\4\0\1\63\36\0\1\26\102\0"+
+    "\1\26\146\0\2\26\133\0\1\63\152\0\2\63\11\0"+
+    "\1\63\114\0\2\63\6\0\1\63\55\0\1\63\3\0"+
+    "\1\63\2\0\1\63\3\0\1\63\5\0\1\63\7\0"+
+    "\1\63\4\0\2\63\3\0\2\63\1\0\1\63\4\0"+
+    "\1\63\1\0\1\63\2\0\2\63\1\0\3\63\1\0"+
+    "\1\63\2\0\4\63\2\0\1\63\41\0\1\64\11\0"+
+    "\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+
+    "\1\25\4\0\1\64\1\0\2\64\4\0\1\25\5\0"+
+    "\1\25\3\0\1\64\4\0\1\64\2\25\2\64\10\0"+
+    "\1\26\1\0\2\25\1\0\1\64\10\0\1\25\23\0"+
+    "\1\25\3\0\1\25\6\0\2\25\5\0\1\25\1\0"+
+    "\1\25\1\0\1\25\1\0\11\25\2\0\1\25\4\0"+
+    "\1\25\4\0\6\25\2\0\1\25\1\0\1\25\1\0"+
+    "\3\25\1\0\1\64\1\0\2\25\4\0\3\25\1\0"+
+    "\1\25\10\0\1\25\1\0\2\25\20\0\1\25\3\0"+
+    "\1\25\5\0\1\25\32\0\15\25\5\0\3\25\1\0"+
+    "\1\25\5\0\1\25\2\64\5\0\1\25\2\0\1\25"+
+    "\1\64\4\0\1\25\2\0\1\25\1\0\1\25\102\0"+
+    "\2\64\6\0\1\64\55\0\1\64\3\0\1\64\2\0"+
+    "\1\64\3\0\1\64\5\0\1\64\7\0\1\64\4\0"+
+    "\2\64\3\0\2\64\1\0\1\64\4\0\1\64\1\0"+
+    "\1\64\2\0\2\64\1\0\3\64\1\0\1\64\2\0"+
+    "\4\64\2\0\1\64\41\0\1\106\37\0\1\106\1\0"+
+    "\2\106\16\0\1\106\4\0\1\106\2\0\2\106\10\0"+
+    "\1\51\4\0\1\106\36\0\1\51\102\0\1\51\146\0"+
+    "\2\51\133\0\1\106\152\0\2\106\11\0\1\106\114\0"+
+    "\2\106\6\0\1\106\55\0\1\106\3\0\1\106\2\0"+
+    "\1\106\3\0\1\106\5\0\1\106\7\0\1\106\4\0"+
+    "\2\106\3\0\2\106\1\0\1\106\4\0\1\106\1\0"+
+    "\1\106\2\0\2\106\1\0\3\106\1\0\1\106\2\0"+
+    "\4\106\2\0\1\106\37\0";
 
   private static int [] zzUnpackTrans() {
-    int [] result = new int[169];
+    int [] result = new int[9180];
     int offset = 0;
     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
     return result;
@@ -299,11 +584,11 @@ public final class StandardTokenizerImpl
   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
 
   private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\1\0\1\11\5\1\2\11\1\1\1\0\1\1\1\0"+
-    "\1\1\2\0";
+    "\1\0\1\11\27\1\2\11\15\0\1\1\1\0\1\1"+
+    "\10\0\1\1\61\0";
 
   private static int [] zzUnpackAttribute() {
-    int [] result = new int[16];
+    int [] result = new int[101];
     int offset = 0;
     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
     return result;
@@ -434,7 +719,7 @@ public final class StandardTokenizerImpl
     char [] map = new char[0x10000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2174) {
+    while (i < 2640) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -713,28 +998,28 @@ public final class StandardTokenizerImpl
       zzMarkedPos = zzMarkedPosL;
 
       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
+        case 2: 
+          { return WORD_TYPE;
           }
         case 7: break;
-        case 6: 
-          { return HIRAGANA_TYPE;
+        case 4: 
+          { return SOUTH_EAST_ASIAN_TYPE;
           }
         case 8: break;
-        case 2: 
-          { return WORD_TYPE;
-          }
-        case 9: break;
         case 5: 
           { return IDEOGRAPHIC_TYPE;
           }
+        case 9: break;
+        case 1: 
+          { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
+          }
         case 10: break;
         case 3: 
           { return NUMERIC_TYPE;
           }
         case 11: break;
-        case 4: 
-          { return SOUTH_EAST_ASIAN_TYPE;
+        case 6: 
+          { return HIRAGANA_TYPE;
           }
         case 12: break;
         default: 

Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex Wed Feb  9 09:35:27 2011
@@ -33,14 +33,6 @@ import org.apache.lucene.analysis.tokena
  *   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  * </ul>
- * <b>WARNING</b>: Because JFlex does not support Unicode supplementary 
- * characters (characters above the Basic Multilingual Plane, which contains
- * those up to and including U+FFFF), this scanner will not recognize them
- * properly.  If you need to be able to process text containing supplementary 
- * characters, consider using the ICU4J-backed implementation in modules/analysis/icu  
- * (org.apache.lucene.analysis.icu.segmentation.ICUTokenizer)
- * instead of this class, since the ICU4J-backed implementation does not have
- * this limitation.
  */
 %%
 
@@ -53,15 +45,29 @@ import org.apache.lucene.analysis.tokena
 %function getNextToken
 %char
 
+%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
+Format =  ([\p{WB:Format}] | {FormatSupp})
+Numeric = ([\p{WB:Numeric}] | {NumericSupp})
+Extend =  ([\p{WB:Extend}] | {ExtendSupp})
+Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
+MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
+MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
+MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
+ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
+ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
+Han = ([\p{Script:Han}] | {HanSupp})
+Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
+
 // UAX#29 WB4. X (Extend | Format)* --> X
 //
-ALetterEx      = \p{WB:ALetter}                     [\p{WB:Format}\p{WB:Extend}]*
+ALetterEx      = {ALetter}                     ({Format} | {Extend})*
 // TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
-NumericEx      = [\p{WB:Numeric}\uFF10-\uFF19]      [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx     = \p{WB:Katakana}                    [\p{WB:Format}\p{WB:Extend}]* 
-MidLetterEx    = [\p{WB:MidLetter}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]* 
-MidNumericEx   = [\p{WB:MidNum}\p{WB:MidNumLet}]    [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx = \p{WB:ExtendNumLet}                [\p{WB:Format}\p{WB:Extend}]*
+NumericEx      = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
+KatakanaEx     = {Katakana}                    ({Format} | {Extend})* 
+MidLetterEx    = ({MidLetter} | {MidNumLet})   ({Format} | {Extend})* 
+MidNumericEx   = ({MidNum} | {MidNumLet})      ({Format} | {Extend})*
+ExtendNumLetEx = {ExtendNumLet}                ({Format} | {Extend})*
 
 
 %{
@@ -156,12 +162,12 @@ ExtendNumLetEx = \p{WB:ExtendNumLet}    
 //
 //    http://www.unicode.org/reports/tr14/#SA
 //
-\p{LB:Complex_Context}+ { return SOUTH_EAST_ASIAN_TYPE; }
+{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; }
 
 // UAX#29 WB14.  Any ÷ Any
 //
-\p{Script:Han} { return IDEOGRAPHIC_TYPE; }
-\p{Script:Hiragana} { return HIRAGANA_TYPE; }
+{Han} { return IDEOGRAPHIC_TYPE; }
+{Hiragana} { return HIRAGANA_TYPE; }
 
 
 // UAX#29 WB3.   CR × LF