You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/04 23:05:47 UTC

svn commit: r1154014 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/ lucene/backwards/src/test-framework/ lucene/backwards/src/test/ lucene/src/java/org/apache/lucene/analysis/standard/ lucene/src/java/org/apache/lucene/analysis/stand...

Author: rmuir
Date: Thu Aug  4 21:05:45 2011
New Revision: 1154014

URL: http://svn.apache.org/viewvc?rev=1154014&view=rev
Log:
LUCENE-3358: StandardTokenizer wrongly discarded combining marks attached to Han/Hiragana

Added:
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/
      - copied from r1154005, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java
      - copied unchanged from r1154005, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
      - copied unchanged from r1154005, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test-framework/   (props changed)
    lucene/dev/branches/branch_3x/lucene/build.xml
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Thu Aug  4 21:05:45 2011
@@ -33,6 +33,11 @@ Bug fixes
 * LUCENE-3340: Fixed case where IndexWriter was not flushing at
   exactly maxBufferedDeleteTerms (Mike McCandless)
 
+* LUCENE-3358: StandardTokenizer wrongly discarded combining marks attached
+  to Han or Hiragana characters. This is fixed if you supply Version >= 3.4.
+  If you supply a previous Lucene version, you get the old buggy behavior
+  for backwards compatibility.  (Trejkaz, Robert Muir)
+
 New Features
 
 * LUCENE-3290: Added FieldInvertState.numUniqueTerms 

Modified: lucene/dev/branches/branch_3x/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/build.xml?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/build.xml Thu Aug  4 21:05:45 2011
@@ -523,6 +523,9 @@
     <jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
            outdir="src/java/org/apache/lucene/analysis/standard"
            nobak="on" />
+    <jflex file="src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex"
+           outdir="src/java/org/apache/lucene/analysis/standard/std31"
+           nobak="on" />
   </target>
 
   <target name="jflex-UAX29URLEmailTokenizer" depends="jflex-check" if="jflex.present">

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Thu Aug  4 21:05:45 2011
@@ -34,6 +34,9 @@ import java.util.Set;
  * <p>You must specify the required {@link Version}
  * compatibility when creating StandardAnalyzer:
  * <ul>
+ *   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ *        from their combining characters. If you use a previous version number,
+ *        you get the exact broken behavior for backwards compatibility.
  *   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
  *        and StopFilter correctly handles Unicode 4.0 supplementary characters
  *        in stopwords.  {@link ClassicTokenizer} and {@link ClassicAnalyzer} 

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Thu Aug  4 21:05:45 2011
@@ -18,6 +18,7 @@
 package org.apache.lucene.analysis.standard;
 
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.std31.StandardTokenizerImpl31;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -42,6 +43,9 @@ import java.io.Reader;
  * <p>You must specify the required {@link Version}
  * compatibility when creating StandardTokenizer:
  * <ul>
+ *   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ *   from their combining characters. If you use a previous version number,
+ *   you get the exact broken behavior for backwards compatibility.
  *   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation.
  *   If you use a previous version number, you get the exact behavior of
  *   {@link ClassicTokenizer} for backwards compatibility.
@@ -147,8 +151,13 @@ public final class StandardTokenizer ext
   }
 
   private final void init(Reader input, Version matchVersion) {
-    this.scanner = matchVersion.onOrAfter(Version.LUCENE_31) ?
-      new StandardTokenizerImpl(input) : new ClassicTokenizerImpl(input);
+    if (matchVersion.onOrAfter(Version.LUCENE_34)) {
+      this.scanner = new StandardTokenizerImpl(input);
+    } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
+      this.scanner = new StandardTokenizerImpl31(input);
+    } else {
+      this.scanner = new ClassicTokenizerImpl(input);
+    }
     if (matchVersion.onOrAfter(Version.LUCENE_24)) {
       replaceInvalidAcronym = true;
     } else {

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java Thu Aug  4 21:05:45 2011
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/4/11 4:07 PM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -209,10 +209,10 @@ public final class StandardTokenizerImpl
   private static final String ZZ_ACTION_PACKED_0 =
     "\1\0\23\1\1\2\1\3\1\4\1\1\1\5\1\6"+
     "\1\7\1\10\15\0\1\2\1\0\1\2\10\0\1\3"+
-    "\15\0\1\2\57\0";
+    "\15\0\1\2\71\0";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[114];
+    int [] result = new int[124];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -240,21 +240,22 @@ public final class StandardTokenizerImpl
     "\0\0\0\147\0\316\0\u0135\0\u019c\0\u0203\0\u026a\0\u02d1"+
     "\0\u0338\0\u039f\0\u0406\0\u046d\0\u04d4\0\u053b\0\u05a2\0\u0609"+
     "\0\u0670\0\u06d7\0\u073e\0\u07a5\0\u080c\0\u0873\0\u08da\0\u0941"+
-    "\0\u09a8\0\147\0\147\0\u0a0f\0\316\0\u0135\0\u019c\0\u0203"+
-    "\0\u026a\0\u0a76\0\u0add\0\u0b44\0\u0bab\0\u046d\0\u0c12\0\u0c79"+
-    "\0\u0ce0\0\u0d47\0\u0dae\0\u0e15\0\u0e7c\0\u0338\0\u039f\0\u0ee3"+
-    "\0\u0f4a\0\u0fb1\0\u1018\0\u107f\0\u10e6\0\u114d\0\u11b4\0\u121b"+
-    "\0\u1282\0\u12e9\0\u1350\0\u13b7\0\u141e\0\u1485\0\u14ec\0\u1553"+
-    "\0\u15ba\0\u0941\0\u1621\0\u1688\0\u16ef\0\u1756\0\u17bd\0\u1824"+
-    "\0\u188b\0\u18f2\0\u1959\0\u19c0\0\u1a27\0\u1a8e\0\u1af5\0\u1b5c"+
-    "\0\u1bc3\0\u1c2a\0\u1c91\0\u1cf8\0\u1d5f\0\u1dc6\0\u1e2d\0\u1e94"+
-    "\0\u1efb\0\u1f62\0\u1fc9\0\u2030\0\u2097\0\u20fe\0\u2165\0\u21cc"+
-    "\0\u2233\0\u229a\0\u2301\0\u2368\0\u23cf\0\u2436\0\u249d\0\u2504"+
-    "\0\u256b\0\u25d2\0\u2639\0\u26a0\0\u2707\0\u276e\0\u27d5\0\u283c"+
-    "\0\u28a3\0\u290a";
+    "\0\u09a8\0\u0a0f\0\u0a76\0\u0add\0\316\0\u0135\0\u019c\0\u0203"+
+    "\0\u026a\0\u0b44\0\u0bab\0\u0c12\0\u0c79\0\u046d\0\u0ce0\0\u0d47"+
+    "\0\u0dae\0\u0e15\0\u0e7c\0\u0ee3\0\u0f4a\0\u0338\0\u039f\0\u0fb1"+
+    "\0\u1018\0\u107f\0\u10e6\0\u114d\0\u11b4\0\u121b\0\u1282\0\u12e9"+
+    "\0\u1350\0\u13b7\0\u141e\0\u1485\0\u14ec\0\u1553\0\u15ba\0\u1621"+
+    "\0\u1688\0\u0941\0\u16ef\0\u1756\0\u17bd\0\u1824\0\u188b\0\u18f2"+
+    "\0\u1959\0\u19c0\0\u1a27\0\u1a8e\0\u1af5\0\u1b5c\0\u1bc3\0\u1c2a"+
+    "\0\u1c91\0\u1cf8\0\u1d5f\0\u1dc6\0\u1e2d\0\u1e94\0\u1efb\0\u1f62"+
+    "\0\u1fc9\0\u2030\0\u2097\0\u20fe\0\u2165\0\u21cc\0\u2233\0\u229a"+
+    "\0\u2301\0\u2368\0\u23cf\0\u2436\0\u249d\0\u2504\0\u256b\0\u25d2"+
+    "\0\u2639\0\u26a0\0\u2707\0\u276e\0\u27d5\0\u283c\0\u28a3\0\u290a"+
+    "\0\u2971\0\u29d8\0\u2a3f\0\u2aa6\0\u2b0d\0\u2b74\0\u2bdb\0\u2c42"+
+    "\0\u2ca9\0\u2d10\0\u2d77\0\u2dde";
 
   private static int [] zzUnpackRowMap() {
-    int [] result = new int[114];
+    int [] result = new int[124];
     int offset = 0;
     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
     return result;
@@ -367,223 +368,241 @@ public final class StandardTokenizerImpl
     "\1\57\3\0\1\75\11\0\1\46\2\0\1\76\16\0"+
     "\1\77\2\0\1\100\21\0\1\101\17\0\1\25\1\102"+
     "\1\26\1\103\3\0\1\102\1\0\1\102\2\0\1\25"+
-    "\142\0\2\31\4\0\1\35\1\0\1\36\1\0\1\37"+
-    "\1\0\1\40\1\0\1\41\1\0\1\104\3\0\1\43"+
-    "\5\0\1\44\3\0\1\105\11\0\1\46\2\0\1\106"+
-    "\16\0\1\107\2\0\1\110\41\0\1\25\1\34\1\52"+
-    "\1\0\1\53\1\0\1\53\1\54\1\0\1\34\2\0"+
-    "\1\34\2\0\1\25\11\0\3\25\5\0\1\25\1\0"+
-    "\1\25\1\0\1\25\4\0\1\25\4\0\1\25\1\0"+
-    "\2\25\4\0\1\25\5\0\1\25\3\0\1\25\4\0"+
-    "\5\25\10\0\1\52\1\0\2\25\1\0\1\25\10\0"+
-    "\1\25\24\0\1\25\1\0\1\52\7\0\2\25\2\0"+
-    "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
-    "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
-    "\4\25\1\0\5\25\1\52\1\0\1\25\1\0\1\25"+
-    "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\1\25"+
-    "\2\0\1\25\17\0\1\25\3\0\1\25\5\0\2\25"+
-    "\3\0\1\25\4\0\3\25\4\0\1\25\1\0\1\25"+
-    "\2\0\1\25\1\0\2\25\4\0\1\25\1\0\1\25"+
-    "\3\0\2\25\1\0\1\25\5\0\3\25\1\0\1\25"+
-    "\10\0\1\25\1\0\2\52\1\0\1\25\10\0\1\25"+
-    "\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
-    "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
-    "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
-    "\1\0\3\25\1\0\1\25\1\0\2\25\4\0\3\25"+
-    "\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
-    "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
-    "\1\0\1\25\5\0\3\25\5\0\1\25\2\0\2\25"+
-    "\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\25"+
-    "\6\0\1\25\56\0\1\25\3\0\1\25\2\0\1\25"+
-    "\3\0\1\25\5\0\1\25\7\0\1\25\4\0\2\25"+
-    "\3\0\2\25\1\0\1\25\4\0\1\25\1\0\1\25"+
-    "\2\0\2\25\1\0\3\25\1\0\1\25\2\0\4\25"+
-    "\2\0\1\25\41\0\1\35\1\0\1\36\1\0\1\37"+
-    "\1\0\1\40\1\0\1\41\1\0\1\111\3\0\1\43"+
-    "\5\0\1\44\3\0\1\112\11\0\1\46\2\0\1\113"+
-    "\16\0\1\114\2\0\1\115\41\0\1\25\2\52\2\0"+
-    "\2\116\1\54\1\0\1\52\2\0\1\25\1\0\1\35"+
+    "\142\0\2\31\16\0\1\104\15\0\1\105\14\0\1\106"+
+    "\16\0\1\107\2\0\1\110\42\0\1\32\7\0\1\32"+
+    "\16\0\1\111\15\0\1\112\14\0\1\113\16\0\1\114"+
+    "\2\0\1\115\42\0\1\33\7\0\1\33\4\0\1\35"+
     "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\41"+
-    "\1\0\1\117\3\0\1\120\5\0\1\121\3\0\1\122"+
-    "\11\0\1\46\2\0\1\123\16\0\1\124\2\0\1\125"+
-    "\41\0\1\25\1\53\7\0\1\53\2\0\1\25\1\0"+
-    "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
-    "\1\41\1\0\1\126\3\0\1\43\5\0\1\44\3\0"+
-    "\1\127\11\0\1\46\2\0\1\130\16\0\1\131\2\0"+
-    "\1\132\21\0\1\101\17\0\1\25\1\54\1\52\1\103"+
-    "\3\0\1\54\1\0\1\54\2\0\1\25\2\0\1\26"+
-    "\11\0\3\25\5\0\1\25\1\0\1\25\1\0\1\25"+
-    "\4\0\1\25\4\0\1\26\1\0\2\26\4\0\1\25"+
-    "\5\0\1\25\3\0\1\26\4\0\1\26\2\25\2\26"+
-    "\10\0\1\26\1\0\2\25\1\0\1\26\10\0\1\25"+
-    "\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
-    "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
-    "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
-    "\1\0\3\25\1\0\1\26\1\0\2\25\4\0\3\25"+
-    "\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
-    "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
-    "\1\0\1\25\5\0\1\25\2\26\5\0\1\25\2\0"+
-    "\1\25\1\26\4\0\1\25\2\0\1\25\1\0\1\25"+
-    "\103\0\2\26\6\0\1\26\56\0\1\26\3\0\1\26"+
-    "\2\0\1\26\3\0\1\26\5\0\1\26\7\0\1\26"+
-    "\4\0\2\26\3\0\2\26\1\0\1\26\4\0\1\26"+
-    "\1\0\1\26\2\0\2\26\1\0\3\26\1\0\1\26"+
-    "\2\0\4\26\2\0\1\26\53\0\1\133\3\0\1\134"+
-    "\5\0\1\135\3\0\1\136\14\0\1\137\16\0\1\140"+
-    "\2\0\1\141\42\0\1\64\1\26\6\0\1\64\4\0"+
-    "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
-    "\1\41\1\0\1\142\3\0\1\56\5\0\1\57\3\0"+
-    "\1\143\11\0\1\46\2\0\1\144\16\0\1\145\2\0"+
-    "\1\146\21\0\1\101\17\0\1\25\1\65\1\26\1\103"+
-    "\3\0\1\65\1\0\1\65\2\0\1\25\2\0\1\27"+
-    "\37\0\1\27\1\0\2\27\16\0\1\27\4\0\1\27"+
-    "\2\0\2\27\15\0\1\27\132\0\1\27\153\0\2\27"+
-    "\11\0\1\27\115\0\2\27\6\0\1\27\56\0\1\27"+
-    "\3\0\1\27\2\0\1\27\3\0\1\27\5\0\1\27"+
-    "\7\0\1\27\4\0\2\27\3\0\2\27\1\0\1\27"+
-    "\4\0\1\27\1\0\1\27\2\0\2\27\1\0\3\27"+
-    "\1\0\1\27\2\0\4\27\2\0\1\27\153\0\1\27"+
-    "\35\0\1\102\11\0\3\25\5\0\1\25\1\0\1\25"+
-    "\1\0\1\25\4\0\1\25\4\0\1\102\1\0\2\102"+
-    "\4\0\1\25\5\0\1\25\3\0\1\102\4\0\1\102"+
-    "\2\25\2\102\10\0\1\26\1\0\2\25\1\0\1\102"+
-    "\10\0\1\25\24\0\1\25\3\0\1\25\6\0\2\25"+
-    "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
-    "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
-    "\1\0\1\25\1\0\3\25\1\0\1\102\1\0\2\25"+
-    "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
-    "\21\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
-    "\5\0\3\25\1\0\1\25\5\0\1\25\2\102\5\0"+
-    "\1\25\2\0\1\25\1\102\4\0\1\25\2\0\1\25"+
-    "\1\0\1\25\103\0\2\102\6\0\1\102\56\0\1\102"+
-    "\3\0\1\102\2\0\1\102\3\0\1\102\5\0\1\102"+
-    "\7\0\1\102\4\0\2\102\3\0\2\102\1\0\1\102"+
-    "\4\0\1\102\1\0\1\102\2\0\2\102\1\0\3\102"+
-    "\1\0\1\102\2\0\4\102\2\0\1\102\153\0\1\103"+
-    "\46\0\1\147\15\0\1\150\14\0\1\151\16\0\1\152"+
-    "\2\0\1\153\21\0\1\101\20\0\1\103\1\0\1\103"+
-    "\3\0\1\54\1\0\1\103\5\0\1\34\11\0\3\25"+
+    "\1\0\1\116\3\0\1\43\5\0\1\44\3\0\1\117"+
+    "\11\0\1\46\2\0\1\120\16\0\1\121\2\0\1\122"+
+    "\41\0\1\25\1\34\1\52\1\0\1\53\1\0\1\53"+
+    "\1\54\1\0\1\34\2\0\1\34\2\0\1\25\11\0"+
+    "\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+
+    "\1\25\4\0\1\25\1\0\2\25\4\0\1\25\5\0"+
+    "\1\25\3\0\1\25\4\0\5\25\10\0\1\52\1\0"+
+    "\2\25\1\0\1\25\10\0\1\25\24\0\1\25\1\0"+
+    "\1\52\7\0\2\25\2\0\5\25\2\0\2\25\4\0"+
+    "\6\25\1\0\2\25\4\0\5\25\1\0\5\25\1\0"+
+    "\2\25\1\0\3\25\1\0\4\25\1\0\5\25\1\52"+
+    "\1\0\1\25\1\0\1\25\1\0\3\25\2\0\1\25"+
+    "\1\0\1\25\1\0\1\25\2\0\1\25\17\0\1\25"+
+    "\3\0\1\25\5\0\2\25\3\0\1\25\4\0\3\25"+
+    "\4\0\1\25\1\0\1\25\2\0\1\25\1\0\2\25"+
+    "\4\0\1\25\1\0\1\25\3\0\2\25\1\0\1\25"+
+    "\5\0\3\25\1\0\1\25\10\0\1\25\1\0\2\52"+
+    "\1\0\1\25\10\0\1\25\24\0\1\25\3\0\1\25"+
+    "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
+    "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
+    "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\25"+
+    "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
+    "\1\0\2\25\21\0\1\25\3\0\1\25\5\0\1\25"+
+    "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\3\25"+
+    "\5\0\1\25\2\0\2\25\4\0\1\25\2\0\1\25"+
+    "\1\0\1\25\103\0\2\25\6\0\1\25\56\0\1\25"+
+    "\3\0\1\25\2\0\1\25\3\0\1\25\5\0\1\25"+
+    "\7\0\1\25\4\0\2\25\3\0\2\25\1\0\1\25"+
+    "\4\0\1\25\1\0\1\25\2\0\2\25\1\0\3\25"+
+    "\1\0\1\25\2\0\4\25\2\0\1\25\41\0\1\35"+
+    "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\41"+
+    "\1\0\1\123\3\0\1\43\5\0\1\44\3\0\1\124"+
+    "\11\0\1\46\2\0\1\125\16\0\1\126\2\0\1\127"+
+    "\41\0\1\25\2\52\2\0\2\130\1\54\1\0\1\52"+
+    "\2\0\1\25\1\0\1\35\1\0\1\36\1\0\1\37"+
+    "\1\0\1\40\1\0\1\41\1\0\1\131\3\0\1\132"+
+    "\5\0\1\133\3\0\1\134\11\0\1\46\2\0\1\135"+
+    "\16\0\1\136\2\0\1\137\41\0\1\25\1\53\7\0"+
+    "\1\53\2\0\1\25\1\0\1\35\1\0\1\36\1\0"+
+    "\1\37\1\0\1\40\1\0\1\41\1\0\1\140\3\0"+
+    "\1\43\5\0\1\44\3\0\1\141\11\0\1\46\2\0"+
+    "\1\142\16\0\1\143\2\0\1\144\21\0\1\101\17\0"+
+    "\1\25\1\54\1\52\1\103\3\0\1\54\1\0\1\54"+
+    "\2\0\1\25\2\0\1\26\11\0\3\25\5\0\1\25"+
+    "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\26"+
+    "\1\0\2\26\4\0\1\25\5\0\1\25\3\0\1\26"+
+    "\4\0\1\26\2\25\2\26\10\0\1\26\1\0\2\25"+
+    "\1\0\1\26\10\0\1\25\24\0\1\25\3\0\1\25"+
+    "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
+    "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
+    "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\26"+
+    "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
+    "\1\0\2\25\21\0\1\25\3\0\1\25\5\0\1\25"+
+    "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
+    "\2\26\5\0\1\25\2\0\1\25\1\26\4\0\1\25"+
+    "\2\0\1\25\1\0\1\25\103\0\2\26\6\0\1\26"+
+    "\56\0\1\26\3\0\1\26\2\0\1\26\3\0\1\26"+
+    "\5\0\1\26\7\0\1\26\4\0\2\26\3\0\2\26"+
+    "\1\0\1\26\4\0\1\26\1\0\1\26\2\0\2\26"+
+    "\1\0\3\26\1\0\1\26\2\0\4\26\2\0\1\26"+
+    "\53\0\1\145\3\0\1\146\5\0\1\147\3\0\1\150"+
+    "\14\0\1\151\16\0\1\152\2\0\1\153\42\0\1\64"+
+    "\1\26\6\0\1\64\4\0\1\35\1\0\1\36\1\0"+
+    "\1\37\1\0\1\40\1\0\1\41\1\0\1\154\3\0"+
+    "\1\56\5\0\1\57\3\0\1\155\11\0\1\46\2\0"+
+    "\1\156\16\0\1\157\2\0\1\160\21\0\1\101\17\0"+
+    "\1\25\1\65\1\26\1\103\3\0\1\65\1\0\1\65"+
+    "\2\0\1\25\2\0\1\27\37\0\1\27\1\0\2\27"+
+    "\16\0\1\27\4\0\1\27\2\0\2\27\15\0\1\27"+
+    "\132\0\1\27\153\0\2\27\11\0\1\27\115\0\2\27"+
+    "\6\0\1\27\56\0\1\27\3\0\1\27\2\0\1\27"+
+    "\3\0\1\27\5\0\1\27\7\0\1\27\4\0\2\27"+
+    "\3\0\2\27\1\0\1\27\4\0\1\27\1\0\1\27"+
+    "\2\0\2\27\1\0\3\27\1\0\1\27\2\0\4\27"+
+    "\2\0\1\27\153\0\1\27\35\0\1\102\11\0\3\25"+
     "\5\0\1\25\1\0\1\25\1\0\1\25\4\0\1\25"+
-    "\4\0\1\34\1\0\2\34\4\0\1\25\5\0\1\25"+
-    "\3\0\1\34\4\0\1\34\2\25\2\34\10\0\1\52"+
-    "\1\0\2\25\1\0\1\34\10\0\1\25\24\0\1\25"+
+    "\4\0\1\102\1\0\2\102\4\0\1\25\5\0\1\25"+
+    "\3\0\1\102\4\0\1\102\2\25\2\102\10\0\1\26"+
+    "\1\0\2\25\1\0\1\102\10\0\1\25\24\0\1\25"+
     "\3\0\1\25\6\0\2\25\5\0\1\25\1\0\1\25"+
     "\1\0\1\25\1\0\11\25\2\0\1\25\4\0\1\25"+
     "\4\0\6\25\2\0\1\25\1\0\1\25\1\0\3\25"+
-    "\1\0\1\34\1\0\2\25\4\0\3\25\1\0\1\25"+
+    "\1\0\1\102\1\0\2\25\4\0\3\25\1\0\1\25"+
     "\10\0\1\25\1\0\2\25\21\0\1\25\3\0\1\25"+
     "\5\0\1\25\32\0\15\25\5\0\3\25\1\0\1\25"+
-    "\5\0\1\25\2\34\5\0\1\25\2\0\1\25\1\34"+
-    "\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\34"+
-    "\6\0\1\34\56\0\1\34\3\0\1\34\2\0\1\34"+
-    "\3\0\1\34\5\0\1\34\7\0\1\34\4\0\2\34"+
-    "\3\0\2\34\1\0\1\34\4\0\1\34\1\0\1\34"+
-    "\2\0\2\34\1\0\3\34\1\0\1\34\2\0\4\34"+
-    "\2\0\1\34\42\0\1\52\11\0\3\25\5\0\1\25"+
-    "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\52"+
-    "\1\0\2\52\4\0\1\25\5\0\1\25\3\0\1\52"+
-    "\4\0\1\52\2\25\2\52\10\0\1\52\1\0\2\25"+
-    "\1\0\1\52\10\0\1\25\24\0\1\25\3\0\1\25"+
-    "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
-    "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
-    "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\52"+
-    "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
-    "\1\0\2\25\21\0\1\25\3\0\1\25\5\0\1\25"+
-    "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
-    "\2\52\5\0\1\25\2\0\1\25\1\52\4\0\1\25"+
-    "\2\0\1\25\1\0\1\25\103\0\2\52\6\0\1\52"+
-    "\56\0\1\52\3\0\1\52\2\0\1\52\3\0\1\52"+
-    "\5\0\1\52\7\0\1\52\4\0\2\52\3\0\2\52"+
-    "\1\0\1\52\4\0\1\52\1\0\1\52\2\0\2\52"+
-    "\1\0\3\52\1\0\1\52\2\0\4\52\2\0\1\52"+
-    "\53\0\1\154\3\0\1\155\5\0\1\156\3\0\1\157"+
-    "\14\0\1\160\16\0\1\161\2\0\1\162\42\0\1\116"+
-    "\1\52\6\0\1\116\5\0\1\53\11\0\3\25\5\0"+
+    "\5\0\1\25\2\102\5\0\1\25\2\0\1\25\1\102"+
+    "\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\102"+
+    "\6\0\1\102\56\0\1\102\3\0\1\102\2\0\1\102"+
+    "\3\0\1\102\5\0\1\102\7\0\1\102\4\0\2\102"+
+    "\3\0\2\102\1\0\1\102\4\0\1\102\1\0\1\102"+
+    "\2\0\2\102\1\0\3\102\1\0\1\102\2\0\4\102"+
+    "\2\0\1\102\153\0\1\103\46\0\1\161\15\0\1\162"+
+    "\14\0\1\163\16\0\1\164\2\0\1\165\21\0\1\101"+
+    "\20\0\1\103\1\0\1\103\3\0\1\54\1\0\1\103"+
+    "\5\0\1\32\37\0\1\32\1\0\2\32\16\0\1\32"+
+    "\4\0\1\32\2\0\2\32\15\0\1\32\132\0\1\32"+
+    "\153\0\2\32\11\0\1\32\115\0\2\32\6\0\1\32"+
+    "\56\0\1\32\3\0\1\32\2\0\1\32\3\0\1\32"+
+    "\5\0\1\32\7\0\1\32\4\0\2\32\3\0\2\32"+
+    "\1\0\1\32\4\0\1\32\1\0\1\32\2\0\2\32"+
+    "\1\0\3\32\1\0\1\32\2\0\4\32\2\0\1\32"+
+    "\42\0\1\33\37\0\1\33\1\0\2\33\16\0\1\33"+
+    "\4\0\1\33\2\0\2\33\15\0\1\33\132\0\1\33"+
+    "\153\0\2\33\11\0\1\33\115\0\2\33\6\0\1\33"+
+    "\56\0\1\33\3\0\1\33\2\0\1\33\3\0\1\33"+
+    "\5\0\1\33\7\0\1\33\4\0\2\33\3\0\2\33"+
+    "\1\0\1\33\4\0\1\33\1\0\1\33\2\0\2\33"+
+    "\1\0\3\33\1\0\1\33\2\0\4\33\2\0\1\33"+
+    "\42\0\1\34\11\0\3\25\5\0\1\25\1\0\1\25"+
+    "\1\0\1\25\4\0\1\25\4\0\1\34\1\0\2\34"+
+    "\4\0\1\25\5\0\1\25\3\0\1\34\4\0\1\34"+
+    "\2\25\2\34\10\0\1\52\1\0\2\25\1\0\1\34"+
+    "\10\0\1\25\24\0\1\25\3\0\1\25\6\0\2\25"+
+    "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
+    "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
+    "\1\0\1\25\1\0\3\25\1\0\1\34\1\0\2\25"+
+    "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
+    "\21\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
+    "\5\0\3\25\1\0\1\25\5\0\1\25\2\34\5\0"+
+    "\1\25\2\0\1\25\1\34\4\0\1\25\2\0\1\25"+
+    "\1\0\1\25\103\0\2\34\6\0\1\34\56\0\1\34"+
+    "\3\0\1\34\2\0\1\34\3\0\1\34\5\0\1\34"+
+    "\7\0\1\34\4\0\2\34\3\0\2\34\1\0\1\34"+
+    "\4\0\1\34\1\0\1\34\2\0\2\34\1\0\3\34"+
+    "\1\0\1\34\2\0\4\34\2\0\1\34\42\0\1\52"+
+    "\11\0\3\25\5\0\1\25\1\0\1\25\1\0\1\25"+
+    "\4\0\1\25\4\0\1\52\1\0\2\52\4\0\1\25"+
+    "\5\0\1\25\3\0\1\52\4\0\1\52\2\25\2\52"+
+    "\10\0\1\52\1\0\2\25\1\0\1\52\10\0\1\25"+
+    "\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
+    "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
+    "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
+    "\1\0\3\25\1\0\1\52\1\0\2\25\4\0\3\25"+
+    "\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
+    "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
+    "\1\0\1\25\5\0\1\25\2\52\5\0\1\25\2\0"+
+    "\1\25\1\52\4\0\1\25\2\0\1\25\1\0\1\25"+
+    "\103\0\2\52\6\0\1\52\56\0\1\52\3\0\1\52"+
+    "\2\0\1\52\3\0\1\52\5\0\1\52\7\0\1\52"+
+    "\4\0\2\52\3\0\2\52\1\0\1\52\4\0\1\52"+
+    "\1\0\1\52\2\0\2\52\1\0\3\52\1\0\1\52"+
+    "\2\0\4\52\2\0\1\52\53\0\1\166\3\0\1\167"+
+    "\5\0\1\170\3\0\1\171\14\0\1\172\16\0\1\173"+
+    "\2\0\1\174\42\0\1\130\1\52\6\0\1\130\5\0"+
+    "\1\53\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+
+    "\1\25\4\0\1\25\4\0\1\53\1\0\2\53\4\0"+
+    "\1\25\5\0\1\25\3\0\1\53\4\0\1\53\2\25"+
+    "\2\53\12\0\2\25\1\0\1\53\10\0\1\25\24\0"+
+    "\1\25\11\0\2\25\2\0\5\25\2\0\2\25\4\0"+
+    "\6\25\1\0\2\25\4\0\5\25\1\0\5\25\1\0"+
+    "\2\25\1\0\3\25\1\0\4\25\1\0\5\25\2\0"+
+    "\1\25\1\0\1\25\1\0\3\25\2\0\1\25\1\0"+
+    "\1\25\1\0\1\25\2\0\1\25\17\0\1\25\3\0"+
+    "\1\25\5\0\2\25\3\0\1\25\4\0\3\25\4\0"+
+    "\1\25\1\0\1\25\2\0\1\25\1\0\2\25\4\0"+
+    "\1\25\1\0\1\25\3\0\2\25\1\0\1\25\5\0"+
+    "\3\25\1\0\1\25\10\0\1\25\4\0\1\25\10\0"+
+    "\1\25\24\0\1\25\3\0\1\25\6\0\2\25\5\0"+
+    "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
+    "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
+    "\1\25\1\0\3\25\1\0\1\53\1\0\2\25\4\0"+
+    "\3\25\1\0\1\25\10\0\1\25\1\0\2\25\21\0"+
+    "\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+
+    "\3\25\1\0\1\25\5\0\1\25\2\53\5\0\1\25"+
+    "\2\0\1\25\1\53\4\0\1\25\2\0\1\25\1\0"+
+    "\1\25\103\0\2\53\6\0\1\53\56\0\1\53\3\0"+
+    "\1\53\2\0\1\53\3\0\1\53\5\0\1\53\7\0"+
+    "\1\53\4\0\2\53\3\0\2\53\1\0\1\53\4\0"+
+    "\1\53\1\0\1\53\2\0\2\53\1\0\3\53\1\0"+
+    "\1\53\2\0\4\53\2\0\1\53\42\0\1\54\11\0"+
+    "\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+
+    "\1\25\4\0\1\54\1\0\2\54\4\0\1\25\5\0"+
+    "\1\25\3\0\1\54\4\0\1\54\2\25\2\54\10\0"+
+    "\1\52\1\0\2\25\1\0\1\54\10\0\1\25\24\0"+
+    "\1\25\3\0\1\25\6\0\2\25\5\0\1\25\1\0"+
+    "\1\25\1\0\1\25\1\0\11\25\2\0\1\25\4\0"+
+    "\1\25\4\0\6\25\2\0\1\25\1\0\1\25\1\0"+
+    "\3\25\1\0\1\54\1\0\2\25\4\0\3\25\1\0"+
+    "\1\25\10\0\1\25\1\0\2\25\21\0\1\25\3\0"+
+    "\1\25\5\0\1\25\32\0\15\25\5\0\3\25\1\0"+
+    "\1\25\5\0\1\25\2\54\5\0\1\25\2\0\1\25"+
+    "\1\54\4\0\1\25\2\0\1\25\1\0\1\25\103\0"+
+    "\2\54\6\0\1\54\56\0\1\54\3\0\1\54\2\0"+
+    "\1\54\3\0\1\54\5\0\1\54\7\0\1\54\4\0"+
+    "\2\54\3\0\2\54\1\0\1\54\4\0\1\54\1\0"+
+    "\1\54\2\0\2\54\1\0\3\54\1\0\1\54\2\0"+
+    "\4\54\2\0\1\54\42\0\1\64\37\0\1\64\1\0"+
+    "\2\64\16\0\1\64\4\0\1\64\2\0\2\64\10\0"+
+    "\1\26\4\0\1\64\37\0\1\26\102\0\1\26\147\0"+
+    "\2\26\134\0\1\64\153\0\2\64\11\0\1\64\115\0"+
+    "\2\64\6\0\1\64\56\0\1\64\3\0\1\64\2\0"+
+    "\1\64\3\0\1\64\5\0\1\64\7\0\1\64\4\0"+
+    "\2\64\3\0\2\64\1\0\1\64\4\0\1\64\1\0"+
+    "\1\64\2\0\2\64\1\0\3\64\1\0\1\64\2\0"+
+    "\4\64\2\0\1\64\42\0\1\65\11\0\3\25\5\0"+
     "\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+
-    "\1\53\1\0\2\53\4\0\1\25\5\0\1\25\3\0"+
-    "\1\53\4\0\1\53\2\25\2\53\12\0\2\25\1\0"+
-    "\1\53\10\0\1\25\24\0\1\25\11\0\2\25\2\0"+
-    "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
-    "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
-    "\4\25\1\0\5\25\2\0\1\25\1\0\1\25\1\0"+
-    "\3\25\2\0\1\25\1\0\1\25\1\0\1\25\2\0"+
-    "\1\25\17\0\1\25\3\0\1\25\5\0\2\25\3\0"+
-    "\1\25\4\0\3\25\4\0\1\25\1\0\1\25\2\0"+
-    "\1\25\1\0\2\25\4\0\1\25\1\0\1\25\3\0"+
-    "\2\25\1\0\1\25\5\0\3\25\1\0\1\25\10\0"+
-    "\1\25\4\0\1\25\10\0\1\25\24\0\1\25\3\0"+
+    "\1\65\1\0\2\65\4\0\1\25\5\0\1\25\3\0"+
+    "\1\65\4\0\1\65\2\25\2\65\10\0\1\26\1\0"+
+    "\2\25\1\0\1\65\10\0\1\25\24\0\1\25\3\0"+
     "\1\25\6\0\2\25\5\0\1\25\1\0\1\25\1\0"+
     "\1\25\1\0\11\25\2\0\1\25\4\0\1\25\4\0"+
     "\6\25\2\0\1\25\1\0\1\25\1\0\3\25\1\0"+
-    "\1\53\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+
+    "\1\65\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+
     "\1\25\1\0\2\25\21\0\1\25\3\0\1\25\5\0"+
     "\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+
-    "\1\25\2\53\5\0\1\25\2\0\1\25\1\53\4\0"+
-    "\1\25\2\0\1\25\1\0\1\25\103\0\2\53\6\0"+
-    "\1\53\56\0\1\53\3\0\1\53\2\0\1\53\3\0"+
-    "\1\53\5\0\1\53\7\0\1\53\4\0\2\53\3\0"+
-    "\2\53\1\0\1\53\4\0\1\53\1\0\1\53\2\0"+
-    "\2\53\1\0\3\53\1\0\1\53\2\0\4\53\2\0"+
-    "\1\53\42\0\1\54\11\0\3\25\5\0\1\25\1\0"+
-    "\1\25\1\0\1\25\4\0\1\25\4\0\1\54\1\0"+
-    "\2\54\4\0\1\25\5\0\1\25\3\0\1\54\4\0"+
-    "\1\54\2\25\2\54\10\0\1\52\1\0\2\25\1\0"+
-    "\1\54\10\0\1\25\24\0\1\25\3\0\1\25\6\0"+
-    "\2\25\5\0\1\25\1\0\1\25\1\0\1\25\1\0"+
-    "\11\25\2\0\1\25\4\0\1\25\4\0\6\25\2\0"+
-    "\1\25\1\0\1\25\1\0\3\25\1\0\1\54\1\0"+
-    "\2\25\4\0\3\25\1\0\1\25\10\0\1\25\1\0"+
-    "\2\25\21\0\1\25\3\0\1\25\5\0\1\25\32\0"+
-    "\15\25\5\0\3\25\1\0\1\25\5\0\1\25\2\54"+
-    "\5\0\1\25\2\0\1\25\1\54\4\0\1\25\2\0"+
-    "\1\25\1\0\1\25\103\0\2\54\6\0\1\54\56\0"+
-    "\1\54\3\0\1\54\2\0\1\54\3\0\1\54\5\0"+
-    "\1\54\7\0\1\54\4\0\2\54\3\0\2\54\1\0"+
-    "\1\54\4\0\1\54\1\0\1\54\2\0\2\54\1\0"+
-    "\3\54\1\0\1\54\2\0\4\54\2\0\1\54\42\0"+
-    "\1\64\37\0\1\64\1\0\2\64\16\0\1\64\4\0"+
-    "\1\64\2\0\2\64\10\0\1\26\4\0\1\64\37\0"+
-    "\1\26\102\0\1\26\147\0\2\26\134\0\1\64\153\0"+
-    "\2\64\11\0\1\64\115\0\2\64\6\0\1\64\56\0"+
-    "\1\64\3\0\1\64\2\0\1\64\3\0\1\64\5\0"+
-    "\1\64\7\0\1\64\4\0\2\64\3\0\2\64\1\0"+
-    "\1\64\4\0\1\64\1\0\1\64\2\0\2\64\1\0"+
-    "\3\64\1\0\1\64\2\0\4\64\2\0\1\64\42\0"+
-    "\1\65\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+
-    "\1\25\4\0\1\25\4\0\1\65\1\0\2\65\4\0"+
-    "\1\25\5\0\1\25\3\0\1\65\4\0\1\65\2\25"+
-    "\2\65\10\0\1\26\1\0\2\25\1\0\1\65\10\0"+
-    "\1\25\24\0\1\25\3\0\1\25\6\0\2\25\5\0"+
-    "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
-    "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
-    "\1\25\1\0\3\25\1\0\1\65\1\0\2\25\4\0"+
-    "\3\25\1\0\1\25\10\0\1\25\1\0\2\25\21\0"+
-    "\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+
-    "\3\25\1\0\1\25\5\0\1\25\2\65\5\0\1\25"+
-    "\2\0\1\25\1\65\4\0\1\25\2\0\1\25\1\0"+
-    "\1\25\103\0\2\65\6\0\1\65\56\0\1\65\3\0"+
-    "\1\65\2\0\1\65\3\0\1\65\5\0\1\65\7\0"+
-    "\1\65\4\0\2\65\3\0\2\65\1\0\1\65\4\0"+
-    "\1\65\1\0\1\65\2\0\2\65\1\0\3\65\1\0"+
-    "\1\65\2\0\4\65\2\0\1\65\42\0\1\103\37\0"+
-    "\1\103\1\0\2\103\16\0\1\103\4\0\1\103\2\0"+
-    "\2\103\15\0\1\103\132\0\1\103\153\0\2\103\11\0"+
-    "\1\103\115\0\2\103\6\0\1\103\56\0\1\103\3\0"+
-    "\1\103\2\0\1\103\3\0\1\103\5\0\1\103\7\0"+
-    "\1\103\4\0\2\103\3\0\2\103\1\0\1\103\4\0"+
-    "\1\103\1\0\1\103\2\0\2\103\1\0\3\103\1\0"+
-    "\1\103\2\0\4\103\2\0\1\103\42\0\1\116\37\0"+
-    "\1\116\1\0\2\116\16\0\1\116\4\0\1\116\2\0"+
-    "\2\116\10\0\1\52\4\0\1\116\37\0\1\52\102\0"+
-    "\1\52\147\0\2\52\134\0\1\116\153\0\2\116\11\0"+
-    "\1\116\115\0\2\116\6\0\1\116\56\0\1\116\3\0"+
-    "\1\116\2\0\1\116\3\0\1\116\5\0\1\116\7\0"+
-    "\1\116\4\0\2\116\3\0\2\116\1\0\1\116\4\0"+
-    "\1\116\1\0\1\116\2\0\2\116\1\0\3\116\1\0"+
-    "\1\116\2\0\4\116\2\0\1\116\40\0";
+    "\1\25\2\65\5\0\1\25\2\0\1\25\1\65\4\0"+
+    "\1\25\2\0\1\25\1\0\1\25\103\0\2\65\6\0"+
+    "\1\65\56\0\1\65\3\0\1\65\2\0\1\65\3\0"+
+    "\1\65\5\0\1\65\7\0\1\65\4\0\2\65\3\0"+
+    "\2\65\1\0\1\65\4\0\1\65\1\0\1\65\2\0"+
+    "\2\65\1\0\3\65\1\0\1\65\2\0\4\65\2\0"+
+    "\1\65\42\0\1\103\37\0\1\103\1\0\2\103\16\0"+
+    "\1\103\4\0\1\103\2\0\2\103\15\0\1\103\132\0"+
+    "\1\103\153\0\2\103\11\0\1\103\115\0\2\103\6\0"+
+    "\1\103\56\0\1\103\3\0\1\103\2\0\1\103\3\0"+
+    "\1\103\5\0\1\103\7\0\1\103\4\0\2\103\3\0"+
+    "\2\103\1\0\1\103\4\0\1\103\1\0\1\103\2\0"+
+    "\2\103\1\0\3\103\1\0\1\103\2\0\4\103\2\0"+
+    "\1\103\42\0\1\130\37\0\1\130\1\0\2\130\16\0"+
+    "\1\130\4\0\1\130\2\0\2\130\10\0\1\52\4\0"+
+    "\1\130\37\0\1\52\102\0\1\52\147\0\2\52\134\0"+
+    "\1\130\153\0\2\130\11\0\1\130\115\0\2\130\6\0"+
+    "\1\130\56\0\1\130\3\0\1\130\2\0\1\130\3\0"+
+    "\1\130\5\0\1\130\7\0\1\130\4\0\2\130\3\0"+
+    "\2\130\1\0\1\130\4\0\1\130\1\0\1\130\2\0"+
+    "\2\130\1\0\3\130\1\0\1\130\2\0\4\130\2\0"+
+    "\1\130\40\0";
 
   private static int [] zzUnpackTrans() {
-    int [] result = new int[10609];
+    int [] result = new int[11845];
     int offset = 0;
     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
     return result;
@@ -621,11 +640,11 @@ public final class StandardTokenizerImpl
   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
 
   private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\1\0\1\11\27\1\2\11\1\1\15\0\1\1\1\0"+
-    "\1\1\10\0\1\1\15\0\1\1\57\0";
+    "\1\0\1\11\32\1\15\0\1\1\1\0\1\1\10\0"+
+    "\1\1\15\0\1\1\71\0";
 
   private static int [] zzUnpackAttribute() {
-    int [] result = new int[114];
+    int [] result = new int[124];
     int offset = 0;
     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
     return result;

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex Thu Aug  4 21:05:45 2011
@@ -71,6 +71,8 @@ MidLetterEx    = ({MidLetter} | {MidNumL
 MidNumericEx   = ({MidNum} | {MidNumLet})      ({Format} | {Extend})*
 ExtendNumLetEx = {ExtendNumLet}                ({Format} | {Extend})*
 
+HanEx = {Han} ({Format} | {Extend})*
+HiraganaEx = {Hiragana} ({Format} | {Extend})*
 
 %{
   /** Alphanumeric sequences */
@@ -178,8 +180,8 @@ ExtendNumLetEx = {ExtendNumLet}         
 
 // UAX#29 WB14.  Any ÷ Any
 //
-{Han} { return IDEOGRAPHIC_TYPE; }
-{Hiragana} { return HIRAGANA_TYPE; }
+{HanEx} { return IDEOGRAPHIC_TYPE; }
+{HiraganaEx} { return HIRAGANA_TYPE; }
 
 
 // UAX#29 WB3.   CR × LF

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java Thu Aug  4 21:05:45 2011
@@ -22,7 +22,8 @@ import org.apache.lucene.analysis.tokena
 import java.io.Reader;
 import java.io.IOException;
 
-interface StandardTokenizerInterface {
+/** @lucene.internal */
+public interface StandardTokenizerInterface {
 
   /** This character denotes the end of file */
   public static final int YYEOF = -1;
@@ -30,12 +31,12 @@ interface StandardTokenizerInterface {
   /**
    * Copies the matched text into the CharTermAttribute
    */
-  void getText(CharTermAttribute t);
+  public void getText(CharTermAttribute t);
 
   /**
    * Returns the current position.
    */
-  int yychar();
+  public int yychar();
 
   /**
    * Resets the scanner to read from a new input stream.
@@ -47,12 +48,12 @@ interface StandardTokenizerInterface {
    *
    * @param reader   the new input stream 
    */
-  void yyreset(Reader reader);
+  public void yyreset(Reader reader);
 
   /**
    * Returns the length of the matched text region.
    */
-  int yylength();
+  public int yylength();
 
   /**
    * Resumes scanning until the next regular expression is matched,
@@ -61,6 +62,6 @@ interface StandardTokenizerInterface {
    * @return      the next token, {@link #YYEOF} on end of stream
    * @exception   IOException  if any I/O-Error occurs
    */
-  int getNextToken() throws IOException;
+  public int getNextToken() throws IOException;
 
 }

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java Thu Aug  4 21:05:45 2011
@@ -2,6 +2,7 @@ package org.apache.lucene.analysis;
 
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -217,6 +218,23 @@ public class TestStandardAnalyzer extend
         new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
   }
   
+  public void testCombiningMarks() throws Exception {
+    checkOneTerm(a, "ざ", "ざ"); // hiragana
+    checkOneTerm(a, "ザ", "ザ"); // katakana
+    checkOneTerm(a, "壹゙", "壹゙"); // ideographic
+    checkOneTerm(a, "아゙",  "아゙"); // hangul
+  }
+  
+  /** @deprecated remove this and sophisticated backwards layer in 5.0 */
+  @Deprecated
+  public void testCombiningMarksBackwards() throws Exception {
+    Analyzer a = new StandardAnalyzer(Version.LUCENE_33);
+    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
+    checkOneTerm(a, "ザ", "ザ"); // katakana Works
+    checkOneTerm(a, "壹゙", "壹"); // ideographic Bug
+    checkOneTerm(a, "아゙",  "아゙"); // hangul Works
+  }
+
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);