You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/04 23:05:47 UTC
svn commit: r1154014 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/backwards/ lucene/backwards/src/test-framework/
lucene/backwards/src/test/
lucene/src/java/org/apache/lucene/analysis/standard/
lucene/src/java/org/apache/lucene/analysis/stand...
Author: rmuir
Date: Thu Aug 4 21:05:45 2011
New Revision: 1154014
URL: http://svn.apache.org/viewvc?rev=1154014&view=rev
Log:
LUCENE-3358: StandardTokenizer wrongly discarded combining marks attached to Han/Hiragana
Added:
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/
- copied from r1154005, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java
- copied unchanged from r1154005, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
- copied unchanged from r1154005, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/backwards/ (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test/ (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test-framework/ (props changed)
lucene/dev/branches/branch_3x/lucene/build.xml
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
lucene/dev/branches/branch_3x/solr/ (props changed)
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Thu Aug 4 21:05:45 2011
@@ -33,6 +33,11 @@ Bug fixes
* LUCENE-3340: Fixed case where IndexWriter was not flushing at
exactly maxBufferedDeleteTerms (Mike McCandless)
+* LUCENE-3358: StandardTokenizer wrongly discarded combining marks attached
+ to Han or Hiragana characters. This is fixed if you supply Version >= 3.4.
+ If you supply a previous Lucene version, you get the old buggy behavior
+ for backwards compatibility. (Trejkaz, Robert Muir)
+
New Features
* LUCENE-3290: Added FieldInvertState.numUniqueTerms
Modified: lucene/dev/branches/branch_3x/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/build.xml?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/build.xml Thu Aug 4 21:05:45 2011
@@ -523,6 +523,9 @@
<jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
+ <jflex file="src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex"
+ outdir="src/java/org/apache/lucene/analysis/standard/std31"
+ nobak="on" />
</target>
<target name="jflex-UAX29URLEmailTokenizer" depends="jflex-check" if="jflex.present">
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Thu Aug 4 21:05:45 2011
@@ -34,6 +34,9 @@ import java.util.Set;
* <p>You must specify the required {@link Version}
* compatibility when creating StandardAnalyzer:
* <ul>
+ * <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ * from their combining characters. If you use a previous version number,
+ * you get the exact broken behavior for backwards compatibility.
* <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
* and StopFilter correctly handles Unicode 4.0 supplementary characters
* in stopwords. {@link ClassicTokenizer} and {@link ClassicAnalyzer}
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Thu Aug 4 21:05:45 2011
@@ -18,6 +18,7 @@
package org.apache.lucene.analysis.standard;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.std31.StandardTokenizerImpl31;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -42,6 +43,9 @@ import java.io.Reader;
* <p>You must specify the required {@link Version}
* compatibility when creating StandardTokenizer:
* <ul>
+ * <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ * from their combining characters. If you use a previous version number,
+ * you get the exact broken behavior for backwards compatibility.
* <li> As of 3.1, StandardTokenizer implements Unicode text segmentation.
* If you use a previous version number, you get the exact behavior of
* {@link ClassicTokenizer} for backwards compatibility.
@@ -147,8 +151,13 @@ public final class StandardTokenizer ext
}
private final void init(Reader input, Version matchVersion) {
- this.scanner = matchVersion.onOrAfter(Version.LUCENE_31) ?
- new StandardTokenizerImpl(input) : new ClassicTokenizerImpl(input);
+ if (matchVersion.onOrAfter(Version.LUCENE_34)) {
+ this.scanner = new StandardTokenizerImpl(input);
+ } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
+ this.scanner = new StandardTokenizerImpl31(input);
+ } else {
+ this.scanner = new ClassicTokenizerImpl(input);
+ }
if (matchVersion.onOrAfter(Version.LUCENE_24)) {
replaceInvalidAcronym = true;
} else {
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java Thu Aug 4 21:05:45 2011
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/4/11 4:07 PM */
package org.apache.lucene.analysis.standard;
@@ -209,10 +209,10 @@ public final class StandardTokenizerImpl
private static final String ZZ_ACTION_PACKED_0 =
"\1\0\23\1\1\2\1\3\1\4\1\1\1\5\1\6"+
"\1\7\1\10\15\0\1\2\1\0\1\2\10\0\1\3"+
- "\15\0\1\2\57\0";
+ "\15\0\1\2\71\0";
private static int [] zzUnpackAction() {
- int [] result = new int[114];
+ int [] result = new int[124];
int offset = 0;
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
return result;
@@ -240,21 +240,22 @@ public final class StandardTokenizerImpl
"\0\0\0\147\0\316\0\u0135\0\u019c\0\u0203\0\u026a\0\u02d1"+
"\0\u0338\0\u039f\0\u0406\0\u046d\0\u04d4\0\u053b\0\u05a2\0\u0609"+
"\0\u0670\0\u06d7\0\u073e\0\u07a5\0\u080c\0\u0873\0\u08da\0\u0941"+
- "\0\u09a8\0\147\0\147\0\u0a0f\0\316\0\u0135\0\u019c\0\u0203"+
- "\0\u026a\0\u0a76\0\u0add\0\u0b44\0\u0bab\0\u046d\0\u0c12\0\u0c79"+
- "\0\u0ce0\0\u0d47\0\u0dae\0\u0e15\0\u0e7c\0\u0338\0\u039f\0\u0ee3"+
- "\0\u0f4a\0\u0fb1\0\u1018\0\u107f\0\u10e6\0\u114d\0\u11b4\0\u121b"+
- "\0\u1282\0\u12e9\0\u1350\0\u13b7\0\u141e\0\u1485\0\u14ec\0\u1553"+
- "\0\u15ba\0\u0941\0\u1621\0\u1688\0\u16ef\0\u1756\0\u17bd\0\u1824"+
- "\0\u188b\0\u18f2\0\u1959\0\u19c0\0\u1a27\0\u1a8e\0\u1af5\0\u1b5c"+
- "\0\u1bc3\0\u1c2a\0\u1c91\0\u1cf8\0\u1d5f\0\u1dc6\0\u1e2d\0\u1e94"+
- "\0\u1efb\0\u1f62\0\u1fc9\0\u2030\0\u2097\0\u20fe\0\u2165\0\u21cc"+
- "\0\u2233\0\u229a\0\u2301\0\u2368\0\u23cf\0\u2436\0\u249d\0\u2504"+
- "\0\u256b\0\u25d2\0\u2639\0\u26a0\0\u2707\0\u276e\0\u27d5\0\u283c"+
- "\0\u28a3\0\u290a";
+ "\0\u09a8\0\u0a0f\0\u0a76\0\u0add\0\316\0\u0135\0\u019c\0\u0203"+
+ "\0\u026a\0\u0b44\0\u0bab\0\u0c12\0\u0c79\0\u046d\0\u0ce0\0\u0d47"+
+ "\0\u0dae\0\u0e15\0\u0e7c\0\u0ee3\0\u0f4a\0\u0338\0\u039f\0\u0fb1"+
+ "\0\u1018\0\u107f\0\u10e6\0\u114d\0\u11b4\0\u121b\0\u1282\0\u12e9"+
+ "\0\u1350\0\u13b7\0\u141e\0\u1485\0\u14ec\0\u1553\0\u15ba\0\u1621"+
+ "\0\u1688\0\u0941\0\u16ef\0\u1756\0\u17bd\0\u1824\0\u188b\0\u18f2"+
+ "\0\u1959\0\u19c0\0\u1a27\0\u1a8e\0\u1af5\0\u1b5c\0\u1bc3\0\u1c2a"+
+ "\0\u1c91\0\u1cf8\0\u1d5f\0\u1dc6\0\u1e2d\0\u1e94\0\u1efb\0\u1f62"+
+ "\0\u1fc9\0\u2030\0\u2097\0\u20fe\0\u2165\0\u21cc\0\u2233\0\u229a"+
+ "\0\u2301\0\u2368\0\u23cf\0\u2436\0\u249d\0\u2504\0\u256b\0\u25d2"+
+ "\0\u2639\0\u26a0\0\u2707\0\u276e\0\u27d5\0\u283c\0\u28a3\0\u290a"+
+ "\0\u2971\0\u29d8\0\u2a3f\0\u2aa6\0\u2b0d\0\u2b74\0\u2bdb\0\u2c42"+
+ "\0\u2ca9\0\u2d10\0\u2d77\0\u2dde";
private static int [] zzUnpackRowMap() {
- int [] result = new int[114];
+ int [] result = new int[124];
int offset = 0;
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
return result;
@@ -367,223 +368,241 @@ public final class StandardTokenizerImpl
"\1\57\3\0\1\75\11\0\1\46\2\0\1\76\16\0"+
"\1\77\2\0\1\100\21\0\1\101\17\0\1\25\1\102"+
"\1\26\1\103\3\0\1\102\1\0\1\102\2\0\1\25"+
- "\142\0\2\31\4\0\1\35\1\0\1\36\1\0\1\37"+
- "\1\0\1\40\1\0\1\41\1\0\1\104\3\0\1\43"+
- "\5\0\1\44\3\0\1\105\11\0\1\46\2\0\1\106"+
- "\16\0\1\107\2\0\1\110\41\0\1\25\1\34\1\52"+
- "\1\0\1\53\1\0\1\53\1\54\1\0\1\34\2\0"+
- "\1\34\2\0\1\25\11\0\3\25\5\0\1\25\1\0"+
- "\1\25\1\0\1\25\4\0\1\25\4\0\1\25\1\0"+
- "\2\25\4\0\1\25\5\0\1\25\3\0\1\25\4\0"+
- "\5\25\10\0\1\52\1\0\2\25\1\0\1\25\10\0"+
- "\1\25\24\0\1\25\1\0\1\52\7\0\2\25\2\0"+
- "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
- "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
- "\4\25\1\0\5\25\1\52\1\0\1\25\1\0\1\25"+
- "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\1\25"+
- "\2\0\1\25\17\0\1\25\3\0\1\25\5\0\2\25"+
- "\3\0\1\25\4\0\3\25\4\0\1\25\1\0\1\25"+
- "\2\0\1\25\1\0\2\25\4\0\1\25\1\0\1\25"+
- "\3\0\2\25\1\0\1\25\5\0\3\25\1\0\1\25"+
- "\10\0\1\25\1\0\2\52\1\0\1\25\10\0\1\25"+
- "\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
- "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
- "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
- "\1\0\3\25\1\0\1\25\1\0\2\25\4\0\3\25"+
- "\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
- "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
- "\1\0\1\25\5\0\3\25\5\0\1\25\2\0\2\25"+
- "\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\25"+
- "\6\0\1\25\56\0\1\25\3\0\1\25\2\0\1\25"+
- "\3\0\1\25\5\0\1\25\7\0\1\25\4\0\2\25"+
- "\3\0\2\25\1\0\1\25\4\0\1\25\1\0\1\25"+
- "\2\0\2\25\1\0\3\25\1\0\1\25\2\0\4\25"+
- "\2\0\1\25\41\0\1\35\1\0\1\36\1\0\1\37"+
- "\1\0\1\40\1\0\1\41\1\0\1\111\3\0\1\43"+
- "\5\0\1\44\3\0\1\112\11\0\1\46\2\0\1\113"+
- "\16\0\1\114\2\0\1\115\41\0\1\25\2\52\2\0"+
- "\2\116\1\54\1\0\1\52\2\0\1\25\1\0\1\35"+
+ "\142\0\2\31\16\0\1\104\15\0\1\105\14\0\1\106"+
+ "\16\0\1\107\2\0\1\110\42\0\1\32\7\0\1\32"+
+ "\16\0\1\111\15\0\1\112\14\0\1\113\16\0\1\114"+
+ "\2\0\1\115\42\0\1\33\7\0\1\33\4\0\1\35"+
"\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\41"+
- "\1\0\1\117\3\0\1\120\5\0\1\121\3\0\1\122"+
- "\11\0\1\46\2\0\1\123\16\0\1\124\2\0\1\125"+
- "\41\0\1\25\1\53\7\0\1\53\2\0\1\25\1\0"+
- "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
- "\1\41\1\0\1\126\3\0\1\43\5\0\1\44\3\0"+
- "\1\127\11\0\1\46\2\0\1\130\16\0\1\131\2\0"+
- "\1\132\21\0\1\101\17\0\1\25\1\54\1\52\1\103"+
- "\3\0\1\54\1\0\1\54\2\0\1\25\2\0\1\26"+
- "\11\0\3\25\5\0\1\25\1\0\1\25\1\0\1\25"+
- "\4\0\1\25\4\0\1\26\1\0\2\26\4\0\1\25"+
- "\5\0\1\25\3\0\1\26\4\0\1\26\2\25\2\26"+
- "\10\0\1\26\1\0\2\25\1\0\1\26\10\0\1\25"+
- "\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
- "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
- "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
- "\1\0\3\25\1\0\1\26\1\0\2\25\4\0\3\25"+
- "\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
- "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
- "\1\0\1\25\5\0\1\25\2\26\5\0\1\25\2\0"+
- "\1\25\1\26\4\0\1\25\2\0\1\25\1\0\1\25"+
- "\103\0\2\26\6\0\1\26\56\0\1\26\3\0\1\26"+
- "\2\0\1\26\3\0\1\26\5\0\1\26\7\0\1\26"+
- "\4\0\2\26\3\0\2\26\1\0\1\26\4\0\1\26"+
- "\1\0\1\26\2\0\2\26\1\0\3\26\1\0\1\26"+
- "\2\0\4\26\2\0\1\26\53\0\1\133\3\0\1\134"+
- "\5\0\1\135\3\0\1\136\14\0\1\137\16\0\1\140"+
- "\2\0\1\141\42\0\1\64\1\26\6\0\1\64\4\0"+
- "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
- "\1\41\1\0\1\142\3\0\1\56\5\0\1\57\3\0"+
- "\1\143\11\0\1\46\2\0\1\144\16\0\1\145\2\0"+
- "\1\146\21\0\1\101\17\0\1\25\1\65\1\26\1\103"+
- "\3\0\1\65\1\0\1\65\2\0\1\25\2\0\1\27"+
- "\37\0\1\27\1\0\2\27\16\0\1\27\4\0\1\27"+
- "\2\0\2\27\15\0\1\27\132\0\1\27\153\0\2\27"+
- "\11\0\1\27\115\0\2\27\6\0\1\27\56\0\1\27"+
- "\3\0\1\27\2\0\1\27\3\0\1\27\5\0\1\27"+
- "\7\0\1\27\4\0\2\27\3\0\2\27\1\0\1\27"+
- "\4\0\1\27\1\0\1\27\2\0\2\27\1\0\3\27"+
- "\1\0\1\27\2\0\4\27\2\0\1\27\153\0\1\27"+
- "\35\0\1\102\11\0\3\25\5\0\1\25\1\0\1\25"+
- "\1\0\1\25\4\0\1\25\4\0\1\102\1\0\2\102"+
- "\4\0\1\25\5\0\1\25\3\0\1\102\4\0\1\102"+
- "\2\25\2\102\10\0\1\26\1\0\2\25\1\0\1\102"+
- "\10\0\1\25\24\0\1\25\3\0\1\25\6\0\2\25"+
- "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
- "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
- "\1\0\1\25\1\0\3\25\1\0\1\102\1\0\2\25"+
- "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
- "\21\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
- "\5\0\3\25\1\0\1\25\5\0\1\25\2\102\5\0"+
- "\1\25\2\0\1\25\1\102\4\0\1\25\2\0\1\25"+
- "\1\0\1\25\103\0\2\102\6\0\1\102\56\0\1\102"+
- "\3\0\1\102\2\0\1\102\3\0\1\102\5\0\1\102"+
- "\7\0\1\102\4\0\2\102\3\0\2\102\1\0\1\102"+
- "\4\0\1\102\1\0\1\102\2\0\2\102\1\0\3\102"+
- "\1\0\1\102\2\0\4\102\2\0\1\102\153\0\1\103"+
- "\46\0\1\147\15\0\1\150\14\0\1\151\16\0\1\152"+
- "\2\0\1\153\21\0\1\101\20\0\1\103\1\0\1\103"+
- "\3\0\1\54\1\0\1\103\5\0\1\34\11\0\3\25"+
+ "\1\0\1\116\3\0\1\43\5\0\1\44\3\0\1\117"+
+ "\11\0\1\46\2\0\1\120\16\0\1\121\2\0\1\122"+
+ "\41\0\1\25\1\34\1\52\1\0\1\53\1\0\1\53"+
+ "\1\54\1\0\1\34\2\0\1\34\2\0\1\25\11\0"+
+ "\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+
+ "\1\25\4\0\1\25\1\0\2\25\4\0\1\25\5\0"+
+ "\1\25\3\0\1\25\4\0\5\25\10\0\1\52\1\0"+
+ "\2\25\1\0\1\25\10\0\1\25\24\0\1\25\1\0"+
+ "\1\52\7\0\2\25\2\0\5\25\2\0\2\25\4\0"+
+ "\6\25\1\0\2\25\4\0\5\25\1\0\5\25\1\0"+
+ "\2\25\1\0\3\25\1\0\4\25\1\0\5\25\1\52"+
+ "\1\0\1\25\1\0\1\25\1\0\3\25\2\0\1\25"+
+ "\1\0\1\25\1\0\1\25\2\0\1\25\17\0\1\25"+
+ "\3\0\1\25\5\0\2\25\3\0\1\25\4\0\3\25"+
+ "\4\0\1\25\1\0\1\25\2\0\1\25\1\0\2\25"+
+ "\4\0\1\25\1\0\1\25\3\0\2\25\1\0\1\25"+
+ "\5\0\3\25\1\0\1\25\10\0\1\25\1\0\2\52"+
+ "\1\0\1\25\10\0\1\25\24\0\1\25\3\0\1\25"+
+ "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
+ "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
+ "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\25"+
+ "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
+ "\1\0\2\25\21\0\1\25\3\0\1\25\5\0\1\25"+
+ "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\3\25"+
+ "\5\0\1\25\2\0\2\25\4\0\1\25\2\0\1\25"+
+ "\1\0\1\25\103\0\2\25\6\0\1\25\56\0\1\25"+
+ "\3\0\1\25\2\0\1\25\3\0\1\25\5\0\1\25"+
+ "\7\0\1\25\4\0\2\25\3\0\2\25\1\0\1\25"+
+ "\4\0\1\25\1\0\1\25\2\0\2\25\1\0\3\25"+
+ "\1\0\1\25\2\0\4\25\2\0\1\25\41\0\1\35"+
+ "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\41"+
+ "\1\0\1\123\3\0\1\43\5\0\1\44\3\0\1\124"+
+ "\11\0\1\46\2\0\1\125\16\0\1\126\2\0\1\127"+
+ "\41\0\1\25\2\52\2\0\2\130\1\54\1\0\1\52"+
+ "\2\0\1\25\1\0\1\35\1\0\1\36\1\0\1\37"+
+ "\1\0\1\40\1\0\1\41\1\0\1\131\3\0\1\132"+
+ "\5\0\1\133\3\0\1\134\11\0\1\46\2\0\1\135"+
+ "\16\0\1\136\2\0\1\137\41\0\1\25\1\53\7\0"+
+ "\1\53\2\0\1\25\1\0\1\35\1\0\1\36\1\0"+
+ "\1\37\1\0\1\40\1\0\1\41\1\0\1\140\3\0"+
+ "\1\43\5\0\1\44\3\0\1\141\11\0\1\46\2\0"+
+ "\1\142\16\0\1\143\2\0\1\144\21\0\1\101\17\0"+
+ "\1\25\1\54\1\52\1\103\3\0\1\54\1\0\1\54"+
+ "\2\0\1\25\2\0\1\26\11\0\3\25\5\0\1\25"+
+ "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\26"+
+ "\1\0\2\26\4\0\1\25\5\0\1\25\3\0\1\26"+
+ "\4\0\1\26\2\25\2\26\10\0\1\26\1\0\2\25"+
+ "\1\0\1\26\10\0\1\25\24\0\1\25\3\0\1\25"+
+ "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
+ "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
+ "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\26"+
+ "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
+ "\1\0\2\25\21\0\1\25\3\0\1\25\5\0\1\25"+
+ "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
+ "\2\26\5\0\1\25\2\0\1\25\1\26\4\0\1\25"+
+ "\2\0\1\25\1\0\1\25\103\0\2\26\6\0\1\26"+
+ "\56\0\1\26\3\0\1\26\2\0\1\26\3\0\1\26"+
+ "\5\0\1\26\7\0\1\26\4\0\2\26\3\0\2\26"+
+ "\1\0\1\26\4\0\1\26\1\0\1\26\2\0\2\26"+
+ "\1\0\3\26\1\0\1\26\2\0\4\26\2\0\1\26"+
+ "\53\0\1\145\3\0\1\146\5\0\1\147\3\0\1\150"+
+ "\14\0\1\151\16\0\1\152\2\0\1\153\42\0\1\64"+
+ "\1\26\6\0\1\64\4\0\1\35\1\0\1\36\1\0"+
+ "\1\37\1\0\1\40\1\0\1\41\1\0\1\154\3\0"+
+ "\1\56\5\0\1\57\3\0\1\155\11\0\1\46\2\0"+
+ "\1\156\16\0\1\157\2\0\1\160\21\0\1\101\17\0"+
+ "\1\25\1\65\1\26\1\103\3\0\1\65\1\0\1\65"+
+ "\2\0\1\25\2\0\1\27\37\0\1\27\1\0\2\27"+
+ "\16\0\1\27\4\0\1\27\2\0\2\27\15\0\1\27"+
+ "\132\0\1\27\153\0\2\27\11\0\1\27\115\0\2\27"+
+ "\6\0\1\27\56\0\1\27\3\0\1\27\2\0\1\27"+
+ "\3\0\1\27\5\0\1\27\7\0\1\27\4\0\2\27"+
+ "\3\0\2\27\1\0\1\27\4\0\1\27\1\0\1\27"+
+ "\2\0\2\27\1\0\3\27\1\0\1\27\2\0\4\27"+
+ "\2\0\1\27\153\0\1\27\35\0\1\102\11\0\3\25"+
"\5\0\1\25\1\0\1\25\1\0\1\25\4\0\1\25"+
- "\4\0\1\34\1\0\2\34\4\0\1\25\5\0\1\25"+
- "\3\0\1\34\4\0\1\34\2\25\2\34\10\0\1\52"+
- "\1\0\2\25\1\0\1\34\10\0\1\25\24\0\1\25"+
+ "\4\0\1\102\1\0\2\102\4\0\1\25\5\0\1\25"+
+ "\3\0\1\102\4\0\1\102\2\25\2\102\10\0\1\26"+
+ "\1\0\2\25\1\0\1\102\10\0\1\25\24\0\1\25"+
"\3\0\1\25\6\0\2\25\5\0\1\25\1\0\1\25"+
"\1\0\1\25\1\0\11\25\2\0\1\25\4\0\1\25"+
"\4\0\6\25\2\0\1\25\1\0\1\25\1\0\3\25"+
- "\1\0\1\34\1\0\2\25\4\0\3\25\1\0\1\25"+
+ "\1\0\1\102\1\0\2\25\4\0\3\25\1\0\1\25"+
"\10\0\1\25\1\0\2\25\21\0\1\25\3\0\1\25"+
"\5\0\1\25\32\0\15\25\5\0\3\25\1\0\1\25"+
- "\5\0\1\25\2\34\5\0\1\25\2\0\1\25\1\34"+
- "\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\34"+
- "\6\0\1\34\56\0\1\34\3\0\1\34\2\0\1\34"+
- "\3\0\1\34\5\0\1\34\7\0\1\34\4\0\2\34"+
- "\3\0\2\34\1\0\1\34\4\0\1\34\1\0\1\34"+
- "\2\0\2\34\1\0\3\34\1\0\1\34\2\0\4\34"+
- "\2\0\1\34\42\0\1\52\11\0\3\25\5\0\1\25"+
- "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\52"+
- "\1\0\2\52\4\0\1\25\5\0\1\25\3\0\1\52"+
- "\4\0\1\52\2\25\2\52\10\0\1\52\1\0\2\25"+
- "\1\0\1\52\10\0\1\25\24\0\1\25\3\0\1\25"+
- "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
- "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
- "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\52"+
- "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
- "\1\0\2\25\21\0\1\25\3\0\1\25\5\0\1\25"+
- "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
- "\2\52\5\0\1\25\2\0\1\25\1\52\4\0\1\25"+
- "\2\0\1\25\1\0\1\25\103\0\2\52\6\0\1\52"+
- "\56\0\1\52\3\0\1\52\2\0\1\52\3\0\1\52"+
- "\5\0\1\52\7\0\1\52\4\0\2\52\3\0\2\52"+
- "\1\0\1\52\4\0\1\52\1\0\1\52\2\0\2\52"+
- "\1\0\3\52\1\0\1\52\2\0\4\52\2\0\1\52"+
- "\53\0\1\154\3\0\1\155\5\0\1\156\3\0\1\157"+
- "\14\0\1\160\16\0\1\161\2\0\1\162\42\0\1\116"+
- "\1\52\6\0\1\116\5\0\1\53\11\0\3\25\5\0"+
+ "\5\0\1\25\2\102\5\0\1\25\2\0\1\25\1\102"+
+ "\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\102"+
+ "\6\0\1\102\56\0\1\102\3\0\1\102\2\0\1\102"+
+ "\3\0\1\102\5\0\1\102\7\0\1\102\4\0\2\102"+
+ "\3\0\2\102\1\0\1\102\4\0\1\102\1\0\1\102"+
+ "\2\0\2\102\1\0\3\102\1\0\1\102\2\0\4\102"+
+ "\2\0\1\102\153\0\1\103\46\0\1\161\15\0\1\162"+
+ "\14\0\1\163\16\0\1\164\2\0\1\165\21\0\1\101"+
+ "\20\0\1\103\1\0\1\103\3\0\1\54\1\0\1\103"+
+ "\5\0\1\32\37\0\1\32\1\0\2\32\16\0\1\32"+
+ "\4\0\1\32\2\0\2\32\15\0\1\32\132\0\1\32"+
+ "\153\0\2\32\11\0\1\32\115\0\2\32\6\0\1\32"+
+ "\56\0\1\32\3\0\1\32\2\0\1\32\3\0\1\32"+
+ "\5\0\1\32\7\0\1\32\4\0\2\32\3\0\2\32"+
+ "\1\0\1\32\4\0\1\32\1\0\1\32\2\0\2\32"+
+ "\1\0\3\32\1\0\1\32\2\0\4\32\2\0\1\32"+
+ "\42\0\1\33\37\0\1\33\1\0\2\33\16\0\1\33"+
+ "\4\0\1\33\2\0\2\33\15\0\1\33\132\0\1\33"+
+ "\153\0\2\33\11\0\1\33\115\0\2\33\6\0\1\33"+
+ "\56\0\1\33\3\0\1\33\2\0\1\33\3\0\1\33"+
+ "\5\0\1\33\7\0\1\33\4\0\2\33\3\0\2\33"+
+ "\1\0\1\33\4\0\1\33\1\0\1\33\2\0\2\33"+
+ "\1\0\3\33\1\0\1\33\2\0\4\33\2\0\1\33"+
+ "\42\0\1\34\11\0\3\25\5\0\1\25\1\0\1\25"+
+ "\1\0\1\25\4\0\1\25\4\0\1\34\1\0\2\34"+
+ "\4\0\1\25\5\0\1\25\3\0\1\34\4\0\1\34"+
+ "\2\25\2\34\10\0\1\52\1\0\2\25\1\0\1\34"+
+ "\10\0\1\25\24\0\1\25\3\0\1\25\6\0\2\25"+
+ "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
+ "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
+ "\1\0\1\25\1\0\3\25\1\0\1\34\1\0\2\25"+
+ "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
+ "\21\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
+ "\5\0\3\25\1\0\1\25\5\0\1\25\2\34\5\0"+
+ "\1\25\2\0\1\25\1\34\4\0\1\25\2\0\1\25"+
+ "\1\0\1\25\103\0\2\34\6\0\1\34\56\0\1\34"+
+ "\3\0\1\34\2\0\1\34\3\0\1\34\5\0\1\34"+
+ "\7\0\1\34\4\0\2\34\3\0\2\34\1\0\1\34"+
+ "\4\0\1\34\1\0\1\34\2\0\2\34\1\0\3\34"+
+ "\1\0\1\34\2\0\4\34\2\0\1\34\42\0\1\52"+
+ "\11\0\3\25\5\0\1\25\1\0\1\25\1\0\1\25"+
+ "\4\0\1\25\4\0\1\52\1\0\2\52\4\0\1\25"+
+ "\5\0\1\25\3\0\1\52\4\0\1\52\2\25\2\52"+
+ "\10\0\1\52\1\0\2\25\1\0\1\52\10\0\1\25"+
+ "\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
+ "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
+ "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
+ "\1\0\3\25\1\0\1\52\1\0\2\25\4\0\3\25"+
+ "\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
+ "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
+ "\1\0\1\25\5\0\1\25\2\52\5\0\1\25\2\0"+
+ "\1\25\1\52\4\0\1\25\2\0\1\25\1\0\1\25"+
+ "\103\0\2\52\6\0\1\52\56\0\1\52\3\0\1\52"+
+ "\2\0\1\52\3\0\1\52\5\0\1\52\7\0\1\52"+
+ "\4\0\2\52\3\0\2\52\1\0\1\52\4\0\1\52"+
+ "\1\0\1\52\2\0\2\52\1\0\3\52\1\0\1\52"+
+ "\2\0\4\52\2\0\1\52\53\0\1\166\3\0\1\167"+
+ "\5\0\1\170\3\0\1\171\14\0\1\172\16\0\1\173"+
+ "\2\0\1\174\42\0\1\130\1\52\6\0\1\130\5\0"+
+ "\1\53\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+
+ "\1\25\4\0\1\25\4\0\1\53\1\0\2\53\4\0"+
+ "\1\25\5\0\1\25\3\0\1\53\4\0\1\53\2\25"+
+ "\2\53\12\0\2\25\1\0\1\53\10\0\1\25\24\0"+
+ "\1\25\11\0\2\25\2\0\5\25\2\0\2\25\4\0"+
+ "\6\25\1\0\2\25\4\0\5\25\1\0\5\25\1\0"+
+ "\2\25\1\0\3\25\1\0\4\25\1\0\5\25\2\0"+
+ "\1\25\1\0\1\25\1\0\3\25\2\0\1\25\1\0"+
+ "\1\25\1\0\1\25\2\0\1\25\17\0\1\25\3\0"+
+ "\1\25\5\0\2\25\3\0\1\25\4\0\3\25\4\0"+
+ "\1\25\1\0\1\25\2\0\1\25\1\0\2\25\4\0"+
+ "\1\25\1\0\1\25\3\0\2\25\1\0\1\25\5\0"+
+ "\3\25\1\0\1\25\10\0\1\25\4\0\1\25\10\0"+
+ "\1\25\24\0\1\25\3\0\1\25\6\0\2\25\5\0"+
+ "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
+ "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
+ "\1\25\1\0\3\25\1\0\1\53\1\0\2\25\4\0"+
+ "\3\25\1\0\1\25\10\0\1\25\1\0\2\25\21\0"+
+ "\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+
+ "\3\25\1\0\1\25\5\0\1\25\2\53\5\0\1\25"+
+ "\2\0\1\25\1\53\4\0\1\25\2\0\1\25\1\0"+
+ "\1\25\103\0\2\53\6\0\1\53\56\0\1\53\3\0"+
+ "\1\53\2\0\1\53\3\0\1\53\5\0\1\53\7\0"+
+ "\1\53\4\0\2\53\3\0\2\53\1\0\1\53\4\0"+
+ "\1\53\1\0\1\53\2\0\2\53\1\0\3\53\1\0"+
+ "\1\53\2\0\4\53\2\0\1\53\42\0\1\54\11\0"+
+ "\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+
+ "\1\25\4\0\1\54\1\0\2\54\4\0\1\25\5\0"+
+ "\1\25\3\0\1\54\4\0\1\54\2\25\2\54\10\0"+
+ "\1\52\1\0\2\25\1\0\1\54\10\0\1\25\24\0"+
+ "\1\25\3\0\1\25\6\0\2\25\5\0\1\25\1\0"+
+ "\1\25\1\0\1\25\1\0\11\25\2\0\1\25\4\0"+
+ "\1\25\4\0\6\25\2\0\1\25\1\0\1\25\1\0"+
+ "\3\25\1\0\1\54\1\0\2\25\4\0\3\25\1\0"+
+ "\1\25\10\0\1\25\1\0\2\25\21\0\1\25\3\0"+
+ "\1\25\5\0\1\25\32\0\15\25\5\0\3\25\1\0"+
+ "\1\25\5\0\1\25\2\54\5\0\1\25\2\0\1\25"+
+ "\1\54\4\0\1\25\2\0\1\25\1\0\1\25\103\0"+
+ "\2\54\6\0\1\54\56\0\1\54\3\0\1\54\2\0"+
+ "\1\54\3\0\1\54\5\0\1\54\7\0\1\54\4\0"+
+ "\2\54\3\0\2\54\1\0\1\54\4\0\1\54\1\0"+
+ "\1\54\2\0\2\54\1\0\3\54\1\0\1\54\2\0"+
+ "\4\54\2\0\1\54\42\0\1\64\37\0\1\64\1\0"+
+ "\2\64\16\0\1\64\4\0\1\64\2\0\2\64\10\0"+
+ "\1\26\4\0\1\64\37\0\1\26\102\0\1\26\147\0"+
+ "\2\26\134\0\1\64\153\0\2\64\11\0\1\64\115\0"+
+ "\2\64\6\0\1\64\56\0\1\64\3\0\1\64\2\0"+
+ "\1\64\3\0\1\64\5\0\1\64\7\0\1\64\4\0"+
+ "\2\64\3\0\2\64\1\0\1\64\4\0\1\64\1\0"+
+ "\1\64\2\0\2\64\1\0\3\64\1\0\1\64\2\0"+
+ "\4\64\2\0\1\64\42\0\1\65\11\0\3\25\5\0"+
"\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+
- "\1\53\1\0\2\53\4\0\1\25\5\0\1\25\3\0"+
- "\1\53\4\0\1\53\2\25\2\53\12\0\2\25\1\0"+
- "\1\53\10\0\1\25\24\0\1\25\11\0\2\25\2\0"+
- "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
- "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
- "\4\25\1\0\5\25\2\0\1\25\1\0\1\25\1\0"+
- "\3\25\2\0\1\25\1\0\1\25\1\0\1\25\2\0"+
- "\1\25\17\0\1\25\3\0\1\25\5\0\2\25\3\0"+
- "\1\25\4\0\3\25\4\0\1\25\1\0\1\25\2\0"+
- "\1\25\1\0\2\25\4\0\1\25\1\0\1\25\3\0"+
- "\2\25\1\0\1\25\5\0\3\25\1\0\1\25\10\0"+
- "\1\25\4\0\1\25\10\0\1\25\24\0\1\25\3\0"+
+ "\1\65\1\0\2\65\4\0\1\25\5\0\1\25\3\0"+
+ "\1\65\4\0\1\65\2\25\2\65\10\0\1\26\1\0"+
+ "\2\25\1\0\1\65\10\0\1\25\24\0\1\25\3\0"+
"\1\25\6\0\2\25\5\0\1\25\1\0\1\25\1\0"+
"\1\25\1\0\11\25\2\0\1\25\4\0\1\25\4\0"+
"\6\25\2\0\1\25\1\0\1\25\1\0\3\25\1\0"+
- "\1\53\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+
+ "\1\65\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+
"\1\25\1\0\2\25\21\0\1\25\3\0\1\25\5\0"+
"\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+
- "\1\25\2\53\5\0\1\25\2\0\1\25\1\53\4\0"+
- "\1\25\2\0\1\25\1\0\1\25\103\0\2\53\6\0"+
- "\1\53\56\0\1\53\3\0\1\53\2\0\1\53\3\0"+
- "\1\53\5\0\1\53\7\0\1\53\4\0\2\53\3\0"+
- "\2\53\1\0\1\53\4\0\1\53\1\0\1\53\2\0"+
- "\2\53\1\0\3\53\1\0\1\53\2\0\4\53\2\0"+
- "\1\53\42\0\1\54\11\0\3\25\5\0\1\25\1\0"+
- "\1\25\1\0\1\25\4\0\1\25\4\0\1\54\1\0"+
- "\2\54\4\0\1\25\5\0\1\25\3\0\1\54\4\0"+
- "\1\54\2\25\2\54\10\0\1\52\1\0\2\25\1\0"+
- "\1\54\10\0\1\25\24\0\1\25\3\0\1\25\6\0"+
- "\2\25\5\0\1\25\1\0\1\25\1\0\1\25\1\0"+
- "\11\25\2\0\1\25\4\0\1\25\4\0\6\25\2\0"+
- "\1\25\1\0\1\25\1\0\3\25\1\0\1\54\1\0"+
- "\2\25\4\0\3\25\1\0\1\25\10\0\1\25\1\0"+
- "\2\25\21\0\1\25\3\0\1\25\5\0\1\25\32\0"+
- "\15\25\5\0\3\25\1\0\1\25\5\0\1\25\2\54"+
- "\5\0\1\25\2\0\1\25\1\54\4\0\1\25\2\0"+
- "\1\25\1\0\1\25\103\0\2\54\6\0\1\54\56\0"+
- "\1\54\3\0\1\54\2\0\1\54\3\0\1\54\5\0"+
- "\1\54\7\0\1\54\4\0\2\54\3\0\2\54\1\0"+
- "\1\54\4\0\1\54\1\0\1\54\2\0\2\54\1\0"+
- "\3\54\1\0\1\54\2\0\4\54\2\0\1\54\42\0"+
- "\1\64\37\0\1\64\1\0\2\64\16\0\1\64\4\0"+
- "\1\64\2\0\2\64\10\0\1\26\4\0\1\64\37\0"+
- "\1\26\102\0\1\26\147\0\2\26\134\0\1\64\153\0"+
- "\2\64\11\0\1\64\115\0\2\64\6\0\1\64\56\0"+
- "\1\64\3\0\1\64\2\0\1\64\3\0\1\64\5\0"+
- "\1\64\7\0\1\64\4\0\2\64\3\0\2\64\1\0"+
- "\1\64\4\0\1\64\1\0\1\64\2\0\2\64\1\0"+
- "\3\64\1\0\1\64\2\0\4\64\2\0\1\64\42\0"+
- "\1\65\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+
- "\1\25\4\0\1\25\4\0\1\65\1\0\2\65\4\0"+
- "\1\25\5\0\1\25\3\0\1\65\4\0\1\65\2\25"+
- "\2\65\10\0\1\26\1\0\2\25\1\0\1\65\10\0"+
- "\1\25\24\0\1\25\3\0\1\25\6\0\2\25\5\0"+
- "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
- "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
- "\1\25\1\0\3\25\1\0\1\65\1\0\2\25\4\0"+
- "\3\25\1\0\1\25\10\0\1\25\1\0\2\25\21\0"+
- "\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+
- "\3\25\1\0\1\25\5\0\1\25\2\65\5\0\1\25"+
- "\2\0\1\25\1\65\4\0\1\25\2\0\1\25\1\0"+
- "\1\25\103\0\2\65\6\0\1\65\56\0\1\65\3\0"+
- "\1\65\2\0\1\65\3\0\1\65\5\0\1\65\7\0"+
- "\1\65\4\0\2\65\3\0\2\65\1\0\1\65\4\0"+
- "\1\65\1\0\1\65\2\0\2\65\1\0\3\65\1\0"+
- "\1\65\2\0\4\65\2\0\1\65\42\0\1\103\37\0"+
- "\1\103\1\0\2\103\16\0\1\103\4\0\1\103\2\0"+
- "\2\103\15\0\1\103\132\0\1\103\153\0\2\103\11\0"+
- "\1\103\115\0\2\103\6\0\1\103\56\0\1\103\3\0"+
- "\1\103\2\0\1\103\3\0\1\103\5\0\1\103\7\0"+
- "\1\103\4\0\2\103\3\0\2\103\1\0\1\103\4\0"+
- "\1\103\1\0\1\103\2\0\2\103\1\0\3\103\1\0"+
- "\1\103\2\0\4\103\2\0\1\103\42\0\1\116\37\0"+
- "\1\116\1\0\2\116\16\0\1\116\4\0\1\116\2\0"+
- "\2\116\10\0\1\52\4\0\1\116\37\0\1\52\102\0"+
- "\1\52\147\0\2\52\134\0\1\116\153\0\2\116\11\0"+
- "\1\116\115\0\2\116\6\0\1\116\56\0\1\116\3\0"+
- "\1\116\2\0\1\116\3\0\1\116\5\0\1\116\7\0"+
- "\1\116\4\0\2\116\3\0\2\116\1\0\1\116\4\0"+
- "\1\116\1\0\1\116\2\0\2\116\1\0\3\116\1\0"+
- "\1\116\2\0\4\116\2\0\1\116\40\0";
+ "\1\25\2\65\5\0\1\25\2\0\1\25\1\65\4\0"+
+ "\1\25\2\0\1\25\1\0\1\25\103\0\2\65\6\0"+
+ "\1\65\56\0\1\65\3\0\1\65\2\0\1\65\3\0"+
+ "\1\65\5\0\1\65\7\0\1\65\4\0\2\65\3\0"+
+ "\2\65\1\0\1\65\4\0\1\65\1\0\1\65\2\0"+
+ "\2\65\1\0\3\65\1\0\1\65\2\0\4\65\2\0"+
+ "\1\65\42\0\1\103\37\0\1\103\1\0\2\103\16\0"+
+ "\1\103\4\0\1\103\2\0\2\103\15\0\1\103\132\0"+
+ "\1\103\153\0\2\103\11\0\1\103\115\0\2\103\6\0"+
+ "\1\103\56\0\1\103\3\0\1\103\2\0\1\103\3\0"+
+ "\1\103\5\0\1\103\7\0\1\103\4\0\2\103\3\0"+
+ "\2\103\1\0\1\103\4\0\1\103\1\0\1\103\2\0"+
+ "\2\103\1\0\3\103\1\0\1\103\2\0\4\103\2\0"+
+ "\1\103\42\0\1\130\37\0\1\130\1\0\2\130\16\0"+
+ "\1\130\4\0\1\130\2\0\2\130\10\0\1\52\4\0"+
+ "\1\130\37\0\1\52\102\0\1\52\147\0\2\52\134\0"+
+ "\1\130\153\0\2\130\11\0\1\130\115\0\2\130\6\0"+
+ "\1\130\56\0\1\130\3\0\1\130\2\0\1\130\3\0"+
+ "\1\130\5\0\1\130\7\0\1\130\4\0\2\130\3\0"+
+ "\2\130\1\0\1\130\4\0\1\130\1\0\1\130\2\0"+
+ "\2\130\1\0\3\130\1\0\1\130\2\0\4\130\2\0"+
+ "\1\130\40\0";
private static int [] zzUnpackTrans() {
- int [] result = new int[10609];
+ int [] result = new int[11845];
int offset = 0;
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
return result;
@@ -621,11 +640,11 @@ public final class StandardTokenizerImpl
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
private static final String ZZ_ATTRIBUTE_PACKED_0 =
- "\1\0\1\11\27\1\2\11\1\1\15\0\1\1\1\0"+
- "\1\1\10\0\1\1\15\0\1\1\57\0";
+ "\1\0\1\11\32\1\15\0\1\1\1\0\1\1\10\0"+
+ "\1\1\15\0\1\1\71\0";
private static int [] zzUnpackAttribute() {
- int [] result = new int[114];
+ int [] result = new int[124];
int offset = 0;
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
return result;
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex Thu Aug 4 21:05:45 2011
@@ -71,6 +71,8 @@ MidLetterEx = ({MidLetter} | {MidNumL
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
+HanEx = {Han} ({Format} | {Extend})*
+HiraganaEx = {Hiragana} ({Format} | {Extend})*
%{
/** Alphanumeric sequences */
@@ -178,8 +180,8 @@ ExtendNumLetEx = {ExtendNumLet}
// UAX#29 WB14. Any ÷ Any
//
-{Han} { return IDEOGRAPHIC_TYPE; }
-{Hiragana} { return HIRAGANA_TYPE; }
+{HanEx} { return IDEOGRAPHIC_TYPE; }
+{HiraganaEx} { return HIRAGANA_TYPE; }
// UAX#29 WB3. CR × LF
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java Thu Aug 4 21:05:45 2011
@@ -22,7 +22,8 @@ import org.apache.lucene.analysis.tokena
import java.io.Reader;
import java.io.IOException;
-interface StandardTokenizerInterface {
+/** @lucene.internal */
+public interface StandardTokenizerInterface {
/** This character denotes the end of file */
public static final int YYEOF = -1;
@@ -30,12 +31,12 @@ interface StandardTokenizerInterface {
/**
* Copies the matched text into the CharTermAttribute
*/
- void getText(CharTermAttribute t);
+ public void getText(CharTermAttribute t);
/**
* Returns the current position.
*/
- int yychar();
+ public int yychar();
/**
* Resets the scanner to read from a new input stream.
@@ -47,12 +48,12 @@ interface StandardTokenizerInterface {
*
* @param reader the new input stream
*/
- void yyreset(Reader reader);
+ public void yyreset(Reader reader);
/**
* Returns the length of the matched text region.
*/
- int yylength();
+ public int yylength();
/**
* Resumes scanning until the next regular expression is matched,
@@ -61,6 +62,6 @@ interface StandardTokenizerInterface {
* @return the next token, {@link #YYEOF} on end of stream
* @exception IOException if any I/O-Error occurs
*/
- int getNextToken() throws IOException;
+ public int getNextToken() throws IOException;
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java?rev=1154014&r1=1154013&r2=1154014&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java Thu Aug 4 21:05:45 2011
@@ -2,6 +2,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@@ -217,6 +218,23 @@ public class TestStandardAnalyzer extend
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+ public void testCombiningMarks() throws Exception {
+ checkOneTerm(a, "ざ", "ざ"); // hiragana
+ checkOneTerm(a, "ザ", "ザ"); // katakana
+ checkOneTerm(a, "壹゙", "壹゙"); // ideographic
+ checkOneTerm(a, "아゙", "아゙"); // hangul
+ }
+
+ /** @deprecated remove this and sophisticated backwards layer in 5.0 */
+ @Deprecated
+ public void testCombiningMarksBackwards() throws Exception {
+ Analyzer a = new StandardAnalyzer(Version.LUCENE_33);
+ checkOneTerm(a, "ざ", "さ"); // hiragana Bug
+ checkOneTerm(a, "ザ", "ザ"); // katakana Works
+ checkOneTerm(a, "壹゙", "壹"); // ideographic Bug
+ checkOneTerm(a, "아゙", "아゙"); // hangul Works
+ }
+
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);