You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2005/10/05 05:53:24 UTC

svn commit: r294982 - in /lucene/java/trunk/src/java/org/apache/lucene/analysis/standard: StandardTokenizer.jj StandardTokenizerTokenManager.java

Author: otis
Date: Tue Oct  4 20:53:19 2005
New Revision: 294982

URL: http://svn.apache.org/viewcvs?rev=294982&view=rev
Log:
- Added the Hangul Syllables Unicode range (\uac00-\ud7af) to the tokenizer grammar to fix tokenization of Korean - http://issues.apache.org/jira/browse/LUCENE-444

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj?rev=294982&r1=294981&r2=294982&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj Tue Oct  4 20:53:19 2005
@@ -112,6 +112,7 @@
        "\u3300"-"\u337f",
        "\u3400"-"\u3d2d",
        "\u4e00"-"\u9fff",
+       "\uac00"-"\ud7af",
        "\uf900"-"\ufaff"
       ]
   >

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java?rev=294982&r1=294981&r2=294982&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java Tue Oct  4 20:53:19 2005
@@ -41,7 +41,7 @@
    jjCheckNAdd(jjnextStates[start + 1]);
 }
 static final long[] jjbitVec0 = {
-   0x1ff0000000000000L, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L
+   0x1ff0000000000000L, 0xffffffffffffc000L, 0xfffff000ffffffffL, 0x6000000007fffffL
 };
 static final long[] jjbitVec2 = {
    0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
@@ -56,30 +56,33 @@
    0x3fffffffffffL, 0x0L, 0x0L, 0x0L
 };
 static final long[] jjbitVec6 = {
-   0x1600L, 0x0L, 0x0L, 0x0L
+   0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffL, 0x0L
 };
 static final long[] jjbitVec7 = {
-   0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L
+   0x1600L, 0x0L, 0x0L, 0x0L
 };
 static final long[] jjbitVec8 = {
-   0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L
+   0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L
 };
 static final long[] jjbitVec9 = {
-   0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L
+   0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L
 };
 static final long[] jjbitVec10 = {
-   0x0L, 0xffc000000000L, 0x0L, 0x0L
+   0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L
 };
 static final long[] jjbitVec11 = {
-   0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L
+   0x0L, 0xffc000000000L, 0x0L, 0x0L
 };
 static final long[] jjbitVec12 = {
-   0x0L, 0x3ffL, 0x0L, 0x0L
+   0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L
 };
 static final long[] jjbitVec13 = {
-   0xfffffffeL, 0x0L, 0x0L, 0x0L
+   0x0L, 0x3ffL, 0x0L, 0x0L
 };
 static final long[] jjbitVec14 = {
+   0xfffffffeL, 0x0L, 0x0L, 0x0L
+};
+static final long[] jjbitVec15 = {
    0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL
 };
 private final int jjMoveNfa_0(int startState, int curPos)
@@ -978,6 +981,8 @@
          return ((jjbitVec4[i2] & l2) != 0L);
       case 61:
          return ((jjbitVec5[i2] & l2) != 0L);
+      case 215:
+         return ((jjbitVec6[i2] & l2) != 0L);
       default : 
          if ((jjbitVec0[i1] & l1) != 0L)
             return true;
@@ -989,18 +994,18 @@
    switch(hiByte)
    {
       case 6:
-         return ((jjbitVec8[i2] & l2) != 0L);
-      case 11:
          return ((jjbitVec9[i2] & l2) != 0L);
-      case 13:
+      case 11:
          return ((jjbitVec10[i2] & l2) != 0L);
-      case 14:
+      case 13:
          return ((jjbitVec11[i2] & l2) != 0L);
-      case 16:
+      case 14:
          return ((jjbitVec12[i2] & l2) != 0L);
+      case 16:
+         return ((jjbitVec13[i2] & l2) != 0L);
       default : 
-         if ((jjbitVec6[i1] & l1) != 0L)
-            if ((jjbitVec7[i2] & l2) == 0L)
+         if ((jjbitVec7[i1] & l1) != 0L)
+            if ((jjbitVec8[i2] & l2) == 0L)
                return false;
             else
             return true;
@@ -1012,9 +1017,9 @@
    switch(hiByte)
    {
       case 0:
-         return ((jjbitVec14[i2] & l2) != 0L);
+         return ((jjbitVec15[i2] & l2) != 0L);
       default : 
-         if ((jjbitVec13[i1] & l1) != 0L)
+         if ((jjbitVec14[i1] & l1) != 0L)
             return true;
          return false;
    }