You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2005/10/05 05:53:24 UTC
svn commit: r294982 - in /lucene/java/trunk/src/java/org/apache/lucene/analysis/standard: StandardTokenizer.jj StandardTokenizerTokenManager.java
Author: otis
Date: Tue Oct 4 20:53:19 2005
New Revision: 294982
URL: http://svn.apache.org/viewcvs?rev=294982&view=rev
Log:
- Added the Hangul Syllables Unicode range (U+AC00-U+D7AF) to fix tokenization of Korean - http://issues.apache.org/jira/browse/LUCENE-444
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj?rev=294982&r1=294981&r2=294982&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj Tue Oct 4 20:53:19 2005
@@ -112,6 +112,7 @@
"\u3300"-"\u337f",
"\u3400"-"\u3d2d",
"\u4e00"-"\u9fff",
+ "\uac00"-"\ud7af",
"\uf900"-"\ufaff"
]
>
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java?rev=294982&r1=294981&r2=294982&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java Tue Oct 4 20:53:19 2005
@@ -41,7 +41,7 @@
jjCheckNAdd(jjnextStates[start + 1]);
}
static final long[] jjbitVec0 = {
- 0x1ff0000000000000L, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L
+ 0x1ff0000000000000L, 0xffffffffffffc000L, 0xfffff000ffffffffL, 0x6000000007fffffL
};
static final long[] jjbitVec2 = {
0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
@@ -56,30 +56,33 @@
0x3fffffffffffL, 0x0L, 0x0L, 0x0L
};
static final long[] jjbitVec6 = {
- 0x1600L, 0x0L, 0x0L, 0x0L
+ 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffL, 0x0L
};
static final long[] jjbitVec7 = {
- 0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L
+ 0x1600L, 0x0L, 0x0L, 0x0L
};
static final long[] jjbitVec8 = {
- 0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L
+ 0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L
};
static final long[] jjbitVec9 = {
- 0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L
+ 0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L
};
static final long[] jjbitVec10 = {
- 0x0L, 0xffc000000000L, 0x0L, 0x0L
+ 0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L
};
static final long[] jjbitVec11 = {
- 0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L
+ 0x0L, 0xffc000000000L, 0x0L, 0x0L
};
static final long[] jjbitVec12 = {
- 0x0L, 0x3ffL, 0x0L, 0x0L
+ 0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L
};
static final long[] jjbitVec13 = {
- 0xfffffffeL, 0x0L, 0x0L, 0x0L
+ 0x0L, 0x3ffL, 0x0L, 0x0L
};
static final long[] jjbitVec14 = {
+ 0xfffffffeL, 0x0L, 0x0L, 0x0L
+};
+static final long[] jjbitVec15 = {
0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL
};
private final int jjMoveNfa_0(int startState, int curPos)
@@ -978,6 +981,8 @@
return ((jjbitVec4[i2] & l2) != 0L);
case 61:
return ((jjbitVec5[i2] & l2) != 0L);
+ case 215:
+ return ((jjbitVec6[i2] & l2) != 0L);
default :
if ((jjbitVec0[i1] & l1) != 0L)
return true;
@@ -989,18 +994,18 @@
switch(hiByte)
{
case 6:
- return ((jjbitVec8[i2] & l2) != 0L);
- case 11:
return ((jjbitVec9[i2] & l2) != 0L);
- case 13:
+ case 11:
return ((jjbitVec10[i2] & l2) != 0L);
- case 14:
+ case 13:
return ((jjbitVec11[i2] & l2) != 0L);
- case 16:
+ case 14:
return ((jjbitVec12[i2] & l2) != 0L);
+ case 16:
+ return ((jjbitVec13[i2] & l2) != 0L);
default :
- if ((jjbitVec6[i1] & l1) != 0L)
- if ((jjbitVec7[i2] & l2) == 0L)
+ if ((jjbitVec7[i1] & l1) != 0L)
+ if ((jjbitVec8[i2] & l2) == 0L)
return false;
else
return true;
@@ -1012,9 +1017,9 @@
switch(hiByte)
{
case 0:
- return ((jjbitVec14[i2] & l2) != 0L);
+ return ((jjbitVec15[i2] & l2) != 0L);
default :
- if ((jjbitVec13[i1] & l1) != 0L)
+ if ((jjbitVec14[i1] & l1) != 0L)
return true;
return false;
}