You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/01/09 00:54:45 UTC
svn commit: r1228995 - in
/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src:
resources/org/apache/lucene/analysis/kuromoji/dict/
test/org/apache/lucene/analysis/kuromoji/
tools/java/org/apache/lucene/analysis/kuromoji/util/
Author: rmuir
Date: Sun Jan 8 23:54:44 2012
New Revision: 1228995
URL: http://svn.apache.org/viewvc?rev=1228995&view=rev
Log:
LUCENE-3305: start ords at 0 so we never write any empty targetmap array
Modified:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$fst.dat
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$targetMap.dat
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$fst.dat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary%24fst.dat?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$targetMap.dat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary%24targetMap.dat?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java Sun Jan 8 23:54:44 2012
@@ -24,11 +24,11 @@ public class SimpleBench {
for (int i = 0; i < 4; i++) {
long ms = System.currentTimeMillis();
- for (int j = 0; j < 50000; j++) {
+ for (int j = 0; j < 500000; j++) {
consume(a, "魔女狩大将マシュー・ホプキンス。 魔女狩大将マシュー・ホプキンス。");
}
long ms2 = System.currentTimeMillis();
- for (int j = 0; j < 50000; j++) {
+ for (int j = 0; j < 500000; j++) {
consume(b, "魔女狩大将マシュー・ホプキンス。 魔女狩大将マシュー・ホプキンス。");
}
long ms3 = System.currentTimeMillis();
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java Sun Jan 8 23:54:44 2012
@@ -223,8 +223,7 @@ public abstract class BinaryDictionaryWr
int prev = 0;
for (int j = 0; j < targetMapSize; j++) {
final int size = targetMapComponentSizes[j];
- // note: size is 0 for ONLY wordID 0 of TokenInfoDictionary
- // this is because the FST uses 0 for NO_OUTPUT...
+ assert size > 0;
if (size == 1) {
int delta = targetMap[j][0] - prev;
assert delta >= 0;
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java Sun Jan 8 23:54:44 2012
@@ -255,14 +255,14 @@ public class TokenInfoDictionaryBuilder
PositiveIntOutputs o = PositiveIntOutputs.getSingleton(true);
Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE2, o);
IntsRef scratch = new IntsRef();
- long ord = 1;
+ long ord = 0;
for (String entry : unique) {
scratch.grow(entry.length());
scratch.length = entry.length();
for (int i = 0; i < entry.length(); i++) {
scratch.ints[i] = (int) entry.charAt(i);
}
- b.add(scratch, ord);
+ b.add(scratch, o.get(ord));
ord++;
}
words = b.finish();