You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/01/09 00:54:45 UTC

svn commit: r1228995 - in /lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src: resources/org/apache/lucene/analysis/kuromoji/dict/ test/org/apache/lucene/analysis/kuromoji/ tools/java/org/apache/lucene/analysis/kuromoji/util/

Author: rmuir
Date: Sun Jan  8 23:54:44 2012
New Revision: 1228995

URL: http://svn.apache.org/viewvc?rev=1228995&view=rev
Log:
LUCENE-3305: start ords at 0 so we never write any empty targetmap array

Modified:
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$fst.dat
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$targetMap.dat
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$fst.dat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary%24fst.dat?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$targetMap.dat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary%24targetMap.dat?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/SimpleBench.java Sun Jan  8 23:54:44 2012
@@ -24,11 +24,11 @@ public class SimpleBench {
     
     for (int i = 0; i < 4; i++) {
       long ms = System.currentTimeMillis();
-      for (int j = 0; j < 50000; j++) {
+      for (int j = 0; j < 500000; j++) {
         consume(a, "魔女狩大将マシュー・ホプキンス。 魔女狩大将マシュー・ホプキンス。");
       }
       long ms2 = System.currentTimeMillis();
-      for (int j = 0; j < 50000; j++) {
+      for (int j = 0; j < 500000; j++) {
         consume(b, "魔女狩大将マシュー・ホプキンス。 魔女狩大将マシュー・ホプキンス。");
       }
       long ms3 = System.currentTimeMillis();

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/BinaryDictionaryWriter.java Sun Jan  8 23:54:44 2012
@@ -223,8 +223,7 @@ public abstract class BinaryDictionaryWr
       int prev = 0;
       for (int j = 0; j < targetMapSize; j++) {
         final int size = targetMapComponentSizes[j];
-        // note: size is 0 for ONLY wordID 0 of TokenInfoDictionary
-        // this is because the FST uses 0 for NO_OUTPUT... 
+        assert size > 0;
         if (size == 1) {
           int delta = targetMap[j][0] - prev;
           assert delta >= 0;

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java?rev=1228995&r1=1228994&r2=1228995&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java Sun Jan  8 23:54:44 2012
@@ -255,14 +255,14 @@ public class TokenInfoDictionaryBuilder 
     PositiveIntOutputs o = PositiveIntOutputs.getSingleton(true);
     Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE2, o);
     IntsRef scratch = new IntsRef();
-    long ord = 1;
+    long ord = 0;
     for (String entry : unique) {
       scratch.grow(entry.length());
       scratch.length = entry.length();
       for (int i = 0; i < entry.length(); i++) {
         scratch.ints[i] = (int) entry.charAt(i);
       }
-      b.add(scratch, ord);
+      b.add(scratch, o.get(ord));
       ord++;
     }
     words = b.finish();