You are viewing a plain text version of this content. The link to the canonical version was not preserved in this copy.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/09 10:41:58 UTC

svn commit: r1229076 - in /lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src: java/org/apache/lucene/analysis/kuromoji/dict/ tools/java/org/apache/lucene/analysis/kuromoji/util/

Author: uschindler
Date: Mon Jan  9 09:41:57 2012
New Revision: 1229076

URL: http://svn.apache.org/viewvc?rev=1229076&view=rev
Log:
LUCENE-3305: Some cleanups

Modified:
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java?rev=1229076&r1=1229075&r2=1229076&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java Mon Jan  9 09:41:57 2012
@@ -60,9 +60,7 @@ public abstract class BinaryDictionary i
     String[] inflTypeDict = null;
     ByteBuffer buffer = null;
     try {
-      mapIS = getClass().getResourceAsStream(getClass().getSimpleName() + TARGETMAP_FILENAME_SUFFIX);
-      if (mapIS == null)
-        throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + TARGETMAP_FILENAME_SUFFIX);
+      mapIS = getResource(TARGETMAP_FILENAME_SUFFIX);
       mapIS = new BufferedInputStream(mapIS);
       DataInput in = new InputStreamDataInput(mapIS);
       CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION);
@@ -81,10 +79,9 @@ public abstract class BinaryDictionary i
       if (sourceId + 1 != targetMapOffsets.length)
         throw new IOException("targetMap file format broken");
       targetMapOffsets[sourceId] = targetMap.length;
+      mapIS.close(); mapIS = null;
       
-      posIS = getClass().getResourceAsStream(getClass().getSimpleName() + POSDICT_FILENAME_SUFFIX);
-      if (posIS == null)
-        throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + POSDICT_FILENAME_SUFFIX);
+      posIS = getResource(POSDICT_FILENAME_SUFFIX);
       posIS = new BufferedInputStream(posIS);
       in = new InputStreamDataInput(posIS);
       CodecUtil.checkHeader(in, POSDICT_HEADER, VERSION, VERSION);
@@ -92,10 +89,9 @@ public abstract class BinaryDictionary i
       for (int j = 0; j < posDict.length; j++) {
         posDict[j] = in.readString();
       }
+      posIS.close(); posIS = null;
       
-      inflIS = getClass().getResourceAsStream(getClass().getSimpleName() + INFLDICT_FILENAME_SUFFIX);
-      if (inflIS == null)
-        throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + INFLDICT_FILENAME_SUFFIX);
+      inflIS = getResource(INFLDICT_FILENAME_SUFFIX);
       inflIS = new BufferedInputStream(inflIS);
       in = new InputStreamDataInput(inflIS);
       CodecUtil.checkHeader(in, INFLDICT_HEADER, VERSION, VERSION);
@@ -106,10 +102,10 @@ public abstract class BinaryDictionary i
         inflTypeDict[j] = in.readString();
         inflFormDict[j] = in.readString();
       }
+      inflIS.close(); inflIS = null;
 
-      dictIS = getClass().getResourceAsStream(getClass().getSimpleName() + DICT_FILENAME_SUFFIX);
-      if (dictIS == null)
-        throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + DICT_FILENAME_SUFFIX);
+      dictIS = getResource(DICT_FILENAME_SUFFIX);
+      // no buffering here, as we load in one large buffer
       in = new InputStreamDataInput(dictIS);
       CodecUtil.checkHeader(in, DICT_HEADER, VERSION, VERSION);
       final int size = in.readVInt();
@@ -119,6 +115,7 @@ public abstract class BinaryDictionary i
       if (read != size) {
         throw new EOFException("Cannot read whole dictionary");
       }
+      dictIS.close(); dictIS = null;
       buffer = tmpBuffer.asReadOnlyBuffer();
     } catch (IOException ioe) {
       priorE = ioe;
@@ -134,6 +131,13 @@ public abstract class BinaryDictionary i
     this.buffer = buffer;
   }
   
+  protected final InputStream getResource(String suffix) throws IOException {
+    InputStream is = getClass().getResourceAsStream(getClass().getSimpleName() + suffix);
+    if (is == null)
+      throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + suffix);
+    return is;
+  }
+  
   public void lookupWordIds(int sourceId, IntsRef ref) {
     ref.ints = targetMap;
     ref.offset = targetMapOffsets[sourceId];

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java?rev=1229076&r1=1229075&r2=1229076&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java Mon Jan  9 09:41:57 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.kurom
  * limitations under the License.
  */
 
+import java.io.BufferedInputStream;
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.FileNotFoundException;
@@ -38,9 +39,8 @@ public final class TokenInfoDictionary e
     InputStream is = null;
     FST<Long> fst = null;
     try {
-      is = getClass().getResourceAsStream(getClass().getSimpleName() + FST_FILENAME_SUFFIX);
-      if (is == null)
-        throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + FST_FILENAME_SUFFIX);
+      is = getResource(FST_FILENAME_SUFFIX);
+      is = new BufferedInputStream(is);
       fst = new FST<Long>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton(true));
     } catch (IOException ioe) {
       priorE = ioe;

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java?rev=1229076&r1=1229075&r2=1229076&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java Mon Jan  9 09:41:57 2012
@@ -71,8 +71,11 @@ public class UnknownDictionaryBuilder {
     while ((line = lineReader.readLine()) != null) {
       // note: unk.def only has 10 fields, it simplifies the writer to just append empty reading and pronunciation,
       // even though the unknown dictionary returns hardcoded null here.
-      lines.add(CSVUtil.parse(line + ",*,*")); // Probably we don't need to validate entry
+      final String[] parsed = CSVUtil.parse(line + ",*,*"); // Probably we don't need to validate entry
+      lines.add(parsed);
+      dictionary.noteInflection(parsed); // for completeness; I think unk.def has no inflections...
     }
+    dictionary.finalizeInflections(); // should also be no-op
     
     Collections.sort(lines, new Comparator<String[]>() {
       public int compare(String[] left, String[] right) {