You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/09 10:41:58 UTC
svn commit: r1229076 - in /lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src: java/org/apache/lucene/analysis/kuromoji/dict/ tools/java/org/apache/lucene/analysis/kuromoji/util/
Author: uschindler
Date: Mon Jan 9 09:41:57 2012
New Revision: 1229076
URL: http://svn.apache.org/viewvc?rev=1229076&view=rev
Log:
LUCENE-3305: Some cleanups
Modified:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java?rev=1229076&r1=1229075&r2=1229076&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/BinaryDictionary.java Mon Jan 9 09:41:57 2012
@@ -60,9 +60,7 @@ public abstract class BinaryDictionary i
String[] inflTypeDict = null;
ByteBuffer buffer = null;
try {
- mapIS = getClass().getResourceAsStream(getClass().getSimpleName() + TARGETMAP_FILENAME_SUFFIX);
- if (mapIS == null)
- throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + TARGETMAP_FILENAME_SUFFIX);
+ mapIS = getResource(TARGETMAP_FILENAME_SUFFIX);
mapIS = new BufferedInputStream(mapIS);
DataInput in = new InputStreamDataInput(mapIS);
CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION);
@@ -81,10 +79,9 @@ public abstract class BinaryDictionary i
if (sourceId + 1 != targetMapOffsets.length)
throw new IOException("targetMap file format broken");
targetMapOffsets[sourceId] = targetMap.length;
+ mapIS.close(); mapIS = null;
- posIS = getClass().getResourceAsStream(getClass().getSimpleName() + POSDICT_FILENAME_SUFFIX);
- if (posIS == null)
- throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + POSDICT_FILENAME_SUFFIX);
+ posIS = getResource(POSDICT_FILENAME_SUFFIX);
posIS = new BufferedInputStream(posIS);
in = new InputStreamDataInput(posIS);
CodecUtil.checkHeader(in, POSDICT_HEADER, VERSION, VERSION);
@@ -92,10 +89,9 @@ public abstract class BinaryDictionary i
for (int j = 0; j < posDict.length; j++) {
posDict[j] = in.readString();
}
+ posIS.close(); posIS = null;
- inflIS = getClass().getResourceAsStream(getClass().getSimpleName() + INFLDICT_FILENAME_SUFFIX);
- if (inflIS == null)
- throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + INFLDICT_FILENAME_SUFFIX);
+ inflIS = getResource(INFLDICT_FILENAME_SUFFIX);
inflIS = new BufferedInputStream(inflIS);
in = new InputStreamDataInput(inflIS);
CodecUtil.checkHeader(in, INFLDICT_HEADER, VERSION, VERSION);
@@ -106,10 +102,10 @@ public abstract class BinaryDictionary i
inflTypeDict[j] = in.readString();
inflFormDict[j] = in.readString();
}
+ inflIS.close(); inflIS = null;
- dictIS = getClass().getResourceAsStream(getClass().getSimpleName() + DICT_FILENAME_SUFFIX);
- if (dictIS == null)
- throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + DICT_FILENAME_SUFFIX);
+ dictIS = getResource(DICT_FILENAME_SUFFIX);
+ // no buffering here, as we load in one large buffer
in = new InputStreamDataInput(dictIS);
CodecUtil.checkHeader(in, DICT_HEADER, VERSION, VERSION);
final int size = in.readVInt();
@@ -119,6 +115,7 @@ public abstract class BinaryDictionary i
if (read != size) {
throw new EOFException("Cannot read whole dictionary");
}
+ dictIS.close(); dictIS = null;
buffer = tmpBuffer.asReadOnlyBuffer();
} catch (IOException ioe) {
priorE = ioe;
@@ -134,6 +131,13 @@ public abstract class BinaryDictionary i
this.buffer = buffer;
}
+ protected final InputStream getResource(String suffix) throws IOException {
+ InputStream is = getClass().getResourceAsStream(getClass().getSimpleName() + suffix);
+ if (is == null)
+ throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + suffix);
+ return is;
+ }
+
public void lookupWordIds(int sourceId, IntsRef ref) {
ref.ints = targetMap;
ref.offset = targetMapOffsets[sourceId];
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java?rev=1229076&r1=1229075&r2=1229076&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary.java Mon Jan 9 09:41:57 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.kurom
* limitations under the License.
*/
+import java.io.BufferedInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.FileNotFoundException;
@@ -38,9 +39,8 @@ public final class TokenInfoDictionary e
InputStream is = null;
FST<Long> fst = null;
try {
- is = getClass().getResourceAsStream(getClass().getSimpleName() + FST_FILENAME_SUFFIX);
- if (is == null)
- throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/') + FST_FILENAME_SUFFIX);
+ is = getResource(FST_FILENAME_SUFFIX);
+ is = new BufferedInputStream(is);
fst = new FST<Long>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton(true));
} catch (IOException ioe) {
priorE = ioe;
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java?rev=1229076&r1=1229075&r2=1229076&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/UnknownDictionaryBuilder.java Mon Jan 9 09:41:57 2012
@@ -71,8 +71,11 @@ public class UnknownDictionaryBuilder {
while ((line = lineReader.readLine()) != null) {
// note: unk.def only has 10 fields, it simplifies the writer to just append empty reading and pronunciation,
// even though the unknown dictionary returns hardcoded null here.
- lines.add(CSVUtil.parse(line + ",*,*")); // Probably we don't need to validate entry
+ final String[] parsed = CSVUtil.parse(line + ",*,*"); // Probably we don't need to validate entry
+ lines.add(parsed);
+ dictionary.noteInflection(parsed); // for completeness; I think unk.def has no inflections...
}
+ dictionary.finalizeInflections(); // should also be no-op
Collections.sort(lines, new Comparator<String[]>() {
public int compare(String[] left, String[] right) {