You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/06 00:23:16 UTC
svn commit: r1227891 - in
/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src:
java/org/apache/lucene/analysis/kuromoji/dict/
java/org/apache/lucene/analysis/kuromoji/trie/
resources/org/apache/lucene/analysis/kuromoji/dict/ resources/org/apa...
Author: uschindler
Date: Thu Jan 5 23:23:15 2012
New Revision: 1227891
URL: http://svn.apache.org/viewvc?rev=1227891&view=rev
Log:
LUCENE-3305: Make real singletons out of ConnectionCosts and DoubleArrayTrie, also rename files. Some cleanup.
Added:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.dat
- copied unchanged from r1227881, lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/cc.dat
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.dat
- copied unchanged from r1227881, lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/trie/dat.dat
Removed:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/cc.dat
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/trie/dat.dat
Modified:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java Thu Jan 5 23:23:15 2012
@@ -148,6 +148,7 @@ public final class CharacterDefinition {
public void write(String baseDir) throws IOException {
String filename = baseDir + File.separator + getClass().getName().replace('.', File.separatorChar) + FILENAME_SUFFIX;
+ new File(filename).getParentFile().mkdirs();
OutputStream os = new FileOutputStream(filename);
try {
os = new BufferedOutputStream(os);
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java Thu Jan 5 23:23:15 2012
@@ -22,6 +22,7 @@ import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -30,22 +31,50 @@ import org.apache.lucene.store.DataOutpu
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
-public class ConnectionCosts {
+public final class ConnectionCosts {
- public static final String FILENAME = "cc.dat";
+ public static final String FILENAME_SUFFIX = ".dat";
public static final String HEADER = "kuromoji_cc";
public static final int VERSION = 1;
- private short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
+ private final short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
- public ConnectionCosts() {
- }
-
- private ConnectionCosts(short[][] costs) {
+ private ConnectionCosts(boolean dummy) throws IOException {
+ assert dummy;
+
+ IOException priorE = null;
+ InputStream is = null;
+ short[][] costs = null;
+ try {
+ is = getClass().getResourceAsStream(getClass().getSimpleName()+FILENAME_SUFFIX);
+ if (is == null)
+ throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/')+FILENAME_SUFFIX);
+ is = new BufferedInputStream(is);
+ final DataInput in = new InputStreamDataInput(is);
+ CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
+ costs = new short[in.readVInt()][];
+ for (int j = 0; j < costs.length; j++) {
+ final int len = in.readVInt();
+ final short[] a = new short[len];
+ for (int i = 0; i < len; i++) {
+ a[i] = in.readShort();
+ }
+ costs[j] = a;
+ }
+ } catch (IOException ioe) {
+ priorE = ioe;
+ } finally {
+ IOUtils.closeWhileHandlingException(priorE, is);
+ }
+
this.costs = costs;
}
+ /**
+ * Constructor for building. TODO: remove write access
+ */
public ConnectionCosts(int forwardSize, int backwardSize) {
this.costs = new short[backwardSize][forwardSize];
}
@@ -65,8 +94,9 @@ public class ConnectionCosts {
}
}
- public void write(String directoryname) throws IOException {
- String filename = directoryname + File.separator + FILENAME;
+ public void write(String baseDir) throws IOException {
+ String filename = baseDir + File.separator + getClass().getName().replace('.', File.separatorChar) + FILENAME_SUFFIX;
+ new File(filename).getParentFile().mkdirs();
OutputStream os = new FileOutputStream(filename);
try {
os = new BufferedOutputStream(os);
@@ -84,29 +114,15 @@ public class ConnectionCosts {
}
}
- public static ConnectionCosts getInstance() throws IOException, ClassNotFoundException {
- InputStream is = ConnectionCosts.class.getResourceAsStream(FILENAME);
- return read(is);
- }
-
- public static ConnectionCosts read(InputStream is) throws IOException, ClassNotFoundException {
- is = new BufferedInputStream(is);
- try {
- final DataInput in = new InputStreamDataInput(is);
- CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
- final short[][] costs = new short[in.readVInt()][];
- for (int j = 0; j < costs.length; j++) {
- final int len = in.readVInt();
- final short[] a = new short[len];
- for (int i = 0; i < len; i++) {
- a[i] = in.readShort();
- }
- costs[j] = a;
- }
- return new ConnectionCosts(costs);
- } finally {
- is.close();
+ public synchronized static ConnectionCosts getInstance() {
+ if (singleton == null) try {
+ singleton = new ConnectionCosts(true);
+ } catch (IOException ioe) {
+ throw new RuntimeException("Cannot load ConnectionCosts.", ioe);
}
+ return singleton;
}
+ private static ConnectionCosts singleton;
+
}
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java Thu Jan 5 23:23:15 2012
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.EOFException;
+import java.io.FileNotFoundException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
@@ -37,10 +38,11 @@ import org.apache.lucene.store.DataOutpu
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
public class DoubleArrayTrie {
- public static final String FILENAME = "dat.dat";
+ public static final String FILENAME_SUFFIX = ".dat";
public static final String HEADER = "kuromoji_double_arr_trie";
public static final int VERSION = 1;
@@ -60,16 +62,68 @@ public class DoubleArrayTrie {
private int tailIndex = TAIL_OFFSET;
+ /**
+ * Constructor for building. TODO: remove write access
+ */
+ public DoubleArrayTrie() {
+ }
- public DoubleArrayTrie(){
+ private DoubleArrayTrie(boolean dummy) throws IOException {
+ assert dummy;
+
+ IOException priorE = null;
+ InputStream is = null;
+ try {
+ is = getClass().getResourceAsStream(getClass().getSimpleName()+FILENAME_SUFFIX);
+ if (is == null)
+ throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/')+FILENAME_SUFFIX);
+ is = new BufferedInputStream(is);
+ final DataInput in = new InputStreamDataInput(is);
+ CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
+ int baseCheckSize = in.readVInt(); // Read size of baseArr and checkArr
+ int tailSize = in.readVInt(); // Read size of tailArr
+
+ ReadableByteChannel channel = Channels.newChannel(is);
+
+ int toRead, read;
+ ByteBuffer tmpBaseBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4); // The size is 4 times the baseCheckSize since it is the length of array
+ read = channel.read(tmpBaseBuffer);
+ if (read != toRead) {
+ throw new EOFException("Cannot read DoubleArrayTree");
+ }
+ tmpBaseBuffer.rewind();
+ baseBuffer = tmpBaseBuffer.asIntBuffer().asReadOnlyBuffer();
+
+ ByteBuffer tmpCheckBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4);
+ read = channel.read(tmpCheckBuffer);
+ if (read != toRead) {
+ throw new EOFException("Cannot read DoubleArrayTree");
+ }
+ tmpCheckBuffer.rewind();
+ checkBuffer = tmpCheckBuffer.asIntBuffer().asReadOnlyBuffer();
+
+ ByteBuffer tmpTailBuffer = ByteBuffer.allocateDirect(toRead = tailSize * 2); // The size is 2 times the tailSize since it is the length of array
+ read = channel.read(tmpTailBuffer);
+ if (read != toRead) {
+ throw new EOFException("Cannot read DoubleArrayTree");
+ }
+ tmpTailBuffer.rewind();
+ tailBuffer = tmpTailBuffer.asCharBuffer().asReadOnlyBuffer();
+ } catch (IOException ioe) {
+ priorE = ioe;
+ } finally {
+ IOUtils.closeWhileHandlingException(priorE, is);
+ }
}
+
/**
* Write to file
* @throws IOException
*/
- public void write(String directoryname) throws IOException {
- String filename = directoryname + File.separator + FILENAME;
+ public void write(String baseDir) throws IOException {
+ String filename = baseDir + File.separator + getClass().getName().replace('.', File.separatorChar) + FILENAME_SUFFIX;
+ new File(filename).getParentFile().mkdirs();
baseBuffer.rewind();
checkBuffer.rewind();
@@ -108,58 +162,6 @@ public class DoubleArrayTrie {
}
}
- public static DoubleArrayTrie getInstance() throws IOException {
- InputStream is = DoubleArrayTrie.class.getResourceAsStream(FILENAME);
- return read(is);
- }
-
- /**
- * Load Stored data
- * @throws IOException
- */
- public static DoubleArrayTrie read(InputStream is) throws IOException {
- is = new BufferedInputStream(is);
- try {
- final DataInput in = new InputStreamDataInput(is);
- CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
- int baseCheckSize = in.readVInt(); // Read size of baseArr and checkArr
- int tailSize = in.readVInt(); // Read size of tailArr
-
- ReadableByteChannel channel = Channels.newChannel(is);
-
- DoubleArrayTrie trie = new DoubleArrayTrie();
-
- int toRead, read;
- ByteBuffer tmpBaseBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4); // The size is 4 times the baseCheckSize since it is the length of array
- read = channel.read(tmpBaseBuffer);
- if (read != toRead) {
- throw new EOFException("Cannot read DoubleArrayTree");
- }
- tmpBaseBuffer.rewind();
- trie.baseBuffer = tmpBaseBuffer.asIntBuffer().asReadOnlyBuffer();
-
- ByteBuffer tmpCheckBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4);
- read = channel.read(tmpCheckBuffer);
- if (read != toRead) {
- throw new EOFException("Cannot read DoubleArrayTree");
- }
- tmpCheckBuffer.rewind();
- trie.checkBuffer = tmpCheckBuffer.asIntBuffer().asReadOnlyBuffer();
-
- ByteBuffer tmpTailBuffer = ByteBuffer.allocateDirect(toRead = tailSize * 2); // The size is 2 times the tailSize since it is the length of array
- read = channel.read(tmpTailBuffer);
- if (read != toRead) {
- throw new EOFException("Cannot read DoubleArrayTree");
- }
- tmpTailBuffer.rewind();
- trie.tailBuffer = tmpTailBuffer.asCharBuffer().asReadOnlyBuffer();
-
- return trie;
- } finally {
- is.close();
- }
- }
-
/**
* Construct double array trie which is equivalent to input trie
* @param trie normal trie which contains all dictionary words
@@ -337,4 +339,16 @@ public class DoubleArrayTrie {
node = node.getChildren()[0]; // Move to next node
}
}
+
+ public synchronized static DoubleArrayTrie getInstance() {
+ if (singleton == null) try {
+ singleton = new DoubleArrayTrie(true);
+ } catch (IOException ioe) {
+ throw new RuntimeException("Cannot load DoubleArrayTrie.", ioe);
+ }
+ return singleton;
+ }
+
+ private static DoubleArrayTrie singleton;
+
}
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java Thu Jan 5 23:23:15 2012
@@ -24,58 +24,19 @@ import java.io.IOException;
import org.apache.lucene.analysis.kuromoji.trie.DoubleArrayTrie;
import org.apache.lucene.analysis.kuromoji.trie.Trie;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
import org.junit.Test;
public class DoubleArrayTrieTest extends LuceneTestCase {
@Test
- public void testBuild() {
+ public void test() {
Trie trie = getTrie();
DoubleArrayTrie doubleArrayTrie = new DoubleArrayTrie();
doubleArrayTrie.build(trie);
- }
-
- @Test
- public void testWrite() throws IOException {
- Trie trie = getTrie();
-
- DoubleArrayTrie doubleArrayTrie = new DoubleArrayTrie();
- doubleArrayTrie.build(trie);
-
- try{
- doubleArrayTrie.write("/some/path/which/is/not/exist");
- fail();
- }catch(IOException e){
-
- }
-
- File dir = _TestUtil.getTempDir("testWrite");
- dir.mkdirs();
- doubleArrayTrie.write(dir.getCanonicalPath());
-
- assertTrue(new File(dir, "dat.dat").exists());
-
- }
-
- @Test
- public void testLookup() throws IOException {
- Trie trie = getTrie();
-
- DoubleArrayTrie doubleArrayTrie = new DoubleArrayTrie();
- doubleArrayTrie.build(trie);
-
- File dir = _TestUtil.getTempDir("testLookup");
- dir.mkdirs();
- doubleArrayTrie.write(dir.getCanonicalPath());
-
- doubleArrayTrie = DoubleArrayTrie.read(new FileInputStream(dir.getCanonicalPath() + File.separator + DoubleArrayTrie.FILENAME));
-
assertEquals(0, doubleArrayTrie.lookup("a"));
assertTrue(doubleArrayTrie.lookup("abc") > 0);
assertTrue(doubleArrayTrie.lookup("あいう") > 0);
assertTrue(doubleArrayTrie.lookup("xyz") < 0);
-
}
private Trie getTrie() {
@@ -87,5 +48,4 @@ public class DoubleArrayTrieTest extends
return trie;
}
-
}
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java Thu Jan 5 23:23:15 2012
@@ -45,7 +45,7 @@ public class DictionaryBuilder {
System.out.print(" building double array trie...");
DoubleArrayTrie trie = DoubleArrayTrieBuilder.build(tokenInfoBuilder.entrySet());
- trie.write(outputDirname+File.separatorChar+DoubleArrayTrie.class.getPackage().getName().replace('.',File.separatorChar));
+ trie.write(outputDirname);
System.out.println(" done");
System.out.print(" processing target map...");
@@ -72,7 +72,7 @@ public class DictionaryBuilder {
System.out.print("building connection costs...");
ConnectionCosts connectionCosts
= ConnectionCostsBuilder.build(inputDirname + File.separator + "matrix.def");
- connectionCosts.write(outputDirname+File.separatorChar+ConnectionCosts.class.getPackage().getName().replace('.',File.separatorChar));
+ connectionCosts.write(outputDirname);
System.out.println("done");
}
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java Thu Jan 5 23:23:15 2012
@@ -113,6 +113,7 @@ public class TokenInfoDictionaryWriter {
}
protected void writeTargetMap(String filename) throws IOException {
+ new File(filename).getParentFile().mkdirs();
OutputStream os = new FileOutputStream(filename);
try {
os = new BufferedOutputStream(os);
@@ -149,6 +150,7 @@ public class TokenInfoDictionaryWriter {
}
protected void writeDictionary(String filename) throws IOException {
+ new File(filename).getParentFile().mkdirs();
final FileOutputStream os = new FileOutputStream(filename);
try {
final DataOutput out = new OutputStreamDataOutput(os);