You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/06 01:57:16 UTC
svn commit: r1227917 - in
/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src:
java/org/apache/lucene/analysis/kuromoji/trie/
test/org/apache/lucene/analysis/kuromoji/trie/
tools/java/org/apache/lucene/analysis/kuromoji/util/
Author: uschindler
Date: Fri Jan 6 00:57:15 2012
New Revision: 1227917
URL: http://svn.apache.org/viewvc?rev=1227917&view=rev
Log:
LUCENE-3305: DoubleArrayTrie and Trie may also be useful outside kuromoji, so make them usable on their own while still supplying the default singleton.
Modified:
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/Trie.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java
lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DoubleArrayTrieBuilder.java
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java?rev=1227917&r1=1227916&r2=1227917&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java Fri Jan 6 00:57:15 2012
@@ -24,6 +24,7 @@ import java.io.IOException;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.InputStream;
+import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
@@ -40,7 +41,7 @@ import org.apache.lucene.store.OutputStr
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
-public class DoubleArrayTrie {
+public final class DoubleArrayTrie {
public static final String FILENAME_SUFFIX = ".dat";
public static final String HEADER = "kuromoji_double_arr_trie";
@@ -63,115 +64,99 @@ public class DoubleArrayTrie {
private int tailIndex = TAIL_OFFSET;
/**
- * Constructor for building. TODO: remove write access
+ * Construct double array trie which is equivalent to input trie
+ * @param trie normal trie which contains all dictionary words
+ * TODO: maybe remove write access
*/
- public DoubleArrayTrie() {
- }
-
- private DoubleArrayTrie(boolean dummy) throws IOException {
- assert dummy;
-
- IOException priorE = null;
- InputStream is = null;
- try {
- is = getClass().getResourceAsStream(getClass().getSimpleName()+FILENAME_SUFFIX);
- if (is == null)
- throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/')+FILENAME_SUFFIX);
- is = new BufferedInputStream(is);
- final DataInput in = new InputStreamDataInput(is);
- CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
- int baseCheckSize = in.readVInt(); // Read size of baseArr and checkArr
- int tailSize = in.readVInt(); // Read size of tailArr
-
- ReadableByteChannel channel = Channels.newChannel(is);
-
- int toRead, read;
- ByteBuffer tmpBaseBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4); // The size is 4 times the baseCheckSize since it is the length of array
- read = channel.read(tmpBaseBuffer);
- if (read != toRead) {
- throw new EOFException("Cannot read DoubleArrayTree");
- }
- tmpBaseBuffer.rewind();
- baseBuffer = tmpBaseBuffer.asIntBuffer().asReadOnlyBuffer();
-
- ByteBuffer tmpCheckBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4);
- read = channel.read(tmpCheckBuffer);
- if (read != toRead) {
- throw new EOFException("Cannot read DoubleArrayTree");
- }
- tmpCheckBuffer.rewind();
- checkBuffer = tmpCheckBuffer.asIntBuffer().asReadOnlyBuffer();
-
- ByteBuffer tmpTailBuffer = ByteBuffer.allocateDirect(toRead = tailSize * 2); // The size is 2 times the tailSize since it is the length of array
- read = channel.read(tmpTailBuffer);
- if (read != toRead) {
- throw new EOFException("Cannot read DoubleArrayTree");
- }
- tmpTailBuffer.rewind();
- tailBuffer = tmpTailBuffer.asCharBuffer().asReadOnlyBuffer();
- } catch (IOException ioe) {
- priorE = ioe;
- } finally {
- IOUtils.closeWhileHandlingException(priorE, is);
- }
+ public DoubleArrayTrie(Trie trie) {
+ baseBuffer = ByteBuffer.allocate(BASE_CHECK_INITILAL_SIZE * 4).asIntBuffer();
+ baseBuffer.put(0, 1);
+ checkBuffer = ByteBuffer.allocate(BASE_CHECK_INITILAL_SIZE * 4).asIntBuffer();
+ tailBuffer = ByteBuffer.allocate(TAIL_INITIAL_SIZE * 2).asCharBuffer();
+ add(-1, 0, trie.getRoot());
}
+ public DoubleArrayTrie(InputStream is) throws IOException {
+ final DataInput in = new InputStreamDataInput(is);
+ CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
+ int baseCheckSize = in.readVInt(); // Read size of baseArr and checkArr
+ int tailSize = in.readVInt(); // Read size of tailArr
+
+ ReadableByteChannel channel = Channels.newChannel(is);
+
+ int toRead, read;
+ ByteBuffer tmpBaseBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4); // The size is 4 times the baseCheckSize since it is the length of array
+ read = channel.read(tmpBaseBuffer);
+ if (read != toRead) {
+ throw new EOFException("Cannot read DoubleArrayTree");
+ }
+ tmpBaseBuffer.rewind();
+ baseBuffer = tmpBaseBuffer.asIntBuffer().asReadOnlyBuffer();
+
+ ByteBuffer tmpCheckBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4);
+ read = channel.read(tmpCheckBuffer);
+ if (read != toRead) {
+ throw new EOFException("Cannot read DoubleArrayTree");
+ }
+ tmpCheckBuffer.rewind();
+ checkBuffer = tmpCheckBuffer.asIntBuffer().asReadOnlyBuffer();
+
+ ByteBuffer tmpTailBuffer = ByteBuffer.allocateDirect(toRead = tailSize * 2); // The size is 2 times the tailSize since it is the length of array
+ read = channel.read(tmpTailBuffer);
+ if (read != toRead) {
+ throw new EOFException("Cannot read DoubleArrayTree");
+ }
+ tmpTailBuffer.rewind();
+ tailBuffer = tmpTailBuffer.asCharBuffer().asReadOnlyBuffer();
+ }
/**
- * Write to file
+ * Write to file (used by builder). Path is this class' slashed canonical classname + ".dat".
* @throws IOException
*/
public void write(String baseDir) throws IOException {
String filename = baseDir + File.separator + getClass().getName().replace('.', File.separatorChar) + FILENAME_SUFFIX;
new File(filename).getParentFile().mkdirs();
- baseBuffer.rewind();
- checkBuffer.rewind();
- tailBuffer.rewind();
-
final FileOutputStream os = new FileOutputStream(filename);
try {
- final DataOutput out = new OutputStreamDataOutput(os);
- CodecUtil.writeHeader(out, HEADER, VERSION);
- out.writeVInt(baseBuffer.capacity());
- out.writeVInt(tailBuffer.capacity());
- final WritableByteChannel channel = Channels.newChannel(os);
-
- ByteBuffer tmpBuffer = ByteBuffer.allocate(baseBuffer.capacity() * 4);
- IntBuffer tmpIntBuffer = tmpBuffer.asIntBuffer();
- tmpIntBuffer.put(baseBuffer);
- tmpBuffer.rewind();
- channel.write(tmpBuffer);
- assert tmpBuffer.remaining() == 0L;
-
- tmpBuffer = ByteBuffer.allocate(checkBuffer.capacity() * 4);
- tmpIntBuffer = tmpBuffer.asIntBuffer();
- tmpIntBuffer.put(checkBuffer);
- tmpBuffer.rewind();
- channel.write(tmpBuffer);
- assert tmpBuffer.remaining() == 0L;
-
- tmpBuffer = ByteBuffer.allocate(tailBuffer.capacity() * 2);
- CharBuffer tmpCharBuffer = tmpBuffer.asCharBuffer();
- tmpCharBuffer.put(tailBuffer);
- tmpBuffer.rewind();
- channel.write(tmpBuffer);
- assert tmpBuffer.remaining() == 0L;
+ write(os);
} finally {
os.close();
}
}
- /**
- * Construct double array trie which is equivalent to input trie
- * @param trie normal trie which contains all dictionary words
- */
- public void build(Trie trie) {
- baseBuffer = ByteBuffer.allocate(BASE_CHECK_INITILAL_SIZE * 4).asIntBuffer();
- baseBuffer.put(0, 1);
- checkBuffer = ByteBuffer.allocate(BASE_CHECK_INITILAL_SIZE * 4).asIntBuffer();
- tailBuffer = ByteBuffer.allocate(TAIL_INITIAL_SIZE * 2).asCharBuffer();
- add(-1, 0, trie.getRoot());
+ public void write(OutputStream os) throws IOException {
+ baseBuffer.rewind();
+ checkBuffer.rewind();
+ tailBuffer.rewind();
+
+ final DataOutput out = new OutputStreamDataOutput(os);
+ CodecUtil.writeHeader(out, HEADER, VERSION);
+ out.writeVInt(baseBuffer.capacity());
+ out.writeVInt(tailBuffer.capacity());
+ final WritableByteChannel channel = Channels.newChannel(os);
+
+ ByteBuffer tmpBuffer = ByteBuffer.allocate(baseBuffer.capacity() * 4);
+ IntBuffer tmpIntBuffer = tmpBuffer.asIntBuffer();
+ tmpIntBuffer.put(baseBuffer);
+ tmpBuffer.rewind();
+ channel.write(tmpBuffer);
+ assert tmpBuffer.remaining() == 0L;
+
+ tmpBuffer = ByteBuffer.allocate(checkBuffer.capacity() * 4);
+ tmpIntBuffer = tmpBuffer.asIntBuffer();
+ tmpIntBuffer.put(checkBuffer);
+ tmpBuffer.rewind();
+ channel.write(tmpBuffer);
+ assert tmpBuffer.remaining() == 0L;
+
+ tmpBuffer = ByteBuffer.allocate(tailBuffer.capacity() * 2);
+ CharBuffer tmpCharBuffer = tmpBuffer.asCharBuffer();
+ tmpCharBuffer.put(tailBuffer);
+ tmpBuffer.rewind();
+ channel.write(tmpBuffer);
+ assert tmpBuffer.remaining() == 0L;
}
/**
@@ -340,11 +325,20 @@ public class DoubleArrayTrie {
}
}
+ /** Returns the default trie as singleton with data from classpath, that fits the other dictionaries */
public synchronized static DoubleArrayTrie getInstance() {
- if (singleton == null) try {
- singleton = new DoubleArrayTrie(true);
- } catch (IOException ioe) {
- throw new RuntimeException("Cannot load DoubleArrayTrie.", ioe);
+ if (singleton == null) {
+ InputStream is = null;
+ try {
+ is = DoubleArrayTrie.class.getResourceAsStream(DoubleArrayTrie.class.getSimpleName() + FILENAME_SUFFIX);
+ if (is == null)
+ throw new FileNotFoundException("Not in classpath: " + DoubleArrayTrie.class.getName().replace('.','/') + FILENAME_SUFFIX);
+ singleton = new DoubleArrayTrie(is);
+ } catch (IOException ioe) {
+ throw new RuntimeException("Cannot load DoubleArrayTrie.", ioe);
+ } finally {
+ IOUtils.closeWhileHandlingException(is);
+ }
}
return singleton;
}
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/Trie.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/Trie.java?rev=1227917&r1=1227916&r2=1227917&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/Trie.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/Trie.java Fri Jan 6 00:57:15 2012
@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.kurom
* limitations under the License.
*/
-public class Trie {
+public final class Trie {
- private Node root; // Root node of Trie
+ private final Node root; // Root node of Trie
/**
* Constructor
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java?rev=1227917&r1=1227916&r2=1227917&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java Fri Jan 6 00:57:15 2012
@@ -31,8 +31,7 @@ public class DoubleArrayTrieTest extends
@Test
public void test() {
Trie trie = getTrie();
- DoubleArrayTrie doubleArrayTrie = new DoubleArrayTrie();
- doubleArrayTrie.build(trie);
+ DoubleArrayTrie doubleArrayTrie = new DoubleArrayTrie(trie);
assertEquals(0, doubleArrayTrie.lookup("a"));
assertTrue(doubleArrayTrie.lookup("abc") > 0);
assertTrue(doubleArrayTrie.lookup("あいう") > 0);
Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DoubleArrayTrieBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DoubleArrayTrieBuilder.java?rev=1227917&r1=1227916&r2=1227917&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DoubleArrayTrieBuilder.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DoubleArrayTrieBuilder.java Fri Jan 6 00:57:15 2012
@@ -32,9 +32,7 @@ public class DoubleArrayTrieBuilder {
public static DoubleArrayTrie build(Set<Entry<Integer, String>> entries) {
Trie tempTrie = buildTrie(entries);
- DoubleArrayTrie daTrie = new DoubleArrayTrie();
- daTrie.build(tempTrie);
- return daTrie;
+ return new DoubleArrayTrie(tempTrie);
}
public static Trie buildTrie(Set<Entry<Integer, String>> entries) {