You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/06 00:23:16 UTC

svn commit: r1227891 - in /lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src: java/org/apache/lucene/analysis/kuromoji/dict/ java/org/apache/lucene/analysis/kuromoji/trie/ resources/org/apache/lucene/analysis/kuromoji/dict/ resources/org/apa...

Author: uschindler
Date: Thu Jan  5 23:23:15 2012
New Revision: 1227891

URL: http://svn.apache.org/viewvc?rev=1227891&view=rev
Log:
LUCENE-3305: Make real singletons out of ConnectionCosts and DoubleArrayTrie, also rename files. Some cleanup.

Added:
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.dat
      - copied unchanged from r1227881, lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/cc.dat
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.dat
      - copied unchanged from r1227881, lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/trie/dat.dat
Removed:
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/cc.dat
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/trie/dat.dat
Modified:
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java
    lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/CharacterDefinition.java Thu Jan  5 23:23:15 2012
@@ -148,6 +148,7 @@ public final class CharacterDefinition {
 
   public void write(String baseDir) throws IOException {
     String filename = baseDir + File.separator + getClass().getName().replace('.', File.separatorChar) + FILENAME_SUFFIX;
+    new File(filename).getParentFile().mkdirs();
     OutputStream os = new FileOutputStream(filename);
     try {
       os = new BufferedOutputStream(os);

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/ConnectionCosts.java Thu Jan  5 23:23:15 2012
@@ -22,6 +22,7 @@ import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.FileNotFoundException;
 import java.io.InputStream;
 import java.io.OutputStream;
 
@@ -30,22 +31,50 @@ import org.apache.lucene.store.DataOutpu
 import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
 
-public class ConnectionCosts {
+public final class ConnectionCosts {
   
-  public static final String FILENAME = "cc.dat";
+  public static final String FILENAME_SUFFIX = ".dat";
   public static final String HEADER = "kuromoji_cc";
   public static final int VERSION = 1;
   
-  private short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
+  private final short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
   
-  public ConnectionCosts() {
-  }
-  
-  private ConnectionCosts(short[][] costs) {
+  private ConnectionCosts(boolean dummy) throws IOException {
+    assert dummy;
+        
+    IOException priorE = null;
+    InputStream is = null;
+    short[][] costs = null;
+    try {
+      is = getClass().getResourceAsStream(getClass().getSimpleName()+FILENAME_SUFFIX);
+      if (is == null)
+        throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/')+FILENAME_SUFFIX);
+      is = new BufferedInputStream(is);
+      final DataInput in = new InputStreamDataInput(is);
+      CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
+      costs = new short[in.readVInt()][];
+      for (int j = 0; j < costs.length; j++) {
+        final int len = in.readVInt();
+        final short[] a = new short[len];
+        for (int i = 0; i < len; i++) {
+          a[i] = in.readShort();
+        }
+        costs[j] = a;
+      }
+    } catch (IOException ioe) {
+      priorE = ioe;
+    } finally {
+      IOUtils.closeWhileHandlingException(priorE, is);
+    }
+    
     this.costs = costs;
   }
   
+  /**
+   * Constructor for building. TODO: remove write access
+   */
   public ConnectionCosts(int forwardSize, int backwardSize) {
     this.costs = new short[backwardSize][forwardSize]; 
   }
@@ -65,8 +94,9 @@ public class ConnectionCosts {
     }
   }
   
-  public void write(String directoryname) throws IOException {
-    String filename = directoryname + File.separator + FILENAME;
+  public void write(String baseDir) throws IOException {
+    String filename = baseDir + File.separator + getClass().getName().replace('.', File.separatorChar) + FILENAME_SUFFIX;
+    new File(filename).getParentFile().mkdirs();
     OutputStream os = new FileOutputStream(filename);
     try {
       os = new BufferedOutputStream(os);
@@ -84,29 +114,15 @@ public class ConnectionCosts {
     }
   }
   
-  public static ConnectionCosts getInstance() throws IOException, ClassNotFoundException {
-    InputStream is = ConnectionCosts.class.getResourceAsStream(FILENAME);
-    return read(is);
-  }
-  
-  public static ConnectionCosts read(InputStream is) throws IOException, ClassNotFoundException {
-    is = new BufferedInputStream(is);
-    try {
-      final DataInput in = new InputStreamDataInput(is);
-      CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
-      final short[][] costs = new short[in.readVInt()][];
-      for (int j = 0; j < costs.length; j++) {
-        final int len = in.readVInt();
-        final short[] a = new short[len];
-        for (int i = 0; i < len; i++) {
-          a[i] = in.readShort();
-        }
-        costs[j] = a;
-      }
-      return new ConnectionCosts(costs);
-    } finally {
-      is.close();
+  public synchronized static ConnectionCosts getInstance() {
+    if (singleton == null) try {
+      singleton = new ConnectionCosts(true);
+    } catch (IOException ioe) {
+      throw new RuntimeException("Cannot load ConnectionCosts.", ioe);
     }
+    return singleton;
   }
   
+  private static ConnectionCosts singleton;
+  
 }

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrie.java Thu Jan  5 23:23:15 2012
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.EOFException;
+import java.io.FileNotFoundException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
@@ -37,10 +38,11 @@ import org.apache.lucene.store.DataOutpu
 import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
 
 public class DoubleArrayTrie {
   
-  public static final String FILENAME = "dat.dat";
+  public static final String FILENAME_SUFFIX = ".dat";
   public static final String HEADER = "kuromoji_double_arr_trie";
   public static final int VERSION = 1;
   
@@ -60,16 +62,68 @@ public class DoubleArrayTrie {
   
   private int tailIndex = TAIL_OFFSET;
   
+  /**
+   * Constructor for building. TODO: remove write access
+   */
+  public DoubleArrayTrie() {
+  }
   
-  public DoubleArrayTrie(){
+  private DoubleArrayTrie(boolean dummy) throws IOException {
+    assert dummy;
+    
+    IOException priorE = null;
+    InputStream is = null;
+    try {
+      is = getClass().getResourceAsStream(getClass().getSimpleName()+FILENAME_SUFFIX);
+      if (is == null)
+        throw new FileNotFoundException("Not in classpath: " + getClass().getName().replace('.','/')+FILENAME_SUFFIX);
+      is = new BufferedInputStream(is);
+      final DataInput in = new InputStreamDataInput(is);
+      CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
+      int baseCheckSize = in.readVInt();	// Read size of baseArr and checkArr
+      int tailSize = in.readVInt();		// Read size of tailArr
+      
+      ReadableByteChannel channel = Channels.newChannel(is);
+      
+      int toRead, read;
+      ByteBuffer tmpBaseBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4);	// The size is 4 times the baseCheckSize since it is the length of array
+      read = channel.read(tmpBaseBuffer);
+      if (read != toRead) {
+        throw new EOFException("Cannot read DoubleArrayTree");
+      }
+      tmpBaseBuffer.rewind();
+      baseBuffer = tmpBaseBuffer.asIntBuffer().asReadOnlyBuffer();
+      
+      ByteBuffer tmpCheckBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4);
+      read = channel.read(tmpCheckBuffer);
+      if (read != toRead) {
+        throw new EOFException("Cannot read DoubleArrayTree");
+      }
+      tmpCheckBuffer.rewind();
+      checkBuffer = tmpCheckBuffer.asIntBuffer().asReadOnlyBuffer();
+      
+      ByteBuffer tmpTailBuffer = ByteBuffer.allocateDirect(toRead = tailSize * 2);			// The size is 2 times the tailSize since it is the length of array
+      read = channel.read(tmpTailBuffer);
+      if (read != toRead) {
+        throw new EOFException("Cannot read DoubleArrayTree");
+      }
+      tmpTailBuffer.rewind();
+      tailBuffer = tmpTailBuffer.asCharBuffer().asReadOnlyBuffer();
+    } catch (IOException ioe) {
+      priorE = ioe;
+    } finally {
+      IOUtils.closeWhileHandlingException(priorE, is);
+    }
   }
   
+  
   /**
    * Write to file
    * @throws IOException
    */
-  public void write(String directoryname) throws IOException  {
-    String filename = directoryname + File.separator + FILENAME;
+  public void write(String baseDir) throws IOException  {
+    String filename = baseDir + File.separator + getClass().getName().replace('.', File.separatorChar) + FILENAME_SUFFIX;
+    new File(filename).getParentFile().mkdirs();
     
     baseBuffer.rewind();
     checkBuffer.rewind();
@@ -108,58 +162,6 @@ public class DoubleArrayTrie {
     }
   }
   
-  public static DoubleArrayTrie getInstance() throws IOException {
-    InputStream is = DoubleArrayTrie.class.getResourceAsStream(FILENAME);
-    return read(is);
-  }
-  
-  /**
-   * Load Stored data
-   * @throws IOException
-   */
-  public static DoubleArrayTrie read(InputStream is) throws IOException {
-    is = new BufferedInputStream(is);
-    try {
-      final DataInput in = new InputStreamDataInput(is);
-      CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
-      int baseCheckSize = in.readVInt();	// Read size of baseArr and checkArr
-      int tailSize = in.readVInt();		// Read size of tailArr
-      
-      ReadableByteChannel channel = Channels.newChannel(is);
-      
-      DoubleArrayTrie trie = new DoubleArrayTrie();
-
-      int toRead, read;
-      ByteBuffer tmpBaseBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4);	// The size is 4 times the baseCheckSize since it is the length of array
-      read = channel.read(tmpBaseBuffer);
-      if (read != toRead) {
-        throw new EOFException("Cannot read DoubleArrayTree");
-      }
-      tmpBaseBuffer.rewind();
-      trie.baseBuffer = tmpBaseBuffer.asIntBuffer().asReadOnlyBuffer();
-      
-      ByteBuffer tmpCheckBuffer = ByteBuffer.allocateDirect(toRead = baseCheckSize * 4);
-      read = channel.read(tmpCheckBuffer);
-      if (read != toRead) {
-        throw new EOFException("Cannot read DoubleArrayTree");
-      }
-      tmpCheckBuffer.rewind();
-      trie.checkBuffer = tmpCheckBuffer.asIntBuffer().asReadOnlyBuffer();
-      
-      ByteBuffer tmpTailBuffer = ByteBuffer.allocateDirect(toRead = tailSize * 2);			// The size is 2 times the tailSize since it is the length of array
-      read = channel.read(tmpTailBuffer);
-      if (read != toRead) {
-        throw new EOFException("Cannot read DoubleArrayTree");
-      }
-      tmpTailBuffer.rewind();
-      trie.tailBuffer = tmpTailBuffer.asCharBuffer().asReadOnlyBuffer();
-      
-      return trie;
-    } finally {
-      is.close();
-    }
-  }
-  
   /**
    * Construct double array trie which is equivalent to input trie
    * @param trie normal trie which contains all dictionary words
@@ -337,4 +339,16 @@ public class DoubleArrayTrie {
       node = node.getChildren()[0];	// Move to next node
     }
   }
+  
+  public synchronized static DoubleArrayTrie getInstance() {
+    if (singleton == null) try {
+      singleton = new DoubleArrayTrie(true);
+    } catch (IOException ioe) {
+      throw new RuntimeException("Cannot load DoubleArrayTrie.", ioe);
+    }
+    return singleton;
+  }
+  
+  private static DoubleArrayTrie singleton;
+  
 }

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/trie/DoubleArrayTrieTest.java Thu Jan  5 23:23:15 2012
@@ -24,58 +24,19 @@ import java.io.IOException;
 import org.apache.lucene.analysis.kuromoji.trie.DoubleArrayTrie;
 import org.apache.lucene.analysis.kuromoji.trie.Trie;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
 import org.junit.Test;
 
 public class DoubleArrayTrieTest extends LuceneTestCase {
 
   @Test
-  public void testBuild() {		
+  public void test() {		
     Trie trie = getTrie();
     DoubleArrayTrie doubleArrayTrie = new DoubleArrayTrie();
     doubleArrayTrie.build(trie);
-  }
-  
-  @Test
-  public void testWrite() throws IOException {
-    Trie trie = getTrie();
-    
-    DoubleArrayTrie doubleArrayTrie = new DoubleArrayTrie();
-    doubleArrayTrie.build(trie);
-    
-    try{
-      doubleArrayTrie.write("/some/path/which/is/not/exist");
-      fail();
-    }catch(IOException e){
-      
-    }
-    
-    File dir = _TestUtil.getTempDir("testWrite");
-    dir.mkdirs();
-    doubleArrayTrie.write(dir.getCanonicalPath());
-    
-    assertTrue(new File(dir, "dat.dat").exists());
-    
-  }
-  
-  @Test
-  public void testLookup() throws IOException {
-    Trie trie = getTrie();
-    
-    DoubleArrayTrie doubleArrayTrie = new DoubleArrayTrie();
-    doubleArrayTrie.build(trie);
-    
-    File dir = _TestUtil.getTempDir("testLookup");
-    dir.mkdirs();
-    doubleArrayTrie.write(dir.getCanonicalPath());
-    
-    doubleArrayTrie = DoubleArrayTrie.read(new FileInputStream(dir.getCanonicalPath() + File.separator + DoubleArrayTrie.FILENAME));
-    
     assertEquals(0, doubleArrayTrie.lookup("a"));
     assertTrue(doubleArrayTrie.lookup("abc") > 0);
     assertTrue(doubleArrayTrie.lookup("あいう") > 0);
     assertTrue(doubleArrayTrie.lookup("xyz") < 0);
-    
   }
   
   private Trie getTrie() {
@@ -87,5 +48,4 @@ public class DoubleArrayTrieTest extends
     return trie;
   }
   
-  
 }

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/DictionaryBuilder.java Thu Jan  5 23:23:15 2012
@@ -45,7 +45,7 @@ public class DictionaryBuilder {
     
     System.out.print("  building double array trie...");
     DoubleArrayTrie trie = DoubleArrayTrieBuilder.build(tokenInfoBuilder.entrySet());
-    trie.write(outputDirname+File.separatorChar+DoubleArrayTrie.class.getPackage().getName().replace('.',File.separatorChar));
+    trie.write(outputDirname);
     System.out.println("  done");
     
     System.out.print("  processing target map...");
@@ -72,7 +72,7 @@ public class DictionaryBuilder {
     System.out.print("building connection costs...");
     ConnectionCosts connectionCosts
       = ConnectionCostsBuilder.build(inputDirname + File.separator + "matrix.def");
-    connectionCosts.write(outputDirname+File.separatorChar+ConnectionCosts.class.getPackage().getName().replace('.',File.separatorChar));
+    connectionCosts.write(outputDirname);
     System.out.println("done");
   }
   

Modified: lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java?rev=1227891&r1=1227890&r2=1227891&view=diff
==============================================================================
--- lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java (original)
+++ lucene/dev/branches/lucene3305/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryWriter.java Thu Jan  5 23:23:15 2012
@@ -113,6 +113,7 @@ public class TokenInfoDictionaryWriter {
   }
   
   protected void writeTargetMap(String filename) throws IOException {
+    new File(filename).getParentFile().mkdirs();
     OutputStream os = new FileOutputStream(filename);
     try {
       os = new BufferedOutputStream(os);
@@ -149,6 +150,7 @@ public class TokenInfoDictionaryWriter {
   }
   
   protected void writeDictionary(String filename) throws IOException {
+    new File(filename).getParentFile().mkdirs();
     final FileOutputStream os = new FileOutputStream(filename);
     try {
       final DataOutput out = new OutputStreamDataOutput(os);