You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ka...@apache.org on 2009/03/14 08:45:29 UTC

svn commit: r753612 - in /labs/bananadb/trunk: FILES.txt src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java

Author: kalle
Date: Sat Mar 14 07:45:28 2009
New Revision: 753612

URL: http://svn.apache.org/viewvc?rev=753612&view=rev
Log:
BananaDB

0.2-SNAPSHOT
added rudamentary rehasing features

Modified:
    labs/bananadb/trunk/FILES.txt
    labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java
    labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java
    labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java

Modified: labs/bananadb/trunk/FILES.txt
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/FILES.txt?rev=753612&r1=753611&r2=753612&view=diff
==============================================================================
--- labs/bananadb/trunk/FILES.txt (original)
+++ labs/bananadb/trunk/FILES.txt Sat Mar 14 07:45:28 2009
@@ -1,6 +1,6 @@
 = Files =
 
-== data.[0-9]+ ==
+== values.[0-9]+ ==
 
 Hashtable entity values, the serialized V part of the Hashtable<K, V>.
 There can be any number of these files, refered to as partitions in the code.
@@ -13,7 +13,7 @@
 
 }}}
 
-== key postings ==
+== keys ==
 
 Hashtable key postings list, the serialized K part of the Hashtable<K, V>.
 If keys share the same hash code,

Modified: labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java?rev=753612&r1=753611&r2=753612&view=diff
==============================================================================
--- labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java (original)
+++ labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java Sat Mar 14 07:45:28 2009
@@ -29,7 +29,7 @@
 
 /**
  * Hashtable<K, V> that lives on local disk via random access file.
- *
+ * <p/>
  * It does not handle null keys but allows null values.
  * It is limited to a maximum number of entities, defined when opening (formatting) the map.
  * <p/>
@@ -41,7 +41,7 @@
 public class Hashtable<K, V> {
 
   private Log log = LogFactory.getLog(Hashtable.class);
-  
+
   private static final int HASHTABLE_RECORD_BYTE_SIZE = 1 + 4;
 
   /**
@@ -64,7 +64,9 @@
 
   private KeyClassHandler<K> keyClassHandler;
 
-  /** size of a key posting in the key postings file */
+  /**
+   * size of a key posting in the key postings file
+   */
   private int keyPostingByteSize;
 
   private ValueClassHandler<V> valueClassHandler;
@@ -76,7 +78,8 @@
 
   private long lockWaitTimeoutMilliseconds = 10000;
   private Lock writeLock;
-  private LockFactory lockFactory;
+  private Lock readLock;
+
 
   private void setNextKeyPostingOffset(int nextKeyPostingOffset) {
     this.nextKeyPostingOffset = nextKeyPostingOffset;
@@ -94,7 +97,7 @@
   private List<HashtableAccessor> accessors = new ArrayList<HashtableAccessor>();
 
   /**
-   * @param readOnly 
+   * @param readOnly
    * @return A new accessor used to seek in the hashtable.
    * @throws IOException
    */
@@ -109,26 +112,26 @@
   }
 
 
-
   /**
    * Open or create a hashtable.
-   *
+   * <p/>
    * A hashtable that contains about the number of items as defined by parameter capacity
    * will be about 8 times slower to access (random read and random write)
    * than a hashtable with a capacity of 8 times greater than the number of entities it can store.
    * This values seems to be rather linear.
-   *
+   * <p/>
    * Milage might vary depending on distribution of key value hash codes.
    *
-   * @param path Directory where hashtable data is stored. Only one hashtable can fit in this directory.
-   * @param capacity Maximum number of entites that fits this hashtable. This value is only valid when creating a new hashtable.
+   * @param path              Directory where hashtable data is stored. Only one hashtable can fit in this directory.
+   * @param capacity          Maximum number of entites that fits this hashtable. This value is only valid when creating a new hashtable.
    * @param keyClassHandler
    * @param valueClassHandler
    * @throws IOException
    */
   public Hashtable(File path, int capacity, KeyClassHandler<K> keyClassHandler, ValueClassHandler<V> valueClassHandler, LockFactory lockFactory) throws IOException {
 
-    writeLock = lockFactory.makeLock("lock");
+    writeLock = lockFactory.makeLock("write");
+    readLock = lockFactory.makeLock("read");
 
     this.keyClassHandler = keyClassHandler;
     this.valueClassHandler = valueClassHandler;
@@ -180,7 +183,7 @@
 
     File[] valueFiles = path.listFiles(new FileFilter() {
       public boolean accept(File file) {
-        return file.isFile() && file.getName().matches(valuePostingsFileName+"\\.[0-9]+");
+        return file.isFile() && file.getName().matches(valuePostingsFileName + "\\.[0-9]+");
       }
     });
     Arrays.sort(valueFiles, new Comparator<File>() {
@@ -193,7 +196,7 @@
     if (valueFiles.length > 0) {
       for (int i = 0; i < valueFiles.length; i++) {
         if (!valueFiles[i].getName().endsWith("." + i)) {
-          throw new FileNotFoundException("Expected partition "+valuePostingsFileName+"." + i + " but found " + valueFiles[i].getName());
+          throw new FileNotFoundException("Expected partition " + valuePostingsFileName + "." + i + " but found " + valueFiles[i].getName());
         }
         Partition partition = new Partition(i);
         partition.open();
@@ -210,7 +213,6 @@
   }
 
   /**
-   *
    * @param accessor
    * @param key
    * @param value
@@ -218,16 +220,19 @@
    * @throws IOException
    */
   public V put(final HashtableAccessor accessor, final K key, final V value) throws IOException {
-    Lock.With<V> with = new Lock.With<V>(writeLock, lockWaitTimeoutMilliseconds) {
-      protected V doBody() throws IOException {
-        return doPut(accessor, key, value);
-      }
-    };
-    return with.run();
+    if (writeLock == null) {
+      return doPut(accessor, key, value);
+    } else {
+      Lock.With<V> with = new Lock.With<V>(writeLock, lockWaitTimeoutMilliseconds) {
+        protected V doBody() throws IOException {
+          return doPut(accessor, key, value);
+        }
+      };
+      return with.run();
+    }
   }
 
   /**
-   *
    * @param accessor
    * @param key
    * @param value
@@ -258,10 +263,8 @@
     int index = keyHash & (capacity - 1);
     byte[] hashtableRecordBuf = new byte[HASHTABLE_RECORD_BYTE_SIZE];
     accessor.getHashtableRAF().seek(index * HASHTABLE_RECORD_BYTE_SIZE);
-    accessor.getHashtableRAF().read(hashtableRecordBuf);
-    DataInput in = new DataInputStream(new ByteArrayInputStream(hashtableRecordBuf));
 
-    if (in.readByte() == 0) {
+    if (accessor.getHashtableRAF().readByte() == 0) {
       // create inital posting for hash
 
       int keyPostingOffset = getNextKeyPostingOffset();
@@ -283,7 +286,7 @@
 
       // already contains a posting for the hash
 
-      int firstKeyPostingOffset = in.readInt();
+      int firstKeyPostingOffset = accessor.getHashtableRAF().readInt();
 
       K compareKey;
 
@@ -327,14 +330,14 @@
       // although it shares hash with some other keys in there.
 
       // create new key posting and point at it from the previous key posting
-      int KeyPostingOffset = getNextKeyPostingOffset();
+      int keyPostingOffset = getNextKeyPostingOffset();
       setNextKeyPostingOffset(getNextKeyPostingOffset() + keyPostingByteSize);
 
-      accessor.getKeyPostingsRAF().seek(KeyPostingOffset);
+      accessor.getKeyPostingsRAF().seek(keyPostingOffset);
       writePostings(accessor, -1, keyBuf, valueBuf, keyHash);
 
       accessor.getKeyPostingsRAF().seek(previousKeyPostingOffset);
-      accessor.getKeyPostingsRAF().writeInt(KeyPostingOffset);
+      accessor.getKeyPostingsRAF().writeInt(keyPostingOffset);
 
       accessor.getKeyPostingsRAF().seek(4);
       accessor.getKeyPostingsRAF().writeInt(++entityCount);
@@ -383,13 +386,12 @@
       accessor.getKeyPostingsRAF().write(keyBuf);
 
       // key    hash
-      accessor.getKeyPostingsRAF().write(keyHashCode);
+      accessor.getKeyPostingsRAF().writeInt(keyHashCode);
     }
 
   }
 
   /**
-   *
    * @param accessor
    * @param key
    * @return true if hashtable contains a posting for the given key.
@@ -428,7 +430,7 @@
 //        return false;
 //      }
       int partitionOffset = accessor.getKeyPostingsRAF().readInt();
-      
+
       int keySize = accessor.getKeyPostingsRAF().readInt();
       if (keySize > 0) {
         byte[] compareKeyBuf = new byte[keySize];
@@ -568,6 +570,191 @@
     throw new RuntimeException("Expected to find the value in data file!");
   }
 
+  public void rehash(final int capacity) throws IOException {
+    if (writeLock == null) {
+      doRehash(capacity);
+    } else {
+      Lock.With<V> with = new Lock.With<V>(writeLock, lockWaitTimeoutMilliseconds) {
+        protected V doBody() throws IOException {
+          doRehash(capacity);
+          return null;
+        }
+      };
+      with.run();
+    }
+  }
+
+  private void doRehash(final int capacity) throws IOException {
+    final File hashtableFile = new File(path, "hashtable.rehash");
+    format(hashtableFile, capacity * HASHTABLE_RECORD_BYTE_SIZE);
+
+    final File keyPostingsFile = new File(path, "keys.rehash");
+    format(keyPostingsFile, KEY_POSTINGS_HEADER_BYTE_SIZE + (capacity * keyPostingByteSize));
+
+    RandomAccessFile hashtableOutput = new RandomAccessFile(hashtableFile, "rw");
+
+    int read;
+
+    RandomAccessFile keyPostingsInput = new RandomAccessFile(this.keyPostingsFile, "r");
+    RandomAccessFile keyPostingsOutput = new RandomAccessFile(keyPostingsFile, "rw");
+
+    byte[] buf = new byte[KEY_POSTINGS_HEADER_BYTE_SIZE];
+    read = keyPostingsInput.read(buf);
+    if (read != KEY_POSTINGS_HEADER_BYTE_SIZE) {
+      throw new IOException("Expected " + KEY_POSTINGS_HEADER_BYTE_SIZE + " bytes long key postings header");
+    }
+    keyPostingsOutput.write(buf);
+
+    int nextKeyListPostingOffset;
+    int valuePartition;
+    int valuePartitionOffset;
+
+    byte[] keyBuf = new byte[keyClassHandler.getByteSize()];
+    int keyHash;
+
+    int nextAvailableKeyPostingOffset = KEY_POSTINGS_HEADER_BYTE_SIZE;
+
+    for (int inputKeyPostingIndex = 0; inputKeyPostingIndex < size(); inputKeyPostingIndex++) {
+
+
+      //  int         offset position in this file to the next entry with the same hash, -1 = null
+      nextKeyListPostingOffset = keyPostingsInput.readInt();
+      //  int         value partition number, -1 = null value
+      valuePartition = keyPostingsInput.readInt();
+      //  int         offset position in value partition, -1 = null
+      valuePartitionOffset = keyPostingsInput.readInt();
+      //  int         key length in bytes, 0 = null
+      keyPostingsInput.readInt();
+      //  byte[]      serialized key (if not null)
+      keyPostingsInput.read(keyBuf);
+      //  int         key hash, will be used for future rehashing
+      keyHash = keyPostingsInput.readInt();
+
+
+      int index = keyHash & (capacity - 1);
+      hashtableOutput.seek(index * HASHTABLE_RECORD_BYTE_SIZE);
+
+      if (hashtableOutput.readByte() == 0) {
+        // create inital posting for hash
+        int keyPostingOffset = nextAvailableKeyPostingOffset;
+        nextAvailableKeyPostingOffset += keyPostingByteSize;
+
+        keyPostingsOutput.seek(keyPostingOffset);
+        // int    offset position in this file to the next key posting with the same hash, -1 = null
+        keyPostingsOutput.writeInt(-1);
+
+        // int    value partition number, -1 == null
+        keyPostingsOutput.writeInt(valuePartition);
+
+        // int    offset position in value partition, -1 == null
+        keyPostingsOutput.writeInt((int) valuePartitionOffset);
+
+        // int    key length in bytes, 0 = null
+        if (keyBuf == null || keyBuf.length == 0) {
+          keyPostingsOutput.writeInt(0);
+        } else {
+          keyPostingsOutput.writeInt(keyBuf.length);
+
+          // key    writable key
+          keyPostingsOutput.write(keyBuf);
+
+          // key    hash
+          keyPostingsOutput.write(keyHash);
+        }
+
+        hashtableOutput.seek(index * HASHTABLE_RECORD_BYTE_SIZE);
+        hashtableOutput.writeByte(1);
+        hashtableOutput.writeInt(keyPostingOffset);
+
+      } else {
+        // already contains a posting for the hash
+
+        int firstKeyPostingOffset = hashtableOutput.readInt();
+
+        K compareKey;
+
+        int previousKeyPostingOffset = -1;
+        int nextKeyPostingOffset = firstKeyPostingOffset;
+        while (nextKeyPostingOffset != -1) {
+          int keyPostingOffset = nextKeyPostingOffset;
+
+          keyPostingsOutput.seek(keyPostingOffset);
+
+          nextKeyPostingOffset = keyPostingsOutput.readInt();
+          int partitionNumber = keyPostingsOutput.readInt();
+          int partitionOffset = keyPostingsOutput.readInt();
+          int keySize = keyPostingsOutput.readInt();
+          previousKeyPostingOffset = keyPostingOffset;
+
+        }
+
+        // this key instance did not exist in entries
+        // although it shares hash with some other keys in there.
+
+        // create new key posting and point at it from the previous key posting
+        int keyPostingOffset = nextAvailableKeyPostingOffset;
+        nextAvailableKeyPostingOffset += keyPostingByteSize;
+
+        keyPostingsOutput.seek(keyPostingOffset);
+        // int    offset position in this file to the next key posting with the same hash, -1 = null
+        keyPostingsOutput.writeInt(-1);
+
+        // int    value partition number, -1 == null
+        keyPostingsOutput.writeInt(valuePartition);
+
+        // int    offset position in value partition, -1 == null
+        keyPostingsOutput.writeInt((int) valuePartitionOffset);
+
+        // int    key length in bytes, 0 = null
+        if (keyBuf == null || keyBuf.length == 0) {
+          keyPostingsOutput.writeInt(0);
+        } else {
+          keyPostingsOutput.writeInt(keyBuf.length);
+
+          // key    writable key
+          keyPostingsOutput.write(keyBuf);
+
+          // key    hash
+          keyPostingsOutput.write(keyHash);
+        }
+
+        keyPostingsOutput.seek(previousKeyPostingOffset);
+        keyPostingsOutput.writeInt(keyPostingOffset);
+
+      }
+    }
+
+    keyPostingsInput.close();
+    keyPostingsOutput.close();
+
+    hashtableOutput.close();
+
+    Lock.With with = new Lock.With(readLock, lockWaitTimeoutMilliseconds) {
+      protected Object doBody() throws IOException {
+
+        // lock all accessors for reading
+
+        Hashtable.this.hashtableFile.delete();
+        hashtableFile.renameTo(Hashtable.this.hashtableFile);
+
+        Hashtable.this.keyPostingsFile.delete();
+        keyPostingsFile.renameTo(Hashtable.this.keyPostingsFile);
+
+        Hashtable.this.capacity = capacity;
+
+        // todo reopen all accessor RAFs of hashtable and keys file
+
+        // unlock all accessors for reading
+
+        return null;
+      }
+    };
+    with.run();
+
+
+    System.currentTimeMillis();
+
+  }
 
   /**
    * Represents an hashtable value data partition file.
@@ -768,13 +955,13 @@
 
   /**
    * A Cursor iterates the entities in a Hashtable.
-   *
+   * <p/>
    * It is NOT thread safe! I.e. you can not use multiple threads to iterate the
    * same cursor at the same time. TODO: fix that. Multiple solutions available.
-   *
+   * <p/>
    * The entities are returned close to the order of the hash code, but not always.
    * The greater the capacity of a hashtable the more precise the order.
-   *
+   * <p/>
    * If another accessor is adding a new entity to the hashtable and the cursor already have
    * iterated passed the hash code of the new entity key, this item will not be iterated,
    * however if the hash code of the new entity key is greater than the current entity key
@@ -796,6 +983,7 @@
 
     /**
      * Attempts to move the curors one step forward.
+     *
      * @return true if cursor moved to next entity, false if no more entries.
      * @throws IOException
      */

Modified: labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java?rev=753612&r1=753611&r2=753612&view=diff
==============================================================================
--- labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java (original)
+++ labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java Sat Mar 14 07:45:28 2009
@@ -25,7 +25,7 @@
 
     Benchmark benchmark = new Benchmark();
 
-    for (int capacityfactor = 1; capacityfactor <= 10; capacityfactor += 2) {
+    for (int capacityfactor = 8; capacityfactor <= 10; capacityfactor += 2) {
       for (int items = 1000; items <= 1000000; items *= 1.5) {
         benchmark.test(items, capacityfactor * items, csv);
         System.out.println("-----");

Modified: labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java?rev=753612&r1=753611&r2=753612&view=diff
==============================================================================
--- labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java (original)
+++ labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java Sat Mar 14 07:45:28 2009
@@ -47,6 +47,44 @@
   }
 
 
+
+
+  @Test
+  public void testRehash() throws Exception {
+
+    File path = new File(this.path, "testRehash");
+
+    Hashtable<Integer, String> map = new Hashtable<Integer, String>(path, 100, new IntegerHandler(), new StringValueHandler());
+    HashtableAccessor accessor = map.createAccessor(false);
+
+    map.put(accessor, 0, "hello world");
+    map.put(accessor, 1, "greetings world");
+    map.put(accessor, 5, "salutations world");
+
+    accessor.close();
+
+    map.rehash(1000);
+
+    assertEquals(3, map.size());
+
+    accessor = map.createAccessor(false);
+    assertEquals("hello world", map.get(accessor, 0));
+    assertEquals("greetings world", map.get(accessor, 1));
+    assertEquals("salutations world", map.get(accessor, 5));
+
+    assertEquals(3, map.size());
+
+    // todo cursor
+
+    accessor.close();
+
+    // todo close and open again, then assert the same post-rehash tests once again
+
+
+
+  }
+
+
   private class TestThreadedRunnable implements Runnable {
     private Hashtable<Integer, String> map;
     private HashtableAccessor accessor;



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org