You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ka...@apache.org on 2009/03/14 08:45:29 UTC
svn commit: r753612 - in /labs/bananadb/trunk: FILES.txt
src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java
src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java
src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java
Author: kalle
Date: Sat Mar 14 07:45:28 2009
New Revision: 753612
URL: http://svn.apache.org/viewvc?rev=753612&view=rev
Log:
BananaDB
0.2-SNAPSHOT
added rudamentary rehasing features
Modified:
labs/bananadb/trunk/FILES.txt
labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java
labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java
labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java
Modified: labs/bananadb/trunk/FILES.txt
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/FILES.txt?rev=753612&r1=753611&r2=753612&view=diff
==============================================================================
--- labs/bananadb/trunk/FILES.txt (original)
+++ labs/bananadb/trunk/FILES.txt Sat Mar 14 07:45:28 2009
@@ -1,6 +1,6 @@
= Files =
-== data.[0-9]+ ==
+== values.[0-9]+ ==
Hashtable entity values, the serialized V part of the Hashtable<K, V>.
There can be any number of these files, refered to as partitions in the code.
@@ -13,7 +13,7 @@
}}}
-== key postings ==
+== keys ==
Hashtable key postings list, the serialized K part of the Hashtable<K, V>.
If keys share the same hash code,
Modified: labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java?rev=753612&r1=753611&r2=753612&view=diff
==============================================================================
--- labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java (original)
+++ labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java Sat Mar 14 07:45:28 2009
@@ -29,7 +29,7 @@
/**
* Hashtable<K, V> that lives on local disk via random access file.
- *
+ * <p/>
* It does not handle null keys but allows null values.
* It is limited to a maximum number of entities, defined when opening (formatting) the map.
* <p/>
@@ -41,7 +41,7 @@
public class Hashtable<K, V> {
private Log log = LogFactory.getLog(Hashtable.class);
-
+
private static final int HASHTABLE_RECORD_BYTE_SIZE = 1 + 4;
/**
@@ -64,7 +64,9 @@
private KeyClassHandler<K> keyClassHandler;
- /** size of a key posting in the key postings file */
+ /**
+ * size of a key posting in the key postings file
+ */
private int keyPostingByteSize;
private ValueClassHandler<V> valueClassHandler;
@@ -76,7 +78,8 @@
private long lockWaitTimeoutMilliseconds = 10000;
private Lock writeLock;
- private LockFactory lockFactory;
+ private Lock readLock;
+
private void setNextKeyPostingOffset(int nextKeyPostingOffset) {
this.nextKeyPostingOffset = nextKeyPostingOffset;
@@ -94,7 +97,7 @@
private List<HashtableAccessor> accessors = new ArrayList<HashtableAccessor>();
/**
- * @param readOnly
+ * @param readOnly
* @return A new accessor used to seek in the hashtable.
* @throws IOException
*/
@@ -109,26 +112,26 @@
}
-
/**
* Open or create a hashtable.
- *
+ * <p/>
* A hashtable that contains about the number of items as defined by parameter capacity
* will be about 8 times slower to access (random read and random write)
* than a hashtable with a capacity of 8 times greater than the number of entities it can store.
* This values seems to be rather linear.
- *
+ * <p/>
* Milage might vary depending on distribution of key value hash codes.
*
- * @param path Directory where hashtable data is stored. Only one hashtable can fit in this directory.
- * @param capacity Maximum number of entites that fits this hashtable. This value is only valid when creating a new hashtable.
+ * @param path Directory where hashtable data is stored. Only one hashtable can fit in this directory.
+ * @param capacity Maximum number of entites that fits this hashtable. This value is only valid when creating a new hashtable.
* @param keyClassHandler
* @param valueClassHandler
* @throws IOException
*/
public Hashtable(File path, int capacity, KeyClassHandler<K> keyClassHandler, ValueClassHandler<V> valueClassHandler, LockFactory lockFactory) throws IOException {
- writeLock = lockFactory.makeLock("lock");
+ writeLock = lockFactory.makeLock("write");
+ readLock = lockFactory.makeLock("read");
this.keyClassHandler = keyClassHandler;
this.valueClassHandler = valueClassHandler;
@@ -180,7 +183,7 @@
File[] valueFiles = path.listFiles(new FileFilter() {
public boolean accept(File file) {
- return file.isFile() && file.getName().matches(valuePostingsFileName+"\\.[0-9]+");
+ return file.isFile() && file.getName().matches(valuePostingsFileName + "\\.[0-9]+");
}
});
Arrays.sort(valueFiles, new Comparator<File>() {
@@ -193,7 +196,7 @@
if (valueFiles.length > 0) {
for (int i = 0; i < valueFiles.length; i++) {
if (!valueFiles[i].getName().endsWith("." + i)) {
- throw new FileNotFoundException("Expected partition "+valuePostingsFileName+"." + i + " but found " + valueFiles[i].getName());
+ throw new FileNotFoundException("Expected partition " + valuePostingsFileName + "." + i + " but found " + valueFiles[i].getName());
}
Partition partition = new Partition(i);
partition.open();
@@ -210,7 +213,6 @@
}
/**
- *
* @param accessor
* @param key
* @param value
@@ -218,16 +220,19 @@
* @throws IOException
*/
public V put(final HashtableAccessor accessor, final K key, final V value) throws IOException {
- Lock.With<V> with = new Lock.With<V>(writeLock, lockWaitTimeoutMilliseconds) {
- protected V doBody() throws IOException {
- return doPut(accessor, key, value);
- }
- };
- return with.run();
+ if (writeLock == null) {
+ return doPut(accessor, key, value);
+ } else {
+ Lock.With<V> with = new Lock.With<V>(writeLock, lockWaitTimeoutMilliseconds) {
+ protected V doBody() throws IOException {
+ return doPut(accessor, key, value);
+ }
+ };
+ return with.run();
+ }
}
/**
- *
* @param accessor
* @param key
* @param value
@@ -258,10 +263,8 @@
int index = keyHash & (capacity - 1);
byte[] hashtableRecordBuf = new byte[HASHTABLE_RECORD_BYTE_SIZE];
accessor.getHashtableRAF().seek(index * HASHTABLE_RECORD_BYTE_SIZE);
- accessor.getHashtableRAF().read(hashtableRecordBuf);
- DataInput in = new DataInputStream(new ByteArrayInputStream(hashtableRecordBuf));
- if (in.readByte() == 0) {
+ if (accessor.getHashtableRAF().readByte() == 0) {
// create inital posting for hash
int keyPostingOffset = getNextKeyPostingOffset();
@@ -283,7 +286,7 @@
// already contains a posting for the hash
- int firstKeyPostingOffset = in.readInt();
+ int firstKeyPostingOffset = accessor.getHashtableRAF().readInt();
K compareKey;
@@ -327,14 +330,14 @@
// although it shares hash with some other keys in there.
// create new key posting and point at it from the previous key posting
- int KeyPostingOffset = getNextKeyPostingOffset();
+ int keyPostingOffset = getNextKeyPostingOffset();
setNextKeyPostingOffset(getNextKeyPostingOffset() + keyPostingByteSize);
- accessor.getKeyPostingsRAF().seek(KeyPostingOffset);
+ accessor.getKeyPostingsRAF().seek(keyPostingOffset);
writePostings(accessor, -1, keyBuf, valueBuf, keyHash);
accessor.getKeyPostingsRAF().seek(previousKeyPostingOffset);
- accessor.getKeyPostingsRAF().writeInt(KeyPostingOffset);
+ accessor.getKeyPostingsRAF().writeInt(keyPostingOffset);
accessor.getKeyPostingsRAF().seek(4);
accessor.getKeyPostingsRAF().writeInt(++entityCount);
@@ -383,13 +386,12 @@
accessor.getKeyPostingsRAF().write(keyBuf);
// key hash
- accessor.getKeyPostingsRAF().write(keyHashCode);
+ accessor.getKeyPostingsRAF().writeInt(keyHashCode);
}
}
/**
- *
* @param accessor
* @param key
* @return true if hashtable contains a posting for the given key.
@@ -428,7 +430,7 @@
// return false;
// }
int partitionOffset = accessor.getKeyPostingsRAF().readInt();
-
+
int keySize = accessor.getKeyPostingsRAF().readInt();
if (keySize > 0) {
byte[] compareKeyBuf = new byte[keySize];
@@ -568,6 +570,191 @@
throw new RuntimeException("Expected to find the value in data file!");
}
+ public void rehash(final int capacity) throws IOException {
+ if (writeLock == null) {
+ doRehash(capacity);
+ } else {
+ Lock.With<V> with = new Lock.With<V>(writeLock, lockWaitTimeoutMilliseconds) {
+ protected V doBody() throws IOException {
+ doRehash(capacity);
+ return null;
+ }
+ };
+ with.run();
+ }
+ }
+
+ private void doRehash(final int capacity) throws IOException {
+ final File hashtableFile = new File(path, "hashtable.rehash");
+ format(hashtableFile, capacity * HASHTABLE_RECORD_BYTE_SIZE);
+
+ final File keyPostingsFile = new File(path, "keys.rehash");
+ format(keyPostingsFile, KEY_POSTINGS_HEADER_BYTE_SIZE + (capacity * keyPostingByteSize));
+
+ RandomAccessFile hashtableOutput = new RandomAccessFile(hashtableFile, "rw");
+
+ int read;
+
+ RandomAccessFile keyPostingsInput = new RandomAccessFile(this.keyPostingsFile, "r");
+ RandomAccessFile keyPostingsOutput = new RandomAccessFile(keyPostingsFile, "rw");
+
+ byte[] buf = new byte[KEY_POSTINGS_HEADER_BYTE_SIZE];
+ read = keyPostingsInput.read(buf);
+ if (read != KEY_POSTINGS_HEADER_BYTE_SIZE) {
+ throw new IOException("Expected " + KEY_POSTINGS_HEADER_BYTE_SIZE + " bytes long key postings header");
+ }
+ keyPostingsOutput.write(buf);
+
+ int nextKeyListPostingOffset;
+ int valuePartition;
+ int valuePartitionOffset;
+
+ byte[] keyBuf = new byte[keyClassHandler.getByteSize()];
+ int keyHash;
+
+ int nextAvailableKeyPostingOffset = KEY_POSTINGS_HEADER_BYTE_SIZE;
+
+ for (int inputKeyPostingIndex = 0; inputKeyPostingIndex < size(); inputKeyPostingIndex++) {
+
+
+ // int offset position in this file to the next entry with the same hash, -1 = null
+ nextKeyListPostingOffset = keyPostingsInput.readInt();
+ // int value partition number, -1 = null value
+ valuePartition = keyPostingsInput.readInt();
+ // int offset position in value partition, -1 = null
+ valuePartitionOffset = keyPostingsInput.readInt();
+ // int key length in bytes, 0 = null
+ keyPostingsInput.readInt();
+ // byte[] serialized key (if not null)
+ keyPostingsInput.read(keyBuf);
+ // int key hash, will be used for future rehashing
+ keyHash = keyPostingsInput.readInt();
+
+
+ int index = keyHash & (capacity - 1);
+ hashtableOutput.seek(index * HASHTABLE_RECORD_BYTE_SIZE);
+
+ if (hashtableOutput.readByte() == 0) {
+ // create inital posting for hash
+ int keyPostingOffset = nextAvailableKeyPostingOffset;
+ nextAvailableKeyPostingOffset += keyPostingByteSize;
+
+ keyPostingsOutput.seek(keyPostingOffset);
+ // int offset position in this file to the next key posting with the same hash, -1 = null
+ keyPostingsOutput.writeInt(-1);
+
+ // int value partition number, -1 == null
+ keyPostingsOutput.writeInt(valuePartition);
+
+ // int offset position in value partition, -1 == null
+ keyPostingsOutput.writeInt((int) valuePartitionOffset);
+
+ // int key length in bytes, 0 = null
+ if (keyBuf == null || keyBuf.length == 0) {
+ keyPostingsOutput.writeInt(0);
+ } else {
+ keyPostingsOutput.writeInt(keyBuf.length);
+
+ // key writable key
+ keyPostingsOutput.write(keyBuf);
+
+ // key hash
+ keyPostingsOutput.write(keyHash);
+ }
+
+ hashtableOutput.seek(index * HASHTABLE_RECORD_BYTE_SIZE);
+ hashtableOutput.writeByte(1);
+ hashtableOutput.writeInt(keyPostingOffset);
+
+ } else {
+ // already contains a posting for the hash
+
+ int firstKeyPostingOffset = hashtableOutput.readInt();
+
+ K compareKey;
+
+ int previousKeyPostingOffset = -1;
+ int nextKeyPostingOffset = firstKeyPostingOffset;
+ while (nextKeyPostingOffset != -1) {
+ int keyPostingOffset = nextKeyPostingOffset;
+
+ keyPostingsOutput.seek(keyPostingOffset);
+
+ nextKeyPostingOffset = keyPostingsOutput.readInt();
+ int partitionNumber = keyPostingsOutput.readInt();
+ int partitionOffset = keyPostingsOutput.readInt();
+ int keySize = keyPostingsOutput.readInt();
+ previousKeyPostingOffset = keyPostingOffset;
+
+ }
+
+ // this key instance did not exist in entries
+ // although it shares hash with some other keys in there.
+
+ // create new key posting and point at it from the previous key posting
+ int keyPostingOffset = nextAvailableKeyPostingOffset;
+ nextAvailableKeyPostingOffset += keyPostingByteSize;
+
+ keyPostingsOutput.seek(keyPostingOffset);
+ // int offset position in this file to the next key posting with the same hash, -1 = null
+ keyPostingsOutput.writeInt(-1);
+
+ // int value partition number, -1 == null
+ keyPostingsOutput.writeInt(valuePartition);
+
+ // int offset position in value partition, -1 == null
+ keyPostingsOutput.writeInt((int) valuePartitionOffset);
+
+ // int key length in bytes, 0 = null
+ if (keyBuf == null || keyBuf.length == 0) {
+ keyPostingsOutput.writeInt(0);
+ } else {
+ keyPostingsOutput.writeInt(keyBuf.length);
+
+ // key writable key
+ keyPostingsOutput.write(keyBuf);
+
+ // key hash
+ keyPostingsOutput.write(keyHash);
+ }
+
+ keyPostingsOutput.seek(previousKeyPostingOffset);
+ keyPostingsOutput.writeInt(keyPostingOffset);
+
+ }
+ }
+
+ keyPostingsInput.close();
+ keyPostingsOutput.close();
+
+ hashtableOutput.close();
+
+ Lock.With with = new Lock.With(readLock, lockWaitTimeoutMilliseconds) {
+ protected Object doBody() throws IOException {
+
+ // lock all accessors for reading
+
+ Hashtable.this.hashtableFile.delete();
+ hashtableFile.renameTo(Hashtable.this.hashtableFile);
+
+ Hashtable.this.keyPostingsFile.delete();
+ keyPostingsFile.renameTo(Hashtable.this.keyPostingsFile);
+
+ Hashtable.this.capacity = capacity;
+
+ // todo reopen all accessor RAFs of hashtable and keys file
+
+ // unlock all accessors for reading
+
+ return null;
+ }
+ };
+ with.run();
+
+
+ System.currentTimeMillis();
+
+ }
/**
* Represents an hashtable value data partition file.
@@ -768,13 +955,13 @@
/**
* A Cursor iterates the entities in a Hashtable.
- *
+ * <p/>
* It is NOT thread safe! I.e. you can not use multiple threads to iterate the
* same cursor at the same time. TODO: fix that. Multiple solutions available.
- *
+ * <p/>
* The entities are returned close to the order of the hash code, but not always.
* The greater the capacity of a hashtable the more precise the order.
- *
+ * <p/>
* If another accessor is adding a new entity to the hashtable and the cursor already have
* iterated passed the hash code of the new entity key, this item will not be iterated,
* however if the hash code of the new entity key is greater than the current entity key
@@ -796,6 +983,7 @@
/**
* Attempts to move the curors one step forward.
+ *
* @return true if cursor moved to next entity, false if no more entries.
* @throws IOException
*/
Modified: labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java?rev=753612&r1=753611&r2=753612&view=diff
==============================================================================
--- labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java (original)
+++ labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/Benchmark.java Sat Mar 14 07:45:28 2009
@@ -25,7 +25,7 @@
Benchmark benchmark = new Benchmark();
- for (int capacityfactor = 1; capacityfactor <= 10; capacityfactor += 2) {
+ for (int capacityfactor = 8; capacityfactor <= 10; capacityfactor += 2) {
for (int items = 1000; items <= 1000000; items *= 1.5) {
benchmark.test(items, capacityfactor * items, csv);
System.out.println("-----");
Modified: labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java?rev=753612&r1=753611&r2=753612&view=diff
==============================================================================
--- labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java (original)
+++ labs/bananadb/trunk/src/test/java/org/apache/labs/bananadb/hashtable/TestHashtable.java Sat Mar 14 07:45:28 2009
@@ -47,6 +47,44 @@
}
+
+
+ @Test
+ public void testRehash() throws Exception {
+
+ File path = new File(this.path, "testRehash");
+
+ Hashtable<Integer, String> map = new Hashtable<Integer, String>(path, 100, new IntegerHandler(), new StringValueHandler());
+ HashtableAccessor accessor = map.createAccessor(false);
+
+ map.put(accessor, 0, "hello world");
+ map.put(accessor, 1, "greetings world");
+ map.put(accessor, 5, "salutations world");
+
+ accessor.close();
+
+ map.rehash(1000);
+
+ assertEquals(3, map.size());
+
+ accessor = map.createAccessor(false);
+ assertEquals("hello world", map.get(accessor, 0));
+ assertEquals("greetings world", map.get(accessor, 1));
+ assertEquals("salutations world", map.get(accessor, 5));
+
+ assertEquals(3, map.size());
+
+ // todo cursor
+
+ accessor.close();
+
+ // todo close and open again, then assert the same post-rehash tests once again
+
+
+
+ }
+
+
private class TestThreadedRunnable implements Runnable {
private Hashtable<Integer, String> map;
private HashtableAccessor accessor;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org