You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ka...@apache.org on 2009/03/12 20:51:35 UTC
svn commit: r752986 - in /labs/bananadb/trunk: BENCHMARK.txt FILES.txt
src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java
src/main/java/org/apache/labs/bananadb/hashtable/HashtableAccessor.java
Author: kalle
Date: Thu Mar 12 19:51:35 2009
New Revision: 752986
URL: http://svn.apache.org/viewvc?rev=752986&view=rev
Log:
BananDB
fixed typos
Added:
labs/bananadb/trunk/FILES.txt
Modified:
labs/bananadb/trunk/BENCHMARK.txt
labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java
labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/HashtableAccessor.java
Modified: labs/bananadb/trunk/BENCHMARK.txt
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/BENCHMARK.txt?rev=752986&r1=752985&r2=752986&view=diff
==============================================================================
--- labs/bananadb/trunk/BENCHMARK.txt (original)
+++ labs/bananadb/trunk/BENCHMARK.txt Thu Mar 12 19:51:35 2009
@@ -52,7 +52,7 @@
86481 86481 14.944 0.0647 1.660
129721 129721 1.605 0.0861 0.169
-And then I gave up waiting.. The numbers are rather linear though, compare add see for your self.
+And then I gave up waiting.. The numbers are however rather linear if you compare capacity.
The test was made using class o.a.l.b.hashtable.Benchmark
Added: labs/bananadb/trunk/FILES.txt
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/FILES.txt?rev=752986&view=auto
==============================================================================
--- labs/bananadb/trunk/FILES.txt (added)
+++ labs/bananadb/trunk/FILES.txt Thu Mar 12 19:51:35 2009
@@ -0,0 +1,59 @@
+= Files =
+
+== data.[0-9]+ ==
+
+Hashtable entity values, the serialized V part of the Hashtable<K, V>.
+There can be any number of these files, refered to as partitions in the code.
+
+Posting data:
+{{{
+
+int value length in bytes. 0 = null
+byte[] serialized value
+
+}}}
+
+== entities ==
+
+Hashtable key entity list, the serialized K part of the Hashtable<K, V>.
+If keys share the same hash code,
+or if the capacity is not great enough for keys to get a unique posting the the hashtable
+entities will be added to a list that must be iterated in order to find the correct posting.
+
+This file contains metadata in the first 1024 bytes. This really ought to be factored out to a new file.
+Currently it only contains two values:
+{{{
+
+int next available entry offset
+int entity count
+
+}}}
+
+Posting data:
+{{{
+
+int offset position in this file to the next entry with the same hash, -1 = null
+int value partition number, -1 = null value
+int offset position in value partition, -1 = null
+int key length in bytes, 0 = null
+byte[] serialized key (if not null)
+int key hash, will be used for future rehashing
+
+}}}
+
+== hashtable ==
+
+The actual key entity hashtable, points at the first and last position in entities-file for the given hash.
+The position for any given key is calculated with: hash & (capacity - 1);
+
+Posting data:
+{{{
+
+byte 0 = no posting. Any other value means there is a posting.
+int offset position to first entity with this hash
+
+TODO: data below this line is not yet implemented
+int offset position to last entry with this hash, speeds up adding when there are many items sharing the same hash position
+
+}}}
+
Modified: labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java?rev=752986&r1=752985&r2=752986&view=diff
==============================================================================
--- labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java (original)
+++ labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/Hashtable.java Thu Mar 12 19:51:35 2009
@@ -25,54 +25,20 @@
import java.util.*;
/**
- * Map<K, V> that lives on local disk via random access file.
+ * Hashtable<K, V> that lives on local disk via random access file.
* It is not thread safe.
* It can not handle null keys.
- * It is limited to a maximum number of entries, defined when opening (formatting) the map.
- * The entry values can however be of any size.
- * <p/>
- * It will need a entries partitioning scheme similar to the the data partitioning scheme
- * <p/>
- * <p/>
- * <p/>
- * multiple files contains the values <pk, value>
- * one file contains the entries <pk, key, pointer<above file, offset, length>, next entry position>
- * one file contains the hashtable <key hash, first entry pos in previous file>
- * <p/>
- * <p/>
- * hashtable.raw:
- * byte 0 = null
- * int offset position to first entry with this hash
- * <p/>
- * entries: (hashtable entry)
- * int offset position in this file to the next entry with the same hash, -1 = null
- * int value partition number, -1 == value is null
- * int offset position in value partition, -1 == null
- * int key length in bytes, 0 = null
- * key writable key (if not null)
- * int key hash, used for rehashing
- * <p/>
- * data.[0-9]+:
- * int value length in bytes, 0 = null
- * value writable value
- * <p/>
- * <p/>
- * <p/>
- * Entries file contains metadata in the first 1024 bytes.
- * <p/>
- * at 0: integer> next entry offset
- * at 4: integer> count
- * <p/>
+ * It is limited to a maximum number of entities, defined when opening (formatting) the map.
* <p/>
* todo: optimize
- * todo: overwrite old value in data file if possible when replacing a value. keep track of unused positions
* todo: rehash()
* todo: use partitioned space for entries and not just for value data
+ * todo: overwrite old value in data file if possible when replacing a value. keep track of unused positions
*/
public class Hashtable<K, V> {
private Log log = LogFactory.getLog(Hashtable.class);
-
+
private static final int HASHTABLE_RECORD_BYTE_SIZE = 1 + 4;
/**
@@ -95,25 +61,23 @@
private KeyClassHandler<K> keyClassHandler;
- /**
- * size of an entry in the hash file
- */
- private int entryByteSize;
+ /** size of an entity in the entity file */
+ private int entityByteSize;
private EntityClassHandler<V> valueClassHandler;
private int hashtableSize;
- private int entryCount;
- private int nextEntryOffset;
+ private int entityCount;
+ private int nextEntityOffset;
- private void setNextEntryOffset(int nextEntryOffset) {
- this.nextEntryOffset = nextEntryOffset;
+ private void setNextEntityOffset(int nextEntityOffset) {
+ this.nextEntityOffset = nextEntityOffset;
}
- private int getNextEntryOffset() {
- return nextEntryOffset;
+ private int getNextEntityOffset() {
+ return nextEntityOffset;
}
// // todo factor out RAF to new reader (transaction) class.
@@ -155,7 +119,7 @@
this.keyClassHandler = keyClassHandler;
this.valueClassHandler = valueClassHandler;
- entryByteSize = 4 + 4 + 4 + 4 + 4 + keyClassHandler.getByteSize();
+ entityByteSize = 4 + 4 + 4 + 4 + 4 + keyClassHandler.getByteSize();
this.path = path;
if (!path.exists()) {
@@ -180,20 +144,20 @@
log.info("Creating a new entries file");
- format(entriesFile, ENTRIES_HEADER_BYTE_SIZE + (capacity * entryByteSize));
- setNextEntryOffset(ENTRIES_HEADER_BYTE_SIZE);
+ format(entriesFile, ENTRIES_HEADER_BYTE_SIZE + (capacity * entityByteSize));
+ setNextEntityOffset(ENTRIES_HEADER_BYTE_SIZE);
RandomAccessFile entriesRAF = new RandomAccessFile(entriesFile, "rw");
entriesRAF.seek(0);
- entriesRAF.writeInt(getNextEntryOffset());
+ entriesRAF.writeInt(getNextEntityOffset());
entriesRAF.close();
} else {
RandomAccessFile entriesRAF = new RandomAccessFile(entriesFile, "r");
entriesRAF.seek(0);
- setNextEntryOffset(entriesRAF.readInt());
- entryCount = entriesRAF.readInt();
+ setNextEntityOffset(entriesRAF.readInt());
+ entityCount = entriesRAF.readInt();
entriesRAF.close();
}
@@ -266,18 +230,19 @@
if (in.readByte() == 0) {
// create posting for hash
- int entryOffset = getNextEntryOffset();
- setNextEntryOffset(getNextEntryOffset() + entryByteSize);
- accessor.getEntriesRAF().seek(0);
- accessor.getEntriesRAF().writeInt(getNextEntryOffset());
- accessor.getEntriesRAF().writeInt(++entryCount);
+ int entityOffset = getNextEntityOffset();
+ setNextEntityOffset(getNextEntityOffset() + entityByteSize);
+ accessor.getEntitiesRAF().seek(0);
+ accessor.getEntitiesRAF().writeInt(getNextEntityOffset());
+ accessor.getEntitiesRAF().writeInt(++entityCount);
- accessor.getEntriesRAF().seek(entryOffset);
- writeEntry(accessor, -1, keyBuf, valueBuf, keyHash);
+ accessor.getEntitiesRAF().seek(entityOffset);
+ writeEntity(accessor, -1, keyBuf, valueBuf, keyHash);
accessor.getHashtableRAF().seek(index * HASHTABLE_RECORD_BYTE_SIZE);
accessor.getHashtableRAF().writeByte(1);
- accessor.getHashtableRAF().writeInt(entryOffset);
+ accessor.getHashtableRAF().writeInt(entityOffset);
+ accessor.getHashtableRAF().writeInt(entityOffset);
return null;
@@ -285,24 +250,24 @@
// already contains a posting for the hash
- int firstEntryOffset = in.readInt();
+ int firstEntityOffset = in.readInt();
K compareKey;
- int previousEntryOffset = -1;
- int nextEntryOffset = firstEntryOffset;
- while (nextEntryOffset != -1) {
- int entryOffset = nextEntryOffset;
-
- accessor.getEntriesRAF().seek(entryOffset);
-
- nextEntryOffset = accessor.getEntriesRAF().readInt();
- int partitionNumber = accessor.getEntriesRAF().readInt();
- int partitionOffset = accessor.getEntriesRAF().readInt();
- int keySize = accessor.getEntriesRAF().readInt();
+ int previousEntityOffset = -1;
+ int nextEntityOffset = firstEntityOffset;
+ while (nextEntityOffset != -1) {
+ int entityOffset = nextEntityOffset;
+
+ accessor.getEntitiesRAF().seek(entityOffset);
+
+ nextEntityOffset = accessor.getEntitiesRAF().readInt();
+ int partitionNumber = accessor.getEntitiesRAF().readInt();
+ int partitionOffset = accessor.getEntitiesRAF().readInt();
+ int keySize = accessor.getEntitiesRAF().readInt();
if (keySize > 0) {
byte[] compareKeyBuf = new byte[keySize];
- accessor.getEntriesRAF().read(compareKeyBuf);
+ accessor.getEntitiesRAF().read(compareKeyBuf);
compareKey = keyClassHandler.read(new DataInputStream(new ByteArrayInputStream(compareKeyBuf)));
if (key.equals(compareKey)) {
@@ -313,40 +278,40 @@
oldValue = partition.read(accessor, partitionOffset);
}
- // over write old entry posting
- accessor.getEntriesRAF().seek(entryOffset);
- writeEntry(accessor, nextEntryOffset, keyBuf, valueBuf, keyHash);
+ // over write old entity posting
+ accessor.getEntitiesRAF().seek(entityOffset);
+ writeEntity(accessor, nextEntityOffset, keyBuf, valueBuf, keyHash);
return oldValue;
}
}
- previousEntryOffset = entryOffset;
+ previousEntityOffset = entityOffset;
}
// this key instance did not exist in entries
// although it shares hash with some other keys in there.
- // create new entry and point at it from the previous entry
- int entryOffset = getNextEntryOffset();
- setNextEntryOffset(getNextEntryOffset() + entryByteSize);
+ // create new entity and point at it from the previous entity
+ int entityOffset = getNextEntityOffset();
+ setNextEntityOffset(getNextEntityOffset() + entityByteSize);
- accessor.getEntriesRAF().seek(entryOffset);
- writeEntry(accessor, -1, keyBuf, valueBuf, keyHash);
+ accessor.getEntitiesRAF().seek(entityOffset);
+ writeEntity(accessor, -1, keyBuf, valueBuf, keyHash);
- accessor.getEntriesRAF().seek(previousEntryOffset);
- accessor.getEntriesRAF().writeInt(entryOffset);
+ accessor.getEntitiesRAF().seek(previousEntityOffset);
+ accessor.getEntitiesRAF().writeInt(entityOffset);
- accessor.getEntriesRAF().seek(4);
- accessor.getEntriesRAF().writeInt(++entryCount);
+ accessor.getEntitiesRAF().seek(4);
+ accessor.getEntitiesRAF().writeInt(++entityCount);
return null;
}
}
- private synchronized void writeEntry(HashtableAccessor accessor, int nextEntity, byte[] keyBuf, byte[] valueBuf, int keyHashCode) throws IOException {
+ private synchronized void writeEntity(HashtableAccessor accessor, int nextEntity, byte[] keyBuf, byte[] valueBuf, int keyHashCode) throws IOException {
int partitionNumber = -1;
long offset = -1;
@@ -366,26 +331,26 @@
offset = partition.append(accessor, valueBuf);
}
- // int offset position in this file to the next entry with the same hash, -1 = null
- accessor.getEntriesRAF().writeInt(nextEntity);
+ // int offset position in this file to the next entity with the same hash, -1 = null
+ accessor.getEntitiesRAF().writeInt(nextEntity);
// int value partition number, -1 == null
- accessor.getEntriesRAF().writeInt(partitionNumber);
+ accessor.getEntitiesRAF().writeInt(partitionNumber);
// int offset position in value partition, -1 == null
- accessor.getEntriesRAF().writeInt((int) offset);
+ accessor.getEntitiesRAF().writeInt((int) offset);
// int key length in bytes, 0 = null
if (keyBuf == null || keyBuf.length == 0) {
- accessor.getEntriesRAF().writeInt(0);
+ accessor.getEntitiesRAF().writeInt(0);
} else {
- accessor.getEntriesRAF().writeInt(keyBuf.length);
+ accessor.getEntitiesRAF().writeInt(keyBuf.length);
// key writable key
- accessor.getEntriesRAF().write(keyBuf);
+ accessor.getEntitiesRAF().write(keyBuf);
// key hash
- accessor.getEntriesRAF().write(keyHashCode);
+ accessor.getEntitiesRAF().write(keyHashCode);
}
}
@@ -403,32 +368,32 @@
return null;
}
- int firstEntryOffset = in.readInt();
+ int firstEntityOffset = in.readInt();
K compareKey;
// there was some thought behind this attribute but I cant seem to recall what..
- int previousEntryOffset = -1;
+ int previousEntityOffset = -1;
- int nextEntryOffset = firstEntryOffset;
- while (nextEntryOffset != -1) {
- int entryOffset = nextEntryOffset;
+ int nextEntityOffset = firstEntityOffset;
+ while (nextEntityOffset != -1) {
+ int entityOffset = nextEntityOffset;
- accessor.getEntriesRAF().seek(entryOffset);
- previousEntryOffset = entryOffset;
+ accessor.getEntitiesRAF().seek(entityOffset);
+ previousEntityOffset = entityOffset;
- nextEntryOffset = accessor.getEntriesRAF().readInt();
- int partitionNumber = accessor.getEntriesRAF().readInt();
+ nextEntityOffset = accessor.getEntitiesRAF().readInt();
+ int partitionNumber = accessor.getEntitiesRAF().readInt();
if (partitionNumber == -1) {
return null;
}
- int partitionOffset = accessor.getEntriesRAF().readInt();
- int keySize = accessor.getEntriesRAF().readInt();
+ int partitionOffset = accessor.getEntitiesRAF().readInt();
+ int keySize = accessor.getEntitiesRAF().readInt();
if (keySize > 0) {
byte[] compareKeyBuf = new byte[keySize];
- accessor.getEntriesRAF().read(compareKeyBuf);
+ accessor.getEntitiesRAF().read(compareKeyBuf);
compareKey = keyClassHandler.read(new DataInputStream(new ByteArrayInputStream(compareKeyBuf)));
if (key.equals(compareKey)) {
@@ -461,24 +426,24 @@
byte isnull = in.readByte();
- int firstEntryOffset = in.readInt();
+ int firstEntityOffset = in.readInt();
K compareKey;
- int previousEntryOffset = -1;
- int nextEntryOffset = firstEntryOffset;
- while (nextEntryOffset != -1) {
- int entryOffset = nextEntryOffset;
-
- accessor.getEntriesRAF().seek(entryOffset);
-
- nextEntryOffset = accessor.getEntriesRAF().readInt();
- int partitionNumber = accessor.getEntriesRAF().readInt();
- int partitionOffset = accessor.getEntriesRAF().readInt();
- int keySize = accessor.getEntriesRAF().readInt();
+ int previousEntityOffset = -1;
+ int nextEntityOffset = firstEntityOffset;
+ while (nextEntityOffset != -1) {
+ int entityOffset = nextEntityOffset;
+
+ accessor.getEntitiesRAF().seek(entityOffset);
+
+ nextEntityOffset = accessor.getEntitiesRAF().readInt();
+ int partitionNumber = accessor.getEntitiesRAF().readInt();
+ int partitionOffset = accessor.getEntitiesRAF().readInt();
+ int keySize = accessor.getEntitiesRAF().readInt();
if (keySize > 0) {
byte[] compareKeyBuf = new byte[keySize];
- accessor.getEntriesRAF().read(compareKeyBuf);
+ accessor.getEntitiesRAF().read(compareKeyBuf);
compareKey = keyClassHandler.read(new DataInputStream(new ByteArrayInputStream(compareKeyBuf)));
if (key.equals(compareKey)) {
@@ -491,24 +456,24 @@
oldValue = partition.read(accessor, partitionOffset);
}
- if (previousEntryOffset != -1) {
- // update offset in previous entry to point at next offset in removed entry
- accessor.getEntriesRAF().seek(previousEntryOffset);
- accessor.getEntriesRAF().writeInt(nextEntryOffset);
+ if (previousEntityOffset != -1) {
+ // update offset in previous entity to point at next offset in removed entity
+ accessor.getEntitiesRAF().seek(previousEntityOffset);
+ accessor.getEntitiesRAF().writeInt(nextEntityOffset);
} else {
// remove posting in hashtable
accessor.getHashtableRAF().seek(index * HASHTABLE_RECORD_BYTE_SIZE);
accessor.getHashtableRAF().writeByte(0);
}
- accessor.getEntriesRAF().seek(4);
- accessor.getEntriesRAF().writeInt(--entryCount);
+ accessor.getEntitiesRAF().seek(4);
+ accessor.getEntitiesRAF().writeInt(--entityCount);
return oldValue;
}
}
- previousEntryOffset = entryOffset;
+ previousEntityOffset = entityOffset;
}
@@ -631,7 +596,7 @@
public int size() {
- return entryCount;
+ return entityCount;
}
@@ -736,22 +701,22 @@
}
private long hashtableCursor = 0;
- private long entryCursor = -1;
+ private long entityCursor = -1;
private K key;
private V value;
/**
* Attempts to move the curors one step forward.
- * @return true if cursor moved to next entry, false if no more entries.
+ * @return true if cursor moved to next entity, false if no more entries.
* @throws IOException
*/
public boolean next() throws IOException {
- if (entryCursor != -1) {
- accessor.getEntriesRAF().seek(entryCursor);
- entryCursor = accessor.getEntriesRAF().readInt(); // next entry
- if (entryCursor != -1) {
- accessor.getEntriesRAF().seek(entryCursor + 4);
+ if (entityCursor != -1) {
+ accessor.getEntitiesRAF().seek(entityCursor);
+ entityCursor = accessor.getEntitiesRAF().readInt(); // next entity
+ if (entityCursor != -1) {
+ accessor.getEntitiesRAF().seek(entityCursor + 4);
set();
return true;
}
@@ -761,8 +726,8 @@
hashtableCursor += HASHTABLE_RECORD_BYTE_SIZE;
byte isnull = accessor.getHashtableRAF().readByte();
if (isnull != 0) {
- entryCursor = accessor.getHashtableRAF().readInt();
- accessor.getEntriesRAF().seek(entryCursor + 4);
+ entityCursor = accessor.getHashtableRAF().readInt();
+ accessor.getEntitiesRAF().seek(entityCursor + 4);
set();
return true;
@@ -773,10 +738,10 @@
}
private void set() throws IOException {
- int partitionNumber = accessor.getEntriesRAF().readInt();
- int partitionOffset = accessor.getEntriesRAF().readInt();
- byte[] keyBuf = new byte[accessor.getEntriesRAF().readInt()];
- accessor.getEntriesRAF().read(keyBuf);
+ int partitionNumber = accessor.getEntitiesRAF().readInt();
+ int partitionOffset = accessor.getEntitiesRAF().readInt();
+ byte[] keyBuf = new byte[accessor.getEntitiesRAF().readInt()];
+ accessor.getEntitiesRAF().read(keyBuf);
K key;
key = keyClassHandler.read(new DataInputStream(new ByteArrayInputStream(keyBuf)));
this.key = key;
Modified: labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/HashtableAccessor.java
URL: http://svn.apache.org/viewvc/labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/HashtableAccessor.java?rev=752986&r1=752985&r2=752986&view=diff
==============================================================================
--- labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/HashtableAccessor.java (original)
+++ labs/bananadb/trunk/src/main/java/org/apache/labs/bananadb/hashtable/HashtableAccessor.java Thu Mar 12 19:51:35 2009
@@ -32,7 +32,7 @@
private String access;
private RandomAccessFile hashtableRAF;
- private RandomAccessFile entriesRAF;
+ private RandomAccessFile entitiesRAF;
private Map<Hashtable.Partition, RandomAccessFile> partitionRAFs = new HashMap<Hashtable.Partition, RandomAccessFile>();
public HashtableAccessor(Hashtable<?, ?> hashtable, boolean readOnly) throws IOException {
@@ -41,7 +41,7 @@
access = readOnly ? "r" : "rw";
hashtableRAF = new RandomAccessFile(hashtable.getHashtableFile(), access);
- entriesRAF = new RandomAccessFile(hashtable.getEntriesFile(), access);
+ entitiesRAF = new RandomAccessFile(hashtable.getEntriesFile(), access);
for (Hashtable.Partition partition : hashtable.getPartitions()) {
registerPartition(partition);
@@ -56,8 +56,8 @@
return hashtableRAF;
}
- RandomAccessFile getEntriesRAF() {
- return entriesRAF;
+ RandomAccessFile getEntitiesRAF() {
+ return entitiesRAF;
}
RandomAccessFile getPartitionRAF(Hashtable.Partition partition) {
@@ -72,7 +72,7 @@
public void close() throws IOException {
getHashtableRAF().close();
- getEntriesRAF().close();
+ getEntitiesRAF().close();
for (RandomAccessFile partitionRAF : partitionRAFs.values()) {
partitionRAF.close();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org