You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2014/11/12 11:14:34 UTC
svn commit: r1638731 - in /directory/mavibot/trunk/mavibot/src:
main/java/org/apache/directory/mavibot/btree/BulkLoader.java
test/java/org/apache/directory/mavibot/btree/BulkLoaderTest.java
Author: elecharny
Date: Wed Nov 12 10:14:33 2014
New Revision: 1638731
URL: http://svn.apache.org/r1638731
Log:
Added support for multiple-value bulkload.
Modified:
directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/BulkLoader.java
directory/mavibot/trunk/mavibot/src/test/java/org/apache/directory/mavibot/btree/BulkLoaderTest.java
Modified: directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/BulkLoader.java
URL: http://svn.apache.org/viewvc/directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/BulkLoader.java?rev=1638731&r1=1638730&r2=1638731&view=diff
==============================================================================
--- directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/BulkLoader.java (original)
+++ directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/BulkLoader.java Wed Nov 12 10:14:33 2014
@@ -216,6 +216,62 @@ public class BulkLoader<K, V>
/**
+     * Write every tuple of a sorted iterator into one temporary file (length-prefixed key,
+     * number of values, then each length-prefixed value), and return an iterator reading
+     * them back. The returned stream is not closed here; the file is marked deleteOnExit.
+     * @param btree the BTree whose key and value serializers encode the tuples
+     * @param dataIterator the tuples to write, fully consumed by this call
+     * @return the file-backed iterator, paired with the number of tuples written
+     * @throws IOException if the temporary file cannot be created, written or reopened
+     */
+    private Tuple<Iterator<Tuple<K, Set<V>>>, Integer> processFiles( BTree<K, V> btree,
+        Iterator<Tuple<K, Set<V>>> dataIterator ) throws IOException
+    {
+        File file = File.createTempFile( "sortedUnique", "data" );
+        file.deleteOnExit();
+        FileOutputStream fos = new FileOutputStream( file );
+        int nbReads = 0; // Number of read elements
+
+        try
+        {
+            // Flush the tuples on disk
+            while ( dataIterator.hasNext() )
+            {
+                nbReads++;
+                Tuple<K, Set<V>> tuple = dataIterator.next();
+
+                // Serialize the key, prefixed by its length
+                byte[] bytesKey = btree.getKeySerializer().serialize( tuple.key );
+                fos.write( IntSerializer.serialize( bytesKey.length ) );
+                fos.write( bytesKey );
+
+                // Serialize the number of values
+                fos.write( IntSerializer.serialize( tuple.getValue().size() ) );
+
+                // Serialize each value, prefixed by its length
+                for ( V value : tuple.getValue() )
+                {
+                    byte[] bytesValue = btree.getValueSerializer().serialize( value );
+                    fos.write( IntSerializer.serialize( bytesValue.length ) );
+                    fos.write( bytesValue );
+                }
+            }
+
+            fos.flush();
+        }
+        finally
+        {
+            // Release the file handle even when serialization or a write fails
+            fos.close();
+        }
+
+        FileInputStream fis = new FileInputStream( file );
+
+        return new Tuple<Iterator<Tuple<K, Set<V>>>, Integer>( createUniqueFileIterator( btree, fis ), nbReads );
+    }
+
+
+ /**
* Bulk Load data into a persisted BTree
*
* @param btree The persisted BTree in which we want to load the data
@@ -259,12 +315,14 @@ public class BulkLoader<K, V>
// Now that we have processed all the data, we can start storing them in the btree
Iterator<Tuple<K, Set<V>>> dataIterator = null;
FileInputStream[] streams = null;
+ BTree<K, V> resultBTree = null;
if ( inMemory )
{
// Here, we have all the data in memory, no need to merge files
// We will build a simple iterator over the data
dataIterator = createTupleIterator( btree, tuples );
+ resultBTree = bulkLoad( btree, dataIterator, nbElems );
}
else
{
@@ -278,12 +336,14 @@ public class BulkLoader<K, V>
}
dataIterator = createIterator( btree, streams );
+
+ // Process the files, and construct one single file with an iterator
+ Tuple<Iterator<Tuple<K, Set<V>>>, Integer> result = processFiles( btree, dataIterator );
+ resultBTree = bulkLoad( btree, result.key, result.value );
}
// Ok, we have an iterator over sorted elements, we can now load them in the
// target btree.
- BTree<K, V> resultBTree = bulkLoad( btree, dataIterator, nbElems );
-
// Now, close the FileInputStream, and delete them if we have some
if ( !inMemory )
{
@@ -1224,6 +1284,55 @@ public class BulkLoader<K, V>
/**
+     * Build an iterator reading tuples, one after the other, from a single file stream.
+     * Each tuple is obtained by calling fetchTuple, and iteration ends once the stream
+     * reports no available bytes. The stream is not closed by the iterator.
+     *
+     * @param btree the BTree passed to fetchTuple when reading each tuple
+     * @param stream the stream to read the tuples from
+     * @return an iterator over the tuples stored in the stream
+     * @throws FileNotFoundException declared for callers; not thrown by this method's code
+     */
+    private Iterator<Tuple<K, Set<V>>> createUniqueFileIterator( final BTree<K, V> btree, final FileInputStream stream )
+        throws FileNotFoundException
+    {
+        return new Iterator<Tuple<K, Set<V>>>()
+        {
+            @Override
+            public Tuple<K, Set<V>> next()
+            {
+                // Get the next tuple from the stream
+                return fetchTuple( btree, stream );
+            }
+
+
+            @Override
+            public boolean hasNext()
+            {
+                // Check that we have at least one byte left to read
+                try
+                {
+                    return stream.available() > 0;
+                }
+                catch ( IOException e )
+                {
+                    // NOTE(review): an I/O failure is reported as the end of the data
+                    return false;
+                }
+            }
+
+
+            @Override
+            public void remove()
+            {
+                // Tuples are read from a file: removal is not supported
+                throw new UnsupportedOperationException();
+            }
+        };
+    }
+
+
+ /**
* Compact a given persisted BTree, making it dense. All the values will be stored
* in newly created pages, each one of them containing as much elements
* as it's size.
Modified: directory/mavibot/trunk/mavibot/src/test/java/org/apache/directory/mavibot/btree/BulkLoaderTest.java
URL: http://svn.apache.org/viewvc/directory/mavibot/trunk/mavibot/src/test/java/org/apache/directory/mavibot/btree/BulkLoaderTest.java?rev=1638731&r1=1638730&r2=1638731&view=diff
==============================================================================
--- directory/mavibot/trunk/mavibot/src/test/java/org/apache/directory/mavibot/btree/BulkLoaderTest.java (original)
+++ directory/mavibot/trunk/mavibot/src/test/java/org/apache/directory/mavibot/btree/BulkLoaderTest.java Wed Nov 12 10:14:33 2014
@@ -598,7 +598,6 @@ public class BulkLoaderTest
* Test that we can load 100 BTrees with 0 to 1000 elements, each one of them having multiple values
* @throws BTreeAlreadyManagedException
*/
- @Ignore
@Test
public void testPersistedBulkLoad1000ElementsMultipleValues() throws IOException, KeyNotFoundException,
BTreeAlreadyManagedException