You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2014/05/17 15:31:25 UTC
svn commit: r1595477 [5/8] - in /directory/mavibot/trunk: ./ mavibot/img/
mavibot/src/main/java/org/apache/directory/mavibot/btree/
mavibot/src/main/java/org/apache/directory/mavibot/btree/comparator/
mavibot/src/main/java/org/apache/directory/mavibot/...
Modified: directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/RecordManager.java
URL: http://svn.apache.org/viewvc/directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/RecordManager.java?rev=1595477&r1=1595476&r2=1595477&view=diff
==============================================================================
--- directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/RecordManager.java (original)
+++ directory/mavibot/trunk/mavibot/src/main/java/org/apache/directory/mavibot/btree/RecordManager.java Sat May 17 13:31:23 2014
@@ -26,18 +26,25 @@ import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Queue;
import java.util.Set;
+import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.directory.mavibot.btree.exception.BTreeAlreadyManagedException;
import org.apache.directory.mavibot.btree.exception.BTreeCreationException;
import org.apache.directory.mavibot.btree.exception.EndOfFileExceededException;
-import org.apache.directory.mavibot.btree.exception.FreePageException;
-import org.apache.directory.mavibot.btree.exception.InvalidBTreeException;
+import org.apache.directory.mavibot.btree.exception.FileException;
+import org.apache.directory.mavibot.btree.exception.InvalidOffsetException;
import org.apache.directory.mavibot.btree.exception.KeyNotFoundException;
import org.apache.directory.mavibot.btree.exception.RecordManagerException;
import org.apache.directory.mavibot.btree.serializer.ElementSerializer;
@@ -50,33 +57,36 @@ import org.slf4j.LoggerFactory;
/**
- * The RecordManager is used to manage the file in which we will store the BTrees.
- * A RecordManager will manage more than one BTree.<br/>
+ * The RecordManager is used to manage the file in which we will store the B-trees.
+ * A RecordManager will manage more than one B-tree.<br/>
*
* It stores data in fixed size pages (default size is 512 bytes), which may be linked one to
* the other if the data we want to store is too big for a page.
*
* @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
*/
-public class RecordManager
+public class RecordManager extends AbstractTransactionManager
{
/** The LoggerFactory used by this class */
protected static final Logger LOG = LoggerFactory.getLogger( RecordManager.class );
+ /** A dedicated logger for the pages */
+ protected static final Logger LOG_PAGES = LoggerFactory.getLogger( "LOG_PAGES" );
+
/** A dedicated logger for the check */
- protected static final Logger LOG_CHECK = LoggerFactory.getLogger( "RM_CHECK" );
+ protected static final Logger LOG_CHECK = LoggerFactory.getLogger( "LOG_CHECK" );
/** The associated file */
private File file;
/** The channel used to read and write data */
- private FileChannel fileChannel;
+ /* no qualifier */ FileChannel fileChannel;
- /** The number of stored BTrees */
- private int nbBtree;
+ /** The number of managed B-trees */
+ /* no qualifier */ int nbBtree;
/** The first and last free page */
- private long firstFreePage;
+ /* no qualifier */ long firstFreePage;
/** The list of available free pages */
List<PageIO> freePages = new ArrayList<PageIO>();
@@ -86,25 +96,22 @@ public class RecordManager
public AtomicLong nbCreatedPages = new AtomicLong( 0 );
public AtomicLong nbReusedPages = new AtomicLong( 0 );
public AtomicLong nbUpdateRMHeader = new AtomicLong( 0 );
- public AtomicLong nbUpdateBTreeHeader = new AtomicLong( 0 );
+ public AtomicLong nbUpdateBtreeHeader = new AtomicLong( 0 );
public AtomicLong nbUpdatePageIOs = new AtomicLong( 0 );
/** The offset of the end of the file */
private long endOfFileOffset;
/**
- * A Btree used to manage the page that has been copied in a new version.
+ * A B-tree used to manage the page that has been copied in a new version.
* Those pages can be reclaimed when the associated version is dead.
**/
- private BTree<RevisionName, long[]> copiedPageBTree;
-
- /** A BTree used to store all the valid revisions for all the stored BTrees */
- private BTree<RevisionName, Long> revisionBTree;
+ private BTree<RevisionName, long[]> copiedPageBtree;
/** A constant for an offset on a non existing page */
- private static final long NO_PAGE = -1L;
+ public static final long NO_PAGE = -1L;
- /** The header page size */
+ /** The number of element we can store in a page */
private static final int PAGE_SIZE = 4;
/** The size of the link to next page */
@@ -112,44 +119,40 @@ public class RecordManager
/** Some constants */
private static final int BYTE_SIZE = 1;
- private static final int INT_SIZE = 4;
- private static final int LONG_SIZE = 8;
+ /* no qualifier */ static final int INT_SIZE = 4;
+ /* no qualifier */ static final int LONG_SIZE = 8;
/** The default page size */
- private static final int DEFAULT_PAGE_SIZE = 512;
+ public static final int DEFAULT_PAGE_SIZE = 512;
+
+ /** The minimal page size. Can't be below 64, as we have to store many things in the RMHeader */
+ private static final int MIN_PAGE_SIZE = 64;
- /** The header size */
- private static int HEADER_SIZE = DEFAULT_PAGE_SIZE;
+ /** The RecordManager header size */
+ /* no qualifier */ static int RECORD_MANAGER_HEADER_SIZE = DEFAULT_PAGE_SIZE;
- /** A global buffer used to store the header */
- private static ByteBuffer HEADER_BUFFER;
+ /** A global buffer used to store the RecordManager header */
+ private static ByteBuffer RECORD_MANAGER_HEADER_BUFFER;
- /** A static buffer used to store the header */
- private static byte[] HEADER_BYTES;
+ /** A static buffer used to store the RecordManager header */
+ private static byte[] RECORD_MANAGER_HEADER_BYTES;
/** The length of an Offset, as a negative value */
private static byte[] LONG_LENGTH = new byte[]
{ ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xF8 };
/** The RecordManager underlying page size. */
- private int pageSize = DEFAULT_PAGE_SIZE;
+ /* no qualifier */ int pageSize = DEFAULT_PAGE_SIZE;
- /** The set of managed BTrees */
- private Map<String, BTree<Object, Object>> managedBTrees;
-
- /** The offset on the last added BTree */
- private long lastAddedBTreeOffset = NO_PAGE;
+ /** The set of managed B-trees */
+ private Map<String, BTree<Object, Object>> managedBtrees;
+
+ /** The queue of recently closed transactions */
+ private Queue<RevisionName> closedTransactionsQueue = new LinkedBlockingQueue<RevisionName>();
/** The default file name */
private static final String DEFAULT_FILE_NAME = "mavibot.db";
- /** A deserializer for Offsets */
- private static final LongSerializer OFFSET_SERIALIZER = new LongSerializer();
-
- private static final String REVISION_BTREE_NAME = "_revisionBTree_";
-
- private static final String COPIED_PAGE_BTREE_NAME = "_copiedPageBTree_";
-
/** A flag set to true if we want to keep old revisions */
private boolean keepRevisions;
@@ -159,18 +162,48 @@ public class RecordManager
/** A flag used by internal btrees */
public static final boolean NORMAL_BTREE = false;
- /** A map of pending pages */
- private Map<Page<?, ?>, BTree<?, ?>> pendingPages = new LinkedHashMap<Page<?, ?>, BTree<?, ?>>();
-
- /** The Btree of Btrees */
+ /** The B-tree of B-trees */
private BTree<NameRevision, Long> btreeOfBtrees;
- private static final String BOB_ONE_NAME = "_BTREE_OF_BTREES_";
+ /** The B-tree of B-trees management btree name */
+ /* no qualifier */ static final String BTREE_OF_BTREES_NAME = "_btree_of_btrees_";
+
+ /** The CopiedPages management btree name */
+ /* no qualifier */ static final String COPIED_PAGE_BTREE_NAME = "_copiedPageBtree_";
- /** The two latest revisions of the BOB */
- private long bobCurrentRevision;
- private long bobOldRevision;
+ /** The current B-tree of B-trees header offset */
+ /* no qualifier */ long currentBtreeOfBtreesOffset;
+ /** The previous B-tree of B-trees header offset */
+ private long previousBtreeOfBtreesOffset = NO_PAGE;
+
+ /** The offset on the current copied pages B-tree */
+ /* no qualifier */ long currentCopiedPagesBtreeOffset = NO_PAGE;
+
+ /** The offset on the previous copied pages B-tree */
+ private long previousCopiedPagesBtreeOffset = NO_PAGE;
+
+ /** A lock to protect the transaction handling */
+ private Lock transactionLock = new ReentrantLock();
+
+ /** A ThreadLocalStorage used to store the current transaction */
+ private static final ThreadLocal<Integer> context = new ThreadLocal<Integer>();
+
+ /** The list of PageIO that can be freed after a commit */
+ List<PageIO> freedPages = new ArrayList<PageIO>();
+
+ /** The list of PageIO that can be freed after a rollback */
+ private List<PageIO> allocatedPages = new ArrayList<PageIO>();
+
+ /** A Map keeping the latest revisions for each managed BTree */
+ private Map<String, BTreeHeader<?, ?>> currentBTreeHeaders = new HashMap<String, BTreeHeader<?, ?>>();
+
+ /** A Map storing the new revisions when some changes have been made in some BTrees */
+ private Map<String, BTreeHeader<?, ?>> newBTreeHeaders = new HashMap<String, BTreeHeader<?, ?>>();
+
+ /** A lock to protect the BtreeHeader maps */
+ private ReadWriteLock btreeHeadersLock = new ReentrantReadWriteLock();
+
/**
* Create a Record manager which will either create the underlying file
* or load an existing one. If a folder is provided, then we will create
@@ -190,15 +223,24 @@ public class RecordManager
* a file with a default name : mavibot.db
*
* @param name The file name, or a folder name
- * @param pageSize the size of a page on disk
+ * @param pageSize the size of a page on disk, in bytes
*/
public RecordManager( String fileName, int pageSize )
{
- managedBTrees = new LinkedHashMap<String, BTree<Object, Object>>();
+ managedBtrees = new LinkedHashMap<String, BTree<Object, Object>>();
+
+ if ( pageSize < MIN_PAGE_SIZE )
+ {
+ this.pageSize = MIN_PAGE_SIZE;
+ }
+ else
+ {
+ this.pageSize = pageSize;
+ }
- HEADER_BUFFER = ByteBuffer.allocate( pageSize );
- HEADER_BYTES = new byte[pageSize];
- HEADER_SIZE = pageSize;
+ RECORD_MANAGER_HEADER_BUFFER = ByteBuffer.allocate( this.pageSize );
+ RECORD_MANAGER_HEADER_BYTES = new byte[this.pageSize];
+ RECORD_MANAGER_HEADER_SIZE = this.pageSize;
// Open the file or create it
File tmpFile = new File( fileName );
@@ -222,7 +264,6 @@ public class RecordManager
if ( isNewFile )
{
- this.pageSize = pageSize;
initRecordManager();
}
else
@@ -264,33 +305,35 @@ public class RecordManager
LOG.error( "Cannot create the file {}", mavibotFile.getName() );
return false;
}
-
}
+
/**
- * We will create a brand new RecordManager file, containing nothing, but the header,
- * a BTree to manage the old revisions we want to keep and
- * a BTree used to manage pages associated with old versions.
+ * We will create a brand new RecordManager file, containing nothing, but the RecordManager header,
+ * a B-tree to manage the old revisions we want to keep and
+ * a B-tree used to manage pages associated with old versions.
* <br/>
- * The Header contains the following details :
+ * The RecordManager header contains the following details :
* <pre>
- * +---------------+
- * | PageSize | 4 bytes : The size of a physical page (default to 4096)
- * +---------------+
- * | NbTree | 4 bytes : The number of managed BTrees (at least 1)
- * +---------------+
- * | FirstFree | 8 bytes : The offset of the first free page
- * +---------------+
- * | currentBoB | 1 byte : The current BoB in use
- * +---------------+
- * | BoB offset[0] | 8 bytes : The offset of the first BoB
- * +---------------+
- * | BoB offset[1] | 8 bytes : The offset of the second BoB
- * +---------------+
+ * +--------------------------+
+ * | PageSize | 4 bytes : The size of a physical page (default to 4096)
+ * +--------------------------+
+ * | NbTree | 4 bytes : The number of managed B-trees (at least 1)
+ * +--------------------------+
+ * | FirstFree | 8 bytes : The offset of the first free page
+ * +--------------------------+
+ * | current BoB offset | 8 bytes : The offset of the current BoB
+ * +--------------------------+
+ * | previous BoB offset | 8 bytes : The offset of the previous BoB
+ * +--------------------------+
+ * | current CP btree offset | 8 bytes : The offset of the current Copied Pages B-tree
+ * +--------------------------+
+ * | previous CP btree offset | 8 bytes : The offset of the previous Copied Pages B-tree
+ * +--------------------------+
* </pre>
*
- * We then store the BTree managing the pages that have been copied when we have added
- * or deleted an element in the BTree. They are associated with a version.
+ * We then store the B-tree managing the pages that have been copied when we have added
+ * or deleted an element in the B-tree. They are associated with a version.
*
* Last, we add the bTree that keep a track on each revision we can have access to.
*/
@@ -299,8 +342,7 @@ public class RecordManager
// Create a new Header
nbBtree = 0;
firstFreePage = NO_PAGE;
- bobCurrentRevision = 0L;
- bobOldRevision = 0L;
+ currentBtreeOfBtreesOffset = 0L;
updateRecordManagerHeader();
@@ -308,22 +350,32 @@ public class RecordManager
endOfFileOffset = fileChannel.size();
// First, create the btree of btrees <NameRevision, Long>
- btreeOfBtrees = BTreeFactory.createPersistedBTree( BOB_ONE_NAME, new NameRevisionSerializer(),
- new LongSerializer() );
+ createBtreeOfBtrees();
- // Now, initialize the Copied Page BTree
- copiedPageBTree = BTreeFactory.createPersistedBTree( COPIED_PAGE_BTREE_NAME, new RevisionNameSerializer(),
- new LongArraySerializer() );
-
- // and initialize the Revision BTree
- revisionBTree = BTreeFactory.createPersistedBTree( REVISION_BTREE_NAME, new RevisionNameSerializer(),
- new LongSerializer() );
+ // Now, initialize the Copied Page B-tree
+ createCopiedPagesBtree();
- // Inject these BTrees into the RecordManager
+ // Inject these B-trees into the RecordManager. They are internal B-trees.
try
{
- manage( copiedPageBTree );
- manage( revisionBTree );
+ manage( btreeOfBtrees, INTERNAL_BTREE );
+
+ currentBtreeOfBtreesOffset = ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader().getBTreeHeaderOffset();
+ updateRecordManagerHeader();
+
+ // Inject the BtreeOfBtrees into the currentBtreeHeaders map
+ currentBTreeHeaders.put( BTREE_OF_BTREES_NAME, ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader() );
+ newBTreeHeaders.put( BTREE_OF_BTREES_NAME, ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader() );
+
+ // The FreePage B-tree
+ manage( copiedPageBtree, INTERNAL_BTREE );
+
+ currentCopiedPagesBtreeOffset = ((PersistedBTree<RevisionName, long[]>)copiedPageBtree).getBtreeHeader().getBTreeHeaderOffset();
+ updateRecordManagerHeader();
+
+ // Inject the CopiedPagesBTree into the currentBtreeHeaders map
+ currentBTreeHeaders.put( COPIED_PAGE_BTREE_NAME, ((PersistedBTree<RevisionName, long[]>)copiedPageBtree).getBtreeHeader() );
+ newBTreeHeaders.put( COPIED_PAGE_BTREE_NAME, ((PersistedBTree<RevisionName, long[]>)copiedPageBtree).getBtreeHeader() );
}
catch ( BTreeAlreadyManagedException btame )
{
@@ -335,109 +387,164 @@ public class RecordManager
/**
+ * Create the B-treeOfBtrees
+ */
+ private void createBtreeOfBtrees()
+ {
+ PersistedBTreeConfiguration<NameRevision, Long> configuration = new PersistedBTreeConfiguration<NameRevision, Long>();
+ configuration.setKeySerializer( NameRevisionSerializer.INSTANCE );
+ configuration.setName( BTREE_OF_BTREES_NAME );
+ configuration.setValueSerializer( LongSerializer.INSTANCE );
+ configuration.setBtreeType( BTreeTypeEnum.BTREE_OF_BTREES );
+ configuration.setCacheSize( PersistedBTree.DEFAULT_CACHE_SIZE );
+
+ btreeOfBtrees = BTreeFactory.createPersistedBTree( configuration );
+ }
+
+
+ /**
+ * Create the CopiedPagesBtree
+ */
+ private void createCopiedPagesBtree()
+ {
+ PersistedBTreeConfiguration<RevisionName, long[]> configuration = new PersistedBTreeConfiguration<RevisionName, long[]>();
+ configuration.setKeySerializer( RevisionNameSerializer.INSTANCE );
+ configuration.setName( COPIED_PAGE_BTREE_NAME );
+ configuration.setValueSerializer( LongArraySerializer.INSTANCE );
+ configuration.setBtreeType( BTreeTypeEnum.COPIED_PAGES_BTREE );
+ configuration.setCacheSize( PersistedBTree.DEFAULT_CACHE_SIZE );
+
+ copiedPageBtree = BTreeFactory.createPersistedBTree( configuration );
+ }
+
+
+ /**
* Load the BTrees from the disk.
*
* @throws InstantiationException
* @throws IllegalAccessException
* @throws ClassNotFoundException
+ * @throws NoSuchFieldException
+ * @throws SecurityException
+ * @throws IllegalArgumentException
*/
private void loadRecordManager() throws IOException, ClassNotFoundException, IllegalAccessException,
- InstantiationException
+ InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException, KeyNotFoundException
{
if ( fileChannel.size() != 0 )
{
- ByteBuffer header = ByteBuffer.allocate( HEADER_SIZE );
+ ByteBuffer recordManagerHeader = ByteBuffer.allocate( RECORD_MANAGER_HEADER_SIZE );
// The file exists, we have to load the data now
- fileChannel.read( header );
+ fileChannel.read( recordManagerHeader );
- header.rewind();
+ recordManagerHeader.rewind();
// read the RecordManager Header :
- // +----------------+
- // | PageSize | 4 bytes : The size of a physical page (default to 4096)
- // +----------------+
- // | NbTree | 4 bytes : The number of managed BTrees (at least 1)
- // +----------------+
- // | FirstFree | 8 bytes : The offset of the first free page
- // +----------------+
- // | BoB old offset | 8 bytes : The previous BoB revision
- // +----------------+
- // | BoB new offset | 8 bytes : The current BoB revision
- // +----------------+
+ // +---------------------+
+ // | PageSize | 4 bytes : The size of a physical page (default to 4096)
+ // +---------------------+
+ // | NbTree | 4 bytes : The number of managed B-trees (at least 1)
+ // +---------------------+
+ // | FirstFree | 8 bytes : The offset of the first free page
+ // +---------------------+
+ // | current BoB offset | 8 bytes : The offset of the current B-tree of B-trees
+ // +---------------------+
+ // | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
+ // +---------------------+
+ // | current CP offset | 8 bytes : The offset of the current Copied Pages B-tree
+ // +---------------------+
+ // | previous CP offset | 8 bytes : The offset of the previous Copied Pages B-tree
+ // +---------------------+
// The page size
- pageSize = header.getInt();
+ pageSize = recordManagerHeader.getInt();
- // The number of managed BTrees
- nbBtree = header.getInt();
+ // The number of managed B-trees
+ nbBtree = recordManagerHeader.getInt();
// The first and last free page
- firstFreePage = header.getLong();
+ firstFreePage = recordManagerHeader.getLong();
- // The BOB revisions
- long bobRevision1 = header.getLong();
- long bobRevision2 = header.getLong();
+ // The current BOB offset
+ currentBtreeOfBtreesOffset = recordManagerHeader.getLong();
- if ( bobRevision1 < bobRevision2 )
- {
- bobOldRevision = bobRevision1;
- bobCurrentRevision = bobRevision2;
- }
- else if ( bobRevision1 > bobRevision2 )
- {
- bobOldRevision = bobRevision2;
- bobCurrentRevision = bobRevision1;
- }
- else
- {
- // Special case : the RecordManage has been shtudown correctly
- bobOldRevision = bobRevision1;
- bobCurrentRevision = bobRevision2;
- }
+ // The previous BOB offset
+ previousBtreeOfBtreesOffset = recordManagerHeader.getLong();
+
+ // The current Copied Pages B-tree offset
+ currentCopiedPagesBtreeOffset = recordManagerHeader.getLong();
+
+ // The previous Copied Pages B-tree offset
+ previousCopiedPagesBtreeOffset = recordManagerHeader.getLong();
+
+ // read the B-tree of B-trees
+ PageIO[] bobHeaderPageIos = readPageIOs( currentBtreeOfBtreesOffset, Long.MAX_VALUE );
- // Now read each BTree. The first one is the one which
- // manage the modified pages. Once read, we can discard all
- // the pages that are stored in it, as we have restarted
- // the RecordManager.
- long btreeOffset = HEADER_SIZE;
+ btreeOfBtrees = BTreeFactory.<NameRevision, Long> createPersistedBTree( BTreeTypeEnum.BTREE_OF_BTREES );
+ //BTreeFactory.<NameRevision, Long> setBtreeHeaderOffset( ( PersistedBTree<NameRevision, Long> )btreeOfBtrees, currentBtreeOfBtreesOffset );
- PageIO[] pageIos = readPageIOs( HEADER_SIZE, Long.MAX_VALUE );
+ loadBtree( bobHeaderPageIos, btreeOfBtrees );
- // Create the BTree
- copiedPageBTree = BTreeFactory.<RevisionName, long[]> createPersistedBTree();
- ( ( PersistedBTree<RevisionName, long[]> ) copiedPageBTree ).setBtreeOffset( btreeOffset );
+ // read the copied page B-tree
+ PageIO[] copiedPagesPageIos = readPageIOs( currentCopiedPagesBtreeOffset, Long.MAX_VALUE );
- loadBTree( pageIos, copiedPageBTree );
- long nextBtreeOffset = ( ( PersistedBTree<RevisionName, long[]> ) copiedPageBTree ).getNextBTreeOffset();
+ copiedPageBtree = BTreeFactory.<RevisionName, long[]> createPersistedBTree( BTreeTypeEnum.COPIED_PAGES_BTREE );
+ //( ( PersistedBTree<RevisionName, long[]> ) copiedPageBtree ).setBtreeHeaderOffset( currentCopiedPagesBtreeOffset );
- // And the Revision BTree
- pageIos = readPageIOs( nextBtreeOffset, Long.MAX_VALUE );
+ loadBtree( copiedPagesPageIos, copiedPageBtree );
- revisionBTree = BTreeFactory.<RevisionName, Long> createPersistedBTree();
- ( ( PersistedBTree<RevisionName, Long> ) revisionBTree ).setBtreeOffset( nextBtreeOffset );
+ // Now, read all the B-trees from the btree of btrees
+ TupleCursor<NameRevision, Long> btreeCursor = btreeOfBtrees.browse();
+ Map<String, Long> loadedBtrees = new HashMap<String, Long>();
- loadBTree( pageIos, revisionBTree );
- nextBtreeOffset = ( ( PersistedBTree<RevisionName, Long> ) revisionBTree ).getNextBTreeOffset();
+ // loop on all the btrees we have, and keep only the latest revision
+ long currentRevision = -1L;
- // Then process the next ones
- for ( int i = 2; i < nbBtree; i++ )
+ while ( btreeCursor.hasNext() )
{
- // Create the BTree
- BTree<Object, Object> btree = BTreeFactory.createPersistedBTree();
- ( ( PersistedBTree<Object, Object> ) btree ).setRecordManager( this );
- ( ( PersistedBTree<Object, Object> ) btree ).setBtreeOffset( nextBtreeOffset );
- lastAddedBTreeOffset = nextBtreeOffset;
+ Tuple<NameRevision, Long> btreeTuple = btreeCursor.next();
+ NameRevision nameRevision = btreeTuple.getKey();
+ long btreeOffset = btreeTuple.getValue();
+ long revision = nameRevision.getValue();
- // Read the associated pages
- pageIos = readPageIOs( nextBtreeOffset, Long.MAX_VALUE );
+ // Check if we already have processed this B-tree
+ Long loadedBtreeRevision = loadedBtrees.get( nameRevision.getName() );
- // Load the BTree
- loadBTree( pageIos, btree );
- nextBtreeOffset = ( ( PersistedBTree<Object, Object> ) btree ).getNextBTreeOffset();
+ if ( loadedBtreeRevision != null )
+ {
+ // The btree has already been loaded. The revision is necessarily higher
+ if ( revision > currentRevision )
+ {
+ // We have a newer revision : switch to the new revision (we keep the offset atm)
+ loadedBtrees.put( nameRevision.getName(), btreeOffset );
+ currentRevision = revision;
+ }
+ }
+ else
+ {
+ // This is a new B-tree
+ loadedBtrees.put( nameRevision.getName(), btreeOffset );
+ currentRevision = nameRevision.getRevision();
+ }
+ }
- // Store it into the managedBtrees map
- managedBTrees.put( btree.getName(), btree );
+ // TODO : clean up the old revisions...
+
+
+ // Now, we can load the real btrees using the offsets
+ for ( String btreeName : loadedBtrees.keySet() )
+ {
+ long btreeOffset = loadedBtrees.get( btreeName );
+
+ PageIO[] btreePageIos = readPageIOs( btreeOffset, Long.MAX_VALUE );
+
+ BTree<?, ?> btree = BTreeFactory.<NameRevision, Long> createPersistedBTree();
+ //( ( PersistedBTree<NameRevision, Long> ) btree ).setBtreeHeaderOffset( btreeOffset );
+ loadBtree( btreePageIos, btree );
+
+ // Add the btree into the map of managed B-trees
+ managedBtrees.put( btreeName, ( BTree<Object, Object> ) btree );
}
// We are done ! Let's finish with the last initialization parts
@@ -447,13 +554,170 @@ public class RecordManager
/**
+ * Starts a transaction
+ */
+ public void beginTransaction()
+ {
+ // First, take the lock
+ transactionLock.lock();
+
+ // Now, check the TLS state
+ Integer nbTxnLevel = context.get();
+
+ if ( nbTxnLevel == null )
+ {
+ context.set( 1 );
+ }
+ else
+ {
+ // And increment the counter of inner txn.
+ context.set( nbTxnLevel + 1 );
+ }
+ }
+
+
+ /**
+ * Commits a transaction
+ */
+ public void commit()
+ {
+ if ( !fileChannel.isOpen() )
+ {
+ // The file has been closed, nothing remains to commit, let's get out
+ transactionLock.unlock();
+ return;
+ }
+
+ int nbTxnStarted = context.get();
+
+ switch ( nbTxnStarted )
+ {
+ case 0 :
+ // The transaction was rolled back, quit immediately
+ transactionLock.unlock();
+
+ return;
+
+ case 1 :
+ // We are done with the transaction, we can update the RMHeader and swap the BTreeHeaders
+ // First update the RMHeader to be sure that we have a way to restore from a crash
+ updateRecordManagerHeader();
+
+ // Swap the BtreeHeaders maps
+ swapCurrentBtreeHeaders();
+
+ // We can now free pages
+ for ( PageIO pageIo : freedPages )
+ {
+ try
+ {
+ free( pageIo );
+ }
+ catch ( IOException ioe )
+ {
+ throw new RecordManagerException( ioe.getMessage() );
+ }
+ }
+
+ // Release the allocated and freed pages list
+ freedPages.clear();
+ allocatedPages.clear();
+
+ // And update the RMHeader again, removing the old references to BOB and CPB b-tree headers
+ // here, we have to erase the old references to keep only the new ones.
+ updateRecordManagerHeader();
+
+ // And decrement the number of started transactions
+ context.set( nbTxnStarted - 1 );
+
+ // Finally, release the global lock
+ transactionLock.unlock();
+
+ return;
+
+ default :
+ // We are inside an existing transaction. Just update the necessary elements
+ // Update the RMHeader to be sure that we have a way to restore from a crash
+ updateRecordManagerHeader();
+
+ // Swap the BtreeHeaders maps
+ swapCurrentBtreeHeaders();
+
+ // We can now free pages
+ for ( PageIO pageIo : freedPages )
+ {
+ try
+ {
+ free( pageIo );
+ }
+ catch ( IOException ioe )
+ {
+ throw new RecordManagerException( ioe.getMessage() );
+ }
+ }
+
+ // Release the allocated and freed pages list
+ freedPages.clear();
+ allocatedPages.clear();
+
+ // And update the RMHeader again, removing the old references to BOB and CPB b-tree headers
+ // here, we have to erase the old references to keep only the new ones.
+ updateRecordManagerHeader();
+
+ // And decrement the number of started transactions
+ context.set( nbTxnStarted - 1 );
+
+ // Finally, release the global lock
+ transactionLock.unlock();
+ return;
+ }
+ }
+
+
+ /**
+ * Rollback a transaction
+ */
+ public void rollback()
+ {
+ // Reset the counter
+ context.set( 0 );
+
+ // We can now free allocated pages, this is the end of the transaction
+ for ( PageIO pageIo : allocatedPages )
+ {
+ try
+ {
+ free( pageIo );
+ }
+ catch ( IOException ioe )
+ {
+ throw new RecordManagerException( ioe.getMessage() );
+ }
+ }
+
+ // Release the allocated and freed pages list
+ freedPages.clear();
+ allocatedPages.clear();
+
+ // And update the RMHeader
+ updateRecordManagerHeader();
+
+ // And restore the BTreeHeaders new Map to the current state
+ revertBtreeHeaders();
+
+ transactionLock.unlock();
+ }
+
+
+ /**
* Reads all the PageIOs that are linked to the page at the given position, including
* the first page.
*
* @param position The position of the first page
+ * @param limit The maximum bytes to read. Set this value to -1 when the size is unknown.
* @return An array of pages
*/
- private PageIO[] readPageIOs( long position, long limit ) throws IOException, EndOfFileExceededException
+ /*no qualifier*/ PageIO[] readPageIOs( long position, long limit ) throws IOException, EndOfFileExceededException
{
LOG.debug( "Read PageIOs at position {}", position );
@@ -497,52 +761,105 @@ public class RecordManager
/**
- * Read a BTree from the disk. The meta-data are at the given position in the list of pages.
+ * Check the offset to be sure it's a valid one :
+ * <ul>
+ * <li>It's >= 0</li>
+ * <li>It's below the end of the file</li>
+ * <li>It's a multiple of the pageSize</li>
+ * </ul>
+ * @param offset The offset to check
+ * @throws InvalidOffsetException If the offset is not valid
+ */
+ private void checkOffset( long offset )
+ {
+ if ( ( offset < 0 ) || ( offset > endOfFileOffset ) || ( ( offset % pageSize ) != 0 ) )
+ {
+ throw new InvalidOffsetException( "Bad Offset : " + offset );
+ }
+ }
+
+
+ /**
+ * Read a B-tree from the disk. The meta-data are at the given position in the list of pages.
+ * We load a B-tree in two steps : first, we load the B-tree header, then the common informations
*
* @param pageIos The list of pages containing the meta-data
- * @param btree The BTree we have to initialize
+ * @param btree The B-tree we have to initialize
* @throws InstantiationException
* @throws IllegalAccessException
* @throws ClassNotFoundException
+ * @throws NoSuchFieldException
+ * @throws SecurityException
+ * @throws IllegalArgumentException
*/
- private <K, V> void loadBTree( PageIO[] pageIos, BTree<K, V> btree ) throws EndOfFileExceededException,
- IOException, ClassNotFoundException, IllegalAccessException, InstantiationException
+ private <K, V> void loadBtree( PageIO[] pageIos, BTree<K, V> btree ) throws EndOfFileExceededException,
+ IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException
+ {
+ loadBtree( pageIos, btree, null );
+ }
+
+
+ /**
+ * Read a B-tree from the disk. The meta-data are at the given position in the list of pages.
+ * We load a B-tree in two steps : first, we load the B-tree header, then the common informations
+ *
+ * @param pageIos The list of pages containing the meta-data
+ * @param btree The B-tree we have to initialize
+ * @throws InstantiationException
+ * @throws IllegalAccessException
+ * @throws ClassNotFoundException
+ * @throws NoSuchFieldException
+ * @throws SecurityException
+ * @throws IllegalArgumentException
+ */
+ /* no qualifier */ <K, V> void loadBtree( PageIO[] pageIos, BTree btree, BTree<K, V> parentBTree ) throws EndOfFileExceededException,
+ IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException
{
long dataPos = 0L;
- // The BTree current revision
+ // Process the B-tree header
+ BTreeHeader<K, V> btreeHeader = new BTreeHeader<K, V>();
+ btreeHeader.setBtree( btree );
+
+ // The BtreeHeader offset
+ btreeHeader.setBTreeHeaderOffset( pageIos[0].getOffset() );
+
+ // The B-tree current revision
long revision = readLong( pageIos, dataPos );
- BTreeFactory.setRevision( btree, revision );
+ btreeHeader.setRevision( revision );
dataPos += LONG_SIZE;
// The nb elems in the tree
long nbElems = readLong( pageIos, dataPos );
- BTreeFactory.setNbElems( btree, nbElems );
+ btreeHeader.setNbElems( nbElems );
dataPos += LONG_SIZE;
- // The BTree rootPage offset
+ // The B-tree rootPage offset
long rootPageOffset = readLong( pageIos, dataPos );
- BTreeFactory.setRootPageOffset( btree, rootPageOffset );
+ btreeHeader.setRootPageOffset( rootPageOffset );
dataPos += LONG_SIZE;
- // The next BTree offset
- long nextBTreeOffset = readLong( pageIos, dataPos );
- BTreeFactory.setNextBTreeOffset( btree, nextBTreeOffset );
- dataPos += LONG_SIZE;
+ // The B-tree information offset
+ long btreeInfoOffset = readLong( pageIos, dataPos );
- // The BTree page size
- int btreePageSize = readInt( pageIos, dataPos );
+ // Now, process the common informations
+ PageIO[] infoPageIos = readPageIOs( btreeInfoOffset, Long.MAX_VALUE );
+ ((PersistedBTree<K, V>)btree).setBtreeInfoOffset( infoPageIos[0].getOffset() );
+ dataPos = 0L;
+
+ // The B-tree page size
+ int btreePageSize = readInt( infoPageIos, dataPos );
BTreeFactory.setPageSize( btree, btreePageSize );
dataPos += INT_SIZE;
// The tree name
- ByteBuffer btreeNameBytes = readBytes( pageIos, dataPos );
+ ByteBuffer btreeNameBytes = readBytes( infoPageIos, dataPos );
dataPos += INT_SIZE + btreeNameBytes.limit();
String btreeName = Strings.utf8ToString( btreeNameBytes );
BTreeFactory.setName( btree, btreeName );
// The keySerializer FQCN
- ByteBuffer keySerializerBytes = readBytes( pageIos, dataPos );
+ ByteBuffer keySerializerBytes = readBytes( infoPageIos, dataPos );
dataPos += INT_SIZE + keySerializerBytes.limit();
String keySerializerFqcn = "";
@@ -555,7 +872,7 @@ public class RecordManager
BTreeFactory.setKeySerializer( btree, keySerializerFqcn );
// The valueSerialier FQCN
- ByteBuffer valueSerializerBytes = readBytes( pageIos, dataPos );
+ ByteBuffer valueSerializerBytes = readBytes( infoPageIos, dataPos );
String valueSerializerFqcn = "";
dataPos += INT_SIZE + valueSerializerBytes.limit();
@@ -567,19 +884,25 @@ public class RecordManager
BTreeFactory.setValueSerializer( btree, valueSerializerFqcn );
- // The BTree allowDuplicates flag
- int allowDuplicates = readInt( pageIos, dataPos );
+ // The B-tree allowDuplicates flag
+ int allowDuplicates = readInt( infoPageIos, dataPos );
( ( PersistedBTree<K, V> ) btree ).setAllowDuplicates( allowDuplicates != 0 );
dataPos += INT_SIZE;
- // Now, init the BTree
- btree.init();
-
+ // Set the recordManager in the btree
( ( PersistedBTree<K, V> ) btree ).setRecordManager( this );
- // Now, load the rootPage, which can be a Leaf or a Node, depending
- // on the number of elements in the tree : if it's above the pageSize,
- // it's a Node, otherwise it's a Leaf
+ // Set the current revision to the one stored in the B-tree header
+ // Here, we have to tell the BTree to keep this revision in the
+ // btreeRevisions Map, thus the 'true' parameter at the end.
+ ((PersistedBTree<K, V>)btree).storeRevision( btreeHeader, true );
+
+ // Now, init the B-tree
+ ( ( PersistedBTree<K, V> ) btree ).init( parentBTree );
+
+ // Update the BtreeHeaders Maps
+ currentBTreeHeaders.put( btree.getName(), ( ( PersistedBTree<K, V> ) btree ).getBtreeHeader() );
+ newBTreeHeaders.put( btree.getName(), ( ( PersistedBTree<K, V> ) btree ).getBtreeHeader() );
// Read the rootPage pages on disk
PageIO[] rootPageIos = readPageIOs( rootPageOffset, Long.MAX_VALUE );
@@ -591,31 +914,33 @@ public class RecordManager
}
- private <K, V> Page<K, V> readNode( BTree<K, V> btree, long offset, long revision, int nbElems ) throws IOException
- {
- Page<K, V> node = BTreeFactory.createNode( btree, revision, nbElems );
-
- // Read the rootPage pages on disk
- PageIO[] pageIos = readPageIOs( offset, Long.MAX_VALUE );
-
- return node;
- }
-
-
+ /**
+ * Deserialize a Page from a B-tree at a given position
+ *
+ * @param btree The B-tree we want to read a Page from
+ * @param offset The position in the file for this page
+ * @return The read page
+ * @throws EndOfFileExceededException If we have reached the end of the file while reading the page
+ */
public <K, V> Page<K, V> deserialize( BTree<K, V> btree, long offset ) throws EndOfFileExceededException,
IOException
{
+ checkOffset( offset );
PageIO[] rootPageIos = readPageIOs( offset, Long.MAX_VALUE );
Page<K, V> page = readPage( btree, rootPageIos );
- ( ( AbstractPage<K, V> ) page ).setOffset( rootPageIos[0].getOffset() );
- ( ( AbstractPage<K, V> ) page ).setLastOffset( rootPageIos[rootPageIos.length - 1].getOffset() );
-
return page;
}
+ /**
+ * Read a page from some PageIO for a given B-tree
+ * @param btree The B-tree we want to read a page for
+ * @param pageIos The PageIO containing the raw data
+ * @return The read Page if successful
+ * @throws IOException If the deserialization failed
+ */
private <K, V> Page<K, V> readPage( BTree<K, V> btree, PageIO[] pageIos ) throws IOException
{
// Deserialize the rootPage now
@@ -651,6 +976,12 @@ public class RecordManager
page = readNodeKeysAndValues( btree, -nbElems, revision, byteBuffer, pageIos );
}
+ ( ( AbstractPage<K, V> ) page ).setOffset( pageIos[0].getOffset() );
+ if ( pageIos.length > 1 )
+ {
+ ( ( AbstractPage<K, V> ) page ).setLastOffset( pageIos[pageIos.length - 1].getOffset() );
+ }
+
return page;
}
@@ -659,8 +990,7 @@ public class RecordManager
* Deserialize a Leaf from some PageIOs
*/
private <K, V> PersistedLeaf<K, V> readLeafKeysAndValues( BTree<K, V> btree, int nbElems, long revision,
- ByteBuffer byteBuffer,
- PageIO[] pageIos )
+ ByteBuffer byteBuffer, PageIO[] pageIos )
{
// Its a leaf, create it
PersistedLeaf<K, V> leaf = ( PersistedLeaf<K, V> ) BTreeFactory.createLeaf( btree, revision, nbElems );
@@ -717,8 +1047,7 @@ public class RecordManager
* Deserialize a Node from some PageIos
*/
private <K, V> PersistedNode<K, V> readNodeKeysAndValues( BTree<K, V> btree, int nbElems, long revision,
- ByteBuffer byteBuffer,
- PageIO[] pageIos ) throws IOException
+ ByteBuffer byteBuffer, PageIO[] pageIos ) throws IOException
{
PersistedNode<K, V> node = ( PersistedNode<K, V> ) BTreeFactory.createNode( btree, revision, nbElems );
@@ -726,8 +1055,8 @@ public class RecordManager
for ( int i = 0; i < nbElems; i++ )
{
// This is an Offset
- long offset = OFFSET_SERIALIZER.deserialize( byteBuffer );
- long lastOffset = OFFSET_SERIALIZER.deserialize( byteBuffer );
+ long offset = LongSerializer.INSTANCE.deserialize( byteBuffer );
+ long lastOffset = LongSerializer.INSTANCE.deserialize( byteBuffer );
PersistedPageHolder<K, V> valueHolder = new PersistedPageHolder<K, V>( btree, null, offset, lastOffset );
node.setValue( i, valueHolder );
@@ -747,8 +1076,8 @@ public class RecordManager
}
// and read the last value, as it's a node
- long offset = OFFSET_SERIALIZER.deserialize( byteBuffer );
- long lastOffset = OFFSET_SERIALIZER.deserialize( byteBuffer );
+ long offset = LongSerializer.INSTANCE.deserialize( byteBuffer );
+ long lastOffset = LongSerializer.INSTANCE.deserialize( byteBuffer );
PersistedPageHolder<K, V> valueHolder = new PersistedPageHolder<K, V>( btree, null, offset, lastOffset );
node.setValue( nbElems, valueHolder );
@@ -759,11 +1088,12 @@ public class RecordManager
/**
* Read a byte[] from pages.
+ *
* @param pageIos The pages we want to read the byte[] from
* @param position The position in the data stored in those pages
* @return The byte[] we have read
*/
- private ByteBuffer readBytes( PageIO[] pageIos, long position )
+ /* no qualifier */ ByteBuffer readBytes( PageIO[] pageIos, long position )
{
// Read the byte[] length first
int length = readInt( pageIos, position );
@@ -787,7 +1117,6 @@ public class RecordManager
else
{
ByteBuffer bytes = ByteBuffer.allocate( length );
- int bytesPos = 0;
while ( length > 0 )
{
@@ -814,7 +1143,6 @@ public class RecordManager
pageData.reset();
pageNb++;
pagePos = LINK_SIZE;
- bytesPos += remaining;
pageData = pageIos[pageNb].getData();
length -= remaining;
remaining = pageData.capacity() - pagePos;
@@ -833,7 +1161,7 @@ public class RecordManager
* @param position The position in the data stored in those pages
* @return The int we have read
*/
- private int readInt( PageIO[] pageIos, long position )
+ /* no qualifier */ int readInt( PageIO[] pageIos, long position )
{
// Compute the page in which we will store the data given the
// current position
@@ -923,7 +1251,7 @@ public class RecordManager
* @param position The position in the data stored in those pages
* @return The long we have read
*/
- private long readLong( PageIO[] pageIos, long position )
+ /* no qualifier */ long readLong( PageIO[] pageIos, long position )
{
// Compute the page in which we will store the data given the
// current position
@@ -1014,158 +1342,96 @@ public class RecordManager
/**
- * Manage a BTree. The btree will be added and managed by this RecordManager. We will create a
- * new RootPage for this added BTree, which will contain no data.
- *
- * @param btree The new BTree to manage.
+ * Manage a B-tree. The btree will be added and managed by this RecordManager. We will create a
+ * new RootPage for this added B-tree, which will contain no data.<br/>
+ * This method is threadsafe.
+ *
+ * @param btree The new B-tree to manage.
+ * @throws BTreeAlreadyManagedException if the B-tree is already managed
+ * @throws IOException if there was a problem while accessing the file
*/
public synchronized <K, V> void manage( BTree<K, V> btree ) throws BTreeAlreadyManagedException, IOException
{
+ beginTransaction();
+
manage( ( BTree<Object, Object> ) btree, NORMAL_BTREE );
+
+ commit();
}
/**
- * works the same as @see #manage(BTree) except the given tree will not be linked to top level trees that will be
- * loaded initially if the internalTree flag is set to true
+ * Managing a btree is a matter of storing a reference to the managed B-tree in the B-tree Of B-trees.
+ * We store a tuple of NameRevision (where revision is 0L) and an offset to the B-tree header.
+ * At the same time, we keep a track of the managed B-trees in a Map.
*
- * @param btree The new BTree to manage.
- * @param internalTree flag indicating if this is an internal tree
+ * @param btree The new B-tree to manage.
+ * @param treeType flag indicating if this is an internal tree
*
- * @throws BTreeAlreadyManagedException
+ * @throws BTreeAlreadyManagedException If the B-tree is already managed
* @throws IOException
*/
- public synchronized <K, V> void manage( BTree<K, V> btree, boolean internalTree )
- throws BTreeAlreadyManagedException,
- IOException
+ public synchronized <K, V> void manage( BTree<K, V> btree, boolean treeType )
+ throws BTreeAlreadyManagedException, IOException
{
- LOG.debug( "Managing the btree {} which is an internam tree : {}", btree.getName(), internalTree );
+ LOG.debug( "Managing the btree {} which is an internam tree : {}", btree.getName(), treeType );
BTreeFactory.setRecordManager( btree, this );
String name = btree.getName();
- if ( managedBTrees.containsKey( name ) )
+ if ( managedBtrees.containsKey( name ) )
{
- // There is already a BTree with this name in the recordManager...
- LOG.error( "There is already a BTree named '{}' managed by this recordManager", name );
+ // There is already a B-tree with this name in the recordManager...
+ LOG.error( "There is already a B-tree named '{}' managed by this recordManager", name );
throw new BTreeAlreadyManagedException( name );
}
- // Do not add the BTree if it's internal into the Map of managed btrees, otherwise we will
- // not discard it when reloading a page wth internal btrees
- if ( !internalTree )
- {
- managedBTrees.put( name, ( BTree<Object, Object> ) btree );
- }
-
- // We will add the newly managed BTree at the end of the header.
- byte[] btreeNameBytes = Strings.getBytesUtf8( name );
- byte[] keySerializerBytes = Strings.getBytesUtf8( btree.getKeySerializerFQCN() );
- byte[] valueSerializerBytes = Strings.getBytesUtf8( btree.getValueSerializerFQCN() );
-
- int bufferSize =
- INT_SIZE + // The name size
- btreeNameBytes.length + // The name
- INT_SIZE + // The keySerializerBytes size
- keySerializerBytes.length + // The keySerializerBytes
- INT_SIZE + // The valueSerializerBytes size
- valueSerializerBytes.length + // The valueSerializerBytes
- INT_SIZE + // The page size
- LONG_SIZE + // The revision
- LONG_SIZE + // the number of element
- LONG_SIZE + // the nextBtree offset
- LONG_SIZE + // The root offset
- INT_SIZE; // The allowDuplicates flag
-
- // Get the pageIOs we need to store the data. We may need more than one.
- PageIO[] pageIos = getFreePageIOs( bufferSize );
-
- // Store the BTree Offset into the BTree
- long btreeOffset = pageIos[0].getOffset();
- ( ( PersistedBTree<K, V> ) btree ).setBtreeOffset( btreeOffset );
-
- // Now store the BTree data in the pages :
- // - the BTree revision
- // - the BTree number of elements
- // - The RootPage offset
- // - The next Btree offset
- // - the BTree page size
- // - the BTree name
- // - the keySerializer FQCN
- // - the valueSerializer FQCN
- // - the flags that tell if the dups are allowed
- // Starts at 0
- long position = 0L;
-
- // The BTree current revision
- position = store( position, btree.getRevision(), pageIos );
-
- // The nb elems in the tree
- position = store( position, btree.getNbElems(), pageIos );
+ // Now, write the B-tree informations
+ long btreeInfoOffset = writeBtreeInfo( btree );
+ BTreeHeader<K, V> btreeHeader = ((AbstractBTree<K,V>)btree).getBtreeHeader();
+ ((PersistedBTree<K, V>)btree).setBtreeInfoOffset( btreeInfoOffset );
- // Serialize the BTree root page
- Page<K, V> rootPage = BTreeFactory.getRootPage( btree );
+ // Serialize the B-tree root page
+ Page<K, V> rootPage = btreeHeader.getRootPage();
- PageIO[] rootPageIos = serializePage( btree, btree.getRevision(), rootPage );
+ PageIO[] rootPageIos = serializePage( btree, btreeHeader.getRevision(), rootPage );
// Get the reference on the first page
- PageIO rootPageIo = rootPageIos[0];
-
- // Now, we can inject the BTree rootPage offset into the BTree header
- position = store( position, rootPageIo.getOffset(), pageIos );
- ( ( PersistedBTree<K, V> ) btree ).setRootPageOffset( rootPageIo.getOffset() );
- ( ( PersistedLeaf<K, V> ) rootPage ).setOffset( rootPageIo.getOffset() );
-
- // The next BTree Header offset (-1L, as it's a new BTree)
- position = store( position, NO_PAGE, pageIos );
-
- // The BTree page size
- position = store( position, btree.getPageSize(), pageIos );
+ long rootPageOffset = rootPageIos[0].getOffset();
- // The tree name
- position = store( position, btreeNameBytes, pageIos );
-
- // The keySerializer FQCN
- position = store( position, keySerializerBytes, pageIos );
-
- // The valueSerialier FQCN
- position = store( position, valueSerializerBytes, pageIos );
-
- // The allowDuplicates flag
- position = store( position, ( btree.isAllowDuplicates() ? 1 : 0 ), pageIos );
+ // Store the rootPageOffset into the Btree header and into the rootPage
+ btreeHeader.setRootPageOffset( rootPageOffset );
+ ( ( PersistedLeaf<K, V> ) rootPage ).setOffset( rootPageOffset );
- // And flush the pages to disk now
- LOG.debug( "Flushing the newly managed '{}' btree header", btree.getName() );
- flushPages( pageIos );
LOG.debug( "Flushing the newly managed '{}' btree rootpage", btree.getName() );
flushPages( rootPageIos );
- // Now, if this added BTree is not the first BTree, we have to link it with the
- // latest added BTree
- if ( !internalTree )
- {
- nbBtree++;
+ // And the B-tree header
+ long btreeHeaderOffset = writeBtreeHeader( btree, btreeHeader );
- if ( lastAddedBTreeOffset != NO_PAGE )
- {
- // We have to update the nextBtreeOffset from the previous BTreeHeader
- pageIos = readPageIOs( lastAddedBTreeOffset, LONG_SIZE + LONG_SIZE + LONG_SIZE + LONG_SIZE );
- store( LONG_SIZE + LONG_SIZE + LONG_SIZE, btreeOffset, pageIos );
+ // Now, if this is a new B-tree, add it to the B-tree of B-trees
+ if ( treeType != INTERNAL_BTREE )
+ {
+ // Add the btree into the map of managed B-trees
+ managedBtrees.put( name, ( BTree<Object, Object> ) btree );
+
+ // And in the Map of currentBtreeHeaders and newBtreeHeaders
+ currentBTreeHeaders.put( name, btreeHeader );
+ newBTreeHeaders.put( name, btreeHeader );
- // Write the pages on disk
- LOG.debug( "Updated the previous btree pointer on the added BTree {}", btree.getName() );
- flushPages( pageIos );
- }
+ // We can safely increment the number of managed B-trees
+ nbBtree++;
- lastAddedBTreeOffset = btreeOffset;
+ // Create the new NameRevision
+ NameRevision nameRevision = new NameRevision( name, 0L );
- // Last, not least, update the number of managed BTrees in the header
- updateRecordManagerHeader();
+ // Inject it into the B-tree of B-tree
+ btreeOfBtrees.insert( nameRevision, btreeHeaderOffset );
}
if ( LOG_CHECK.isDebugEnabled() )
{
- check();
+ MavibotInspector.check( this );
}
}
@@ -1352,7 +1618,7 @@ public class RecordManager
/**
- * Serialize a Leaf's Value. We store
+ * Serialize a Leaf's Value.
*/
private <K, V> int serializeLeafValue( PersistedLeaf<K, V> leaf, int pos, List<byte[]> serializedData )
throws IOException
@@ -1403,7 +1669,7 @@ public class RecordManager
serializedData.add( buffer );
dataSize += buffer.length;
- // the BTree offset
+ // the B-tree offset
buffer = LongSerializer.serialize( ( ( PersistedValueHolder<V> ) valueHolder ).getOffset() );
serializedData.add( buffer );
dataSize += buffer.length;
@@ -1464,148 +1730,598 @@ public class RecordManager
/**
- * Update the header, injecting the following data :
+ * Update the RecordManager header, injecting the following data :
+ *
* <pre>
- * +---------------+
- * | PageSize | 4 bytes : The size of a physical page (default to 4096)
- * +---------------+
- * | NbTree | 4 bytes : The number of managed BTrees (at least 1)
- * +---------------+
- * | FirstFree | 8 bytes : The offset of the first free page
- * +---------------+
- * | currentBoB | 1 byte : The current BoB in use
- * +---------------+
- * | BoB offset[0] | 8 bytes : The offset of the first BoB
- * +---------------+
- * | BoB offset[1] | 8 bytes : The offset of the second BoB
- * +---------------+
+ * +---------------------+
+ * | PageSize | 4 bytes : The size of a physical page (default to 4096)
+ * +---------------------+
+ * | NbTree | 4 bytes : The number of managed B-trees (at least 1)
+ * +---------------------+
+ * | FirstFree | 8 bytes : The offset of the first free page
+ * +---------------------+
+ * | current BoB offset | 8 bytes : The offset of the current B-tree of B-trees
+ * +---------------------+
+ * | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
+ * +---------------------+
+ * | current CP offset | 8 bytes : The offset of the current CopiedPages B-tree
+ * +---------------------+
+ * | previous CP offset | 8 bytes : The offset of the previous CopiedPages B-tree
+ * +---------------------+
* </pre>
*/
- public void updateRecordManagerHeader() throws IOException
+ public void updateRecordManagerHeader()
{
// The page size
- HEADER_BYTES[0] = ( byte ) ( pageSize >>> 24 );
- HEADER_BYTES[1] = ( byte ) ( pageSize >>> 16 );
- HEADER_BYTES[2] = ( byte ) ( pageSize >>> 8 );
- HEADER_BYTES[3] = ( byte ) ( pageSize );
-
- // The number of managed BTree (currently we have only one : the discardedPage BTree
- HEADER_BYTES[4] = ( byte ) ( nbBtree >>> 24 );
- HEADER_BYTES[5] = ( byte ) ( nbBtree >>> 16 );
- HEADER_BYTES[6] = ( byte ) ( nbBtree >>> 8 );
- HEADER_BYTES[7] = ( byte ) ( nbBtree );
+ int position = writeData( RECORD_MANAGER_HEADER_BYTES, 0, pageSize );
+
+ // The number of managed B-tree
+ position = writeData( RECORD_MANAGER_HEADER_BYTES, position, nbBtree );
// The first free page
- HEADER_BYTES[8] = ( byte ) ( firstFreePage >>> 56 );
- HEADER_BYTES[9] = ( byte ) ( firstFreePage >>> 48 );
- HEADER_BYTES[10] = ( byte ) ( firstFreePage >>> 40 );
- HEADER_BYTES[11] = ( byte ) ( firstFreePage >>> 32 );
- HEADER_BYTES[12] = ( byte ) ( firstFreePage >>> 24 );
- HEADER_BYTES[13] = ( byte ) ( firstFreePage >>> 16 );
- HEADER_BYTES[14] = ( byte ) ( firstFreePage >>> 8 );
- HEADER_BYTES[15] = ( byte ) ( firstFreePage );
-
- // The offset of the first BoB
- HEADER_BYTES[17] = ( byte ) ( bobOldRevision >>> 56 );
- HEADER_BYTES[18] = ( byte ) ( bobOldRevision >>> 48 );
- HEADER_BYTES[19] = ( byte ) ( bobOldRevision >>> 40 );
- HEADER_BYTES[20] = ( byte ) ( bobOldRevision >>> 32 );
- HEADER_BYTES[21] = ( byte ) ( bobOldRevision >>> 24 );
- HEADER_BYTES[22] = ( byte ) ( bobOldRevision >>> 16 );
- HEADER_BYTES[23] = ( byte ) ( bobOldRevision >>> 8 );
- HEADER_BYTES[24] = ( byte ) ( bobOldRevision );
-
- // The offset of the second BoB
- HEADER_BYTES[17] = ( byte ) ( bobCurrentRevision >>> 56 );
- HEADER_BYTES[18] = ( byte ) ( bobCurrentRevision >>> 48 );
- HEADER_BYTES[19] = ( byte ) ( bobCurrentRevision >>> 40 );
- HEADER_BYTES[20] = ( byte ) ( bobCurrentRevision >>> 32 );
- HEADER_BYTES[21] = ( byte ) ( bobCurrentRevision >>> 24 );
- HEADER_BYTES[22] = ( byte ) ( bobCurrentRevision >>> 16 );
- HEADER_BYTES[23] = ( byte ) ( bobCurrentRevision >>> 8 );
- HEADER_BYTES[24] = ( byte ) ( bobCurrentRevision );
-
- // Write the header on disk
- HEADER_BUFFER.put( HEADER_BYTES );
- HEADER_BUFFER.flip();
-
- LOG.debug( "Update RM header, FF : {}", firstFreePage );
- fileChannel.write( HEADER_BUFFER, 0 );
- HEADER_BUFFER.clear();
+ position = writeData( RECORD_MANAGER_HEADER_BYTES, position, firstFreePage );
- nbUpdateRMHeader.incrementAndGet();
- }
+ // The offset of the current B-tree of B-trees
+ position = writeData( RECORD_MANAGER_HEADER_BYTES, position, currentBtreeOfBtreesOffset );
+ // The offset of the previous B-tree of B-trees
+ position = writeData( RECORD_MANAGER_HEADER_BYTES, position, previousBtreeOfBtreesOffset );
- /**
- * Update the BTree header after a BTree modification. This will make the latest modification
- * visible.
- * We update the following fields :
- * <ul>
- * <li>the revision</li>
- * <li>the number of elements</li>
- * <li>the reference to the current BTree revisions</li>
- * <li>the reference to the old BTree revisions</li>
- * </ul>
- * @param btree
- * @throws IOException
- * @throws EndOfFileExceededException
- */
- /* No qualifier*/<K, V> void updateBtreeHeader( BTree<K, V> btree, long rootPageOffset )
- throws EndOfFileExceededException,
- IOException
- {
- // Read the pageIOs associated with this BTree
- long offset = ( ( PersistedBTree<K, V> ) btree ).getBtreeOffset();
- long headerSize = LONG_SIZE + LONG_SIZE + LONG_SIZE;
+ // The offset of the current CopiedPages B-tree
+ position = writeData( RECORD_MANAGER_HEADER_BYTES, position, currentCopiedPagesBtreeOffset );
- PageIO[] pageIos = readPageIOs( offset, headerSize );
+ // The offset of the previous CopiedPages B-tree
+ position = writeData( RECORD_MANAGER_HEADER_BYTES, position, previousCopiedPagesBtreeOffset );
- // Now, update the revision
- long position = 0;
+ // Write the RecordManager header on disk
+ RECORD_MANAGER_HEADER_BUFFER.put( RECORD_MANAGER_HEADER_BYTES );
+ RECORD_MANAGER_HEADER_BUFFER.flip();
- position = store( position, btree.getRevision(), pageIos );
- position = store( position, btree.getNbElems(), pageIos );
- position = store( position, rootPageOffset, pageIos );
+ LOG.debug( "Update RM header" );
- // Write the pages on disk
- if ( LOG.isDebugEnabled() )
+ if ( LOG_PAGES.isDebugEnabled() )
{
- LOG.debug( "-----> Flushing the '{}' BTreeHeader", btree.getName() );
- LOG.debug( " revision : " + btree.getRevision() + ", NbElems : " + btree.getNbElems() + ", root offset : "
- + rootPageOffset );
- }
+ StringBuilder sb = new StringBuilder();
- flushPages( pageIos );
+ sb.append( "First free page : 0x" ).append( Long.toHexString( firstFreePage ) ).append( "\n" );
+ sb.append( "Current BOB header : 0x" ).append( Long.toHexString( currentBtreeOfBtreesOffset ) ).append( "\n" );
+ sb.append( "Previous BOB header : 0x" ).append( Long.toHexString( previousBtreeOfBtreesOffset ) ).append( "\n" );
+ sb.append( "Current CPB header : 0x" ).append( Long.toHexString( currentCopiedPagesBtreeOffset ) ).append( "\n" );
+ sb.append( "Previous CPB header : 0x" ).append( Long.toHexString( previousCopiedPagesBtreeOffset ) ).append( "\n" );
- nbUpdateBTreeHeader.incrementAndGet();
+ if ( firstFreePage != NO_PAGE )
+ {
+ long freePage = firstFreePage;
+ sb.append( "free pages list : " );
- if ( LOG_CHECK.isDebugEnabled() )
- {
- check();
- }
- }
+ boolean isFirst = true;
+ while ( freePage != NO_PAGE )
+ {
+ if ( isFirst )
+ {
+ isFirst = false;
+ }
+ else
+ {
+ sb.append( " -> " );
+ }
- /**
- * Write the pages in the disk, either at the end of the file, or at
- * the position they were taken from.
- *
- * @param pageIos The list of pages to write
- * @throws IOException If the write failed
- */
- private void flushPages( PageIO... pageIos ) throws IOException
- {
- if ( LOG.isDebugEnabled() )
- {
- for ( PageIO pageIo : pageIos )
- {
- dump( pageIo );
- }
- }
+ sb.append( "0x" ).append( Long.toHexString( freePage ) );
- for ( PageIO pageIo : pageIos )
- {
+ try
+ {
+ PageIO[] freePageIO = readPageIOs( freePage, 8 );
+
+ freePage = freePageIO[0].getNextPage();
+ }
+ catch ( EndOfFileExceededException e )
+ {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ catch ( IOException e )
+ {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ }
+
+ }
+
+ LOG_PAGES.debug( "Update RM Header : \n{}", sb.toString() );
+ }
+
+ try
+ {
+ fileChannel.write( RECORD_MANAGER_HEADER_BUFFER, 0 );
+ }
+ catch ( IOException ioe )
+ {
+ throw new FileException( ioe.getMessage() );
+ }
+
+ RECORD_MANAGER_HEADER_BUFFER.clear();
+
+ // Reset the old versions
+ previousBtreeOfBtreesOffset = -1L;
+ previousCopiedPagesBtreeOffset = -1L;
+
+ nbUpdateRMHeader.incrementAndGet();
+ }
+
+
+ /**
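+ * Record the new B-tree of B-trees and CopiedPages B-tree offsets in memory, keeping the current ones as previous (-1 leaves a pair untouched). Nothing is written here; the header layout is :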
+ *
+ * <pre>
+ * +---------------------+
+ * | PageSize | 4 bytes : The size of a physical page (default to 4096)
+ * +---------------------+
+ * | NbTree | 4 bytes : The number of managed B-trees (at least 1)
+ * +---------------------+
+ * | FirstFree | 8 bytes : The offset of the first free page
+ * +---------------------+
+ * | current BoB offset | 8 bytes : The offset of the current B-tree of B-trees
+ * +---------------------+
+ * | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
+ * +---------------------+
+ * | current CP offset | 8 bytes : The offset of the current CopiedPages B-tree
+ * +---------------------+
+ * | previous CP offset | 8 bytes : The offset of the previous CopiedPages B-tree
+ * +---------------------+
+ * </pre>
+ */
+ public void updateRecordManagerHeader( long newBtreeOfBtreesOffset, long newCopiedPageBtreeOffset )
+ {
+ if ( newBtreeOfBtreesOffset != -1L )
+ {
+ previousBtreeOfBtreesOffset = currentBtreeOfBtreesOffset;
+ currentBtreeOfBtreesOffset = newBtreeOfBtreesOffset;
+ }
+
+ if ( newCopiedPageBtreeOffset != -1L )
+ {
+ previousCopiedPagesBtreeOffset = currentCopiedPagesBtreeOffset;
+ currentCopiedPagesBtreeOffset = newCopiedPageBtreeOffset;
+ }
+ }
+
+
+ /**
+ * Inject an int into RECORD_MANAGER_HEADER_BYTES at a given position (the buffer argument is currently ignored) and return the next position.
+ */
+ private int writeData( byte[] buffer, int position, int value )
+ {
+ RECORD_MANAGER_HEADER_BYTES[position] = ( byte ) ( value >>> 24 );
+ RECORD_MANAGER_HEADER_BYTES[position+1] = ( byte ) ( value >>> 16 );
+ RECORD_MANAGER_HEADER_BYTES[position+2] = ( byte ) ( value >>> 8 );
+ RECORD_MANAGER_HEADER_BYTES[position+3] = ( byte ) ( value );
+
+ return position + 4;
+ }
+
+
+ /**
+ * Inject a long into RECORD_MANAGER_HEADER_BYTES at a given position (the buffer argument is currently ignored) and return the next position.
+ */
+ private int writeData( byte[] buffer, int position, long value )
+ {
+ RECORD_MANAGER_HEADER_BYTES[position] = ( byte ) ( value >>> 56 );
+ RECORD_MANAGER_HEADER_BYTES[position+1] = ( byte ) ( value >>> 48 );
+ RECORD_MANAGER_HEADER_BYTES[position+2] = ( byte ) ( value >>> 40 );
+ RECORD_MANAGER_HEADER_BYTES[position+3] = ( byte ) ( value >>> 32 );
+ RECORD_MANAGER_HEADER_BYTES[position+4] = ( byte ) ( value >>> 24 );
+ RECORD_MANAGER_HEADER_BYTES[position+5] = ( byte ) ( value >>> 16 );
+ RECORD_MANAGER_HEADER_BYTES[position+6] = ( byte ) ( value >>> 8 );
+ RECORD_MANAGER_HEADER_BYTES[position+7] = ( byte ) ( value );
+
+ return position + 8;
+ }
+
+
+ /**
+ * Add a new <btree, revision> tuple into the B-tree of B-trees.
+ *
+ * @param name The B-tree name
+ * @param revision The B-tree revision
+ * @param btreeHeaderOffset The B-tree offset
+ * @throws IOException If the update failed
+ */
+ /* no qualifier */ <K, V> void addInBtreeOfBtrees( String name, long revision, long btreeHeaderOffset ) throws IOException
+ {
+ checkOffset( btreeHeaderOffset );
+ NameRevision nameRevision = new NameRevision( name, revision );
+
+ btreeOfBtrees.insert( nameRevision, btreeHeaderOffset );
+
+ // Update the B-tree of B-trees offset
+ currentBtreeOfBtreesOffset = getBTreeHeader( BTREE_OF_BTREES_NAME ).getBTreeHeaderOffset();
+ }
+
+
+ /**
+ * Add a new <btree, revision> tuple into the CopiedPages B-tree.
+ *
+ * @param name The B-tree name
+ * @param revision The B-tree revision
+ * @param pages The copied pages whose offsets are stored for this revision
+ * @throws IOException If the update failed
+ */
+ /* no qualifier */ <K, V> void addInCopiedPagesBtree( String name, long revision, List<Page<K, V>> pages ) throws IOException
+ {
+ RevisionName revisionName = new RevisionName( revision, name );
+
+ long[] pageOffsets = new long[pages.size()];
+ int pos = 0;
+
+ for ( Page<K, V> page : pages )
+ {
+ pageOffsets[pos++] = ((AbstractPage<K, V>)page).getOffset();
+ }
+
+ copiedPageBtree.insert( revisionName, pageOffsets );
+
+ // Update the CopiedPageBtree offset
+ currentCopiedPagesBtreeOffset = ((AbstractBTree<RevisionName, long[]>)copiedPageBtree).getBtreeHeader().getBTreeHeaderOffset();
+ }
+
+
+ /**
+ * Internal method used to update the B-tree of B-trees offset
+ * @param btreeOfBtreesOffset The new offset
+ */
+ /* no qualifier */ void setBtreeOfBtreesOffset( long btreeOfBtreesOffset )
+ {
+ checkOffset( btreeOfBtreesOffset );
+ this.currentBtreeOfBtreesOffset = btreeOfBtreesOffset;
+ }
+
+
+ /**
+ * Write the B-tree header on disk. We will write the following informations :
+ * <pre>
+ * +------------+
+ * | revision | The B-tree revision
+ * +------------+
+ * | nbElems | The B-tree number of elements
+ * +------------+
+ * | rootPage | The root page offset
+ * +------------+
+ * | BtreeInfo | The B-tree info offset
+ * +------------+
+ * </pre>
+ * @param btree The B-tree which header has to be written
+ * @param btreeHeader The B-tree header to write (revision, number of elements, root page offset)
+ * @return The B-tree header offset
+ * @throws IOException If we weren't able to write the B-tree header
+ */
+ /* no qualifier */ <K, V> long writeBtreeHeader( BTree<K, V> btree, BTreeHeader<K, V> btreeHeader ) throws IOException
+ {
+ int bufferSize =
+ LONG_SIZE + // The revision
+ LONG_SIZE + // the number of element
+ LONG_SIZE + // The root page offset
+ LONG_SIZE; // The B-tree info page offset
+
+ // Get the pageIOs we need to store the data. We may need more than one.
+ PageIO[] btreeHeaderPageIos = getFreePageIOs( bufferSize );
+
+ // Store the B-tree header Offset into the B-tree
+ long btreeHeaderOffset = btreeHeaderPageIos[0].getOffset();
+
+ // Now store the B-tree data in the pages :
+ // - the B-tree revision
+ // - the B-tree number of elements
+ // - the B-tree root page offset
+ // - the B-tree info page offset
+ // Starts at 0
+ long position = 0L;
+
+ // The B-tree current revision
+ position = store( position, btreeHeader.getRevision(), btreeHeaderPageIos );
+
+ // The nb elems in the tree
+ position = store( position, btreeHeader.getNbElems(), btreeHeaderPageIos );
+
+
+ // Now, we can inject the B-tree rootPage offset into the B-tree header
+ position = store( position, btreeHeader.getRootPageOffset(), btreeHeaderPageIos );
+
+ // The B-tree info page offset
+ position = store( position, ((PersistedBTree<K, V>)btree).getBtreeInfoOffset(), btreeHeaderPageIos );
+
+ // And flush the pages to disk now
+ LOG.debug( "Flushing the newly managed '{}' btree header", btree.getName() );
+
+ if ( LOG_PAGES.isDebugEnabled() )
+ {
+ LOG_PAGES.debug( "Writing BTreeHeader revision {} for {}", btreeHeader.getRevision(), btree.getName() );
+ StringBuilder sb = new StringBuilder();
+
+ sb.append( "Offset : " ).append( Long.toHexString( btreeHeaderOffset ) ).append( "\n" );
+ sb.append( " Revision : " ).append( btreeHeader.getRevision() ).append( "\n" );
+ sb.append( " NbElems : " ).append( btreeHeader.getNbElems() ).append( "\n" );
+ sb.append( " RootPage : 0x" ).append( Long.toHexString( btreeHeader.getRootPageOffset() ) ).append( "\n" );
+ sb.append( " Info : 0x" ).append( Long.toHexString( ((PersistedBTree<K, V>)btree).getBtreeInfoOffset() ) ).append( "\n" );
+
+ LOG_PAGES.debug( "Btree Header[{}]\n{}", btreeHeader.getRevision(), sb.toString() );
+ }
+
+ flushPages( btreeHeaderPageIos );
+
+ btreeHeader.setBTreeHeaderOffset( btreeHeaderOffset );
+
+ return btreeHeaderOffset;
+ }
+
+
+    /**
+     * Write the B-tree informations on disk. We will write the following informations :
+     * <pre>
+     * +------------+
+     * | pageSize   | The B-tree page size (ie, the number of elements per page max)
+     * +------------+
+     * | nameSize   | The B-tree name size
+     * +------------+
+     * | name       | The B-tree name
+     * +------------+
+     * | keySerSize | The keySerializer FQCN size
+     * +------------+
+     * | keySerFQCN | The keySerializer FQCN
+     * +------------+
+     * | valSerSize | The Value serializer FQCN size
+     * +------------+
+     * | valSerFQCN | The valueSerializer FQCN
+     * +------------+
+     * | dups       | The flag that tells if the dups are allowed (1 if allowed, 0 otherwise)
+     * +------------+
+     * </pre>
+     * The name and the two FQCN are encoded in UTF-8.
+     *
+     * @param btree The B-tree which informations have to be written
+     * @return The B-tree info offset (the offset of the first page used to store the informations)
+     * @throws IOException If we weren't able to write the B-tree informations
+     */
+    private <K, V> long writeBtreeInfo( BTree<K, V> btree ) throws IOException
+    {
+        // Encode the three variable length fields
+        byte[] nameBytes = Strings.getBytesUtf8( btree.getName() );
+        byte[] keySerializerFqcn = Strings.getBytesUtf8( btree.getKeySerializerFQCN() );
+        byte[] valueSerializerFqcn = Strings.getBytesUtf8( btree.getValueSerializerFQCN() );
+
+        // Five ints (the page size, one size per variable length field, the allowDuplicates flag)
+        // plus the encoded fields themselves
+        int infoSize = ( 5 * INT_SIZE ) + nameBytes.length + keySerializerFqcn.length + valueSerializerFqcn.length;
+
+        // Ask for the pages that will hold the informations. We may get more than one.
+        PageIO[] infoPageIos = getFreePageIOs( infoSize );
+
+        // The informations are identified by the offset of their first page
+        long btreeInfoOffset = infoPageIos[0].getOffset();
+
+        // Serialize the fields one after the other, starting at position 0.
+        // NOTE(review): the size of each byte[] is presumably written by store( byte[] ) itself - confirm
+        long pos = store( 0L, btree.getPageSize(), infoPageIos );
+        pos = store( pos, nameBytes, infoPageIos );
+        pos = store( pos, keySerializerFqcn, infoPageIos );
+        pos = store( pos, valueSerializerFqcn, infoPageIos );
+
+        // The allowDuplicates flag is stored as an int
+        int allowDuplicatesFlag = btree.isAllowDuplicates() ? 1 : 0;
+        store( pos, allowDuplicatesFlag, infoPageIos );
+
+        // And flush the pages to disk now
+        LOG.debug( "Flushing the newly managed '{}' btree header", btree.getName() );
+        flushPages( infoPageIos );
+
+        return btreeInfoOffset;
+    }
+
+
+    /**
+     * Update the B-tree header after a B-tree modification, by creating a copy of the
+     * existing header : the existing header pages are not rewritten on place.<br/>
+     * The following values are written at the beginning of the new header :
+     * <ul>
+     * <li>the B-tree revision</li>
+     * <li>the B-tree number of elements</li>
+     * <li>the given btreeHeaderOffset</li>
+     * </ul>
+     *
+     * @param btree The B-tree to update
+     * @param btreeHeaderOffset The offset of the modified btree header
+     * @return The offset of the new B-tree Header
+     * @throws IOException If we weren't able to write the file on disk
+     * @throws EndOfFileExceededException If we tried to write after the end of the file
+     */
+    /* no qualifier */ <K, V> long updateBtreeHeader( BTree<K, V> btree, long btreeHeaderOffset )
+        throws EndOfFileExceededException, IOException
+    {
+        // Work on a copy of the header pages
+        boolean onPlace = false;
+
+        return updateBtreeHeader( btree, btreeHeaderOffset, onPlace );
+    }
+
+
+    /**
+     * Update the B-tree header after a B-tree modification, by rewriting the existing
+     * header pages on place : no new page is allocated.<br/>
+     * The following values are written at the beginning of the header :
+     * <ul>
+     * <li>the B-tree revision</li>
+     * <li>the B-tree number of elements</li>
+     * <li>the given btreeHeaderOffset</li>
+     * </ul>
+     *
+     * @param btree The B-tree to update
+     * @param btreeHeaderOffset The offset of the modified btree header
+     * @throws IOException If we weren't able to write the file on disk
+     * @throws EndOfFileExceededException If we tried to write after the end of the file
+     */
+    /* no qualifier */ <K, V> void updateBtreeHeaderOnPlace( BTree<K, V> btree, long btreeHeaderOffset )
+        throws EndOfFileExceededException, IOException
+    {
+        // Overwrite the existing header pages. The returned offset is ignored,
+        // as the header does not move.
+        boolean onPlace = true;
+
+        updateBtreeHeader( btree, btreeHeaderOffset, onPlace );
+    }
+
+
+    /**
+     * Update the B-tree header after a B-tree modification. This will make the latest modification
+     * visible.<br/>
+     * We write the following values at the beginning of the header :
+     * <ul>
+     * <li>the B-tree revision</li>
+     * <li>the B-tree number of elements</li>
+     * <li>the given btreeHeaderOffset</li>
+     * </ul>
+     * <br/>
+     * If the onPlace flag is set to true, the existing header pages are rewritten at the same
+     * place. Otherwise, every page of the existing header is copied into a newly fetched page,
+     * and the values are written into the copies.
+     *
+     * @param btree The B-tree to update. It is cast to a PersistedBTree in order to get the
+     * offset of the pages to read
+     * @param btreeHeaderOffset The offset of the modified btree header
+     * @param onPlace Tells if we modify the B-tree header on place, or if we create a copy
+     * @return The offset of the new B-tree Header if a copy has been created (ie, when the onPlace
+     * flag is set to false), NO_PAGE if the header has been rewritten on place
+     * @throws EndOfFileExceededException If we tried to write after the end of the file
+     * @throws IOException If there were some error while writing the data on disk
+     */
+    private <K, V> long updateBtreeHeader( BTree<K, V> btree, long btreeHeaderOffset, boolean onPlace )
+        throws EndOfFileExceededException, IOException
+    {
+        // Read the pageIOs associated with this B-tree
+        PageIO[] pageIos;
+        long newBtreeHeaderOffset = NO_PAGE;
+        long offset = ( ( PersistedBTree<K, V> ) btree ).getBtreeOffset();
+
+        if ( onPlace )
+        {
+            // We just have to update the existing BTreeHeader : only the three
+            // longs we are going to overwrite need to be read
+            long headerSize = LONG_SIZE + LONG_SIZE + LONG_SIZE;
+
+            pageIos = readPageIOs( offset, headerSize );
+
+            // Now, update the revision, the number of elements and the offset
+            long position = 0;
+
+            position = store( position, btree.getRevision(), pageIos );
+            position = store( position, btree.getNbElems(), pageIos );
+            position = store( position, btreeHeaderOffset, pageIos );
+
+            // Write the pages on disk
+            if ( LOG.isDebugEnabled() )
+            {
+                LOG.debug( "-----> Flushing the '{}' B-treeHeader", btree.getName() );
+                LOG.debug( " revision : " + btree.getRevision() + ", NbElems : " + btree.getNbElems() + ", btreeHeader offset : 0x"
+                    + Long.toHexString( btreeHeaderOffset ) );
+            }
+
+            // Rewrite the pages at the same place
+            LOG.debug( "Rewriting the B-treeHeader on place for B-tree {}", btree.getName() );
+            flushPages( pageIos );
+        }
+        else
+        {
+            // We have to read and copy the existing BTreeHeader and to create a new one
+            pageIos = readPageIOs( offset, Long.MAX_VALUE );
+
+            // Now, copy every read page
+            PageIO[] newPageIOs = new PageIO[pageIos.length];
+            int pos = 0;
+
+            for ( PageIO pageIo : pageIos )
+            {
+                // Fetch a free page
+                newPageIOs[pos] = fetchNewPage();
+
+                // keep a track of the allocated and copied pages so that we can
+                // free them when we do a commit or rollback, if the btree is a management one
+                if ( ( btree.getType() == BTreeTypeEnum.BTREE_OF_BTREES ) || ( btree.getType() == BTreeTypeEnum.COPIED_PAGES_BTREE ) )
+                {
+                    freedPages.add( pageIo );
+                    allocatedPages.add( newPageIOs[pos] );
+                }
+
+                // NOTE(review): presumably copies the content of the read page into the new one - confirm
+                pageIo.copy( newPageIOs[pos] );
+
+                // Chain the new pages together
+                if ( pos > 0 )
+                {
+                    newPageIOs[pos - 1].setNextPage( newPageIOs[pos].getOffset() );
+                }
+
+                pos++;
+            }
+
+            // store the new btree header offset
+            // and update the revision
+            long position = 0;
+
+            position = store( position, btree.getRevision(), newPageIOs );
+            position = store( position, btree.getNbElems(), newPageIOs );
+            position = store( position, btreeHeaderOffset, newPageIOs );
+
+            // Flush the new B-treeHeader on disk. The pages are new ones : this is not
+            // an on place rewrite, and the log must say so.
+            LOG.debug( "Writing the copied B-treeHeader in new pages for B-tree {}", btree.getName() );
+            flushPages( newPageIOs );
+
+            // The new header is identified by the offset of its first page
+            newBtreeHeaderOffset = newPageIOs[0].getOffset();
+        }
+
+        nbUpdateBtreeHeader.incrementAndGet();
+
+        if ( LOG_CHECK.isDebugEnabled() )
+        {
+            MavibotInspector.check( this );
+        }
+
+        return newBtreeHeaderOffset;
+    }
+
+
+ /**
+ * Write the pages on disk, either at the end of the file, or at
+ * the position they were taken from.
+ *
+ * @param pageIos The list of pages to write
+ * @throws IOException If the write failed
+ */
+ private void flushPages( PageIO... pageIos ) throws IOException
+ {
+ if ( LOG.isDebugEnabled() )
+ {
+ for ( PageIO pageIo : pageIos )
+ {
+ dump( pageIo );
+ }
+ }
+
+ for ( PageIO pageIo : pageIos )
+ {
pageIo.getData().rewind();
if ( fileChannel.size() < ( pageIo.getOffset() + pageSize ) )
@@ -1660,7 +2376,7 @@ public class RecordManager
* @param position The position in a virtual byte[] if all the pages were contiguous
* @param bytes The byte[] to serialize
* @param pageIos The pageIOs we have to store the data in
- * @return The new position
+ * @return The new offset
*/
private long store( long position, byte[] bytes, PageIO... pageIos )
{
@@ -1729,7 +2445,7 @@ public class RecordManager
* @param position The position in a virtual byte[] if all the pages were contiguous
* @param bytes The byte[] to serialize
* @param pageIos The pageIOs we have to store the data in
- * @return The new position
+ * @return The new offset
*/
private long storeRaw( long position, byte[] bytes, PageIO... pageIos )
{
@@ -1801,7 +2517,7 @@ public class RecordManager
* @param position The position in a virtual byte[] if all the pages were contiguous
* @param value The int to serialize
* @param pageIos The pageIOs we have to store the data in
- * @return The new position
+ * @return The new offset
*/
private long store( long position, int value, PageIO... pageIos )
{
@@ -1876,7 +2592,7 @@ public class RecordManager
* @param position The position in a virtual byte[] if all the pages were contiguous
* @param value The long to serialize
* @param pageIos The pageIOs we have to store the data in
- * @return The new position
+ * @return The new offset
*/
private long store( long position, long value, PageIO... pageIos )
{
@@ -1977,10 +2693,9 @@ public class RecordManager
/**
- * Stores a new page on disk. We will add the modified page into the tree of copied pages.
- * The new page is serialized and saved on disk.
+ * Write the page in a serialized form.
*
- * @param btree The persistedBTree we will create a new PageHolder for
+ * @param btree The persistedBtree we will create a new PageHolder for
* @param newPage The page to write on disk
* @param newRevision The page's revision
* @return A PageHolder containing the copied page
@@ -1992,7 +2707,12 @@ public class RecordManager
// We first need to save the new page on disk
PageIO[] pageIos = serializePage( btree, newRevision, newPage );
- LOG.debug( "Write data for '{}' btree ", btree.getName() );
+ if ( LOG_PAGES.isDebugEnabled() )
+ {
+ LOG_PAGES.debug( "Write data for '{}' btree", btree.getName() );
+
+ logPageIos( pageIos );
+ }
// Write the page on disk
flushPages( pageIos );
@@ -2005,20 +2725,76 @@ public class RecordManager
if ( LOG_CHECK.isDebugEnabled() )
{
- check();
+ MavibotInspector.check( this );
}
[... 1360 lines stripped ...]