You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/10/03 19:34:21 UTC

[35/65] [abbrv] jena git commit: JENA-1397: Rename java packages

http://git-wip-us.apache.org/repos/asf/jena/blob/3d456654/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeNode.java
----------------------------------------------------------------------
diff --git a/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeNode.java b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeNode.java
new file mode 100644
index 0000000..a9a9fa5
--- /dev/null
+++ b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeNode.java
@@ -0,0 +1,1508 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.dboe.trans.bplustree;
+
+import static org.apache.jena.dboe.base.record.Record.keyGT;
+import static org.apache.jena.dboe.base.record.Record.keyLT;
+import static org.apache.jena.dboe.base.record.Record.keyNE;
+import static org.apache.jena.dboe.trans.bplustree.BPT.*;
+
+import java.util.ArrayList ;
+import java.util.Iterator ;
+import java.util.List ;
+
+import org.apache.jena.atlas.io.IndentedLineBuffer ;
+import org.apache.jena.atlas.io.IndentedWriter ;
+import org.apache.jena.dboe.base.block.Block;
+import org.apache.jena.dboe.base.block.BlockMgr;
+import org.apache.jena.dboe.base.buffer.PtrBuffer;
+import org.apache.jena.dboe.base.buffer.RecordBuffer;
+import org.apache.jena.dboe.base.page.PageBlockMgr;
+import org.apache.jena.dboe.base.record.Record;
+import org.apache.jena.dboe.sys.SystemIndex;
+import org.slf4j.Logger ;
+import org.slf4j.LoggerFactory ;
+
+public final class BPTreeNode extends BPTreePage
+{
+    private static Logger log = LoggerFactory.getLogger(BPTreeNode.class) ;
+    @Override protected Logger getLogger() { return log ; }
+   
+    /*package*/ Block block ;
+    /** Page id.
+     *  Pages are addressed by ints (a page ref does in on-disk blocks)
+     *  although blocks are addressed in longs.
+     *  1k pages => 2Tbyte file limit.
+     */
+    /*package*/int id ;             // Or block.getId()            
+    
+    private int parent ;
+    private int count ;             // Number of records.  Number of pointers is +1
+    
+    // "Leaf" of the BPTree is the lowest level of ptr/key splits, not the data blocks.
+    // We need to know this to know which block manager the block pointers refer to.
+    private boolean isLeaf ;        
+
+    private RecordBuffer records ;
+    /*package*/ void setRecordBuffer(RecordBuffer r) { records = r ; }
+    
+    // Accessed by BPlusTreeFactory
+    /*package*/ PtrBuffer ptrs ;
+    /*package*/ void setPtrBuffer(PtrBuffer pb) { ptrs = pb ; }
+
+    /* B+Tree
+     * 
+     * Two block managers : 
+     *   one for Nodes (BPlusTreePages => BPlusTreeNode)
+     *   one for Leaves (RecordBufferPages)
+     * The split key is the held in the highest in the block  
+     * 
+     * A "leaf" node is a leaf of the B+Tree part, and points to 
+     * highest record in a RecordBuffer 
+     *
+     * Example for N = 2
+     *  2*N:      MaxRec = 4, MaxPtr = 5,
+     *  Max-1:    HighRec = 3, HighPtr = 4
+     *  N-1:      MinRec = 1, MinPtr = 2
+     *  
+     * which is why a tree of order <2 does not make sense. 
+     *  
+     *
+     * BPTreeNode:
+     * 
+     *      +------------------------+
+     *      |-| K0 | K1 | K2 | K3 |--|
+     *      +------------------------+
+     *      | P0 | P1 | P2 | P3 | P4 |
+     *      +------------------------+
+     *
+     *      +------------------------+
+     *      | | K0 | K1 | ** | ** |--|
+     *      +------------------------+
+     *      | P0 | P1 | P2 | ** | ** |
+     *      +------------------------+
+     *      
+     * BPTreeRecords -> RecordBuffer:
+     *      
+     *      +------------------------+
+     *      | K0 | K1 | K2 | ** | ** |
+     *      +------------------------+
+     *      
+     * The size of records blocks and size of tree nodes don't have to be the same.
+     * They use different page managers, and are in different files.  
+     *
+     * The minimal tree is one, leaf, root BPTreeNode and one BPTreeRecords page.
+     * 
+     * Pictures:      
+     *      /--\ \--\
+     * means a block with free space introduced between records[i] and records[i+1], ptrs[i+1]/ptrs[i+2]
+     * Lower half is a valid structure (except for overall size) 
+     *       
+     *      /--/ /--\
+     * means a block with free space introduced between records[i] and records[i+1], ptrs[i]/ptrs[i+1]
+     * Upper half is a valid structure (except for overall size) 
+     */
+
+    // Branch nodes only need create branch nodes (splitting sideways)
+    // Leaf nodes only create leaf nodes.
+    // The root is an exception.
+    
+    private BPTreeNode create(int parent, boolean isLeaf) {
+        return create(bpTree, parent, isLeaf) ;
+    }
+
+    private static BPTreeNode create(BPlusTree bpTree, int parent, boolean isLeaf) {
+        BPTreeNode n = bpTree.getNodeManager().createNode(parent) ;
+        n.isLeaf = isLeaf ;
+        return n ;
+    }
+
+    /*package*/ BPTreeNode(BPlusTree bpTree) {
+        super(bpTree) ;
+        // Other set by BPTreeNodeMgr.formatBPTreeNode 
+    }
+    
+    // ---- [[TXN]] ** work for transactions.
+    void checkTxn() {}
+    void checkWriteTxn() {}
+
+//    static BPTreeNode xensureModifiableRoot(BPTreeNode root, Object state) {
+//        BPTreeNode root2 = promote1(root, root, NO_ID)(null, root) ;
+//        if ( root != root2 && root.getId() != root2.getId() ) {
+//            System.err.println("Cloned root") ;
+//            if ( state == null )
+//                System.err.println("... no state") ;
+//            // [[TXN]] ** Root clone
+//            // get state and update root.
+//        }
+//        return root2 ;
+//    }
+
+    // ----
+    
+    @Override
+    public void reset(Block block) {
+        this.block = block ;
+        // reformat block (sets record and pointer buffers)
+        BPTreeNodeMgr.formatBPTreeNode(this, bpTree, block, isLeaf, parent, count) ;
+    }
+    
+    /** Get the page at slot idx - switch between B+Tree and records files */
+    /*package*/ BPTreePage get(int idx) {
+        if ( false && logging(log) ) {
+            String leafOrNode = isLeaf ? "L" : "N" ;
+            log(log, "%d[%s].get(%d)", id, leafOrNode, idx) ; 
+        }
+        int subId = ptrs.get(idx) ;
+        PageBlockMgr<? extends BPTreePage> pbm = getPageBlockMgr() ;
+        // Always get a "read" block - it must be promoted to a "write" block
+        // to update it.  Getting a write block is unhelpful as many blocks
+        // during a write operation are only read.
+        return pbm.getRead(subId, this.getId()) ;
+    }
+
+    private PageBlockMgr<? extends BPTreePage> getPageBlockMgr() {
+        if ( isLeaf )
+            return bpTree.getRecordsMgr() ;
+        else
+            return bpTree.getNodeManager() ;
+    }
+    
+    // ---------- Public calls.
+    // None of these are called recursively.
+
+    /** Find a record, using the active comparator */
+    public static Record search(BPTreeNode root, Record rec) {
+        root.internalCheckNodeDeep() ;
+        if ( ! root.isRoot() )
+            throw new BPTreeException("Search not starting from the root: " + root) ;
+        AccessPath path = new AccessPath(root) ;
+        Record r = root.internalSearch(path, rec) ;
+        return r ;
+    }
+
+    /** Insert a record - return existing value if any, else null */
+    public static Record insert(BPTreeNode root, Record record) {
+        if ( logging(log) ) {
+            log(log, "** insert(%s) / root=%d", record, root.getId()) ;
+            if ( DumpTree )
+                root.dump() ;
+        }
+
+        if ( !root.isRoot() )
+            throw new BPTreeException("Insert begins but this is not the root") ;
+        
+        if ( root.isFull() ) {
+            // Root full - root split is a special case.
+            splitRoot(root) ;
+            if ( DumpTree )
+                root.dump() ;
+        }
+
+        AccessPath path = new AccessPath(root) ;
+        // Root ready - call insert proper.
+        Record result = root.internalInsert(path, record) ;
+
+        root.internalCheckNodeDeep() ;
+
+        if ( logging(log) ) {
+            log(log, "** insert(%s) / finish", record) ;
+            if ( DumpTree )
+                root.dump() ;
+        }
+        return result ;
+    }
+
+    /** Delete a record - return the old value if there was one, else null */
+    public static Record delete(BPTreeNode root, Record rec) {
+        if ( logging(log) ) {
+            log(log, "** delete(%s) / start", rec) ;
+            if ( BPT.DumpTree )
+                root.dump() ;
+        }
+        if ( !root.isRoot() )
+            throw new BPTreeException("Delete begins but this is not the root") ;
+
+        AccessPath path = new AccessPath(root) ;
+        
+        if ( root.isLeaf && root.count == 0 ) {
+            // Special case. Just a records block. Allow that to go too small.
+            BPTreePage page = root.get(0) ;
+            if ( BPT.CheckingNode && !(page instanceof BPTreeRecords) )
+                BPT.error("Zero size leaf root but not pointing to a records block") ;
+            trackPath(path, root, 0, page) ;
+            Record r = page.internalDelete(path, rec) ;
+            page.release() ;
+            if ( r != null )
+                root.write() ;
+            if ( BPT.DumpTree )
+                root.dump() ;
+            return r ;
+        }
+
+        // Entry: checkNodeDeep() ;
+        Record v = root.internalDelete(path, rec) ;
+        // Fix the root in case it became empty in deletion process.
+        if ( !root.isLeaf && root.count == 0 ) {
+            reduceRoot(root) ;
+            root.bpTree.newRoot(root) ;
+            root.internalCheckNodeDeep() ;
+        }
+
+        if ( logging(log) ) {
+            log(log, "** delete(%s) / finish", rec) ;
+            if ( BPT.DumpTree )
+                root.dump() ;
+        }
+        return v ;
+    }
+
+    /** Iterator over the pages below that have records between minRec (inclusive) and maxRec(exclusive).
+     *  There may be other records as as well.
+     * @param minRec
+     * @param maxRec
+     * @return Iterator&lt;BPTreePage>
+     */
+    Iterator<BPTreePage> iterator(Record minRec, Record maxRec) {
+        if ( minRec != null && maxRec != null && Record.keyGE(minRec, maxRec) )
+            return null ;//throw new IllegalArgumentException("minRec >= maxRec: "+minRec+" >= "+maxRec ) ;
+        
+        int x1 = 0 ;
+        if ( minRec != null ) {
+            x1 = findSlot(minRec) ;
+            // If there is an exact match, we still need to go down that place to get the max.
+            // If there is no match, it returns -(i+1) and we need to go down the tree at i.
+            // Same effect - start at x1
+            // TODO Optimization - get max record on min side once and for all if exact match.
+            x1 = apply(x1) ;
+        }
+        
+        int x2 = this.getCount() ;    // Highest place to look. Inclusive.   
+        if ( maxRec != null ) {
+            // If there is an exact match, we need to go down that place to get the record upto max.
+            // If there is no match, it returns -(i+1) and we need to go down the tree at i to get from last key to max (exclusive).
+            // Same effect - start at x2 inclusive.
+            x2 = findSlot(maxRec) ;
+            x2 = apply(x2) ;
+        }
+        // Pages from pointer slots x1 to x2 (inc because while we exclude maxRec, 
+        // keys are only a max of the subtree they mark out.
+        
+        // XXX Just grab them now - later, keep indexes and fetch on next(). 
+        // XXX Epoch tracking 
+        
+        List<BPTreePage> x = new ArrayList<>(x2-x1+1) ;
+        for ( int i = x1 ; i <= x2 ; i++ )
+            x.add(get(i)) ;
+        
+        return x.iterator() ;
+    }
+    
+//    // OUT OF DATE WITH MVCC
+//    /**
+//     * Returns the id of the records buffer page for this record. Records Buffer
+//     * Page NOT read; record may not exist
+//     */
+//    static int recordsPageId(BPTreeNode node, Record fromRec) {
+//        // Used by BPlusTree.iterator
+//        // Walk down the B+tree part of the structure ...
+//        while (!node.isLeaf) {
+//            BPTreePage page = (fromRec == null) ? node.get(0) : node.findHere(null, fromRec) ;
+//            // Not a leaf so we can cast safely.
+//            BPTreeNode n = (BPTreeNode)page ;
+//            // Release if not root.
+//            if ( !node.isRoot() )
+//                node.release() ;
+//            node = n ;
+//        }
+//        // ... then find the id of the next step down, 
+//        // but do not touch the records buffer page.
+//        int id ;
+//        if ( fromRec == null ) {
+//            // Just get the lowest starting place.
+//            id = node.getPtrBuffer().getLow() ;
+//        } else {
+//            // Get the right id based on starting record.
+//            int idx = node.findSlot(fromRec) ;
+//            idx = apply(idx) ;
+//            id = node.getPtrBuffer().get(idx) ;
+//        }
+//        if ( !node.isRoot() )
+//            node.release() ;
+//        return id ;
+//    }
+
+    final static Record minRecord(BPTreeNode root) {
+        AccessPath path = new AccessPath(root) ; 
+        return root.internalMinRecord(path) ;
+    }
+
+    final static Record maxRecord(BPTreeNode root) {
+        AccessPath path = new AccessPath(root) ; 
+        return root.internalMaxRecord(path) ;
+    }
+    
+    @Override
+    protected Record internalMaxRecord(AccessPath path) {
+        BPTreePage page = get(count) ;
+        trackPath(path, this, count, page) ;
+        Record r = page.internalMaxRecord(path) ;
+        page.release() ;
+        return r ;
+    }
+
+    @Override
+    protected Record internalMinRecord(AccessPath path) {
+        BPTreePage page = get(0) ;
+        trackPath(path, this, 0, page) ;
+        Record r = page.internalMinRecord(path) ;
+        page.release() ;
+        return r ;
+    }
+
+    @Override
+    final Record getLowRecord() {
+        return records.getLow() ;
+    }
+
+    @Override
+    final Record getHighRecord() {
+        return records.getHigh() ;
+    }
+
+    // count is the number of pointers.
+    
+    @Override
+    public final int getMaxSize()          { return bpTree.getParams().getOrder() ; }
+    
+    @Override
+    public final int getCount()            { return count ; }
+ 
+    @Override
+    public final void setCount(int count)  { this.count = count ; }
+    
+    @Override
+  public Block getBackingBlock()           { return block ; }
+    
+    @Override
+    public BlockMgr getBlockMgr()               { return bpTree.getNodeManager().getBlockMgr() ; }
+    
+    /** Do not use without great care */
+    public final RecordBuffer getRecordBuffer()  { return records ; }
+    /** Do not use without great care */
+    public final PtrBuffer getPtrBuffer()        { return ptrs ; }
+    
+    final void setParent(int parentId)           { this.parent = parentId ; } 
+    public final int getParent()                 { return parent ; }
+
+    
+    public final void setIsLeaf(boolean isLeaf)  { this.isLeaf = isLeaf ; }
+    public final boolean isLeaf()                { return this.isLeaf ; }
+    
+    @Override
+    public final int getId()        { return id ; }
+    
+    @Override
+    public String getRefStr() {
+        return String.format("BPTNode[id=%d]", getBackingBlock().getId()) ;
+    }
+
+    @Override
+    final void write()          { bpTree.getNodeManager().write(this) ; } 
+    
+    final private static void trackPath(AccessPath path, BPTreeNode node, int idx, BPTreePage page) {
+        if ( path != null )
+            path.add(node, idx, page) ;
+    }
+    
+    final private static void resetTrackPath(AccessPath path, BPTreeNode node, int idx, BPTreePage page) {
+        if ( path != null ) {
+            path.reset(node, idx, page) ;
+        }
+    }
+    
+    @Override
+    final boolean promote() {
+        if ( bpTree.getNodeManager().isWritable(this.getId()) )
+            return false ;
+        // This calls reset is needed.
+        //   The id, records buffer and pointer buffers need resetting if the block changed.
+        boolean promoteInPlace = bpTree.state().modifiableNodeBlock(getId()) ;
+        if ( promoteInPlace ) {
+            bpTree.getNodeManager().promoteInPlace(this) ;
+            return false ;
+        } else {
+            Block oldBlock = block ;
+            boolean b = bpTree.getNodeManager().promoteDuplicate(this) ;
+            if ( b ) {
+                bpTree.getNodeManager().getBlockMgr().release(oldBlock);
+            }
+            return b ;
+        }
+    }
+    
+    @Override
+    final void release()        { bpTree.getNodeManager().release(this) ; } 
+
+    @Override
+    final void free()           { bpTree.getNodeManager().free(this) ; } 
+    
+    // ============ SEARCH
+    
+    /* 
+     * Do a (binary) search of the node to find the record.
+     *   Returns: 
+     *     +ve or 0 => the index of the record 
+     *     -ve => The insertion point : the immediate higher record or length as (-i-1)
+     *  Convert to +ve and decend to find the RecordBuffer with the record in it. 
+     */
+    
+    @Override
+    final Record internalSearch(AccessPath path, Record rec) {
+        if ( BPT.CheckingNode )
+            internalCheckNode() ;
+        BPTreePage page = findHere(path, rec) ;
+        Record r = page.internalSearch(path, rec) ;
+        page.release() ;
+        return r ;
+    }
+
+    /** Find the next page to look at as we walk down the tree */
+    private final BPTreePage findHere(AccessPath path, Record rec) {
+        int idx = findSlot(rec) ;
+        idx = apply(idx) ;
+        // Find index, or insertion point (immediate higher slot) as (-i-1)
+        // A key is the highest element of the records up to this point
+        // so we search down at slot idx (between something smaller and
+        // something larger).
+        BPTreePage page = get(idx) ;
+        trackPath(path, this, idx, page) ;
+        return page ;
+    }
+    
+    // ============ INSERT
+
+    /*
+     * Traverse this page, ensuring the node below is not full before
+     * decending. Therefore there is always space to do the actual insert.
+     */
+
+    @Override
+    final Record internalInsert(AccessPath path, Record record) {
+        if ( logging(log) )
+            log(log, "internalInsert: %s [%s]", record, this) ;
+
+        internalCheckNode() ;
+
+        int idx = findSlot(record) ;
+        
+
+        if ( logging(log) )
+            log(log, "internalInsert: idx=%d (=>%d)", idx, apply(idx)) ;
+
+        idx = apply(idx) ;
+        BPTreePage page = get(idx) ;
+        trackPath(path, this, idx, page) ;
+
+        if ( logging(log) )
+            log(log, "internalInsert: next: %s", page) ;
+
+        if ( page.isFull() ) {
+            // Need to split the page before descending.
+            split(path, idx, page) ;
+            // Did it shift the insert index?
+            // Examine the record we pulled up in the split.
+            if ( Record.keyGT(record, records.get(idx)) ) {
+                page.release() ;
+                // Yes. Get the new (upper) page
+                idx = idx + 1 ;
+                page = get(idx) ;
+                path.reset(this, idx, page);
+            }
+            internalCheckNode() ;
+        }
+        
+        Record r = page.internalInsert(path, record) ;
+        page.release() ;
+        return r ;
+    }
+
+    /*
+     * Split a non-root node y, held at slot idx in its parent,
+     * which is 'this'and is large enough for a new entry without
+     * another split because we split full blocks on the way down.  
+     * Do this by splitting the node in two (call to BPTree.split)
+     * and inserting the new key/pointer pair.
+     * WRITE(y)
+     * WRITE(z)
+     * WRITE(this)
+     */
+    private void split(AccessPath path, int idx, BPTreePage y) {
+        // logging = true ;
+        if ( logging(log) ) {
+            log(log, "split >> y=%s  this=%s idx=%d", y.getRefStr(), this.getRefStr(), idx) ;
+            log(log, "split --   %s", y) ;
+        }
+
+        internalCheckNode() ;
+        if ( BPT.CheckingNode ) {
+            if ( !y.isFull() )
+                BPT.error("Node is not full") ;
+            if ( this.ptrs.get(idx) != y.getId() ) {
+                int a = this.ptrs.get(idx) ;
+                int b = y.getId() ;
+                BPT.error("Node to be split isn't in right place [%d/%d]", a, b) ;
+            }
+        }
+        internalCheckNodeDeep() ;
+
+        // Either-Or
+//        promotePage(path, this) ;
+//        promote1(y, this, idx) ;
+        // Includes promote "this" because "this" is in the path.
+        promotePage(path, y) ;
+
+        Record splitKey = y.getSplitKey() ;
+        splitKey = keyRecord(splitKey) ;
+
+        if ( logging(log) )
+            log(log, "Split key: %s", splitKey) ;
+
+        BPTreePage z = y.split() ;
+        if ( logging(log) ) {
+            log(log, "Split: %s", y) ;
+            log(log, "Split: %s", z) ;
+        }
+
+        // Key only.
+        if ( splitKey.hasSeparateValue() )
+            splitKey = keyRecord(splitKey) ;
+
+        // Insert new node. "add" shuffle's up as well.
+        records.add(idx, splitKey) ;
+        ptrs.add(idx + 1, z.getId()) ;
+        count++ ;
+
+        if ( logging(log) ) {
+            log(log, "split <<   %s", this) ;
+            log(log, "split <<   %s", y) ;
+            log(log, "split <<   %s", z) ;
+        }
+
+        y.write() ;
+        z.write() ;
+        z.release() ;
+        // y.release() ; y release management done by caller.
+        this.write() ;
+        if ( BPT.CheckingNode ) {
+            if ( Record.keyNE(splitKey, y.maxRecord()) )
+                BPT.error("Split key %d but max subtree %s", splitKey, y.maxRecord()) ;
+            internalCheckNodeDeep() ;
+        }
+    }
+
+    @Override
+    final Record getSplitKey() {
+        int ix = params.SplitIndex ;
+        Record split = records.get(ix) ;
+        return split ;
+    }
+
+    /** Split this block - return the split record (key only needed) */
+    @Override
+    final BPTreePage split() {
+        // Median record : will go in parent.
+        int ix = params.SplitIndex ;
+
+        // New block.
+        BPTreeNode z = create(this.parent, isLeaf) ;
+
+        // Leave the low end untouched and copy, and clear the high end.
+        // z becomes the new upper node, not the lower node.
+        // 'this' is the lower block.
+
+        int maxRec = maxRecords() ;
+        // Copy from top of y into z.
+        records.copy(ix + 1, z.records, 0, maxRec - (ix + 1)) ;
+        records.clear(ix, maxRec - ix) ; // Clear copied and median slot
+        records.setSize(ix) ; // Reset size
+
+        ptrs.copy(ix + 1, z.ptrs, 0, params.MaxPtr - (ix + 1)) ;
+        ptrs.clear(ix + 1, params.MaxPtr - (ix + 1)) ;
+        ptrs.setSize(ix + 1) ;
+
+        // Set sizes of subnodes
+        setCount(ix) ; // Median is ix
+        internalCheckNode() ; // y finished
+
+        z.isLeaf = isLeaf ;
+        z.setCount(maxRec - (ix + 1)) ; // Number copied into z
+
+        // Caller puts the blocks in split(int, BTreePage)
+        z.internalCheckNode() ;
+        return z ;
+    }
+
+    /*
+     * Split the root and leave the root block as the root.
+     * This is the only point the height of the tree increases.
+     * 
+     * Allocate new blocks.
+     * Copy root low into left
+     * Copy root high into right
+     * Set counts.
+     * Create new root settings (two pointers, one key record)
+     * WRITE(left)
+     * WRITE(right)
+     * WRITE(root)
+     */
+    private static void splitRoot(BPTreeNode root) {
+        BPlusTree bpTree = root.bpTree ;
+
+        if ( BPT.CheckingNode )
+            if ( ! root.isRoot() )
+                BPT.error("Not root: %d (root is id zero)", root.getId()) ;
+        root.internalCheckNode() ;
+        promoteRoot(root) ;
+
+        // Median record
+        int splitIdx = root.params.SplitIndex ;
+        Record rec = root.records.get(splitIdx) ;
+
+        if ( logging(log) ) {
+            log(log, "** Split root %d (%s)", splitIdx, rec) ;
+            log(log, "splitRoot >>   %s", root) ;
+        }
+
+        // New blocks.
+        BPTreeNode left = create(bpTree, root.getId(), root.isLeaf) ;
+        BPTreeNode right = create(bpTree, root.getId(), root.isLeaf) ;
+
+        // int maxRecords = maxRecords() ;
+
+        // New left
+        root.records.copy(0, left.records, 0, splitIdx) ;
+        root.ptrs.copy(0, left.ptrs, 0, splitIdx + 1) ;
+        left.count = splitIdx ;
+
+        // New right
+        root.records.copy(splitIdx + 1, right.records, 0, root.maxRecords() - (splitIdx + 1)) ;
+        root.ptrs.copy(splitIdx + 1, right.ptrs, 0, root.params.MaxPtr - (splitIdx + 1)) ;
+        right.count = root.maxRecords() - (splitIdx + 1) ;
+
+        if ( logging(log) ) {
+            log(log, "splitRoot -- left:   %s", left) ;
+            log(log, "splitRoot -- right:  %s", right) ;
+        }
+
+        // So left.count+right.count = bTree.NumRec-1
+
+        // Clear root by reformatting. New root not a leaf. Has count of 1 after
+        // formatting.
+        BPTreeNodeMgr.formatForRoot(root, false) ;
+        // Make a non-leaf.
+
+        // Insert two subnodes, divided by the median record
+        root.count = 1 ;
+
+        root.records.add(0, rec) ;
+        root.ptrs.setSize(2) ;
+        root.ptrs.set(0, left.getId()) ; // slot 0
+        root.ptrs.set(1, right.getId()) ; // slot 1
+
+        if ( logging(log) ) {
+            log(log, "splitRoot <<   %s", root) ;
+            log(log, "splitRoot <<   %s", left) ;
+            log(log, "splitRoot <<   %s", right) ;
+        }
+
+        left.write() ;
+        right.write() ;
+        left.release() ;
+        right.release() ;
+        root.write() ;
+
+        if ( BPT.CheckingNode ) {
+            root.internalCheckNode() ;
+            left.internalCheckNode() ;
+            right.internalCheckNode() ;
+        }
+    }
+
+    // ============ DELETE
+
+    /*
+     * Delete
+     * Descend, making sure that the node is not minimum size at each descend.
+     * If it is, rebalenace.
+     */
+
+    @Override
+    final Record internalDelete(AccessPath path, Record rec) {
+        if ( logging(log) )
+            log(log, ">> internalDelete(%s) : %s", rec, this) ;
+        internalCheckNode() ;
+
+        int x = findSlot(rec) ;
+        int y = apply(x) ;
+        BPTreePage page = get(y) ;
+        trackPath(path, this, y, page) ;
+
+        if ( page.isMinSize() ) { 
+            // Ensure that a node is at least min+1 so a delete can happen.
+            // Can't be the root - we decended in the get().
+            rebalance(path, page, y) ;  // Ignore return - need to refind.
+            page.release(); // TODO But rebalance may have freed this?
+            // Rebalance may have moved the record due to shuffling.  
+            x = findSlot(rec) ;
+            y = apply(x) ;
+            page = get(y) ;
+            promote1(page, this, y) ;
+            resetTrackPath(path, this, y, page) ;
+            if ( BPT.CheckingNode ) {
+                internalCheckNode() ;
+                page.checkNode() ;
+            }
+            this.write() ;
+            // Needed in case the record being deleted is not in the
+            // tree, which mean there is no work done and
+            // this page is not written.
+            page.write() ;
+        }
+
+        // Go to bottom
+        // Need to return the deleted key/value.
+        Record r2 = page.internalDelete(path, rec) ;
+        if ( x >= 0 ) {
+            // And hence r2 != null.
+            // The deleted key was in the tree as well as the records.
+            // Change to the new key for the subtree. 
+            // Path is already promoted by the delete. 
+            // promote1(this, ??, ??) ;
+            Record mx = page.maxRecord() ;
+            records.set(x, keyRecord(mx)) ;
+            this.write() ;
+        }
+        if ( logging(log) )
+            log(log, "<< internalDelete(%s) : %s", rec, this) ;
+
+        page.release() ;
+        return r2 ;
+    }
+
+    /*
+     * Reduce the root when it has only one pointer and no records.
+     * WRITE(root)
+     * RELEASE(old child)
+     * This is the only point the height of the tree decreases.
+     */
+
+    private static void reduceRoot(BPTreeNode root) {
+        if ( logging(log) )
+            log(log, "reduceRoot >> %s", root) ;
+
+        if ( BPT.CheckingNode && (!root.isRoot() || root.count != 0) )
+            BPT.error("Not an empty root") ;
+
+        if ( root.isLeaf ) {
+            if ( logging(log) )
+                log(log, "reduceRoot << leaf root") ;
+            return ;
+        }
+
+        // Must have been cloned due to delete lower down
+        promoteRoot(root) ;
+        
+        BPTreePage sub = root.get(0) ;
+        promote1(sub, root, 0) ;
+        BPTreeNode n = cast(sub) ;
+        // Can pull up into the root.
+        // Leave root node in same block (rather than swap to new root).
+        BPTreeNodeMgr.formatForRoot(root, n.isLeaf) ;
+        n.records.copy(0, root.records, 0, n.count) ;
+        n.ptrs.copy(0, root.ptrs, 0, n.count + 1) ;
+        root.isLeaf = n.isLeaf ;
+        root.count = n.count ;
+        root.write() ;
+        // Free up.
+        n.free() ;
+        if ( BPT.CheckingNode )
+            root.internalCheckNodeDeep() ;
+
+        if ( logging(log) )
+            log(log, "reduceRoot << %s", root) ;
+    }
+
+    /*
+     * Rebalance "node" at slot idx in parent (this)
+     * The node will then be greater than the minimum size
+     * and one-pass delete is then possible.
+     * 
+     * try to shift right, from the left sibling (if exists)
+     * WRITE(left)
+     * WRITE(n)
+     * WRITE(this)
+     * try to shift left, from the right sibling (if exists)
+     * WRITE(right)
+     * WRITE(n)
+     * WRITE(this)
+     * else
+     * merge with left or right sibling
+     * Suboperations do all the write-back of nodes.
+     * 
+     * return the page which might be coalesced from left or right..
+     */
+    private BPTreePage rebalance(AccessPath path, BPTreePage node, int idx) {
+        if ( logging(log) ) {
+            log(log, "rebalance(id=%d, idx=%d)", node.getId(), idx) ;
+            log(log, ">> this: %s", this) ;
+            log(log, ">> node: %s", node) ;
+        }
+        internalCheckNode() ;
+//        promotePage(path, this) ;
+//        promote1(node, this, idx) ;
+        // Includes promote "this"
+        promotePage(path, node) ;
+
+        // Try left first
+        BPTreePage left = null ;
+        if ( idx > 0 )
+            left = get(idx - 1) ;
+
+        // *** SHIFTING : need to change the marker record in the parent.
+        // *** getHighRecord of lower block.
+
+        if ( left != null && !left.isMinSize() ) {
+            if ( logging(log) )
+                log(log, "rebalance/shiftRight") ;
+            // Move elements around.
+            promote1(left, this, idx-1) ;
+            
+            shiftRight(left, node, idx - 1) ;
+
+            if ( logging(log) )
+                log(log, "<< rebalance: %s", this) ;
+            if ( BPT.CheckingNode ) {
+                left.checkNode() ;
+                node.checkNode() ;
+                this.internalCheckNode() ;
+            }
+            left.release() ;
+            return node ;
+        }
+
+        BPTreePage right = null ;
+        if ( idx < count )
+            right = get(idx + 1) ;
+
+        if ( right != null && !right.isMinSize() ) {
+            if ( logging(log) )
+                log(log, "rebalance/shiftLeft") ;
+            
+            promote1(right, this, idx+1) ;
+            
+            shiftLeft(node, right, idx) ;
+
+            if ( logging(log) )
+                log(log, "<< rebalance: %s", this) ;
+            if ( BPT.CheckingNode ) {
+                right.checkNode() ;
+                node.checkNode() ;
+                this.internalCheckNode() ;
+            }
+            if ( left != null )
+                left.release() ;
+            right.release() ;
+            return node ;
+        }
+
+        // Couldn't shift. Collapse two pages.
+        if ( BPT.CheckingNode && left == null && right == null )
+            BPT.error("No siblings") ;
+
+        if ( left != null ) {
+            promote1(left, this, idx-1 ) ;
+            if ( logging(log) )
+                log(log, "rebalance/merge/left: left=%d n=%d [%d]", left.getId(), node.getId(), idx - 1) ;
+            if ( BPT.CheckingNode && left.getId() == node.getId() )
+                BPT.error("Left and n the same: %s", left) ;
+            BPTreePage page = merge(left, node, idx - 1) ;
+            if ( right != null )
+                // HACK : We didn't use it.
+                right.release() ;
+            //left release?
+            return page ;
+        } else {
+            // left == null
+            // rigth != null
+            promote1(right, this, idx+1 ) ;
+            // TODO ** HERE is it tracked correctly? Turmn tracking on, test_clear_02 and enable read checking in   Lifecycle track.free.
+            if ( logging(log) )
+                log(log, "rebalance/merge/right: n=%d right=%d [%d]", node.getId(), right.getId(), idx) ;
+            if ( BPT.CheckingNode && right.getId() == node.getId() )
+                BPT.error("N and right the same: %s", right) ;
+            BPTreePage page = merge(node, right, idx) ;
+            return page ;
+        }
+    }
+
+    /** Merge left with right ; fills left, frees right */
+    private BPTreePage merge(BPTreePage left, BPTreePage right, int dividingSlot) {
+        if ( logging(log) ) {
+            log(log, ">> merge(@%d): %s", dividingSlot, this) ;
+            log(log, ">> left:  %s", left) ;
+            log(log, ">> right: %s", right) ;
+        }
+
+        // /==\ + key + /==\ ==> /====\
+        Record splitKey = records.get(dividingSlot) ;
+        BPTreePage page = left.merge(right, splitKey) ;
+        // Must release right (not done in merge)
+        if ( logging(log) )
+            log(log, "-- merge: %s", page) ;
+
+        left.write() ;
+        left.release() ;
+        right.free() ;
+
+        if ( page == right )
+            BPT.error("Returned page is not the left") ;
+
+        if ( BPT.CheckingNode ) {
+            if ( isLeaf ) {
+                // If two data blocks, then the split key is not included
+                // (it's already there, with its value).
+                // Size is N+N and max could be odd so N+N and N+N+1 are
+                // possible.
+                if ( left.getCount() + 1 != left.getMaxSize() && left.getCount() != left.getMaxSize() )
+                    BPT.error("Inconsistent data node size: %d/%d", left.getCount(), left.getMaxSize()) ;
+            } else if ( !left.isFull() ) {
+                // If not two data blocks, the left side should now be full
+                // (N+N+split)
+                BPT.error("Inconsistent node size: %d/%d", left.getCount(), left.getMaxSize()) ;
+            }
+        }
+
+        // Remove from parent (which is "this")
+        shuffleDown(dividingSlot) ;
+        this.write() ;
+        internalCheckNodeDeep() ;
+        if ( logging(log) ) {
+            log(log, "<< merge: %s", this) ;
+            log(log, "<< left:  %s", left) ;
+        }
+        return left ;
+    }
+
+    @Override
+    BPTreePage merge(BPTreePage right, Record splitKey) {
+        return merge(this, splitKey, cast(right)) ;
+    }
+
+    private static BPTreeNode merge(BPTreeNode left, Record splitKey, BPTreeNode right) {
+        // Merge blocks - does not adjust the parent.
+        // Copy right to top of left.
+        // Caller releases 'right' (needed for testing code).
+
+        left.records.add(splitKey) ;
+
+        // Copy over right to top of left.
+        right.records.copyToTop(left.records) ;
+        right.ptrs.copyToTop(left.ptrs) ;
+
+        // Update count
+        left.count = left.count + right.count + 1 ;
+        left.internalCheckNode() ;
+
+        right.records.clear() ;
+        right.ptrs.clear() ;
+        return left ;
+    }
+
+    private void shiftRight(BPTreePage left, BPTreePage right, int i) {
+        if ( logging(log) ) {
+            log(log, ">> shiftRight: this:  %s", this) ;
+            log(log, ">> shiftRight: left:  %s", left) ;
+            log(log, ">> shiftRight: right: %s", right) ;
+        }
+        Record r1 = records.get(i) ;
+        Record r2 = left.shiftRight(right, r1) ;
+        r2 = keyRecord(r2) ;
+        this.records.set(i, r2) ;
+
+        left.write() ;
+        right.write() ;
+        // Do later -- this.put();
+        if ( logging(log) ) {
+            log(log, "<< shiftRight: this:  %s", this) ;
+            log(log, "<< shiftRight: left:  %s", left) ;
+            log(log, "<< shiftRight: right: %s", right) ;
+        }
+    }
+
+    private void shiftLeft(BPTreePage left, BPTreePage right, int i) {
+        if ( logging(log) ) {
+            log(log, ">> shiftLeft: this:  %s", this) ;
+            log(log, ">> shiftLeft: left:  %s", left) ;
+            log(log, ">> shiftLeft: right: %s", right) ;
+        }
+        Record r1 = records.get(i) ;
+        Record r2 = left.shiftLeft(right, r1) ;
+        r2 = keyRecord(r2) ;
+        this.records.set(i, r2) ;
+
+        left.write() ;
+        right.write() ;
+        // Do this later - this.put();
+        if ( logging(log) ) {
+            log(log, "<< shiftLeft: this:  %s", this) ;
+            log(log, "<< shiftLeft: left:  %s", left) ;
+            log(log, "<< shiftLeft: right: %s", right) ;
+        }
+    }
+
+    @Override
+    Record shiftRight(BPTreePage other, Record splitKey) {
+        BPTreeNode node = cast(other) ;
+        if ( BPT.CheckingNode ) {
+            if ( count == 0 )
+                BPT.error("Node is empty - can't shift a slot out") ;
+            if ( node.isFull() )
+                BPT.error("Destination node is full") ;
+        }
+        // Records: promote moving element, replace with splitKey
+        Record r = this.records.getHigh() ;
+        this.records.removeTop() ;
+        node.records.add(0, splitKey) ;
+
+        // Pointers just shift
+        this.ptrs.shiftRight(node.ptrs) ;
+
+        this.count-- ;
+        node.count++ ;
+        this.internalCheckNode() ;
+        node.internalCheckNode() ;
+        return r ;
+    }
+
+    @Override
+    Record shiftLeft(BPTreePage other, Record splitKey) {
+        BPTreeNode node = cast(other) ;
+        if ( BPT.CheckingNode ) {
+            if ( count == 0 )
+                BPT.error("Node is empty - can't shift a slot out") ;
+            if ( isFull() )
+                BPT.error("Destination node is full") ;
+        }
+        Record r = node.records.getLow() ;
+        // Records: promote moving element, replace with splitKey
+        this.records.add(splitKey) ;
+        node.records.shiftDown(0) ;
+
+        // Pointers just shift
+        this.ptrs.shiftLeft(node.ptrs) ;
+
+        this.count++ ;
+        node.count-- ;
+        return r ;
+    }
+
+    private void shuffleDown(int x) {
+        // x is the index in the parent and may be on eover the end.
+        if ( logging(log) ) {
+            log(log, "ShuffleDown: i=%d count=%d MaxRec=%d", x, count, maxRecords()) ;
+            log(log, "ShuffleDown >> %s", this) ;
+        }
+
+        if ( BPT.CheckingNode && x >= count )
+            BPT.error("shuffleDown out of bounds") ;
+
+        // Just the top to clear
+
+        if ( x == count - 1 ) {
+            records.removeTop() ;
+            ptrs.removeTop() ;
+
+            count-- ;
+            if ( logging(log) ) {
+                log(log, "shuffleDown << Clear top") ;
+                log(log, "shuffleDown << %s", this) ;
+            }
+            internalCheckNode() ;
+            return ;
+        }
+
+        // Shuffle down. Removes key and pointer just above key.
+
+        records.shiftDown(x) ;
+        ptrs.shiftDown(x + 1) ;
+        count-- ;
+        if ( logging(log) )
+            log(log, "shuffleDown << %s", this) ;
+        internalCheckNode() ;
+    }
+
+    // ---- Utilities
+
+    private static final BPTreeNode cast(BPTreePage other) {
+        try {
+            return (BPTreeNode)other ;
+        }
+        catch (ClassCastException ex) {
+            BPT.error("Wrong type: " + other) ;
+            return null ;
+        }
+    }
+
+    final int findSlot(Record rec) {
+        int x = records.find(rec) ;
+        return x ;
+    }
+
+    final boolean isRoot() {
+        // No BPT remembered root node currently
+        // if ( bpTree.root == this ) return true ;
+        return this.parent == BPlusTreeParams.RootParent ;
+    }
+
+    private Record keyRecord(Record record) {
+        return bpTree.getRecordFactory().createKeyOnly(record) ;
+    }
+
+    // Fixup/remove?
+    private final int maxRecords() {
+        return params.MaxRec ;
+    }
+
+    @Override
+    final boolean isFull() {
+        if ( BPT.CheckingNode && count > maxRecords() )
+            BPT.error("isFull: Moby block: %s", this) ;
+
+        // Count is number of records.
+        return count >= maxRecords() ;
+    }
+    
+    /** Return true if there are no keys here or below this node */
+    @Override
+    final boolean hasAnyKeys() {
+        if ( this.count > 0 )
+            return true ;
+        if ( !isRoot() )
+            return false ;
+
+        // The root can be zero size and point to a single data block.
+        BPTreePage page = get(0) ;
+        boolean b = page.hasAnyKeys() ;
+        page.release() ;
+        return b ;
+    }
+
+    @Override
+    final boolean isMinSize() {
+        int min = params.getMinRec() ;
+        if ( BPT.CheckingNode && count < min )
+            BPT.error("isMinSize: Dwarf block: %s", this) ;
+
+        return count <= min ;
+    }
+
+    // ========== Other
+
+    @Override
+    public String toString() {
+        StringBuilder b = new StringBuilder() ;
+        if ( isLeaf )
+            b.append("LEAF: ") ;
+        else
+            b.append("NODE: ") ;
+        String labelStr = "??" ;
+        if ( parent >= 0 )
+            labelStr = Integer.toString(parent) ;
+        else if ( parent == BPlusTreeParams.RootParent )
+            labelStr = "root" ;
+        if ( isLeaf )
+            labelStr = labelStr + "/leaf" ;
+
+        b.append(String.format("%d [%s] (size %d) -- ", getId(), labelStr, count)) ;
+        for ( int i = 0 ; i < maxRecords() ; i++ ) {
+            b.append(childStr(i)) ;
+            b.append(" (") ;
+            b.append(recstr(records, i)) ;
+            b.append(") ") ;
+        }
+        b.append(childStr(params.HighPtr)) ;
+        return b.toString() ;
+    }
+
+    @Override
+    protected String typeMark() {
+        String mark = isLeaf() ? "Leaf" : "Node" ;
+        if ( isRoot() )
+            mark = mark+"/Root" ;
+        return mark ; 
+    }
+
+    private final String recstr(RecordBuffer records, int idx) {
+        if ( records.isClear(idx) )
+            return "----" ;
+
+        Record r = records._get(idx) ;
+        return r.toString() ;
+    }
+
+    public void dump() {
+        boolean b = BPT.Logging ;
+        BPT.Logging = false ;
+        try { dump(IndentedWriter.stdout) ; }
+        finally { BPT.Logging = b ; }
+    }
+
+    public void dump(IndentedWriter out) {
+        output(out) ;
+        out.ensureStartOfLine() ;
+        out.flush() ;
+    }
+
+    public String dumpToString() {
+        IndentedLineBuffer buff = new IndentedLineBuffer() ;
+        output(buff) ;
+        return buff.asString() ;
+    }
+
+    @Override
+    public void output(IndentedWriter out) {
+        out.print(toString()) ;
+        out.incIndent() ;
+        for ( int i = 0 ; i < count + 1 ; i++ ) {
+            out.println() ;
+            BPTreePage page = get(i) ;
+            page.output(out) ;
+            page.release() ;
+
+        }
+        out.decIndent() ;
+    }
+
+    private String childStr(int i) {
+        if ( i >= ptrs.size() )
+            return "*" ;
+        int x = ptrs.get(i) ;
+        return Integer.toString(x) ;
+    }
+
+    // =========== Checking
+    // internal checks - only if checking
+
+    // Check node does not assume a valid tree - may be in mid-operation.
+    private final void internalCheckNode() {
+        if ( BPT.CheckingNode )
+            checkNode(null, null) ;
+    }
+
+    private final void internalCheckNodeDeep() {
+        // This distrubes trackign operations and blocks.
+        // Hooks left but switch off.
+        if ( false )
+            checkNodeDeep() ;
+    }
+
+    @Override
+    final void checkNode() {
+        checkNode(null, null) ;
+    }
+
+    @Override
+    final void checkNodeDeep() {
+        if ( isRoot() ) {
+            // if ( !isLeaf && count == 0 )
+            // error("Root is of size zero (one pointer) but not a leaf") ;
+            if ( parent != BPlusTreeParams.RootParent )
+                BPT.error("Root parent is wrong") ;
+            // if ( count == 0 )
+            // return ;
+        }
+        checkNodeDeep(null, null) ;
+    }
+
+    // Checks of a single node - no looking at children
+    // min - inclusive; max - inclusive (allows for duplicates)
+    final private void checkNode(Record min, Record max) {
+        int id = getId() ;
+        if ( count != records.size() )
+            BPT.error("Inconsistent: id=%d, count=%d, records.size()=%d : %s", id, count, records.size(), this) ;
+
+        if ( !isLeaf && count + 1 != ptrs.size() )
+            BPT.error("Inconsistent: id=%d, count+1=%d, ptrs.size()=%d ; %s", id, count + 1, ptrs.size(), this) ;
+
+        // No BPT remembered root node currently
+        // if ( bpTree.root != null && !isRoot() && count < params.MinRec)
+        if ( !isRoot() && count < params.MinRec ) {
+            // warning("Runt node: %s", this) ;
+            BPT.error("Runt node: %s", this) ;
+        }
+        if ( !isRoot() && count > maxRecords() )
+            BPT.error("Over full node: %s", this) ;
+        if ( !isLeaf && parent == id )
+            BPT.error("Parent same as id: %s", this) ;
+        Record k = min ;
+
+        // Test records in the allocated area
+        for ( int i = 0 ; i < count ; i++ ) {
+            if ( records.get(i) == null )
+                BPT.error("Node: %d : Invalid record @%d :: %s", id, i, this) ;
+            if ( k != null && keyGT(k, records.get(i)) ) {
+                Record r = records.get(i) ;
+                // keyGT(k, r) ;
+                BPT.error("Node: %d: Not sorted (%d) (%s, %s) :: %s ", id, i, k, r, this) ;
+            }
+            k = records.get(i) ;
+        }
+
+        if ( k != null && max != null && keyGT(k, max) )
+            BPT.error("Node: %d - Record is too high (max=%s):: %s", id, max, this) ;
+
+        if ( SystemIndex.getNullOut() ) {
+            // Test records in the free area
+            for ( int i = count ; i < maxRecords() ; i++ ) {
+                if ( !records.isClear(i) )
+                    BPT.error("Node: %d - not clear (idx=%d) :: %s", id, i, this) ;
+            }
+        }
+
+        // Pointer checks.
+        int i = 0 ;
+        // Check not empty at bottom.
+        for ( ; i < count + 1 ; i++ ) {
+            if ( ptrs.get(i) < 0 )
+                BPT.error("Node: %d: Invalid child pointer @%d :: %s", id, i, this) ;
+//            // This does Block IO so disturbs tracking.
+//            if ( BPT.CheckingTree && isLeaf ) {
+//                int ptr = ptrs.get(i) ;
+//                BPTreeRecords records = bpTree.getRecordsMgr().getRead(ptr) ;
+//                int rid = records.getId() ;
+//                if ( rid != ptrs.get(i) )
+//                    BPT.error("Records: Block @%d has a different id: %d :: %s", rid, i, this) ;
+//                int link = records.getLink() ;
+//                // Don't check if -1 which does not exist.
+//                if ( link != -1 && i != count ) {
+//                    BPTreeRecords page = bpTree.getRecordsMgr().getRead(ptrs.get(i)) ;
+//                    int rid2 = page.getLink() ;
+//                    if ( link != rid2 )
+//                        BPT.error("Records: Link not to next block @%d/@%d has a different id: %d :: %s", rid, rid2, i, records) ;
+//                    bpTree.getRecordsMgr().release(page) ;
+//                }
+//                records.release() ;
+//            }
+        }
+
+        // Check empty is empty
+        if ( SystemIndex.getNullOut() ) {
+            int x = params.MaxPtr ;
+            for ( ; i < x ; i++ ) {
+                if ( !ptrs.isClear(i) )
+                    BPT.error("Node: %d: Unexpected pointer @%d :: %s", id, i, this) ;
+            }
+        }
+    }
+
+    private void checkNodeDeep(Record min, Record max) {
+        checkNode(min, max) ;
+        int id = getId() ;
+        // Check pointers.
+        int limit = (count == 0) ? 0 : count + 1 ;
+
+        for ( int i = 0 ; i < limit ; i++ ) {
+            Record min1 = min ;
+            Record max1 = max ;
+            BPTreePage n = get(i) ;
+
+            if ( i != count ) {
+                Record keySubTree = n.getHighRecord() ; // high key in immediate
+                                                        // child
+                Record keyHere = records.get(i) ; // key in this
+
+                if ( keySubTree == null )
+                    BPT.error("Node: %d: Can't get high record from %d", id, n.getId()) ;
+
+                if ( keySubTree.getKey() == null )
+                    BPT.error("Node: %d: Can't get high record is missing it's key from %d", id, n.getId()) ;
+
+                if ( keyHere == null )
+                    BPT.error("Node: %d: record is null", id) ;
+
+                if ( keyHere.getKey() == null )
+                    BPT.error("Node: %d: Record key is null", id) ;
+
+                if ( keyGT(keySubTree, keyHere) )
+                    BPT.error("Node: %d: Child key %s is greater than this key %s", id, keySubTree, keyHere) ;
+
+                Record keyMax = n.maxRecord() ; // max key in subTree
+                Record keyMin = n.internalMinRecord(null) ;
+
+                if ( keyNE(keyHere, keyMax) )
+                    BPT.error("Node: %d: Key %s is not the max [%s] of the sub-tree idx=%d", id, keyHere, keyMax, i) ;
+
+                if ( min != null && keyGT(min, keyMin) )
+                    BPT.error("Node: %d: Minimun for this node should be %s but it's %s", id, min, keyMin) ;
+                if ( max != null && keyLT(max, keyMax) )
+                    BPT.error("Node: %d: Maximum for this node should be %s but it's %s", id, max, keyMax) ;
+                if ( min != null && keyGT(min, keyHere) )
+                    BPT.error("Node: %d: Key too small: %s - min should be %s", id, keyHere, min) ;
+                // keyHere == keyMax ??
+                if ( max != null && keyLT(max, keyHere) )
+                    BPT.error("Node: %d: Key too large: %s - max should be %s", id, keyHere, max) ;
+            }
+
+            // Look deeper.
+            if ( !(n instanceof BPTreeNode) ) {
+                // Records.
+                n.checkNodeDeep() ;
+                n.release() ;
+                continue ;
+            }
+
+            // Valid pointer?
+            if ( isLeaf ) {
+                if ( !bpTree.getRecordsMgr().getBlockMgr().valid(ptrs.get(i)) )
+                    BPT.error("Node: %d: Dangling ptr (records) in block @%d :: %s", id, i, this) ;
+            } else {
+                if ( !bpTree.getNodeManager().valid(ptrs.get(i)) )
+                    BPT.error("Node: %d: Dangling ptr in block @%d :: %s", id, i, this) ;
+            }
+
+            // Calc new min/max.
+            if ( i == 0 )
+                max1 = records.get(0) ;
+            else if ( i == count ) {
+                min1 = records.get(count - 1) ;
+                max1 = null ;
+            } else {
+                min1 = records.get(i - 1) ;
+                max1 = records.get(i) ;
+            }
+            // if ( n.parent != id )
+            // error("Node: %d [%d]: Parent/child mismatch :: %s", id, n.parent,
+            // this) ;
+
+            ((BPTreeNode)n).checkNodeDeep(min1, max1) ;
+            n.release() ;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/3d456654/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeNodeMgr.java
----------------------------------------------------------------------
diff --git a/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeNodeMgr.java b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeNodeMgr.java
new file mode 100644
index 0000000..2d542ed
--- /dev/null
+++ b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeNodeMgr.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.dboe.trans.bplustree;
+
+import static org.apache.jena.dboe.base.block.BlockType.BPTREE_BRANCH;
+import static org.apache.jena.dboe.base.block.BlockType.BPTREE_LEAF;
+import static org.apache.jena.dboe.base.block.BlockType.RECORD_BLOCK;
+
+import java.nio.ByteBuffer ;
+
+import org.apache.jena.dboe.base.block.Block;
+import org.apache.jena.dboe.base.block.BlockMgr;
+import org.apache.jena.dboe.base.block.BlockType;
+import org.apache.jena.dboe.base.buffer.PtrBuffer;
+import org.apache.jena.dboe.base.buffer.RecordBuffer;
+import org.apache.jena.dboe.base.page.BlockConverter;
+import org.apache.jena.dboe.base.page.PageBlockMgr;
+
+/** BPlusTreePageMgr = BPlusTreeNode manager */
+public final class BPTreeNodeMgr extends PageBlockMgr<BPTreeNode>
+{
+    // Only "public" for external very low level tools in development to access this class.
+    // Assume package access.
+
+    public BPTreeNodeMgr(BPlusTree bpTree, BlockMgr blockMgr) {
+        super(new Block2BPTreeNode(bpTree), blockMgr) ;
+    }
+   
+    /** Allocate space for a fresh node. */
+    public BPTreeNode createNode(int parent) {
+        BPTreeNode n = create(BPTREE_BRANCH) ;
+        n.setIsLeaf(false) ;
+        n.setParent(parent) ;
+        return n ;
+    }
+
+    @Override
+    public BPTreeNode getWrite(int id) {
+        return super.getWrite(id, BPlusTreeParams.UnsetParent) ;
+    }
+
+    @Override
+    public BPTreeNode getRead(int id) {
+        return super.getRead(id, BPlusTreeParams.UnsetParent) ;
+    }
+
+    /** Fetch a block - fill in the parent id, which is not in the on-disk bytes */
+    @Override
+    public BPTreeNode getRead(int id, int parent) {
+        BPTreeNode n = super.getRead$(id) ;
+        n.setParent(parent);
+        return n ;
+    }
+
+    /** Fetch a block - fill in the parent id, which is not in the on-disk bytes */
+    @Override
+    public BPTreeNode getWrite(int id, int parent) {
+        BPTreeNode n = super.getWrite$(id) ;
+        n.setParent(parent);
+        return n ;
+    }
+
+    boolean isWritable(int id) {
+        //System.err.println("BPTreeNodeMgr.isWritable") ;
+        return false ;
+//      return bpTree.state.modifiableNodeBlock(id) ;
+    }
+
+    private static class Block2BPTreeNode implements BlockConverter<BPTreeNode>
+    {
+        private final BPlusTree bpTree ;
+
+        Block2BPTreeNode(BPlusTree bpTree) { this.bpTree = bpTree ; }
+        
+        @Override
+        public BPTreeNode createFromBlock(Block block, BlockType bType) {
+            return overlay(bpTree, block, bType == RECORD_BLOCK, 0) ;
+        }
+
+        @Override
+        public BPTreeNode fromBlock(Block block) {
+            // synchronized - needed for multiple reader?
+            synchronized (block) {
+                int x = block.getByteBuffer().getInt(0) ;
+                BlockType type = getType(x) ;
+
+                if ( type != BPTREE_BRANCH && type != BPTREE_LEAF )
+                    throw new BPTreeException("Wrong block type: " + type) ;
+                int count = decodeCount(x) ;
+                return overlay(bpTree, block, (type == BPTREE_LEAF), count) ;
+            }
+        }
+
+        @Override
+        public Block toBlock(BPTreeNode node) {
+            // It's manipulated in-place so no conversion needed, 
+            // Just the count needs to be fixed up. 
+//            ByteBuffer bb = node.getBackingByteBuffer() ;
+//            BlockType bType = (node.isLeaf ? BPTREE_LEAF : BPTREE_BRANCH ) ;
+
+            Block block = node.getBackingBlock() ;
+            BlockType bType = (node.isLeaf() ? BPTREE_LEAF : BPTREE_BRANCH ) ;
+
+            int c = encodeCount(bType, node.getCount()) ;
+            block.getByteBuffer().putInt(0, c) ;
+            return block ;
+        }
+    }
+    
+//    // Leaves have a count of -(count+1)
+//    // (same as the binary search encoding of "not found")
+//    private static final int encCount(int i)     { return -(i+1) ; } 
+//    private static final int decCount(int i)     { return -i-1 ; }
+
+    // ----
+    private static final BlockType getType(int x) {
+        return BlockType.extract(x >>> 24) ;
+    }
+
+    private static final int encodeCount(BlockType type, int i) {
+        return (type.id() << 24) | (i & 0x00FFFFFF) ;
+    }
+
+    private static final int decodeCount(int i) {
+        return i & 0x00FFFFFF ;
+    }
+
+    /** byte[] layout.
+     * 
+     * New:
+     *  0: Block type
+     *  1-3: Count 
+     *  Internal nodes:
+     *    4-X:        Records: b+tree.MaxRec*record length
+     *    X- :        Pointers: b+tree.MaxPtr*ptr length 
+     */
+    private static BPTreeNode overlay(BPlusTree bpTree, Block block, boolean asLeaf, int count) {
+        // if ( byteBuffer.order() != Const.NetworkOrder )
+        // throw new BTreeException("ByteBuffer in wrong order") ;
+
+        // Fix up the id later.
+        BPTreeNode n = new BPTreeNode(bpTree) ;
+        // The count is zero at the root only.
+        // When the root is zero, it's a leaf.
+        formatBPTreeNode(n, bpTree, block, asLeaf, BPlusTreeParams.NoParent, count) ;
+        return n ;
+    }
+
+    static void formatBPTreeNode(BPTreeNode n, BPlusTree bpTree, Block block, boolean leaf, int parent, int count) {
+        BPlusTreeParams params = bpTree.getParams() ;
+
+        int ptrBuffLen = params.MaxPtr * params.getPtrLength() ;
+        // Only store the key part of records in a B+Tree block
+        // OLD - Node table has real value part - what's going on?
+
+        // [Issue:FREC]
+        // Allocate space for record, key and value, despite slight over
+        // allocation.
+        int recBuffLen = params.MaxRec * params.getRecordLength() ;
+
+        // [Issue:FREC] Should be: key space only.
+        // int recBuffLen = params.MaxRec * params.getKeyLength() ;
+
+        n.id = block.getId().intValue() ;
+        n.setParent(parent) ;
+        n.setCount(count) ;
+        n.setIsLeaf(leaf) ;
+
+        int header = BPlusTreeParams.BlockHeaderSize ;
+        int rStart = header ;
+        int pStart = header + recBuffLen ;
+
+        // Find the number of pointers.
+        int numPtrs = -1 ;
+
+        // The root can have count zero - which means one pointer always.
+        // Junk when creating a new new node.
+        if ( n.getCount() < 0 ) {
+            numPtrs = 0 ;
+            n.setCount(decodeCount(n.getCount())) ;
+        } else
+            numPtrs = n.getCount() + 1 ;
+
+        // Block dependent
+        
+        n.block = block ;
+        ByteBuffer byteBuffer = block.getByteBuffer() ;
+
+        // -- Records area
+        byteBuffer.position(rStart) ;
+        byteBuffer.limit(rStart + recBuffLen) ;
+        ByteBuffer bbr = byteBuffer.slice() ;
+        // bbr.limit(recBuffLen) ;
+        n.setRecordBuffer(new RecordBuffer(bbr, n.params.keyFactory, n.getCount())) ;
+
+        // -- Pointers area
+        byteBuffer.position(pStart) ;
+        byteBuffer.limit(pStart + ptrBuffLen) ;
+
+        ByteBuffer bbi = byteBuffer.slice() ;
+        // bbi.limit(ptrBuffLen) ;
+        n.setPtrBuffer(new PtrBuffer(bbi, numPtrs)) ;
+
+        // Reset
+        byteBuffer.rewind() ;
+    }
+    
+    static final void formatForRoot(BPTreeNode n, boolean asLeaf) {
+        BPTreeNodeMgr.formatBPTreeNode(n, n.bpTree, n.getBackingBlock(), asLeaf, BPlusTreeParams.RootParent, 0) ;
+        // Tweak for the root-specials. The node is not consistent yet.
+        // Has one dangling pointer.
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/3d456654/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreePage.java
----------------------------------------------------------------------
diff --git a/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreePage.java b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreePage.java
new file mode 100644
index 0000000..a023718
--- /dev/null
+++ b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreePage.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.dboe.trans.bplustree;
+
+import org.apache.jena.dboe.base.block.BlockMgr;
+import org.apache.jena.dboe.base.page.Page;
+import org.apache.jena.dboe.base.record.Record;
+import org.slf4j.Logger ;
+
+/** Abstraction of a B+Tree node - either an branch (BTreeNode) or records block (BTreeRecords) */
+abstract public class BPTreePage implements Page
+{
+    protected BPTreePage(BPlusTree bpTree) {
+        this.bpTree = bpTree ;
+        // bpTree can be null in testing.
+        this.params = ( bpTree == null ) ? null : bpTree.getParams() ;
+    }
+    
+    protected final BPlusTree bpTree ; 
+    protected final BPlusTreeParams params ;
+
+    /** Short form for logging */
+    protected String label() {
+        return String.format("%d[%s]", getId(), typeMark()) ;
+    }
+    
+    protected abstract String typeMark() ;
+    
+    protected abstract Logger getLogger() ;
+    
+    abstract BlockMgr getBlockMgr() ;
+
+    /** Split in two, return the new (upper) page.  
+     *  Split key is highest key of the old (lower) page.
+     *  Does NOT put pages back to any underlying block manager
+     */ 
+    abstract BPTreePage split() ;
+    
+    /** Move the element from the high end of this to the low end of other,
+    *  possible including the splitKey
+     * Return the new split point (highest record in left tree for records; moved element for nodes)
+     */
+    abstract Record shiftRight(BPTreePage other, Record splitKey) ;
+    
+    /** Move the element from the high end of other to the high end of this, 
+     * possible including the splitKey
+     * Return the new split point (highest record in left tree; moved element for nodes)
+     */
+    abstract Record shiftLeft(BPTreePage other, Record splitKey) ;
+    
+    /** Merge this (left) and the page imemdiately to it's right other into a single block
+     */
+    abstract BPTreePage merge(BPTreePage right, Record splitKey) ;
+    //* Return the new page (may be left or right)
+    
+    /** Test whether this page is full (has no space for a new element) - used in "insert" */
+    abstract boolean isFull() ;
+    
+    /**  Test whether this page is of minimum size (removing a record would violate the packing limits) - used in "delete" */
+    abstract boolean isMinSize() ;
+    
+    abstract int getCount() ;
+    
+    abstract void setCount(int count) ;
+  
+    abstract int getMaxSize() ;
+    
+    /**  Test whether this page has any keys */
+    abstract boolean hasAnyKeys() ;
+
+    /** Find a record; return null if not found */
+    abstract Record internalSearch(AccessPath path, Record rec) ;
+    
+    /** Insert a record - return existing value if any, else null - put back modifed blocks */
+    abstract Record internalInsert(AccessPath path, Record record) ;
+    
+    /** Delete a record - return the old value if there was one, else null - put back modifed blocks */
+    abstract Record internalDelete(AccessPath path, Record record) ;
+
+    /** Least in page */
+    abstract Record getLowRecord() ;
+
+    /** Greatest in page */
+    abstract Record getHighRecord() ;
+
+    /** Least in subtree */
+    abstract Record internalMinRecord(AccessPath path) ;
+
+    /** Greatest in subtree */
+    abstract Record internalMaxRecord(AccessPath path) ;
+
+    // For checking ...
+    
+    /** Least in subtree */
+    Record minRecord()      { return internalMinRecord(null) ; } 
+
+    /** Greatest in subtree */
+    Record maxRecord()      { return internalMaxRecord(null) ; }
+    
+    /** Write, or at least ensure will be written */
+    abstract void write() ; 
+    
+    /** Turn a read page into a write page. Return true if any changes were made. */
+    abstract boolean promote() ;
+
+    /** Mark as no longer needed */
+    abstract void release() ;
+    
+    /** Discard with this block (for ever) */
+    abstract void free() ;
+
+    /** Check - just this level.*/
+    abstract void checkNode() ;
+    
+    /** Check - here and below */
+    abstract void checkNodeDeep() ;
+
+    /** Return the split point for this record (need only be a key)*/
+    abstract Record getSplitKey() ;
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/3d456654/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeRangeIterator.java
----------------------------------------------------------------------
diff --git a/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeRangeIterator.java b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeRangeIterator.java
new file mode 100644
index 0000000..2ad1f23
--- /dev/null
+++ b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeRangeIterator.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.dboe.trans.bplustree;
+
+import java.util.* ;
+
+import org.apache.jena.atlas.iterator.Iter ;
+import org.apache.jena.atlas.lib.InternalErrorException ;
+import org.apache.jena.dboe.base.record.Record;
+import org.apache.jena.dboe.trans.bplustree.AccessPath.AccessStep;
+import org.slf4j.Logger ;
+import org.slf4j.LoggerFactory ;
+
+/** Iterator over records that does not assume records block linkage */ 
+class BPTreeRangeIterator implements Iterator<Record> {
+    static Logger log = LoggerFactory.getLogger(BPTreeRangeIterator.class) ;
+    
+    public static Iterator<Record> create(BPTreeNode node, Record minRec, Record maxRec) {
+        if ( minRec != null && maxRec != null && Record.keyGE(minRec, maxRec) )
+            return Iter.nullIter();
+        return new BPTreeRangeIterator(node, minRec, maxRec) ;
+    }
+    
+    // Convert path to a stack of iterators
+    private final Deque<Iterator<BPTreePage>> stack = new ArrayDeque<>();
+    final private Record minRecord ;
+    final private Record maxRecord ;
+    private Iterator<Record> current ;
+    private Record slot = null ;
+    private boolean finished = false ;
+    
+    BPTreeRangeIterator(BPTreeNode node, Record minRec, Record maxRec ) {
+        this.minRecord = minRec ;
+        this.maxRecord = maxRec ;
+        BPTreeRecords r = loadStack(node) ;
+        current = getRecordsIterator(r, minRecord, maxRecord) ;
+    }
+
+    @Override
+    public boolean hasNext() {
+        if ( finished ) 
+            return false ;
+        if ( slot != null )
+            return true ;
+        while(current != null && !current.hasNext()) {
+            current = moveOnCurrent() ;
+        } 
+        if ( current == null ) {
+            end() ;
+            return false ;
+        }
+        slot = current.next() ;
+        return true ;
+        
+    }
+    
+    // Move across the head of the stack until empty - then move next level. 
+    private Iterator<Record> moveOnCurrent() {
+        Iterator<BPTreePage> iter = null ;
+        while(!stack.isEmpty()) { 
+            iter = stack.peek() ;
+            if ( iter.hasNext() )
+              break ;
+            stack.pop() ;
+        } 
+        
+        if ( iter == null || ! iter.hasNext() )
+            return null ;
+        BPTreePage p = iter.next() ;
+        BPTreeRecords r = null ;
+        if (p instanceof BPTreeNode) {
+            r = loadStack((BPTreeNode)p) ;
+        }
+        else {
+            r = (BPTreeRecords)p ;
+        }
+        return getRecordsIterator(r, minRecord, maxRecord) ;
+    }
+    
+    // ---- Places we touch blocks. 
+    
+    private static Iterator<Record> getRecordsIterator(BPTreeRecords records, Record minRecord, Record maxRecord) {
+        records.bpTree.startReadBlkMgr();
+        Iterator<Record> iter = records.getRecordBuffer().iterator(minRecord, maxRecord) ;
+        records.bpTree.finishReadBlkMgr();
+        return iter ;
+    }
+    
+    private BPTreeRecords loadStack(BPTreeNode node) {
+        AccessPath path = new AccessPath(null) ;
+        node.bpTree.startReadBlkMgr();
+        
+        if ( minRecord == null )
+            node.internalMinRecord(path) ;
+        else
+            node.internalSearch(path, minRecord) ;
+        List<AccessStep> steps = path.getPath() ;
+        for ( AccessStep step : steps ) {
+            BPTreeNode n = step.node ; 
+            Iterator<BPTreePage> it = n.iterator(minRecord, maxRecord) ;
+            if ( it == null || ! it.hasNext() )
+                continue ;
+            BPTreePage p = it.next() ;
+            stack.push(it) ;
+        }
+        BPTreePage p = steps.get(steps.size()-1).page ;
+        if ( ! ( p instanceof BPTreeRecords ) )
+            throw new InternalErrorException("Last path step not to a records block") ;
+        node.bpTree.finishReadBlkMgr();
+        return (BPTreeRecords)p ;
+    }
+
+    // ---- 
+
+    private void end() {
+        finished = true ;
+        current = null ;
+    }
+    
+    // ---- 
+    
+    public void close() {
+        if ( ! finished )
+            end() ;
+    }
+
+    @Override
+    public Record next() {
+        if ( ! hasNext() )
+            throw new NoSuchElementException() ;
+        Record r = slot ;
+        if ( r == null )
+            throw new InternalErrorException("Null slot after hasNext is true") ;
+        slot = null ;
+        return r ;
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/3d456654/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeRecords.java
----------------------------------------------------------------------
diff --git a/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeRecords.java b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeRecords.java
new file mode 100644
index 0000000..18030a1
--- /dev/null
+++ b/jena-db/jena-dboe-trans-data/src/main/java/org/apache/jena/dboe/trans/bplustree/BPTreeRecords.java
@@ -0,0 +1,389 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.dboe.trans.bplustree ;
+
+import static java.lang.String.format ;
+import static org.apache.jena.atlas.lib.Alg.decodeIndex ;
+import static org.apache.jena.dboe.trans.bplustree.BPT.CheckingNode;
+import static org.apache.jena.dboe.trans.bplustree.BPT.promotePage;
+
+import org.apache.jena.atlas.io.IndentedWriter ;
+import org.apache.jena.dboe.base.StorageException;
+import org.apache.jena.dboe.base.block.Block;
+import org.apache.jena.dboe.base.block.BlockMgr;
+import org.apache.jena.dboe.base.buffer.RecordBuffer;
+import org.apache.jena.dboe.base.page.Page;
+import org.apache.jena.dboe.base.record.Record;
+import org.apache.jena.dboe.base.recordbuffer.RecordBufferPage;
+import org.slf4j.Logger ;
+import org.slf4j.LoggerFactory ;
+
+/**
+ * B+Tree wrapper over a block of records in a RecordBufferPage.
+ * This class adds no persistent state to a RecordBufferPage.
+ */
+public final class BPTreeRecords extends BPTreePage {
+    private static Logger log = LoggerFactory.getLogger(BPTreeRecords.class) ;
+
+    @Override
+    protected Logger getLogger() {
+        return log ;
+    }
+
+    private final RecordBufferPage rBuffPage ;
+    private final BPTreeRecordsMgr bprRecordsMgr ;
+    private RecordBuffer           rBuff ;        // Used heavily. Derived from
+                                                   // rBuffPage
+
+    BPTreeRecords(BPTreeRecordsMgr mgr, RecordBufferPage rbp) {
+        super(mgr.getBPTree()) ;
+        this.bprRecordsMgr = mgr ;
+        rBuffPage = rbp ;
+        rBuff = rBuffPage.getRecordBuffer() ;
+    }
+
+    RecordBufferPage getRecordBufferPage() {
+        return rBuffPage ;
+    }
+
+    RecordBuffer getRecordBuffer() {
+        return rBuff ;
+    }
+
+    public final Record get(int idx) {
+        return rBuff.get(idx) ;
+    }
+
+    @Override
+    public final Block getBackingBlock() {
+        return rBuffPage.getBackingBlock() ;
+    }
+
+    @Override
+    public BlockMgr getBlockMgr() {
+        return bpTree.getRecordsMgr().getBlockMgr() ;
+    }
+
+    @Override
+    public void reset(Block block) {
+        rBuffPage.reset(block) ;
+        rBuff = rBuffPage.getRecordBuffer() ;
+    }
+
+    int getLink() {
+        return rBuffPage.getLink() ;
+    }
+
+    @Override
+    public boolean isFull() {
+        return (rBuff.size() >= rBuff.maxSize()) ;
+    }
+
+    @Override
+    public boolean hasAnyKeys() {
+        return rBuff.size() > 0 ;
+    }
+
+    @Override
+    public boolean isMinSize() {
+        // 50% packing minimum.
+        // If of max length 5 (i.e. odd), min size is 2. Integer division works.
+        return (rBuff.size() <= rBuff.maxSize() / 2) ;
+    }
+
+    @Override
+    Record internalSearch(AccessPath path, Record rec) {
+        int i = rBuff.find(rec) ;
+        if ( i < 0 )
+            return null ;
+        return rBuff.get(i) ;
+    }
+
+    @Override
+    final public void write() {
+        bprRecordsMgr.write(this) ;
+    }
+
+    @Override
+    final boolean promote() {
+        if ( bprRecordsMgr.isWritable(getId()) )
+            return false ;
+        // reset() will be called if necessary.
+        boolean promoteInPlace = (bpTree == null) ? true : bpTree.state().modifiableRecordsBlock(getId()) ;
+
+        if ( promoteInPlace ) {
+            bprRecordsMgr.promoteInPlace(this) ;
+            // TODO Is this needed?
+            // Is this replicated in BPTreeNode?
+            if ( getBackingBlock().isReadOnly() )
+                bprRecordsMgr.getBlockMgr().promote(getBackingBlock()) ;
+            return false ;
+        } else {
+            Block oldBlock = getBackingBlock() ;
+            boolean b = bprRecordsMgr.promoteDuplicate(this) ;
+            if ( b )
+                bprRecordsMgr.getBlockMgr().release(oldBlock) ;
+            return b ;
+        }
+
+    }
+
+    @Override
+    final public void release() {
+        bprRecordsMgr.release(this) ;
+    }
+
+    @Override
+    final public void free() {
+        bprRecordsMgr.free(this) ;
+    }
+
+    @Override
+    Record internalInsert(AccessPath path, Record record) {
+        // Delay promotion until we know change will happen.
+        int i = rBuff.find(record) ;
+        Record r2 = null ;
+        if ( i < 0 ) {
+            i = decodeIndex(i) ;
+            if ( rBuff.size() >= rBuff.maxSize() )
+                throw new StorageException("RecordBlock.put overflow") ;
+            promotePage(path, this) ;
+            rBuff.add(i, record) ;
+        } else {
+            r2 = rBuff.get(i) ;
+            if ( Record.compareByKeyValue(record, r2) != 0 ) {
+                // Replace : return old
+                promotePage(path, this) ;
+                rBuff.set(i, record) ;
+            } else
+                // No promotion, no write
+                return r2 ;
+        }
+        write() ;
+        return r2 ;
+    }
+
+    @Override
+    Record internalDelete(AccessPath path, Record record) {
+        int i = rBuff.find(record) ;
+        if ( i < 0 )
+            return null ;
+        promotePage(path, this) ;
+        Record r2 = rBuff.get(i) ;
+        rBuff.remove(i) ;
+        write() ;
+        return r2 ;
+    }
+
+    @Override
+    public Record getSplitKey() {
+        int splitIdx = rBuff.size() / 2 - 1 ;
+        Record r = rBuff.get(splitIdx) ;
+        return r ;
+    }
+
+    /**
+     * Split: place old high half in 'other'. Return the new (upper)
+     * BPTreeRecords(BPTreePage).
+     * Split is the high end of the low page.
+     */
+    @Override
+    public BPTreePage split() {
+        BPTreeRecords other = insertNewPage() ;
+        int splitIdx = rBuff.size() / 2 - 1 ;
+        Record r = rBuff.get(splitIdx) ; // Only need key for checking later.
+        int moveLen = rBuff.size() - (splitIdx + 1) ; // Number to move.
+        // Copy high end to new.
+        rBuff.copy(splitIdx + 1, other.getRecordBufferPage().getRecordBuffer(), 0, moveLen) ;
+        rBuff.clear(splitIdx + 1, moveLen) ;
+        rBuff.setSize(splitIdx + 1) ;
+
+        if ( CheckingNode ) {
+            if ( !Record.keyEQ(r, maxRecord()) ) {
+                System.err.println(rBuff) ;
+                System.err.println(other.rBuff) ;
+                error("BPTreeRecords.split: Not returning expected record") ;
+            }
+        }
+        return other ;
+    }
+    
+    private BPTreeRecords insertNewPage() {
+        // MVCC - link can not be maintained. 
+        if ( false /* ! bpTree.isTransaction() */ ) {
+            BPTreeRecords other = create(rBuffPage.getLink()) ;
+            rBuffPage.setLink(other.getId()) ;
+            return other ;
+        } 
+        BPTreeRecords other = create(Page.NO_ID) ;
+        rBuffPage.setLink(Page.NO_ID) ;
+        return other ;
+    }
+
+    private BPTreeRecords create(int linkId) {
+        BPTreeRecords newPage = bprRecordsMgr.create() ;
+        newPage.getRecordBufferPage().setLink(linkId) ;
+        return newPage ;
+    }
+
+    @Override
+    public Record shiftRight(BPTreePage other, Record splitKey) {
+        // Error checking by RecordBuffer
+        BPTreeRecords page = cast(other) ;
+        rBuff.shiftRight(page.rBuff) ;
+        if ( rBuff.size() == 0 )
+            return null ;
+        return rBuff.getHigh() ;
+    }
+
+    @Override
+    public Record shiftLeft(BPTreePage other, Record splitKey) {
+        // Error checking by RecordBuffer
+        BPTreeRecords page = cast(other) ;
+        rBuff.shiftLeft(page.rBuff) ;
+        if ( rBuff.size() == 0 )
+            return null ;
+        return rBuff.getHigh() ;
+    }
+
+    @Override
+    BPTreePage merge(BPTreePage right, Record splitKey) {
+        // Split key ignored - it's for the B+Tree case of pushing down a key
+        // Records blocks have all the key/values in them anyway.
+        return merge(this, cast(right)) ;
+    }
+
+    private static BPTreeRecords merge(BPTreeRecords left, BPTreeRecords right) {
+        // Copy right to top of left.
+        // The other way round needs a shift as well.
+        right.rBuff.copyToTop(left.rBuff) ;
+        // Same as: right.rBuff.copy(0, left.rBuff, left.rBuff.size(),
+        // right.rBuff.size()) ;
+        right.rBuff.clear() ;
+
+        // The right page is released by the caller. left is still in use.
+        // So the test code can poke around in the right block after merge.
+        // left.bpTree.getRecordsMgr().release(left.getId()) ;
+
+        // Fix up the link chain.
+        left.rBuffPage.setLink(right.rBuffPage.getLink()) ;
+        return left ;
+    }
+
+    private static BPTreeRecords cast(BPTreePage page) {
+        try {
+            return (BPTreeRecords)page ;
+        }
+        catch (ClassCastException ex) {
+            error("Wrong type: " + page) ;
+            return null ;
+        }
+    }
+
+    @Override
+    final public Record internalMinRecord(AccessPath path) {
+        return getLowRecord() ;
+    }
+
+    @Override
+    final public Record internalMaxRecord(AccessPath path) {
+        return getHighRecord() ;
+    }
+
+    private static void error(String msg, Object... args) {
+        msg = format(msg, args) ;
+        System.out.println(msg) ;
+        System.out.flush() ;
+        throw new BPTreeException(msg) ;
+    }
+
+    @Override
+    public final Record getLowRecord() {
+        if ( rBuff.size() == 0 )
+            return null ;
+        return rBuff.getLow() ;
+    }
+
+    @Override
+    public final Record getHighRecord() {
+        if ( rBuff.size() == 0 )
+            return null ;
+        return rBuff.getHigh() ;
+    }
+
+    @Override
+    public final int getMaxSize() {
+        return rBuff.maxSize() ;
+    }
+
+    @Override
+    public final int getCount() {
+        return rBuff.size() ;
+    }
+
+    @Override
+    public final void setCount(int count) {
+        rBuff.setSize(count) ;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("BPTreeRecords[id=%d, link=%d]: %s", getId(), getLink(), rBuff.toString()) ;
+    }
+
+    @Override
+    protected String typeMark() {
+        return "Data" ;
+    }
+
+    @Override
+    public final void checkNode() {
+        if ( !CheckingNode )
+            return ;
+        if ( rBuff.size() < 0 || rBuff.size() > rBuff.maxSize() )
+            error("Misized: %s", this) ;
+
+        for ( int i = 1 ; i < getCount() ; i++ ) {
+            Record r1 = rBuff.get(i - 1) ;
+            Record r2 = rBuff.get(i) ;
+            if ( Record.keyGT(r1, r2) )
+                error("Not sorted: %s", this) ;
+        }
+    }
+
+    @Override
+    public final void checkNodeDeep() {
+        checkNode() ;
+    }
+
+    @Override
+    public int getId() {
+        return rBuffPage.getId() ;
+    }
+
+    @Override
+    public String getRefStr() {
+        return String.format("BPTRecord[id=%d]", getBackingBlock().getId()) ;
+    }
+
+    @Override
+    public void output(IndentedWriter out) {
+        out.print(toString()) ;
+    }
+}