You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ib...@apache.org on 2021/11/25 14:09:53 UTC
[ignite] branch master updated: IGNITE-15990 Fixed B+Tree corruption during concurrent replace and remove. (#9602)
This is an automated email from the ASF dual-hosted git repository.
ibessonov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 499e5ef IGNITE-15990 Fixed B+Tree corruption during concurrent replace and remove. (#9602)
499e5ef is described below
commit 499e5efd0cef2f9d55901d988d69f944e34b7505
Author: ibessonov <be...@gmail.com>
AuthorDate: Thu Nov 25 17:09:23 2021 +0300
IGNITE-15990 Fixed B+Tree corruption during concurrent replace and remove. (#9602)
---
.../cache/persistence/tree/BPlusTree.java | 831 +++++++++++----------
.../database/BPlusTreeReplaceRemoveRaceTest.java | 424 +++++++++++
.../ignite/testsuites/IgniteBasicTestSuite.java | 2 +
3 files changed, 857 insertions(+), 400 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java
index 6e508a7..49dc924 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java
@@ -382,6 +382,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
return RETRY;
assert p.btmLvl == 0 : "split is impossible with replace";
+ assert lvl == 0 : "Replace via page handler is only possible on the leaves level.";
final int cnt = io.getCount(pageAddr);
final int idx = findInsertionPoint(lvl, io, pageAddr, 0, cnt, p.row, 0);
@@ -389,40 +390,26 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
if (idx < 0) // Not found, split or merge happened.
return RETRY;
- // Replace link at idx with new one.
- // Need to read link here because `p.finish()` will clear row.
- L newRow = p.row;
+ assert p.oldRow == null : "The old row must be set only once.";
- // Detach the old row if we are on a leaf page.
- if (lvl == 0) {
- assert p.oldRow == null; // The old row must be set only once.
-
- // Inner replace state must be consistent by the end of the operation.
- assert p.needReplaceInner == FALSE || p.needReplaceInner == DONE : p.needReplaceInner;
-
- // Need to replace inner key if now we are replacing the rightmost row and have a forward page.
- if (canGetRowFromInner && idx + 1 == cnt && p.fwdId != 0L && p.needReplaceInner == FALSE) {
- // Can happen only for invoke, otherwise inner key must be replaced on the way down.
- assert p.invoke != null;
-
- // We need to restart the operation from root to perform inner replace.
- // On the second pass we will not get here (will avoid infinite loop) because
- // needReplaceInner will be DONE or our key will not be the rightmost anymore.
- return RETRY_ROOT;
- }
+ // Lock the leaf if the row should be replaced in an inner node as well.
+ if (canGetRowFromInner && idx + 1 == cnt && p.fwdId != 0L) {
+ Tail<L> tail = p.addTail(pageId, page, pageAddr, io, lvl, Tail.EXACT);
- // Get old row in leaf page to reduce contention at upper level.
- p.oldRow = p.needOld ? getRow(io, pageAddr, idx) : (T)Boolean.TRUE;
+ // Row index is cached, because it won't change until the leaf is unlocked.
+ tail.idx = (short)idx;
- p.finish();
+ return FOUND;
}
- boolean needWal = needWalDeltaRecord(pageId, page, null);
+ // Row exists in this leaf only. No other actions will be required.
- byte[] newRowBytes = io.store(pageAddr, idx, newRow, null, needWal);
+ // Read old row before actual replacement.
+ p.oldRow = p.needOld ? getRow(io, pageAddr, idx) : (T)Boolean.TRUE;
- if (needWal)
- wal.log(new ReplaceRecord<>(grpId, pageId, io, newRowBytes, idx));
+ p.replaceRowInPage(io, pageId, page, pageAddr, idx);
+
+ p.finish();
return FOUND;
}
@@ -466,7 +453,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
// Here forward page can't be concurrently removed because we keep write lock on tail which is the only
// page who knows about the forward page, because it was just produced by split.
p.rightId = io.getForward(pageAddr);
- p.tail(pageId, page, pageAddr);
+ p.setTailForSplit(pageId, page, pageAddr, io, p.btmLvl - 1);
assert p.rightId != 0;
}
@@ -617,6 +604,28 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
}
}
+ /**
+ * Page handler that adds the page to the tail of {@link Update} object.
+ * Results in {@link Result#FOUND} if added to tail successfully.
+ * Results in {@link Result#RETRY} if triangle invariant is violated.
+ */
+ private final PageHandler<Update, Result> lockTailExact;
+
+ /** */
+ private class LockTailExact extends GetPageHandler<Update> {
+ /** {@inheritDoc} */
+ @Override protected Result run0(long pageId, long page, long pageAddr, BPlusIO<L> io, Update u, int lvl)
+ throws IgniteCheckedException {
+ // Check the triangle invariant.
+ if (io.getForward(pageAddr) != u.fwdId)
+ return RETRY;
+
+ u.addTail(pageId, page, pageAddr, io, lvl, Tail.EXACT);
+
+ return FOUND;
+ }
+ }
+
/** */
private final PageHandler<Remove, Result> lockTail;
@@ -837,6 +846,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
// Initialize page handlers.
askNeighbor = (PageHandler<Get, Result>)wrap(this, new AskNeighbor());
search = (PageHandler<Get, Result>)wrap(this, new Search());
+ lockTailExact = (PageHandler<Update, Result>)wrap(this, new LockTailExact());
lockTail = (PageHandler<Remove, Result>)wrap(this, new LockTail());
lockTailForward = (PageHandler<Remove, Result>)wrap(this, new LockTailForward());
lockBackAndTail = (PageHandler<Remove, Result>)wrap(this, new LockBackAndTail());
@@ -2010,10 +2020,8 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
// Intentional fallthrough.
case GO_DOWN:
- res = x.tryReplaceInner(pageId, page, fwdId, lvl);
-
- if (res != RETRY)
- res = invokeDown(x, x.pageId, x.backId, x.fwdId, lvl - 1);
+ // Go down recursively.
+ res = invokeDown(x, x.pageId, x.backId, x.fwdId, lvl - 1);
if (res == RETRY_ROOT || x.isFinished())
return res;
@@ -2024,13 +2032,9 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
continue;
}
- // Unfinished Put does insertion on the same level.
- if (x.isPut())
- continue;
-
- assert x.isRemove(); // Guarded by isFinished.
+ assert x.op != null; // Guarded by isFinished.
- res = x.finishOrLockTail(pageId, page, backId, fwdId, lvl);
+ res = x.op.finishOrLockTail(pageId, page, backId, fwdId, lvl);
return res;
@@ -2038,11 +2042,19 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
if (lvl == 0)
x.invokeClosure();
+ // Level must be equal to bottom level. This is the place when we would insert values into
+ // parent nodes during splits.
+ assert lvl == (x.isPut() ? ((Put)x.op).btmLvl : 0)
+ : "NOT_FOUND on the wrong level [lvl=" + lvl + ", x=" + x
+ + ", btmLvl=" + (x.isPut() ? ((Put)x.op).btmLvl : 0) + ']';
+
return x.onNotFound(pageId, page, fwdId, lvl);
case FOUND:
- if (lvl == 0)
- x.invokeClosure();
+ // Item can only be found in the leaf page.
+ assert lvl == 0 : "Invoke found an item in an inner node instead of going down: lvl=" + lvl;
+
+ x.invokeClosure();
return x.onFound(pageId, page, backId, fwdId, lvl);
@@ -2444,16 +2456,20 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
continue;
case FOUND:
- // We may need to insert split key into upper level here.
+ // We may need to perform an inner replace on the upper level.
if (!p.isFinished()) {
- // It must be impossible to have an insert higher than the current root,
- // because we are making decision about creating new root while keeping
- // write lock on current root, so it can't concurrently change.
- assert p.btmLvl <= getRootLevel();
+ res = p.finishTail();
- checkInterrupted();
+ // If not found, then the root split has happened and operation should be retried from the actual root.
+ if (res == RETRY || res == NOT_FOUND) {
+ p.releaseTail();
- continue;
+ assert p.checkTailLevel(getRootLevel()) : "tail=" + p.tail + ", res=" + res;
+
+ checkInterrupted();
+
+ continue;
+ }
}
return p.oldRow;
@@ -2820,18 +2836,23 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
assert p.pageId != pageId;
assert p.fwdId != fwdId || fwdId == 0;
- res = p.tryReplaceInner(pageId, page, fwdId, lvl);
-
- if (res != RETRY) // Go down recursively.
- res = putDown(p, p.pageId, p.fwdId, lvl - 1);
+ // Go down recursively.
+ res = putDown(p, p.pageId, p.fwdId, lvl - 1);
if (res == RETRY_ROOT || p.isFinished())
return res;
- if (res == RETRY)
+ if (res == RETRY) {
checkInterrupted();
- continue; // We have to insert split row to this level or it is a retry.
+ continue;
+ }
+
+ // We have to either insert split row to this level,
+ // perform inner replace, lock the tail or retry.
+ res = p.finishOrLockTail(pageId, page, 0L, fwdId, lvl);
+
+ return res;
case FOUND: // Do replace.
assert lvl == 0 : "This replace can happen only at the bottom level.";
@@ -2840,7 +2861,6 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
case NOT_FOUND: // Do insert.
assert lvl == p.btmLvl : "must insert at the bottom level";
- assert p.needReplaceInner == FALSE : p.needReplaceInner + " " + lvl;
return p.tryInsert(pageId, page, fwdId, lvl);
@@ -3580,16 +3600,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
/**
* Put operation.
*/
- public final class Put extends Get {
- /** Mark of NULL value of page id. It means valid value can't be equal this value. */
- private static final long NULL_PAGE_ID = 0L;
-
- /** Mark of NULL value of page. */
- private static final long NULL_PAGE = 0L;
-
- /** Mark of NULL value of page address. */
- private static final long NULL_PAGE_ADDRESS = 0L;
-
+ public final class Put extends Update {
/** Right child page ID for split row. */
long rightId;
@@ -3597,26 +3608,11 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
T oldRow;
/**
- * This page is kept locked after split until insert to the upper level will not be finished. It is needed
- * because split row will be "in flight" and if we'll release tail, remove on split row may fail.
- */
- long tailId;
-
- /** */
- long tailPage;
-
- /** */
- long tailAddr;
-
- /**
* Bottom level for insertion (insert can't go deeper). Will be incremented on split on each level.
*/
short btmLvl;
/** */
- Bool needReplaceInner = FALSE;
-
- /** */
final boolean needOld;
/**
@@ -3624,55 +3620,109 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
* @param needOld {@code True} If need return old value.
*/
private Put(T row, boolean needOld) {
- super(row, false);
+ super(row);
this.needOld = needOld;
}
/** {@inheritDoc} */
- @Override boolean found(BPlusIO<L> io, long pageAddr, int idx, int lvl) {
- if (lvl == 0) // Leaf: need to stop.
- return true;
+ @Override boolean notFound(BPlusIO<L> io, long pageAddr, int idx, int lvl) {
+ assert btmLvl >= 0 : btmLvl;
+ assert lvl >= btmLvl : lvl;
+
+ return lvl == btmLvl;
+ }
- assert btmLvl == 0; // It can not be insert.
+ /** {@inheritDoc} */
+ @Override protected Result finishOrLockTail(long pageId, long page, long backId, long fwdId, int lvl)
+ throws IgniteCheckedException {
+ if (btmLvl == lvl) {
+ // Insert for the split.
+ return tryInsert(pageId, page, fwdId, lvl);
+ }
+
+ // Finish inner replace.
+ Result res = finishTail();
- // If we can get full row from the inner page, we have to replace it with the new one. On the way down
- // we can not miss inner key even in presence of concurrent operations because of `triangle` invariant +
- // concurrent inner replace handling by retrying from root.
- if (canGetRowFromInner && needReplaceInner == FALSE)
- needReplaceInner = TRUE;
+ // Add this page to the tail if inner replace has not happened.
+ if (res == NOT_FOUND) {
+ // Set forward id to check the triangle invariant under the write-lock.
+ fwdId(fwdId);
- return false;
+ res = write(pageId, page, lockTailExact, this, lvl, RETRY, statisticsHolder());
+ }
+
+ // Release tail if retry is required.
+ if (res == RETRY)
+ releaseTail();
+
+ return res;
}
/** {@inheritDoc} */
- @Override boolean notFound(BPlusIO<L> io, long pageAddr, int idx, int lvl) {
- assert btmLvl >= 0 : btmLvl;
- assert lvl >= btmLvl : lvl;
+ @Override protected Result finishTail() throws IgniteCheckedException {
+ // An inner node is required for replacement.
+ if (tail.lvl == 0)
+ return NOT_FOUND;
- return lvl == btmLvl;
+ int idx = insertionPoint(tail);
+
+ // Missing row means that current page must be added to tail.
+ if (idx < 0) {
+ idx = fix(idx);
+
+ BPlusInnerIO<L> io = (BPlusInnerIO<L>)tail.io;
+
+ // Release tail in case of broken triangle invariant in locked pages.
+ if (io.getLeft(tail.buf, idx) != tail.down.pageId) {
+ releaseTail();
+
+ return RETRY;
+ }
+
+ return NOT_FOUND;
+ }
+
+ assert oldRow == null : "The old row must be set only once.";
+
+ // Insertion index is found. Replace must be performed in both tail top and tail bottom.
+ replaceRowInPage(tail.io, tail.pageId, tail.page, tail.buf, idx);
+
+ // Unlock everything until the leaf, there's no need to hold these locks anymore.
+ while (tail.lvl != 0) {
+ writeUnlockAndClose(tail.pageId, tail.page, tail.buf, null);
+
+ tail = tail.down;
+ }
+
+ // Read old row from the leaf to reduce contention in inner pages.
+ oldRow = needOld ? getRow(tail.io, tail.buf, tail.idx) : (T)Boolean.TRUE;
+
+ // Replace row in the leaf.
+ replaceRowInPage(tail.io, tail.pageId, tail.page, tail.buf, tail.idx);
+
+ finish();
+
+ return FOUND;
}
/**
+ * Tail page is kept locked after split until insert to the upper level will not be finished. It is needed
+ * because split row will be "in flight" and if we'll release tail, remove on split row may fail.
+ *
* @param tailId Tail page ID.
* @param tailPage Tail page pointer.
* @param tailPageAddr Tail page address
+ * @param io Tail page IO.
+ * @param lvl Tail page level.
*/
- private void tail(long tailId, long tailPage, long tailPageAddr) {
- assert (tailId == NULL_PAGE_ID) == (tailPage == NULL_PAGE);
- assert (tailPage == NULL_PAGE) == (tailPageAddr == NULL_PAGE_ADDRESS);
+ private void setTailForSplit(long tailId, long tailPage, long tailPageAddr, BPlusIO<L> io, int lvl) {
+ assert tailId != 0L && tailPage != 0L && tailPageAddr != 0L;
- if (this.tailPage != NULL_PAGE)
- writeUnlockAndClose(this.tailId, this.tailPage, this.tailAddr, null);
-
- this.tailId = tailId;
- this.tailPage = tailPage;
- this.tailAddr = tailPageAddr;
- }
+ // Old tail must be unlocked.
+ releaseTail();
- /** {@inheritDoc} */
- @Override public boolean canRelease(long pageId, int lvl) {
- return pageId != NULL_PAGE_ID && tailId != pageId;
+ addTail(tailId, tailPage, tailPageAddr, io, lvl, Tail.EXACT);
}
/**
@@ -3682,7 +3732,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
row = null;
rightId = 0;
- tail(NULL_PAGE_ID, NULL_PAGE, NULL_PAGE_ADDRESS);
+ releaseTail();
}
/** {@inheritDoc} */
@@ -3753,7 +3803,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
long fwdPageAddr = writeLock(fwdId, fwdPage); // Initial write, no need to check for concurrent modification.
- assert fwdPageAddr != NULL_PAGE_ADDRESS;
+ assert fwdPageAddr != 0L;
// TODO GG-11640 log a correct forward page record.
final Boolean fwdPageWalPlc = Boolean.TRUE;
@@ -3802,7 +3852,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
long newRootAddr = writeLock(newRootId, newRootPage); // Initial write.
- assert newRootAddr != NULL_PAGE_ADDRESS;
+ assert newRootAddr != 0L;
// Never write full new root page, because it is known to be new.
final Boolean newRootPageWalPlc = Boolean.FALSE;
@@ -3859,44 +3909,6 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
* @return Result.
* @throws IgniteCheckedException If failed.
*/
- private Result tryReplaceInner(long pageId, long page, long fwdId, int lvl)
- throws IgniteCheckedException {
- // Need to replace key in inner page. There is no race because we keep tail lock after split.
- if (needReplaceInner == TRUE) {
- needReplaceInner = FALSE; // Protect from retries.
-
- long oldFwdId = this.fwdId;
- long oldPageId = this.pageId;
-
- // Set old args.
- this.fwdId = fwdId;
- this.pageId = pageId;
-
- Result res = write(pageId, page, replace, this, lvl, RETRY, statisticsHolder());
-
- // Restore args.
- this.pageId = oldPageId;
- this.fwdId = oldFwdId;
-
- if (res == RETRY)
- return RETRY;
-
- needReplaceInner = DONE; // We can have only a single matching inner key.
-
- return FOUND;
- }
-
- return NOT_FOUND;
- }
-
- /**
- * @param pageId Page ID.
- * @param page Page pointer.
- * @param fwdId Forward ID.
- * @param lvl Level.
- * @return Result.
- * @throws IgniteCheckedException If failed.
- */
private Result tryInsert(long pageId, long page, long fwdId, int lvl) throws IgniteCheckedException {
// Init args.
this.pageId = pageId;
@@ -3921,10 +3933,28 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
return write(pageId, page, replace, this, lvl, RETRY, statisticsHolder());
}
+ /**
+ * Replaces a row in the page with a new one.
+ *
+ * @param io Page IO for the page.
+ * @param pageId Page id.
+ * @param page Page pointer.
+ * @param pageAddr Page address.
+ * @param idx Replacement index.
+ */
+ public void replaceRowInPage(BPlusIO<L> io, long pageId, long page, long pageAddr, int idx) throws IgniteCheckedException {
+ boolean needWal = needWalDeltaRecord(pageId, page, null);
+
+ byte[] newRowBytes = io.store(pageAddr, idx, row, null, needWal);
+
+ if (needWal)
+ wal.log(new ReplaceRecord<>(grpId, pageId, io, newRowBytes, idx));
+ }
+
/** {@inheritDoc} */
@Override void checkLockRetry() throws IgniteCheckedException {
- //non null tailId means that lock on tail page still hold and we can't fail with exception.
- if (tailId == NULL_PAGE_ID)
+ // Non-null tail means that lock on the tail page is still being held, and we can't fail with exception.
+ if (tail == null)
super.checkLockRetry();
}
}
@@ -3946,7 +3976,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
T foundRow;
/** */
- Get op;
+ Update op;
/**
* @param row Row.
@@ -4101,7 +4131,7 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
* @return {@code true} If it is a {@link Remove} and the page is in tail.
*/
private boolean isTail(long pageId, int lvl) {
- return isRemove() && ((Remove)op).isTail(pageId, lvl);
+ return op != null && op.isTail(pageId, lvl);
}
/**
@@ -4171,14 +4201,14 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
private Result tryFinish() throws IgniteCheckedException {
assert op != null; // Must be guarded by isFinished.
- if (isPut())
- return RETRY;
-
- Result res = ((Remove)op).finishTail();
+ Result res = op.finishTail();
if (res == NOT_FOUND)
res = RETRY;
+ if (res == RETRY && isPut())
+ op.releaseTail();
+
assert res == FOUND || res == RETRY : res;
return res;
@@ -4194,23 +4224,33 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
return op.isFinished();
}
+ }
+
+ /**
+ * Update operation. Has basic operations for {@link Tail} support.
+ */
+ private abstract class Update extends Get {
+ /** We may need to lock part of the tree branch from the bottom to up for multiple levels. */
+ Tail<L> tail;
/**
- * @param pageId Page ID.
- * @param page Page pointer.
- * @param fwdId Forward ID.
- * @param lvl Level.
- * @return Result.
- * @throws IgniteCheckedException If failed.
+ * @param row Row.
*/
- Result tryReplaceInner(long pageId, long page, long fwdId, int lvl) throws IgniteCheckedException {
- if (!isPut())
- return NOT_FOUND;
-
- return ((Put)op).tryReplaceInner(pageId, page, fwdId, lvl);
+ private Update(L row) {
+ super(row, false);
}
/**
+ * Method that's invoked when operation goes up from the recursion and {@link #isFinished()} returns false.
+ * Either finishes the operation or locks the page for further processing on another level.
+ * <p/>
+ * Returns {@link Result#FOUND} if operation has finished and {@link #isFinished()} returns {@code true} now.
+ * <p/>
+ * Returns {@link Result#RETRY} if operation should be retried.
+ * <p/>
+ * Returns {@link Result#NOT_FOUND} if operation has added the page to tail, meaning that operation can't be
+ * finished on current level.
+ *
* @param pageId Page ID.
* @param page Page pointer.
* @param backId Back page ID.
@@ -4219,63 +4259,242 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
* @return Result.
* @throws IgniteCheckedException If failed.
*/
- public Result finishOrLockTail(long pageId, long page, long backId, long fwdId, int lvl)
- throws IgniteCheckedException {
- return ((Remove)op).finishOrLockTail(pageId, page, backId, fwdId, lvl);
- }
- }
-
- /**
- * Remove operation.
- */
- private final class Remove extends Get implements ReuseBag {
- /** We may need to lock part of the tree branch from the bottom to up for multiple levels. */
- Tail<L> tail;
-
- /** */
- Bool needReplaceInner = FALSE;
-
- /** */
- Bool needMergeEmptyBranch = FALSE;
+ protected abstract Result finishOrLockTail(long pageId, long page, long backId, long fwdId, int lvl)
+ throws IgniteCheckedException;
- /** Removed row. */
- T rmvd;
+ /**
+ * Process tail and finish. Same as {@link #finishOrLockTail(long, long, long, long, int)} but doesn't add the
+ * page to the tail.
+ *
+ * @return Result.
+ * @throws IgniteCheckedException If failed.
+ */
+ protected abstract Result finishTail() throws IgniteCheckedException;
- /** Current page absolute pointer. */
- long page;
+ /**
+ * Release pages for all locked levels at the tail.
+ */
+ protected final void releaseTail() {
+ doReleaseTail(tail);
- /** */
- Object freePages;
+ tail = null;
+ }
- /** */
- final boolean needOld;
+ /**
+ * @param rootLvl Actual root level.
+ * @return {@code true} If tail level is correct.
+ */
+ protected final boolean checkTailLevel(int rootLvl) {
+ return tail == null || tail.lvl < rootLvl;
+ }
/**
- * @param row Row.
- * @param needOld {@code True} If need return old value.
+ * @param t Tail.
*/
- private Remove(L row, boolean needOld) {
- super(row, false);
+ protected final void doReleaseTail(Tail<L> t) {
+ while (t != null) {
+ writeUnlockAndClose(t.pageId, t.page, t.buf, t.walPlc);
- this.needOld = needOld;
+ Tail<L> s = t.sibling;
+
+ if (s != null)
+ writeUnlockAndClose(s.pageId, s.page, s.buf, s.walPlc);
+
+ t = t.down;
+ }
}
/** {@inheritDoc} */
- @Override public long pollFreePage() {
- if (freePages == null)
- return 0L;
+ @Override public final boolean canRelease(long pageId, int lvl) {
+ return pageId != 0L && !isTail(pageId, lvl);
+ }
- if (freePages.getClass() == GridLongList.class) {
- GridLongList list = ((GridLongList)freePages);
+ /**
+ * @param pageId Page ID.
+ * @param lvl Level.
+ * @return {@code true} If the given page is in tail.
+ */
+ protected final boolean isTail(long pageId, int lvl) {
+ Tail<L> t = tail;
- return list.isEmpty() ? 0L : list.remove();
- }
+ while (t != null) {
+ if (t.lvl < lvl)
+ return false;
- long res = (long)freePages;
+ if (t.lvl == lvl) {
+ if (t.pageId == pageId)
+ return true;
- freePages = null;
+ t = t.sibling;
- return res;
+ return t != null && t.pageId == pageId;
+ }
+
+ t = t.down;
+ }
+
+ return false;
+ }
+
+ /**
+ * @param pageId Page ID.
+ * @param page Page pointer.
+ * @param pageAddr Page address.
+ * @param io IO.
+ * @param lvl Level.
+ * @param type Type.
+ * @return Added tail.
+ */
+ protected final Tail<L> addTail(long pageId, long page, long pageAddr, BPlusIO<L> io, int lvl, byte type) {
+ final Tail<L> t = new Tail<>(pageId, page, pageAddr, io, type, lvl);
+
+ if (tail == null)
+ tail = t;
+ else if (tail.lvl == lvl) { // Add on the same level.
+ assert tail.sibling == null; // Only two siblings on a single level.
+
+ if (type == Tail.EXACT) {
+ assert tail.type != Tail.EXACT;
+
+ if (tail.down != null) { // Take down from sibling, EXACT must own down link.
+ t.down = tail.down;
+ tail.down = null;
+ }
+
+ t.sibling = tail;
+ tail = t;
+ }
+ else {
+ assert tail.type == Tail.EXACT : tail.type;
+
+ tail.sibling = t;
+ }
+ }
+ else { // Add on top of existing level.
+ assert tail.lvl == lvl - 1 : "tail=" + tail + ", lvl=" + lvl;
+
+ t.down = tail;
+ tail = t;
+ }
+
+ return t;
+ }
+
+ /**
+ * @param tail Tail to start with.
+ * @param lvl Level.
+ * @return Tail of {@link Tail#EXACT} type at the given level.
+ */
+ protected final Tail<L> getTail(Tail<L> tail, int lvl) {
+ assert tail != null;
+ assert lvl >= 0 && lvl <= tail.lvl : lvl;
+
+ Tail<L> t = tail;
+
+ while (t.lvl != lvl)
+ t = t.down;
+
+ assert t.type == Tail.EXACT : t.type; // All the down links must be of EXACT type.
+
+ return t;
+ }
+
+ /**
+ * @param tail Tail.
+ * @return Insertion point. May be negative.
+ * @throws IgniteCheckedException If failed.
+ */
+ protected final int insertionPoint(Tail<L> tail) throws IgniteCheckedException {
+ assert tail.type == Tail.EXACT : tail.type;
+
+ if (tail.idx == Short.MIN_VALUE) {
+ int idx = findInsertionPoint(tail.lvl, tail.io, tail.buf, 0, tail.getCount(), row, 0);
+
+ assert checkIndex(idx) : idx;
+
+ tail.idx = (short)idx;
+ }
+
+ return tail.idx;
+ }
+
+ /**
+ * @param keys If we have to show keys.
+ * @return Tail as a String.
+ * @throws IgniteCheckedException If failed.
+ */
+ protected final String printTail(boolean keys) throws IgniteCheckedException {
+ SB sb = new SB("");
+
+ Tail<L> t = tail;
+
+ while (t != null) {
+ sb.a(t.lvl).a(": ").a(printPage(t.io, t.buf, keys));
+
+ Tail<L> d = t.down;
+
+ t = t.sibling;
+
+ if (t != null)
+ sb.a(" -> ").a(t.type == Tail.FORWARD ? "F" : "B").a(' ').a(printPage(t.io, t.buf, keys));
+
+ sb.a('\n');
+
+ t = d;
+ }
+
+ return sb.toString();
+ }
+ }
+
+ /**
+ * Remove operation.
+ */
+ private final class Remove extends Update implements ReuseBag {
+ /** */
+ Bool needReplaceInner = FALSE;
+
+ /** */
+ Bool needMergeEmptyBranch = FALSE;
+
+ /** Removed row. */
+ T rmvd;
+
+ /** Current page absolute pointer. */
+ long page;
+
+ /** */
+ Object freePages;
+
+ /** */
+ final boolean needOld;
+
+ /**
+ * @param row Row.
+ * @param needOld {@code True} If need return old value.
+ */
+ private Remove(L row, boolean needOld) {
+ super(row);
+
+ this.needOld = needOld;
+ }
+
+ /** {@inheritDoc} */
+ @Override public long pollFreePage() {
+ if (freePages == null)
+ return 0L;
+
+ if (freePages.getClass() == GridLongList.class) {
+ GridLongList list = ((GridLongList)freePages);
+
+ return list.isEmpty() ? 0L : list.remove();
+ }
+
+ long res = (long)freePages;
+
+ freePages = null;
+
+ return res;
}
/** {@inheritDoc} */
@@ -4460,13 +4679,8 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
return false;
}
- /**
- * Process tail and finish.
- *
- * @return Result.
- * @throws IgniteCheckedException If failed.
- */
- private Result finishTail() throws IgniteCheckedException {
+ /** {@inheritDoc} */
+ @Override protected Result finishTail() throws IgniteCheckedException {
assert !isFinished();
assert tail.type == Tail.EXACT && tail.lvl >= 0 : tail;
@@ -4726,25 +4940,6 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
}
/**
- * @param tail Tail.
- * @return Insertion point. May be negative.
- * @throws IgniteCheckedException If failed.
- */
- private int insertionPoint(Tail<L> tail) throws IgniteCheckedException {
- assert tail.type == Tail.EXACT : tail.type;
-
- if (tail.idx == Short.MIN_VALUE) {
- int idx = findInsertionPoint(tail.lvl, tail.io, tail.buf, 0, tail.getCount(), row, 0);
-
- assert checkIndex(idx) : idx;
-
- tail.idx = (short)idx;
- }
-
- return tail.idx;
- }
-
- /**
* @return {@code true} If the currently locked tail is valid.
* @throws IgniteCheckedException If failed.
*/
@@ -5019,162 +5214,6 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
}
/**
- * Release pages for all locked levels at the tail.
- */
- private void releaseTail() {
- doReleaseTail(tail);
-
- tail = null;
- }
-
- /**
- * @param t Tail.
- */
- private void doReleaseTail(Tail<L> t) {
- while (t != null) {
- writeUnlockAndClose(t.pageId, t.page, t.buf, t.walPlc);
-
- Tail<L> s = t.sibling;
-
- if (s != null)
- writeUnlockAndClose(s.pageId, s.page, s.buf, s.walPlc);
-
- t = t.down;
- }
- }
-
- /** {@inheritDoc} */
- @Override public boolean canRelease(long pageId, int lvl) {
- return pageId != 0L && !isTail(pageId, lvl);
- }
-
- /**
- * @param pageId Page ID.
- * @param lvl Level.
- * @return {@code true} If the given page is in tail.
- */
- private boolean isTail(long pageId, int lvl) {
- Tail t = tail;
-
- while (t != null) {
- if (t.lvl < lvl)
- return false;
-
- if (t.lvl == lvl) {
- if (t.pageId == pageId)
- return true;
-
- t = t.sibling;
-
- return t != null && t.pageId == pageId;
- }
-
- t = t.down;
- }
-
- return false;
- }
-
- /**
- * @param pageId Page ID.
- * @param page Page pointer.
- * @param pageAddr Page address.
- * @param io IO.
- * @param lvl Level.
- * @param type Type.
- * @return Added tail.
- */
- private Tail<L> addTail(long pageId, long page, long pageAddr, BPlusIO<L> io, int lvl, byte type) {
- final Tail<L> t = new Tail<>(pageId, page, pageAddr, io, type, lvl);
-
- if (tail == null)
- tail = t;
- else if (tail.lvl == lvl) { // Add on the same level.
- assert tail.sibling == null; // Only two siblings on a single level.
-
- if (type == Tail.EXACT) {
- assert tail.type != Tail.EXACT;
-
- if (tail.down != null) { // Take down from sibling, EXACT must own down link.
- t.down = tail.down;
- tail.down = null;
- }
-
- t.sibling = tail;
- tail = t;
- }
- else {
- assert tail.type == Tail.EXACT : tail.type;
-
- tail.sibling = t;
- }
- }
- else if (tail.lvl == lvl - 1) { // Add on top of existing level.
- t.down = tail;
- tail = t;
- }
- else
- throw new IllegalStateException();
-
- return t;
- }
-
- /**
- * @param tail Tail to start with.
- * @param lvl Level.
- * @return Tail of {@link Tail#EXACT} type at the given level.
- */
- private Tail<L> getTail(Tail<L> tail, int lvl) {
- assert tail != null;
- assert lvl >= 0 && lvl <= tail.lvl : lvl;
-
- Tail<L> t = tail;
-
- while (t.lvl != lvl)
- t = t.down;
-
- assert t.type == Tail.EXACT : t.type; // All the down links must be of EXACT type.
-
- return t;
- }
-
- /**
- * @param keys If we have to show keys.
- * @return Tail as a String.
- * @throws IgniteCheckedException If failed.
- */
- private String printTail(boolean keys) throws IgniteCheckedException {
- SB sb = new SB("");
-
- Tail<L> t = tail;
-
- while (t != null) {
- sb.a(t.lvl).a(": ").a(printPage(t.io, t.buf, keys));
-
- Tail<L> d = t.down;
-
- t = t.sibling;
-
- if (t != null)
- sb.a(" -> ").a(t.type == Tail.FORWARD ? "F" : "B").a(' ').a(printPage(t.io, t.buf, keys));
-
- sb.a('\n');
-
- t = d;
- }
-
- return sb.toString();
- }
-
- /**
- * @param rootLvl Actual root level.
- * @return {@code true} If tail level is correct.
- */
- private boolean checkTailLevel(int rootLvl) {
- return tail == null || tail.lvl < rootLvl;
- }
-
- /**
* @throws IgniteCheckedException If failed.
*/
private void releaseAll() throws IgniteCheckedException {
@@ -5182,16 +5221,8 @@ public abstract class BPlusTree<L, T extends L> extends DataStructure implements
reuseFreePages();
}
- /**
- * @param pageId Page ID.
- * @param page Page pointer.
- * @param backId Back page ID.
- * @param fwdId Forward ID.
- * @param lvl Level.
- * @return Result.
- * @throws IgniteCheckedException If failed.
- */
- private Result finishOrLockTail(long pageId, long page, long backId, long fwdId, int lvl)
+ /** {@inheritDoc} */
+ @Override protected Result finishOrLockTail(long pageId, long page, long backId, long fwdId, int lvl)
throws IgniteCheckedException {
Result res = finishTail();
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/database/BPlusTreeReplaceRemoveRaceTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/database/BPlusTreeReplaceRemoveRaceTest.java
new file mode 100644
index 0000000..ecfc093
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/database/BPlusTreeReplaceRemoveRaceTest.java
@@ -0,0 +1,424 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.database;
+
+import java.io.Externalizable;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.IgniteException;
+import org.apache.ignite.configuration.DataRegionConfiguration;
+import org.apache.ignite.failure.FailureContext;
+import org.apache.ignite.internal.IgniteInternalFuture;
+import org.apache.ignite.internal.mem.unsafe.UnsafeMemoryProvider;
+import org.apache.ignite.internal.pagemem.FullPageId;
+import org.apache.ignite.internal.pagemem.PageIdAllocator;
+import org.apache.ignite.internal.pagemem.PageMemory;
+import org.apache.ignite.internal.pagemem.PageUtils;
+import org.apache.ignite.internal.pagemem.impl.PageMemoryNoStoreImpl;
+import org.apache.ignite.internal.processors.cache.persistence.DataRegionMetricsImpl;
+import org.apache.ignite.internal.processors.cache.persistence.diagnostic.pagelocktracker.PageLockTrackerManager;
+import org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.BPlusIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.BPlusInnerIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.BPlusLeafIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.IOVersions;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.PageIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.reuse.ReuseList;
+import org.apache.ignite.internal.processors.failure.FailureProcessor;
+import org.apache.ignite.internal.util.typedef.T2;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.junits.GridTestKernalContext;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.junit.Test;
+
+import static org.apache.ignite.internal.util.IgniteUtils.MB;
+
+/**
+ * Test is based on {@link BPlusTreeSelfTest} and has a partial copy of its code.
+ */
+public class BPlusTreeReplaceRemoveRaceTest extends GridCommonAbstractTest {
+ /** */
+ private static final short PAIR_INNER_IO = 30000;
+
+ /** */
+ private static final short PAIR_LEAF_IO = 30001;
+
+ /** */
+ protected static final int PAGE_SIZE = 512;
+
+ /** */
+ private static final int CACHE_ID = 100500;
+
+ /** */
+ protected PageMemory pageMem;
+
+ /** Tracking of locks holding. */
+ private PageLockTrackerManager lockTrackerManager;
+
+ /** {@inheritDoc} */
+ @Override protected void beforeTest() throws Exception {
+ pageMem = createPageMemory();
+
+ lockTrackerManager = new PageLockTrackerManager(log, "testTreeManager");
+
+ lockTrackerManager.start();
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void afterTest() throws Exception {
+ if (pageMem != null)
+ pageMem.stop(true);
+
+ if (lockTrackerManager != null)
+ lockTrackerManager.stop();
+ }
+
+ /**
+ * @return Page memory.
+ */
+ protected PageMemory createPageMemory() {
+ DataRegionConfiguration plcCfg = new DataRegionConfiguration()
+ .setInitialSize(1024 * MB)
+ .setMaxSize(1024 * MB);
+
+ PageMemory pageMem = new PageMemoryNoStoreImpl(log,
+ new UnsafeMemoryProvider(log),
+ null,
+ PAGE_SIZE,
+ plcCfg,
+ new DataRegionMetricsImpl(plcCfg, new GridTestKernalContext(log())),
+ true);
+
+ pageMem.start();
+
+ return pageMem;
+ }
+
+ /**
+ * @return Allocated meta page ID.
+ * @throws IgniteCheckedException If failed.
+ */
+ private FullPageId allocateMetaPage() throws IgniteCheckedException {
+ return new FullPageId(pageMem.allocatePage(CACHE_ID, PageIdAllocator.INDEX_PARTITION, PageIdAllocator.FLAG_IDX), CACHE_ID);
+ }
+
+ /**
+ * Short for {@code T2<Integer, Integer>}.
+ */
+ protected static class Pair extends T2<Integer, Integer> {
+ /** Serial version uid. */
+ private static final long serialVersionUID = 0L;
+
+ /**
+ * @param key Key.
+ * @param val Value.
+ */
+ public Pair(Integer key, Integer val) {
+ super(key, val);
+ }
+
+ /**
+ * No-arg constructor for {@link Externalizable}.
+ */
+ public Pair() {
+ }
+ }
+
+ /**
+ * Test tree, maps {@link Integer} to {@code Integer}.
+ */
+ protected static class TestPairTree extends BPlusTree<Pair, Pair> {
+ /**
+ * @param reuseList Reuse list.
+ * @param canGetRow Can get row from inner page.
+ * @param cacheId Cache ID.
+ * @param pageMem Page memory.
+ * @param metaPageId Meta page ID.
+ * @throws IgniteCheckedException If failed.
+ */
+ public TestPairTree(
+ ReuseList reuseList,
+ boolean canGetRow,
+ int cacheId,
+ PageMemory pageMem,
+ long metaPageId,
+ PageLockTrackerManager lockTrackerManager
+ ) throws IgniteCheckedException {
+ super(
+ "test",
+ cacheId,
+ null,
+ pageMem,
+ null,
+ new AtomicLong(),
+ metaPageId,
+ reuseList,
+ new IOVersions<>(new TestPairInnerIO()),
+ new IOVersions<>(new TestPairLeafIO()),
+ PageIdAllocator.FLAG_IDX,
+ new FailureProcessor(new GridTestKernalContext(log)) {
+ /** {@inheritdoc} */
+ @Override public boolean process(FailureContext failureCtx) {
+ return true;
+ }
+ },
+ lockTrackerManager
+ );
+
+ PageIO.registerTest(latestInnerIO(), latestLeafIO());
+
+ initTree(true);
+ }
+
+ /** {@inheritDoc} */
+ @Override protected int compare(BPlusIO<Pair> io, long pageAddr, int idx, Pair n2)
+ throws IgniteCheckedException {
+ Pair n1 = io.getLookupRow(this, pageAddr, idx);
+
+ return Integer.compare(n1.getKey(), n2.getKey());
+ }
+
+ /** {@inheritDoc} */
+ @Override public Pair getRow(BPlusIO<Pair> io, long pageAddr, int idx, Object ignore)
+ throws IgniteCheckedException {
+ return io.getLookupRow(this, pageAddr, idx);
+ }
+ }
+
+ /** */
+ private static final class TestPairInnerIO extends BPlusInnerIO<Pair> {
+ /** */
+ TestPairInnerIO() {
+ super(PAIR_INNER_IO, 1, true, 8);
+ }
+
+ /** {@inheritDoc} */
+ @Override public int getMaxCount(long buf, int pageSize) {
+ return 2;
+ }
+
+ /** {@inheritDoc} */
+ @Override public void store(long dst, int dstIdx, BPlusIO<Pair> srcIo, long src, int srcIdx)
+ throws IgniteCheckedException {
+ Pair row = srcIo.getLookupRow(null, src, srcIdx);
+
+ store(dst, dstIdx, row, null, false);
+ }
+
+ /** {@inheritDoc} */
+ @Override public void storeByOffset(long pageAddr, int off, Pair row) {
+ PageUtils.putInt(pageAddr, off, row.getKey());
+ PageUtils.putInt(pageAddr, off + 4, row.getValue());
+ }
+
+ /** {@inheritDoc} */
+ @Override public Pair getLookupRow(BPlusTree<Pair, ?> tree, long pageAddr, int idx) {
+ int key = PageUtils.getInt(pageAddr, offset(idx));
+ int val = PageUtils.getInt(pageAddr, offset(idx) + 4);
+
+ return new Pair(key, val);
+ }
+ }
+
+ /** */
+ private static final class TestPairLeafIO extends BPlusLeafIO<Pair> {
+ /** */
+ TestPairLeafIO() {
+ super(PAIR_LEAF_IO, 1, 8);
+ }
+
+ /** {@inheritDoc} */
+ @Override public int getMaxCount(long pageAddr, int pageSize) {
+ return 2;
+ }
+
+ /** {@inheritDoc} */
+ @Override public void storeByOffset(long pageAddr, int off, Pair row) {
+ PageUtils.putInt(pageAddr, off, row.getKey());
+ PageUtils.putInt(pageAddr, off + 4, row.getValue());
+ }
+
+ /** {@inheritDoc} */
+ @Override public void store(long dst, int dstIdx, BPlusIO<Pair> srcIo, long src, int srcIdx) throws IgniteCheckedException {
+ Pair row = srcIo.getLookupRow(null, src, srcIdx);
+
+ store(dst, dstIdx, row, null, false);
+ }
+
+ /** {@inheritDoc} */
+ @Override public Pair getLookupRow(BPlusTree<Pair, ?> tree, long pageAddr, int idx) {
+ int key = PageUtils.getInt(pageAddr, offset(idx));
+ int val = PageUtils.getInt(pageAddr, offset(idx) + 4);
+
+ return new Pair(key, val);
+ }
+ }
+
+ /**
+ * Tests a very specific scenario for concurrent replace and remove that used to corrupt the tree before the fix.
+ * <p/>
+ *
+ * Consider the following tree, represented by a {@code tree} variable in test:<br>
+ * <pre><code>
+ * [ 5:0 ]
+ * / \
+ * [ 2:0 | 4:0 ] [ 6:0 ]
+ * / | \ / \
+ * [ 1:0 | 2:0 ] [ 3:0 | 4:0 ] [ 5:0 ] [ 6:0 ] [ 7:0 ]
+ * </code></pre>
+ *
+ * Individual replace {@code 4:0} to {@code 4:8} would take two steps and look like this:
+ * <pre><code>
+ * // Inner node goes first.
+ * [ 5:0 ]
+ * / \
+ * [ 2:0 | 4:8 ] [ 6:0 ]
+ * / | \ / \
+ * [ 1:0 | 2:0 ] [ 3:0 | 4:0 ] [ 5:0 ] [ 6:0 ] [ 7:0 ]
+ *
+ * // Leaf node goes last.
+ * [ 5:0 ]
+ * / \
+ * [ 2:0 | 4:8 ] [ 6:0 ]
+ * / | \ / \
+ * [ 1:0 | 2:0 ] [ 3:0 | 4:8 ] [ 5:0 ] [ 6:0 ] [ 7:0 ]
+ * </code></pre>
+ *
+ * Note that inbetween these two updates tree is fully unlocked and available for modifications. So, if one tries
+ * to remove {@code 5:0} during the replacement, following modifications would happen:
+ * <pre><code>
+ * // Inner node update from replacement goes first, as before.
+ * [ 5:0 ]
+ * / \
+ * [ 2:0 | 4:8 ] [ 6:0 ]
+ * / | \ / \
+ * [ 1:0 | 2:0 ] [ 3:0 | 4:0 ] [ 5:0 ] [ 6:0 ] [ 7:0 ]
+ *
+ * // Removal of 5:0 starts from the leaf.
+ * [ 5:0 ]
+ * / \
+ * [ 2:0 | 4:8 ] [ 6:0 ]
+ * / | \ / \
+ * [ 1:0 | 2:0 ] [ 3:0 | 4:0 ] [] [ 6:0 ] [ 7:0 ]
+ *
+ * // Merge of empty branch is now required, 4:8 is removed from inner node.
+ * [ 5:0 ]
+ * / \
+ * [ 2:0 ] [ 6:0 ]
+ * / \ / \
+ * [ 1:0 | 2:0 ] [ 3:0 | 4:0 ] [ 6:0 ] [ 7:0 ]
+ *
+ * // Inner replace is happening in the root. To do that, closest left value is retrieved from the leaf, it's 4:0.
+ * [ 4:0 ]
+ * / \
+ * [ 2:0 ] [ 6:0 ]
+ * / \ / \
+ * [ 1:0 | 2:0 ] [ 3:0 | 4:0 ] [ 6:0 ] [ 7:0 ]
+ *
+ * // At this point removal is complete. Last replacement step will do the following.
+ * [ 4:0 ]
+ * / \
+ * [ 2:0 ] [ 6:0 ]
+ * / \ / \
+ * [ 1:0 | 2:0 ] [ 3:0 | 4:8 ] [ 6:0 ] [ 7:0 ]
+ * </code></pre>
+ *
+ * It is clear that root has an invalid value {@code 4:0}, hence the tree should be considered corrupted.
+ * This is the exact situation that test is trying to check.
+ * <p/>
+ *
+ * Several iterations are required for this, given that there's no guaranteed way to force a tree to perform page
+ * modifications in the desired order. Typically, less than {@code 10} attempts have been required to get a
+ * corrupted tree. Value {@code 50} is arbitrary and has been chosen to be big enough for test to fail in case of
+ * regression, but not too big so that test won't run for too long.
+ *
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testConcurrentPutRemove() throws Exception {
+ for (int i = 0; i < 50; i++) {
+ TestPairTree tree = new TestPairTree(
+ null,
+ true,
+ CACHE_ID,
+ pageMem,
+ allocateMetaPage().pageId(),
+ lockTrackerManager
+ );
+
+ tree.putx(new Pair(1, 0));
+ tree.putx(new Pair(2, 0));
+ tree.putx(new Pair(4, 0));
+ tree.putx(new Pair(6, 0));
+ tree.putx(new Pair(7, 0));
+
+ // Split root.
+ tree.putx(new Pair(5, 0));
+
+ // Split its left subtree.
+ tree.putx(new Pair(3, 0));
+
+ // Exact tree from the description is constructed at this point.
+ CyclicBarrier barrier = new CyclicBarrier(2);
+
+ // This is the replace operation.
+ IgniteInternalFuture<?> putFut = GridTestUtils.runAsync(() -> {
+ try {
+ barrier.await();
+
+ tree.putx(new Pair(4, 999));
+ }
+ catch (IgniteCheckedException | BrokenBarrierException | InterruptedException e) {
+ throw new IgniteException(e);
+ }
+ });
+
+ // This is the remove opertation.
+ IgniteInternalFuture<?> remFut = GridTestUtils.runAsync(() -> {
+ try {
+ barrier.await();
+
+ tree.removex(new Pair(5, -1));
+ }
+ catch (IgniteCheckedException | BrokenBarrierException | InterruptedException e) {
+ throw new IgniteException(e);
+ }
+ });
+
+ // Wait for both operations.
+ try {
+ putFut.get(1, TimeUnit.SECONDS);
+ }
+ finally {
+ remFut.get(1, TimeUnit.SECONDS);
+ }
+
+ // Just in case.
+ tree.validateTree();
+
+ // Find a value associated with 4. It'll be right in the root page.
+ Pair pair = tree.findOne(new Pair(4, -1));
+
+ // Assert that it is valid.
+ assertEquals(999, pair.getValue().intValue());
+ }
+ }
+}
diff --git a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
index 30d8b8b..a209258 100644
--- a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
+++ b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
@@ -97,6 +97,7 @@ import org.apache.ignite.internal.processors.configuration.distributed.Distribut
import org.apache.ignite.internal.processors.continuous.GridEventConsumeSelfTest;
import org.apache.ignite.internal.processors.continuous.GridMessageListenSelfTest;
import org.apache.ignite.internal.processors.database.BPlusTreeFakeReuseSelfTest;
+import org.apache.ignite.internal.processors.database.BPlusTreeReplaceRemoveRaceTest;
import org.apache.ignite.internal.processors.database.BPlusTreeReuseSelfTest;
import org.apache.ignite.internal.processors.database.BPlusTreeSelfTest;
import org.apache.ignite.internal.processors.database.CacheFreeListSelfTest;
@@ -239,6 +240,7 @@ import org.junit.runners.Suite;
BPlusTreeSelfTest.class,
BPlusTreeFakeReuseSelfTest.class,
BPlusTreeReuseSelfTest.class,
+ BPlusTreeReplaceRemoveRaceTest.class,
IndexStorageSelfTest.class,
CacheFreeListSelfTest.class,
DataRegionMetricsSelfTest.class,