You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by jo...@apache.org on 2019/04/10 13:22:46 UTC
[ignite] branch master updated: IGNITE-10669 Properly handle free list corruption errors - Fixes #6207.
This is an automated email from the ASF dual-hosted git repository.
jokser pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 47da5df IGNITE-10669 Properly handle free list corruption errors - Fixes #6207.
47da5df is described below
commit 47da5df328a18d0d55ba534b1af541b72df76901
Author: Pavel Kovalenko <jo...@gmail.com>
AuthorDate: Wed Apr 10 16:21:46 2019 +0300
IGNITE-10669 Properly handle free list corruption errors - Fixes #6207.
Signed-off-by: Pavel Kovalenko <jo...@gmail.com>
---
...ion.java => CorruptedPersistenceException.java} | 20 +--
.../persistence/freelist/AbstractFreeList.java | 152 ++++++++++++++-------
.../CorruptedFreeListException.java} | 10 +-
.../persistence/tree/CorruptedTreeException.java | 4 +-
.../processors/failure/FailureProcessor.java | 8 ++
5 files changed, 122 insertions(+), 72 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CorruptedPersistenceException.java
similarity index 55%
copy from modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
copy to modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CorruptedPersistenceException.java
index a6bfb1f..6ba7cb3 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CorruptedPersistenceException.java
@@ -15,24 +15,10 @@
* limitations under the License.
*/
-package org.apache.ignite.internal.processors.cache.persistence.tree;
-
-import org.apache.ignite.IgniteCheckedException;
-import org.apache.ignite.internal.InvalidEnvironmentException;
-import org.jetbrains.annotations.Nullable;
+package org.apache.ignite.internal.processors.cache.persistence;
/**
- * Exception to distinguish {@link BPlusTree} tree broken invariants.
+ * Marker interface to distinguish exceptions that were caused by broken invariants of persistence data structures.
*/
-public class CorruptedTreeException extends IgniteCheckedException implements InvalidEnvironmentException {
- /** */
- private static final long serialVersionUID = 0L;
-
- /**
- * @param msg Message.
- * @param cause Cause.
- */
- public CorruptedTreeException(String msg, @Nullable Throwable cause) {
- super(msg, cause);
- }
+public interface CorruptedPersistenceException {
}
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java
index 60aefb9..958fb31 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java
@@ -475,38 +475,46 @@ public abstract class AbstractFreeList<T extends Storable> extends PagesList imp
int written = 0;
- do {
- if (written != 0)
- memMetrics.incrementLargeEntriesPages();
+ try {
+ do {
+ if (written != 0)
+ memMetrics.incrementLargeEntriesPages();
- int remaining = rowSize - written;
+ int remaining = rowSize - written;
- long pageId = 0L;
+ long pageId = 0L;
- for (int b = remaining < MIN_SIZE_FOR_DATA_PAGE ? bucket(remaining, false) + 1 : REUSE_BUCKET; b < BUCKETS; b++) {
- pageId = takeEmptyPage(b, ioVersions(), statHolder);
+ for (int b = remaining < MIN_SIZE_FOR_DATA_PAGE ? bucket(remaining, false) + 1 : REUSE_BUCKET; b < BUCKETS; b++) {
+ pageId = takeEmptyPage(b, ioVersions(), statHolder);
- if (pageId != 0L)
- break;
- }
+ if (pageId != 0L)
+ break;
+ }
- AbstractDataPageIO<T> initIo = null;
+ AbstractDataPageIO<T> initIo = null;
- if (pageId == 0L) {
- pageId = allocateDataPage(row.partition());
+ if (pageId == 0L) {
+ pageId = allocateDataPage(row.partition());
- initIo = ioVersions().latest();
- }
- else if (PageIdUtils.tag(pageId) != PageIdAllocator.FLAG_DATA)
- pageId = initReusedPage(pageId, row.partition(), statHolder);
- else
- pageId = PageIdUtils.changePartitionId(pageId, (row.partition()));
+ initIo = ioVersions().latest();
+ }
+ else if (PageIdUtils.tag(pageId) != PageIdAllocator.FLAG_DATA)
+ pageId = initReusedPage(pageId, row.partition(), statHolder);
+ else
+ pageId = PageIdUtils.changePartitionId(pageId, (row.partition()));
- written = write(pageId, writeRow, initIo, row, written, FAIL_I, statHolder);
+ written = write(pageId, writeRow, initIo, row, written, FAIL_I, statHolder);
- assert written != FAIL_I; // We can't fail here.
+ assert written != FAIL_I; // We can't fail here.
+ }
+ while (written != COMPLETE);
+ }
+ catch (IgniteCheckedException | Error e) {
+ throw e;
+ }
+ catch (Throwable t) {
+ throw new CorruptedFreeListException("Failed to insert data row", t);
}
- while (written != COMPLETE);
}
/**
@@ -543,14 +551,22 @@ public abstract class AbstractFreeList<T extends Storable> extends PagesList imp
IoStatisticsHolder statHolder) throws IgniteCheckedException {
assert link != 0;
- long pageId = PageIdUtils.pageId(link);
- int itemId = PageIdUtils.itemId(link);
+ try {
+ long pageId = PageIdUtils.pageId(link);
+ int itemId = PageIdUtils.itemId(link);
- Boolean updated = write(pageId, updateRow, row, itemId, null, statHolder);
+ Boolean updated = write(pageId, updateRow, row, itemId, null, statHolder);
- assert updated != null; // Can't fail here.
+ assert updated != null; // Can't fail here.
- return updated;
+ return updated;
+ }
+ catch (IgniteCheckedException | Error e) {
+ throw e;
+ }
+ catch (Throwable t) {
+ throw new CorruptedFreeListException("Failed to update data row", t);
+ }
}
/** {@inheritDoc} */
@@ -558,41 +574,57 @@ public abstract class AbstractFreeList<T extends Storable> extends PagesList imp
IoStatisticsHolder statHolder) throws IgniteCheckedException {
assert link != 0;
- long pageId = PageIdUtils.pageId(link);
- int itemId = PageIdUtils.itemId(link);
+ try {
+ long pageId = PageIdUtils.pageId(link);
+ int itemId = PageIdUtils.itemId(link);
- R updRes = write(pageId, pageHnd, arg, itemId, null, statHolder);
+ R updRes = write(pageId, pageHnd, arg, itemId, null, statHolder);
- assert updRes != null; // Can't fail here.
+ assert updRes != null; // Can't fail here.
- return updRes;
+ return updRes;
+ }
+ catch (IgniteCheckedException | Error e) {
+ throw e;
+ }
+ catch (Throwable t) {
+ throw new CorruptedFreeListException("Failed to update data row", t);
+ }
}
/** {@inheritDoc} */
@Override public void removeDataRowByLink(long link, IoStatisticsHolder statHolder) throws IgniteCheckedException {
assert link != 0;
- long pageId = PageIdUtils.pageId(link);
- int itemId = PageIdUtils.itemId(link);
+ try {
+ long pageId = PageIdUtils.pageId(link);
+ int itemId = PageIdUtils.itemId(link);
- ReuseBag bag = new LongListReuseBag();
+ ReuseBag bag = new LongListReuseBag();
- long nextLink = write(pageId, rmvRow, bag, itemId, FAIL_L, statHolder);
+ long nextLink = write(pageId, rmvRow, bag, itemId, FAIL_L, statHolder);
- assert nextLink != FAIL_L; // Can't fail here.
+ assert nextLink != FAIL_L; // Can't fail here.
- while (nextLink != 0L) {
- memMetrics.decrementLargeEntriesPages();
+ while (nextLink != 0L) {
+ memMetrics.decrementLargeEntriesPages();
- itemId = PageIdUtils.itemId(nextLink);
- pageId = PageIdUtils.pageId(nextLink);
+ itemId = PageIdUtils.itemId(nextLink);
+ pageId = PageIdUtils.pageId(nextLink);
- nextLink = write(pageId, rmvRow, bag, itemId, FAIL_L, statHolder);
+ nextLink = write(pageId, rmvRow, bag, itemId, FAIL_L, statHolder);
- assert nextLink != FAIL_L; // Can't fail here.
- }
+ assert nextLink != FAIL_L; // Can't fail here.
+ }
- reuseList.addForRecycle(bag);
+ reuseList.addForRecycle(bag);
+ }
+ catch (IgniteCheckedException | Error e) {
+ throw e;
+ }
+ catch (Throwable t) {
+ throw new CorruptedFreeListException("Failed to remove data by link", t);
+ }
}
/** {@inheritDoc} */
@@ -621,21 +653,45 @@ public abstract class AbstractFreeList<T extends Storable> extends PagesList imp
@Override public void addForRecycle(ReuseBag bag) throws IgniteCheckedException {
assert reuseList == this : "not allowed to be a reuse list";
- put(bag, 0, 0, 0L, REUSE_BUCKET, IoStatisticsHolderNoOp.INSTANCE);
+ try {
+ put(bag, 0, 0, 0L, REUSE_BUCKET, IoStatisticsHolderNoOp.INSTANCE);
+ }
+ catch (IgniteCheckedException | Error e) {
+ throw e;
+ }
+ catch (Throwable t) {
+ throw new CorruptedFreeListException("Failed to add page for recycle", t);
+ }
}
/** {@inheritDoc} */
@Override public long takeRecycledPage() throws IgniteCheckedException {
assert reuseList == this : "not allowed to be a reuse list";
- return takeEmptyPage(REUSE_BUCKET, null, IoStatisticsHolderNoOp.INSTANCE);
+ try {
+ return takeEmptyPage(REUSE_BUCKET, null, IoStatisticsHolderNoOp.INSTANCE);
+ }
+ catch (IgniteCheckedException | Error e) {
+ throw e;
+ }
+ catch (Throwable t) {
+ throw new CorruptedFreeListException("Failed to take recycled page", t);
+ }
}
/** {@inheritDoc} */
@Override public long recycledPagesCount() throws IgniteCheckedException {
assert reuseList == this : "not allowed to be a reuse list";
- return storedPagesCount(REUSE_BUCKET);
+ try {
+ return storedPagesCount(REUSE_BUCKET);
+ }
+ catch (IgniteCheckedException | Error e) {
+ throw e;
+ }
+ catch (Throwable t) {
+ throw new CorruptedFreeListException("Failed to count recycled pages", t);
+ }
}
/**
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/CorruptedFreeListException.java
similarity index 75%
copy from modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
copy to modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/CorruptedFreeListException.java
index a6bfb1f..eb94c63 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/CorruptedFreeListException.java
@@ -15,16 +15,16 @@
* limitations under the License.
*/
-package org.apache.ignite.internal.processors.cache.persistence.tree;
+package org.apache.ignite.internal.processors.cache.persistence.freelist;
import org.apache.ignite.IgniteCheckedException;
-import org.apache.ignite.internal.InvalidEnvironmentException;
+import org.apache.ignite.internal.processors.cache.persistence.CorruptedPersistenceException;
import org.jetbrains.annotations.Nullable;
/**
- * Exception to distinguish {@link BPlusTree} tree broken invariants.
+ * Exception to distinguish {@link AbstractFreeList} broken invariants.
*/
-public class CorruptedTreeException extends IgniteCheckedException implements InvalidEnvironmentException {
+public class CorruptedFreeListException extends IgniteCheckedException implements CorruptedPersistenceException {
/** */
private static final long serialVersionUID = 0L;
@@ -32,7 +32,7 @@ public class CorruptedTreeException extends IgniteCheckedException implements In
* @param msg Message.
* @param cause Cause.
*/
- public CorruptedTreeException(String msg, @Nullable Throwable cause) {
+ public CorruptedFreeListException(String msg, @Nullable Throwable cause) {
super(msg, cause);
}
}
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
index a6bfb1f..fbd8d73 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
@@ -18,13 +18,13 @@
package org.apache.ignite.internal.processors.cache.persistence.tree;
import org.apache.ignite.IgniteCheckedException;
-import org.apache.ignite.internal.InvalidEnvironmentException;
+import org.apache.ignite.internal.processors.cache.persistence.CorruptedPersistenceException;
import org.jetbrains.annotations.Nullable;
/**
* Exception to distinguish {@link BPlusTree} tree broken invariants.
*/
-public class CorruptedTreeException extends IgniteCheckedException implements InvalidEnvironmentException {
+public class CorruptedTreeException extends IgniteCheckedException implements CorruptedPersistenceException {
/** */
private static final long serialVersionUID = 0L;
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
index f00d7b6..2dcdd37 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
@@ -27,6 +27,7 @@ import org.apache.ignite.failure.NoOpFailureHandler;
import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
import org.apache.ignite.internal.GridKernalContext;
import org.apache.ignite.internal.processors.GridProcessorAdapter;
+import org.apache.ignite.internal.processors.cache.persistence.CorruptedPersistenceException;
import org.apache.ignite.internal.util.typedef.X;
import org.apache.ignite.internal.util.typedef.internal.U;
@@ -129,6 +130,13 @@ public class FailureProcessor extends GridProcessorAdapter {
if (reserveBuf != null && X.hasCause(failureCtx.error(), OutOfMemoryError.class))
reserveBuf = null;
+ if (X.hasCause(failureCtx.error(), CorruptedPersistenceException.class))
+ log.error("A critical problem with persistence data structures was detected." +
+ " Please make backup of persistence storage and WAL files for further analysis." +
+ " Persistence storage path: " + ctx.config().getDataStorageConfiguration().getStoragePath() +
+ " WAL path: " + ctx.config().getDataStorageConfiguration().getWalPath() +
+ " WAL archive path: " + ctx.config().getDataStorageConfiguration().getWalArchivePath());
+
if (IGNITE_DUMP_THREADS_ON_FAILURE)
U.dumpThreads(log);