You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by jo...@apache.org on 2019/04/10 13:22:46 UTC

[ignite] branch master updated: IGNITE-10669 Properly handle free list corruption errors - Fixes #6207.

This is an automated email from the ASF dual-hosted git repository.

jokser pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 47da5df  IGNITE-10669 Properly handle free list corruption errors - Fixes #6207.
47da5df is described below

commit 47da5df328a18d0d55ba534b1af541b72df76901
Author: Pavel Kovalenko <jo...@gmail.com>
AuthorDate: Wed Apr 10 16:21:46 2019 +0300

    IGNITE-10669 Properly handle free list corruption errors - Fixes #6207.
    
    Signed-off-by: Pavel Kovalenko <jo...@gmail.com>
---
 ...ion.java => CorruptedPersistenceException.java} |  20 +--
 .../persistence/freelist/AbstractFreeList.java     | 152 ++++++++++++++-------
 .../CorruptedFreeListException.java}               |  10 +-
 .../persistence/tree/CorruptedTreeException.java   |   4 +-
 .../processors/failure/FailureProcessor.java       |   8 ++
 5 files changed, 122 insertions(+), 72 deletions(-)

diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CorruptedPersistenceException.java
similarity index 55%
copy from modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
copy to modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CorruptedPersistenceException.java
index a6bfb1f..6ba7cb3 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/CorruptedPersistenceException.java
@@ -15,24 +15,10 @@
  * limitations under the License.
  */
 
-package org.apache.ignite.internal.processors.cache.persistence.tree;
-
-import org.apache.ignite.IgniteCheckedException;
-import org.apache.ignite.internal.InvalidEnvironmentException;
-import org.jetbrains.annotations.Nullable;
+package org.apache.ignite.internal.processors.cache.persistence;
 
 /**
- * Exception to distinguish {@link BPlusTree} tree broken invariants.
+ * Marker interface to distinguish exceptions that were caused by broken invariants of persistence data structures.
  */
-public class CorruptedTreeException extends IgniteCheckedException implements InvalidEnvironmentException {
-    /** */
-    private static final long serialVersionUID = 0L;
-
-    /**
-     * @param msg Message.
-     * @param cause Cause.
-     */
-    public CorruptedTreeException(String msg, @Nullable Throwable cause) {
-        super(msg, cause);
-    }
+public interface CorruptedPersistenceException {
 }
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java
index 60aefb9..958fb31 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/AbstractFreeList.java
@@ -475,38 +475,46 @@ public abstract class AbstractFreeList<T extends Storable> extends PagesList imp
 
         int written = 0;
 
-        do {
-            if (written != 0)
-                memMetrics.incrementLargeEntriesPages();
+        try {
+            do {
+                if (written != 0)
+                    memMetrics.incrementLargeEntriesPages();
 
-            int remaining = rowSize - written;
+                int remaining = rowSize - written;
 
-            long pageId = 0L;
+                long pageId = 0L;
 
-            for (int b = remaining < MIN_SIZE_FOR_DATA_PAGE ? bucket(remaining, false) + 1 : REUSE_BUCKET; b < BUCKETS; b++) {
-                pageId = takeEmptyPage(b, ioVersions(), statHolder);
+                for (int b = remaining < MIN_SIZE_FOR_DATA_PAGE ? bucket(remaining, false) + 1 : REUSE_BUCKET; b < BUCKETS; b++) {
+                    pageId = takeEmptyPage(b, ioVersions(), statHolder);
 
-                if (pageId != 0L)
-                    break;
-            }
+                    if (pageId != 0L)
+                        break;
+                }
 
-            AbstractDataPageIO<T> initIo = null;
+                AbstractDataPageIO<T> initIo = null;
 
-            if (pageId == 0L) {
-                pageId = allocateDataPage(row.partition());
+                if (pageId == 0L) {
+                    pageId = allocateDataPage(row.partition());
 
-                initIo = ioVersions().latest();
-            }
-            else if (PageIdUtils.tag(pageId) != PageIdAllocator.FLAG_DATA)
-                pageId = initReusedPage(pageId, row.partition(), statHolder);
-            else
-                pageId = PageIdUtils.changePartitionId(pageId, (row.partition()));
+                    initIo = ioVersions().latest();
+                }
+                else if (PageIdUtils.tag(pageId) != PageIdAllocator.FLAG_DATA)
+                    pageId = initReusedPage(pageId, row.partition(), statHolder);
+                else
+                    pageId = PageIdUtils.changePartitionId(pageId, (row.partition()));
 
-            written = write(pageId, writeRow, initIo, row, written, FAIL_I, statHolder);
+                written = write(pageId, writeRow, initIo, row, written, FAIL_I, statHolder);
 
-            assert written != FAIL_I; // We can't fail here.
+                assert written != FAIL_I; // We can't fail here.
+            }
+            while (written != COMPLETE);
+        }
+        catch (IgniteCheckedException | Error e) {
+            throw e;
+        }
+        catch (Throwable t) {
+            throw new CorruptedFreeListException("Failed to insert data row", t);
         }
-        while (written != COMPLETE);
     }
 
     /**
@@ -543,14 +551,22 @@ public abstract class AbstractFreeList<T extends Storable> extends PagesList imp
         IoStatisticsHolder statHolder) throws IgniteCheckedException {
         assert link != 0;
 
-        long pageId = PageIdUtils.pageId(link);
-        int itemId = PageIdUtils.itemId(link);
+        try {
+            long pageId = PageIdUtils.pageId(link);
+            int itemId = PageIdUtils.itemId(link);
 
-        Boolean updated = write(pageId, updateRow, row, itemId, null, statHolder);
+            Boolean updated = write(pageId, updateRow, row, itemId, null, statHolder);
 
-        assert updated != null; // Can't fail here.
+            assert updated != null; // Can't fail here.
 
-        return updated;
+            return updated;
+        }
+        catch (IgniteCheckedException | Error e) {
+            throw e;
+        }
+        catch (Throwable t) {
+            throw new CorruptedFreeListException("Failed to update data row", t);
+        }
     }
 
     /** {@inheritDoc} */
@@ -558,41 +574,57 @@ public abstract class AbstractFreeList<T extends Storable> extends PagesList imp
         IoStatisticsHolder statHolder) throws IgniteCheckedException {
         assert link != 0;
 
-        long pageId = PageIdUtils.pageId(link);
-        int itemId = PageIdUtils.itemId(link);
+        try {
+            long pageId = PageIdUtils.pageId(link);
+            int itemId = PageIdUtils.itemId(link);
 
-        R updRes = write(pageId, pageHnd, arg, itemId, null, statHolder);
+            R updRes = write(pageId, pageHnd, arg, itemId, null, statHolder);
 
-        assert updRes != null; // Can't fail here.
+            assert updRes != null; // Can't fail here.
 
-        return updRes;
+            return updRes;
+        }
+        catch (IgniteCheckedException | Error e) {
+            throw e;
+        }
+        catch (Throwable t) {
+            throw new CorruptedFreeListException("Failed to update data row", t);
+        }
     }
 
     /** {@inheritDoc} */
     @Override public void removeDataRowByLink(long link, IoStatisticsHolder statHolder) throws IgniteCheckedException {
         assert link != 0;
 
-        long pageId = PageIdUtils.pageId(link);
-        int itemId = PageIdUtils.itemId(link);
+        try {
+            long pageId = PageIdUtils.pageId(link);
+            int itemId = PageIdUtils.itemId(link);
 
-        ReuseBag bag = new LongListReuseBag();
+            ReuseBag bag = new LongListReuseBag();
 
-        long nextLink = write(pageId, rmvRow, bag, itemId, FAIL_L, statHolder);
+            long nextLink = write(pageId, rmvRow, bag, itemId, FAIL_L, statHolder);
 
-        assert nextLink != FAIL_L; // Can't fail here.
+            assert nextLink != FAIL_L; // Can't fail here.
 
-        while (nextLink != 0L) {
-            memMetrics.decrementLargeEntriesPages();
+            while (nextLink != 0L) {
+                memMetrics.decrementLargeEntriesPages();
 
-            itemId = PageIdUtils.itemId(nextLink);
-            pageId = PageIdUtils.pageId(nextLink);
+                itemId = PageIdUtils.itemId(nextLink);
+                pageId = PageIdUtils.pageId(nextLink);
 
-            nextLink = write(pageId, rmvRow, bag, itemId, FAIL_L, statHolder);
+                nextLink = write(pageId, rmvRow, bag, itemId, FAIL_L, statHolder);
 
-            assert nextLink != FAIL_L; // Can't fail here.
-        }
+                assert nextLink != FAIL_L; // Can't fail here.
+            }
 
-        reuseList.addForRecycle(bag);
+            reuseList.addForRecycle(bag);
+        }
+        catch (IgniteCheckedException | Error e) {
+            throw e;
+        }
+        catch (Throwable t) {
+            throw new CorruptedFreeListException("Failed to remove data by link", t);
+        }
     }
 
     /** {@inheritDoc} */
@@ -621,21 +653,45 @@ public abstract class AbstractFreeList<T extends Storable> extends PagesList imp
     @Override public void addForRecycle(ReuseBag bag) throws IgniteCheckedException {
         assert reuseList == this : "not allowed to be a reuse list";
 
-        put(bag, 0, 0, 0L, REUSE_BUCKET, IoStatisticsHolderNoOp.INSTANCE);
+        try {
+            put(bag, 0, 0, 0L, REUSE_BUCKET, IoStatisticsHolderNoOp.INSTANCE);
+        }
+        catch (IgniteCheckedException | Error e) {
+            throw e;
+        }
+        catch (Throwable t) {
+            throw new CorruptedFreeListException("Failed to add page for recycle", t);
+        }
     }
 
     /** {@inheritDoc} */
     @Override public long takeRecycledPage() throws IgniteCheckedException {
         assert reuseList == this : "not allowed to be a reuse list";
 
-        return takeEmptyPage(REUSE_BUCKET, null, IoStatisticsHolderNoOp.INSTANCE);
+        try {
+            return takeEmptyPage(REUSE_BUCKET, null, IoStatisticsHolderNoOp.INSTANCE);
+        }
+        catch (IgniteCheckedException | Error e) {
+            throw e;
+        }
+        catch (Throwable t) {
+            throw new CorruptedFreeListException("Failed to take recycled page", t);
+        }
     }
 
     /** {@inheritDoc} */
     @Override public long recycledPagesCount() throws IgniteCheckedException {
         assert reuseList == this : "not allowed to be a reuse list";
 
-        return storedPagesCount(REUSE_BUCKET);
+        try {
+            return storedPagesCount(REUSE_BUCKET);
+        }
+        catch (IgniteCheckedException | Error e) {
+            throw e;
+        }
+        catch (Throwable t) {
+            throw new CorruptedFreeListException("Failed to count recycled pages", t);
+        }
     }
 
     /**
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/CorruptedFreeListException.java
similarity index 75%
copy from modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
copy to modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/CorruptedFreeListException.java
index a6bfb1f..eb94c63 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/freelist/CorruptedFreeListException.java
@@ -15,16 +15,16 @@
  * limitations under the License.
  */
 
-package org.apache.ignite.internal.processors.cache.persistence.tree;
+package org.apache.ignite.internal.processors.cache.persistence.freelist;
 
 import org.apache.ignite.IgniteCheckedException;
-import org.apache.ignite.internal.InvalidEnvironmentException;
+import org.apache.ignite.internal.processors.cache.persistence.CorruptedPersistenceException;
 import org.jetbrains.annotations.Nullable;
 
 /**
- * Exception to distinguish {@link BPlusTree} tree broken invariants.
+ * Exception to distinguish {@link AbstractFreeList} broken invariants.
  */
-public class CorruptedTreeException extends IgniteCheckedException implements InvalidEnvironmentException {
+public class CorruptedFreeListException extends IgniteCheckedException implements CorruptedPersistenceException {
     /** */
     private static final long serialVersionUID = 0L;
 
@@ -32,7 +32,7 @@ public class CorruptedTreeException extends IgniteCheckedException implements In
      * @param msg Message.
      * @param cause Cause.
      */
-    public CorruptedTreeException(String msg, @Nullable Throwable cause) {
+    public CorruptedFreeListException(String msg, @Nullable Throwable cause) {
         super(msg, cause);
     }
 }
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
index a6bfb1f..fbd8d73 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/CorruptedTreeException.java
@@ -18,13 +18,13 @@
 package org.apache.ignite.internal.processors.cache.persistence.tree;
 
 import org.apache.ignite.IgniteCheckedException;
-import org.apache.ignite.internal.InvalidEnvironmentException;
+import org.apache.ignite.internal.processors.cache.persistence.CorruptedPersistenceException;
 import org.jetbrains.annotations.Nullable;
 
 /**
  * Exception to distinguish {@link BPlusTree} tree broken invariants.
  */
-public class CorruptedTreeException extends IgniteCheckedException implements InvalidEnvironmentException {
+public class CorruptedTreeException extends IgniteCheckedException implements CorruptedPersistenceException {
     /** */
     private static final long serialVersionUID = 0L;
 
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
index f00d7b6..2dcdd37 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
@@ -27,6 +27,7 @@ import org.apache.ignite.failure.NoOpFailureHandler;
 import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
 import org.apache.ignite.internal.GridKernalContext;
 import org.apache.ignite.internal.processors.GridProcessorAdapter;
+import org.apache.ignite.internal.processors.cache.persistence.CorruptedPersistenceException;
 import org.apache.ignite.internal.util.typedef.X;
 import org.apache.ignite.internal.util.typedef.internal.U;
 
@@ -129,6 +130,13 @@ public class FailureProcessor extends GridProcessorAdapter {
         if (reserveBuf != null && X.hasCause(failureCtx.error(), OutOfMemoryError.class))
             reserveBuf = null;
 
+        if (X.hasCause(failureCtx.error(), CorruptedPersistenceException.class))
+            log.error("A critical problem with persistence data structures was detected." +
+                " Please make backup of persistence storage and WAL files for further analysis." +
+                " Persistence storage path: " + ctx.config().getDataStorageConfiguration().getStoragePath() +
+                " WAL path: " + ctx.config().getDataStorageConfiguration().getWalPath() +
+                " WAL archive path: " + ctx.config().getDataStorageConfiguration().getWalArchivePath());
+
         if (IGNITE_DUMP_THREADS_ON_FAILURE)
             U.dumpThreads(log);