You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by dp...@apache.org on 2019/03/29 13:01:58 UTC

[ignite] branch ignite-2.7.5 updated: IGNITE-10003 Introduced SYSTEM_CRITICAL_OPERATION_TIMEOUT failure type

This is an automated email from the ASF dual-hosted git repository.

dpavlov pushed a commit to branch ignite-2.7.5
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/ignite-2.7.5 by this push:
     new e3a9add  IGNITE-10003 Introduced SYSTEM_CRITICAL_OPERATION_TIMEOUT failure type
e3a9add is described below

commit e3a9addc6a0cb277d364e7488f83023b97c76f70
Author: Andrey Kuznetsov <st...@gmail.com>
AuthorDate: Mon Dec 24 17:38:37 2018 +0300

    IGNITE-10003 Introduced SYSTEM_CRITICAL_OPERATION_TIMEOUT failure type
    
    Signed-off-by: Andrey Gura <ag...@apache.org>
    
    (cherry picked from commit 76ad0a4d10ac761f5cdec01e6ec4d7299e434a07)
    
    Fixes #5084
---
 .../ignite/failure/AbstractFailureHandler.java     |   5 +-
 .../org/apache/ignite/failure/FailureType.java     |   5 +-
 .../GridCacheDatabaseSharedManager.java            |   5 +-
 .../ignite/failure/SystemWorkersBlockingTest.java  |  17 ++-
 .../persistence/CheckpointReadLockFailureTest.java | 125 +++++++++++++++++++++
 .../IgniteBasicWithPersistenceTestSuite.java       |   7 +-
 6 files changed, 156 insertions(+), 8 deletions(-)

diff --git a/modules/core/src/main/java/org/apache/ignite/failure/AbstractFailureHandler.java b/modules/core/src/main/java/org/apache/ignite/failure/AbstractFailureHandler.java
index d3685c0..79b1f8f 100644
--- a/modules/core/src/main/java/org/apache/ignite/failure/AbstractFailureHandler.java
+++ b/modules/core/src/main/java/org/apache/ignite/failure/AbstractFailureHandler.java
@@ -18,11 +18,13 @@
 package org.apache.ignite.failure;
 
 import java.util.Collections;
+import java.util.EnumSet;
 import java.util.Set;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.internal.util.tostring.GridToStringInclude;
 import org.apache.ignite.internal.util.typedef.internal.S;
 
+import static org.apache.ignite.failure.FailureType.SYSTEM_CRITICAL_OPERATION_TIMEOUT;
 import static org.apache.ignite.failure.FailureType.SYSTEM_WORKER_BLOCKED;
 
 /**
@@ -33,7 +35,8 @@ import static org.apache.ignite.failure.FailureType.SYSTEM_WORKER_BLOCKED;
 public abstract class AbstractFailureHandler implements FailureHandler {
     /** */
     @GridToStringInclude
-    private Set<FailureType> ignoredFailureTypes = Collections.singleton(SYSTEM_WORKER_BLOCKED);
+    private Set<FailureType> ignoredFailureTypes =
+            Collections.unmodifiableSet(EnumSet.of(SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT));
 
     /**
      * Sets failure types that must be ignored by failure handler.
diff --git a/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java b/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java
index fbd5529f..114e432 100644
--- a/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java
+++ b/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java
@@ -31,5 +31,8 @@ public enum FailureType {
     SYSTEM_WORKER_BLOCKED,
 
     /** Critical error - error which leads to the system's inoperability. */
-    CRITICAL_ERROR
+    CRITICAL_ERROR,
+
+    /** System-critical operation has been timed out. */
+    SYSTEM_CRITICAL_OPERATION_TIMEOUT
 }
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
index da965f8..c4abf8f 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
@@ -173,6 +173,7 @@ import static org.apache.ignite.IgniteSystemProperties.IGNITE_CHECKPOINT_READ_LO
 import static org.apache.ignite.IgniteSystemProperties.IGNITE_PDS_SKIP_CRC;
 import static org.apache.ignite.IgniteSystemProperties.IGNITE_PDS_WAL_REBALANCE_THRESHOLD;
 import static org.apache.ignite.failure.FailureType.CRITICAL_ERROR;
+import static org.apache.ignite.failure.FailureType.SYSTEM_CRITICAL_OPERATION_TIMEOUT;
 import static org.apache.ignite.failure.FailureType.SYSTEM_WORKER_TERMINATION;
 import static org.apache.ignite.internal.pagemem.wal.record.WALRecord.RecordType.CHECKPOINT_RECORD;
 import static org.apache.ignite.internal.processors.cache.persistence.metastorage.MetaStorage.METASTORAGE_CACHE_ID;
@@ -260,7 +261,7 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
     private volatile GridFutureAdapter<Void> enableChangeApplied;
 
     /** */
-    private ReentrantReadWriteLock checkpointLock = new ReentrantReadWriteLock();
+    ReentrantReadWriteLock checkpointLock = new ReentrantReadWriteLock();
 
     /** */
     private long checkpointFreq;
@@ -1545,7 +1546,7 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
 
         IgniteException e = new IgniteException(msg);
 
-        if (cctx.kernalContext().failure().process(new FailureContext(CRITICAL_ERROR, e)))
+        if (cctx.kernalContext().failure().process(new FailureContext(SYSTEM_CRITICAL_OPERATION_TIMEOUT, e)))
             throw e;
 
         throw new CheckpointReadLockTimeoutException(msg);
diff --git a/modules/core/src/test/java/org/apache/ignite/failure/SystemWorkersBlockingTest.java b/modules/core/src/test/java/org/apache/ignite/failure/SystemWorkersBlockingTest.java
index 3ca7948..28c5897 100644
--- a/modules/core/src/test/java/org/apache/ignite/failure/SystemWorkersBlockingTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/failure/SystemWorkersBlockingTest.java
@@ -17,6 +17,8 @@
 
 package org.apache.ignite.failure;
 
+import java.util.HashSet;
+import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import org.apache.ignite.Ignite;
@@ -40,13 +42,22 @@ public class SystemWorkersBlockingTest extends GridCommonAbstractTest {
     @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
         IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
 
-        cfg.setFailureHandler(new AbstractFailureHandler() {
+        AbstractFailureHandler failureHnd = new AbstractFailureHandler() {
             @Override protected boolean handle(Ignite ignite, FailureContext failureCtx) {
-                hndLatch.countDown();
+                if (failureCtx.type() == FailureType.SYSTEM_WORKER_BLOCKED)
+                    hndLatch.countDown();
 
                 return false;
             }
-        });
+        };
+
+        Set<FailureType> ignoredFailureTypes = new HashSet<>(failureHnd.getIgnoredFailureTypes());
+
+        ignoredFailureTypes.remove(FailureType.SYSTEM_WORKER_BLOCKED);
+
+        failureHnd.setIgnoredFailureTypes(ignoredFailureTypes);
+
+        cfg.setFailureHandler(failureHnd);
 
         cfg.setFailureDetectionTimeout(FAILURE_DETECTION_TIMEOUT);
 
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/CheckpointReadLockFailureTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/CheckpointReadLockFailureTest.java
new file mode 100644
index 0000000..2ec5c2d
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/CheckpointReadLockFailureTest.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.cache.persistence;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.configuration.DataRegionConfiguration;
+import org.apache.ignite.configuration.DataStorageConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.failure.AbstractFailureHandler;
+import org.apache.ignite.failure.FailureContext;
+import org.apache.ignite.failure.FailureType;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.IgniteInternalFuture;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+
+/**
+ * Tests critical failure handling on checkpoint read lock acquisition errors.
+ */
+public class CheckpointReadLockFailureTest extends GridCommonAbstractTest {
+    /** Handler that trips {@link #hndLatch} on SYSTEM_CRITICAL_OPERATION_TIMEOUT and nothing else. */
+    private static final AbstractFailureHandler FAILURE_HND = new AbstractFailureHandler() {
+        @Override protected boolean handle(Ignite ignite, FailureContext failureCtx) {
+            if (failureCtx.type() != FailureType.SYSTEM_CRITICAL_OPERATION_TIMEOUT)
+                return true;
+
+            if (hndLatch != null)
+                hndLatch.countDown();
+
+            return false;
+        }
+    };
+
+    /** Latch counted down once the expected failure type is observed; set by the test before node start. */
+    private static volatile CountDownLatch hndLatch;
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+        return super.getConfiguration(igniteInstanceName)
+            .setFailureHandler(FAILURE_HND)
+            .setDataStorageConfiguration(new DataStorageConfiguration()
+                .setDefaultDataRegionConfiguration(new DataRegionConfiguration()
+                    .setPersistenceEnabled(true))
+                .setCheckpointFrequency(Integer.MAX_VALUE)
+                .setCheckpointReadLockTimeout(1));
+    }
+
+    /** {@inheritDoc} */
+    @Override protected void beforeTestsStarted() throws Exception {
+        // The handler's default ignored set contains this type; drop it so the failure is not ignored.
+        Set<FailureType> ignoredFailureTypes = new HashSet<>(FAILURE_HND.getIgnoredFailureTypes());
+        ignoredFailureTypes.remove(FailureType.SYSTEM_CRITICAL_OPERATION_TIMEOUT);
+        FAILURE_HND.setIgnoredFailureTypes(ignoredFailureTypes);
+    }
+
+    /** {@inheritDoc} */
+    @Override protected void beforeTest() throws Exception {
+        cleanPersistenceDir();
+    }
+
+    /** {@inheritDoc} */
+    @Override protected void afterTest() throws Exception {
+        cleanPersistenceDir();
+    }
+
+    /**
+     * Holds the checkpoint write lock and checks that a timed-out read lock attempt reaches the failure handler.
+     *
+     * @throws Exception If failed.
+     */
+    public void testFailureTypeOnTimeout() throws Exception {
+        hndLatch = new CountDownLatch(1);
+
+        IgniteEx ig = startGrid(0);
+        ig.cluster().active(true);
+
+        GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager)ig.context().cache().context().database();
+
+        IgniteInternalFuture<?> acquireWriteLock = GridTestUtils.runAsync(() -> {
+            db.checkpointLock.writeLock().lock();
+            try {
+                doSleep(Long.MAX_VALUE);
+            }
+            finally {
+                db.checkpointLock.writeLock().unlock();
+            }
+        });
+
+        // The predicate runs on this thread, so query the lock itself instead of isHeldByCurrentThread().
+        assertTrue(GridTestUtils.waitForCondition(db.checkpointLock::isWriteLocked, 5000));
+
+        IgniteInternalFuture<?> acquireReadLock = GridTestUtils.runAsync(() -> {
+            db.checkpointReadLock();
+            db.checkpointReadUnlock();
+        });
+
+        assertTrue(hndLatch.await(5, TimeUnit.SECONDS));
+
+        acquireWriteLock.cancel();
+        acquireReadLock.get(5, TimeUnit.SECONDS);
+
+        GridTestUtils.waitForCondition(acquireWriteLock::isCancelled, 5000);
+
+        stopGrid(0);
+    }
+}
diff --git a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicWithPersistenceTestSuite.java b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicWithPersistenceTestSuite.java
index 68085db..5d29664 100644
--- a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicWithPersistenceTestSuite.java
+++ b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicWithPersistenceTestSuite.java
@@ -18,11 +18,14 @@
 package org.apache.ignite.testsuites;
 
 import java.util.Set;
+import junit.framework.JUnit4TestAdapter;
 import junit.framework.TestSuite;
 import org.apache.ignite.failure.FailureHandlingConfigurationTest;
 import org.apache.ignite.failure.IoomFailureHandlerTest;
+import org.apache.ignite.failure.SystemWorkersBlockingTest;
 import org.apache.ignite.failure.SystemWorkersTerminationTest;
 import org.apache.ignite.internal.ClusterBaselineNodesMetricsSelfTest;
+import org.apache.ignite.internal.GridNodeMetricsLogPdsSelfTest;
 import org.apache.ignite.internal.encryption.EncryptedCacheBigEntryTest;
 import org.apache.ignite.internal.encryption.EncryptedCacheCreateTest;
 import org.apache.ignite.internal.encryption.EncryptedCacheDestroyTest;
@@ -30,7 +33,7 @@ import org.apache.ignite.internal.encryption.EncryptedCacheGroupCreateTest;
 import org.apache.ignite.internal.encryption.EncryptedCacheNodeJoinTest;
 import org.apache.ignite.internal.encryption.EncryptedCachePreconfiguredRestartTest;
 import org.apache.ignite.internal.encryption.EncryptedCacheRestartTest;
-import org.apache.ignite.internal.GridNodeMetricsLogPdsSelfTest;
+import org.apache.ignite.internal.processors.cache.persistence.CheckpointReadLockFailureTest;
 import org.apache.ignite.internal.processors.service.ServiceDeploymentOnActivationTest;
 import org.apache.ignite.internal.processors.service.ServiceDeploymentOutsideBaselineTest;
 import org.apache.ignite.marshaller.GridMarshallerMappingConsistencyTest;
@@ -65,6 +68,8 @@ public class IgniteBasicWithPersistenceTestSuite extends TestSuite {
         suite.addTestSuite(GridMarshallerMappingConsistencyTest.class);
         suite.addTestSuite(SystemWorkersTerminationTest.class);
         suite.addTestSuite(FailureHandlingConfigurationTest.class);
+        suite.addTest(new JUnit4TestAdapter(SystemWorkersBlockingTest.class));
+        suite.addTest(new JUnit4TestAdapter(CheckpointReadLockFailureTest.class));
 
         suite.addTestSuite(GridCommandHandlerTest.class);
         suite.addTestSuite(GridInternalTaskUnusedWalSegmentsTest.class);