You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ag...@apache.org on 2020/01/13 09:29:02 UTC

[ignite] branch master updated: IGNITE-12523 Added throttling for thread dumps generation on system failure.

This is an automated email from the ASF dual-hosted git repository.

agura pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 594061d  IGNITE-12523 Added throttling for thread dumps generation on system failure.
594061d is described below

commit 594061d906749c38b718763c5753d99281b36728
Author: Andrey Gura <ag...@apache.org>
AuthorDate: Thu Dec 19 20:20:12 2019 +0300

    IGNITE-12523 Added throttling for thread dumps generation on system failure.
---
 .../org/apache/ignite/IgniteSystemProperties.java  |   8 +
 .../processors/failure/FailureProcessor.java       |  63 ++++++-
 .../FailureProcessorThreadDumpThrottlingTest.java  | 203 +++++++++++++++++++++
 .../ignite/testsuites/IgniteBasicTestSuite.java    |   2 +
 4 files changed, 273 insertions(+), 3 deletions(-)

diff --git a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
index 53b7a9a..3388071 100644
--- a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
+++ b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
@@ -987,6 +987,14 @@ public final class IgniteSystemProperties {
     public static final String IGNITE_DUMP_THREADS_ON_FAILURE = "IGNITE_DUMP_THREADS_ON_FAILURE";
 
     /**
+     * Throttling timeout in milliseconds for thread dump generation during failure handling.
+     *
+     * Default is the failure detection timeout. A value of {@code 0} or a negative value disables throttling.
+     */
+    public static final String IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT =
+            "IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT";
+
+    /**
      * Throttling timeout in millis which avoid excessive PendingTree access on unwind if there is nothing to clean yet.
      *
      * Default is 500 ms.
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
index 19495eb..7980a4f 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
@@ -17,6 +17,8 @@
 
 package org.apache.ignite.internal.processors.failure;
 
+import java.util.EnumMap;
+import java.util.Map;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCheckedException;
 import org.apache.ignite.IgniteSystemProperties;
@@ -24,6 +26,7 @@ import org.apache.ignite.configuration.IgniteConfiguration;
 import org.apache.ignite.failure.AbstractFailureHandler;
 import org.apache.ignite.failure.FailureContext;
 import org.apache.ignite.failure.FailureHandler;
+import org.apache.ignite.failure.FailureType;
 import org.apache.ignite.failure.NoOpFailureHandler;
 import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
 import org.apache.ignite.internal.GridKernalContext;
@@ -33,13 +36,19 @@ import org.apache.ignite.internal.processors.diagnostic.DiagnosticProcessor;
 import org.apache.ignite.internal.util.typedef.X;
 import org.apache.ignite.internal.util.typedef.internal.U;
 
+import static org.apache.ignite.IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE;
+import static org.apache.ignite.IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT;
+
 /**
  * General failure processing API
  */
 public class FailureProcessor extends GridProcessorAdapter {
     /** Value of the system property that enables threads dumping on failure. */
     private final boolean igniteDumpThreadsOnFailure =
-        IgniteSystemProperties.getBoolean(IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, false);
+        IgniteSystemProperties.getBoolean(IGNITE_DUMP_THREADS_ON_FAILURE, false);
+
+    /** Timeout for throttling of thread dumps generation. */
+    long dumpThreadsTrottlingTimeout;
 
     /** Ignored failure log message. */
     static final String IGNORED_FAILURE_LOG_MSG = "Possible failure suppressed accordingly to a configured handler ";
@@ -48,6 +57,9 @@ public class FailureProcessor extends GridProcessorAdapter {
     static final String FAILURE_LOG_MSG = "Critical system error detected. " +
         "Will be handled accordingly to configured handler ";
 
+    /** Thread dump per failure type timestamps. */
+    private Map<FailureType, Long> threadDumpPerFailureTypeTime;
+
     /** Ignite. */
     private final Ignite ignite;
 
@@ -66,7 +78,22 @@ public class FailureProcessor extends GridProcessorAdapter {
     public FailureProcessor(GridKernalContext ctx) {
         super(ctx);
 
-        this.ignite = ctx.grid();
+        ignite = ctx.grid();
+
+        if (igniteDumpThreadsOnFailure) {
+            dumpThreadsTrottlingTimeout =
+                    IgniteSystemProperties.getLong(
+                            IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT,
+                            ctx.config().getFailureDetectionTimeout()
+                    );
+
+            if (dumpThreadsTrottlingTimeout > 0) {
+                threadDumpPerFailureTypeTime = new EnumMap<>(FailureType.class);
+
+                for (FailureType type : FailureType.values())
+                    threadDumpPerFailureTypeTime.put(type, 0L);
+            }
+        }
     }
 
     /** {@inheritDoc} */
@@ -152,7 +179,7 @@ public class FailureProcessor extends GridProcessorAdapter {
                 " WAL path: " + ctx.config().getDataStorageConfiguration().getWalPath() +
                 " WAL archive path: " + ctx.config().getDataStorageConfiguration().getWalArchivePath());
 
-        if (igniteDumpThreadsOnFailure)
+        if (igniteDumpThreadsOnFailure && !throttleThreadDump(failureCtx.type()))
             U.dumpThreads(log, !failureTypeIgnored(failureCtx, hnd));
 
         DiagnosticProcessor diagnosticProcessor = ctx.diagnostic();
@@ -172,6 +199,36 @@ public class FailureProcessor extends GridProcessorAdapter {
     }
 
     /**
+     * Defines whether thread dump should be throttled for the given failure type or not.
+     *
+     * @param type Failure type.
+     * @return {@code True} if thread dump generation should be throttled for the given failure type.
+     */
+    private boolean throttleThreadDump(FailureType type) {
+        if (dumpThreadsTrottlingTimeout <= 0)
+            return false;
+
+        long curr = U.currentTimeMillis();
+
+        Long last = threadDumpPerFailureTypeTime.get(type);
+
+        assert last != null : "Unknown failure type " + type;
+
+        boolean throttle = curr - last < dumpThreadsTrottlingTimeout;
+
+        if (!throttle)
+            threadDumpPerFailureTypeTime.put(type, curr);
+        else {
+            if (log.isInfoEnabled()) {
+                log.info("Thread dump is hidden due to throttling settings. " +
+                        "Set IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT property to 0 to see all thread dumps.");
+            }
+        }
+
+        return throttle;
+    }
+
+    /**
      * @param failureCtx Failure context.
      * @param hnd Handler.
      */
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/failure/FailureProcessorThreadDumpThrottlingTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/failure/FailureProcessorThreadDumpThrottlingTest.java
new file mode 100644
index 0000000..9f85ae6
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/failure/FailureProcessorThreadDumpThrottlingTest.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.failure;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.ignite.IgniteSystemProperties;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.failure.FailureContext;
+import org.apache.ignite.failure.FailureType;
+import org.apache.ignite.failure.TestFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.util.typedef.internal.U;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.apache.ignite.testframework.LogListener;
+import org.apache.ignite.testframework.junits.WithSystemProperty;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.junit.Test;
+
+import static org.apache.ignite.failure.FailureType.SYSTEM_CRITICAL_OPERATION_TIMEOUT;
+import static org.apache.ignite.failure.FailureType.SYSTEM_WORKER_BLOCKED;
+import static org.apache.ignite.internal.util.IgniteUtils.THREAD_DUMP_MSG;
+
+/**
+ * Tests for throttling thread dumps during handling failures.
+ */
+@WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+public class FailureProcessorThreadDumpThrottlingTest extends GridCommonAbstractTest {
+    /** Test logger. */
+    private final ListeningTestLogger testLog = new ListeningTestLogger(true, log);
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+        IgniteConfiguration cfg =  super.getConfiguration(igniteInstanceName);
+
+        TestFailureHandler hnd = new TestFailureHandler(false);
+
+        hnd.setIgnoredFailureTypes(ImmutableSet.of(FailureType.SYSTEM_CRITICAL_OPERATION_TIMEOUT, SYSTEM_WORKER_BLOCKED));
+
+        cfg.setFailureHandler(hnd);
+
+        cfg.setGridLogger(testLog);
+
+        return cfg;
+    }
+
+    /** {@inheritDoc} */
+    @Override protected void beforeTest() throws Exception {
+        super.beforeTest();
+
+        startGrid(0);
+    }
+
+    /** {@inheritDoc} */
+    @Override protected void afterTest() throws Exception {
+        testLog.clearListeners();
+
+        stopAllGrids();
+
+        super.afterTest();
+    }
+
+    /**
+     * Tests that thread dumps are not generated if {@code IGNITE_DUMP_THREADS_ON_FAILURE == false}.
+     */
+    @Test
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "false")
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT, value = "0")
+    public void testNoThreadDumps() throws Exception {
+        LogListener lsnr = LogListener.matches(THREAD_DUMP_MSG).times(0).build();
+
+        testLog.registerListener(lsnr);
+
+        IgniteEx ignite = ignite(0);
+
+        FailureContext failureCtx =
+                new FailureContext(SYSTEM_WORKER_BLOCKED, new Throwable("Failure context error"));
+
+        for (int i = 0; i < 3; i++)
+            ignite.context().failure().process(failureCtx);
+
+        assertTrue(lsnr.check());
+    }
+
+    /**
+     * Tests that thread dumps are generated for every failure when throttling is disabled.
+     */
+    @Test
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT, value = "0")
+    public void testNoThrottling() throws Exception {
+        LogListener lsnr = LogListener.matches(THREAD_DUMP_MSG).times(3).build();
+
+        testLog.registerListener(lsnr);
+
+        IgniteEx ignite = ignite(0);
+
+        FailureContext failureCtx =
+                new FailureContext(SYSTEM_WORKER_BLOCKED, new Throwable("Failure context error"));
+
+        for (int i = 0; i < 3; i++)
+            ignite.context().failure().process(failureCtx);
+
+        assertTrue(lsnr.check());
+    }
+
+    /**
+     * Tests that thread dumps will be throttled and will be generated again after timeout exceeded.
+     */
+    @Test
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT, value = "1000")
+    public void testThrottling() throws Exception {
+        LogListener dumpLsnr = LogListener.matches(THREAD_DUMP_MSG).times(2).build();
+        LogListener throttledLsnr = LogListener.matches("Thread dump is hidden").times(4).build();
+
+        testLog.registerListener(dumpLsnr);
+        testLog.registerListener(throttledLsnr);
+
+        IgniteEx ignite = ignite(0);
+
+        FailureContext failureCtx =
+                new FailureContext(SYSTEM_WORKER_BLOCKED, new Throwable("Failure context error"));
+
+        for (int i = 0; i < 3; i++)
+            ignite.context().failure().process(failureCtx);
+
+        U.sleep(1000);
+
+        for (int i = 0; i < 3; i++)
+            ignite.context().failure().process(failureCtx);
+
+        assertTrue(dumpLsnr.check());
+        assertTrue(throttledLsnr.check());
+    }
+
+    /**
+     * Tests that thread dumps will be throttled per failure type and will be generated again after timeout exceeded.
+     */
+    @Test
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT, value = "1000")
+    public void testThrottlingPerFailureType() throws Exception {
+        LogListener dumpLsnr = LogListener.matches(THREAD_DUMP_MSG).times(4).build();
+        LogListener throttledLsnr = LogListener.matches("Thread dump is hidden").times(8).build();
+
+        testLog.registerListener(dumpLsnr);
+        testLog.registerListener(throttledLsnr);
+
+        IgniteEx ignite = ignite(0);
+
+        FailureContext workerBlockedFailureCtx =
+                new FailureContext(SYSTEM_WORKER_BLOCKED, new Throwable("Failure context error"));
+
+        FailureContext opTimeoutFailureCtx =
+                new FailureContext(SYSTEM_CRITICAL_OPERATION_TIMEOUT, new Throwable("Failure context error"));
+
+        for (int i = 0; i < 3; i++) {
+            ignite.context().failure().process(workerBlockedFailureCtx);
+
+            ignite.context().failure().process(opTimeoutFailureCtx);
+        }
+
+        U.sleep(1000);
+
+        for (int i = 0; i < 3; i++) {
+            ignite.context().failure().process(workerBlockedFailureCtx);
+
+            ignite.context().failure().process(opTimeoutFailureCtx);
+        }
+
+        assertTrue(dumpLsnr.check());
+        assertTrue(throttledLsnr.check());
+    }
+
+    /**
+     * Tests that the default thread dump throttling timeout equals the failure detection timeout.
+     */
+    @Test
+    @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+    public void testDefaultThrottlingTimeout() throws Exception {
+        IgniteEx ignite = ignite(0);
+
+        assertEquals(
+                ignite.context().failure().dumpThreadsTrottlingTimeout,
+                ignite.configuration().getFailureDetectionTimeout().longValue()
+        );
+    }
+}
diff --git a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
index 927dd71..5c6be99 100644
--- a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
+++ b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
@@ -90,6 +90,7 @@ import org.apache.ignite.internal.processors.database.CacheFreeListSelfTest;
 import org.apache.ignite.internal.processors.database.DataRegionMetricsSelfTest;
 import org.apache.ignite.internal.processors.database.IndexStorageSelfTest;
 import org.apache.ignite.internal.processors.database.SwapPathConstructionSelfTest;
+import org.apache.ignite.internal.processors.failure.FailureProcessorThreadDumpThrottlingTest;
 import org.apache.ignite.internal.processors.metastorage.DistributedMetaStorageTest;
 import org.apache.ignite.internal.processors.metastorage.persistence.DistributedMetaStorageHistoryCacheTest;
 import org.apache.ignite.internal.processors.metastorage.persistence.DmsDataWriterWorkerTest;
@@ -240,6 +241,7 @@ import org.junit.runners.Suite;
     OomFailureHandlerTest.class,
     TransactionIntegrityWithSystemWorkerDeathTest.class,
     FailureProcessorLoggingTest.class,
+    FailureProcessorThreadDumpThrottlingTest.class,
 
     AtomicOperationsInTxTest.class,