You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ag...@apache.org on 2020/01/13 09:29:02 UTC
[ignite] branch master updated: IGNITE-12523 Added throttling for
thread dumps generation on system failure.
This is an automated email from the ASF dual-hosted git repository.
agura pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 594061d IGNITE-12523 Added throttling for thread dumps generation on system failure.
594061d is described below
commit 594061d906749c38b718763c5753d99281b36728
Author: Andrey Gura <ag...@apache.org>
AuthorDate: Thu Dec 19 20:20:12 2019 +0300
IGNITE-12523 Added throttling for thread dumps generation on system failure.
---
.../org/apache/ignite/IgniteSystemProperties.java | 8 +
.../processors/failure/FailureProcessor.java | 63 ++++++-
.../FailureProcessorThreadDumpThrottlingTest.java | 203 +++++++++++++++++++++
.../ignite/testsuites/IgniteBasicTestSuite.java | 2 +
4 files changed, 273 insertions(+), 3 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
index 53b7a9a..3388071 100644
--- a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
+++ b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
@@ -987,6 +987,14 @@ public final class IgniteSystemProperties {
public static final String IGNITE_DUMP_THREADS_ON_FAILURE = "IGNITE_DUMP_THREADS_ON_FAILURE";
/**
+ * Throttling timeout in milliseconds for thread dump generation during failure handling.
+ *
+ * Default is failure detection timeout. {@code 0} or negative value - throttling is disabled.
+ */
+ public static final String IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT =
+ "IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT";
+
+ /**
* Throttling timeout in millis which avoid excessive PendingTree access on unwind if there is nothing to clean yet.
*
* Default is 500 ms.
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
index 19495eb..7980a4f 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
@@ -17,6 +17,8 @@
package org.apache.ignite.internal.processors.failure;
+import java.util.EnumMap;
+import java.util.Map;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteSystemProperties;
@@ -24,6 +26,7 @@ import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.failure.AbstractFailureHandler;
import org.apache.ignite.failure.FailureContext;
import org.apache.ignite.failure.FailureHandler;
+import org.apache.ignite.failure.FailureType;
import org.apache.ignite.failure.NoOpFailureHandler;
import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
import org.apache.ignite.internal.GridKernalContext;
@@ -33,13 +36,19 @@ import org.apache.ignite.internal.processors.diagnostic.DiagnosticProcessor;
import org.apache.ignite.internal.util.typedef.X;
import org.apache.ignite.internal.util.typedef.internal.U;
+import static org.apache.ignite.IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE;
+import static org.apache.ignite.IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT;
+
/**
* General failure processing API
*/
public class FailureProcessor extends GridProcessorAdapter {
/** Value of the system property that enables threads dumping on failure. */
private final boolean igniteDumpThreadsOnFailure =
- IgniteSystemProperties.getBoolean(IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, false);
+ IgniteSystemProperties.getBoolean(IGNITE_DUMP_THREADS_ON_FAILURE, false);
+
+ /** Timeout for throttling of thread dumps generation. */
+ long dumpThreadsTrottlingTimeout;
/** Ignored failure log message. */
static final String IGNORED_FAILURE_LOG_MSG = "Possible failure suppressed accordingly to a configured handler ";
@@ -48,6 +57,9 @@ public class FailureProcessor extends GridProcessorAdapter {
static final String FAILURE_LOG_MSG = "Critical system error detected. " +
"Will be handled accordingly to configured handler ";
+ /** Thread dump per failure type timestamps. */
+ private Map<FailureType, Long> threadDumpPerFailureTypeTime;
+
/** Ignite. */
private final Ignite ignite;
@@ -66,7 +78,22 @@ public class FailureProcessor extends GridProcessorAdapter {
public FailureProcessor(GridKernalContext ctx) {
super(ctx);
- this.ignite = ctx.grid();
+ ignite = ctx.grid();
+
+ if (igniteDumpThreadsOnFailure) {
+ dumpThreadsTrottlingTimeout =
+ IgniteSystemProperties.getLong(
+ IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT,
+ ctx.config().getFailureDetectionTimeout()
+ );
+
+ if (dumpThreadsTrottlingTimeout > 0) {
+ threadDumpPerFailureTypeTime = new EnumMap<>(FailureType.class);
+
+ for (FailureType type : FailureType.values())
+ threadDumpPerFailureTypeTime.put(type, 0L);
+ }
+ }
}
/** {@inheritDoc} */
@@ -152,7 +179,7 @@ public class FailureProcessor extends GridProcessorAdapter {
" WAL path: " + ctx.config().getDataStorageConfiguration().getWalPath() +
" WAL archive path: " + ctx.config().getDataStorageConfiguration().getWalArchivePath());
- if (igniteDumpThreadsOnFailure)
+ if (igniteDumpThreadsOnFailure && !throttleThreadDump(failureCtx.type()))
U.dumpThreads(log, !failureTypeIgnored(failureCtx, hnd));
DiagnosticProcessor diagnosticProcessor = ctx.diagnostic();
@@ -172,6 +199,36 @@ public class FailureProcessor extends GridProcessorAdapter {
}
/**
+ * Defines whether thread dump should be throttled for given failure type or not.
+ *
+ * @param type Failure type.
+ * @return {@code True} if thread dump generation should be throttled for given failure type.
+ */
+ private boolean throttleThreadDump(FailureType type) {
+ if (dumpThreadsTrottlingTimeout <= 0)
+ return false;
+
+ long curr = U.currentTimeMillis();
+
+ Long last = threadDumpPerFailureTypeTime.get(type);
+
+ assert last != null : "Unknown failure type " + type;
+
+ boolean throttle = curr - last < dumpThreadsTrottlingTimeout;
+
+ if (!throttle)
+ threadDumpPerFailureTypeTime.put(type, curr);
+ else {
+ if (log.isInfoEnabled()) {
+ log.info("Thread dump is hidden due to throttling settings. " +
+ "Set IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT property to 0 to see all thread dumps.");
+ }
+ }
+
+ return throttle;
+ }
+
+ /**
* @param failureCtx Failure context.
* @param hnd Handler.
*/
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/failure/FailureProcessorThreadDumpThrottlingTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/failure/FailureProcessorThreadDumpThrottlingTest.java
new file mode 100644
index 0000000..9f85ae6
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/failure/FailureProcessorThreadDumpThrottlingTest.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.failure;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.ignite.IgniteSystemProperties;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.failure.FailureContext;
+import org.apache.ignite.failure.FailureType;
+import org.apache.ignite.failure.TestFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.util.typedef.internal.U;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.apache.ignite.testframework.LogListener;
+import org.apache.ignite.testframework.junits.WithSystemProperty;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.junit.Test;
+
+import static org.apache.ignite.failure.FailureType.SYSTEM_CRITICAL_OPERATION_TIMEOUT;
+import static org.apache.ignite.failure.FailureType.SYSTEM_WORKER_BLOCKED;
+import static org.apache.ignite.internal.util.IgniteUtils.THREAD_DUMP_MSG;
+
+/**
+ * Tests for throttling thread dumps during handling failures.
+ */
+@WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+public class FailureProcessorThreadDumpThrottlingTest extends GridCommonAbstractTest {
+ /** Test logger. */
+ private final ListeningTestLogger testLog = new ListeningTestLogger(true, log);
+
+ /** {@inheritDoc} */
+ @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+ IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+ TestFailureHandler hnd = new TestFailureHandler(false);
+
+ hnd.setIgnoredFailureTypes(ImmutableSet.of(FailureType.SYSTEM_CRITICAL_OPERATION_TIMEOUT, SYSTEM_WORKER_BLOCKED));
+
+ cfg.setFailureHandler(hnd);
+
+ cfg.setGridLogger(testLog);
+
+ return cfg;
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void beforeTest() throws Exception {
+ super.beforeTest();
+
+ startGrid(0);
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void afterTest() throws Exception {
+ testLog.clearListeners();
+
+ stopAllGrids();
+
+ super.afterTest();
+ }
+
+ /**
+ * Tests that thread dumps are not generated if {@code IGNITE_DUMP_THREADS_ON_FAILURE == false}.
+ */
+ @Test
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "false")
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT, value = "0")
+ public void testNoThreadDumps() throws Exception {
+ LogListener lsnr = LogListener.matches(THREAD_DUMP_MSG).times(0).build();
+
+ testLog.registerListener(lsnr);
+
+ IgniteEx ignite = ignite(0);
+
+ FailureContext failureCtx =
+ new FailureContext(SYSTEM_WORKER_BLOCKED, new Throwable("Failure context error"));
+
+ for (int i = 0; i < 3; i++)
+ ignite.context().failure().process(failureCtx);
+
+ assertTrue(lsnr.check());
+ }
+
+ /**
+ * Tests that a thread dump is generated for every failure when throttling is disabled.
+ */
+ @Test
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT, value = "0")
+ public void testNoThrottling() throws Exception {
+ LogListener lsnr = LogListener.matches(THREAD_DUMP_MSG).times(3).build();
+
+ testLog.registerListener(lsnr);
+
+ IgniteEx ignite = ignite(0);
+
+ FailureContext failureCtx =
+ new FailureContext(SYSTEM_WORKER_BLOCKED, new Throwable("Failure context error"));
+
+ for (int i = 0; i < 3; i++)
+ ignite.context().failure().process(failureCtx);
+
+ assertTrue(lsnr.check());
+ }
+
+ /**
+ * Tests that thread dumps will be throttled and will be generated again after timeout exceeded.
+ */
+ @Test
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT, value = "1000")
+ public void testThrottling() throws Exception {
+ LogListener dumpLsnr = LogListener.matches(THREAD_DUMP_MSG).times(2).build();
+ LogListener throttledLsnr = LogListener.matches("Thread dump is hidden").times(4).build();
+
+ testLog.registerListener(dumpLsnr);
+ testLog.registerListener(throttledLsnr);
+
+ IgniteEx ignite = ignite(0);
+
+ FailureContext failureCtx =
+ new FailureContext(SYSTEM_WORKER_BLOCKED, new Throwable("Failure context error"));
+
+ for (int i = 0; i < 3; i++)
+ ignite.context().failure().process(failureCtx);
+
+ U.sleep(1000);
+
+ for (int i = 0; i < 3; i++)
+ ignite.context().failure().process(failureCtx);
+
+ assertTrue(dumpLsnr.check());
+ assertTrue(throttledLsnr.check());
+ }
+
+ /**
+ * Tests that thread dumps will be throttled per failure type and will be generated again after timeout exceeded.
+ */
+ @Test
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE_THROTTLING_TIMEOUT, value = "1000")
+ public void testThrottlingPerFailureType() throws Exception {
+ LogListener dumpLsnr = LogListener.matches(THREAD_DUMP_MSG).times(4).build();
+ LogListener throttledLsnr = LogListener.matches("Thread dump is hidden").times(8).build();
+
+ testLog.registerListener(dumpLsnr);
+ testLog.registerListener(throttledLsnr);
+
+ IgniteEx ignite = ignite(0);
+
+ FailureContext workerBlockedFailureCtx =
+ new FailureContext(SYSTEM_WORKER_BLOCKED, new Throwable("Failure context error"));
+
+ FailureContext opTimeoutFailureCtx =
+ new FailureContext(SYSTEM_CRITICAL_OPERATION_TIMEOUT, new Throwable("Failure context error"));
+
+ for (int i = 0; i < 3; i++) {
+ ignite.context().failure().process(workerBlockedFailureCtx);
+
+ ignite.context().failure().process(opTimeoutFailureCtx);
+ }
+
+ U.sleep(1000);
+
+ for (int i = 0; i < 3; i++) {
+ ignite.context().failure().process(workerBlockedFailureCtx);
+
+ ignite.context().failure().process(opTimeoutFailureCtx);
+ }
+
+ assertTrue(dumpLsnr.check());
+ assertTrue(throttledLsnr.check());
+ }
+
+ /**
+ * Tests that default thread dump throttling timeout equals failure detection timeout.
+ */
+ @Test
+ @WithSystemProperty(key = IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE, value = "true")
+ public void testDefaultThrottlingTimeout() throws Exception {
+ IgniteEx ignite = ignite(0);
+
+ assertEquals(
+ ignite.context().failure().dumpThreadsTrottlingTimeout,
+ ignite.configuration().getFailureDetectionTimeout().longValue()
+ );
+ }
+}
diff --git a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
index 927dd71..5c6be99 100644
--- a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
+++ b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteBasicTestSuite.java
@@ -90,6 +90,7 @@ import org.apache.ignite.internal.processors.database.CacheFreeListSelfTest;
import org.apache.ignite.internal.processors.database.DataRegionMetricsSelfTest;
import org.apache.ignite.internal.processors.database.IndexStorageSelfTest;
import org.apache.ignite.internal.processors.database.SwapPathConstructionSelfTest;
+import org.apache.ignite.internal.processors.failure.FailureProcessorThreadDumpThrottlingTest;
import org.apache.ignite.internal.processors.metastorage.DistributedMetaStorageTest;
import org.apache.ignite.internal.processors.metastorage.persistence.DistributedMetaStorageHistoryCacheTest;
import org.apache.ignite.internal.processors.metastorage.persistence.DmsDataWriterWorkerTest;
@@ -240,6 +241,7 @@ import org.junit.runners.Suite;
OomFailureHandlerTest.class,
TransactionIntegrityWithSystemWorkerDeathTest.class,
FailureProcessorLoggingTest.class,
+ FailureProcessorThreadDumpThrottlingTest.class,
AtomicOperationsInTxTest.class,