You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ir...@apache.org on 2019/02/28 08:14:59 UTC
[ignite] branch master updated: IGNITE-11059 Print information
about pending locks queue in case of dht local tx timeout. - Fixes #5935.
This is an automated email from the ASF dual-hosted git repository.
irakov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 651fa78 IGNITE-11059 Print information about pending locks queue in case of dht local tx timeout. - Fixes #5935.
651fa78 is described below
commit 651fa786d930e655a51d5ee2f0d826d52579218b
Author: Ivan Daschinskiy <iv...@gmail.com>
AuthorDate: Thu Feb 28 11:10:56 2019 +0300
IGNITE-11059 Print information about pending locks queue in case of dht local tx timeout. - Fixes #5935.
Signed-off-by: Ivan Rakov <ir...@apache.org>
---
.../org/apache/ignite/internal/IgniteKernal.java | 9 +-
.../cache/distributed/dht/GridDhtLockFuture.java | 73 +++++++++++++-
.../cache/transactions/IgniteTxManager.java | 24 +++++
.../managers/IgniteDiagnosticMessagesTest.java | 106 +++++++++++++++++++++
4 files changed, 208 insertions(+), 4 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java b/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
index e427810..9ef9429 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
@@ -320,6 +320,9 @@ public class IgniteKernal implements IgniteEx, IgniteMXBean, Externalizable {
/** Separator for formatted coordinator properties. */
public static final String COORDINATOR_PROPERTIES_SEPARATOR = ",";
+ /** Default long operations dump timeout. */
+ public static final long DFLT_LONG_OPERATIONS_DUMP_TIMEOUT = 60_000L;
+
/** Long jvm pause detector. */
private LongJVMPauseDetector longJVMPauseDetector;
@@ -1270,8 +1273,10 @@ public class IgniteKernal implements IgniteEx, IgniteMXBean, Externalizable {
}, metricsLogFreq, metricsLogFreq);
}
- final long longOpDumpTimeout =
- IgniteSystemProperties.getLong(IgniteSystemProperties.IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT, 60_000);
+ final long longOpDumpTimeout = IgniteSystemProperties.getLong(
+ IgniteSystemProperties.IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT,
+ DFLT_LONG_OPERATIONS_DUMP_TIMEOUT
+ );
if (longOpDumpTimeout > 0) {
longOpDumpTask = ctx.timeout().schedule(new Runnable() {
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java
index ef2e70d..4f3581a 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java
@@ -58,6 +58,7 @@ import org.apache.ignite.internal.processors.cache.distributed.near.GridNearCach
import org.apache.ignite.internal.processors.cache.mvcc.MvccUpdateVersionAware;
import org.apache.ignite.internal.processors.cache.mvcc.MvccVersionAware;
import org.apache.ignite.internal.processors.cache.mvcc.txlog.TxState;
+import org.apache.ignite.internal.processors.cache.transactions.IgniteInternalTx;
import org.apache.ignite.internal.processors.cache.transactions.IgniteTxEntry;
import org.apache.ignite.internal.processors.cache.transactions.IgniteTxKey;
import org.apache.ignite.internal.processors.cache.version.GridCacheVersion;
@@ -1163,10 +1164,18 @@ public final class GridDhtLockFuture extends GridCacheCompoundIdentityFuture<Boo
/** {@inheritDoc} */
@Override public void onTimeout() {
- if (log.isDebugEnabled())
- log.debug("Timed out waiting for lock response: " + this);
+ long longOpsDumpTimeout = cctx.tm().longOperationsDumpTimeout();
synchronized (GridDhtLockFuture.this) {
+ if (log.isDebugEnabled() || timeout >= longOpsDumpTimeout) {
+ String msg = dumpPendingLocks();
+
+ if (log.isDebugEnabled())
+ log.debug(msg);
+ else
+ log.warning(msg);
+ }
+
timedOut = true;
// Stop locks and responses processing.
@@ -1184,6 +1193,66 @@ public final class GridDhtLockFuture extends GridCacheCompoundIdentityFuture<Boo
@Override public String toString() {
return S.toString(LockTimeoutObject.class, this);
}
+
+ /**
+ * NB! Should be called in synchronized block on {@link GridDhtLockFuture} instance.
+ *
+ * @return String representation of pending locks.
+ */
+ private String dumpPendingLocks() {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append("Transaction tx=").append(tx.getClass().getSimpleName());
+ sb.append(" [xid=").append(tx.xid());
+ sb.append(", xidVer=").append(tx.xidVersion());
+ sb.append(", nearXid=").append(tx.nearXidVersion().asGridUuid());
+ sb.append(", nearXidVer=").append(tx.nearXidVersion());
+ sb.append(", nearNodeId=").append(tx.nearNodeId());
+ sb.append(", label=").append(tx.label());
+ sb.append("] timed out, can't acquire lock for ");
+
+ Iterator<KeyCacheObject> locks = pendingLocks.iterator();
+
+ boolean found = false;
+
+ while (!found && locks.hasNext()) {
+ KeyCacheObject key = locks.next();
+
+ GridCacheEntryEx entry = cctx.cache().entryEx(key, topVer);
+
+ while (true) {
+ try {
+ Collection<GridCacheMvccCandidate> candidates = entry.localCandidates();
+
+ for (GridCacheMvccCandidate candidate : candidates) {
+ IgniteInternalTx itx = cctx.tm().tx(candidate.version());
+
+ if (itx != null && candidate.owner() && !candidate.version().equals(tx.xidVersion())) {
+ sb.append("key=").append(key).append(", owner=");
+ sb.append("[xid=").append(itx.xid()).append(", ");
+ sb.append("xidVer=").append(itx.xidVersion()).append(", ");
+ sb.append("nearXid=").append(itx.nearXidVersion().asGridUuid()).append(", ");
+ sb.append("nearXidVer=").append(itx.nearXidVersion()).append(", ");
+ sb.append("label=").append(itx.label()).append(", ");
+ sb.append("nearNodeId=").append(candidate.otherNodeId()).append("]");
+ sb.append(", queueSize=").append(candidates.isEmpty() ? 0 : candidates.size() - 1);
+
+ found = true;
+
+ break;
+ }
+ }
+
+ break;
+ }
+ catch (GridCacheEntryRemovedException e) {
+ entry = cctx.cache().entryEx(key, topVer);
+ }
+ }
+ }
+
+ return sb.toString();
+ }
}
/**
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/transactions/IgniteTxManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/transactions/IgniteTxManager.java
index b3df67a..e7096fa 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/transactions/IgniteTxManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/transactions/IgniteTxManager.java
@@ -110,6 +110,7 @@ import static org.apache.ignite.events.EventType.EVT_NODE_LEFT;
import static org.apache.ignite.events.EventType.EVT_TX_STARTED;
import static org.apache.ignite.internal.GridTopic.TOPIC_CACHE_COORDINATOR;
import static org.apache.ignite.internal.GridTopic.TOPIC_TX;
+import static org.apache.ignite.internal.IgniteKernal.DFLT_LONG_OPERATIONS_DUMP_TIMEOUT;
import static org.apache.ignite.internal.managers.communication.GridIoPolicy.SYSTEM_POOL;
import static org.apache.ignite.internal.processors.cache.GridCacheOperation.READ;
import static org.apache.ignite.internal.processors.cache.GridCacheUtils.isNearEnabled;
@@ -153,6 +154,12 @@ public class IgniteTxManager extends GridCacheSharedManagerAdapter {
static int DEADLOCK_MAX_ITERS =
IgniteSystemProperties.getInteger(IGNITE_TX_DEADLOCK_DETECTION_MAX_ITERS, 1000);
+ /** Long operation dump timeout. */
+ private static final long LONG_OPERATIONS_DUMP_TIMEOUT = IgniteSystemProperties.getLong(
+ IgniteSystemProperties.IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT,
+ DFLT_LONG_OPERATIONS_DUMP_TIMEOUT
+ );
+
/** Committing transactions. */
private final ThreadLocal<IgniteInternalTx> threadCtx = new ThreadLocal<>();
@@ -203,6 +210,9 @@ public class IgniteTxManager extends GridCacheSharedManagerAdapter {
/** Slow tx warn timeout. */
private int slowTxWarnTimeout = SLOW_TX_WARN_TIMEOUT;
+ /** Long operations dump timeout. */
+ private long longOpsDumpTimeout = LONG_OPERATIONS_DUMP_TIMEOUT;
+
/**
* Near version to DHT version map. Note that we initialize to 5K size from get go,
* to avoid future map resizings.
@@ -1825,6 +1835,20 @@ public class IgniteTxManager extends GridCacheSharedManagerAdapter {
}
/**
+ * @return Long operations dump timeout.
+ */
+ public long longOperationsDumpTimeout() {
+ return longOpsDumpTimeout;
+ }
+
+ /**
+ * @param longOpsDumpTimeout Long operations dump timeout.
+ */
+ public void longOperationsDumpTimeout(long longOpsDumpTimeout) {
+ this.longOpsDumpTimeout = longOpsDumpTimeout;
+ }
+
+ /**
* Checks if transactions with given near version ID was prepared or committed.
*
* @param nearVer Near version ID.
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/managers/IgniteDiagnosticMessagesTest.java b/modules/core/src/test/java/org/apache/ignite/internal/managers/IgniteDiagnosticMessagesTest.java
index 6708756..48a7bfc 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/managers/IgniteDiagnosticMessagesTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/managers/IgniteDiagnosticMessagesTest.java
@@ -24,17 +24,21 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Pattern;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
+import org.apache.ignite.IgniteLogger;
import org.apache.ignite.cache.CacheAtomicityMode;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.configuration.NearCacheConfiguration;
import org.apache.ignite.internal.IgniteDiagnosticPrepareContext;
+import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.IgniteKernal;
import org.apache.ignite.internal.TestRecordingCommunicationSpi;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLockFuture;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxPrepareResponse;
import org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockResponse;
import org.apache.ignite.internal.processors.cache.distributed.near.GridNearSingleGetResponse;
@@ -59,6 +63,7 @@ import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL_SNAPSHOT;
import static org.apache.ignite.cache.CacheMode.REPLICATED;
import static org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC;
+import static org.apache.ignite.testframework.GridTestUtils.runAsync;
import static org.apache.ignite.testframework.GridTestUtils.waitForCondition;
import static org.apache.ignite.transactions.TransactionConcurrency.PESSIMISTIC;
import static org.apache.ignite.transactions.TransactionIsolation.REPEATABLE_READ;
@@ -377,6 +382,50 @@ public class IgniteDiagnosticMessagesTest extends GridCommonAbstractTest {
}
/**
+ * Tests that {@link org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLockFuture} timeout object
+ * dumps debug info to log.
+ *
+ * @throws Exception If fails.
+ */
+ @Test
+ public void testTimeOutTxLock() throws Exception {
+ final int longOpDumpTimeout = 500;
+
+ ListeningTestLogger testLog = new ListeningTestLogger(false, log);
+
+ IgniteLogger oldLog = GridTestUtils.getFieldValue(GridDhtLockFuture.class, "log");
+
+ GridTestUtils.setFieldValue(GridDhtLockFuture.class, "log", testLog);
+
+ try {
+ IgniteEx grid1 = startGrid(0);
+
+ LogListener lsnr = LogListener
+ .matches(Pattern.compile("Transaction tx=GridNearTxLocal \\[.*\\] timed out, can't acquire lock for"))
+ .andMatches(Pattern.compile(".*xid=.*, xidVer=.*, nearXid=.*, nearXidVer=.*, label=lock, " +
+ "nearNodeId=" + grid1.cluster().localNode().id() + ".*"))
+ .build();
+
+ testLog.registerListener(lsnr);
+
+ this.testLog = testLog;
+
+ IgniteEx grid2 = startGrid(1);
+
+ grid2.context().cache().context().tm().longOperationsDumpTimeout(longOpDumpTimeout);
+
+ awaitPartitionMapExchange();
+
+ emulateTxLockTimeout(grid1, grid2);
+
+ assertTrue(lsnr.check());
+ }
+ finally {
+ GridTestUtils.setFieldValue(GridDhtLockFuture.class, "log", oldLog);
+ }
+ }
+
+ /**
* @param atomicityMode Cache atomicity mode.
* @throws Exception If failed.
*/
@@ -638,4 +687,61 @@ public class IgniteDiagnosticMessagesTest extends GridCommonAbstractTest {
}
}
}
+
+ /**
+ * Emulates tx lock timeout.
+ *
+ * @param node1 First node.
+ * @param node2 Second node.
+ * @throws Exception If failed.
+ */
+ private void emulateTxLockTimeout(Ignite node1, Ignite node2) throws Exception {
+ node1.createCache(cacheConfiguration(TRANSACTIONAL).setBackups(1));
+
+ final CountDownLatch l1 = new CountDownLatch(1);
+ final CountDownLatch l2 = new CountDownLatch(1);
+
+ IgniteInternalFuture<?> fut1 = runAsync(new Runnable() {
+ @Override public void run() {
+ try {
+ try (Transaction tx = node1.transactions().withLabel("lock")
+ .txStart(PESSIMISTIC, REPEATABLE_READ, 60_000, 2)) {
+ node1.cache(DEFAULT_CACHE_NAME).put(1, 10);
+
+ l1.countDown();
+
+ U.awaitQuiet(l2);
+
+ U.sleep(100);
+
+ tx.commit();
+ }
+ }
+ catch (Exception e) {
+ log.error("Failed on node1", e);
+ }
+ }
+ }, "First");
+
+ IgniteInternalFuture<?> fut2 = runAsync(new Runnable() {
+ @Override public void run() {
+ try (Transaction tx = node2.transactions().withLabel("lock")
+ .txStart(PESSIMISTIC, REPEATABLE_READ, 2000L, 2)) {
+ U.awaitQuiet(l1);
+
+ node2.cache(DEFAULT_CACHE_NAME).put(1, 20);
+
+ tx.commit();
+ }
+ catch (Exception e) {
+ log.error("Failed on node2 " + node2.cluster().localNode().id(), e);
+
+ l2.countDown();
+ }
+ }
+ }, "Second");
+
+ fut1.get();
+ fut2.get();
+ }
}