You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ir...@apache.org on 2019/02/28 08:14:59 UTC

[ignite] branch master updated: IGNITE-11059 Print information about pending locks queue in case of dht local tx timeout. - Fixes #5935.

This is an automated email from the ASF dual-hosted git repository.

irakov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 651fa78  IGNITE-11059 Print information about pending locks queue in case of dht local tx timeout. - Fixes #5935.
651fa78 is described below

commit 651fa786d930e655a51d5ee2f0d826d52579218b
Author: Ivan Daschinskiy <iv...@gmail.com>
AuthorDate: Thu Feb 28 11:10:56 2019 +0300

    IGNITE-11059 Print information about pending locks queue in case of dht local tx timeout. - Fixes #5935.
    
    Signed-off-by: Ivan Rakov <ir...@apache.org>
---
 .../org/apache/ignite/internal/IgniteKernal.java   |   9 +-
 .../cache/distributed/dht/GridDhtLockFuture.java   |  73 +++++++++++++-
 .../cache/transactions/IgniteTxManager.java        |  24 +++++
 .../managers/IgniteDiagnosticMessagesTest.java     | 106 +++++++++++++++++++++
 4 files changed, 208 insertions(+), 4 deletions(-)

diff --git a/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java b/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
index e427810..9ef9429 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
@@ -320,6 +320,9 @@ public class IgniteKernal implements IgniteEx, IgniteMXBean, Externalizable {
     /** Separator for formatted coordinator properties. */
     public static final String COORDINATOR_PROPERTIES_SEPARATOR = ",";
 
+    /** Default long operations dump timeout. */
+    public static final long DFLT_LONG_OPERATIONS_DUMP_TIMEOUT = 60_000L;
+
     /** Long jvm pause detector. */
     private LongJVMPauseDetector longJVMPauseDetector;
 
@@ -1270,8 +1273,10 @@ public class IgniteKernal implements IgniteEx, IgniteMXBean, Externalizable {
             }, metricsLogFreq, metricsLogFreq);
         }
 
-        final long longOpDumpTimeout =
-            IgniteSystemProperties.getLong(IgniteSystemProperties.IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT, 60_000);
+        final long longOpDumpTimeout = IgniteSystemProperties.getLong(
+                IgniteSystemProperties.IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT,
+                DFLT_LONG_OPERATIONS_DUMP_TIMEOUT
+        );
 
         if (longOpDumpTimeout > 0) {
             longOpDumpTask = ctx.timeout().schedule(new Runnable() {
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java
index ef2e70d..4f3581a 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java
@@ -58,6 +58,7 @@ import org.apache.ignite.internal.processors.cache.distributed.near.GridNearCach
 import org.apache.ignite.internal.processors.cache.mvcc.MvccUpdateVersionAware;
 import org.apache.ignite.internal.processors.cache.mvcc.MvccVersionAware;
 import org.apache.ignite.internal.processors.cache.mvcc.txlog.TxState;
+import org.apache.ignite.internal.processors.cache.transactions.IgniteInternalTx;
 import org.apache.ignite.internal.processors.cache.transactions.IgniteTxEntry;
 import org.apache.ignite.internal.processors.cache.transactions.IgniteTxKey;
 import org.apache.ignite.internal.processors.cache.version.GridCacheVersion;
@@ -1163,10 +1164,18 @@ public final class GridDhtLockFuture extends GridCacheCompoundIdentityFuture<Boo
 
         /** {@inheritDoc} */
         @Override public void onTimeout() {
-            if (log.isDebugEnabled())
-                log.debug("Timed out waiting for lock response: " + this);
+            long longOpsDumpTimeout = cctx.tm().longOperationsDumpTimeout();
 
             synchronized (GridDhtLockFuture.this) {
+                if (log.isDebugEnabled() || timeout >= longOpsDumpTimeout) {
+                    String msg = dumpPendingLocks();
+
+                    if (log.isDebugEnabled())
+                        log.debug(msg);
+                    else
+                        log.warning(msg);
+                }
+
                 timedOut = true;
 
                 // Stop locks and responses processing.
@@ -1184,6 +1193,66 @@ public final class GridDhtLockFuture extends GridCacheCompoundIdentityFuture<Boo
         @Override public String toString() {
             return S.toString(LockTimeoutObject.class, this);
         }
+
+        /**
+         * NB! Should be called in synchronized block on {@link GridDhtLockFuture} instance.
+         *
+         * @return String representation of pending locks.
+         */
+        private String dumpPendingLocks() {
+            StringBuilder sb = new StringBuilder();
+
+            sb.append("Transaction tx=").append(tx.getClass().getSimpleName());
+            sb.append(" [xid=").append(tx.xid());
+            sb.append(", xidVer=").append(tx.xidVersion());
+            sb.append(", nearXid=").append(tx.nearXidVersion().asGridUuid());
+            sb.append(", nearXidVer=").append(tx.nearXidVersion());
+            sb.append(", nearNodeId=").append(tx.nearNodeId());
+            sb.append(", label=").append(tx.label());
+            sb.append("] timed out, can't acquire lock for ");
+
+            Iterator<KeyCacheObject> locks = pendingLocks.iterator();
+
+            boolean found = false;
+
+            while (!found && locks.hasNext()) {
+                KeyCacheObject key = locks.next();
+
+                GridCacheEntryEx entry = cctx.cache().entryEx(key, topVer);
+
+                while (true) {
+                    try {
+                        Collection<GridCacheMvccCandidate> candidates = entry.localCandidates();
+
+                        for (GridCacheMvccCandidate candidate : candidates) {
+                            IgniteInternalTx itx = cctx.tm().tx(candidate.version());
+
+                            if (itx != null && candidate.owner() && !candidate.version().equals(tx.xidVersion())) {
+                                sb.append("key=").append(key).append(", owner=");
+                                sb.append("[xid=").append(itx.xid()).append(", ");
+                                sb.append("xidVer=").append(itx.xidVersion()).append(", ");
+                                sb.append("nearXid=").append(itx.nearXidVersion().asGridUuid()).append(", ");
+                                sb.append("nearXidVer=").append(itx.nearXidVersion()).append(", ");
+                                sb.append("label=").append(itx.label()).append(", ");
+                                sb.append("nearNodeId=").append(candidate.otherNodeId()).append("]");
+                                sb.append(", queueSize=").append(candidates.isEmpty() ? 0 : candidates.size() - 1);
+
+                                found = true;
+
+                                break;
+                            }
+                        }
+
+                        break;
+                    }
+                    catch (GridCacheEntryRemovedException e) {
+                        entry = cctx.cache().entryEx(key, topVer);
+                    }
+                }
+            }
+
+            return sb.toString();
+        }
     }
 
     /**
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/transactions/IgniteTxManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/transactions/IgniteTxManager.java
index b3df67a..e7096fa 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/transactions/IgniteTxManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/transactions/IgniteTxManager.java
@@ -110,6 +110,7 @@ import static org.apache.ignite.events.EventType.EVT_NODE_LEFT;
 import static org.apache.ignite.events.EventType.EVT_TX_STARTED;
 import static org.apache.ignite.internal.GridTopic.TOPIC_CACHE_COORDINATOR;
 import static org.apache.ignite.internal.GridTopic.TOPIC_TX;
+import static org.apache.ignite.internal.IgniteKernal.DFLT_LONG_OPERATIONS_DUMP_TIMEOUT;
 import static org.apache.ignite.internal.managers.communication.GridIoPolicy.SYSTEM_POOL;
 import static org.apache.ignite.internal.processors.cache.GridCacheOperation.READ;
 import static org.apache.ignite.internal.processors.cache.GridCacheUtils.isNearEnabled;
@@ -153,6 +154,12 @@ public class IgniteTxManager extends GridCacheSharedManagerAdapter {
     static int DEADLOCK_MAX_ITERS =
         IgniteSystemProperties.getInteger(IGNITE_TX_DEADLOCK_DETECTION_MAX_ITERS, 1000);
 
+    /** Long operation dump timeout. */
+    private static final long LONG_OPERATIONS_DUMP_TIMEOUT = IgniteSystemProperties.getLong(
+            IgniteSystemProperties.IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT,
+            DFLT_LONG_OPERATIONS_DUMP_TIMEOUT
+    );
+
     /** Committing transactions. */
     private final ThreadLocal<IgniteInternalTx> threadCtx = new ThreadLocal<>();
 
@@ -203,6 +210,9 @@ public class IgniteTxManager extends GridCacheSharedManagerAdapter {
     /** Slow tx warn timeout. */
     private int slowTxWarnTimeout = SLOW_TX_WARN_TIMEOUT;
 
+    /** Long operations dump timeout. */
+    private long longOpsDumpTimeout = LONG_OPERATIONS_DUMP_TIMEOUT;
+
     /**
      * Near version to DHT version map. Note that we initialize to 5K size from get go,
      * to avoid future map resizings.
@@ -1825,6 +1835,20 @@ public class IgniteTxManager extends GridCacheSharedManagerAdapter {
     }
 
     /**
+     * @return Long operations dump timeout.
+     */
+    public long longOperationsDumpTimeout() {
+        return longOpsDumpTimeout;
+    }
+
+    /**
+     * @param longOpsDumpTimeout Long operations dump timeout.
+     */
+    public void longOperationsDumpTimeout(long longOpsDumpTimeout) {
+        this.longOpsDumpTimeout = longOpsDumpTimeout;
+    }
+
+    /**
      * Checks if transactions with given near version ID was prepared or committed.
      *
      * @param nearVer Near version ID.
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/managers/IgniteDiagnosticMessagesTest.java b/modules/core/src/test/java/org/apache/ignite/internal/managers/IgniteDiagnosticMessagesTest.java
index 6708756..48a7bfc 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/managers/IgniteDiagnosticMessagesTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/managers/IgniteDiagnosticMessagesTest.java
@@ -24,17 +24,21 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Pattern;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
+import org.apache.ignite.IgniteLogger;
 import org.apache.ignite.cache.CacheAtomicityMode;
 import org.apache.ignite.cluster.ClusterNode;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.configuration.IgniteConfiguration;
 import org.apache.ignite.configuration.NearCacheConfiguration;
 import org.apache.ignite.internal.IgniteDiagnosticPrepareContext;
+import org.apache.ignite.internal.IgniteEx;
 import org.apache.ignite.internal.IgniteInternalFuture;
 import org.apache.ignite.internal.IgniteKernal;
 import org.apache.ignite.internal.TestRecordingCommunicationSpi;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLockFuture;
 import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxPrepareResponse;
 import org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockResponse;
 import org.apache.ignite.internal.processors.cache.distributed.near.GridNearSingleGetResponse;
@@ -59,6 +63,7 @@ import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
 import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL_SNAPSHOT;
 import static org.apache.ignite.cache.CacheMode.REPLICATED;
 import static org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC;
+import static org.apache.ignite.testframework.GridTestUtils.runAsync;
 import static org.apache.ignite.testframework.GridTestUtils.waitForCondition;
 import static org.apache.ignite.transactions.TransactionConcurrency.PESSIMISTIC;
 import static org.apache.ignite.transactions.TransactionIsolation.REPEATABLE_READ;
@@ -377,6 +382,50 @@ public class IgniteDiagnosticMessagesTest extends GridCommonAbstractTest {
     }
 
     /**
+     * Tests that {@link org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLockFuture} timeout object
+     * dumps debug info to log.
+     *
+     * @throws Exception If fails.
+     */
+    @Test
+    public void testTimeOutTxLock() throws Exception {
+        final int longOpDumpTimeout = 500;
+
+        ListeningTestLogger testLog = new ListeningTestLogger(false, log);
+
+        IgniteLogger oldLog = GridTestUtils.getFieldValue(GridDhtLockFuture.class, "log");
+
+        GridTestUtils.setFieldValue(GridDhtLockFuture.class, "log", testLog);
+
+        try {
+            IgniteEx grid1 = startGrid(0);
+
+            LogListener lsnr = LogListener
+                    .matches(Pattern.compile("Transaction tx=GridNearTxLocal \\[.*\\] timed out, can't acquire lock for"))
+                    .andMatches(Pattern.compile(".*xid=.*, xidVer=.*, nearXid=.*, nearXidVer=.*, label=lock, " +
+                            "nearNodeId=" + grid1.cluster().localNode().id() + ".*"))
+                    .build();
+
+            testLog.registerListener(lsnr);
+
+            this.testLog = testLog;
+
+            IgniteEx grid2 = startGrid(1);
+
+            grid2.context().cache().context().tm().longOperationsDumpTimeout(longOpDumpTimeout);
+
+            awaitPartitionMapExchange();
+
+            emulateTxLockTimeout(grid1, grid2);
+
+            assertTrue(lsnr.check());
+        }
+        finally {
+            GridTestUtils.setFieldValue(GridDhtLockFuture.class, "log", oldLog);
+        }
+    }
+
+    /**
      * @param atomicityMode Cache atomicity mode.
      * @throws Exception If failed.
      */
@@ -638,4 +687,61 @@ public class IgniteDiagnosticMessagesTest extends GridCommonAbstractTest {
             }
         }
     }
+
+    /**
+     * Emulates tx lock timeout.
+     *
+     * @param node1 First node.
+     * @param node2 Second node.
+     * @throws Exception If failed.
+     */
+    private void emulateTxLockTimeout(Ignite node1, Ignite node2) throws Exception {
+        node1.createCache(cacheConfiguration(TRANSACTIONAL).setBackups(1));
+
+        final CountDownLatch l1 = new CountDownLatch(1);
+        final CountDownLatch l2 = new CountDownLatch(1);
+
+        IgniteInternalFuture<?> fut1 = runAsync(new Runnable() {
+            @Override public void run() {
+                try {
+                    try (Transaction tx = node1.transactions().withLabel("lock")
+                            .txStart(PESSIMISTIC, REPEATABLE_READ, 60_000, 2)) {
+                        node1.cache(DEFAULT_CACHE_NAME).put(1, 10);
+
+                        l1.countDown();
+
+                        U.awaitQuiet(l2);
+
+                        U.sleep(100);
+
+                        tx.commit();
+                    }
+                }
+                catch (Exception e) {
+                    log.error("Failed on node1", e);
+                }
+            }
+        }, "First");
+
+        IgniteInternalFuture<?> fut2 = runAsync(new Runnable() {
+            @Override public void run() {
+                try (Transaction tx = node2.transactions().withLabel("lock")
+                        .txStart(PESSIMISTIC, REPEATABLE_READ, 2000L, 2)) {
+                    U.awaitQuiet(l1);
+
+                    node2.cache(DEFAULT_CACHE_NAME).put(1, 20);
+
+                    tx.commit();
+                }
+                catch (Exception e) {
+                    log.error("Failed on node2 " + node2.cluster().localNode().id(), e);
+
+                    l2.countDown();
+                }
+            }
+        }, "Second");
+
+        fut1.get();
+        fut2.get();
+    }
 }