You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ignite.apache.org by "Nikolay Izhikov (Jira)" <ji...@apache.org> on 2020/04/17 07:52:00 UTC
[jira] [Updated] (IGNITE-12774) Transaction hangs after too many
open files NIO exception
[ https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Nikolay Izhikov updated IGNITE-12774:
-------------------------------------
Fix Version/s: 2.8.1
> Transaction hangs after too many open files NIO exception
> ---------------------------------------------------------
>
> Key: IGNITE-12774
> URL: https://issues.apache.org/jira/browse/IGNITE-12774
> Project: Ignite
> Issue Type: Bug
> Reporter: Sergey Antonov
> Assignee: Sergey Antonov
> Priority: Major
> Fix For: 2.9, 2.8.1
>
> Time Spent: 0.5h
> Remaining Estimate: 0h
>
> The transaction hangs after a “Too many open files” error and never finishes.
> {code:java}
> import java.net.SocketException;
> import java.util.concurrent.atomic.AtomicBoolean;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.lang.IgniteInClosure;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.spi.IgniteSpiException;
> import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.apache.ignite.transactions.Transaction;
> import org.apache.ignite.transactions.TransactionConcurrency;
> import org.apache.ignite.transactions.TransactionIsolation;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cache.CacheMode.PARTITIONED;
> /**
>  * Reproducer for IGNITE-12774: a pessimistic transaction started on one node hangs on commit
>  * once the communication SPI of another node starts failing every send with a
>  * "Too many open files" socket error.
>  */
> public class TooManyOpenFilesTest extends GridCommonAbstractTest {
>     /** {@inheritDoc} */
>     @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
>         IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
>
>         cfg.setFailureHandler(new StopNodeOrHaltFailureHandler());
>         // Every node gets the fault-injecting SPI; it stays passive until its flag is raised.
>         cfg.setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi());
>         cfg.setConsistentId(igniteInstanceName);
>
>         return cfg;
>     }
>
>     /** {@inheritDoc} */
>     @Override protected void beforeTest() throws Exception {
>         super.beforeTest();
>
>         stopAllGrids();
>
>         cleanPersistenceDir();
>     }
>
>     /** {@inheritDoc} */
>     @Override protected void afterTest() throws Exception {
>         stopAllGrids();
>
>         cleanPersistenceDir();
>
>         super.afterTest();
>     }
>
>     /**
>      * Starts three nodes, writes one key inside a pessimistic repeatable-read transaction, then makes
>      * the SPI of node 2 throw on every send and continues writing before committing.
>      *
>      * @throws Exception If failed.
>      */
>     public void test() throws Exception {
>         IgniteEx coordinator = startGrids(3);
>
>         coordinator.cluster().active(true);
>
>         CacheConfiguration<Object, Object> cacheCfg = new CacheConfiguration<>()
>             .setName(DEFAULT_CACHE_NAME)
>             .setAtomicityMode(TRANSACTIONAL)
>             .setBackups(1)
>             .setCacheMode(PARTITIONED);
>
>         coordinator.getOrCreateCache(cacheCfg);
>
>         TooManyOpenFilesTcpCommunicationSpi failingSpi =
>             (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi();
>
>         IgniteEx txNode = grid(1);
>
>         try (Transaction tx = txNode.transactions().txStart(
>             TransactionConcurrency.PESSIMISTIC,
>             TransactionIsolation.REPEATABLE_READ
>         )) {
>             IgniteCache<Object, Object> txCache = txNode.cache(DEFAULT_CACHE_NAME);
>
>             // The first write happens while communication is still healthy.
>             txCache.put(1, 1);
>
>             // From now on every send attempted by node 2 fails.
>             failingSpi.throwException.set(true);
>
>             for (int key = 2; key <= 4; key++)
>                 txCache.put(key, 2);
>
>             // Hangs here.
>             tx.commit();
>         }
>
>         for (int nodeIdx = 0; nodeIdx < 3; nodeIdx++) {
>             IgniteCache<Object, Object> nodeCache = grid(nodeIdx).cache(DEFAULT_CACHE_NAME);
>
>             assertEquals(1, nodeCache.get(1));
>             assertEquals(2, nodeCache.get(2));
>         }
>     }
>
>     /**
>      * Communication SPI that, once {@link #throwException} is set, rejects every outgoing message
>      * with an exception chain ending in a "Too many open files" {@link SocketException}.
>      */
>     private static class TooManyOpenFilesTcpCommunicationSpi extends TcpCommunicationSpi {
>         /** When {@code true}, both {@code sendMessage} overloads throw instead of sending. */
>         private final AtomicBoolean throwException = new AtomicBoolean();
>
>         /** {@inheritDoc} */
>         @Override public void sendMessage(ClusterNode node, Message msg) throws IgniteSpiException {
>             if (throwException.get()) {
>                 throw getException(node);
>             }
>
>             super.sendMessage(node, msg);
>         }
>
>         /** {@inheritDoc} */
>         @Override public void sendMessage(
>             ClusterNode node,
>             Message msg,
>             IgniteInClosure<IgniteException> ackC
>         ) throws IgniteSpiException {
>             if (throwException.get()) {
>                 throw getException(node);
>             }
>
>             super.sendMessage(node, msg, ackC);
>         }
>
>         /**
>          * Builds the exception thrown in place of a send: an {@link IgniteSpiException} caused by an
>          * {@code IgniteCheckedException}, which in turn is caused by a {@link SocketException}.
>          *
>          * @param node Node the message was addressed to; its id is embedded in both messages.
>          * @return Exception to throw from {@code sendMessage}.
>          */
>         private IgniteSpiException getException(ClusterNode node) {
>             SocketException rootCause = new SocketException("Too many open files");
>
>             String connectFailureMsg = "Failed to connect to node (is node still alive?). " +
>                 "Make sure that each ComputeTask and cache Transaction has a timeout set " +
>                 "in order to prevent parties from waiting forever in case of network issues " +
>                 "[nodeId=" + node.id() + ", addrs=null]";
>
>             IgniteCheckedException connectFailure = new IgniteCheckedException(connectFailureMsg, rootCause);
>
>             return new IgniteSpiException("Failed to send message to remote node: " + node.id(), connectFailure);
>         }
>     }
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)