You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@ignite.apache.org by 赵剑 <it...@gmail.com> on 2019/05/23 01:58:11 UTC

When the client frequently has FullGC, it blocks all requests from the server. "Possible starvation in striped pool"

Hello
When the client frequently has FullGC, it blocks all requests from the
server. I have tried modifying several server parameters to solve this problem.
The modified parameters are as follows:
slowClientQueueLimit
socketWriteTimeout
clientFailureDetectionTimeout
failureDetectionTimeout

When the blocking occurs, the server log contains a large number of warnings like:
"[2019-05-21T16:36:04,880][WARN ][grid-timeout-worker-#10343][G] >>> Possible starvation in striped pool."

Please refer to the attachment for the full log, 10.110.118.53 in the log
is the FullGC test node.

What parameters can be modified to avoid similar problems? What adjustments
do I need to make?

Thank you very much.

Ignite Version 2.4.0

server config file:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring bean definition of the IgniteConfiguration used by the server nodes. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:util="http://www.springframework.org/schema/util"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
                           http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd">

    <bean id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
        <!-- Failure detection timeouts are currently left at their defaults (overrides disabled). -->
        <!-- <property name="failureDetectionTimeout" value="60000"/> -->
        <!-- <property name="clientFailureDetectionTimeout" value="60000"/> -->
        <property name="segmentationPolicy" value="RESTART_JVM"/>

        <!-- Thread pool sizes. -->
        <property name="publicThreadPoolSize" value="192"/>
        <property name="systemThreadPoolSize" value="192"/>
        <property name="dataStreamerThreadPoolSize" value="96"/>
        <property name="stripedPoolSize" value="10240"/>
        <property name="rebalanceThreadPoolSize" value="4"/>

        <!-- Storage: default data region (persistence enabled) plus WAL and checkpoint settings. -->
        <property name="dataStorageConfiguration">
            <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
                <property name="defaultDataRegionConfiguration">
                    <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
                        <property name="name" value="qipu_entity_cache_data_region"/>
                        <property name="initialSize" value="#{10L * 1024 * 1024 * 1024}"/>
                        <property name="maxSize" value="#{100L * 1024 * 1024 * 1024}"/>
                        <property name="persistenceEnabled" value="true"/>
                        <property name="metricsEnabled" value="true"/>
                        <property name="checkpointPageBufferSize" value="#{1 * 1024 * 1024 * 1024}"/>
                    </bean>
                </property>
                <property name="storagePath" value="/home/qipu/production/apache-ignite-2.4.0/persistence"/>
                <property name="walPath" value="/home/qipu/production/apache-ignite-2.4.0/wal"/>
                <property name="walArchivePath" value="/home/qipu/production/apache-ignite-2.4.0/wal/archive"/>
                <property name="walSegmentSize" value="#{64 * 1024 * 1024}"/>
                <property name="pageSize" value="#{4 * 1024}"/>
                <property name="walSegments" value="#{20}"/>
                <property name="walMode" value="LOG_ONLY"/>
                <property name="metricsEnabled" value="true"/>
                <property name="writeThrottlingEnabled" value="false"/>
                <property name="checkpointThreads" value="8"/>
                <property name="walThreadLocalBufferSize" value="#{1 * 1024 * 1024}"/>
            </bean>
        </property>

        <!-- The single cache, placed in the data region declared above. -->
        <property name="cacheConfiguration">
            <bean class="org.apache.ignite.configuration.CacheConfiguration">
                <property name="dataRegionName" value="qipu_entity_cache_data_region"/>
                <property name="name" value="qipu_entity_cache"/>
                <property name="cacheMode" value="PARTITIONED"/>
                <property name="partitionLossPolicy" value="IGNORE"/>
                <property name="atomicityMode" value="ATOMIC"/>
                <property name="backups" value="2"/>
                <property name="writeSynchronizationMode" value="FULL_SYNC"/>
                <property name="statisticsEnabled" value="true"/>
                <property name="rebalanceBatchSize" value="#{2 * 1024 * 1024}"/>
                <property name="rebalanceThrottle" value="100"/>
                <property name="rebalanceMode" value="ASYNC"/>
                <property name="rebalanceTimeout" value="40000"/>
            </bean>
        </property>

        <!-- Communication SPI settings (write timeout, queue limits, paired connections). -->
        <property name="communicationSpi">
            <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
                <property name="socketWriteTimeout" value="1500"/>
                <property name="messageQueueLimit" value="102400"/>
                <property name="slowClientQueueLimit" value="4000"/>
                <property name="usePairedConnections" value="true"/>
            </bean>
        </property>

        <!-- Discovery SPI with a static address list. -->
        <property name="discoverySpi">
            <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
                <property name="forceServerMode" value="true"/>
                <property name="ipFinder">
                    <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
                        <property name="addresses">
                            <list>
                                <!-- In distributed environment, replace with actual host IP address. -->
                                <value>10.16.133.179:47500..47509</value>
                                <value>10.16.133.180:47500..47509</value>
                                <value>10.16.133.181:47500..47509</value>
                                <value>10.16.133.182:47500..47509</value>
                                <value>10.16.133.183:47500..47509</value>
                                <value>10.16.133.184:47500..47509</value>
                                <value>10.16.133.185:47500..47509</value>
                                <value>10.16.133.186:47500..47509</value>
                                <value>10.16.133.187:47500..47509</value>
                                <value>10.16.133.188:47500..47509</value>
                            </list>
                        </property>
                    </bean>
                </property>
            </bean>
        </property>

        <!-- Log4j2 logger, constructed with the path of its configuration file. -->
        <property name="gridLogger">
            <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
                <constructor-arg type="java.lang.String" value="/config/ignite-log4j2.xml"/>
            </bean>
        </property>
    </bean>
</beans>



client code:

// Client start-up excerpt: builds an Ignite client configuration, starts the
// node, obtains the cache and performs timed asynchronous reads and writes.
// Names such as IgniteCluster, IgniteCacheName, ignite, igniteCache, cache,
// key, entry and putTimeOut are declared outside this excerpt.
IgniteCluster igniteCluster = IgniteCluster.valueOf("CLUSTER_A");
boolean usePairedConnections = true;
int messageQueueLimit = 20480;

// Echo the effective settings. The string literal is kept on whole lines:
// mail wrapping had split it mid-literal, which does not compile.
System.out.println("ignite.cluster: " + igniteCluster
        + " , ignite.usePairedConnections: " + usePairedConnections
        + " , ignite.messageQueueLimit: " + messageQueueLimit);

// Start this JVM's Ignite node in client mode.
Ignition.setClientMode(true);

IgniteConfiguration cfg = new IgniteConfiguration();

// Discovery: static address list taken from the cluster's comma-separated server setting.
TcpDiscoverySpi spi = new TcpDiscoverySpi();
TcpDiscoveryVmIpFinder finder = new TcpDiscoveryVmIpFinder();
finder.setAddresses(Arrays.asList(igniteCluster.getConfig().getServer().split(",")));
spi.setIpFinder(finder);

// Communication: apply the paired-connection flag and message queue limit set above.
TcpCommunicationSpi tcpCommunicationSpi = new TcpCommunicationSpi();
tcpCommunicationSpi.setUsePairedConnections(usePairedConnections);
tcpCommunicationSpi.setMessageQueueLimit(messageQueueLimit);

cfg.setDiscoverySpi(spi);
cfg.setCommunicationSpi(tcpCommunicationSpi);
ignite = Ignition.start(cfg);

igniteCache = ignite.getOrCreateCache(IgniteCacheName.valueOf("QIPU_ENTITY_CACHE").toString());

// NOTE(review): the operations below use `cache`, while the line above assigns
// `igniteCache` - presumably the same cache instance; confirm against the full source.

// read operation
byte[] value = cache.getAsync(key).get(500);
// write operation
cache.putAsync(entry.getKey(), entry.getValue()).get(putTimeOut);

Re: When the client frequently has FullGC, it blocks all requests from the server. "Possible starvation in striped pool"

Posted by Ilya Kasnacheev <il...@gmail.com>.
Hello!

I think that this will only be mitigated by moving to some kind of thin
client. Alternatively, you can try moving the thick client out of the JVM that
is having long GCs (into a separate JVM?).

Regards,
-- 
Ilya Kasnacheev


чт, 23 мая 2019 г. в 04:59, 赵剑 <it...@gmail.com>:

> Hello
> When the client frequently has FullGC, it blocks all requests from the
> server. I try to modify many server parameters to solve this problem.
> The modified parameters are as follows:
> slowClientQueueLimit
> socketWriteTimeout
> clientFailureDetectionTimeout
> failureDetectionTimeout
>
> The blocking occurred is a large number of "[2019-05-21T16:36:04,880][WARN
> ][grid-timeout-worker-#10343][G] >>> Possible starvation in striped pool."
>
> Please refer to the attachment for the full log, 10.110.118.53 in the log
> is the FullGC test node.
>
> What parameters can be modified to avoid similar problems? What
> adjustments do I need to make?
>
> Thank you very much.
>
> Ignite Version 2.4.0
>
> server config file:
>
> <?xml version="1.0" encoding="UTF-8"?>
> <beans xmlns="http://www.springframework.org/schema/beans"
>        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
>        xmlns:util="http://www.springframework.org/schema/util"
>        xsi:schemaLocation="http://www.springframework.org/schema/beans
> http://www.springframework.org/schema/beans/spring-beans.xsd
>         http://www.springframework.org/schema/util
> http://www.springframework.org/schema/util/spring-util.xsd
>         ">
>     <bean id="ignite.cfg"
> class="org.apache.ignite.configuration.IgniteConfiguration">
>      <!-- <property name="failureDetectionTimeout" value="60000"/> -->
>      <!-- <property name="clientFailureDetectionTimeout" value="60000"/>
> -->
>      <property name="segmentationPolicy" value="RESTART_JVM"/>
> <property name="publicThreadPoolSize" value="192"/>
> <property name="systemThreadPoolSize" value="192"/>
>
> <property name="dataStreamerThreadPoolSize" value="96"/>
> <property name="stripedPoolSize" value="10240"/>
>
> <property name="rebalanceThreadPoolSize" value="4" />
>         <property name="dataStorageConfiguration">
>             <bean
> class="org.apache.ignite.configuration.DataStorageConfiguration">
>                 <property name="defaultDataRegionConfiguration">
>                     <bean
> class="org.apache.ignite.configuration.DataRegionConfiguration">
> <property name="name" value="qipu_entity_cache_data_region"/>
> <property name="initialSize" value="#{10L * 1024 * 1024 * 1024}"/>
> <property name="maxSize" value="#{100L * 1024 * 1024 * 1024}"/>
>                         <property name="persistenceEnabled" value="true"/>
> <property name="metricsEnabled" value="true"/>
> <property name="checkpointPageBufferSize" value="#{1 * 1024 * 1024 *
> 1024}"/>
>                     </bean>
>                 </property>
> <property name="storagePath"
> value="/home/qipu/production/apache-ignite-2.4.0/persistence"/>
> <property name="walPath"
> value="/home/qipu/production/apache-ignite-2.4.0/wal"/>
> <property name="walArchivePath"
> value="/home/qipu/production/apache-ignite-2.4.0/wal/archive"/>
> <property name="walSegmentSize" value="#{64 * 1024 * 1024}"/>
> <property name="pageSize" value="#{4 * 1024}"/>
> <property name="walSegments" value="#{20}"/>
> <property name="walMode" value="LOG_ONLY"/>
> <property name="metricsEnabled" value="true"/>
> <property name="writeThrottlingEnabled" value="false"/>
> <property name="checkpointThreads" value="8"/>
> <property name="walThreadLocalBufferSize" value="#{1 * 1024 * 1024}"/>
>             </bean>
>         </property>
> <property name="cacheConfiguration">
> <bean class="org.apache.ignite.configuration.CacheConfiguration">
> <property name="dataRegionName" value="qipu_entity_cache_data_region"/>
> <property name="name" value="qipu_entity_cache"/>
> <property name="cacheMode" value="PARTITIONED"/>
> <property name="partitionLossPolicy" value="IGNORE"/>
> <property name="atomicityMode" value="ATOMIC"/>
> <property name="backups" value="2"/>
> <property name="writeSynchronizationMode" value="FULL_SYNC"/>
> <property name="statisticsEnabled" value="true"/>
> <property name="rebalanceBatchSize" value="#{2 * 1024 * 1024}"/>
> <property name="rebalanceThrottle" value="100"/>
> <property name="rebalanceMode" value="ASYNC"/>
> <property name="rebalanceTimeout" value="40000"/>
> </bean>
> </property>
> <property name="communicationSpi">
> <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
> <property name="socketWriteTimeout" value="1500"/>
> <property name="messageQueueLimit" value="102400"/>
> <property name="slowClientQueueLimit" value="4000"/>
> <property name="usePairedConnections" value="true"/>
> </bean>
> </property>
>         <property name="discoverySpi">
>             <bean
> class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
> <property name="forceServerMode" value="true"/>
> <property name="ipFinder">
> <bean
> class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
>                         <property name="addresses">
>                             <list>
>                                 <!-- In distributed environment, replace
> with actual host IP address. -->
> <value>10.16.133.179:47500..47509</value>
> <value>10.16.133.180:47500..47509</value>
> <value>10.16.133.181:47500..47509</value>
> <value>10.16.133.182:47500..47509</value>
> <value>10.16.133.183:47500..47509</value>
> <value>10.16.133.184:47500..47509</value>
> <value>10.16.133.185:47500..47509</value>
> <value>10.16.133.186:47500..47509</value>
> <value>10.16.133.187:47500..47509</value>
> <value>10.16.133.188:47500..47509</value>
>                             </list>
>                         </property>
>                     </bean>
> </property>
>             </bean>
>         </property>
> <property name="gridLogger">
> <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
> <constructor-arg type="java.lang.String"
> value="/config/ignite-log4j2.xml"/>
> </bean>
> </property>
>     </bean>
> </beans>
>
>
>
> client code:
>
> IgniteCluster igniteCluster = IgniteCluster.valueOf("CLUSTER_A");
>         boolean usePairedConnections = true;
>         int messageQueueLimit = 20480;
>         System.out.println("ignite.cluster: "+igniteCluster+" ,
> ignite.usePairedConnections: "+usePairedConnections+" ,
> ignite.messageQueueLimit: "+messageQueueLimit);
>
>         Ignition.setClientMode(true);
>
>         IgniteConfiguration cfg = new IgniteConfiguration();
>         TcpDiscoverySpi spi = new TcpDiscoverySpi();
>
>         TcpDiscoveryVmIpFinder finder = new TcpDiscoveryVmIpFinder();
>
>
> finder.setAddresses(Arrays.asList(igniteCluster.getConfig().getServer().split(",")));
>
>         spi.setIpFinder(finder);
>
>         TcpCommunicationSpi tcpCommunicationSpi = new
> TcpCommunicationSpi();
>         tcpCommunicationSpi.setUsePairedConnections(usePairedConnections);
>         tcpCommunicationSpi.setMessageQueueLimit(messageQueueLimit);
>
>         cfg.setDiscoverySpi(spi);
>         cfg.setCommunicationSpi(tcpCommunicationSpi);
>         ignite = Ignition.start(cfg);
>
>         igniteCache =
> ignite.getOrCreateCache(IgniteCacheName.valueOf("QIPU_ENTITY_CACHE").toString());
>
>         // read operation
>         byte[] value = cache.getAsync(key).get(500);
>         // write operation
>         cache.putAsync(entry.getKey(), entry.getValue()).get(putTimeOut);
>