You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@ignite.apache.org by Lijun Cao <64...@qq.com> on 2018/08/01 01:48:49 UTC
Re: Utilization of CPU problem

Hi Evgenii,

Here is my full benchmark and settings.

JVM parameters info:
I didn’t set any JVM parameters, just use the default.

Cluster info:
The cluster is set up by 3 nodes. Each node has got a  4 core CPU with 32GB memory and default region size is set to 16GB. Here is the settings of each node.
<?xml version="1.0" encoding="UTF-8"?>

<!--
       Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="
       http://www.springframework.org/schema/beans
       http://www.springframework.org/schema/beans/spring-beans.xsd">
    <!--
                 Alter configuration below as needed.
    -->
    <bean id="grid.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
    <!-- Configure query thread pool. -->
    <property name="queryThreadPoolSize" value="1000"/>

      <property name="discoverySpi">
         <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
           <property name="ipFinder">
              <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
                <property name="addresses">
                  <list>
                    <!--  Explicitly specifying address of a local node to let it start and operate normally even if there is no more nodes in the cluster  You can also optionally specify an individual port or port range.-->
                    <value>Ignite01</value>
                  </list>
                </property>
              </bean>
           </property>
         </bean>
      </property>
      <!-- Redefining maximum memory size for the cluster node usage. -->
      <property name="dataStorageConfiguration">
        <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
          <!-- Redefining the default region's settings -->
          <property name="defaultDataRegionConfiguration">
            <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
              <property name="name" value="Default_Region"/>
              <!-- Setting the size of the default region to 16GB. -->
              <property name="maxSize" value="#{16L * 1024 * 1024 * 1024}”/>
              <!-- Enabling Apache Ignite native persistence. -->
              <property name="persistenceEnabled" value="true"/>
            </bean>
          </property>
        </bean>
      </property>
   </bean>
</beans>


The data of SSB benchmark 
is generated by ssb-dbgen(https://github.com/electrum/ssb-dbgen <https://github.com/electrum/ssb-dbgen>) and the scale is 20.

The DDL of SSB benchmark for Ignite is:
CREATE TABLE PART(
P_PARTKEY INTEGER PRIMARY KEY,
P_NAME VARCHAR(22),
P_MFGR CHAR(6),
P_CATEGORY CHAR(7),
P_BRAND CHAR(9),
P_COLOR VARCHAR(11),
P_TYPE VARCHAR(25),
P_SIZE NUMERIC(50,1),
P_CONTAINER CHAR(10)
) WITH "TEMPLATE=REPLICATED";
 
CREATE TABLE supplier(
s_suppkey NUMERIC PRIMARY KEY,
s_name CHAR(25),
s_address VARCHAR(25),
s_city CHAR(10),
s_nation CHAR(15),
s_region CHAR(12),
s_phone CHAR(15)
) WITH "TEMPLATE=REPLICATED";
 
CREATE TABLE customer(
c_custkey NUMERIC PRIMARY KEY,
c_name VARCHAR(25),
c_address VARCHAR(25),
c_city CHAR(10),
c_nation CHAR(15),
c_region CHAR(12),
c_phone CHAR(15),
c_mktsegment CHAR(10)
) WITH "TEMPLATE=REPLICATED";
 
CREATE TABLE date(
d_datekey CHAR(8) PRIMARY KEY,
d_date CHAR(18),
d_dayofweek CHAR(8),
d_month CHAR(9),
d_year INTEGER,
d_yearmonthnum NUMERIC,
d_yearmonth CHAR(7),
d_daynuminweek NUMERIC,
d_daynuminmonth NUMERIC,
d_daynuminyear NUMERIC,
d_monthuminyear NUMERIC,
d_weeknuminyear NUMERIC,
d_sellingseason CHAR(12),
d_lastdayinweekfl SMALLINT,
d_lastdayinmonthfl SMALLINT,
d_holidayfl SMALLINT,
d_weekdayfl SMALLINT
) WITH "TEMPLATE=REPLICATED";
 
CREATE TABLE lineorder(
lo_orderkey NUMERIC,
lo_linenumber NUMERIC,
lo_custkey NUMERIC,
lo_partkey INTEGER,
lo_suppkey NUMERIC,
lo_orderdate VARCHAR(10),
lo_orderpriority CHAR(15),
lo_shippriority CHAR(1),
lo_quantity NUMERIC,
lo_extendedprice NUMERIC,
lo_ordtotalprice NUMERIC,
lo_discount NUMERIC,
lo_revenue NUMERIC,
lo_supplycost NUMERIC,
lo_tax NUMERIC,
lo_commitdate VARCHAR(10),
lo_shipmode CHAR(10),
PRIMARY KEY(lo_orderkey,lo_linenumber)
) WITH "TEMPLATE=PARTITIONED";

The query language of SSB benchmark is:
Q1.1
select sum(lo_extendedprice*lo_discount) as revenue
 from lineorder, date
 where lo_orderdate = d_datekey
 and d_year = 1993
 and lo_discount BETWEEN 1 and 3
 and lo_quantity < 25;
 
Q1.2
select sum(lo_extendedprice*lo_discount) as revenue
 from lineorder, date
 where lo_orderdate = d_datekey
 and d_yearmonthnum = 199401
 and lo_discount BETWEEN 4 and 6
 and lo_quantity between 26 and 35;
 
Q1.3
select sum(lo_extendedprice*lo_discount) as revenue
 from lineorder, date
 where lo_orderdate = d_datekey
 and d_weeknuminyear = 6
 and d_year = 1994
 and lo_discount between 5 and 7
 and lo_quantity between 26 and 35;
 
Q2.1
select sum(lo_revenue), d_year, p_brand from lineorder, date, part, supplier
  where lo_orderdate = d_datekey
  and lo_partkey = p_partkey
  and lo_suppkey = s_suppkey
  and p_category = 'MFGR#12'
  and s_region = 'AMERICA'
  group by d_year, p_brand
  order by d_year, p_brand;
 
Q2.2
select sum(lo_revenue), d_year, p_brand
 from lineorder, date, part, supplier
 where lo_orderdate = d_datekey
 and lo_partkey = p_partkey
 and lo_suppkey = s_suppkey
 and p_brand between
 'MFGR#2221' and 'MFGR#2228'
 and s_region = 'ASIA'
 group by d_year, p_brand
 order by d_year, p_brand;
 
Q2.3
select sum(lo_revenue), d_year, p_brand
 from lineorder, date, part, supplier
 where lo_orderdate = d_datekey
 and lo_partkey = p_partkey
 and lo_suppkey = s_suppkey
 and p_brand = 'MFGR#2221'
 and s_region = 'EUROPE'
 group by d_year, p_brand
 order by d_year, p_brand;
 
Q3.1
select c_nation, s_nation, d_year, sum(lo_revenue)
 as revenue from customer, lineorder, supplier, date
 where lo_custkey = c_custkey
 and lo_suppkey = s_suppkey
 and lo_orderdate = d_datekey
 and c_region = 'ASIA' and s_region = 'ASIA'
 and d_year >= 1992 and d_year <= 1997
 group by c_nation, s_nation, d_year
 order by d_year asc, revenue desc;
 
Q3.2
select c_city, s_city, d_year, sum(lo_revenue) as revenue
 from customer, lineorder, supplier, date
 where lo_custkey = c_custkey
 and lo_suppkey = s_suppkey
 and lo_orderdate = d_datekey
 and c_nation = 'UNITED STATES'
 and s_nation = 'UNITED STATES'
 and d_year >= 1992 and d_year <= 1997
 group by c_city, s_city, d_year
 order by d_year asc, revenue desc;
 
Q3.3
select c_city, s_city, d_year, sum(lo_revenue) as revenue
 from customer, lineorder, supplier, date
 where lo_custkey = c_custkey
 and lo_suppkey = s_suppkey and lo_orderdate = d_datekey
 and (c_city='UNITED KI1'or c_city='UNITED KI5')
 and (s_city='UNITED KI1'or s_city='UNITED KI5')
 and d_year >= 1992 and d_year <= 1997
 group by c_city, s_city, d_year
 order by d_year asc, revenue desc;
 
Q3.4
select c_city, s_city, d_year, sum(lo_revenue) as revenue
 from customer, lineorder, supplier, date
 where lo_custkey = c_custkey
 and lo_suppkey = s_suppkey
 and lo_orderdate = d_datekey
 and (c_city='UNITED KI1' or
 c_city='UNITED KI5')
 and (s_city='UNITED KI1' or
 s_city='UNITED KI5')
 and d_yearmonth = 'Dec1997'
 group by c_city, s_city, d_year
 order by d_year asc, revenue desc;
 
Q4.1
select d_year, c_nation, sum(lo_revenue -
 lo_supplycost) as profit from date, customer, supplier,
 part, lineorder
 where lo_custkey = c_custkey
 and lo_suppkey = s_suppkey
 and lo_partkey = p_partkey
 and lo_orderdate = d_datekey
 and c_region = 'AMERICA'
 and s_region = 'AMERICA'
 and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
 group by d_year, c_nation
 order by d_year, c_nation
 
Q4.2
select d_year, s_nation, p_category,
 sum(lo_revenue - lo_supplycost) as profit
 from date, customer, supplier, part, lineorder
 where lo_custkey = c_custkey
 and lo_suppkey = s_suppkey
 and lo_partkey = p_partkey
 and lo_orderdate = d_datekey
 and c_region = 'AMERICA'
 and s_region = 'AMERICA'
 and (d_year = 1997 or d_year = 1998)
 and (p_mfgr = 'MFGR#1'
 or p_mfgr = 'MFGR#2')
 group by d_year, s_nation, p_category
 order by d_year, s_nation, p_category
 
Q4.3
select d_year, s_city, p_brand1,
 sum(lo_revenue - lo_supplycost) as profit
 from date, customer, supplier, part, lineorder
 where lo_custkey = c_custkey
 and lo_suppkey = s_suppkey
 and lo_partkey = p_partkey
 and lo_orderdate = d_datekey
 and c_region = 'AMERICA'
 and s_nation = 'UNITED STATES'
 and (d_year = 1997 or d_year = 1998)
 and p_category = 'MFGR#14'
 group by d_year, s_city, p_brand1
 order by d_year, s_city, p_brand1

The step of executing SSB benchmark:
I send only one query by using sqlline to the cluster each time. After the query finish, then I send the next one.

Problem Example:
When I send Q3.1(the most time consuming query ) to cluster, you can find that the most of CPU resources on each node is not fully used.




> 在 2018年7月31日，23:09，Evgenii Zhuravlev <e....@gmail.com> 写道：
> 
> can you share the full benchmark and jvm parameters so community could reproduce it?
> 
> Thanks,
> Evgenii
> 
> 2018-07-31 17:28 GMT+03:00 Lijun Cao <641507577@qq.com <ma...@qq.com>>:
> Thank you for your reply.
> 
> This is my SSB benchmark result, the consumption of time is out of my expectation. My expectation is they should have been finished in 1 minute. My query operation is using the sqlline to connect to the 1 node of cluster and then send SQL to excute.
> <屏幕快照 2018-07-31 22.25.19.png>
> 
> 
>> 在 2018年7月31日，20:05，Evgenii Zhuravlev <e.zhuravlev.wk@gmail.com <ma...@gmail.com>> 写道：
>> 
>> What do you mean by "result is terrible"? Looks like you just can't give enough load to the cluster. What kind of operations do you perform on the cluster?
>> 
>> Evgenii
>> 
>> 2018-07-31 14:11 GMT+03:00 Lijun Cao <641507577@qq.com <ma...@qq.com>>:
>> Hi,
>> 
>> I did a SSB benchmark on Ignite database, but the the result is not optimistic (actually it is terrible). My testing data is shown below.
>> 
>> Table Name   |     Records
>> date               |        2556      
>> supplier         |         39,997
>> customer       |        599,941   
>> part                |       999,901  
>> lineorder        |       119,982,610
>> 
>> Then I checked the monitor and found that the utilization of CPU is pretty low, just 20% average usage. But I have 12 core on cluster (the cluster is set up by 3 nodes, each node has got 4 core). 
>> 
>> So how can I improve the utilization of CPU ?
>> 
>> Here is my settings:
>> <!--
>>        Licensed to the Apache Software Foundation (ASF) under one or more
>>   contributor license agreements.  See the NOTICE file distributed with
>>   this work for additional information regarding copyright ownership.
>>   The ASF licenses this file to You under the Apache License, Version 2.0
>>   (the "License"); you may not use this file except in compliance with
>>   the License.  You may obtain a copy of the License at
>> 
>>        http://www.apache.org/licenses/LICENSE-2.0 <http://www.apache.org/licenses/LICENSE-2.0>
>> 
>>   Unless required by applicable law or agreed to in writing, software
>>   distributed under the License is distributed on an "AS IS" BASIS,
>>   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>   See the License for the specific language governing permissions and
>>   limitations under the License.
>> -->
>> 
>> <beans xmlns="http://www.springframework.org/schema/beans <http://www.springframework.org/schema/beans>"
>>        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance <http://www.w3.org/2001/XMLSchema-instance>"
>>        xsi:schemaLocation="
>>        http://www.springframework.org/schema/beans <http://www.springframework.org/schema/beans>
>>        http://www.springframework.org/schema/beans/spring-beans.xsd <http://www.springframework.org/schema/beans/spring-beans.xsd>">
>>     <!--
>>                  Alter configuration below as needed.
>>     -->
>>     <bean id="grid.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
>>     <!-- performance tune -->
>>     <!-- Configure internal thread pool. -->
>>     <property name="publicThreadPoolSize" value="64"/>    
>>     <!-- Configure system thread pool. -->
>>     <property name="systemThreadPoolSize" value="32"/>
>>     <!-- Configure query thread pool. -->
>>     <property name="queryThreadPoolSize" value="64"/>
>> 
>>       <property name="discoverySpi">
>>          <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
>>            <property name="ipFinder">
>>               <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
>>                 <property name="addresses">
>>                   <list>
>>                     <!--  Explicitly specifying address of a local node to let it start and operate normally even if there is no more nodes in the cluster  You can also optionally specify an individual port or port range.-->
>>                     <value>Ignite01</value>
>>                   </list>
>>                 </property>
>>               </bean>
>>            </property>
>>          </bean>
>>       </property>
>>       <!-- Redefining maximum memory size for the cluster node usage. -->  
>>       <property name="dataStorageConfiguration">
>>         <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
>>           <!-- Redefining the default region's settings -->
>>           <property name="defaultDataRegionConfiguration">
>>             <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
>>               <property name="name" value="Default_Region"/>
>>               <!-- Setting the size of the default region to 4GB. -->
>>               <property name="maxSize" value="#{16L * 1024 * 1024 * 1024}"/>
>>               <!-- Enabling Apache Ignite native persistence. -->
>>               <property name="persistenceEnabled" value="true"/>
>>             </bean>
>>           </property>
>>         </bean>
>>       </property>
>> 
>>       <property name="cacheConfiguration">
>>         <bean class="org.apache.ignite.configuration.CacheConfiguration">
>>             <!-- Set a cache name. -->
>>             <property name="name" value="cacheName"/>
>>             <!-- Set cache mode. -->
>>             <property name="cacheMode" value="PARTITIONED"/>
>>             <!-- Set number of backups to 0-->
>>             <property name="backups" value="0"/>
>>         </bean>
>>     </property>
>>    </bean>
>> </beans>
>> 
> 
>