Posted to user@tez.apache.org by oracle cool <or...@gmail.com> on 2014/05/15 08:41:50 UTC

Problem in submitting the hive query

Hi

We are running Hadoop 2.2 and Hive 0.13 with Hive Server 1.


When we enabled Tez 0.4 (tables are in RCFile, no vectorization) and
executed the query, we saw a major improvement in query time. But when we
run 10 concurrent sessions that each execute the same query
(select count(*) from tab), the first batch of 10 queries completes,
while the next batch hangs after 3 queries.

-------------
Via Hive Server 1 JDBC: 10 connections, each executing the same query
after 1 minute.

Query execution time in the first batch is 15 seconds.

set hive.execution.engine=tez;
select count(*) from table1;

---------------


yarn-site.xml
-------------------

<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>proj233:8025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>proj233:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>proj233:8040</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>40960</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>24</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>40960</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>
</configuration>


Mapred-site.xml
---------------------
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn-tez</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
  </property>
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <value>32</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.command-opts</name>
    <value>-Xmx768m</value>
  </property>
  <property>
    <name>mapreduce.map.cpu.vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>mapreduce.reduce.cpu.vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx768m</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx768m</value>
  </property>
</configuration>


hdfs-site.xml
--------------------
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/Data0/yarn_data/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/Data0/yarn_data/hdfs/datanode,/Data1/yarn_data/hdfs/datanode,/Data3/yarn_data/hdfs/datanode,/Data4/yarn_data/hdfs/datanode</value>
  </property>
  <property>
    <name>dfs.heartbeat.interval</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.safemode.threshold.pct</name>
    <value>1.0f</value>
  </property>
  <property>
    <name>dfs.block.size</name>
    <value>134217728</value>
  </property>
  <property>
    <name>dfs.datanode.max.xcievers</name>
    <value>16384</value>
  </property>
  <property>
    <name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.datanode.handler.count</name>
    <value>10</value>
  </property>
  <property>
    <name>dfs.datanode.socket.write.timeout</name>
    <value>0</value>
  </property>
</configuration>



Thanks
SS

RE: Problem in submitting the hive query

Posted by Bikas Saha <bi...@hortonworks.com>.
Does the cluster have enough capacity for 10 sessions in HiveServer?
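
As a rough sanity check against the yarn-site.xml and mapred-site.xml you
posted (assuming ~1024 MB per AM and per task container, and the
CapacityScheduler's default memory-only accounting):

  10 session AMs     x 1024 MB = 10240 MB
  10 task containers x 1024 MB = 10240 MB
  total                        ~ 20 GB  (vs. 40960 MB per NodeManager)

Memory alone looks sufficient on paper. Note, however, that
yarn.nodemanager.resource.cpu-vcores is set to 1; if your scheduler were
configured to also count vcores (DominantResourceCalculator), a single
vcore per node would cap each node at one container and later sessions
would simply queue.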

You may want to reduce tez.am.container.session.delay-allocation-millis in
your tez-site.xml to ensure that unused resources are released quickly by a
session, so that YARN can give them to a different session.
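
For example, in tez-site.xml (the value below is only illustrative; tune
it to your workload):

<property>
  <name>tez.am.container.session.delay-allocation-millis</name>
  <!-- illustrative value: release idle containers after ~5 seconds -->
  <value>5000</value>
</property>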



You can also speed up select count(*) by enabling statistics in Hive,
because such queries can then be answered directly from the metastore
without launching a job.
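
For example, something along these lines (a sketch; exact behavior
depends on your Hive 0.13 configuration):

set hive.stats.autogather=true;
analyze table table1 compute statistics;
-- with up-to-date basic stats, count(*) can be answered from the metastore
set hive.compute.query.using.stats=true;
select count(*) from table1;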



Lastly, HiveServer questions should be sent to the Hive user mailing list;
they will know better about HiveServer internals and known issues like
hanging queries.



Bikas

Re: Problem in submitting the hive query

Posted by oracle cool <or...@gmail.com>.
<configuration>
  <property>
    <name>tez.version</name>
    <value>0.4.0</value>
  </property>
  <property>
    <name>tez.lib.uris</name>
    <value>${fs.default.name}/apps/tez-0.4.0-incubating-full,${fs.default.name}/apps/tez-0.4.0-incubating-full/lib/</value>
  </property>
  <!-- Client Submission timeout value when submitting DAGs to a session -->
  <property>
    <name>tez.session.client.timeout.secs</name>
    <value>-1</value>
  </property>
  <!-- prewarm stuff -->
</configuration>