You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by nc...@apache.org on 2014/02/27 22:40:37 UTC
git commit: AMBARI-4858. Add configurations to support Hive running on Tez or Mapreduce (ncole)

Repository: ambari
Updated Branches:
  refs/heads/trunk f5dc33261 -> 0ee9b81f9


AMBARI-4858. Add configurations to support Hive running on Tez or Mapreduce (ncole)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/0ee9b81f
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/0ee9b81f
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/0ee9b81f

Branch: refs/heads/trunk
Commit: 0ee9b81f9f6064a9bc982135af6f5d5664a74108
Parents: f5dc332
Author: Nate Cole <nc...@hortonworks.com>
Authored: Wed Feb 26 19:40:20 2014 -0500
Committer: Nate Cole <nc...@hortonworks.com>
Committed: Thu Feb 27 16:28:58 2014 -0500

----------------------------------------------------------------------
 .../services/HIVE/configuration/hive-site.xml   | 106 ++++++++++++++++++-
 .../services/HIVE/configuration/mapred-site.xml |  79 ++++++++++++++
 .../stacks/HDP/2.1.1/services/HIVE/metainfo.xml |   1 +
 3 files changed, 184 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/0ee9b81f/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/hive-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/hive-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/hive-site.xml
index 07f37ca..f0be795 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/hive-site.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/hive-site.xml
@@ -165,6 +165,12 @@ limitations under the License.
   </property>
 
   <property>
+    <name>hive.enforce.sortmergebucketmapjoin</name>
+    <value>true</value>
+    <description>If the user asked for sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not</description>
+  </property>
+
+  <property>
     <name>hive.map.aggr</name>
     <value>true</value>
     <description>Whether to use map-side aggregation in Hive Group By queries.</description>
@@ -234,7 +240,7 @@ limitations under the License.
 
   <property>
     <name>hive.optimize.reducededuplication.min.reducer</name>
-    <value>1</value>
+    <value>4</value>
     <description>Reduce deduplication merges two RSs by moving key/parts/reducer-num of the child RS to parent RS.
       That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR.
       The optimization will be disabled if number of reducers is less than specified value.
@@ -262,7 +268,7 @@ limitations under the License.
 
   <property>
     <name>hive.vectorized.execution.enabled</name>
-    <value>false</value>
+    <value>true</value>
     <description>This flag controls the vectorized mode of query execution as documented in HIVE-4160 (as of Hive 0.13.0)
     </description>
   </property>
@@ -300,4 +306,100 @@ limitations under the License.
     <description>Pre Execute Hook for Tests</description>
   </property>
 
+  <property>
+    <name>hive.vectorized.groupby.maxentries</name>
+    <value>1024</value>
+    <description>Max number of entries in the vector group by aggregation hashtables.
+      Exceeding this will trigger a flush irrespective of memory pressure condition.
+    </description>
+  </property>
+
+  <property>
+    <name>hive.vectorized.groupby.checkinterval</name>
+    <value>1024</value>
+    <description>Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed.</description>
+  </property>
+
+  <property>
+    <name>hive.vectorized.groupby.flush.percent</name>
+    <value>1.0</value>
+    <description>Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded.</description>
+  </property>
+
+  <property>
+    <name>hive.stats.autogather</name>
+    <value>true</value>
+    <description>A flag to gather statistics automatically during the INSERT OVERWRITE command.</description>
+  </property>
+
+  <property>
+    <name>hive.tez.container.size</name>
+    <value>3000000000</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.tez.input.format</name>
+    <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.tez.java.opts</name>
+    <value>-server -Xmx1024m -Djava.net.preferIPv4Stack=true</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.compute.query.using.stats</name>
+    <value>true</value>
+    <description>
+      When set to true, Hive will answer a few queries like count(1) purely using stats
+      stored in metastore. For basic stats collection, set the config hive.stats.autogather to true.
+      For more advanced stats collection, you need to run analyze table queries.
+    </description>
+  </property>
+
+  <property>
+    <name>hive.orc.splits.include.file.footer</name>
+    <value>false</value>
+    <description>
+      If turned on, splits generated by ORC will include metadata about the stripes in the file. This
+      data is read remotely (from the client or HS2 machine) and sent to all the tasks.
+    </description>
+  </property>
+
+  <property>
+    <name>hive.limit.pushdown.memory.usage</name>
+    <value>0.04</value>
+    <description>The max memory to be used for hash in RS operator for top K selection.</description>
+  </property>
+
+  <property>
+    <name>hive.jar.directory</name>
+    <value>hdfs:///apps/hive/install</value>
+    <description>
+      This is the location where Hive in Tez mode will look for a site-wide
+      installed Hive instance.
+    </description>
+  </property>
+
+  <property>
+    <name>hive.server2.tez.default.queues</name>
+    <value></value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.server2.tez.sessions.per.default.queue</name>
+    <value>1</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.server2.tez.initialize.default.sessions</name>
+    <value>false</value>
+    <description></description>
+  </property>
+
 </configuration>

http://git-wip-us.apache.org/repos/asf/ambari/blob/0ee9b81f/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/mapred-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/mapred-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/mapred-site.xml
new file mode 100644
index 0000000..a8fc002
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/mapred-site.xml
@@ -0,0 +1,79 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration>
+  <property>
+    <name>mapred.reduce.parallel.copies</name>
+    <value>30</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapred.job.shuffle.input.buffer.percent</name>
+    <value>0.6</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapred.job.reduce.input.buffer.percent</name>
+    <value>0.2</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapred.map.child.java.opts</name>
+    <value>-server -Xmx1536m -Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC</value>
+    <description></description>
+  </property>
+    
+
+  <property>
+    <name>mapred.reduce.child.java.opts</name>
+    <value>-server -Xmx1536m -Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapreduce.map.memory.mb</name>
+    <value>2048</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapreduce.reduce.memory.mb</name>
+    <value>2048</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapreduce.map.output.compress</name>
+    <value>false</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapreduce.map.output.compress.codec</name>
+    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
+    <description></description>
+  </property>
+
+</configuration>

http://git-wip-us.apache.org/repos/asf/ambari/blob/0ee9b81f/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/metainfo.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/metainfo.xml b/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/metainfo.xml
index 5aba96f..b4e1788 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/metainfo.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/metainfo.xml
@@ -75,6 +75,7 @@
         <config-type>hive-log4j</config-type>
         <config-type>hive-exec-log4j</config-type>
         <config-type>global</config-type>
+        <config-type>mapred-site</config-type>
       </configuration-dependencies>
     </service>