You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kh...@apache.org on 2014/06/07 09:27:46 UTC

svn commit: r1601072 - /hive/trunk/conf/hive-default.xml.template

Author: khorgath
Date: Sat Jun  7 07:27:46 2014
New Revision: 1601072

URL: http://svn.apache.org/r1601072
Log:
HIVE-7165 : Fix hive-default.xml.template errors & omissions (Lefty Leverenz via Sushanth Sowmyan,Thejas Nair)

Modified:
    hive/trunk/conf/hive-default.xml.template

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1601072&r1=1601071&r2=1601072&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Sat Jun  7 07:27:46 2014
@@ -599,19 +599,22 @@
 <property>
   <name>hive.join.emit.interval</name>
   <value>1000</value>
-  <description>How many rows in the right-most join operand Hive should buffer before emitting the join result.</description>
+  <description>How many rows in the right-most join operand Hive should buffer before emitting the
+  join result.</description>
 </property>
 
 <property>
   <name>hive.join.cache.size</name>
   <value>25000</value>
-  <description>How many rows in the joining tables (except the streaming table) should be cached in memory. </description>
+  <description>How many rows in the joining tables (except the streaming table) should be cached
+  in memory.</description>
 </property>
 
 <property>
   <name>hive.smbjoin.cache.rows</name>
   <value>10000</value>
-  <description>How many rows with the same key value should be cached in memory per smb joined table. </description>
+  <description>How many rows with the same key value should be cached in memory per SMB joined
+  table.</description>
 </property>
 
 <property>
@@ -857,8 +860,9 @@
 
 <property>
   <name>hive.auto.convert.join</name>
-  <value>false</value>
-  <description>Whether Hive enables the optimization about converting common join into mapjoin based on the input file size</description>
+  <value>true</value>
+  <description>Whether Hive enables the optimization about converting common join into mapjoin based
+  on the input file size.</description>
 </property>
 
 <property>
@@ -1244,8 +1248,11 @@
 
 <property>
   <name>hive.stats.dbclass</name>
-  <value>counter</value>
-  <description>The storage that stores temporary Hive statistics. Currently, jdbc, hbase, counter and custom type are supported.</description>
+  <value>fs</value>
+  <description>The storage that stores temporary Hive statistics. Supported values are
+  fs (filesystem), jdbc(:.*), hbase, counter, and custom. In FS based statistics collection,
+  each task writes statistics it has collected in a file on the filesystem, which will be
+  aggregated after the job has finished.</description>
 </property>
 
 <property>
@@ -2234,7 +2241,7 @@
   <description>
     SPNego service principal, optional,
     typical value would look like HTTP/_HOST@EXAMPLE.COM
-    SPNego service principal would be used by hiveserver2 when kerberos security is enabled
+    SPNego service principal would be used by HiveServer2 when Kerberos security is enabled
     and HTTP transport mode is used.
     This needs to be set only if SPNEGO is to be used in authentication.
   </description>
@@ -2246,14 +2253,14 @@
   <description>
     keytab file for SPNego principal, optional,
     typical value would look like /etc/security/keytabs/spnego.service.keytab,
-    This keytab would be used by hiveserver2 when kerberos security is enabled
+    This keytab would be used by HiveServer2 when Kerberos security is enabled
     and HTTP transport mode is used.
     This needs to be set only if SPNEGO is to be used in authentication.
     SPNego authentication would be honored only if valid
     hive.server2.authentication.spnego.principal
     and
     hive.server2.authentication.spnego.keytab
-    are specified
+    are specified.
   </description>
 </property>
 
@@ -2261,7 +2268,7 @@
   <name>hive.server2.authentication.ldap.url</name>
   <value></value>
   <description>
-    LDAP connection URL
+    LDAP connection URL.
   </description>
 </property>
 
@@ -2269,7 +2276,15 @@
   <name>hive.server2.authentication.ldap.baseDN</name>
   <value></value>
   <description>
-    LDAP base DN
+    LDAP base DN (distinguished name).
+  </description>
+</property>
+
+<property>
+  <name>hive.server2.authentication.ldap.Domain</name>
+  <value></value>
+  <description>
+    LDAP domain.
   </description>
 </property>
 
@@ -2286,7 +2301,7 @@
   <name>hive.execution.engine</name>
   <value>mr</value>
   <description>
-    Chooses execution engine. Options are: mr (Map reduce, default) or tez (hadoop 2 only)
+    Chooses execution engine. Options are mr (Map reduce, default) or Tez (Hadoop 2 only).
   </description>
 </property>
 
@@ -2294,7 +2309,7 @@
   <name>hive.prewarm.enabled</name>
   <value>false</value>
   <description>
-    Enables container prewarm for tez (hadoop 2 only)
+    Enables container prewarm for Tez (Hadoop 2 only)
   </description>
 </property>
 
@@ -2302,7 +2317,7 @@
   <name>hive.prewarm.numcontainers</name>
   <value>10</value>
   <description>
-    Controls the number of containers to prewarm for tez (hadoop 2 only)
+    Controls the number of containers to prewarm for Tez (Hadoop 2 only)
   </description>
 </property>
 
@@ -2318,15 +2333,24 @@
 </property>
 
 <property>
+  <name>hive.server2.session.hook</name>
+  <value></value>
+  <description>
+    Session-level hook for HiveServer2.
+  </description>
+</property>
+
+<property>
   <name>hive.server2.thrift.sasl.qop</name>
   <value>auth</value>
-  <description>Sasl QOP value; Set it to one of following values to enable higher levels of
-     protection for HiveServer2 communication with clients. hadoop.rpc.protection being set 
-     to a higher level than HiveServer2 does not make sense in most situations. 
-     HiveServer2 ignores hadoop.rpc.protection in favor of hive.server2.thrift.sasl.qop.
+  <description>SASL QOP value; set it to one of the following values to enable higher levels of
+     protection for HiveServer2 communication with clients.
       "auth" - authentication only (default)
       "auth-int" - authentication plus integrity protection
       "auth-conf" - authentication plus integrity and confidentiality protection
+     Note that hadoop.rpc.protection being set to a higher level than HiveServer2 does not
+     make sense in most situations. HiveServer2 ignores hadoop.rpc.protection in favor of
+     hive.server2.thrift.sasl.qop.
      This is applicable only if HiveServer2 is configured to use Kerberos authentication.
  </description>
 </property>
@@ -2393,11 +2417,11 @@
   <name>hive.metastore.integral.jdo.pushdown</name>
   <value>false</value>
   <description>
-   Allow JDO query pushdown for integral partition columns in metastore. Off by default. This
-   improves metastore perf for integral columns, especially if there's a large number of partitions.
-   However, it doesn't work correctly with integral values that are not normalized (e.g. have
-   leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization
-   is also irrelevant.
+   Allow JDO query pushdown for integral partition columns in the metastore. Off by default.
+   This improves metastore performance for integral columns, especially with a large number of
+   partitions. However, it doesn't work correctly for integral values that are not normalized
+   (for example, if they have leading zeroes like 0012). If metastore direct SQL is enabled and
+   works (hive.metastore.try.direct.sql), this optimization is also irrelevant.
   </description>
 </property>
 
@@ -2447,8 +2471,8 @@
   <name>hive.jar.directory</name>
   <value></value>
   <description>
-    This is the location hive in tez mode will look for to find a site wide 
-    installed hive instance. If not set, the directory under hive.user.install.directory 
+    This is the location Hive in Tez mode will look for to find a site wide
+    installed Hive instance. If not set, the directory under hive.user.install.directory
     corresponding to current user name will be used.
   </description>
 </property>
@@ -2457,8 +2481,8 @@
   <name>hive.user.install.directory</name>
   <value>hdfs:///user/</value>
   <description>
-    If hive (in tez mode only) cannot find a usable hive jar in "hive.jar.directory", 
-    it will upload the hive jar to &lt;hive.user.install.directory&gt;/&lt;user name&gt; 
+    If Hive (in Tez mode only) cannot find a usable Hive jar in "hive.jar.directory",
+    it will upload the Hive jar to &lt;hive.user.install.directory&gt;/&lt;user name&gt;
     and use it to run queries.
   </description>
 </property>
@@ -2466,13 +2490,15 @@
 <property>
   <name>hive.tez.container.size</name>
   <value>-1</value>
-  <description>By default tez will spawn containers of the size of a mapper. This can be used to overwrite.</description>
+  <description>By default Tez will spawn containers of the size of a mapper.
+  This can be used to overwrite.</description>
 </property>
 
 <property>
   <name>hive.tez.java.opts</name>
   <value></value>
-  <description>By default tez will use the java opts from map tasks. This can be used to overwrite.</description>
+  <description>By default Tez will use the Java options from map tasks.
+  This can be used to overwrite.</description>
 </property>
 
 <property>
@@ -2480,7 +2506,7 @@
   <value>INFO</value>
   <description>
     The log level to use for tasks executing as part of the DAG.
-    Used only if hive.tez.java.opts is used to configure java opts.
+    Used only if hive.tez.java.opts is used to configure Java options.
   </description>
 </property>
 
@@ -2488,9 +2514,9 @@
   <name>hive.server2.tez.default.queues</name>
   <value></value>
   <description>
-    A list of comma separated values corresponding to yarn queues of the same name.
-    When hive server 2 is launched in tez mode, this configuration needs to be set
-    for multiple tez sessions to run in parallel on the cluster.
+    A list of comma separated values corresponding to YARN queues of the same name.
+    When HiveServer2 is launched in Tez mode, this configuration needs to be set
+    for multiple Tez sessions to run in parallel on the cluster.
   </description>
 </property>
 
@@ -2498,7 +2524,7 @@
   <name>hive.server2.tez.sessions.per.default.queue</name>
   <value>1</value>
   <description>
-    A positive integer that determines the number of tez sessions that should be
+    A positive integer that determines the number of Tez sessions that should be
     launched on each of the queues specified by "hive.server2.tez.default.queues".
     Determines the parallelism on each queue.
   </description>
@@ -2508,9 +2534,9 @@
   <name>hive.server2.tez.initialize.default.sessions</name>
   <value>false</value>
   <description>
-    This flag is used in hive server 2 to enable a user to use hive server 2 without
-    turning on tez for hive server 2. The user could potentially want to run queries
-    over tez without the pool of sessions.
+    This flag is used in HiveServer2 to enable a user to use HiveServer2 without
+    turning on Tez for HiveServer2. The user could potentially want to run queries
+    over Tez without the pool of sessions.
   </description>
 </property>
 
@@ -2518,7 +2544,7 @@
   <name>hive.lazysimple.extended_boolean_literal</name>
   <value>false</value>
   <description>
-    LazySiimpleSerde uses this properties to determine if it treats 'T', 't', 'F', 'f',
+    LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',
     '1', and '0' as extended, legal boolean literal, in addition to 'TRUE' and 'FALSE'.
     The default is false, which means only 'TRUE' and 'FALSE' are treated as legal
     boolean literal.
@@ -2529,7 +2555,7 @@
   <name>hive.server2.allow.user.substitution</name>
   <value>true</value>
   <description>
-    Allow alternate user to be specified as part of HiveServer2 open connection request
+    Allow alternate user to be specified as part of HiveServer2 open connection request.
   </description>
 </property>
 
@@ -2577,18 +2603,6 @@
 </property>
 
 <property>
-  <name>hive.metastore.integral.jdo.pushdown</name>
-  <value>false</value>
-  <description>
-  Whether to enable JDO pushdown for integral types. Off by default. Irrelevant if
-  hive.metastore.try.direct.sql is enabled. Otherwise, filter pushdown in metastore can improve
-  performance, but for partition columns storing integers in non-canonical form, (e.g. '012'),
-  it can produce incorrect results.
-  </description>
-</property>
-
-
-<property>
   <name>hive.mapjoin.optimized.keys</name>
   <value>true</value>
   <description>
@@ -2652,8 +2666,9 @@
 <property>
   <name>hive.server2.authentication.pam.services</name>
   <value></value>
-  <description>List of the underlying pam services that should be used when auth type is PAM.
-  A file with the same name must exist in /etc/pam.d</description>
+  <description>List of the underlying PAM services that should be used when authentication
+  type is PAM (hive.server2.authentication). A file with the same name must exist in
+  /etc/pam.d</description>
 </property>
 
 <property>
@@ -2673,7 +2688,8 @@
 <property>
   <name>hive.limit.query.max.table.partition</name>
   <value>-1</value>
-  <description>This controls how many partitions can be scanned for each partitioned table. The default value "-1" means no limit.</description>
+  <description>This controls how many partitions can be scanned for each partitioned table.
+  The default value "-1" means no limit.</description>
 </property>
 
 <property>
@@ -2715,44 +2731,52 @@
 <property>
   <name>hive.compactor.worker.timeout</name>
   <value>86400</value>
-  <description>Time, in seconds, before a given compaction in working state is declared a failure and returned to the initiated state.</description>
+  <description>Time, in seconds, before a given compaction in working state is declared a failure
+  and returned to the initiated state.</description>
 </property>
 
 <property>
   <name>hive.compactor.check.interval</name>
   <value>300</value>
   <description>Time in seconds between checks to see if any partitions need to be compacted.
-  This should be kept high because each check for compaction requires many calls against the NameNode.</description>
+  This should be kept high because each check for compaction requires many calls against the
+  NameNode.</description>
 </property>
 
 <property>
   <name>hive.compactor.delta.num.threshold</name>
   <value>10</value>
-  <description>Number of delta files that must exist in a directory before the compactor will attempt a minor compaction.</description>
+  <description>Number of delta files that must exist in a directory before the compactor will
+  attempt a minor compaction.</description>
 </property>
 
 <property>
   <name>hive.compactor.delta.pct.threshold</name>
   <value>0.1</value>
-  <description>Percentage (by size) of base that deltas can be before major compaction is initiated.</description>
+  <description>Percentage (by size) of base that deltas can be before major compaction is
+  initiated.</description>
 </property>
 
 <property>
   <name>hive.compactor.abortedtxn.threshold</name>
   <value>1000</value>
-  <description>Number of aborted transactions involving a particular table or partition before major compaction is initiated.</description>
+  <description>Number of aborted transactions involving a particular table or partition before major
+  compaction is initiated.</description>
 </property>
 
 <property>
   <name>hive.mapjoin.optimized.hashtable</name>
   <value>true</value>
-  <description>Whether Hive should use memory-optimized hash table for MapJoin. Only works on Tez, because memory-optimized hashtable cannot be serialized.</description>
+  <description>Whether Hive should use memory-optimized hash table for MapJoin. Only works on Tez,
+  because memory-optimized hashtable cannot be serialized.</description>
 </property>
 
 <property>
   <name>hive.mapjoin.optimized.hashtable.wbsize</name>
   <value>10485760</value>
-  <description>Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to store data. This is one buffer size. HT may be slightly faster if this is larger, but for small joins unnecessary memory will be allocated and then trimmed.</description>
+  <description>Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to
+  store data. This is one buffer size. HT may be slightly faster if this is larger, but for small
+  joins unnecessary memory will be allocated and then trimmed.</description>
 </property>
 
 </configuration>