You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/01/04 00:51:30 UTC
svn commit: r1555287 - in /hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql: exec/MapJoinOperator.java io/HiveInputFormat.java

Author: gunther
Date: Fri Jan  3 23:51:29 2014
New Revision: 1555287

URL: http://svn.apache.org/r1555287
Log:
HIVE-6138: Tez: Add some additional comments to clarify intent (Gunther Hagleitner)

Modified:
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=1555287&r1=1555286&r2=1555287&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Fri Jan  3 23:51:29 2014
@@ -70,6 +70,7 @@ public class MapJoinOperator extends Abs
   /*
    * We need the base (operator.java) implementation of start/endGroup.
    * The parent class has functionality in those that map join can't use.
+   * Note: Running the mapjoin in the reducer is possible only on Tez.
    */
   @Override
   public void endGroup() throws HiveException {
@@ -87,6 +88,8 @@ public class MapJoinOperator extends Abs
 
     int tagLen = conf.getTagLength();
 
+    // On Tez only: The hash map might already be cached in the container we run
+    // the task in. On MR: The cache is a no-op.
     tableKey = "__HASH_MAP_"+this.getOperatorId()+"_container";
     serdeKey = "__HASH_MAP_"+this.getOperatorId()+"_serde";
 

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1555287&r1=1555286&r2=1555287&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Fri Jan  3 23:51:29 2014
@@ -256,6 +256,12 @@ public class HiveInputFormat<K extends W
     pathToPartitionInfo = mrwork.getPathToPartitionInfo();
   }
 
+  /*
+   * addSplitsForGroup collects separate calls to setInputPaths into one where possible.
+   * The reason for this is that this is faster on some InputFormats. E.g.: Orc will start
+   * a thread pool to do the work, and calling it multiple times would needlessly create
+   * many thread pools.
+   */
   private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, JobConf conf,
       InputFormat inputFormat, Class<? extends InputFormat> inputFormatClass, int splits,
       TableDesc table, List<InputSplit> result) throws IOException {