You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/01/04 00:51:30 UTC
svn commit: r1555287 - in
/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql:
exec/MapJoinOperator.java io/HiveInputFormat.java
Author: gunther
Date: Fri Jan 3 23:51:29 2014
New Revision: 1555287
URL: http://svn.apache.org/r1555287
Log:
HIVE-6138: Tez: Add some additional comments to clarify intent (Gunther Hagleitner)
Modified:
hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=1555287&r1=1555286&r2=1555287&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Fri Jan 3 23:51:29 2014
@@ -70,6 +70,7 @@ public class MapJoinOperator extends Abs
/*
* We need the base (operator.java) implementation of start/endGroup.
* The parent class has functionality in those that map join can't use.
+ * Note: The mapjoin can be run in the reducer only on Tez.
*/
@Override
public void endGroup() throws HiveException {
@@ -87,6 +88,8 @@ public class MapJoinOperator extends Abs
int tagLen = conf.getTagLength();
+ // On Tez only: The hash map might already be cached in the container we run
+ // the task in. On MR: The cache is a no-op.
tableKey = "__HASH_MAP_"+this.getOperatorId()+"_container";
serdeKey = "__HASH_MAP_"+this.getOperatorId()+"_serde";
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1555287&r1=1555286&r2=1555287&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Fri Jan 3 23:51:29 2014
@@ -256,6 +256,12 @@ public class HiveInputFormat<K extends W
pathToPartitionInfo = mrwork.getPathToPartitionInfo();
}
+ /*
+ * addSplitsForGroup collects separate calls to setInputPaths into one where possible.
+ * Doing so is faster for some InputFormats. E.g., ORC starts a thread pool to do the
+ * work, so calling it multiple times unnecessarily would create a lot of redundant
+ * thread pools.
+ */
private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, JobConf conf,
InputFormat inputFormat, Class<? extends InputFormat> inputFormatClass, int splits,
TableDesc table, List<InputSplit> result) throws IOException {