Posted to commits@hive.apache.org by vi...@apache.org on 2015/10/30 22:59:11 UTC
[1/2] hive git commit: HIVE-11356: SMB join on tez fails when one of the tables is empty (Vikram Dixit K, reviewed by Gunther Hagleitner)
Repository: hive
Updated Branches:
refs/heads/master cd531e3f0 -> d4df2aee1
HIVE-11356: SMB join on tez fails when one of the tables is empty (Vikram Dixit K, reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f958de0a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f958de0a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f958de0a
Branch: refs/heads/master
Commit: f958de0ab5673fd4cdd5b021600037eb0487e6ad
Parents: cd531e3
Author: vikram <vi...@hortonworks.com>
Authored: Tue Oct 27 12:57:19 2015 -0700
Committer: vikram <vi...@hortonworks.com>
Committed: Fri Oct 30 14:54:22 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hive/ql/exec/CommonMergeJoinOperator.java | 8 +-
.../apache/hadoop/hive/ql/exec/MapOperator.java | 27 +
.../hive/ql/exec/TezDummyStoreOperator.java | 9 +
.../hive/ql/exec/tez/CustomPartitionVertex.java | 1 +
.../hive/ql/exec/tez/MapRecordProcessor.java | 27 +-
.../test/queries/clientpositive/tez_smb_empty.q | 55 ++
.../clientpositive/tez/tez_smb_empty.q.out | 676 +++++++++++++++++++
8 files changed, 800 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f958de0a/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 13efc58..8a4c1fa 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -373,6 +373,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
tez_union_group_by.q,\
tez_smb_main.q,\
tez_smb_1.q,\
+ tez_smb_empty.q,\
vectorized_dynamic_partition_pruning.q,\
tez_multi_union.q,\
tez_join.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/f958de0a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
index d5d62ca..ede8dc8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
@@ -143,7 +143,13 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge
for (byte pos = 0; pos < order.length; pos++) {
if (pos != posBigTable) {
- fetchDone[pos] = false;
+ if ((parentOperators != null) && (parentOperators.isEmpty() == false)
+ && (parentOperators.get(pos) instanceof TezDummyStoreOperator)) {
+ TezDummyStoreOperator dummyStoreOp = (TezDummyStoreOperator) parentOperators.get(pos);
+ fetchDone[pos] = dummyStoreOp.getFetchDone();
+ } else {
+ fetchDone[pos] = false;
+ }
}
foundNextKeyGroup[pos] = false;
}
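
For illustration, a minimal self-contained sketch (hypothetical stand-in types, not the actual Hive classes) of the guard this hunk introduces: when a non-big-table side is fed by a Tez dummy store whose input was empty, the merge join seeds fetchDone for that side from the dummy store instead of unconditionally resetting it to false, so the join never waits on rows that can never arrive.

    import java.util.Arrays;
    import java.util.List;

    // Hypothetical stand-ins for the Hive operator types, for illustration only.
    class DummyStoreSketch {
        private boolean fetchDone;                  // true when the input had no data at all
        boolean getFetchDone() { return fetchDone; }
        void setFetchDone(boolean done) { fetchDone = done; }
    }

    class MergeJoinSketch {
        boolean[] fetchDone = new boolean[2];
        List<Object> parentOperators;

        void seedFetchDone(byte pos, byte posBigTable) {
            if (pos == posBigTable) {
                return;                             // the big table side streams directly
            }
            if (parentOperators != null && !parentOperators.isEmpty()
                    && parentOperators.get(pos) instanceof DummyStoreSketch) {
                // Empty-table fix: trust the dummy store's flag instead of
                // unconditionally assuming more rows will arrive on this side.
                fetchDone[pos] = ((DummyStoreSketch) parentOperators.get(pos)).getFetchDone();
            } else {
                fetchDone[pos] = false;             // default: records still expected
            }
        }

        public static void main(String[] args) {
            MergeJoinSketch join = new MergeJoinSketch();
            DummyStoreSketch emptySide = new DummyStoreSketch();
            emptySide.setFetchDone(true);           // record processor saw no splits
            join.parentOperators = Arrays.asList(new Object(), emptySide);
            join.seedFetchDone((byte) 1, (byte) 0);
            System.out.println(join.fetchDone[1]);  // true: join won't wait on this side
        }
    }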
http://git-wip-us.apache.org/repos/asf/hive/blob/f958de0a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index ee6af04..c2a5726 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.ql.exec.MapOperator.MapOpCtx;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -55,6 +56,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -332,6 +334,31 @@ public class MapOperator extends Operator<MapWork> implements Serializable, Clon
return tableDescOI;
}
+ /*
+ * This is the same as the setChildren method below but for empty tables.
+ * It takes care of the following:
+ * 1. Create the right object inspector.
+ * 2. Set up the childrenOpToOI with the object inspector.
+ * So as to ensure that the initialization happens correctly.
+ */
+ public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
+ throws SerDeException, Exception {
+ setChildOperators(children);
+ for (Operator<?> child : children) {
+ TableScanOperator tsOp = (TableScanOperator) child;
+ StructObjectInspector soi = null;
+ PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
+ Deserializer serde = partDesc.getTableDesc().getDeserializer();
+ partDesc.setProperties(partDesc.getProperties());
+ MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
+ StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
+ initObjectInspector(hconf, opCtx, tableRowOI);
+ soi = opCtx.rowObjectInspector;
+ child.getParentOperators().add(this);
+ childrenOpToOI.put(child, soi);
+ }
+ }
+
public void setChildren(Configuration hconf) throws Exception {
List<Operator<? extends OperatorDesc>> children =
http://git-wip-us.apache.org/repos/asf/hive/blob/f958de0a/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java
index 6bd156b..e9f65be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
*
*/
public class TezDummyStoreOperator extends DummyStoreOperator {
+ private boolean fetchDone = false;
/**
* Unlike the MR counterpoint, on Tez we want processOp to forward
@@ -37,4 +38,12 @@ public class TezDummyStoreOperator extends DummyStoreOperator {
super.process(row, tag);
forward(result.o, outputObjInspector);
}
+
+ public boolean getFetchDone() {
+ return fetchDone;
+ }
+
+ public void setFetchDone(boolean fetchDone) {
+ this.fetchDone = fetchDone;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f958de0a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
index e9f1c98..fe1ef37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
@@ -336,6 +336,7 @@ public class CustomPartitionVertex extends VertexManagerPlugin {
+ " multi mr inputs. " + bucketToTaskMap);
Integer[] numSplitsForTask = new Integer[taskCount];
+ Arrays.fill(numSplitsForTask, 0);
Multimap<Integer, ByteBuffer> bucketToSerializedSplitMap = LinkedListMultimap.create();
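
This one-line fix matters because the elements of a freshly allocated Integer[] are null, not 0, and unboxing null in arithmetic throws; with an empty table, some tasks presumably received no splits and their slots were never written. A self-contained demonstration (plain Java, not Hive code):

    import java.util.Arrays;

    public class IntegerArrayDefault {
        public static void main(String[] args) {
            Integer[] numSplitsForTask = new Integer[4]; // elements default to null, not 0
            // numSplitsForTask[0]++;                    // would throw NullPointerException on unboxing
            Arrays.fill(numSplitsForTask, 0);            // the fix: every slot starts as an explicit 0
            numSplitsForTask[0]++;                       // safe; tasks with no splits simply stay at 0
            System.out.println(Arrays.toString(numSplitsForTask)); // [1, 0, 0, 0]
        }
    }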
http://git-wip-us.apache.org/repos/asf/hive/blob/f958de0a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java
index 948829b..1d645a0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.ObjectCache;
import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
@@ -199,9 +200,27 @@ public class MapRecordProcessor extends RecordProcessor {
l4j.info("Input name is " + mergeMapWork.getName());
jconf.set(Utilities.INPUT_NAME, mergeMapWork.getName());
mergeMapOp.initialize(jconf, null);
- mergeMapOp.setChildren(jconf);
+ // if there are no files/partitions to read, we need to skip trying to read
+ boolean skipRead = mergeMapOp.getConf().getPathToAliases().isEmpty();
+ if (skipRead) {
+ List<Operator<?>> children = new ArrayList<Operator<?>>();
+ children.addAll(mergeMapOp.getConf().getAliasToWork().values());
+ // do the same thing as setChildren when there is nothing to read.
+ // the setChildren method initializes the object inspector needed by the operators
+ // based on path and partition information which we don't have in this case.
+ mergeMapOp.initEmptyInputChildren(children, jconf);
+ } else {
+ // the setChildren method initializes the object inspector needed by the operators
+ // based on path and partition information.
+ mergeMapOp.setChildren(jconf);
+ }
DummyStoreOperator dummyOp = getJoinParentOp(mergeMapOp);
+ if (dummyOp instanceof TezDummyStoreOperator) {
+ // we ensure that we don't try to read any data for this table.
+ ((TezDummyStoreOperator) dummyOp).setFetchDone(skipRead);
+ }
+
mapOp.setConnectedOperators(mergeMapWork.getTag(), dummyOp);
mergeMapOp.passExecContext(new ExecMapperContext(jconf));
@@ -281,8 +300,10 @@ public class MapRecordProcessor extends RecordProcessor {
MultiMRInput multiMRInput = multiMRInputMap.get(inputName);
Collection<KeyValueReader> kvReaders = multiMRInput.getKeyValueReaders();
l4j.debug("There are " + kvReaders.size() + " key-value readers for input " + inputName);
- reader = getKeyValueReader(kvReaders, mapOp);
- sources[tag].init(jconf, mapOp, reader);
+ if (kvReaders.size() > 0) {
+ reader = getKeyValueReader(kvReaders, mapOp);
+ sources[tag].init(jconf, mapOp, reader);
+ }
}
((TezContext) MapredContext.get()).setRecordSources(sources);
}
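
A minimal runnable restatement (hypothetical stand-ins, not Hive code) of the second guard above: an empty table contributes no splits, so the collection of key-value readers can legitimately be empty, and a record source is wired up only when a reader actually exists.

    import java.util.Collection;
    import java.util.Collections;

    public class RecordSourceGuard {
        public static void main(String[] args) {
            // Stand-in for MultiMRInput.getKeyValueReaders(): with an empty table
            // the input contributes no splits, so the collection can be empty.
            Collection<Object> kvReaders = Collections.emptyList();

            if (kvReaders.size() > 0) {
                System.out.println("wire a record source for this input");
            } else {
                // The pre-patch code fetched a reader unconditionally, which assumes
                // at least one exists; now the empty side is left uninitialized and
                // the dummy store's fetchDone flag marks it as already exhausted.
                System.out.println("no key-value readers: skip record source init");
            }
        }
    }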
http://git-wip-us.apache.org/repos/asf/hive/blob/f958de0a/ql/src/test/queries/clientpositive/tez_smb_empty.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_smb_empty.q b/ql/src/test/queries/clientpositive/tez_smb_empty.q
new file mode 100644
index 0000000..196cc97
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_smb_empty.q
@@ -0,0 +1,55 @@
+set hive.explain.user=false;
+set hive.mapjoin.hybridgrace.hashtable=false;
+set hive.join.emit.interval=2;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting = true;
+set hive.optimize.bucketingsorting=false;
+insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part;
+
+CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin;
+
+set hive.auto.convert.sortmerge.join = true;
+
+set hive.auto.convert.join.noconditionaltask.size=500;
+CREATE TABLE empty(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+explain
+select count(*) from tab s1 join empty s3 on s1.key=s3.key;
+
+select count(*) from tab s1 join empty s3 on s1.key=s3.key;
+
+explain
+select * from tab s1 left outer join empty s3 on s1.key=s3.key;
+
+select * from tab s1 left outer join empty s3 on s1.key=s3.key;
+
+explain
+select count(*) from tab s1 left outer join tab s2 on s1.key=s2.key join empty s3 on s1.key = s3.key;
+
+select count(*) from tab s1 left outer join tab s2 on s1.key=s2.key join empty s3 on s1.key = s3.key;
+
+explain
+select count(*) from tab s1 left outer join empty s2 on s1.key=s2.key join tab s3 on s1.key = s3.key;
+
+select count(*) from tab s1 left outer join empty s2 on s1.key=s2.key join tab s3 on s1.key = s3.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/f958de0a/ql/src/test/results/clientpositive/tez/tez_smb_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez_smb_empty.q.out b/ql/src/test/results/clientpositive/tez/tez_smb_empty.q.out
new file mode 100644
index 0000000..82ec31d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_smb_empty.q.out
@@ -0,0 +1,676 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_part
+POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_part
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part@ds=2008-04-08
+POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab
+POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab
+PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: Output: default@tab@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: Output: default@tab@ds=2008-04-08
+POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE empty(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@empty
+POSTHOOK: query: CREATE TABLE empty(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@empty
+PREHOOK: query: explain
+select count(*) from tab s1 join empty s3 on s1.key=s3.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab s1 join empty s3 on s1.key=s3.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab s1 join empty s3 on s1.key=s3.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@empty
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab s1 join empty s3 on s1.key=s3.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@empty
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+0
+PREHOOK: query: explain
+select * from tab s1 left outer join empty s3 on s1.key=s3.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from tab s1 left outer join empty s3 on s1.key=s3.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from tab s1 left outer join empty s3 on s1.key=s3.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@empty
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tab s1 left outer join empty s3 on s1.key=s3.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@empty
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+0 val_0 2008-04-08 NULL NULL NULL
+0 val_0 2008-04-08 NULL NULL NULL
+0 val_0 2008-04-08 NULL NULL NULL
+103 val_103 2008-04-08 NULL NULL NULL
+103 val_103 2008-04-08 NULL NULL NULL
+105 val_105 2008-04-08 NULL NULL NULL
+11 val_11 2008-04-08 NULL NULL NULL
+114 val_114 2008-04-08 NULL NULL NULL
+116 val_116 2008-04-08 NULL NULL NULL
+118 val_118 2008-04-08 NULL NULL NULL
+118 val_118 2008-04-08 NULL NULL NULL
+125 val_125 2008-04-08 NULL NULL NULL
+125 val_125 2008-04-08 NULL NULL NULL
+129 val_129 2008-04-08 NULL NULL NULL
+129 val_129 2008-04-08 NULL NULL NULL
+134 val_134 2008-04-08 NULL NULL NULL
+134 val_134 2008-04-08 NULL NULL NULL
+136 val_136 2008-04-08 NULL NULL NULL
+138 val_138 2008-04-08 NULL NULL NULL
+138 val_138 2008-04-08 NULL NULL NULL
+138 val_138 2008-04-08 NULL NULL NULL
+138 val_138 2008-04-08 NULL NULL NULL
+143 val_143 2008-04-08 NULL NULL NULL
+145 val_145 2008-04-08 NULL NULL NULL
+149 val_149 2008-04-08 NULL NULL NULL
+149 val_149 2008-04-08 NULL NULL NULL
+15 val_15 2008-04-08 NULL NULL NULL
+15 val_15 2008-04-08 NULL NULL NULL
+150 val_150 2008-04-08 NULL NULL NULL
+152 val_152 2008-04-08 NULL NULL NULL
+152 val_152 2008-04-08 NULL NULL NULL
+156 val_156 2008-04-08 NULL NULL NULL
+158 val_158 2008-04-08 NULL NULL NULL
+163 val_163 2008-04-08 NULL NULL NULL
+165 val_165 2008-04-08 NULL NULL NULL
+165 val_165 2008-04-08 NULL NULL NULL
+167 val_167 2008-04-08 NULL NULL NULL
+167 val_167 2008-04-08 NULL NULL NULL
+167 val_167 2008-04-08 NULL NULL NULL
+169 val_169 2008-04-08 NULL NULL NULL
+169 val_169 2008-04-08 NULL NULL NULL
+169 val_169 2008-04-08 NULL NULL NULL
+169 val_169 2008-04-08 NULL NULL NULL
+17 val_17 2008-04-08 NULL NULL NULL
+170 val_170 2008-04-08 NULL NULL NULL
+172 val_172 2008-04-08 NULL NULL NULL
+172 val_172 2008-04-08 NULL NULL NULL
+174 val_174 2008-04-08 NULL NULL NULL
+174 val_174 2008-04-08 NULL NULL NULL
+176 val_176 2008-04-08 NULL NULL NULL
+176 val_176 2008-04-08 NULL NULL NULL
+178 val_178 2008-04-08 NULL NULL NULL
+181 val_181 2008-04-08 NULL NULL NULL
+183 val_183 2008-04-08 NULL NULL NULL
+187 val_187 2008-04-08 NULL NULL NULL
+187 val_187 2008-04-08 NULL NULL NULL
+187 val_187 2008-04-08 NULL NULL NULL
+189 val_189 2008-04-08 NULL NULL NULL
+19 val_19 2008-04-08 NULL NULL NULL
+190 val_190 2008-04-08 NULL NULL NULL
+192 val_192 2008-04-08 NULL NULL NULL
+194 val_194 2008-04-08 NULL NULL NULL
+196 val_196 2008-04-08 NULL NULL NULL
+2 val_2 2008-04-08 NULL NULL NULL
+20 val_20 2008-04-08 NULL NULL NULL
+200 val_200 2008-04-08 NULL NULL NULL
+200 val_200 2008-04-08 NULL NULL NULL
+202 val_202 2008-04-08 NULL NULL NULL
+208 val_208 2008-04-08 NULL NULL NULL
+208 val_208 2008-04-08 NULL NULL NULL
+208 val_208 2008-04-08 NULL NULL NULL
+213 val_213 2008-04-08 NULL NULL NULL
+213 val_213 2008-04-08 NULL NULL NULL
+217 val_217 2008-04-08 NULL NULL NULL
+217 val_217 2008-04-08 NULL NULL NULL
+219 val_219 2008-04-08 NULL NULL NULL
+219 val_219 2008-04-08 NULL NULL NULL
+222 val_222 2008-04-08 NULL NULL NULL
+224 val_224 2008-04-08 NULL NULL NULL
+224 val_224 2008-04-08 NULL NULL NULL
+226 val_226 2008-04-08 NULL NULL NULL
+228 val_228 2008-04-08 NULL NULL NULL
+233 val_233 2008-04-08 NULL NULL NULL
+233 val_233 2008-04-08 NULL NULL NULL
+235 val_235 2008-04-08 NULL NULL NULL
+237 val_237 2008-04-08 NULL NULL NULL
+237 val_237 2008-04-08 NULL NULL NULL
+239 val_239 2008-04-08 NULL NULL NULL
+239 val_239 2008-04-08 NULL NULL NULL
+24 val_24 2008-04-08 NULL NULL NULL
+24 val_24 2008-04-08 NULL NULL NULL
+242 val_242 2008-04-08 NULL NULL NULL
+242 val_242 2008-04-08 NULL NULL NULL
+244 val_244 2008-04-08 NULL NULL NULL
+248 val_248 2008-04-08 NULL NULL NULL
+255 val_255 2008-04-08 NULL NULL NULL
+255 val_255 2008-04-08 NULL NULL NULL
+257 val_257 2008-04-08 NULL NULL NULL
+26 val_26 2008-04-08 NULL NULL NULL
+26 val_26 2008-04-08 NULL NULL NULL
+260 val_260 2008-04-08 NULL NULL NULL
+262 val_262 2008-04-08 NULL NULL NULL
+266 val_266 2008-04-08 NULL NULL NULL
+273 val_273 2008-04-08 NULL NULL NULL
+273 val_273 2008-04-08 NULL NULL NULL
+273 val_273 2008-04-08 NULL NULL NULL
+275 val_275 2008-04-08 NULL NULL NULL
+277 val_277 2008-04-08 NULL NULL NULL
+277 val_277 2008-04-08 NULL NULL NULL
+277 val_277 2008-04-08 NULL NULL NULL
+277 val_277 2008-04-08 NULL NULL NULL
+28 val_28 2008-04-08 NULL NULL NULL
+280 val_280 2008-04-08 NULL NULL NULL
+280 val_280 2008-04-08 NULL NULL NULL
+282 val_282 2008-04-08 NULL NULL NULL
+282 val_282 2008-04-08 NULL NULL NULL
+284 val_284 2008-04-08 NULL NULL NULL
+286 val_286 2008-04-08 NULL NULL NULL
+288 val_288 2008-04-08 NULL NULL NULL
+288 val_288 2008-04-08 NULL NULL NULL
+291 val_291 2008-04-08 NULL NULL NULL
+305 val_305 2008-04-08 NULL NULL NULL
+307 val_307 2008-04-08 NULL NULL NULL
+307 val_307 2008-04-08 NULL NULL NULL
+309 val_309 2008-04-08 NULL NULL NULL
+309 val_309 2008-04-08 NULL NULL NULL
+310 val_310 2008-04-08 NULL NULL NULL
+316 val_316 2008-04-08 NULL NULL NULL
+316 val_316 2008-04-08 NULL NULL NULL
+316 val_316 2008-04-08 NULL NULL NULL
+318 val_318 2008-04-08 NULL NULL NULL
+318 val_318 2008-04-08 NULL NULL NULL
+318 val_318 2008-04-08 NULL NULL NULL
+321 val_321 2008-04-08 NULL NULL NULL
+321 val_321 2008-04-08 NULL NULL NULL
+323 val_323 2008-04-08 NULL NULL NULL
+325 val_325 2008-04-08 NULL NULL NULL
+325 val_325 2008-04-08 NULL NULL NULL
+327 val_327 2008-04-08 NULL NULL NULL
+327 val_327 2008-04-08 NULL NULL NULL
+327 val_327 2008-04-08 NULL NULL NULL
+33 val_33 2008-04-08 NULL NULL NULL
+332 val_332 2008-04-08 NULL NULL NULL
+336 val_336 2008-04-08 NULL NULL NULL
+338 val_338 2008-04-08 NULL NULL NULL
+341 val_341 2008-04-08 NULL NULL NULL
+345 val_345 2008-04-08 NULL NULL NULL
+35 val_35 2008-04-08 NULL NULL NULL
+35 val_35 2008-04-08 NULL NULL NULL
+35 val_35 2008-04-08 NULL NULL NULL
+356 val_356 2008-04-08 NULL NULL NULL
+365 val_365 2008-04-08 NULL NULL NULL
+367 val_367 2008-04-08 NULL NULL NULL
+367 val_367 2008-04-08 NULL NULL NULL
+369 val_369 2008-04-08 NULL NULL NULL
+369 val_369 2008-04-08 NULL NULL NULL
+369 val_369 2008-04-08 NULL NULL NULL
+37 val_37 2008-04-08 NULL NULL NULL
+37 val_37 2008-04-08 NULL NULL NULL
+374 val_374 2008-04-08 NULL NULL NULL
+378 val_378 2008-04-08 NULL NULL NULL
+389 val_389 2008-04-08 NULL NULL NULL
+392 val_392 2008-04-08 NULL NULL NULL
+394 val_394 2008-04-08 NULL NULL NULL
+396 val_396 2008-04-08 NULL NULL NULL
+396 val_396 2008-04-08 NULL NULL NULL
+396 val_396 2008-04-08 NULL NULL NULL
+4 val_4 2008-04-08 NULL NULL NULL
+400 val_400 2008-04-08 NULL NULL NULL
+402 val_402 2008-04-08 NULL NULL NULL
+404 val_404 2008-04-08 NULL NULL NULL
+404 val_404 2008-04-08 NULL NULL NULL
+406 val_406 2008-04-08 NULL NULL NULL
+406 val_406 2008-04-08 NULL NULL NULL
+406 val_406 2008-04-08 NULL NULL NULL
+406 val_406 2008-04-08 NULL NULL NULL
+411 val_411 2008-04-08 NULL NULL NULL
+413 val_413 2008-04-08 NULL NULL NULL
+413 val_413 2008-04-08 NULL NULL NULL
+417 val_417 2008-04-08 NULL NULL NULL
+417 val_417 2008-04-08 NULL NULL NULL
+417 val_417 2008-04-08 NULL NULL NULL
+419 val_419 2008-04-08 NULL NULL NULL
+42 val_42 2008-04-08 NULL NULL NULL
+42 val_42 2008-04-08 NULL NULL NULL
+424 val_424 2008-04-08 NULL NULL NULL
+424 val_424 2008-04-08 NULL NULL NULL
+431 val_431 2008-04-08 NULL NULL NULL
+431 val_431 2008-04-08 NULL NULL NULL
+431 val_431 2008-04-08 NULL NULL NULL
+435 val_435 2008-04-08 NULL NULL NULL
+437 val_437 2008-04-08 NULL NULL NULL
+439 val_439 2008-04-08 NULL NULL NULL
+439 val_439 2008-04-08 NULL NULL NULL
+44 val_44 2008-04-08 NULL NULL NULL
+444 val_444 2008-04-08 NULL NULL NULL
+446 val_446 2008-04-08 NULL NULL NULL
+448 val_448 2008-04-08 NULL NULL NULL
+453 val_453 2008-04-08 NULL NULL NULL
+455 val_455 2008-04-08 NULL NULL NULL
+457 val_457 2008-04-08 NULL NULL NULL
+459 val_459 2008-04-08 NULL NULL NULL
+459 val_459 2008-04-08 NULL NULL NULL
+460 val_460 2008-04-08 NULL NULL NULL
+462 val_462 2008-04-08 NULL NULL NULL
+462 val_462 2008-04-08 NULL NULL NULL
+466 val_466 2008-04-08 NULL NULL NULL
+466 val_466 2008-04-08 NULL NULL NULL
+466 val_466 2008-04-08 NULL NULL NULL
+468 val_468 2008-04-08 NULL NULL NULL
+468 val_468 2008-04-08 NULL NULL NULL
+468 val_468 2008-04-08 NULL NULL NULL
+468 val_468 2008-04-08 NULL NULL NULL
+475 val_475 2008-04-08 NULL NULL NULL
+477 val_477 2008-04-08 NULL NULL NULL
+479 val_479 2008-04-08 NULL NULL NULL
+480 val_480 2008-04-08 NULL NULL NULL
+480 val_480 2008-04-08 NULL NULL NULL
+480 val_480 2008-04-08 NULL NULL NULL
+482 val_482 2008-04-08 NULL NULL NULL
+484 val_484 2008-04-08 NULL NULL NULL
+491 val_491 2008-04-08 NULL NULL NULL
+493 val_493 2008-04-08 NULL NULL NULL
+495 val_495 2008-04-08 NULL NULL NULL
+497 val_497 2008-04-08 NULL NULL NULL
+51 val_51 2008-04-08 NULL NULL NULL
+51 val_51 2008-04-08 NULL NULL NULL
+53 val_53 2008-04-08 NULL NULL NULL
+57 val_57 2008-04-08 NULL NULL NULL
+64 val_64 2008-04-08 NULL NULL NULL
+66 val_66 2008-04-08 NULL NULL NULL
+77 val_77 2008-04-08 NULL NULL NULL
+8 val_8 2008-04-08 NULL NULL NULL
+80 val_80 2008-04-08 NULL NULL NULL
+82 val_82 2008-04-08 NULL NULL NULL
+84 val_84 2008-04-08 NULL NULL NULL
+84 val_84 2008-04-08 NULL NULL NULL
+86 val_86 2008-04-08 NULL NULL NULL
+95 val_95 2008-04-08 NULL NULL NULL
+95 val_95 2008-04-08 NULL NULL NULL
+97 val_97 2008-04-08 NULL NULL NULL
+97 val_97 2008-04-08 NULL NULL NULL
+PREHOOK: query: explain
+select count(*) from tab s1 left outer join tab s2 on s1.key=s2.key join empty s3 on s1.key = s3.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab s1 left outer join tab s2 on s1.key=s2.key join empty s3 on s1.key = s3.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: s3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ 2 key (type: int)
+ Statistics: Num rows: 532 Data size: 5645 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab s1 left outer join tab s2 on s1.key=s2.key join empty s3 on s1.key = s3.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@empty
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab s1 left outer join tab s2 on s1.key=s2.key join empty s3 on s1.key = s3.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@empty
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+0
+PREHOOK: query: explain
+select count(*) from tab s1 left outer join empty s2 on s1.key=s2.key join tab s3 on s1.key = s3.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab s1 left outer join empty s2 on s1.key=s2.key join tab s3 on s1.key = s3.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: s3
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ 2 key (type: int)
+ Statistics: Num rows: 532 Data size: 5645 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab s1 left outer join empty s2 on s1.key=s2.key join tab s3 on s1.key = s3.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@empty
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab s1 left outer join empty s2 on s1.key=s2.key join tab s3 on s1.key = s3.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@empty
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+480
[2/2] hive git commit: HIVE-12249: Improve logging with tez (Vikram Dixit K, reviewed by Sergey Shelukhin)
Posted by vi...@apache.org.
HIVE-12249: Improve logging with tez (Vikram Dixit K, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d4df2aee
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d4df2aee
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d4df2aee
Branch: refs/heads/master
Commit: d4df2aee19e262898676d9cdfae5c8c07097d19b
Parents: f958de0
Author: vikram <vi...@hortonworks.com>
Authored: Fri Oct 30 14:55:05 2015 -0700
Committer: vikram <vi...@hortonworks.com>
Committed: Fri Oct 30 14:55:05 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/cli/CliDriver.java | 7 +++++++
.../org/apache/hadoop/hive/conf/HiveConf.java | 20 ++++++++++++++++++-
.../apache/hadoop/hive/ql/exec/tez/TezTask.java | 9 +++++++++
.../apache/hadoop/hive/ql/hooks/ATSHook.java | 21 ++++++++++++++------
ql/src/test/queries/clientpositive/mrr.q | 2 ++
.../service/cli/session/HiveSessionImpl.java | 21 ++++++++++++++++++++
6 files changed, 73 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/d4df2aee/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
----------------------------------------------------------------------
diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index 30ec14b..82d064d 100644
--- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -101,6 +101,7 @@ public class CliDriver {
private final LogHelper console;
protected ConsoleReader reader;
private Configuration conf;
+ private final String originalThreadName;
public CliDriver() {
SessionState ss = SessionState.get();
@@ -108,11 +109,15 @@ public class CliDriver {
Logger LOG = LoggerFactory.getLogger("CliDriver");
LOG.debug("CliDriver inited with classpath {}", System.getProperty("java.class.path"));
console = new LogHelper(LOG);
+ originalThreadName = Thread.currentThread().getName();
}
public int processCmd(String cmd) {
CliSessionState ss = (CliSessionState) SessionState.get();
ss.setLastCommand(cmd);
+
+ String callerInfo = ss.getConf().getLogIdVar(ss.getSessionId());
+ Thread.currentThread().setName(callerInfo + " " + originalThreadName);
// Flush the print stream, so it doesn't include output from the last command
ss.err.flush();
String cmd_trimmed = cmd.trim();
@@ -182,6 +187,7 @@ public class CliDriver {
}
}
+ Thread.currentThread().setName(originalThreadName);
return ret;
}
@@ -698,6 +704,7 @@ public class CliDriver {
SessionState.start(ss);
}
+ Thread.currentThread().setName(conf.getLogIdVar(ss.getSessionId()) + " " + originalThreadName);
// execute cli driver work
try {
return executeDriver(ss, conf, oproc);
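
The thread-renaming pattern used in CliDriver is easy to reproduce in isolation. A minimal sketch (hypothetical id value, not Hive code) of tagging a worker thread for the duration of a command and restoring its original name afterwards, so every log line carries a session/caller id:

    public class ThreadNameTagging {
        public static void main(String[] args) {
            String originalThreadName = Thread.currentThread().getName();
            String callerInfo = "sessionid_abc123";            // hypothetical log id
            Thread.currentThread().setName(callerInfo + " " + originalThreadName);
            try {
                // ... process the command; every log line now carries the id ...
                System.out.println("working as: " + Thread.currentThread().getName());
            } finally {
                Thread.currentThread().setName(originalThreadName); // as at the end of processCmd
            }
        }
    }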
http://git-wip-us.apache.org/repos/asf/hive/blob/d4df2aee/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 936ef54..a55e962 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -83,6 +83,7 @@ public class HiveConf extends Configuration {
private Pattern modWhiteListPattern = null;
private volatile boolean isSparkConfigUpdated = false;
+ private static final int LOG_PREFIX_LENGTH = 64;
public boolean getSparkConfigUpdated() {
return isSparkConfigUpdated;
@@ -2392,7 +2393,10 @@ public class HiveConf extends Configuration {
HIVE_TEZ_ENABLE_MEMORY_MANAGER("hive.tez.enable.memory.manager", true,
"Enable memory manager for tez"),
HIVE_HASH_TABLE_INFLATION_FACTOR("hive.hash.table.inflation.factor", (float) 2.0,
- "Expected inflation factor between disk/in memory representation of hash tables");
+ "Expected inflation factor between disk/in memory representation of hash tables"),
+ HIVE_LOG_TRACE_ID("hive.log.trace.id", "",
+ "Log tracing id that can be used by upstream clients for tracking respective logs. " +
+ "Truncated to " + LOG_PREFIX_LENGTH + " characters. Defaults to use auto-generated session id.");
public final String varname;
@@ -2838,6 +2842,20 @@ public class HiveConf extends Configuration {
return conf.get(var.varname, defaultVal);
}
+ public String getLogIdVar(String defaultValue) {
+ String retval = getVar(ConfVars.HIVE_LOG_TRACE_ID);
+ if (retval.equals("")) {
+ l4j.info("Using the default value passed in for log id: " + defaultValue);
+ retval = defaultValue;
+ }
+ if (retval.length() > LOG_PREFIX_LENGTH) {
+ l4j.warn("The original log id prefix is " + retval + " has been truncated to "
+ + retval.substring(0, LOG_PREFIX_LENGTH - 1));
+ retval = retval.substring(0, LOG_PREFIX_LENGTH - 1);
+ }
+ return retval;
+ }
+
public static void setVar(Configuration conf, ConfVars var, String val) {
assert (var.valClass == String.class) : var.varname;
conf.set(var.varname, val);
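
A stand-alone restatement of getLogIdVar's behavior (same 64-character bound assumed; note the code above keeps LOG_PREFIX_LENGTH - 1, i.e. 63, characters when it truncates): the configured hive.log.trace.id wins, otherwise the caller-supplied default (typically the session id) is used.

    public class LogIdSketch {
        private static final int LOG_PREFIX_LENGTH = 64;

        // Mirrors HiveConf.getLogIdVar: configured hive.log.trace.id wins,
        // otherwise fall back to the default; overlong ids are truncated.
        static String logId(String configuredTraceId, String defaultValue) {
            String id = configuredTraceId.isEmpty() ? defaultValue : configuredTraceId;
            if (id.length() > LOG_PREFIX_LENGTH) {
                id = id.substring(0, LOG_PREFIX_LENGTH - 1);  // keeps 63 chars, as above
            }
            return id;
        }

        public static void main(String[] args) {
            System.out.println(logId("", "auto-generated-session-id")); // fallback path
            System.out.println(logId("mrrTest", "ignored"));            // configured path
        }
    }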
http://git-wip-us.apache.org/repos/asf/hive/blob/d4df2aee/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 032a9e6..a6d911d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.tez.client.CallerContext;
import org.apache.tez.client.TezClient;
import org.apache.tez.common.counters.CounterGroup;
import org.apache.tez.common.counters.TezCounter;
@@ -163,6 +164,14 @@ public class TezTask extends Task<TezWork> {
// next we translate the TezWork to a Tez DAG
DAG dag = build(jobConf, work, scratchDir, appJarLr, additionalLr, ctx);
+ if (driverContext.getCtx() == null) {
+ boolean a = false;
+ }
+ CallerContext callerContext = CallerContext.create("HIVE",
+ conf.getLogIdVar(SessionState.get().getSessionId()) + " "
+ + conf.getVar(HiveConf.ConfVars.HIVEQUERYID),
+ "HIVE_QUERY_ID", queryPlan.getQueryStr());
+ dag.setCallerContext(callerContext);
// Add the extra resources to the dag
addExtraResourcesToDag(session, dag, inputOutputJars, inputOutputLocalResources);
http://git-wip-us.apache.org/repos/asf/hive/blob/d4df2aee/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
index 5610fab..38b6b5d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
@@ -26,11 +26,13 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExplainWork;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent;
@@ -52,7 +54,10 @@ public class ATSHook implements ExecuteWithHookContext {
private static TimelineClient timelineClient;
private enum EntityTypes { HIVE_QUERY_ID };
private enum EventTypes { QUERY_SUBMITTED, QUERY_COMPLETED };
- private enum OtherInfoTypes { QUERY, STATUS, TEZ, MAPRED };
+
+ private enum OtherInfoTypes {
+ QUERY, STATUS, TEZ, MAPRED, INVOKER_INFO, THREAD_NAME
+ };
private enum PrimaryFilterTypes { user, requestuser, operationid };
private static final int WAIT_TIME = 3;
@@ -104,7 +109,7 @@ public class ATSHook implements ExecuteWithHookContext {
String user = hookContext.getUgi().getUserName();
String requestuser = hookContext.getUserName();
if (hookContext.getUserName() == null ){
- requestuser = hookContext.getUgi().getUserName() ;
+ requestuser = hookContext.getUgi().getUserName() ;
}
int numMrJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
int numTezJobs = Utilities.getTezTasks(plan.getRootTasks()).size();
@@ -133,8 +138,9 @@ public class ATSHook implements ExecuteWithHookContext {
explain.initialize(conf, plan, null);
String query = plan.getQueryStr();
JSONObject explainPlan = explain.getJSONPlan(null, work);
- fireAndForget(conf, createPreHookEvent(queryId, query,
- explainPlan, queryStartTime, user, requestuser, numMrJobs, numTezJobs, opId));
+ String logID = conf.getLogIdVar(SessionState.get().getSessionId());
+ fireAndForget(conf, createPreHookEvent(queryId, query, explainPlan, queryStartTime,
+ user, requestuser, numMrJobs, numTezJobs, opId, logID));
break;
case POST_EXEC_HOOK:
fireAndForget(conf, createPostHookEvent(queryId, currentTime, user, requestuser, true, opId));
@@ -154,7 +160,8 @@ public class ATSHook implements ExecuteWithHookContext {
}
TimelineEntity createPreHookEvent(String queryId, String query, JSONObject explainPlan,
- long startTime, String user, String requestuser, int numMrJobs, int numTezJobs, String opId) throws Exception {
+ long startTime, String user, String requestuser, int numMrJobs, int numTezJobs, String opId,
+ String logID) throws Exception {
JSONObject queryObj = new JSONObject(new LinkedHashMap<>());
queryObj.put("queryText", query);
@@ -171,7 +178,7 @@ public class ATSHook implements ExecuteWithHookContext {
atsEntity.setEntityType(EntityTypes.HIVE_QUERY_ID.name());
atsEntity.addPrimaryFilter(PrimaryFilterTypes.user.name(), user);
atsEntity.addPrimaryFilter(PrimaryFilterTypes.requestuser.name(), requestuser);
-
+
if (opId != null) {
atsEntity.addPrimaryFilter(PrimaryFilterTypes.operationid.name(), opId);
}
@@ -184,6 +191,8 @@ public class ATSHook implements ExecuteWithHookContext {
atsEntity.addOtherInfo(OtherInfoTypes.QUERY.name(), queryObj.toString());
atsEntity.addOtherInfo(OtherInfoTypes.TEZ.name(), numTezJobs > 0);
atsEntity.addOtherInfo(OtherInfoTypes.MAPRED.name(), numMrJobs > 0);
+ atsEntity.addOtherInfo(OtherInfoTypes.INVOKER_INFO.name(), logID);
+ atsEntity.addOtherInfo(OtherInfoTypes.THREAD_NAME.name(), Thread.currentThread().getName());
return atsEntity;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/d4df2aee/ql/src/test/queries/clientpositive/mrr.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mrr.q b/ql/src/test/queries/clientpositive/mrr.q
index 2ced4db..a8eddaf 100644
--- a/ql/src/test/queries/clientpositive/mrr.q
+++ b/ql/src/test/queries/clientpositive/mrr.q
@@ -1,5 +1,6 @@
set hive.explain.user=false;
set hive.auto.convert.join.noconditionaltask.size=60000000;
+set hive.log.trace.id=mrrTest;
-- simple query with multiple reduce stages
-- SORT_QUERY_RESULTS
@@ -50,6 +51,7 @@ WHERE
s1.cnt > 1
ORDER BY s1.key;
+set hive.log.trace.id=Test2;
set hive.auto.convert.join=true;
-- query with broadcast join in the reduce stage
EXPLAIN
http://git-wip-us.apache.org/repos/asf/hive/blob/d4df2aee/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
index 50e938e..27d11df 100644
--- a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
+++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
@@ -310,6 +310,11 @@ public class HiveSessionImpl implements HiveSession {
if (userAccess) {
lastAccessTime = System.currentTimeMillis();
}
+ // set the thread name with the logging prefix.
+ String logPrefix = getHiveConf().getLogIdVar(sessionState.getSessionId());
+ LOG.info(
+ "Prefixing the thread name (" + Thread.currentThread().getName() + ") with " + logPrefix);
+ Thread.currentThread().setName(logPrefix + Thread.currentThread().getName());
Hive.set(sessionHive);
}
@@ -321,6 +326,22 @@ public class HiveSessionImpl implements HiveSession {
* @see org.apache.hive.service.server.ThreadWithGarbageCleanup#finalize()
*/
protected synchronized void release(boolean userAccess) {
+ if (sessionState != null) {
+ // can be null in-case of junit tests. skip reset.
+ // reset thread name at release time.
+ String[] names = Thread.currentThread().getName()
+ .split(getHiveConf().getLogIdVar(sessionState.getSessionId()));
+ String threadName = null;
+ if (names.length > 1) {
+ threadName = names[names.length - 1];
+ } else if (names.length == 1) {
+ threadName = names[0];
+ } else {
+ threadName = "";
+ }
+ Thread.currentThread().setName(threadName);
+ }
+
SessionState.detachSession();
if (ThreadWithGarbageCleanup.currentThread() instanceof ThreadWithGarbageCleanup) {
ThreadWithGarbageCleanup currentThread =
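
The release() logic above strips the prefix again so a pooled thread does not carry a stale session id into its next task. A compact runnable illustration (hypothetical prefix value) of the same split-and-restore approach; note that String.split treats the log id as a regular expression, so this holds for plain alphanumeric ids like the ones above:

    public class ThreadNameReset {
        public static void main(String[] args) {
            String logPrefix = "sessionid_abc123";                    // hypothetical id
            String original = "HiveServer2-Handler-Pool: Thread-42";
            Thread.currentThread().setName(logPrefix + original);     // as set at acquire()

            // Mirror of release(): split on the log id and keep the trailing part.
            String[] names = Thread.currentThread().getName().split(logPrefix);
            String restored = (names.length > 1) ? names[names.length - 1]
                            : (names.length == 1) ? names[0] : "";
            Thread.currentThread().setName(restored);
            System.out.println(Thread.currentThread().getName());     // original name
        }
    }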