You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/23 20:17:32 UTC

svn commit: r1627109 [1/2] - in /hive/trunk: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/src/test/queries/clientp...

Author: gunther
Date: Tue Sep 23 18:17:31 2014
New Revision: 1627109

URL: http://svn.apache.org/r1627109
Log:
HIVE-8163: With dynamic partition pruning map operator that generates the partition filters is not vectorized (Gunther Hagleitner, reviewed by Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java
    hive/trunk/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
Modified:
    hive/trunk/itests/src/test/resources/testconfiguration.properties
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java

Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1627109&r1=1627108&r2=1627109&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Tue Sep 23 18:17:31 2014
@@ -187,7 +187,8 @@ minitez.query.files=bucket_map_join_tez1
   tez_joins_explain.q,\
   tez_schema_evolution.q,\
   tez_union.q,\
-  tez_union_decimal.q
+  tez_union_decimal.q,\
+  vectorized_dynamic_partition_pruning.q
 
 beeline.positive.exclude=add_part_exist.q,\
   alter1.q,\

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java?rev=1627109&r1=1627108&r2=1627109&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java Tue Sep 23 18:17:31 2014
@@ -44,10 +44,10 @@ import org.apache.tez.runtime.api.events
 @SuppressWarnings({ "deprecation", "serial" })
 public class AppMasterEventOperator extends Operator<AppMasterEventDesc> {
 
-  private transient Serializer serializer;
-  private transient DataOutputBuffer buffer;
-  private transient boolean hasReachedMaxSize = false;
-  private transient long MAX_SIZE;
+  protected transient Serializer serializer;
+  protected transient DataOutputBuffer buffer;
+  protected transient boolean hasReachedMaxSize = false;
+  protected transient long MAX_SIZE;
 
   @Override
   public void initializeOp(Configuration hconf) throws HiveException {
@@ -57,7 +57,7 @@ public class AppMasterEventOperator exte
     initDataBuffer(false);
   }
 
-  private void initDataBuffer(boolean skipPruning) throws HiveException {
+  protected void initDataBuffer(boolean skipPruning) throws HiveException {
     buffer = new DataOutputBuffer();
     try {
       // where does this go to?

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1627109&r1=1627108&r2=1627109&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Tue Sep 23 18:17:31 2014
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
@@ -118,6 +119,10 @@ public final class OperatorFactory {
 
   static {
     vectorOpvec = new ArrayList<OpTuple>();
+    vectorOpvec.add(new OpTuple<AppMasterEventDesc>(AppMasterEventDesc.class,
+        VectorAppMasterEventOperator.class));
+    vectorOpvec.add(new OpTuple<DynamicPruningEventDesc>(DynamicPruningEventDesc.class,
+        VectorAppMasterEventOperator.class));
     vectorOpvec.add(new OpTuple<SelectDesc>(SelectDesc.class, VectorSelectOperator.class));
     vectorOpvec.add(new OpTuple<GroupByDesc>(GroupByDesc.class, VectorGroupByOperator.class));
     vectorOpvec.add(new OpTuple<MapJoinDesc>(MapJoinDesc.class, VectorMapJoinOperator.class));

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java?rev=1627109&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java Tue Sep 23 18:17:31 2014
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.ObjectWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * App Master Event operator implementation.
+ **/
+public class VectorAppMasterEventOperator extends AppMasterEventOperator {
+
+  private static final long serialVersionUID = 1L;
+
+  protected transient Object[] singleRow;
+
+  protected transient VectorExpressionWriter[] valueWriters;
+
+  public VectorAppMasterEventOperator(VectorizationContext context,
+      OperatorDesc conf) {
+    super();
+    this.conf = (AppMasterEventDesc) conf;
+  }
+
+  public VectorAppMasterEventOperator() {
+  }
+
+  @Override
+  public void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+    valueWriters = VectorExpressionWriterFactory.getExpressionWriters(
+        (StructObjectInspector) inputObjInspectors[0]);
+    singleRow = new Object[valueWriters.length];
+  }
+
+  @Override
+  public void processOp(Object data, int tag) throws HiveException {
+    
+    VectorizedRowBatch vrg = (VectorizedRowBatch) data;
+    
+    Writable [] records = null;
+    Writable recordValue = null;
+    boolean vectorizedSerde = false;
+
+    try {
+      if (serializer instanceof VectorizedSerde) {
+        recordValue = ((VectorizedSerde) serializer).serializeVector(vrg,
+            inputObjInspectors[0]);
+        records = (Writable[]) ((ObjectWritable) recordValue).get();
+        vectorizedSerde = true;
+      }
+    } catch (SerDeException e1) {
+      throw new HiveException(e1);
+    }
+    
+    for (int i = 0; i < vrg.size; i++) {
+      Writable row = null;
+      if (vectorizedSerde) {
+        row = records[i];
+      } else {
+        if (vrg.valueWriters == null) {
+          vrg.setValueWriters(this.valueWriters);
+        }
+        try {
+          row = serializer.serialize(getRowObject(vrg, i), inputObjInspectors[0]);
+        } catch (SerDeException ex) {
+          throw new HiveException(ex);
+        }
+      }
+      try {
+        row.write(buffer);
+        if (buffer.getLength() > MAX_SIZE) {
+          LOG.info("Disabling AM events. Buffer size too large: " + buffer.getLength());
+          hasReachedMaxSize = true;
+          buffer = null;
+        }
+      } catch (Exception e) {
+        throw new HiveException(e);
+      }
+    }
+  }
+
+  private Object[] getRowObject(VectorizedRowBatch vrg, int rowIndex)
+      throws HiveException {
+    int batchIndex = rowIndex;
+    if (vrg.selectedInUse) {
+      batchIndex = vrg.selected[rowIndex];
+    }
+    for (int i = 0; i < vrg.projectionSize; i++) {
+      ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]];
+      singleRow[i] = vrg.valueWriters[i].writeValue(vectorColumn, batchIndex);
+    }
+    return singleRow;
+  }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1627109&r1=1627108&r2=1627109&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Sep 23 18:17:31 2014
@@ -817,6 +817,7 @@ public class Vectorizer implements Physi
         break;
       case FILESINK:
       case LIMIT:
+      case EVENT:
         ret = true;
         break;
       default:
@@ -862,6 +863,7 @@ public class Vectorizer implements Physi
         ret = validateFileSinkOperator((FileSinkOperator) op);
         break;
       case LIMIT:
+      case EVENT:
         ret = true;
         break;
       default:
@@ -1203,6 +1205,7 @@ public class Vectorizer implements Physi
       case REDUCESINK:
       case LIMIT:
       case EXTRACT:
+      case EVENT:
         vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
         break;
       default:

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q?rev=1627109&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q Tue Sep 23 18:17:31 2014
@@ -0,0 +1,192 @@
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.vectorized.execution.enabled=true;
+
+
+select distinct ds from srcpart;
+select distinct hr from srcpart;
+
+EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds;
+create table srcpart_date stored as orc as select ds as ds, ds as date from srcpart group by ds;
+create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr;
+create table srcpart_date_hour stored as orc as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr;
+create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr;
+
+-- single column, single key
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where ds = '2008-04-08';
+
+-- multiple sources, single key
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
+
+-- multiple columns single source
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
+
+-- empty set
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where ds = 'I DONT EXIST';
+
+-- expressions
+EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where hr = 11;
+EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11;
+select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11;
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where cast(hr as string) = 11;
+
+
+-- parent is reduce tasks
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
+select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
+select count(*) from srcpart where ds = '2008-04-08';
+
+-- non-equi join
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
+select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
+
+-- old style join syntax
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
+select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
+
+-- left join
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+
+-- full outer
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+
+-- with static pruning
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+
+-- union + subquery
+EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask = true;
+set hive.auto.convert.join.noconditionaltask.size = 10000000;
+
+-- single column, single key
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+select count(*) from srcpart where ds = '2008-04-08';
+
+-- multiple sources, single key
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
+
+-- multiple columns single source
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
+
+-- empty set
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+-- Disabled until TEZ-1486 is fixed
+-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+
+-- expressions
+EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11;
+select count(*) from srcpart where hr = 11;
+
+-- parent is reduce tasks
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
+select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
+select count(*) from srcpart where ds = '2008-04-08';
+
+-- left join
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+
+-- full outer
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+
+-- with static pruning
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+-- Disabled until TEZ-1486 is fixed
+-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+-- where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+
+-- union + subquery
+EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
+
+
+-- different file format
+create table srcpart_orc (key int, value string) partitioned by (ds string, hr int) stored as orc;
+
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.vectorized.execution.enabled=false;
+set hive.exec.max.dynamic.partitions=1000;
+
+insert into table srcpart_orc partition (ds, hr) select key, value, ds, hr from srcpart;
+EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.date = '2008-04-08' or srcpart_date_hour.date = '2008-04-09');
+select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.date = '2008-04-08' or srcpart_date_hour.date = '2008-04-09');
+select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11;
+
+drop table srcpart_orc;
+drop table srcpart_date;
+drop table srcpart_hour;
+drop table srcpart_date_hour;
+drop table srcpart_double_hour;