Posted to commits@hive.apache.org by pr...@apache.org on 2015/08/28 20:11:35 UTC

[01/12] hive git commit: HIVE-11638: ExprNodeDesc hashMap accidentally degrades into O(N) instead of O(1) (Gopal V, reviewed by Jesus Camacho Rodriguez)

Repository: hive
Updated Branches:
  refs/heads/llap 387fed279 -> 025765382


HIVE-11638: ExprNodeDesc hashMap accidentally degrades into O(N) instead of O(1) (Gopal V, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/037fb02a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/037fb02a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/037fb02a

Branch: refs/heads/llap
Commit: 037fb02a8edc1266d0beb02eee1fb90737fc1ef7
Parents: bb7153f
Author: Gopal V <go...@apache.org>
Authored: Thu Aug 27 02:27:35 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 27 02:27:35 2015 +0200

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java   | 4 ++--
 .../java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java   | 8 ++++++++
 .../java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java  | 9 +++++----
 3 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
index 366b74b..6a81170 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
@@ -19,7 +19,7 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -53,7 +53,7 @@ public class LeadLagInfo {
   public void addLLFuncExprForTopExpr(ExprNodeDesc topExpr, ExprNodeGenericFuncDesc llFuncExpr) {
     addLeadLagExpr(llFuncExpr);
     mapTopExprToLLFunExprs = mapTopExprToLLFunExprs == null ?
-        new HashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
+        new IdentityHashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
     List<ExprNodeGenericFuncDesc> funcList = mapTopExprToLLFunExprs.get(topExpr);
     if (funcList == null) {
       funcList = new ArrayList<ExprNodeGenericFuncDesc>();

http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
index 15267b9..328bd86 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
@@ -60,6 +60,14 @@ public abstract class ExprNodeDesc implements Serializable, Node {
     return typeInfo.hashCode();
   }
 
+  @Override
+  public final boolean equals(Object o) {
+    // prevent equals from being overridden in sub-classes
+    // always use ExprNodeDescEqualityWrapper
+    // if you need any other equality than Object.equals()
+    return (o == this);
+  }
+
   public TypeInfo getTypeInfo() {
     return typeInfo;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
index 33ad3e8..f23facf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.ppd;
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -98,17 +99,17 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
   public ExprWalkerInfo() {
     pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
     nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
-    exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
-    newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
+    exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
+    newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
   }
 
   public ExprWalkerInfo(Operator<? extends OperatorDesc> op) {
     this.op = op;
 
     pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
-    exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
+    exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
     nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
-    newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
+    newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
   }
 
   /**
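
A note on the failure mode, for readers outside the codebase: ExprNodeDesc.hashCode() returns typeInfo.hashCode(), so every expression node of a given type lands in the same HashMap bucket and each get() degrades into a scan of that bucket. IdentityHashMap hashes on System.identityHashCode() instead, restoring O(1). Below is a minimal, self-contained sketch of the effect; the Key class is a hypothetical stand-in, not Hive code, and absolute timings vary by JDK (JDK 8's bucket treeification softens the worst case that JDK 7 hit).

import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;

public class CollisionDemo {
  // Mimics ExprNodeDesc: constant hashCode (like typeInfo.hashCode() for one
  // type) plus identity equals (like the new final equals() above).
  static final class Key {
    @Override public int hashCode() { return 42; }
    @Override public boolean equals(Object o) { return o == this; }
  }

  public static void main(String[] args) {
    Map<Key, Integer> byHash = new HashMap<>();
    Map<Key, Integer> byIdentity = new IdentityHashMap<>();
    Key[] keys = new Key[20000];
    for (int i = 0; i < keys.length; i++) {
      keys[i] = new Key();
      byHash.put(keys[i], i);      // all 20000 keys share one bucket
      byIdentity.put(keys[i], i);  // spread by identity hash: O(1)
    }
    long t0 = System.nanoTime();
    for (Key k : keys) byHash.get(k);
    long t1 = System.nanoTime();
    for (Key k : keys) byIdentity.get(k);
    long t2 = System.nanoTime();
    System.out.printf("HashMap %.1f ms, IdentityHashMap %.1f ms%n",
        (t1 - t0) / 1e6, (t2 - t1) / 1e6);
  }
}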


[09/12] hive git commit: HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)

Posted by pr...@apache.org.
HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b6d1143a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b6d1143a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b6d1143a

Branch: refs/heads/llap
Commit: b6d1143aa7aaa20de035898f34df2d6b581895b6
Parents: d147a79
Author: Gopal V <go...@apache.org>
Authored: Fri Aug 28 01:22:45 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Fri Aug 28 01:22:45 2015 -0700

----------------------------------------------------------------------
 .../optimizer/DynamicPartitionPruningOptimization.java  | 12 ++++++++++++
 1 file changed, 12 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b6d1143a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index f475926..5ebd28a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -189,6 +189,18 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
       LOG.debug("TableScan: " + ts);
     }
 
+    if (ts == null) {
+      // could be a reduce sink
+      LOG.warn("Could not find the table scan for " + filter); 
+      return null;
+    } else {
+      Table table = ts.getConf().getTableMetadata();
+      if (table != null && !table.isPartitioned()) {
+        // table is not partitioned, skip optimizer
+        return null;
+      }
+    }
+
     // collect the dynamic pruning conditions
     removerContext.dynLists.clear();
     walkExprTree(desc.getPredicate(), removerContext);
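
The shape of the fix is two guard clauses in front of the expensive expression-tree walk: bail out when no table scan was found (the branch may end in a reduce sink), or when the table is not partitioned and can therefore never benefit from dynamic partition pruning. A hedged, self-contained restatement of that control flow follows; the types are illustrative stand-ins, not Hive's API.

interface PartTable { boolean isPartitioned(); }
interface Scan { PartTable getTable(); }
interface Expr { java.util.List<Expr> getChildren(); }

final class FastPathSketch {
  // Returns null for "rule does not apply", mirroring the NodeProcessor contract.
  Object process(Scan ts, Expr predicate) {
    if (ts == null) {
      return null;                      // could be a reduce sink; nothing to prune
    }
    if (!ts.getTable().isPartitioned()) {
      return null;                      // unpartitioned table: the walk can never pay off
    }
    // Only now pay the O(#nodes) walk; a large IN() predicate has thousands of nodes.
    return countNodes(predicate);
  }

  int countNodes(Expr e) {
    int n = 1;
    for (Expr c : e.getChildren()) {
      n += countNodes(c);
    }
    return n;
  }
}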


[10/12] hive git commit: HIVE-11659: Make Vectorization use the fast StringExpr (Gopal V, reviewed by Matt McCline)

Posted by pr...@apache.org.
HIVE-11659: Make Vectorization use the fast StringExpr (Gopal V, reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce258168
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce258168
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce258168

Branch: refs/heads/llap
Commit: ce2581680f1c109ea0a43868e0345a15b06b41c8
Parents: b6d1143
Author: Gopal V <go...@apache.org>
Authored: Fri Aug 28 01:24:32 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Fri Aug 28 01:24:32 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java  | 2 +-
 .../hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java   | 2 +-
 .../vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java  | 4 ++--
 .../ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java | 4 ++--
 .../exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java | 4 ++--
 .../ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java | 4 ++--
 6 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
index 626cea5..aff3551 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
@@ -143,7 +143,7 @@ public class VectorHashKeyWrapper extends KeyWrapper {
     for (int i = 0; i < byteValues.length; ++i) {
       // the byte comparison is potentially expensive so is better to branch on null
       if (!isNull[longValues.length + doubleValues.length + i]) {
-        if (0 != StringExpr.compare(
+        if (!StringExpr.equal(
             byteValues[i],
             byteStarts[i],
             byteLengths[i],

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
index a21162b..6383e8a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
@@ -82,7 +82,7 @@ public class CuckooSetBytes {
   }
 
   private static boolean entryEqual(byte[][] t, int hash, byte[] b, int start, int len) {
-    return t[hash] != null && StringExpr.compare(t[hash], 0, t[hash].length, b, start, len) == 0;
+    return t[hash] != null && StringExpr.equal(t[hash], 0, t[hash].length, b, start, len);
   }
 
   public void insert(byte[] x) {

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
index 87a11c0..9f2d4c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
@@ -234,8 +234,8 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB
            */
 
           if (!haveSaveKey ||
-              StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
-                                 vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
+              StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
+                                 vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
 
             // New key.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
index 9f10ff1..5a5d54f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
@@ -229,8 +229,8 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
            */
 
           if (!haveSaveKey ||
-              StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
-                                 vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
+              StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
+                                 vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
 
             // New key.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
index 9ff1141..e9ce739 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
@@ -230,8 +230,8 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
            */
 
           if (!haveSaveKey ||
-              StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
-                                 vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
+              StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
+                                 vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
 
             // New key.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
index 49efe1a..dfdd6d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
@@ -290,8 +290,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
              */
 
             if (!haveSaveKey ||
-                StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
-                                   vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
+                StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
+                                   vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
               // New key.
 
               if (haveSaveKey) {
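
Why equal() beats compare() here: all of these call sites only ever test the result against zero, but a three-way compare cannot reject on a length mismatch (it must scan the common prefix to establish ordering), while an equality test can bail out before touching a single byte. A hedged sketch of the two shapes; these are illustrative implementations, not StringExpr's real code.

final class BytesEq {
  static int compare(byte[] a, int aStart, int aLen,
                     byte[] b, int bStart, int bLen) {
    int n = Math.min(aLen, bLen);
    for (int i = 0; i < n; i++) {
      int d = (a[aStart + i] & 0xff) - (b[bStart + i] & 0xff);
      if (d != 0) {
        return d;                // ordering information the join code never used
      }
    }
    return aLen - bLen;
  }

  static boolean equal(byte[] a, int aStart, int aLen,
                       byte[] b, int bStart, int bLen) {
    if (aLen != bLen) {
      return false;              // fast reject: different lengths, no byte read
    }
    for (int i = 0; i < aLen; i++) {
      if (a[aStart + i] != b[bStart + i]) {
        return false;
      }
    }
    return true;
  }
}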


[04/12] hive git commit: HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)

Posted by pr...@apache.org.
HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9670a2b3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9670a2b3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9670a2b3

Branch: refs/heads/llap
Commit: 9670a2b3c35dfc3b9f61481b7ea8fcefbb01571c
Parents: b247cac
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Aug 27 11:43:25 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Aug 27 11:43:25 2015 -0500

----------------------------------------------------------------------
 .../hive/ql/parse/LoadSemanticAnalyzer.java     | 38 +++++++++++---------
 .../queries/clientnegative/load_orc_negative3.q |  6 ++++
 .../test/queries/clientpositive/load_orc_part.q |  4 +++
 .../clientnegative/load_orc_negative3.q.out     | 25 +++++++++++++
 .../results/clientpositive/load_orc_part.q.out  | 18 ++++++++++
 5 files changed, 75 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 85fa9c9..9d2702f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -128,9 +128,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     return new URI(fromScheme, fromAuthority, path, null, null);
   }
 
-  private void applyConstraints(URI fromURI, URI toURI, Tree ast,
+  private FileStatus[] applyConstraintsAndGetFiles(URI fromURI, URI toURI, Tree ast,
       boolean isLocal) throws SemanticException {
 
+    FileStatus[] srcs = null;
+
     // local mode implies that scheme should be "file"
     // we can change this going forward
     if (isLocal && !fromURI.getScheme().equals("file")) {
@@ -139,7 +141,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     try {
-      FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
+      srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
       if (srcs == null || srcs.length == 0) {
         throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
             "No files matching path " + fromURI));
@@ -168,6 +170,8 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
           + "\"hive.metastore.warehouse.dir\" do not conflict.";
       throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
     }
+
+    return srcs;
   }
 
   @Override
@@ -227,11 +231,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     // make sure the arguments make sense
-    applyConstraints(fromURI, toURI, fromTree, isLocal);
+    FileStatus[] files = applyConstraintsAndGetFiles(fromURI, toURI, fromTree, isLocal);
 
     // for managed tables, make sure the file formats match
     if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())) {
-      ensureFileFormatsMatch(ts, fromURI);
+      ensureFileFormatsMatch(ts, files);
     }
     inputs.add(toReadEntity(new Path(fromURI)));
     Task<? extends Serializable> rTask = null;
@@ -325,7 +329,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
   }
 
-  private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticException {
+  private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) throws SemanticException {
     final Class<? extends InputFormat> destInputFormat;
     try {
       if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
@@ -340,17 +344,19 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     // Other file formats should do similar check to make sure file formats match
     // when doing LOAD DATA .. INTO TABLE
     if (OrcInputFormat.class.equals(destInputFormat)) {
-      Path inputFilePath = new Path(fromURI);
-      try {
-        FileSystem fs = FileSystem.get(fromURI, conf);
-        // just creating orc reader is going to do sanity checks to make sure its valid ORC file
-        OrcFile.createReader(fs, inputFilePath);
-      } catch (FileFormatException e) {
-        throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
-            " table is stored as ORC but the file being loaded is not a valid ORC file."));
-      } catch (IOException e) {
-        throw new SemanticException("Unable to load data to destination table." +
-            " Error: " + e.getMessage());
+      for (FileStatus fileStatus : fileStatuses) {
+        try {
+          Path filePath = fileStatus.getPath();
+          FileSystem fs = FileSystem.get(filePath.toUri(), conf);
+          // just creating orc reader is going to do sanity checks to make sure it's a valid ORC file
+          OrcFile.createReader(fs, filePath);
+        } catch (FileFormatException e) {
+          throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
+              " table is stored as ORC but the file being loaded is not a valid ORC file."));
+        } catch (IOException e) {
+          throw new SemanticException("Unable to load data to destination table." +
+              " Error: " + e.getMessage());
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientnegative/load_orc_negative3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/load_orc_negative3.q b/ql/src/test/queries/clientnegative/load_orc_negative3.q
new file mode 100644
index 0000000..9a4116e
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/load_orc_negative3.q
@@ -0,0 +1,6 @@
+create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
+load data local inpath '../../data/files/kv1.txt' into table text_test;
+
+set hive.default.fileformat=ORC;
+create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
+load data inpath '${hiveconf:hive.metastore.warehouse.dir}/text_test/' into table orc_test;

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientpositive/load_orc_part.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q
index 0927ea4..2ff884d 100644
--- a/ql/src/test/queries/clientpositive/load_orc_part.q
+++ b/ql/src/test/queries/clientpositive/load_orc_part.q
@@ -9,6 +9,10 @@ load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split
 load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10');
 dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
 
+load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging;
+load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/' overwrite into table orc_test partition (ds='10');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
+
 alter table orc_test add partition(ds='11');
 alter table orc_test partition(ds='11') set fileformat textfile;
 load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11');

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientnegative/load_orc_negative3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/load_orc_negative3.q.out b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
new file mode 100644
index 0000000..77fb50e
--- /dev/null
+++ b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
@@ -0,0 +1,25 @@
+PREHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@text_test
+POSTHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@text_test
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@text_test
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@text_test
+PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test
+FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientpositive/load_orc_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out
index 34ca493..2e02c2e 100644
--- a/ql/src/test/results/clientpositive/load_orc_part.q.out
+++ b/ql/src/test/results/clientpositive/load_orc_part.q.out
@@ -42,6 +42,24 @@ POSTHOOK: type: LOAD
 POSTHOOK: Output: default@orc_test@ds=10
 Found 2 items
 #### A masked pattern was here ####
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_staging
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_staging
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_test@ds=10
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_test@ds=10
+Found 1 items
+#### A masked pattern was here ####
 PREHOOK: query: alter table orc_test add partition(ds='11')
 PREHOOK: type: ALTERTABLE_ADDPARTS
 PREHOOK: Output: default@orc_test
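
The core of the fix, restated: applyConstraintsAndGetFiles() now returns the expanded FileStatus list (a directory given to LOAD DATA is expanded into its files by matchFilesOrDir), and the format check opens each file rather than the original URI, which failed when that URI named a directory. A hedged standalone sketch of the loop, assuming the Hadoop FileSystem API and the OrcFile.createReader(fs, path) overload used in the diff; package names are from memory and the error handling is simplified.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.FileFormatException;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;

final class OrcLoadCheck {
  static void ensureAllOrc(FileStatus[] srcs, Configuration conf) throws IOException {
    for (FileStatus src : srcs) {            // one entry per matched file, not per dir
      Path p = src.getPath();
      FileSystem fs = FileSystem.get(p.toUri(), conf);
      try {
        // Constructing a reader parses the ORC footer, so it doubles as validation.
        OrcFile.createReader(fs, p);
      } catch (FileFormatException e) {
        throw new IOException(p + " is not a valid ORC file", e);
      }
    }
  }
}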


[07/12] hive git commit: HIVE-11664: Make tez container logs work with new log4j2 changes (Prasanth Jayachandran reviewed by Gopal V)

Posted by pr...@apache.org.
HIVE-11664: Make tez container logs work with new log4j2 changes (Prasanth Jayachandran reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/607b0e8a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/607b0e8a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/607b0e8a

Branch: refs/heads/llap
Commit: 607b0e8a6b4da164606b87c4d012059276b3a994
Parents: 9e85bbf
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Aug 27 17:14:17 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Aug 27 17:14:17 2015 -0500

----------------------------------------------------------------------
 data/conf/tez/hive-site.xml                    |  4 ++
 ql/src/main/resources/tez-container-log4j2.xml | 49 +++++++++++++++++++++
 2 files changed, 53 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/data/conf/tez/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
index e0238aa..2f9415a 100644
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@ -253,5 +253,9 @@
   </description>
 </property>
 
+<property>
+  <name>hive.tez.java.opts</name>
+  <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
+</property>
 
 </configuration>

http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/ql/src/main/resources/tez-container-log4j2.xml
----------------------------------------------------------------------
diff --git a/ql/src/main/resources/tez-container-log4j2.xml b/ql/src/main/resources/tez-container-log4j2.xml
new file mode 100644
index 0000000..be949dc
--- /dev/null
+++ b/ql/src/main/resources/tez-container-log4j2.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<Configuration status="trace" strict="true" name="TezContainerLog4j2"
+ packages="org.apache.hadoop.hive.ql.log">
+
+  <Properties>
+    <Property name="tez.container.log.threshold">ALL</Property>
+    <Property name="tez.container.log.level">INFO</Property>
+    <Property name="tez.container.root.logger">CLA</Property>
+    <Property name="tez.container.log.dir">${sys:yarn.app.container.log.dir}</Property>
+    <Property name="tez.container.log.file">syslog</Property>
+  </Properties>
+
+  <Appenders>
+    <RollingFile name="CLA" fileName="${sys:tez.container.log.dir}/${sys:tez.container.log.file}"
+     filePattern="${sys:tez.container.log.dir}/${sys:tez.container.log.file}.%d{yyyy-MM-dd}">
+      <PatternLayout pattern="%d{ISO8601} %p [%t] %c{2}: %m%n" />
+      <Policies>
+        <!-- Rollover at midnight (interval = 1 means daily) -->
+        <TimeBasedTriggeringPolicy interval="1" modulate="true"/>
+      </Policies>
+      <!-- 30-day backup -->
+      <!-- <DefaultRolloverStrategy max="30"/> -->
+    </RollingFile>
+  </Appenders>
+
+  <Loggers>
+    <Root level="${sys:tez.container.log.threshold}">
+      <AppenderRef ref="${sys:tez.container.root.logger}" level="${sys:tez.container.log.level}"/>
+    </Root>
+  </Loggers>
+
+</Configuration>
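
How the two pieces connect: the -D flags added to hive.tez.java.opts become JVM system properties in each Tez container, and the ${sys:...} references in tez-container-log4j2.xml are log4j2 system-property lookups, so the container resolves the CLA appender at the INFO level unless the opts are changed. A small Java illustration of the resolution step; the property names come from the config above, and the defaulting behavior is simplified for illustration.

final class SysPropLookupDemo {
  public static void main(String[] args) {
    // Simulates the container JVM started with:
    //   -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA
    System.setProperty("tez.container.log.level", "INFO");
    System.setProperty("tez.container.root.logger", "CLA");

    // log4j2's ${sys:name} lookup reads exactly these JVM system properties.
    String level = System.getProperty("tez.container.log.level");
    String logger = System.getProperty("tez.container.root.logger");
    System.out.println("root logger=" + logger + " at level=" + level);
  }
}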


[12/12] hive git commit: HIVE-11682: LLAP: Merge master into branch (Prasanth Jayachandran)

Posted by pr...@apache.org.
HIVE-11682: LLAP: Merge master into branch (Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/02576538
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/02576538
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/02576538

Branch: refs/heads/llap
Commit: 02576538282b9d1a0454f268b7ef181ee259184d
Parents: 387fed2 2ef40ca
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Fri Aug 28 13:07:12 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Fri Aug 28 13:07:12 2015 -0500

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    4 +
 data/conf/tez/hive-site.xml                     |   20 +-
 .../hive/metastore/MetaStoreDirectSql.java      |   40 +-
 .../ql/exec/vector/VectorHashKeyWrapper.java    |    2 +-
 .../exec/vector/expressions/CuckooSetBytes.java |    2 +-
 ...VectorMapJoinInnerBigOnlyStringOperator.java |    4 +-
 .../VectorMapJoinInnerStringOperator.java       |    4 +-
 .../VectorMapJoinLeftSemiStringOperator.java    |    4 +-
 .../VectorMapJoinOuterStringOperator.java       |    4 +-
 .../hadoop/hive/ql/optimizer/Optimizer.java     |   17 +-
 .../hive/ql/optimizer/PointLookupOptimizer.java |  102 +-
 .../calcite/translator/HiveOpConverter.java     |   31 +-
 .../hadoop/hive/ql/parse/LeadLagInfo.java       |    4 +-
 .../hive/ql/parse/LoadSemanticAnalyzer.java     |   38 +-
 .../hadoop/hive/ql/plan/ExprNodeDesc.java       |    8 +
 .../hadoop/hive/ql/ppd/ExprWalkerInfo.java      |  136 +-
 .../hive/ql/ppd/ExprWalkerProcFactory.java      |   92 +-
 .../hadoop/hive/ql/ppd/OpProcFactory.java       |   11 +-
 ql/src/main/resources/tez-container-log4j2.xml  |   49 +
 .../queries/clientnegative/load_orc_negative3.q |    6 +
 .../queries/clientpositive/flatten_and_or.q     |    4 +-
 .../test/queries/clientpositive/load_orc_part.q |    4 +
 .../test/queries/clientpositive/pointlookup.q   |   59 +
 .../test/queries/clientpositive/pointlookup2.q  |   51 +
 .../clientnegative/load_orc_negative3.q.out     |   25 +
 .../alter_partition_coltype.q.out               |   12 +-
 .../clientpositive/annotate_stats_filter.q.out  |    8 +-
 .../results/clientpositive/flatten_and_or.q.out |    8 +-
 .../results/clientpositive/load_orc_part.q.out  |   18 +
 ql/src/test/results/clientpositive/pcr.q.out    |   12 +-
 .../results/clientpositive/pointlookup.q.out    |  198 +++
 .../results/clientpositive/pointlookup2.q.out   | 1647 ++++++++++++++++++
 .../results/clientpositive/ppd_transform.q.out  |   12 +-
 .../test/results/clientpositive/spark/pcr.q.out |   12 +-
 .../clientpositive/spark/ppd_transform.q.out    |   12 +-
 .../clientpositive/spark/vectorized_case.q.out  |    2 +-
 .../clientpositive/tez/explainuser_1.q.out      |    2 +-
 .../clientpositive/tez/vectorized_case.q.out    |    2 +-
 .../clientpositive/vectorized_case.q.out        |    9 +-
 39 files changed, 2404 insertions(+), 271 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/02576538/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------


[06/12] hive git commit: HIVE-11623: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the tableAlias for ReduceSink operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

Posted by pr...@apache.org.
HIVE-11623: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the tableAlias for ReduceSink operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9e85bbf2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9e85bbf2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9e85bbf2

Branch: refs/heads/llap
Commit: 9e85bbf2780510edda79c247248da57619530577
Parents: fb152e4
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Aug 27 11:26:25 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Aug 27 11:26:46 2015 -0700

----------------------------------------------------------------------
 .../calcite/translator/HiveOpConverter.java     | 31 ++++++++++++++++----
 1 file changed, 26 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9e85bbf2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index 4db9863..1931880 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -686,13 +686,34 @@ public class HiveOpConverter {
       int numReducers, Operation acidOperation, boolean strictMode,
       List<String> keepColNames) throws SemanticException {
     // 1. Generate RS operator
-    if (input.getSchema().getTableNames().size() != 1) {
+    // 1.1 Prune the tableNames: only count table names that are not empty strings,
+    // as an empty-string alias is only allowed for virtual columns.
+    String tableAlias = null;
+    Set<String> tableNames = input.getSchema().getTableNames();
+    for (String tableName : tableNames) {
+      if (tableName != null) {
+        if (tableName.length() == 0) {
+          if (tableAlias == null) {
+            tableAlias = tableName;
+          }
+        } else {
+          if (tableAlias == null || tableAlias.length() == 0) {
+            tableAlias = tableName;
+          } else {
+            if (!tableName.equals(tableAlias)) {
+              throw new SemanticException(
+                  "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is more than one");
+            }
+          }
+        }
+      }
+    }
+    if (tableAlias == null) {
       throw new SemanticException(
-          "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one SelectOp but there is "
-              + input.getSchema().getTableNames().size());
+          "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
     }
-    ReduceSinkOperator rsOp = genReduceSink(input, input.getSchema().getTableNames().iterator()
-        .next(), keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);
+    // 1.2 Now generate RS operator
+    ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);
 
     // 2. Generate backtrack Select operator
     Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(keepColNames,
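
The alias-selection rule above, in isolation: a null entry is skipped, an empty string (the alias virtual columns carry) is only remembered as a fallback, and at most one distinct non-empty alias may appear. A runnable restatement with our own names, not Hive's types:

import java.util.LinkedHashSet;
import java.util.Set;

final class AliasPick {
  static String pick(Set<String> tableNames) {
    String alias = null;
    for (String t : tableNames) {
      if (t == null) {
        continue;
      } else if (t.isEmpty()) {
        if (alias == null) {
          alias = t;                     // remember "" only as a fallback
        }
      } else if (alias == null || alias.isEmpty()) {
        alias = t;                       // first real alias wins
      } else if (!t.equals(alias)) {
        throw new IllegalStateException("more than one alias: " + alias + ", " + t);
      }
    }
    if (alias == null) {
      throw new IllegalStateException("no table alias found");
    }
    return alias;
  }

  public static void main(String[] args) {
    Set<String> names = new LinkedHashSet<>();
    names.add("");                       // virtual columns contribute an empty alias
    names.add("t1");
    System.out.println(pick(names));     // prints: t1
  }
}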


[03/12] hive git commit: HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)

Posted by pr...@apache.org.
HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b247cac4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b247cac4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b247cac4

Branch: refs/heads/llap
Commit: b247cac4fc3814e422d4f5d5aad96a1c6e385a7b
Parents: 037fb02
Author: Gopal V <go...@apache.org>
Authored: Thu Aug 27 09:50:08 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 27 09:50:08 2015 +0200

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    4 +
 .../hadoop/hive/ql/optimizer/Optimizer.java     |   17 +-
 .../hive/ql/optimizer/PointLookupOptimizer.java |  102 +-
 .../queries/clientpositive/flatten_and_or.q     |    4 +-
 .../test/queries/clientpositive/pointlookup.q   |   59 +
 .../test/queries/clientpositive/pointlookup2.q  |   51 +
 .../alter_partition_coltype.q.out               |   12 +-
 .../clientpositive/annotate_stats_filter.q.out  |    8 +-
 .../results/clientpositive/flatten_and_or.q.out |    8 +-
 ql/src/test/results/clientpositive/pcr.q.out    |   12 +-
 .../results/clientpositive/pointlookup.q.out    |  198 +++
 .../results/clientpositive/pointlookup2.q.out   | 1647 ++++++++++++++++++
 .../results/clientpositive/ppd_transform.q.out  |   12 +-
 .../test/results/clientpositive/spark/pcr.q.out |   12 +-
 .../clientpositive/spark/ppd_transform.q.out    |   12 +-
 .../clientpositive/spark/vectorized_case.q.out  |    2 +-
 .../clientpositive/tez/explainuser_1.q.out      |    2 +-
 .../clientpositive/tez/vectorized_case.q.out    |    2 +-
 .../clientpositive/vectorized_case.q.out        |    9 +-
 19 files changed, 2118 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8706a2d..8a00079 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1190,6 +1190,10 @@ public class HiveConf extends Configuration {
         "Whether to push predicates down into storage handlers.  Ignored when hive.optimize.ppd is false."),
     HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
          "Whether to transform OR clauses in Filter operators into IN clauses"),
+    HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
+             "Minimum number of OR clauses needed to transform into IN clauses"),
+    HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
+                 "Extract partial expressions when optimizing point lookup IN clauses"),
     // Constant propagation optimizer
     HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
     HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 14f362f..439f616 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -68,6 +68,18 @@ public class Optimizer {
 
     // Add the transformation that computes the lineage information.
     transformations.add(new Generator());
+
+    // Try to transform OR predicates in Filter into simpler IN clauses first
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+      final int min = HiveConf.getIntVar(hiveConf,
+          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+      final boolean extract = HiveConf.getBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
+      final boolean testMode = HiveConf.getBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVE_IN_TEST);
+      transformations.add(new PointLookupOptimizer(min, extract, testMode));
+    }
+
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
       transformations.add(new PredicateTransitivePropagate());
       if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
@@ -82,11 +94,6 @@ public class Optimizer {
         transformations.add(new ConstantPropagate());
     }
 
-    // Try to transform OR predicates in Filter into IN clauses.
-    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
-      transformations.add(new PointLookupOptimizer());
-    }
-
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
       transformations.add(new PartitionPruner());
       transformations.add(new PartitionConditionRemover());

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
index 6a8acec..d83636d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
@@ -18,10 +18,14 @@
 package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
 import org.apache.calcite.util.Pair;
@@ -46,15 +50,18 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.ListMultimap;
 
 /**
@@ -71,7 +78,49 @@ public class PointLookupOptimizer implements Transform {
           GenericUDFIn.class.getAnnotation(Description.class).name();
   private static final String STRUCT_UDF =
           GenericUDFStruct.class.getAnnotation(Description.class).name();
+  private static final String AND_UDF =
+      GenericUDFOPAnd.class.getAnnotation(Description.class).name();
+
+  // these are closure-bound for all the walkers in context
+  public final int minOrExpr;
+  public final boolean extract;
+  public final boolean testMode;
+
+  /*
+   * Pass in configs and pre-create a parse context
+   */
+  public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
+    this.minOrExpr = min;
+    this.extract = extract;
+    this.testMode = testMode;
+  }
+
+  // Hash Set iteration isn't ordered, but force string sorted order
+  // to get a consistent test run.
+  private Collection<ExprNodeDescEqualityWrapper> sortForTests(
+      Set<ExprNodeDescEqualityWrapper> valuesExpr) {
+    if (!testMode) {
+      // normal case - sorting is wasted for an IN()
+      return valuesExpr;
+    }
+    final Collection<ExprNodeDescEqualityWrapper> sortedValues;
+
+    sortedValues = ImmutableSortedSet.copyOf(
+        new Comparator<ExprNodeDescEqualityWrapper>() {
+          @Override
+          public int compare(ExprNodeDescEqualityWrapper w1,
+              ExprNodeDescEqualityWrapper w2) {
+            // fail if you find nulls (this is a test-code section)
+            if (w1.equals(w2)) {
+              return 0;
+            }
+            return w1.getExprNodeDesc().getExprString()
+                .compareTo(w2.getExprNodeDesc().getExprString());
+          }
+        }, valuesExpr);
 
+    return sortedValues;
+  }
 
   @Override
   public ParseContext transform(ParseContext pctx) throws SemanticException {
@@ -103,7 +152,9 @@ public class PointLookupOptimizer implements Transform {
         if (LOG.isDebugEnabled()) {
           LOG.debug("Generated new predicate with IN clause: " + newPredicate);
         }
-        filterOp.getConf().setOrigPredicate(predicate);
+        if (!extract) {
+          filterOp.getConf().setOrigPredicate(predicate);
+        }
         filterOp.getConf().setPredicate(newPredicate);
       }
 
@@ -140,8 +191,11 @@ public class PointLookupOptimizer implements Transform {
         return null;
       }
 
-      // 2. It is an OR operator
+      // 2. It is an OR operator with enough children
       List<ExprNodeDesc> children = fd.getChildren();
+      if (children.size() < minOrExpr) {
+        return null;
+      }
       ListMultimap<String,Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap =
               ArrayListMultimap.create();
       boolean modeAnd = false;
@@ -272,6 +326,50 @@ public class PointLookupOptimizer implements Transform {
       newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
               FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
 
+      if (extract && columns.size() > 1) {
+        final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size()+1);
+
+        // extract pre-conditions for the tuple expressions
+        // (a,b) IN ((1,2),(2,3)) ->
+        //          ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3))
+
+        for (String keyString : columnConstantsMap.keySet()) {
+          final Set<ExprNodeDescEqualityWrapper> valuesExpr = 
+              new HashSet<ExprNodeDescEqualityWrapper>(children.size());
+          final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial = 
+              columnConstantsMap.get(keyString);
+          for (int i = 0; i < children.size(); i++) {
+            Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
+                .get(i);
+            valuesExpr
+                .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
+          }
+          ExprNodeColumnDesc lookupCol = partial.get(0).left;
+          // generate a partial IN clause, if the column is a partition column
+          if (lookupCol.getIsPartitionColOrVirtualCol()
+              || valuesExpr.size() < children.size()) {
+            // optimize only nDV reductions
+            final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
+            inExpr.add(lookupCol);
+            for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
+              inExpr.add(value.getExprNodeDesc());
+            }
+            subExpr.add(new ExprNodeGenericFuncDesc(
+                TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                    .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
+          }
+        }
+        // loop complete, inspect the sub expressions generated
+        if (subExpr.size() > 0) {
+          // add the newPredicate to the end & produce an AND clause
+          subExpr.add(newPredicate);
+          newPredicate = new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                  .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+        }
+        // else, newPredicate is unmodified
+      }
+
       return newPredicate;
     }
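
What the extract branch produces, in miniature: for (a,b) IN ((1,2),(2,3),(1,3)) it prepends a IN (1,2) AND b IN (2,3) to the tuple IN, because each per-column value set deduplicates below the tuple count; the real code also always emits the partial IN for partition or virtual columns, since those feed the partition pruner. A hedged toy model follows, string-building only, not Hive's ExprNodeDesc machinery, and the SQL rendering is deliberately loose.

import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

final class PointLookupExtractSketch {
  static String extract(String[] cols, int[][] tuples) {
    StringBuilder sb = new StringBuilder();
    for (int c = 0; c < cols.length; c++) {
      Set<Integer> values = new TreeSet<>();      // sorted, like the test-mode path
      for (int[] t : tuples) {
        values.add(t[c]);                         // dedup is the nDV reduction
      }
      if (values.size() < tuples.length) {        // only worth it if values dedup
        sb.append(cols[c]).append(" IN ").append(values).append(" AND ");
      }
    }
    sb.append('(').append(String.join(",", cols)).append(") IN ")
      .append(Arrays.deepToString(tuples));
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(extract(new String[] {"a", "b"},
        new int[][] {{1, 2}, {2, 3}, {1, 3}}));
    // a IN [1, 2] AND b IN [2, 3] AND (a,b) IN [[1, 2], [2, 3], [1, 3]]
  }
}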
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/flatten_and_or.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/flatten_and_or.q b/ql/src/test/queries/clientpositive/flatten_and_or.q
index 6d65225..6c6e0f9 100644
--- a/ql/src/test/queries/clientpositive/flatten_and_or.q
+++ b/ql/src/test/queries/clientpositive/flatten_and_or.q
@@ -1,3 +1,5 @@
+set hive.optimize.point.lookup=false;
+
 explain
 SELECT key
 FROM src
@@ -14,4 +16,4 @@ WHERE
    AND value = '1') OR (key = '9'
    AND value = '1') OR (key = '10'
    AND value = '3'))
-;
\ No newline at end of file
+;

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
new file mode 100644
index 0000000..1aef2ef
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pointlookup.q
@@ -0,0 +1,59 @@
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
+
+
+set hive.optimize.point.lookup.min=3;
+set hive.optimize.point.lookup.extract=false;
+
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
+
+set hive.optimize.point.lookup.extract=true;
+
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
new file mode 100644
index 0000000..31bebbb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pointlookup2.q
@@ -0,0 +1,51 @@
+drop table pcr_t1;
+drop table pcr_t2;
+drop table pcr_t3;
+
+create table pcr_t1 (key int, value string) partitioned by (ds string);
+insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
+
+create table pcr_t2 (ds string, key int, value string);
+from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08';
+from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
+
+set hive.optimize.point.lookup.min=2;
+set hive.optimize.point.lookup.extract=true;
+
+explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds;
+
+drop table pcr_t1;
+drop table pcr_t2;
+drop table pcr_t3;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 06515da..9fc3c8d 100644
--- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -1134,15 +1134,11 @@ STAGE PLANS:
           alias: alterdynamic_part_table
           Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
           GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
+          Select Operator
+            expressions: intcol (type: string)
+            outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: intcol (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-              ListSink
+            ListSink
 
 PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index 9e0e78a..054b573 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -678,15 +678,15 @@ STAGE PLANS:
             alias: loc_orc
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (state) IN ('OH', 'CA') (type: boolean)
-              Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+              predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
+              Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/flatten_and_or.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out
index 5f25daa..9c51ff3 100644
--- a/ql/src/test/results/clientpositive/flatten_and_or.q.out
+++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out
@@ -44,15 +44,15 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
index 4c9ea77..d7c40a3 100644
--- a/ql/src/test/results/clientpositive/pcr.q.out
+++ b/ql/src/test/results/clientpositive/pcr.q.out
@@ -2475,16 +2475,16 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string), ds (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                   sort order: +++
-                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                   tag: -1
                   auto parallelism: false
       Path -> Alias:
@@ -2588,13 +2588,13 @@ STAGE PLANS:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
new file mode 100644
index 0000000..7e19be4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pointlookup.q.out
@@ -0,0 +1,198 @@
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean)
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
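
The three plans above line up with the settings in pointlookup.q: the OR chain untouched, then the struct(key,value) IN form, and finally the extract=true case, which places a cheaper single-column filter, (value) IN ('1', '3', '5', '6', '8'), built from the distinct values of one tuple column, in front of the tuple IN (note the row estimate drops from 250 to 125). A hedged sketch of that extraction step (class and method names are illustrative, not Hive's):

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

// Standalone sketch: derive a single-column pre-filter from the tuples
// of a struct IN, as in (value) IN ('1', '3', '5', '6', '8') above.
public final class InColumnExtraction {

  static Set<String> distinctColumn(List<String[]> tuples, int column) {
    Set<String> values = new LinkedHashSet<>();
    for (String[] tuple : tuples) {
      values.add(tuple[column]);
    }
    return values;
  }

  public static void main(String[] args) {
    List<String[]> tuples = Arrays.asList(
        new String[] {"0", "8"}, new String[] {"1", "5"},
        new String[] {"4", "1"}, new String[] {"10", "3"});
    // Rows whose value column misses this small set can be discarded
    // before the more expensive tuple-membership test runs.
    System.out.println(distinctColumn(tuples, 1));  // [8, 5, 1, 3]
  }
}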


[05/12] hive git commit: HIVE-11123 : Fix how the RDBMS product name is determined in the Metastore. (Shinichi Yamashita, reviewed by Sergey Shelukhin and Deepesh Khandelwal)

Posted by pr...@apache.org.
HIVE-11123 : Fix how the RDBMS product name is determined in the Metastore. (Shinichi Yamashita, reviewed by Sergey Shelukhin and Deepesh Khandelwal)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fb152e45
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fb152e45
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fb152e45

Branch: refs/heads/llap
Commit: fb152e45064fcb2846b198ba14e7f7cc13ada4bb
Parents: 9670a2b
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Aug 27 10:54:58 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Aug 27 10:54:58 2015 -0700

----------------------------------------------------------------------
 .../hive/metastore/MetaStoreDirectSql.java      | 40 +++++++-------------
 1 file changed, 13 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/fb152e45/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 5776ec6..522fcc2 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -148,16 +148,16 @@ class MetaStoreDirectSql {
 
   private DB determineDbType() {
     DB dbType = DB.OTHER;
-    if (runDbCheck("SET @@session.sql_mode=ANSI_QUOTES", "MySql")) {
-      dbType = DB.MYSQL;
-    } else if (runDbCheck("SELECT version FROM v$instance", "Oracle")) {
-      dbType = DB.ORACLE;
-    } else if (runDbCheck("SELECT @@version", "MSSQL")) {
-      dbType = DB.MSSQL;
-    } else {
-      // TODO: maybe we should use getProductName to identify all the DBs
-      String productName = getProductName();
-      if (productName != null && productName.toLowerCase().contains("derby")) {
+    String productName = getProductName();
+    if (productName != null) {
+      productName = productName.toLowerCase();
+      if (productName.contains("mysql")) {
+        dbType = DB.MYSQL;
+      } else if (productName.contains("oracle")) {
+        dbType = DB.ORACLE;
+      } else if (productName.contains("microsoft sql server")) {
+        dbType = DB.MSSQL;
+      } else if (productName.contains("derby")) {
         dbType = DB.DERBY;
       }
     }
@@ -210,6 +210,9 @@ class MetaStoreDirectSql {
 
   private boolean runTestQuery() {
     Transaction tx = pm.currentTransaction();
+    if (!tx.isActive()) {
+      tx.begin();
+    }
     Query query = null;
     // Run a self-test query. If it doesn't work, we will self-disable. What a PITA...
     String selfTestQuery = "select \"DB_ID\" from \"DBS\"";
@@ -261,23 +264,6 @@ class MetaStoreDirectSql {
     }
   }
 
-  private boolean runDbCheck(String queryText, String name) {
-    Transaction tx = pm.currentTransaction();
-    if (!tx.isActive()) {
-      tx.begin();
-    }
-    try {
-      executeNoResult(queryText);
-      return true;
-    } catch (Throwable t) {
-      LOG.debug(name + " check failed, assuming we are not on " + name + ": " + t.getMessage());
-      tx.rollback();
-      tx = pm.currentTransaction();
-      tx.begin();
-      return false;
-    }
-  }
-
   public Database getDatabase(String dbName) throws MetaException{
     Query queryDbSelector = null;
     Query queryDbParams = null;
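
The shape of the fix, made concrete: rather than firing one vendor-specific probe query per candidate database and rolling back the transaction whenever a probe fails, the metastore now reads the JDBC product name once and matches on it. A minimal standalone sketch of that approach over plain JDBC (illustrative enum and wiring, not the actual MetaStoreDirectSql code, which works through the JDO PersistenceManager):

import java.sql.Connection;
import java.sql.SQLException;

// Standalone sketch of product-name based detection, not Hive's code.
public final class DbTypeSketch {

  enum DB { MYSQL, ORACLE, MSSQL, DERBY, OTHER }

  // One metadata call instead of a probe query per candidate database,
  // each of which had to be rolled back on failure.
  static DB determineDbType(Connection conn) throws SQLException {
    String productName = conn.getMetaData().getDatabaseProductName();
    if (productName == null) {
      return DB.OTHER;
    }
    productName = productName.toLowerCase();
    if (productName.contains("mysql")) {
      return DB.MYSQL;
    } else if (productName.contains("oracle")) {
      return DB.ORACLE;
    } else if (productName.contains("microsoft sql server")) {
      return DB.MSSQL;
    } else if (productName.contains("derby")) {
      return DB.DERBY;
    }
    return DB.OTHER;
  }

  // Usage (needs a live JDBC URL):
  //   DB db = determineDbType(DriverManager.getConnection(jdbcUrl));
}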


[11/12] hive git commit: Revert "HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)"

Posted by pr...@apache.org.
Revert "HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)"

This reverts commit b6d1143aa7aaa20de035898f34df2d6b581895b6.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ef40ca6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ef40ca6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ef40ca6

Branch: refs/heads/llap
Commit: 2ef40ca66ab0b9fbcf9bca5e6b8c5d7bd6d580c6
Parents: ce25816
Author: Gopal V <go...@apache.org>
Authored: Fri Aug 28 02:43:44 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Fri Aug 28 02:43:44 2015 -0700

----------------------------------------------------------------------
 .../optimizer/DynamicPartitionPruningOptimization.java  | 12 ------------
 1 file changed, 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2ef40ca6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index 5ebd28a..f475926 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -189,18 +189,6 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
       LOG.debug("TableScan: " + ts);
     }
 
-    if (ts == null) {
-      // could be a reduce sink
-      LOG.warn("Could not find the table scan for " + filter); 
-      return null;
-    } else {
-      Table table = ts.getConf().getTableMetadata();
-      if (table != null && !table.isPartitioned()) {
-        // table is not partitioned, skip optimizer
-        return null;
-      }
-    }
-
     // collect the dynamic pruning conditions
     removerContext.dynLists.clear();
     walkExprTree(desc.getPredicate(), removerContext);


[08/12] hive git commit: HIVE-11627: Reduce the number of accesses to hashmaps in PPD (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Posted by pr...@apache.org.
HIVE-11627: Reduce the number of accesses to hashmaps in PPD (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d147a79c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d147a79c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d147a79c

Branch: refs/heads/llap
Commit: d147a79c13a9fdde83372c740167236eb80962de
Parents: 607b0e8
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Aug 27 18:07:48 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Aug 28 10:09:38 2015 +0200

----------------------------------------------------------------------
 .../hadoop/hive/ql/ppd/ExprWalkerInfo.java      | 127 ++++---------------
 .../hive/ql/ppd/ExprWalkerProcFactory.java      |  92 +++++++++-----
 .../hadoop/hive/ql/ppd/OpProcFactory.java       |  11 +-
 3 files changed, 93 insertions(+), 137 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
index f23facf..e4b768e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
@@ -38,29 +38,21 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 public class ExprWalkerInfo implements NodeProcessorCtx {
 
   /** Information maintained for an expr while walking an expr tree. */
-  private static class ExprInfo {
+  protected class ExprInfo {
     /**
      * true if expr rooted at this node doesn't contain more than one table.
      * alias
      */
-    public boolean isCandidate = false;
+    protected boolean isCandidate = false;
     /** alias that this expression refers to. */
-    public String alias = null;
+    protected String alias = null;
     /** new expr for this expression. */
-    public ExprNodeDesc convertedExpr = null;
+    protected ExprNodeDesc convertedExpr = null;
 
-    public ExprInfo() {
-    }
 
-    public ExprInfo(boolean isCandidate, String alias, ExprNodeDesc replacedNode) {
-      this.isCandidate = isCandidate;
-      this.alias = alias;
-      convertedExpr = replacedNode;
-    }
   }
 
-  protected static final Log LOG = LogFactory.getLog(OpProcFactory.class
-      .getName());;
+  protected static final Log LOG = LogFactory.getLog(OpProcFactory.class.getName());
   private Operator<? extends OperatorDesc> op = null;
 
   /**
@@ -127,105 +119,33 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
   }
 
   /**
-   * @return converted expression for give node. If there is none then returns
-   *         null.
-   */
-  public ExprNodeDesc getConvertedNode(ExprNodeDesc nd) {
-    ExprInfo ei = exprInfoMap.get(nd);
-    if (ei == null) {
-      return null;
-    }
-    return ei.convertedExpr;
-  }
-
-  /**
-   * adds a replacement node for this expression.
-   *
-   * @param oldNode
-   *          original node
-   * @param newNode
-   *          new node
+   * Get additional info for a given expression node
    */
-  public void addConvertedNode(ExprNodeDesc oldNode, ExprNodeDesc newNode) {
-    ExprInfo ei = exprInfoMap.get(oldNode);
-    if (ei == null) {
-      ei = new ExprInfo();
-      exprInfoMap.put(oldNode, ei);
-    }
-    ei.convertedExpr = newNode;
-    exprInfoMap.put(newNode, new ExprInfo(ei.isCandidate, ei.alias, null));
+  public ExprInfo getExprInfo(ExprNodeDesc expr) {
+    return exprInfoMap.get(expr);
   }
 
   /**
-   * Returns true if the specified expression is pushdown candidate else false.
-   *
-   * @param expr
-   * @return true or false
+   * Get additional info for a given expression node if it
+   * exists, or create a new one and store it if it does not
    */
-  public boolean isCandidate(ExprNodeDesc expr) {
-    ExprInfo ei = exprInfoMap.get(expr);
-    if (ei == null) {
-      return false;
-    }
-    return ei.isCandidate;
+  public ExprInfo addExprInfo(ExprNodeDesc expr) {
+    ExprInfo exprInfo = new ExprInfo();
+    exprInfoMap.put(expr, exprInfo);
+    return exprInfo;
   }
 
   /**
-   * Marks the specified expr to the specified value.
-   *
-   * @param expr
-   * @param b
-   *          can
+   * Get additional info for a given expression node if it
+   * exists, or create a new one and store it if it does not
    */
-  public void setIsCandidate(ExprNodeDesc expr, boolean b) {
-    ExprInfo ei = exprInfoMap.get(expr);
-    if (ei == null) {
-      ei = new ExprInfo();
-      exprInfoMap.put(expr, ei);
+  public ExprInfo addOrGetExprInfo(ExprNodeDesc expr) {
+    ExprInfo exprInfo = exprInfoMap.get(expr);
+    if (exprInfo == null) {
+      exprInfo = new ExprInfo();
+      exprInfoMap.put(expr, exprInfo);
     }
-    ei.isCandidate = b;
-  }
-
-  /**
-   * Returns the alias of the specified expr.
-   *
-   * @param expr
-   * @return The alias of the expression
-   */
-  public String getAlias(ExprNodeDesc expr) {
-    ExprInfo ei = exprInfoMap.get(expr);
-    if (ei == null) {
-      return null;
-    }
-    return ei.alias;
-  }
-
-  /**
-   * Adds the specified alias to the specified expr.
-   *
-   * @param expr
-   * @param alias
-   */
-  public void addAlias(ExprNodeDesc expr, String alias) {
-    if (alias == null) {
-      return;
-    }
-    ExprInfo ei = exprInfoMap.get(expr);
-    if (ei == null) {
-      ei = new ExprInfo();
-      exprInfoMap.put(expr, ei);
-    }
-    ei.alias = alias;
-  }
-
-  /**
-   * Adds the specified expr as the top-most pushdown expr (ie all its children
-   * can be pushed).
-   *
-   * @param expr
-   */
-  public void addFinalCandidate(ExprNodeDesc expr) {
-    addFinalCandidate(getAlias(expr), expr);
+    return exprInfo;
   }
 
   public void addFinalCandidate(String alias, ExprNodeDesc expr) {
@@ -278,8 +198,7 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
    *
    * @param expr
    */
-  public void addNonFinalCandidate(ExprNodeDesc expr) {
-    String alias = getAlias(expr);
+  public void addNonFinalCandidate(String alias, ExprNodeDesc expr) {
     if (nonFinalPreds.get(alias) == null) {
       nonFinalPreds.put(alias, new ArrayList<ExprNodeDesc>());
     }
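
The net effect of the ExprWalkerInfo rewrite above: callers fetch the per-expression ExprInfo once (via getExprInfo, addExprInfo, or addOrGetExprInfo) and then read or write its fields directly, whereas the removed helpers (getAlias, addAlias, isCandidate, setIsCandidate, getConvertedNode) each re-probed the hashmap on every call. A simplified sketch of the before/after access pattern, using plain String keys instead of ExprNodeDesc and not the actual Hive classes:

import java.util.HashMap;
import java.util.Map;

// Standalone sketch of collapsing repeated map lookups into one.
public final class SingleLookupSketch {

  static final class ExprInfo {
    boolean isCandidate;
    String alias;
  }

  private final Map<String, ExprInfo> exprInfoMap = new HashMap<>();

  // Old style: each helper performs its own hashmap lookup, so marking
  // one expression as a candidate with an alias costs several probes.
  void setIsCandidate(String expr, boolean b) {
    ExprInfo ei = exprInfoMap.get(expr);
    if (ei == null) {
      exprInfoMap.put(expr, ei = new ExprInfo());
    }
    ei.isCandidate = b;
  }

  void addAlias(String expr, String alias) {
    ExprInfo ei = exprInfoMap.get(expr);
    if (ei == null) {
      exprInfoMap.put(expr, ei = new ExprInfo());
    }
    ei.alias = alias;
  }

  // New style: one addOrGet-style probe, then plain field writes.
  ExprInfo addOrGetExprInfo(String expr) {
    return exprInfoMap.computeIfAbsent(expr, k -> new ExprInfo());
  }

  void markFast(String expr, String alias) {
    ExprInfo info = addOrGetExprInfo(expr);
    info.isCandidate = true;
    info.alias = alias;
  }
}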

http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index 6a1bef9..64efbdd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -38,8 +38,6 @@ import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleExactMatch;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.lib.TypeRule;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -47,6 +45,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.ppd.ExprWalkerInfo.ExprInfo;
 
 /**
  * Expression factory for predicate pushdown processing. Each processor
@@ -55,8 +54,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
  */
 public final class ExprWalkerProcFactory {
 
-  private static final Log LOG = LogFactory
-      .getLog(ExprWalkerProcFactory.class.getName());
+  private static final Log LOG = LogFactory.getLog(ExprWalkerProcFactory.class.getName());
 
   /**
    * ColumnExprProcessor.
@@ -80,6 +78,7 @@ public final class ExprWalkerProcFactory {
         tabAlias = ci.getTabAlias();
       }
 
+      ExprInfo colExprInfo = null;
       boolean isCandidate = true;
       if (op.getColumnExprMap() != null) {
         // replace the output expression with the input expression so that
@@ -88,7 +87,8 @@ public final class ExprWalkerProcFactory {
         if (exp == null) {
           // means that expression can't be pushed either because it is value in
           // group by
-          ctx.setIsCandidate(colref, false);
+          colExprInfo = ctx.addOrGetExprInfo(colref);
+          colExprInfo.isCandidate = false;
           return false;
         } else {
           if (exp instanceof ExprNodeGenericFuncDesc) {
@@ -99,16 +99,25 @@ public final class ExprWalkerProcFactory {
             tabAlias = column.getTabAlias();
           }
         }
-        ctx.addConvertedNode(colref, exp);
-        ctx.setIsCandidate(exp, isCandidate);
-        ctx.addAlias(exp, tabAlias);
+        colExprInfo = ctx.addOrGetExprInfo(colref);
+        colExprInfo.convertedExpr = exp;
+        ExprInfo expInfo = ctx.addExprInfo(exp);
+        expInfo.isCandidate = isCandidate;
+        if (tabAlias != null) {
+          expInfo.alias = tabAlias;
+        } else {
+          expInfo.alias = colExprInfo.alias;
+        }
       } else {
         if (ci == null) {
           return false;
         }
-        ctx.addAlias(colref, tabAlias);
+        colExprInfo = ctx.addOrGetExprInfo(colref);
+        if (tabAlias != null) {
+          colExprInfo.alias = tabAlias;
+        }
       }
-      ctx.setIsCandidate(colref, isCandidate);
+      colExprInfo.isCandidate = isCandidate;
       return isCandidate;
     }
 
@@ -127,30 +136,37 @@ public final class ExprWalkerProcFactory {
       String alias = null;
       ExprNodeFieldDesc expr = (ExprNodeFieldDesc) nd;
 
-      boolean isCandidate = true;
       assert (nd.getChildren().size() == 1);
       ExprNodeDesc ch = (ExprNodeDesc) nd.getChildren().get(0);
-      ExprNodeDesc newCh = ctx.getConvertedNode(ch);
+      ExprInfo chExprInfo = ctx.getExprInfo(ch);
+      ExprNodeDesc newCh = chExprInfo != null ? chExprInfo.convertedExpr : null;
       if (newCh != null) {
         expr.setDesc(newCh);
         ch = newCh;
+        chExprInfo = ctx.getExprInfo(ch);
       }
-      String chAlias = ctx.getAlias(ch);
 
-      isCandidate = isCandidate && ctx.isCandidate(ch);
+      boolean isCandidate;
+      String chAlias;
+      if (chExprInfo != null) {
+        chAlias = chExprInfo.alias;
+        isCandidate = chExprInfo.isCandidate;
+      } else {
+        chAlias = null;
+        isCandidate = false;
+      }
       // need to iterate through all children even if one is found to be not a
       // candidate
       // in case if the other children could be individually pushed up
       if (isCandidate && chAlias != null) {
-        if (alias == null) {
-          alias = chAlias;
-        } else if (!chAlias.equalsIgnoreCase(alias)) {
-          isCandidate = false;
-        }
+        alias = chAlias;
       }
 
-      ctx.addAlias(expr, alias);
-      ctx.setIsCandidate(expr, isCandidate);
+      ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
+      if (alias != null) {
+        exprInfo.alias = alias;
+      }
+      exprInfo.isCandidate = isCandidate;
       return isCandidate;
     }
 
@@ -172,7 +188,8 @@ public final class ExprWalkerProcFactory {
 
       if (!FunctionRegistry.isDeterministic(expr.getGenericUDF())) {
         // this GenericUDF can't be pushed down
-        ctx.setIsCandidate(expr, false);
+        ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
+        exprInfo.isCandidate = false;
         ctx.setDeterministic(false);
         return false;
       }
@@ -180,14 +197,22 @@ public final class ExprWalkerProcFactory {
       boolean isCandidate = true;
       for (int i = 0; i < nd.getChildren().size(); i++) {
         ExprNodeDesc ch = (ExprNodeDesc) nd.getChildren().get(i);
-        ExprNodeDesc newCh = ctx.getConvertedNode(ch);
+        ExprInfo chExprInfo = ctx.getExprInfo(ch);
+        ExprNodeDesc newCh = chExprInfo != null ? chExprInfo.convertedExpr : null;
         if (newCh != null) {
           expr.getChildren().set(i, newCh);
           ch = newCh;
+          chExprInfo = ctx.getExprInfo(ch);
         }
-        String chAlias = ctx.getAlias(ch);
 
-        isCandidate = isCandidate && ctx.isCandidate(ch);
+        String chAlias;
+        if (chExprInfo != null) {
+          chAlias = chExprInfo.alias;
+          isCandidate = isCandidate && chExprInfo.isCandidate;
+        } else {
+          chAlias = null;
+          isCandidate = false;
+        }
         // need to iterate through all children even if one is found to be not a
         // candidate
         // in case if the other children could be individually pushed up
@@ -203,8 +228,11 @@ public final class ExprWalkerProcFactory {
           break;
         }
       }
-      ctx.addAlias(expr, alias);
-      ctx.setIsCandidate(expr, isCandidate);
+      ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
+      if (alias != null) {
+        exprInfo.alias = alias;
+      }
+      exprInfo.isCandidate = isCandidate;
       return isCandidate;
     }
 
@@ -219,7 +247,8 @@ public final class ExprWalkerProcFactory {
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
       ExprWalkerInfo ctx = (ExprWalkerInfo) procCtx;
-      ctx.setIsCandidate((ExprNodeDesc) nd, true);
+      ExprInfo exprInfo = ctx.addOrGetExprInfo((ExprNodeDesc) nd);
+      exprInfo.isCandidate = true;
       return true;
     }
   }
@@ -324,12 +353,13 @@ public final class ExprWalkerProcFactory {
       return;
     }
 
-    if (ctx.isCandidate(expr)) {
-      ctx.addFinalCandidate(expr);
+    ExprInfo exprInfo = ctx.getExprInfo(expr);
+    if (exprInfo != null && exprInfo.isCandidate) {
+      ctx.addFinalCandidate(exprInfo.alias, expr);
       return;
     } else if (!FunctionRegistry.isOpAnd(expr) &&
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
-      ctx.addNonFinalCandidate(expr);
+      ctx.addNonFinalCandidate(exprInfo != null ? exprInfo.alias : null, expr);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 6f9df53..dbd021b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -66,6 +66,7 @@ import org.apache.hadoop.hive.ql.plan.ptf.ValueBoundaryDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
+import org.apache.hadoop.hive.ql.ppd.ExprWalkerInfo.ExprInfo;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFDenseRank.GenericUDAFDenseRankEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLead.GenericUDAFLeadEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.GenericUDAFRankEvaluator;
@@ -483,8 +484,14 @@ public final class OpProcFactory {
             prunePreds.getFinalCandidates().get(alias)) {
             // add expr to the list of predicates rejected from further pushing
             // so that we know to add it in createFilter()
-            prunePreds.addAlias(expr, alias);
-            prunePreds.addNonFinalCandidate(expr);
+            ExprInfo exprInfo;
+            if (alias != null) {
+              exprInfo = prunePreds.addOrGetExprInfo(expr);
+              exprInfo.alias = alias;
+            } else {
+              exprInfo = prunePreds.getExprInfo(expr);
+            }
+            prunePreds.addNonFinalCandidate(exprInfo != null ? exprInfo.alias : null, expr);
           }
           prunePreds.getFinalCandidates().remove(alias);
         }


[02/12] hive git commit: HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)

Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
new file mode 100644
index 0000000..55edd90
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -0,0 +1,1647 @@
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table pcr_t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table pcr_t3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-08
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-09
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-09
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcr_t1@ds=2000-04-10
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcr_t1@ds=2000-04-10
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table pcr_t2 (ds string, key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: create table pcr_t2 (ds string, key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t2
+PREHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Output: default@pcr_t2
+POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Output: default@pcr_t2
+POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         or
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-09'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: pcr_t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), value (type: string), ds (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [pcr_t1]
+        /pcr_t1/ds=2000-04-09 [pcr_t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds
+               '2000-04-08'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string)
+                auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1, t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
+            outputColumnNames: _col0, _col1, _col3, _col4
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col3,_col4
+                    columns.types int,string,int,string
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col3,_col4
+              columns.types int,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col3,_col4
+                columns.types int,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-08' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds
+               '2000-04-09'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 0
+                value expressions: value (type: string)
+                auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: int)
+                sort order: +
+                Map-reduce partition columns: key (type: int)
+                Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                tag: 1
+                value expressions: value (type: string)
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
+            outputColumnNames: _col0, _col1, _col3, _col4
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col3,_col4
+                    columns.types int,string,int,string
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col3,_col4
+              columns.types int,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col3,_col4
+                columns.types int,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-09' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t2
+            t2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+                  1
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-09'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               key
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               value
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: key (type: int), value (type: string), ds (type: string)
+              auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              tag: 1
+              value expressions: ds (type: string), key (type: int), value (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: pcr_t2
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns ds,key,value
+              columns.comments 
+              columns.types string:int:string
+#### A masked pattern was here ####
+              name default.pcr_t2
+              numFiles 1
+              numRows 1
+              rawDataSize 18
+              serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 19
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns ds,key,value
+                columns.comments 
+                columns.types string:int:string
+#### A masked pattern was here ####
+                name default.pcr_t2
+                numFiles 1
+                numRows 1
+                rawDataSize 18
+                serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 19
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t2
+            name: default.pcr_t2
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t1]
+        /pcr_t2 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
+          Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col2,_col3,_col4,_col5
+                      columns.types int,string,string,string,int,string
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
+              sort order: +++
+              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2,_col3,_col4,_col5
+              columns.types int,string,string,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2,_col3,_col4,_col5
+                columns.types int,string,string,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:string:int:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t2
+            t2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     ds
+                  '2000-04-08'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  1
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     ds
+                  '2000-04-09'
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               value
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t2
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: key (type: int), value (type: string), ds (type: string)
+              auto parallelism: false
+          TableScan
+            alias: t2
+            Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              tag: 1
+              value expressions: ds (type: string), key (type: int), value (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-10
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-10
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+            name: default.pcr_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: pcr_t2
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns ds,key,value
+              columns.comments 
+              columns.types string:int:string
+#### A masked pattern was here ####
+              name default.pcr_t2
+              numFiles 1
+              numRows 1
+              rawDataSize 18
+              serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 19
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns ds,key,value
+                columns.comments 
+                columns.types string:int:string
+#### A masked pattern was here ####
+                name default.pcr_t2
+                numFiles 1
+                numRows 1
+                rawDataSize 18
+                serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 19
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t2
+            name: default.pcr_t2
+      Truncated Path -> Alias:
+        /pcr_t1/ds=2000-04-08 [t1]
+        /pcr_t1/ds=2000-04-09 [t1]
+        /pcr_t1/ds=2000-04-10 [t1]
+        /pcr_t2 [t2]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
+          Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            isSamplingPred: false
+            predicate: (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+            Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col2,_col3,_col4,_col5
+                      columns.types int,string,string,string,int,string
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+              sort order: +++
+              Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2,_col3,_col4,_col5
+              columns.types int,string,string,string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2,_col3,_col4,_col5
+                columns.types int,string,string,string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4,_col5
+                  columns.types int:string:string:string:int:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: drop table pcr_t2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@pcr_t2
+PREHOOK: Output: default@pcr_t2
+POSTHOOK: query: drop table pcr_t2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@pcr_t2
+POSTHOOK: Output: default@pcr_t2
+PREHOOK: query: drop table pcr_t3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t3
+POSTHOOK: type: DROPTABLE

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out
index f536767..17248e4 100644
--- a/ql/src/test/results/clientpositive/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/ppd_transform.q.out
@@ -390,21 +390,21 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: (_col0) IN ('a', 'b') (type: boolean)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 Filter Operator
-                  predicate: (_col0) IN ('c', 'd') (type: boolean)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
index 5aa0df8..fb08f10 100644
--- a/ql/src/test/results/clientpositive/spark/pcr.q.out
+++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
@@ -2534,16 +2534,16 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-                    Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+                    Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string), ds (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                         sort order: +++
-                        Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                         tag: -1
                         auto parallelism: false
             Path -> Alias:
@@ -2648,13 +2648,13 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
 #### A masked pattern was here ####
                   NumFilesPerFileSink: 1
-                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
index a6e6e38..52a847a 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
@@ -405,21 +405,21 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Filter Operator
-                        predicate: (_col0) IN ('a', 'b') (type: boolean)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.TextInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       Filter Operator
-                        predicate: (_col0) IN ('c', 'd') (type: boolean)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.TextInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

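The predicate rewrites in this file (and again in the vectorized_case.q.out and explainuser_1.q.out diffs below) replace a structured IN point-lookup with an explicit chain of equality comparisons joined by OR. A minimal sketch of how such a disjunction can be assembled with the ExprNodeDesc APIs follows; InToOrSketch and inToOr are hypothetical names for illustration, not code from this patch, and the ORs are folded pairwise so the sketch does not depend on GenericUDFOPOr accepting more than two children.

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    // Hypothetical helper, for illustration only; not part of this patch.
    public class InToOrSketch {

      // Builds ((col = v0) or (col = v1) or ...) as a pairwise-nested OR chain.
      static ExprNodeDesc inToOr(ExprNodeColumnDesc column, List<Object> values)
          throws UDFArgumentException {
        ExprNodeDesc result = null;
        for (Object value : values) {
          // (col = value), built the way the planner builds generic UDF calls
          ExprNodeDesc eq = ExprNodeGenericFuncDesc.newInstance(
              new GenericUDFOPEqual(),
              Arrays.<ExprNodeDesc>asList(column, new ExprNodeConstantDesc(value)));
          // fold the new equality into the running OR chain
          result = (result == null) ? eq
              : ExprNodeGenericFuncDesc.newInstance(
                  new GenericUDFOPOr(), Arrays.asList(result, eq));
        }
        return result;
      }

      public static void main(String[] args) throws UDFArgumentException {
        ExprNodeColumnDesc csmallint = new ExprNodeColumnDesc(
            TypeInfoFactory.shortTypeInfo, "csmallint", "alltypesorc", false);
        ExprNodeDesc or = inToOr(csmallint,
            Arrays.<Object>asList((short) 418, (short) 12205, (short) 10583));
        // prints a nested form of the disjunction seen in the plans below
        System.out.println(or.getExprString());
      }
    }

The getExprString() output resembles the predicate lines in these plans, though the flat three-way OR shown in the diffs suggests the rewrite in question emits a single OR node with multiple children rather than the pairwise nesting used above.
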
http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index 54003c3..c2250e6 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Filter Operator
-            predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
+            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
             Select Operator
               expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
               outputColumnNames: _col0, _col1, _col2

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index e8a9786..9756b0c 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -2909,7 +2909,7 @@ Stage-0
       Select Operator [SEL_2]
          outputColumnNames:["_col0"]
          Filter Operator [FIL_4]
-            predicate:(c_int) IN (-6, 6) (type: boolean)
+            predicate:((c_int = -6) or (c_int = 6)) (type: boolean)
             TableScan [TS_0]
                alias:cbo_t1
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
index 54003c3..c2250e6 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Filter Operator
-            predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
+            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
             Select Operator
               expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
               outputColumnNames: _col0, _col1, _col2

http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 9e47014..73bf12d 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -46,19 +46,20 @@ STAGE PLANS:
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
-              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator