Posted to commits@hive.apache.org by pr...@apache.org on 2018/01/26 23:58:22 UTC

hive git commit: HIVE-15680: Incorrect results when hive.optimize.index.filter=true and same ORC table is referenced twice in query (Anthony Hsu, Prasanth Jayachandran reviewed by Thejas Nair)

Repository: hive
Updated Branches:
  refs/heads/master ad1552745 -> df5c56bd6


HIVE-15680: Incorrect results when hive.optimize.index.filter=true and same ORC table is referenced twice in query (Anthony Hsu, Prasanth Jayachandran reviewed by Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/df5c56bd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/df5c56bd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/df5c56bd

Branch: refs/heads/master
Commit: df5c56bd66c64200f675ce61058a0813f6b1f66f
Parents: ad15527
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Fri Jan 26 15:58:07 2018 -0800
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Fri Jan 26 15:58:07 2018 -0800

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/FetchTask.java   |  2 +-
 .../hadoop/hive/ql/exec/SMBMapJoinOperator.java |  2 +-
 .../hadoop/hive/ql/exec/mr/MapredLocalTask.java |  2 +-
 .../hadoop/hive/ql/io/HiveInputFormat.java      | 16 +++--
 .../orc_ppd_same_table_multiple_aliases.q       | 17 ++++++
 .../orc_ppd_same_table_multiple_aliases.q.out   | 64 ++++++++++++++++++++
 6 files changed, 96 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
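In brief, this change threads the enclosing MapWork into HiveInputFormat.pushFilters so that the method knows how many table aliases share a single JobConf. The sketch below is illustrative only (the class and method names are invented for the example); it shows the two call shapes visible in the diff: callers outside HiveInputFormat, which have no MapWork at hand, pass null and keep the previous behavior, while HiveInputFormat itself passes its MapWork (this.mrwork) so pushdown can be suppressed when needed.

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.mapred.JobConf;

// Illustrative only: the two call shapes introduced by this commit.
public class PushFiltersCallShapes {

  // FetchTask, SMBMapJoinOperator and MapredLocalTask have no MapWork to hand
  // over, so they pass null and filter pushdown behaves exactly as before.
  static void pushWithoutMapWork(JobConf job, TableScanOperator ts) {
    HiveInputFormat.pushFilters(job, ts, null);
  }

  // HiveInputFormat passes its own MapWork (this.mrwork in the diff) so that
  // pushFilters can check how many aliases share the JobConf.
  static void pushWithMapWork(JobConf job, TableScanOperator ts, MapWork work) {
    HiveInputFormat.pushFilters(job, ts, work);
  }
}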


http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
index 39c1a42..a7dace9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
@@ -77,7 +77,7 @@ public class FetchTask extends Task<FetchWork> implements Serializable {
         ColumnProjectionUtils.appendReadColumns(
             job, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
         // push down filters
-        HiveInputFormat.pushFilters(job, ts);
+        HiveInputFormat.pushFilters(job, ts, null);
 
         AcidUtils.setAcidTableScan(job, ts.getConf().isAcidTable());
         AcidUtils.setAcidOperationalProperties(job, ts.getConf().getAcidOperationalProperties());

http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
index 5a7c23d..270b576 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
@@ -207,7 +207,7 @@ public class SMBMapJoinOperator extends AbstractMapJoinOperator<SMBJoinDesc> imp
       ColumnProjectionUtils.appendReadColumns(
           jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
       // push down filters
-      HiveInputFormat.pushFilters(jobClone, ts);
+      HiveInputFormat.pushFilters(jobClone, ts, null);
 
       AcidUtils.setAcidTableScan(jobClone, ts.getConf().isAcidTable());
       AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());

http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
index 5ac3d58..abd42ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
@@ -483,7 +483,7 @@ public class MapredLocalTask extends Task<MapredLocalWork> implements Serializab
       ColumnProjectionUtils.appendReadColumns(
           jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
       // push down filters
-      HiveInputFormat.pushFilters(jobClone, ts);
+      HiveInputFormat.pushFilters(jobClone, ts, null);
 
       AcidUtils.setAcidTableScan(jobClone, ts.getConf().isAcidTable());
       AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());

http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 7d3ff36..c3b846c 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -475,7 +475,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
     }
 
     if (tableScan != null) {
-      pushFilters(conf, tableScan);
+      pushFilters(conf, tableScan, this.mrwork);
     }
 
     Path[] finalDirs = processPathsForMmRead(dirs, conf, validTxnList);
@@ -633,7 +633,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
             tableScan.getNeededColumnIDs(), tableScan.getNeededColumns());
           pushDownProjection = true;
           // push down filters
-          pushFilters(newjob, tableScan);
+          pushFilters(newjob, tableScan, this.mrwork);
         }
       } else {
         if (LOG.isDebugEnabled()) {
@@ -729,7 +729,8 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
     return partDesc;
   }
 
-  public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
+  public static void pushFilters(JobConf jobConf, TableScanOperator tableScan,
+    final MapWork mrwork) {
 
     // ensure filters are not set from previous pushFilters
     jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
@@ -753,6 +754,13 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
       return;
     }
 
+    // disable filter pushdown for mapreduce when there are more than one table aliases,
+    // since we don't clone jobConf per alias
+    if (mrwork != null && mrwork.getAliases() != null && mrwork.getAliases().size() > 1 &&
+      jobConf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname).equals("mr")) {
+      return;
+    }
+
     String serializedFilterObj = scanDesc.getSerializedFilterObject();
     String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
     boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
@@ -849,7 +857,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
         ColumnProjectionUtils.appendReadColumns(
             jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
         // push down filters
-        pushFilters(jobConf, ts);
+        pushFilters(jobConf, ts, this.mrwork);
 
         AcidUtils.setAcidTableScan(job, ts.getConf().isAcidTable());
         AcidUtils.setAcidOperationalProperties(job, ts.getConf().getAcidOperationalProperties());

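The heart of the fix is the guard added to pushFilters above. On the mr execution engine the JobConf is not cloned per alias, so a filter pushed down for one alias would also be applied to every other alias of the same table; in the HIVE-15680 scenario (exercised by the union-all regression test added below), that could prune rows the other branch should have returned. The guard simply disables pushdown in that case. The snippet below is a simplified restatement for readability, not code from the commit; the method name shouldSkipPushdown is invented for illustration.

import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.mapred.JobConf;

// Simplified restatement of the new guard in HiveInputFormat.pushFilters.
public final class PushdownGuard {

  // True when filter pushdown should be skipped: more than one alias shares
  // the (un-cloned) JobConf and the execution engine is mr, so a filter pushed
  // for one alias would leak into the scans of the other aliases.
  static boolean shouldSkipPushdown(JobConf jobConf, MapWork mrwork) {
    return mrwork != null
        && mrwork.getAliases() != null
        && mrwork.getAliases().size() > 1
        && "mr".equals(jobConf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname));
  }
}
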
http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/test/queries/clientpositive/orc_ppd_same_table_multiple_aliases.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_ppd_same_table_multiple_aliases.q b/ql/src/test/queries/clientpositive/orc_ppd_same_table_multiple_aliases.q
new file mode 100644
index 0000000..3b72c14
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_ppd_same_table_multiple_aliases.q
@@ -0,0 +1,17 @@
+-- SORT_QUERY_RESULTS;
+set hive.optimize.index.filter=true;
+create table test_table(number int) stored as ORC;
+
+-- Two insertions will create two files, with one stripe each
+insert into table test_table VALUES (1);
+insert into table test_table VALUES (2);
+
+-- This should return 2 records
+select * from test_table;
+
+-- These should each return 1 record
+select * from test_table where number = 1;
+select * from test_table where number = 2;
+
+-- This should return 2 records
+select * from test_table where number = 1 union all select * from test_table where number = 2;

http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/test/results/clientpositive/orc_ppd_same_table_multiple_aliases.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_ppd_same_table_multiple_aliases.q.out b/ql/src/test/results/clientpositive/orc_ppd_same_table_multiple_aliases.q.out
new file mode 100644
index 0000000..cc373a2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_ppd_same_table_multiple_aliases.q.out
@@ -0,0 +1,64 @@
+PREHOOK: query: create table test_table(number int) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_table
+POSTHOOK: query: create table test_table(number int) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_table
+PREHOOK: query: insert into table test_table VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_table
+POSTHOOK: query: insert into table test_table VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_table
+POSTHOOK: Lineage: test_table.number SCRIPT []
+PREHOOK: query: insert into table test_table VALUES (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_table
+POSTHOOK: query: insert into table test_table VALUES (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_table
+POSTHOOK: Lineage: test_table.number SCRIPT []
+PREHOOK: query: select * from test_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: select * from test_table where number = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table where number = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: select * from test_table where number = 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table where number = 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+2
+PREHOOK: query: select * from test_table where number = 1 union all select * from test_table where number = 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table where number = 1 union all select * from test_table where number = 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+1
+2