You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2018/01/26 23:58:22 UTC
hive git commit: HIVE-15680: Incorrect results when
hive.optimize.index.filter=true and same ORC table is referenced twice in
query (Anthony Hsu, Prasanth Jayachandran reviewed by Thejas Nair)
Repository: hive
Updated Branches:
refs/heads/master ad1552745 -> df5c56bd6
HIVE-15680: Incorrect results when hive.optimize.index.filter=true and same ORC table is referenced twice in query (Anthony Hsu, Prasanth Jayachandran reviewed by Thejas Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/df5c56bd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/df5c56bd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/df5c56bd
Branch: refs/heads/master
Commit: df5c56bd66c64200f675ce61058a0813f6b1f66f
Parents: ad15527
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Fri Jan 26 15:58:07 2018 -0800
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Fri Jan 26 15:58:07 2018 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/FetchTask.java | 2 +-
.../hadoop/hive/ql/exec/SMBMapJoinOperator.java | 2 +-
.../hadoop/hive/ql/exec/mr/MapredLocalTask.java | 2 +-
.../hadoop/hive/ql/io/HiveInputFormat.java | 16 +++--
.../orc_ppd_same_table_multiple_aliases.q | 17 ++++++
.../orc_ppd_same_table_multiple_aliases.q.out | 64 ++++++++++++++++++++
6 files changed, 96 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
index 39c1a42..a7dace9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
@@ -77,7 +77,7 @@ public class FetchTask extends Task<FetchWork> implements Serializable {
ColumnProjectionUtils.appendReadColumns(
job, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
// push down filters
- HiveInputFormat.pushFilters(job, ts);
+ HiveInputFormat.pushFilters(job, ts, null);
AcidUtils.setAcidTableScan(job, ts.getConf().isAcidTable());
AcidUtils.setAcidOperationalProperties(job, ts.getConf().getAcidOperationalProperties());
http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
index 5a7c23d..270b576 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
@@ -207,7 +207,7 @@ public class SMBMapJoinOperator extends AbstractMapJoinOperator<SMBJoinDesc> imp
ColumnProjectionUtils.appendReadColumns(
jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
// push down filters
- HiveInputFormat.pushFilters(jobClone, ts);
+ HiveInputFormat.pushFilters(jobClone, ts, null);
AcidUtils.setAcidTableScan(jobClone, ts.getConf().isAcidTable());
AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());
http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
index 5ac3d58..abd42ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
@@ -483,7 +483,7 @@ public class MapredLocalTask extends Task<MapredLocalWork> implements Serializab
ColumnProjectionUtils.appendReadColumns(
jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
// push down filters
- HiveInputFormat.pushFilters(jobClone, ts);
+ HiveInputFormat.pushFilters(jobClone, ts, null);
AcidUtils.setAcidTableScan(jobClone, ts.getConf().isAcidTable());
AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());
http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 7d3ff36..c3b846c 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -475,7 +475,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
}
if (tableScan != null) {
- pushFilters(conf, tableScan);
+ pushFilters(conf, tableScan, this.mrwork);
}
Path[] finalDirs = processPathsForMmRead(dirs, conf, validTxnList);
@@ -633,7 +633,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
tableScan.getNeededColumnIDs(), tableScan.getNeededColumns());
pushDownProjection = true;
// push down filters
- pushFilters(newjob, tableScan);
+ pushFilters(newjob, tableScan, this.mrwork);
}
} else {
if (LOG.isDebugEnabled()) {
@@ -729,7 +729,8 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
return partDesc;
}
- public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
+ public static void pushFilters(JobConf jobConf, TableScanOperator tableScan,
+ final MapWork mrwork) {
// ensure filters are not set from previous pushFilters
jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
@@ -753,6 +754,13 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
return;
}
+ // disable filter pushdown for mapreduce when there is more than one table alias,
+ // since we don't clone jobConf per alias
+ if (mrwork != null && mrwork.getAliases() != null && mrwork.getAliases().size() > 1 &&
+ jobConf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname).equals("mr")) {
+ return;
+ }
+
String serializedFilterObj = scanDesc.getSerializedFilterObject();
String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
@@ -849,7 +857,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
ColumnProjectionUtils.appendReadColumns(
jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
// push down filters
- pushFilters(jobConf, ts);
+ pushFilters(jobConf, ts, this.mrwork);
AcidUtils.setAcidTableScan(job, ts.getConf().isAcidTable());
AcidUtils.setAcidOperationalProperties(job, ts.getConf().getAcidOperationalProperties());
http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/test/queries/clientpositive/orc_ppd_same_table_multiple_aliases.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_ppd_same_table_multiple_aliases.q b/ql/src/test/queries/clientpositive/orc_ppd_same_table_multiple_aliases.q
new file mode 100644
index 0000000..3b72c14
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_ppd_same_table_multiple_aliases.q
@@ -0,0 +1,17 @@
+-- SORT_QUERY_RESULTS;
+set hive.optimize.index.filter=true;
+create table test_table(number int) stored as ORC;
+
+-- Two insertions will create two files, with one stripe each
+insert into table test_table VALUES (1);
+insert into table test_table VALUES (2);
+
+-- This should return 2 records
+select * from test_table;
+
+-- These should each return 1 record
+select * from test_table where number = 1;
+select * from test_table where number = 2;
+
+-- This should return 2 records
+select * from test_table where number = 1 union all select * from test_table where number = 2;
http://git-wip-us.apache.org/repos/asf/hive/blob/df5c56bd/ql/src/test/results/clientpositive/orc_ppd_same_table_multiple_aliases.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_ppd_same_table_multiple_aliases.q.out b/ql/src/test/results/clientpositive/orc_ppd_same_table_multiple_aliases.q.out
new file mode 100644
index 0000000..cc373a2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_ppd_same_table_multiple_aliases.q.out
@@ -0,0 +1,64 @@
+PREHOOK: query: create table test_table(number int) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_table
+POSTHOOK: query: create table test_table(number int) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_table
+PREHOOK: query: insert into table test_table VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_table
+POSTHOOK: query: insert into table test_table VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_table
+POSTHOOK: Lineage: test_table.number SCRIPT []
+PREHOOK: query: insert into table test_table VALUES (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_table
+POSTHOOK: query: insert into table test_table VALUES (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_table
+POSTHOOK: Lineage: test_table.number SCRIPT []
+PREHOOK: query: select * from test_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: select * from test_table where number = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table where number = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: select * from test_table where number = 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table where number = 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+2
+PREHOOK: query: select * from test_table where number = 1 union all select * from test_table where number = 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table where number = 1 union all select * from test_table where number = 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+1
+2