You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2021/08/03 09:50:59 UTC

[hive] branch master updated: HIVE-25356: JDBCSplitFilterAboveJoinRule's onMatch method throws exception (Soumyakanti Das reviewed by Stamatis Zampetakis)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 214c723  HIVE-25356: JDBCSplitFilterAboveJoinRule's onMatch method throws exception (Soumyakanti Das reviewed by Stamatis Zampetakis)
214c723 is described below

commit 214c72366dc23d492dab6201b6d148897022ddd7
Author: Soumyakanti Das <so...@gmail.com>
AuthorDate: Tue Jul 20 16:06:59 2021 +0000

    HIVE-25356: JDBCSplitFilterAboveJoinRule's onMatch method throws exception (Soumyakanti Das reviewed by Stamatis Zampetakis)
    
    1. Select the correct RelNode in onMatch to obtain the SqlDialect and
    avoid the ClassCastException.
    2. Call canSplitFilter inside matches to avoid assertion failures
    in the onMatch method.
    3. Remove the matches(call, dialect) method since it can be confused
    with matches(call) and lead to logic bugs (this has already happened
    twice).
    
    Closes #2504
---
 .../rules/jdbc/JDBCAbstractSplitFilterRule.java    |  20 +--
 .../queries/clientpositive/jdbc_split_filter.q     |  61 ++++++++
 .../clientpositive/llap/jdbc_split_filter.q.out    | 154 +++++++++++++++++++++
 3 files changed, 222 insertions(+), 13 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCAbstractSplitFilterRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCAbstractSplitFilterRule.java
index c167458..15d87e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCAbstractSplitFilterRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCAbstractSplitFilterRule.java
@@ -104,16 +104,6 @@ public abstract class JDBCAbstractSplitFilterRule extends RelOptRule {
     return visitor.canBeSplit();
   }
 
-  public boolean matches(RelOptRuleCall call, SqlDialect dialect) {
-    LOGGER.debug("MySplitFilter.matches has been called");
-
-    final HiveFilter filter = call.rel(0);
-
-    RexNode cond = filter.getCondition();
-
-    return canSplitFilter(cond, dialect);
-  }
-
   public void onMatch(RelOptRuleCall call, SqlDialect dialect) {
     LOGGER.debug("MySplitFilter.onMatch has been called");
 
@@ -168,17 +158,20 @@ public abstract class JDBCAbstractSplitFilterRule extends RelOptRule {
     public boolean matches(RelOptRuleCall call) {
       LOGGER.debug("MyUpperJoinFilterFilter.matches has been called");
 
+      final HiveFilter filter = call.rel(0);
       final HiveJoin join = call.rel(1);
       final HiveJdbcConverter conv = call.rel(2);
 
       RexNode joinCond = join.getCondition();
+      SqlDialect dialect = conv.getJdbcDialect();
 
-      return super.matches(call) && JDBCRexCallValidator.isValidJdbcOperation(joinCond, conv.getJdbcDialect());
+      return canSplitFilter(filter.getCondition(), dialect)
+        && JDBCRexCallValidator.isValidJdbcOperation(joinCond, dialect);
     }
 
     @Override
     public void onMatch(RelOptRuleCall call) {
-      final HiveJdbcConverter conv = call.rel(0);
+      final HiveJdbcConverter conv = call.rel(2);
       super.onMatch(call, conv.getJdbcDialect());
     }
   }
@@ -194,8 +187,9 @@ public abstract class JDBCAbstractSplitFilterRule extends RelOptRule {
 
     @Override
     public boolean matches(RelOptRuleCall call) {
+      final HiveFilter filter = call.rel(0);
       final HiveJdbcConverter conv = call.rel(1);
-      return super.matches(call, conv.getJdbcDialect());
+      return canSplitFilter(filter.getCondition(), conv.getJdbcDialect());
     }
 
     @Override
diff --git a/ql/src/test/queries/clientpositive/jdbc_split_filter.q b/ql/src/test/queries/clientpositive/jdbc_split_filter.q
new file mode 100644
index 0000000..ca7a811
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/jdbc_split_filter.q
@@ -0,0 +1,61 @@
+
+CREATE TEMPORARY FUNCTION dboutput AS 'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput';
+
+
+SELECT
+
+dboutput ( 'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'CREATE TABLE SIMPLE_DERBY_TABLE1 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL, "dkey" DOUBLE)' );
+
+SELECT
+
+dboutput ( 'jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true','','',
+'CREATE TABLE SIMPLE_DERBY_TABLE2 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL, "dkey" DOUBLE, "datekey" DATE)' );
+
+CREATE EXTERNAL TABLE ext_simple_derby_table1
+(
+ ikey int,
+ bkey bigint,
+ fkey float,
+ dkey double
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+                "hive.sql.database.type" = "DERBY",
+                "hive.sql.jdbc.driver" = "org.apache.derby.jdbc.EmbeddedDriver",
+                "hive.sql.jdbc.url" = "jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true;collation=TERRITORY_BASED:PRIMARY",
+                "hive.sql.dbcp.username" = "APP",
+                "hive.sql.dbcp.password" = "mine",
+                "hive.sql.table" = "SIMPLE_DERBY_TABLE1",
+                "hive.sql.dbcp.maxActive" = "1"
+);
+
+
+CREATE EXTERNAL TABLE ext_simple_derby_table2
+(
+ ikey int,
+ bkey bigint,
+ fkey float,
+ dkey double,
+ datekey string
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+                "hive.sql.database.type" = "DERBY",
+                "hive.sql.jdbc.driver" = "org.apache.derby.jdbc.EmbeddedDriver",
+                "hive.sql.jdbc.url" = "jdbc:derby:;databaseName=${system:test.tmp.dir}/test_derby_as_external_table_db;create=true;collation=TERRITORY_BASED:PRIMARY",
+                "hive.sql.dbcp.username" = "APP",
+                "hive.sql.dbcp.password" = "mine",
+                "hive.sql.table" = "SIMPLE_DERBY_TABLE2",
+                "hive.sql.dbcp.maxActive" = "1"
+);
+
+
+explain cbo
+with t1 as (select fkey, ikey, bkey, dkey from ext_simple_derby_table1),
+t2 as (select fkey, ikey, datekey, dkey, bkey from ext_simple_derby_table2)
+select t1.fkey, t2.dkey, sum(t1.ikey)
+from t1 left join t2
+on t1.ikey = t2.ikey AND t1.fkey = t2.fkey
+where t2.fkey is null
+group by t2.datekey, t1.fkey, t2.dkey;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/jdbc_split_filter.q.out b/ql/src/test/results/clientpositive/llap/jdbc_split_filter.q.out
new file mode 100644
index 0000000..97cf6f5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/jdbc_split_filter.q.out
@@ -0,0 +1,154 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION dboutput AS 'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: dboutput
+POSTHOOK: query: CREATE TEMPORARY FUNCTION dboutput AS 'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: dboutput
+PREHOOK: query: SELECT
+
+#### A masked pattern was here ####
+'CREATE TABLE SIMPLE_DERBY_TABLE1 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL, "dkey" DOUBLE)' )
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+
+#### A masked pattern was here ####
+'CREATE TABLE SIMPLE_DERBY_TABLE1 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL, "dkey" DOUBLE)' )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+0
+PREHOOK: query: SELECT
+
+#### A masked pattern was here ####
+'CREATE TABLE SIMPLE_DERBY_TABLE2 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL, "dkey" DOUBLE, "datekey" DATE)' )
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+
+#### A masked pattern was here ####
+'CREATE TABLE SIMPLE_DERBY_TABLE2 ("ikey" INTEGER, "bkey" BIGINT, "fkey" REAL, "dkey" DOUBLE, "datekey" DATE)' )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+0
+PREHOOK: query: CREATE EXTERNAL TABLE ext_simple_derby_table1
+(
+ ikey int,
+ bkey bigint,
+ fkey float,
+ dkey double
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+                "hive.sql.database.type" = "DERBY",
+                "hive.sql.jdbc.driver" = "org.apache.derby.jdbc.EmbeddedDriver",
+#### A masked pattern was here ####
+                "hive.sql.dbcp.username" = "APP",
+                "hive.sql.dbcp.password" = "mine",
+                "hive.sql.table" = "SIMPLE_DERBY_TABLE1",
+                "hive.sql.dbcp.maxActive" = "1"
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ext_simple_derby_table1
+POSTHOOK: query: CREATE EXTERNAL TABLE ext_simple_derby_table1
+(
+ ikey int,
+ bkey bigint,
+ fkey float,
+ dkey double
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+                "hive.sql.database.type" = "DERBY",
+                "hive.sql.jdbc.driver" = "org.apache.derby.jdbc.EmbeddedDriver",
+#### A masked pattern was here ####
+                "hive.sql.dbcp.username" = "APP",
+                "hive.sql.dbcp.password" = "mine",
+                "hive.sql.table" = "SIMPLE_DERBY_TABLE1",
+                "hive.sql.dbcp.maxActive" = "1"
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ext_simple_derby_table1
+PREHOOK: query: CREATE EXTERNAL TABLE ext_simple_derby_table2
+(
+ ikey int,
+ bkey bigint,
+ fkey float,
+ dkey double,
+ datekey string
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+                "hive.sql.database.type" = "DERBY",
+                "hive.sql.jdbc.driver" = "org.apache.derby.jdbc.EmbeddedDriver",
+#### A masked pattern was here ####
+                "hive.sql.dbcp.username" = "APP",
+                "hive.sql.dbcp.password" = "mine",
+                "hive.sql.table" = "SIMPLE_DERBY_TABLE2",
+                "hive.sql.dbcp.maxActive" = "1"
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ext_simple_derby_table2
+POSTHOOK: query: CREATE EXTERNAL TABLE ext_simple_derby_table2
+(
+ ikey int,
+ bkey bigint,
+ fkey float,
+ dkey double,
+ datekey string
+)
+STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+TBLPROPERTIES (
+                "hive.sql.database.type" = "DERBY",
+                "hive.sql.jdbc.driver" = "org.apache.derby.jdbc.EmbeddedDriver",
+#### A masked pattern was here ####
+                "hive.sql.dbcp.username" = "APP",
+                "hive.sql.dbcp.password" = "mine",
+                "hive.sql.table" = "SIMPLE_DERBY_TABLE2",
+                "hive.sql.dbcp.maxActive" = "1"
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ext_simple_derby_table2
+PREHOOK: query: explain cbo
+with t1 as (select fkey, ikey, bkey, dkey from ext_simple_derby_table1),
+t2 as (select fkey, ikey, datekey, dkey, bkey from ext_simple_derby_table2)
+select t1.fkey, t2.dkey, sum(t1.ikey)
+from t1 left join t2
+on t1.ikey = t2.ikey AND t1.fkey = t2.fkey
+where t2.fkey is null
+group by t2.datekey, t1.fkey, t2.dkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ext_simple_derby_table1
+PREHOOK: Input: default@ext_simple_derby_table2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+with t1 as (select fkey, ikey, bkey, dkey from ext_simple_derby_table1),
+t2 as (select fkey, ikey, datekey, dkey, bkey from ext_simple_derby_table2)
+select t1.fkey, t2.dkey, sum(t1.ikey)
+from t1 left join t2
+on t1.ikey = t2.ikey AND t1.fkey = t2.fkey
+where t2.fkey is null
+group by t2.datekey, t1.fkey, t2.dkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ext_simple_derby_table1
+POSTHOOK: Input: default@ext_simple_derby_table2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJdbcConverter(convention=[JDBC.DERBY])
+  JdbcProject(fkey=[$0], dkey=[$2], _o__c2=[$3])
+    JdbcAggregate(group=[{0, 4, 5}], agg#0=[sum($1)])
+      JdbcFilter(condition=[IS NULL($2)])
+        JdbcJoin(condition=[AND(=($1, $3), =($0, $2))], joinType=[left])
+          JdbcProject(fkey=[$2], ikey=[$0])
+            JdbcHiveTableScan(table=[[default, ext_simple_derby_table1]], table:alias=[ext_simple_derby_table1])
+          JdbcProject(fkey=[$2], ikey=[$0], datekey=[$4], dkey=[$3])
+            JdbcFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))])
+              JdbcHiveTableScan(table=[[default, ext_simple_derby_table2]], table:alias=[ext_simple_derby_table2])
+