You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "xorsum (Jira)" <ji...@apache.org> on 2022/11/09 08:39:00 UTC

[jira] [Created] (HIVE-26715) CBO failed with JDBCJoinPushDown because wrong HiveJdbcConverter

xorsum created HIVE-26715:
-----------------------------

             Summary: CBO failed with JDBCJoinPushDown because wrong HiveJdbcConverter
                 Key: HIVE-26715
                 URL: https://issues.apache.org/jira/browse/HIVE-26715
             Project: Hive
          Issue Type: Bug
          Components: CBO
    Affects Versions: 3.1.3, 3.0.0
            Reporter: xorsum


h4. reproduction

 
{code:java}
create database tpch100m;
use tpch100m;
CREATE EXTERNAL TABLE region (
    r_regionkey int,
    r_name string,
    r_comment string
) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES (
    "hive.sql.database.type" = "MYSQL",
    "hive.sql.jdbc.url" = "jdbc:mysql://127.0.0.1:3306/tpch100m?serverTimezone=UTC&useSSL=false",
    "hive.sql.jdbc.driver" = "com.mysql.jdbc.Driver",
    "hive.sql.dbcp.username" = "root",
    "hive.sql.dbcp.password" = "123456",
    "hive.sql.table" = "region",
    "hive.sql.dbcp.maxActive" = "1"
);

CREATE EXTERNAL TABLE nation (
    n_nationkey int,
    n_name string,
    n_regionkey int,
    n_comment string
) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES (
    "hive.sql.database.type" = "MYSQL",
    "hive.sql.jdbc.url" = "jdbc:mysql://127.0.0.1:3306/tpch100m?serverTimezone=UTC&useSSL=false",
    "hive.sql.jdbc.driver" = "com.mysql.jdbc.Driver",
    "hive.sql.dbcp.username" = "root",
    "hive.sql.dbcp.password" = "123456",
    "hive.sql.table" = "nation",
    "hive.sql.dbcp.maxActive" = "1"
);

select * from nation,region where n_regionkey = r_regionkey; {code}
h4. log
{code:java}
2022-11-09T16:00:35,181  INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] translator.ASTBuilder: The HiveJdbcConverter generated sql message is: 
SELECT *
FROM (SELECT *
FROM `nation`
WHERE `n_regionkey` IS NOT NULL) AS `t`
INNER JOIN (SELECT *
FROM `region`
WHERE `r_regionkey` IS NOT NULL) AS `t0` ON `t`.`n_regionkey` = `t0`.`r_regionkey`
2022-11-09T16:00:35,181  INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] parse.CalcitePlanner: Get metadata for source tables
2022-11-09T16:00:35,181  INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] metastore.HiveMetaStore: 0: get_table : tbl=hive.tpch100m.nation
2022-11-09T16:00:35,181  INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] HiveMetaStore.audit: ugi=hbk    ip=unknown-ip-addr    cmd=get_table : tbl=hive.tpch100m.nation    
2022-11-09T16:00:35,194  INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] parse.CalcitePlanner: Get metadata for subqueries
2022-11-09T16:00:35,194  INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] parse.CalcitePlanner: Get metadata for destination tables
2022-11-09T16:00:35,202  INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] ql.Context: New scratch dir is hdfs://0.0.0.0:9000/tmp/hive/hbk/04ebb58a-de45-4f8c-9742-06cac555223b/hive_2022-11-09_16-00-34_987_5570790922374716113-1
2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] jdbc.JdbcSerDe: Caught exception while initializing the SqlSerDe
org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table definition has 4 columns
    at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:79) ~[hive-jdbc-handler-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:54) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:540) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:90) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:77) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:289) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:271) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genTablePlan(SemanticAnalyzer.java:10973) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11329) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11288) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:517) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12164) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:214) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683) ~[hive-cli-3.1.3.jar:3.1.3]
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_342]
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_342]
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_342]
    at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_342]
    at org.apache.hadoop.util.RunJar.run(RunJar.java:244) ~[hadoop-common-2.9.2.jar:?]
    at org.apache.hadoop.util.RunJar.main(RunJar.java:158) ~[hadoop-common-2.9.2.jar:?]
2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] hive.log: error in initSerDe: org.apache.hadoop.hive.serde2.SerDeException org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table definition has 4 columns
org.apache.hadoop.hive.serde2.SerDeException: org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table definition has 4 columns
    at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:104) ~[hive-jdbc-handler-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:54) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:540) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:90) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:77) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:289) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:271) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genTablePlan(SemanticAnalyzer.java:10973) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11329) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11288) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:517) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12164) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:214) ~[hive-exec-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) ~[hive-cli-3.1.3.jar:3.1.3]
    at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683) ~[hive-cli-3.1.3.jar:3.1.3]
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_342]
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_342]
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_342]
    at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_342]
    at org.apache.hadoop.util.RunJar.run(RunJar.java:244) ~[hadoop-common-2.9.2.jar:?]
    at org.apache.hadoop.util.RunJar.main(RunJar.java:158) ~[hadoop-common-2.9.2.jar:?]
Caused by: org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table definition has 4 columns
    at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:79) ~[hive-jdbc-handler-3.1.3.jar:3.1.3]
    ... 31 more
2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] parse.CalcitePlanner: CBO failed, skipping CBO. 
 {code}
h4. reason

1. The *JDBCJoinPushDownRule* transforms *HiveJoin(HiveJdbcConverter(nation),HiveJdbcConverter(region))* to {*}HiveJdbcConverte(JdbcJoin(nation,region)){*}.
2. *HiveJdbcConverter.getTableScan()* returns the first {*}JdbcHiveTableScan{*}(nation), but does not return {*}JdbcJoin{*}(nation join region). It is the reason of this bug.    
3. When initializing the {*}JdbcSerde{*}, the columns of (nation) and (nation join region) mismatch, so CBO failed. 

 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)