You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Premal Shah (JIRA)" <ji...@apache.org> on 2016/11/12 09:11:58 UTC

[jira] [Updated] (HIVE-15187) CTAS with CBO throws errors

     [ https://issues.apache.org/jira/browse/HIVE-15187?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Premal Shah updated HIVE-15187:
-------------------------------
    Description: 
If I run a query as a CREATE TABLE AS SELECT (CTAS) statement, it fails with the error below. However, the same query runs successfully on its own, when I don't try to create a table from the results. The failure does not happen with all CTAS queries.

{noformat}
2016-11-10T04:52:27,531 ERROR [cb5f35ff-f404-41ce-aa9b-87708d80b078 main]: ql.Driver (SessionState.java:printError(1038)) - FAILED: SemanticException Line 0:-1 Invalid column reference '$f0'
org.apache.hadoop.hive.ql.parse.SemanticException: Line 0:-1 Invalid column reference '$f0'
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genAllExprNodeDesc(SemanticAnalyzer.java:10527)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:10475)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:3843)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:3622)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:8895)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:8850)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9703)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9583)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9610)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9583)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9586)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9610)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9596)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genOPTree(SemanticAnalyzer.java:10092)
        at org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:353)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10103)
        at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:228)
        at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:239)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:473)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:319)
        at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1249)
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1295)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1178)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1166)
        at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:236)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:187)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
        at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:782)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:721)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:648)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
{noformat}


This breaks.

{noformat}
CREATE TABLE unique_ip_tmp AS
SELECT DISTINCT
    new.ip
FROM 
    (
        SELECT COUNT(0) , ip
        FROM t1
        WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY ip

        UNION ALL

        SELECT COUNT(0) , ip
        FROM t2
        WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY ip
    ) new
    LEFT JOIN unique_ip old
        ON old.ip = new.ip
WHERE
    old.ip IS NULL
;
{noformat}

I also tried this, as per [~sershe]'s suggestion:

{noformat}
CREATE TABLE unique_ip_tmp AS
SELECT DISTINCT
    new.ip
FROM 
    (
        SELECT 
            COUNT(0) as count , 
            ip as ip
        FROM t1
        WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY ip

        UNION ALL

        SELECT 
            COUNT(0) as count , 
            ip as ip
        FROM t2
        WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY ip
    ) new
    LEFT JOIN unique_ip old
        ON old.ip = new.ip
WHERE
    old.ip IS NULL
;
{noformat}

Here, I alias the COUNT(0) and ip columns.

If I remove one of the queries in the UNION, it works:

{noformat}
CREATE TABLE unique_ip_tmp AS
SELECT DISTINCT
    new.ip
FROM 
    (
        SELECT
            COUNT(0)
            , ip
        FROM
            map_activity
        WHERE
            dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY
            ip
    ) new
    LEFT JOIN unique_ip old
        ON old.ip = new.ip
WHERE
    old.ip IS NULL
;
{noformat}


If I create temporary tables from the GROUP BY queries and use those tables in the UNION, that works too:

{noformat}
CREATE TABLE unique_ip_tmp AS
SELECT DISTINCT
    new.ip
FROM 
    (
        SELECT * FROM dropme_t1
        UNION ALL
        SELECT * FROM dropme_t2
    ) new
    LEFT JOIN unique_ip old
        ON old.ip = new.ip
WHERE
    old.ip IS NULL
;
{noformat}


  was:
If I run a query with CREATE TABLE AS, it breaks with the error below. However, just running the query works if I don't try to create a table from the results. It does not happen to all CTAS queries. 

2016-11-10T04:52:27,531 ERROR [cb5f35ff-f404-41ce-aa9b-87708d80b078 main]: ql.Driver (SessionState.java:printError(1038)) - FAILED: SemanticException Line 0:-1 Invalid column reference '$f0'
org.apache.hadoop.hive.ql.parse.SemanticException: Line 0:-1 Invalid column reference '$f0'
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genAllExprNodeDesc(SemanticAnalyzer.java:10527)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:10475)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:3843)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:3622)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:8895)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:8850)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9703)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9583)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9610)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9583)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9586)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9610)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9596)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genOPTree(SemanticAnalyzer.java:10092)
        at org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:353)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10103)
        at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:228)
        at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:239)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:473)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:319)
        at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1249)
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1295)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1178)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1166)
        at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:236)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:187)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
        at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:782)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:721)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:648)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:221)


This breaks.

CREATE TABLE unique_ip_tmp AS
SELECT DISTINCT
    new.ip
FROM 
    (
        SELECT COUNT(0) , ip
        FROM t1
        WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY ip

        UNION ALL

        SELECT COUNT(0) , ip
        FROM t2
        WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY ip
    ) new
    LEFT JOIN unique_ip old
        ON old.ip = new.ip
WHERE
    old.ip IS NULL
;

Also, tried this as per [~sershe]'s suggestion

CREATE TABLE unique_ip_tmp AS
SELECT DISTINCT
    new.ip
FROM 
    (
        SELECT 
            COUNT(0) as count , 
            ip as ip
        FROM t1
        WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY ip

        UNION ALL

        SELECT 
            COUNT(0) as count , 
            ip as ip
        FROM t2
        WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY ip
    ) new
    LEFT JOIN unique_ip old
        ON old.ip = new.ip
WHERE
    old.ip IS NULL
;

Here, I alias the count(0) and ip columns

If I remove one of the queries in the UNION, it works

CREATE TABLE unique_ip_tmp AS
SELECT DISTINCT
    new.ip
FROM 
    (
        SELECT
            COUNT(0)
            , ip
        FROM
            map_activity
        WHERE
            dt BETWEEN '2016-11-08' AND '2016-11-08'
        GROUP BY
            ip
    ) new
    LEFT JOIN unique_ip old
        ON old.ip = new.ip
WHERE
    old.ip IS NULL
;


If I create tmp tables from the group by queries and use them, that works too

CREATE TABLE unique_ip_tmp AS
SELECT DISTINCT
    new.ip
FROM 
    (
        SELECT * FROM dropme_t1
        UNION ALL
        SELECT * FROM dropme_t2
    ) new
    LEFT JOIN unique_ip old
        ON old.ip = new.ip
WHERE
    old.ip IS NULL
;




> CTAS with CBO throws errors
> ---------------------------
>
>                 Key: HIVE-15187
>                 URL: https://issues.apache.org/jira/browse/HIVE-15187
>             Project: Hive
>          Issue Type: Bug
>          Components: CBO
>    Affects Versions: 2.0.1
>            Reporter: Premal Shah
>
> If I run a query as a CREATE TABLE AS SELECT (CTAS) statement, it fails with the error below. However, the same query runs successfully on its own, when I don't try to create a table from the results. The failure does not happen with all CTAS queries.
> {noformat}
> 2016-11-10T04:52:27,531 ERROR [cb5f35ff-f404-41ce-aa9b-87708d80b078 main]: ql.Driver (SessionState.java:printError(1038)) - FAILED: SemanticException Line 0:-1 Invalid column reference '$f0'
> org.apache.hadoop.hive.ql.parse.SemanticException: Line 0:-1 Invalid column reference '$f0'
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genAllExprNodeDesc(SemanticAnalyzer.java:10527)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:10475)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:3843)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:3622)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:8895)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:8850)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9703)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9583)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9610)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9583)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9586)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9610)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:9596)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genOPTree(SemanticAnalyzer.java:10092)
>         at org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:353)
>         at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10103)
>         at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:228)
>         at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:239)
>         at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:473)
>         at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:319)
>         at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1249)
>         at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1295)
>         at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1178)
>         at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1166)
>         at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:236)
>         at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:187)
>         at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
>         at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:782)
>         at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:721)
>         at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:648)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:498)
>         at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
> {noformat}
> This breaks.
> {noformat}
> CREATE TABLE unique_ip_tmp AS
> SELECT DISTINCT
>     new.ip
> FROM 
>     (
>         SELECT COUNT(0) , ip
>         FROM t1
>         WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
>         GROUP BY ip
>         UNION ALL
>         SELECT COUNT(0) , ip
>         FROM t2
>         WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
>         GROUP BY ip
>     ) new
>     LEFT JOIN unique_ip old
>         ON old.ip = new.ip
> WHERE
>     old.ip IS NULL
> ;
> {noformat}
> I also tried this, as per [~sershe]'s suggestion:
> {noformat}
> CREATE TABLE unique_ip_tmp AS
> SELECT DISTINCT
>     new.ip
> FROM 
>     (
>         SELECT 
>             COUNT(0) as count , 
>             ip as ip
>         FROM t1
>         WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
>         GROUP BY ip
>         UNION ALL
>         SELECT 
>             COUNT(0) as count , 
>             ip as ip
>         FROM t2
>         WHERE dt BETWEEN '2016-11-08' AND '2016-11-08'
>         GROUP BY ip
>     ) new
>     LEFT JOIN unique_ip old
>         ON old.ip = new.ip
> WHERE
>     old.ip IS NULL
> ;
> {noformat}
> Here, I alias the COUNT(0) and ip columns.
> If I remove one of the queries in the UNION, it works:
> {noformat}
> CREATE TABLE unique_ip_tmp AS
> SELECT DISTINCT
>     new.ip
> FROM 
>     (
>         SELECT
>             COUNT(0)
>             , ip
>         FROM
>             map_activity
>         WHERE
>             dt BETWEEN '2016-11-08' AND '2016-11-08'
>         GROUP BY
>             ip
>     ) new
>     LEFT JOIN unique_ip old
>         ON old.ip = new.ip
> WHERE
>     old.ip IS NULL
> ;
> {noformat}
> If I create temporary tables from the GROUP BY queries and use those tables in the UNION, that works too:
> {noformat}
> CREATE TABLE unique_ip_tmp AS
> SELECT DISTINCT
>     new.ip
> FROM 
>     (
>         SELECT * FROM dropme_t1
>         UNION ALL
>         SELECT * FROM dropme_t2
>     ) new
>     LEFT JOIN unique_ip old
>         ON old.ip = new.ip
> WHERE
>     old.ip IS NULL
> ;
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)