You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "slim bouguerra (JIRA)" <ji...@apache.org> on 2018/03/25 17:14:00 UTC
[jira] [Assigned] (HIVE-19044) Duplicate field names within Druid Query Generated by Calcite plan
[ https://issues.apache.org/jira/browse/HIVE-19044?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
slim bouguerra reassigned HIVE-19044:
-------------------------------------
> Duplicate field names within Druid Query Generated by Calcite plan
> ------------------------------------------------------------------
>
> Key: HIVE-19044
> URL: https://issues.apache.org/jira/browse/HIVE-19044
> Project: Hive
> Issue Type: Bug
> Components: Druid integration
> Reporter: slim bouguerra
> Assignee: slim bouguerra
> Priority: Major
>
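> This is the query plan. As you can see, the field name "$f4" is duplicated in the generated Druid query: it is used both as the name of the last aggregation and as the name of the post-aggregation.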
> {code}
> PREHOOK: query: EXPLAIN SELECT Calcs.key AS none_key_nk, SUM(Calcs.num0) AS temp_z_stdevp_num0___1723718801__0_, COUNT(Calcs.num0) AS temp_z_stdevp_num0___2730138885__0_, SUM((Calcs.num0 * Calcs.num0)) AS temp_z_stdevp_num0___4071133194__0_, STDDEV_POP(Calcs.num0) AS stp_num0_ok FROM druid_tableau.calcs Calcs GROUP BY Calcs.key
> PREHOOK: type: QUERY
> POSTHOOK: query: EXPLAIN SELECT Calcs.key AS none_key_nk, SUM(Calcs.num0) AS temp_z_stdevp_num0___1723718801__0_, COUNT(Calcs.num0) AS temp_z_stdevp_num0___2730138885__0_, SUM((Calcs.num0 * Calcs.num0)) AS temp_z_stdevp_num0___4071133194__0_, STDDEV_POP(Calcs.num0) AS stp_num0_ok FROM druid_tableau.calcs Calcs GROUP BY Calcs.key
> POSTHOOK: type: QUERY
> STAGE DEPENDENCIES:
> Stage-0 is a root stage
> STAGE PLANS:
> Stage: Stage-0
> Fetch Operator
> limit: -1
> Processor Tree:
> TableScan
> alias: calcs
> properties:
> druid.fieldNames key,$f1,$f2,$f3,$f4
> druid.fieldTypes string,double,bigint,double,double
> druid.query.json {"queryType":"groupBy","dataSource":"druid_tableau.calcs","granularity":"all","dimensions":[{"type":"default","dimension":"key","outputName":"key","outputType":"STRING"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"num0"},{"type":"filtered","filter":{"type":"not","field":{"type":"selector","dimension":"num0","value":null}},"aggregator":{"type":"count","name":"$f2","fieldName":"num0"}},{"type":"doubleSum","name":"$f3","expression":"(\"num0\" * \"num0\")"},{"type":"doubleSum","name":"$f4","expression":"(\"num0\" * \"num0\")"}],"postAggregations":[{"type":"expression","name":"$f4","expression":"pow(((\"$f4\" - ((\"$f1\" * \"$f1\") / \"$f2\")) / \"$f2\"),0.5)"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
> druid.query.type groupBy
> Select Operator
> expressions: key (type: string), $f1 (type: double), $f2 (type: bigint), $f3 (type: double), $f4 (type: double)
> outputColumnNames: _col0, _col1, _col2, _col3, _col4
> ListSink
> {code}
> Table DDL:
> {code}
> create database druid_tableau;
> use druid_tableau;
> drop table if exists calcs;
> create table calcs
> STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
> TBLPROPERTIES (
> "druid.segment.granularity" = "MONTH",
> "druid.query.granularity" = "DAY")
> AS SELECT
> cast(datetime0 as timestamp with local time zone) `__time`,
> key,
> str0, str1, str2, str3,
> date0, date1, date2, date3,
> time0, time1,
> datetime1,
> zzz,
> cast(bool0 as string) bool0,
> cast(bool1 as string) bool1,
> cast(bool2 as string) bool2,
> cast(bool3 as string) bool3,
> int0, int1, int2, int3,
> num0, num1, num2, num3, num4
> from default.calcs_orc;
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)