Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/05 03:57:27 UTC
[2/2] hive git commit: HIVE-19762 : Druid Queries containing Joins gives wrong results (Nishant Bangarwa via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ebf04c8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ebf04c8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ebf04c8
Branch: refs/heads/master
Commit: 0ebf04c87a6d8c36e699148c7f38dd502fe48b66
Parents: 91cdd4f
Author: Nishant Bangarwa <ni...@gmail.com>
Authored: Mon Jun 4 20:25:43 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon Jun 4 20:56:41 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 2 +
.../queries/clientpositive/druidmini_joins.q | 60 +
.../clientpositive/druid/druid_basic2.q.out | 1051 ++++++++++++++++++
.../clientpositive/druid/druidmini_joins.q.out | 224 ++++
.../results/clientpositive/druid_basic2.q.out | 944 ----------------
.../hive/metastore/utils/MetaStoreUtils.java | 5 +-
6 files changed, 1341 insertions(+), 945 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0ebf04c8/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 14a93a1..f3cb9de 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1668,6 +1668,8 @@ spark.perf.disabled.query.files=query14.q,\
query64.q
druid.query.files=druidmini_test1.q,\
+ druid_basic2.q,\
+ druidmini_joins.q,\
druidmini_test_insert.q,\
druidmini_mv.q,\
druid_timestamptz.q,\
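
Note: these two entries enroll druid_basic2.q and druidmini_joins.q in the Druid mini-cluster test driver; only files listed under druid.query.files run against the embedded Druid cluster. As a sketch of the usual workflow (the driver and module names are standard Hive qtest conventions, not part of this diff), a single file can typically be run from itests/qtest with: mvn test -Dtest=TestMiniDruidCliDriver -Dqfile=druidmini_joins.q
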
http://git-wip-us.apache.org/repos/asf/hive/blob/0ebf04c8/ql/src/test/queries/clientpositive/druidmini_joins.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/druidmini_joins.q b/ql/src/test/queries/clientpositive/druidmini_joins.q
new file mode 100644
index 0000000..720127e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/druidmini_joins.q
@@ -0,0 +1,60 @@
+SET hive.vectorized.execution.enabled=false;
+SET hive.explain.user=false;
+
+--SET hive.execution.mode=llap;
+
+DROP TABLE druid_table_with_nulls;
+
+CREATE TABLE druid_table_with_nulls
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR")
+AS
+SELECT cast(current_timestamp() AS timestamp with local time zone) AS `__time`,
+ cast(username AS string) AS username,
+ cast(double1 AS double) AS double1,
+ cast(int1 AS int) AS int1
+FROM TABLE (
+ VALUES
+ ('alfred', 10.30, 2),
+ ('bob', 3.14, null),
+ ('bonnie', null, 3),
+ ('calvin', null, null),
+ ('charlie', 9.8, 1),
+ ('charlie', 15.8, 1)) as q (username, double1, int1);
+
+EXPLAIN SELECT
+username AS `username`,
+SUM(double1) AS `sum_double1`
+FROM
+druid_table_with_nulls `tbl1`
+ JOIN (
+ SELECT
+ username AS `username`,
+ SUM(double1) AS `sum_double2`
+ FROM druid_table_with_nulls
+ GROUP BY `username`
+ ORDER BY `sum_double2`
+ DESC LIMIT 10
+ )
+ `tbl2`
+ ON (`tbl1`.`username` = `tbl2`.`username`)
+GROUP BY `tbl1`.`username`;
+
+
+SELECT
+username AS `username`,
+SUM(double1) AS `sum_double1`
+FROM
+druid_table_with_nulls `tbl1`
+ JOIN (
+ SELECT
+ username AS `username`,
+ SUM(double1) AS `sum_double2`
+ FROM druid_table_with_nulls
+ GROUP BY `username`
+ ORDER BY `sum_double2`
+ DESC LIMIT 10
+ )
+ `tbl2`
+ ON (`tbl1`.`username` = `tbl2`.`username`)
+GROUP BY `tbl1`.`username`;
\ No newline at end of file
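
This test pins down the HIVE-19762 pattern: an aggregation over a Druid-backed table joined against a Druid groupBy subquery on the same table, with NULLs in both the double metric and the int column. A minimal sanity check of the expected aggregates, reusing the same inline VALUES against plain Hive (no Druid storage handler; the alias q is taken from the test itself), would be:

SELECT username, SUM(double1) AS sum_double1
FROM TABLE (
  VALUES
    ('alfred', 10.30, 2),
    ('bob', 3.14, null),
    ('bonnie', null, 3),
    ('calvin', null, null),
    ('charlie', 9.8, 1),
    ('charlie', 15.8, 1)) as q (username, double1, int1)
GROUP BY username;

Against plain Hive, bonnie and calvin yield NULL; the Druid-backed table instead returns 0.0 for them (see the query output at the end of this message), because Druid at this point stored missing numeric values as 0 at ingestion.
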
http://git-wip-us.apache.org/repos/asf/hive/blob/0ebf04c8/ql/src/test/results/clientpositive/druid/druid_basic2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druid_basic2.q.out b/ql/src/test/results/clientpositive/druid/druid_basic2.q.out
new file mode 100644
index 0000000..88916b9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/druid/druid_basic2.q.out
@@ -0,0 +1,1051 @@
+PREHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n2
+STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler'
+TBLPROPERTIES ("druid.datasource" = "wikipedia")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_table_1_n2
+POSTHOOK: query: CREATE EXTERNAL TABLE druid_table_1_n2
+STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler'
+TBLPROPERTIES ("druid.datasource" = "wikipedia")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_table_1_n2
+PREHOOK: query: DESCRIBE FORMATTED druid_table_1_n2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@druid_table_1_n2
+POSTHOOK: query: DESCRIBE FORMATTED druid_table_1_n2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@druid_table_1_n2
+# col_name data_type comment
+__time timestamp with local time zone from deserializer
+robot string from deserializer
+namespace string from deserializer
+anonymous string from deserializer
+unpatrolled string from deserializer
+page string from deserializer
+language string from deserializer
+newpage string from deserializer
+user string from deserializer
+count float from deserializer
+added float from deserializer
+delta float from deserializer
+variation float from deserializer
+deleted float from deserializer
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: EXTERNAL_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"__time\":\"true\",\"added\":\"true\",\"anonymous\":\"true\",\"count\":\"true\",\"deleted\":\"true\",\"delta\":\"true\",\"language\":\"true\",\"namespace\":\"true\",\"newpage\":\"true\",\"page\":\"true\",\"robot\":\"true\",\"unpatrolled\":\"true\",\"user\":\"true\",\"variation\":\"true\"}}
+ EXTERNAL TRUE
+ bucketing_version 2
+ druid.datasource wikipedia
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.druid.QTestDruidSerDe
+InputFormat: null
+OutputFormat: null
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT robot FROM druid_table_1_n2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT robot FROM druid_table_1_n2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames robot
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"columns":["robot"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ GatherStats: false
+ Select Operator
+ expressions: robot (type: string)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT delta FROM druid_table_1_n2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT delta FROM druid_table_1_n2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames delta
+ druid.fieldTypes float
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"columns":["delta"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ GatherStats: false
+ Select Operator
+ expressions: delta (type: float)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT robot
+FROM druid_table_1_n2
+WHERE language = 'en'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT robot
+FROM druid_table_1_n2
+WHERE language = 'en'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames robot
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"columns":["robot"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ GatherStats: false
+ Select Operator
+ expressions: robot (type: string)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT DISTINCT robot
+FROM druid_table_1_n2
+WHERE language = 'en'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT DISTINCT robot
+FROM druid_table_1_n2
+WHERE language = 'en'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames robot
+ druid.fieldTypes string
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default"},"filter":{"type":"selector","dimension":"language","value":"en"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ GatherStats: false
+ Select Operator
+ expressions: robot (type: string)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT a.robot, b.language
+FROM
+(
+ (SELECT robot, language
+ FROM druid_table_1_n2) a
+ JOIN
+ (SELECT language
+ FROM druid_table_1_n2) b
+ ON a.language = b.language
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT a.robot, b.language
+FROM
+(
+ (SELECT robot, language
+ FROM druid_table_1_n2) a
+ JOIN
+ (SELECT language
+ FROM druid_table_1_n2) b
+ ON a.language = b.language
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames robot,language
+ druid.fieldTypes string,string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["robot","language"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: robot (type: string), language (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col0 (type: string)
+ auto parallelism: true
+ Execution mode: vectorized
+ Path -> Alias:
+ hdfs://### HDFS PATH ### [druid_table_1_n2]
+ Path -> Partition:
+ hdfs://### HDFS PATH ###
+ Partition
+ base file name: druid_table_1_n2
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+ druid.fieldNames robot,language
+ druid.fieldTypes string,string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["robot","language"],"resultFormat":"compactedList"}
+ druid.query.type scan
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+ druid.fieldNames robot,language
+ druid.fieldTypes string,string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["robot","language"],"resultFormat":"compactedList"}
+ druid.query.type scan
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+ name: default.druid_table_1_n2
+ name: default.druid_table_1_n2
+ Truncated Path -> Alias:
+ /druid_table_1_n2 [druid_table_1_n2]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames language
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["language"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions: language (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: language (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: true
+ Execution mode: vectorized
+ Path -> Alias:
+ hdfs://### HDFS PATH ### [druid_table_1_n2]
+ Path -> Partition:
+ hdfs://### HDFS PATH ###
+ Partition
+ base file name: druid_table_1_n2
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+ druid.fieldNames language
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["language"],"resultFormat":"compactedList"}
+ druid.query.type scan
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+ druid.fieldNames language
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"language","value":null}},"columns":["language"],"resultFormat":"compactedList"}
+ druid.query.type scan
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+ name: default.druid_table_1_n2
+ name: default.druid_table_1_n2
+ Truncated Path -> Alias:
+ /druid_table_1_n2 [druid_table_1_n2]
+ Reducer 2
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 language (type: string)
+ outputColumnNames: _col0, _col2
+ Position of Big Table: 0
+ Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: hdfs://### HDFS PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[8][tables = [$hdt$_0, druid_table_1_n2]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT a.robot, b.language
+FROM
+(
+ (SELECT robot, language
+ FROM druid_table_1_n2
+ WHERE language = 'en') a
+ JOIN
+ (SELECT language
+ FROM druid_table_1_n2) b
+ ON a.language = b.language
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT a.robot, b.language
+FROM
+(
+ (SELECT robot, language
+ FROM druid_table_1_n2
+ WHERE language = 'en') a
+ JOIN
+ (SELECT language
+ FROM druid_table_1_n2) b
+ ON a.language = b.language
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames robot
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"columns":["robot"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: robot (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col0 (type: string)
+ auto parallelism: false
+ Execution mode: vectorized
+ Path -> Alias:
+ hdfs://### HDFS PATH ### [druid_table_1_n2]
+ Path -> Partition:
+ hdfs://### HDFS PATH ###
+ Partition
+ base file name: druid_table_1_n2
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+ druid.fieldNames robot
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"columns":["robot"],"resultFormat":"compactedList"}
+ druid.query.type scan
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+ druid.fieldNames robot
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"columns":["robot"],"resultFormat":"compactedList"}
+ druid.query.type scan
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+ name: default.druid_table_1_n2
+ name: default.druid_table_1_n2
+ Truncated Path -> Alias:
+ /druid_table_1_n2 [druid_table_1_n2]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames vc
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"virtualColumns":[{"type":"expression","name":"vc","expression":"'en'","outputType":"STRING"}],"columns":["vc"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ tag: 1
+ auto parallelism: false
+ Execution mode: vectorized
+ Path -> Alias:
+ hdfs://### HDFS PATH ### [druid_table_1_n2]
+ Path -> Partition:
+ hdfs://### HDFS PATH ###
+ Partition
+ base file name: druid_table_1_n2
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+ druid.fieldNames vc
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"virtualColumns":[{"type":"expression","name":"vc","expression":"'en'","outputType":"STRING"}],"columns":["vc"],"resultFormat":"compactedList"}
+ druid.query.type scan
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+ druid.fieldNames vc
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"selector","dimension":"language","value":"en"},"virtualColumns":[{"type":"expression","name":"vc","expression":"'en'","outputType":"STRING"}],"columns":["vc"],"resultFormat":"compactedList"}
+ druid.query.type scan
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+ name: default.druid_table_1_n2
+ name: default.druid_table_1_n2
+ Truncated Path -> Alias:
+ /druid_table_1_n2 [druid_table_1_n2]
+ Reducer 2
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Statistics: Num rows: 1 Data size: 185 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), 'en' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 185 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: hdfs://### HDFS PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 185 Basic stats: PARTIAL Column stats: NONE
+ Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1_n2
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1_n2
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames robot,floor_day,$f3,$f4,(tok_function tok_int (tok_table_or_col robot))
+ druid.fieldTypes string,timestamp with local time zone,float,double,int
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"(tok_function tok_int (tok_table_or_col robot))","direction":"ascending","dimensionOrder":"numeric"},{"dimension":"$f3","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleMax","name":"$f3","fieldName":"added"},{"type":"doubleSum","name":"$f4","fieldName":"delta"}],"postAggregations":[{"type":"expression","name":"(tok_function tok_int (tok_table_or_col robot
))","expression":"CAST(\"robot\", 'LONG')"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ GatherStats: false
+ Select Operator
+ expressions: robot (type: string), floor_day (type: timestamp with local time zone), $f3 (type: float), $f4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT substring(namespace, CAST(deleted AS INT), 4)
+FROM druid_table_1_n2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT substring(namespace, CAST(deleted AS INT), 4)
+FROM druid_table_1_n2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames vc
+ druid.fieldTypes string
+ druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"substring(\"namespace\", (CAST(\"deleted\", 'LONG') - 1), 4)","outputType":"STRING"}],"columns":["vc"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Select Operator
+ expressions: vc (type: string)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT robot, floor_day(`__time`)
+FROM druid_table_1_n2
+WHERE floor_day(`__time`) BETWEEN '1999-11-01 00:00:00' AND '1999-11-10 00:00:00'
+GROUP BY robot, floor_day(`__time`)
+ORDER BY robot
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT robot, floor_day(`__time`)
+FROM druid_table_1_n2
+WHERE floor_day(`__time`) BETWEEN '1999-11-01 00:00:00' AND '1999-11-10 00:00:00'
+GROUP BY robot, floor_day(`__time`)
+ORDER BY robot
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames robot,floor_day
+ druid.fieldTypes string,timestamp with local time zone
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: robot (type: string), floor_day (type: timestamp with local time zone)
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT robot, `__time`
+FROM druid_table_1_n2
+WHERE floor_day(`__time`) BETWEEN '1999-11-01 00:00:00' AND '1999-11-10 00:00:00'
+GROUP BY robot, `__time`
+ORDER BY robot
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT robot, `__time`
+FROM druid_table_1_n2
+WHERE floor_day(`__time`) BETWEEN '1999-11-01 00:00:00' AND '1999-11-10 00:00:00'
+GROUP BY robot, `__time`
+ORDER BY robot
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames extract,robot
+ druid.fieldTypes timestamp with local time zone,string
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"US/Pacific"}},{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: robot (type: string), extract (type: timestamp with local time zone)
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT robot, floor_day(`__time`)
+FROM druid_table_1_n2
+WHERE `__time` BETWEEN '1999-11-01 00:00:00' AND '1999-11-10 00:00:00'
+GROUP BY robot, floor_day(`__time`)
+ORDER BY robot
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT robot, floor_day(`__time`)
+FROM druid_table_1_n2
+WHERE `__time` BETWEEN '1999-11-01 00:00:00' AND '1999-11-10 00:00:00'
+GROUP BY robot, floor_day(`__time`)
+ORDER BY robot
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_1_n2
+ properties:
+ druid.fieldNames robot,floor_day
+ druid.fieldTypes string,timestamp with local time zone
+ druid.query.json {"queryType":"groupBy","dataSource":"wikipedia","granularity":"all","dimensions":[{"type":"default","dimension":"robot","outputName":"robot","outputType":"STRING"},{"type":"extraction","dimension":"__time","outputName":"floor_day","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","granularity":{"type":"period","period":"P1D","timeZone":"US/Pacific"},"timeZone":"US/Pacific","locale":"und"}}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"robot","direction":"ascending","dimensionOrder":"lexicographic"}]},"aggregations":[],"intervals":["1999-11-01T08:00:00.000Z/1999-11-10T08:00:00.001Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: robot (type: string), floor_day (type: timestamp with local time zone)
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1_n2
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1_n2
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1_n2
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: __time (type: timestamp with local time zone), robot (type: string), language (type: string), added (type: float), delta (type: float)
+ outputColumnNames: __time, robot, language, added, delta
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(added), sum(delta)
+ keys: robot (type: string), language (type: string), floor_day(__time) (type: timestamp with local time zone)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp with local time zone)
+ null sort order: aaa
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp with local time zone)
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col3 (type: float), _col4 (type: double)
+ auto parallelism: true
+ Path -> Alias:
+ hdfs://### HDFS PATH ### [druid_table_1_n2]
+ Path -> Partition:
+ hdfs://### HDFS PATH ###
+ Partition
+ base file name: druid_table_1_n2
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"__time":"true","added":"true","anonymous":"true","count":"true","deleted":"true","delta":"true","language":"true","namespace":"true","newpage":"true","page":"true","robot":"true","unpatrolled":"true","user":"true","variation":"true"}}
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp with local time zone:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.druid_table_1_n2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1_n2 { timestamp with local time zone __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+ name: default.druid_table_1_n2
+ name: default.druid_table_1_n2
+ Truncated Path -> Alias:
+ /druid_table_1_n2 [druid_table_1_n2]
+ Reducer 2
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: timestamp with local time zone)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: timestamp with local time zone), _col3 (type: float), _col4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToInteger(_col0) (type: int), _col2 (type: float)
+ null sort order: az
+ sort order: +-
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ TopN: 10
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: timestamp with local time zone), _col3 (type: double)
+ auto parallelism: false
+ Reducer 3
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp with local time zone), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: hdfs://### HDFS PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:timestamp with local time zone:float:double
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
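
The druid_basic2 golden file moves under results/clientpositive/druid/ to match the driver change above; the old copy under results/clientpositive/ is deleted (the 944-line removal in the diffstat). The plans it records draw the line between what is pushed to Druid and what stays in Hive: projections, filters, and grouped aggregations compile into the druid.query.json property on the TableScan (scan or groupBy query types), while joins, and in the final plan the aggregation and ORDER BY ... LIMIT as well, execute as Tez vertices over the Druid scans.
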
http://git-wip-us.apache.org/repos/asf/hive/blob/0ebf04c8/ql/src/test/results/clientpositive/druid/druidmini_joins.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_joins.q.out b/ql/src/test/results/clientpositive/druid/druidmini_joins.q.out
new file mode 100644
index 0000000..73a3c9f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/druid/druidmini_joins.q.out
@@ -0,0 +1,224 @@
+PREHOOK: query: DROP TABLE druid_table_with_nulls
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE druid_table_with_nulls
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE druid_table_with_nulls
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR")
+AS
+SELECT cast(current_timestamp() AS timestamp with local time zone) AS `__time`,
+ cast(username AS string) AS username,
+ cast(double1 AS double) AS double1,
+ cast(int1 AS int) AS int1
+FROM TABLE (
+ VALUES
+ ('alfred', 10.30, 2),
+ ('bob', 3.14, null),
+ ('bonnie', null, 3),
+ ('calvin', null, null),
+ ('charlie', 9.8, 1),
+ ('charlie', 15.8, 1)) as q (username, double1, int1)
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_table_with_nulls
+POSTHOOK: query: CREATE TABLE druid_table_with_nulls
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR")
+AS
+SELECT cast(current_timestamp() AS timestamp with local time zone) AS `__time`,
+ cast(username AS string) AS username,
+ cast(double1 AS double) AS double1,
+ cast(int1 AS int) AS int1
+FROM TABLE (
+ VALUES
+ ('alfred', 10.30, 2),
+ ('bob', 3.14, null),
+ ('bonnie', null, 3),
+ ('calvin', null, null),
+ ('charlie', 9.8, 1),
+ ('charlie', 15.8, 1)) as q (username, double1, int1)
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_table_with_nulls
+POSTHOOK: Lineage: druid_table_with_nulls.__time SIMPLE []
+POSTHOOK: Lineage: druid_table_with_nulls.double1 SCRIPT []
+POSTHOOK: Lineage: druid_table_with_nulls.int1 SCRIPT []
+POSTHOOK: Lineage: druid_table_with_nulls.username SCRIPT []
+PREHOOK: query: EXPLAIN SELECT
+username AS `username`,
+SUM(double1) AS `sum_double1`
+FROM
+druid_table_with_nulls `tbl1`
+ JOIN (
+ SELECT
+ username AS `username`,
+ SUM(double1) AS `sum_double2`
+ FROM druid_table_with_nulls
+ GROUP BY `username`
+ ORDER BY `sum_double2`
+ DESC LIMIT 10
+ )
+ `tbl2`
+ ON (`tbl1`.`username` = `tbl2`.`username`)
+GROUP BY `tbl1`.`username`
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT
+username AS `username`,
+SUM(double1) AS `sum_double1`
+FROM
+druid_table_with_nulls `tbl1`
+ JOIN (
+ SELECT
+ username AS `username`,
+ SUM(double1) AS `sum_double2`
+ FROM druid_table_with_nulls
+ GROUP BY `username`
+ ORDER BY `sum_double2`
+ DESC LIMIT 10
+ )
+ `tbl2`
+ ON (`tbl1`.`username` = `tbl2`.`username`)
+GROUP BY `tbl1`.`username`
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl1
+ properties:
+ druid.fieldNames username,$f1
+ druid.fieldTypes string,double
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_with_nulls","granularity":"all","dimensions":[{"type":"default","dimension":"username","outputName":"username","outputType":"STRING"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f1","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"double1"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: username (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: tbl1
+ properties:
+ druid.fieldNames username,double1
+ druid.fieldTypes string,double
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_with_nulls","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"not","field":{"type":"selector","dimension":"username","value":null}},"columns":["username","double1"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: username (type: string)
+ sort order: +
+ Map-reduce partition columns: username (type: string)
+ Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ value expressions: double1 (type: double)
+ Reducer 2
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 username (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 633 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 633 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+username AS `username`,
+SUM(double1) AS `sum_double1`
+FROM
+druid_table_with_nulls `tbl1`
+ JOIN (
+ SELECT
+ username AS `username`,
+ SUM(double1) AS `sum_double2`
+ FROM druid_table_with_nulls
+ GROUP BY `username`
+ ORDER BY `sum_double2`
+ DESC LIMIT 10
+ )
+ `tbl2`
+ ON (`tbl1`.`username` = `tbl2`.`username`)
+GROUP BY `tbl1`.`username`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table_with_nulls
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT
+username AS `username`,
+SUM(double1) AS `sum_double1`
+FROM
+druid_table_with_nulls `tbl1`
+ JOIN (
+ SELECT
+ username AS `username`,
+ SUM(double1) AS `sum_double2`
+ FROM druid_table_with_nulls
+ GROUP BY `username`
+ ORDER BY `sum_double2`
+ DESC LIMIT 10
+ )
+ `tbl2`
+ ON (`tbl1`.`username` = `tbl2`.`username`)
+GROUP BY `tbl1`.`username`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table_with_nulls
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+alfred 10.300000190734863
+bob 3.140000104904175
+bonnie 0.0
+calvin 0.0
+charlie 25.600000381469727
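
The expected rows follow directly from the inserted data: charlie is 9.8 + 15.8 = 25.6, alfred and bob carry their single values, and bonnie and calvin come back as 0.0 because their NULL double1 values were stored as 0 when ingested into Druid. The long trailing digits (10.300000190734863, 25.600000381469727) are consistent with the values having passed through 32-bit float precision in the Druid indexing path: each printed double is the nearest float32 to the exact decimal, widened back to double.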