You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/03 05:03:58 UTC
[1/2] hive git commit: HIVE-19462 : Fix mapping for char_length
function to enable pushdown to Druid. (Slim Bouguerra via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 1c5faaac8 -> 3bccc4ed5
http://git-wip-us.apache.org/repos/asf/hive/blob/3bccc4ed/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out b/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
index 97d0704..c54fd93 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
@@ -72,65 +72,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to SECOND)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: floor_second(vc) (type: timestamp with local time zone)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: timestamp with local time zone)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: timestamp with local time zone)
- sort order: +
- Map-reduce partition columns: _col0 (type: timestamp with local time zone)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: timestamp with local time zone)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n2
+ properties:
+ druid.fieldNames timestamp
+ druid.fieldTypes timestamp with local time zone
+ druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table_n2","descending":false,"granularity":{"type":"period","period":"PT1S","timeZone":"US/Pacific"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+ druid.query.type timeseries
+ Select Operator
+ expressions: timestamp (type: timestamp with local time zone)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT floor(`__time` to MINUTE) FROM druid_table_n2
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -155,65 +114,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to MINUTE)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: floor_minute(vc) (type: timestamp with local time zone)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: timestamp with local time zone)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: timestamp with local time zone)
- sort order: +
- Map-reduce partition columns: _col0 (type: timestamp with local time zone)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: timestamp with local time zone)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n2
+ properties:
+ druid.fieldNames timestamp
+ druid.fieldTypes timestamp with local time zone
+ druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table_n2","descending":false,"granularity":{"type":"period","period":"PT1M","timeZone":"US/Pacific"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+ druid.query.type timeseries
+ Select Operator
+ expressions: timestamp (type: timestamp with local time zone)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT floor(`__time` to HOUR) FROM druid_table_n2
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -238,65 +156,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY floor(`__time` to HOUR)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: floor_hour(vc) (type: timestamp with local time zone)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: timestamp with local time zone)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: timestamp with local time zone)
- sort order: +
- Map-reduce partition columns: _col0 (type: timestamp with local time zone)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: timestamp with local time zone)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n2
+ properties:
+ druid.fieldNames timestamp
+ druid.fieldTypes timestamp with local time zone
+ druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table_n2","descending":false,"granularity":{"type":"period","period":"PT1H","timeZone":"US/Pacific"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}}
+ druid.query.type timeseries
+ Select Operator
+ expressions: timestamp (type: timestamp with local time zone)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(DAY from `__time`) FROM druid_table_n2
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -320,65 +197,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: day(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n2
+ properties:
+ druid.fieldNames extract_day
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n2","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_day","extractionFn":{"type":"timeFormat","format":"d","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_day (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table_n2
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -402,65 +238,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: weekofyear(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n2
+ properties:
+ druid.fieldNames extract_week
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n2","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_week","extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_week (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(MONTH from `__time`) FROM druid_table_n2
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -484,65 +279,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: month(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n2
+ properties:
+ druid.fieldNames extract_month
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n2","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_month","extractionFn":{"type":"timeFormat","format":"M","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_month (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(QUARTER from `__time`) FROM druid_table_n2
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -566,65 +320,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: quarter(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n2
+ properties:
+ druid.fieldNames vc
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n2","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'QUARTER','US/Pacific')","outputType":"LONG"}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: vc (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(YEAR from `__time`) FROM druid_table_n2
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -648,65 +361,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: year(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n2
+ properties:
+ druid.fieldNames extract_year
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n2","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_year","extractionFn":{"type":"timeFormat","format":"yyyy","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_year (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table_n2 WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1
PREHOOK: type: QUERY
@@ -793,24 +465,19 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: 1
+ limit: -1
Processor Tree:
TableScan
alias: druid_table_n2
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+ druid.fieldNames vc
+ druid.fieldTypes int
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n2","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'HOUR','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1}
druid.query.type scan
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Select Operator
- expressions: hour(vc) (type: int)
- outputColumnNames: _col0
- Limit
- Number of rows: 1
- ListSink
+ Select Operator
+ expressions: vc (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table_n2
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
[2/2] hive git commit: HIVE-19462 : Fix mapping for char_length
function to enable pushdown to Druid. (Slim Bouguerra via Ashutosh Chauhan)
Posted by ha...@apache.org.
HIVE-19462 : Fix mapping for char_length function to enable pushdown to Druid. (Slim Bouguerra via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3bccc4ed
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3bccc4ed
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3bccc4ed
Branch: refs/heads/master
Commit: 3bccc4ed5654298c2fed0a87aef81d97d69683b5
Parents: 1c5faaa
Author: Slim Bouguerra <sl...@gmail.com>
Authored: Tue May 8 13:53:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sat Jun 2 22:03:12 2018 -0700
----------------------------------------------------------------------
.../translator/SqlFunctionConverter.java | 21 +-
.../druid/druidmini_expressions.q.out | 62 +--
.../druid/druidmini_extractTime.q.out | 541 ++++---------------
.../druid/druidmini_floorTime.q.out | 541 ++++---------------
4 files changed, 230 insertions(+), 935 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3bccc4ed/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 950abe1..37b387f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -17,10 +17,9 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
-import java.lang.annotation.Annotation;
-import java.util.List;
-import java.util.Map;
-
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.SqlAggFunction;
import org.apache.calcite.sql.SqlFunction;
@@ -75,9 +74,9 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
+import java.lang.annotation.Annotation;
+import java.util.List;
+import java.util.Map;
public class SqlFunctionConverter {
private static final Logger LOG = LoggerFactory.getLogger(SqlFunctionConverter.class);
@@ -431,11 +430,11 @@ public class SqlFunctionConverter {
registerFunction("lower", SqlStdOperatorTable.LOWER, hToken(HiveParser.Identifier, "lower"));
registerFunction("upper", SqlStdOperatorTable.UPPER, hToken(HiveParser.Identifier, "upper"));
registerFunction("abs", SqlStdOperatorTable.ABS, hToken(HiveParser.Identifier, "abs"));
- registerFunction("char_length", SqlStdOperatorTable.CHAR_LENGTH,
- hToken(HiveParser.Identifier, "char_length")
+ registerFunction("character_length", SqlStdOperatorTable.CHAR_LENGTH,
+ hToken(HiveParser.Identifier, "character_length")
);
- registerDuplicateFunction("character_length", SqlStdOperatorTable.CHAR_LENGTH,
- hToken(HiveParser.Identifier, "char_length")
+ registerDuplicateFunction("char_length", SqlStdOperatorTable.CHAR_LENGTH,
+ hToken(HiveParser.Identifier, "character_length")
);
registerFunction("length", SqlStdOperatorTable.CHARACTER_LENGTH,
hToken(HiveParser.Identifier, "length")
http://git-wip-us.apache.org/repos/asf/hive/blob/3bccc4ed/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
index dbaa3a6..d51c5af 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
@@ -148,62 +148,24 @@ PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN SELECT count(*) FROM druid_table_n0 WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10 AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table_n0
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames __time,cstring1,cstring2,cdouble,cfloat,ctinyint,csmallint,cint,cbigint,cboolean1,cboolean2
- druid.fieldTypes timestamp with local time zone,string,string,double,float,tinyint,smallint,int,bigint,boolean,boolean
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"columns":["__time","cstring1","cstring2","cdouble","cfloat","ctinyint","csmallint","cint","cbigint","cboolean1","cboolean2"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 34864 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 11618 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 3057 Data size: 11618 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table_n0
+ properties:
+ druid.fieldNames $f0
+ druid.fieldTypes bigint
+ druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table_n0","descending":false,"granularity":"all","filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}}
+ druid.query.type timeseries
+ Select Operator
+ expressions: $f0 (type: bigint)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: EXPLAIN SELECT SUM(cfloat + 1), CAST(SUM(cdouble + ctinyint) AS INTEGER), SUM(ctinyint) + 1 , CAST(SUM(csmallint) + SUM(cint) AS DOUBLE), SUM(cint), SUM(cbigint)
FROM druid_table_n0 WHERE ceil(cfloat) > 0 AND floor(cdouble) * 2 < 1000 OR ln(cdouble) / log10(10) > 0 AND COS(cint) > 0 OR SIN(cdouble) > 1
http://git-wip-us.apache.org/repos/asf/hive/blob/3bccc4ed/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
index a1b1d24..442ae35 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
@@ -71,65 +71,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(SECOND from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: second(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table
+ properties:
+ druid.fieldNames extract_second
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_second","extractionFn":{"type":"timeFormat","format":"s","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_second (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(MINUTE from `__time`) FROM druid_table
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -154,65 +113,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MINUTE from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: minute(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table
+ properties:
+ druid.fieldNames extract_minute
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_minute","extractionFn":{"type":"timeFormat","format":"m","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_minute (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -237,65 +155,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(HOUR from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: hour(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table
+ properties:
+ druid.fieldNames extract_hour
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_hour","extractionFn":{"type":"timeFormat","format":"H","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_hour (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(DAY from `__time`) FROM druid_table
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -319,65 +196,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(DAY from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: day(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table
+ properties:
+ druid.fieldNames extract_day
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_day","extractionFn":{"type":"timeFormat","format":"d","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_day (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(WEEK from `__time`) FROM druid_table
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -401,65 +237,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(WEEK from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: weekofyear(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table
+ properties:
+ druid.fieldNames extract_week
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_week","extractionFn":{"type":"timeFormat","format":"w","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_week (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(MONTH from `__time`) FROM druid_table
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -483,65 +278,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(MONTH from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: month(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table
+ properties:
+ druid.fieldNames extract_month
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_month","extractionFn":{"type":"timeFormat","format":"M","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_month (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(QUARTER from `__time`) FROM druid_table
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -565,65 +319,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(QUARTER from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: quarter(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table
+ properties:
+ druid.fieldNames vc
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'QUARTER','US/Pacific')","outputType":"LONG"}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: vc (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(YEAR from `__time`) FROM druid_table
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10
@@ -647,65 +360,24 @@ WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyi
AND power(cfloat, 2) * pow(csmallint, 3) > 1 AND SQRT(ABS(ctinyint)) > 3 GROUP BY EXTRACT(YEAR from `__time`)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
- druid.query.type scan
- Statistics: Num rows: 9173 Data size: 383504 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: year(vc) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3057 Data size: 127806 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1528 Data size: 63882 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: druid_table
+ properties:
+ druid.fieldNames extract_year
+ druid.fieldTypes int
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract_year","extractionFn":{"type":"timeFormat","format":"yyyy","timeZone":"US/Pacific","locale":"en-US"}}],"limitSpec":{"type":"default"},"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: extract_year (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: EXPLAIN SELECT EXTRACT(SECOND from `__time`) FROM druid_table WHERE EXTRACT(SECOND from `__time`) = 0 LIMIT 1
PREHOOK: type: QUERY
@@ -792,24 +464,19 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: 1
+ limit: -1
Processor Tree:
TableScan
alias: druid_table
- filterExpr: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
properties:
- druid.fieldNames vc,ctinyint
- druid.fieldTypes timestamp with local time zone,tinyint
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","ctinyint"],"resultFormat":"compactedList"}
+ druid.fieldNames vc
+ druid.fieldTypes int
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) > 1)"},{"type":"expression","expression":"(strlen(CAST(\"ctinyint\", 'STRING')) < 10)"},{"type":"expression","expression":"((pow(\"cfloat\",2) * pow(\"csmallint\",3)) > 1)"},{"type":"expression","expression":"(sqrt(abs(\"ctinyint\")) > 3)"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(\"__time\",'HOUR','US/Pacific')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList","limit":1}
druid.query.type scan
- Filter Operator
- predicate: (character_length(UDFToString(ctinyint)) < 10) (type: boolean)
- Select Operator
- expressions: hour(vc) (type: int)
- outputColumnNames: _col0
- Limit
- Number of rows: 1
- ListSink
+ Select Operator
+ expressions: vc (type: int)
+ outputColumnNames: _col0
+ ListSink
PREHOOK: query: SELECT EXTRACT(HOUR from `__time`) FROM druid_table
WHERE character_length(CAST(ctinyint AS STRING)) > 1 AND char_length(CAST(ctinyint AS STRING)) < 10