Posted to commits@hive.apache.org by jc...@apache.org on 2017/02/05 08:36:02 UTC
hive git commit: HIVE-15804: Druid handler might not infer correct column type when CBO fails (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 79f6110bd -> bc8307eac
HIVE-15804: Druid handler might not infer correct column type when CBO fails (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc8307ea
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc8307ea
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc8307ea
Branch: refs/heads/master
Commit: bc8307eacd291368b9822c1820a047febdb76952
Parents: 79f6110
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sat Feb 4 07:19:55 2017 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sun Feb 5 08:34:23 2017 +0000
----------------------------------------------------------------------
.../hadoop/hive/druid/serde/DruidSerDe.java | 21 +
.../test/queries/clientpositive/druid_basic2.q | 16 +
.../results/clientpositive/druid_basic2.q.out | 428 +++++++++++++++++++
3 files changed, 465 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bc8307ea/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
index ea84326..65259b0 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
@@ -29,6 +29,7 @@ import java.util.Properties;
import org.apache.calcite.adapter.druid.DruidTable;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils;
@@ -38,6 +39,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeSpec;
import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -55,8 +57,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspe
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;
@@ -460,12 +466,27 @@ public class DruidSerDe extends AbstractSerDe {
       case TIMESTAMP:
         output.add(new TimestampWritable(new Timestamp((Long) value)));
         break;
+      case BYTE:
+        output.add(new ByteWritable(((Number) value).byteValue()));
+        break;
+      case SHORT:
+        output.add(new ShortWritable(((Number) value).shortValue()));
+        break;
+      case INT:
+        output.add(new IntWritable(((Number) value).intValue()));
+        break;
       case LONG:
         output.add(new LongWritable(((Number) value).longValue()));
         break;
       case FLOAT:
         output.add(new FloatWritable(((Number) value).floatValue()));
         break;
+      case DOUBLE:
+        output.add(new DoubleWritable(((Number) value).doubleValue()));
+        break;
+      case DECIMAL:
+        output.add(new HiveDecimalWritable(HiveDecimal.create(((Number) value).doubleValue())));
+        break;
       case STRING:
         output.add(new Text(value.toString()));
         break;
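----------------------------------------------------------------------
In short, the fix extends DruidSerDe's deserialization so that every Hive numeric primitive category is wrapped in its matching Writable; previously only LONG and FLOAT (plus the non-numeric cases) were handled, so other numeric types could come back mis-typed when the CBO-derived schema was unavailable. Below is a minimal standalone sketch of that mapping. The toWritable helper and the DruidValueConversion class are hypothetical names introduced here for illustration (the real conversions live inline in DruidSerDe.deserialize), but each per-category branch mirrors the hunk above.

import java.sql.Timestamp;

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class DruidValueConversion {

  // Hypothetical helper: wraps a raw value from a Druid query result in the
  // Writable that matches the declared Hive column type.
  static Writable toWritable(PrimitiveCategory category, Object value) {
    switch (category) {
      case TIMESTAMP:
        // Druid returns the timestamp as epoch millis, hence the (Long) cast.
        return new TimestampWritable(new Timestamp((Long) value));
      case BYTE:
        return new ByteWritable(((Number) value).byteValue());
      case SHORT:
        return new ShortWritable(((Number) value).shortValue());
      case INT:
        return new IntWritable(((Number) value).intValue());
      case LONG:
        return new LongWritable(((Number) value).longValue());
      case FLOAT:
        return new FloatWritable(((Number) value).floatValue());
      case DOUBLE:
        return new DoubleWritable(((Number) value).doubleValue());
      case DECIMAL:
        // As in the patch, the decimal is built from a double, so it carries
        // double precision rather than arbitrary precision.
        return new HiveDecimalWritable(HiveDecimal.create(((Number) value).doubleValue()));
      case STRING:
        return new Text(value.toString());
      default:
        // The real serde raises a SerDeException for unsupported types.
        throw new IllegalArgumentException("Unsupported type: " + category);
    }
  }
}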
http://git-wip-us.apache.org/repos/asf/hive/blob/bc8307ea/ql/src/test/queries/clientpositive/druid_basic2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/druid_basic2.q b/ql/src/test/queries/clientpositive/druid_basic2.q
index fe24410..d70f3c3 100644
--- a/ql/src/test/queries/clientpositive/druid_basic2.q
+++ b/ql/src/test/queries/clientpositive/druid_basic2.q
@@ -50,3 +50,19 @@ FROM
FROM druid_table_1) b
ON a.language = b.language
);
+
+EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10;
+
+-- No CBO test: it should work
+set hive.cbo.enable=false;
+EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10;
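----------------------------------------------------------------------
The new qtest cases run the same aggregation query twice, once through the default CBO path and once with hive.cbo.enable=false, so the non-CBO fallback that previously produced incorrectly typed Druid columns is now exercised directly. As a quick illustration of the conversion in isolation, a usage sketch of the hypothetical toWritable helper from above:

// Parsed Druid results surface numeric values as generic Numbers (typically
// Long or Double once decoded from JSON); the serde picks the Writable
// wrapper from the declared Hive type, not the runtime class of the value.
Writable asInt = DruidValueConversion.toWritable(PrimitiveCategory.INT, 42L);           // IntWritable holding 42
Writable asDouble = DruidValueConversion.toWritable(PrimitiveCategory.DOUBLE, 3.5d);    // DoubleWritable holding 3.5
Writable asDecimal = DruidValueConversion.toWritable(PrimitiveCategory.DECIMAL, 10.5d); // HiveDecimalWritable holding 10.5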
http://git-wip-us.apache.org/repos/asf/hive/blob/bc8307ea/ql/src/test/results/clientpositive/druid_basic2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid_basic2.q.out b/ql/src/test/results/clientpositive/druid_basic2.q.out
index 74dccf0..bc9410b 100644
--- a/ql/src/test/results/clientpositive/druid_basic2.q.out
+++ b/ql/src/test/results/clientpositive/druid_basic2.q.out
@@ -529,3 +529,431 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: robot (type: string), language (type: string), __time (type: timestamp), added (type: float), delta (type: float)
+ outputColumnNames: robot, language, __time, added, delta
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: max(added), sum(delta)
+ keys: robot (type: string), language (type: string), floor_day(__time) (type: timestamp)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp)
+ null sort order: aaa
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ value expressions: _col3 (type: float), _col4 (type: double)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: druid_table_1
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ EXTERNAL TRUE
+ bucket_count -1
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+#### A masked pattern was here ####
+ name default.druid_table_1
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1 { timestamp __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ EXTERNAL TRUE
+ bucket_count -1
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+#### A masked pattern was here ####
+ name default.druid_table_1
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1 { timestamp __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+ name: default.druid_table_1
+ name: default.druid_table_1
+ Truncated Path -> Alias:
+ /druid_table_1 [druid_table_1]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: timestamp)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: float), _col4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2,_col3
+ columns.types string,timestamp,float,double
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ key expressions: UDFToInteger(_col0) (type: int), _col2 (type: float)
+ null sort order: az
+ sort order: +-
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ TopN: 10
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: timestamp), _col3 (type: double)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10004
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2,_col3
+ columns.types string,timestamp,float,double
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2,_col3
+ columns.types string,timestamp,float,double
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:timestamp:float:double
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT robot, floor_day(`__time`), max(added) as m, sum(delta) as s
+FROM druid_table_1
+GROUP BY robot, language, floor_day(`__time`)
+ORDER BY CAST(robot AS INTEGER) ASC, m DESC
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: druid_table_1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: robot (type: string), language (type: string), __time (type: timestamp), added (type: float), delta (type: float)
+ outputColumnNames: robot, language, __time, added, delta
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: max(added), sum(delta)
+ keys: robot (type: string), language (type: string), floor_day(__time) (type: timestamp)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp)
+ null sort order: aaa
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ value expressions: _col3 (type: float), _col4 (type: double)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: druid_table_1
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ EXTERNAL TRUE
+ bucket_count -1
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+#### A masked pattern was here ####
+ name default.druid_table_1
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1 { timestamp __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+
+ input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat
+ output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ EXTERNAL TRUE
+ bucket_count -1
+ column.name.delimiter ,
+ columns __time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted
+ columns.comments 'from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer','from deserializer'
+ columns.types timestamp:string:string:string:string:string:string:string:string:float:float:float:float:float
+ druid.datasource wikipedia
+#### A masked pattern was here ####
+ name default.druid_table_1
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct druid_table_1 { timestamp __time, string robot, string namespace, string anonymous, string unpatrolled, string page, string language, string newpage, string user, float count, float added, float delta, float variation, float deleted}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.druid.QTestDruidSerDe
+ storage_handler org.apache.hadoop.hive.druid.QTestDruidStorageHandler
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.druid.QTestDruidSerDe
+ name: default.druid_table_1
+ name: default.druid_table_1
+ Truncated Path -> Alias:
+ /druid_table_1 [druid_table_1]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: timestamp)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: timestamp), _col3 (type: float), _col4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2,_col3
+ columns.types string,timestamp,float,double
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ key expressions: UDFToInteger(_col0) (type: int), _col2 (type: float)
+ null sort order: az
+ sort order: +-
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ TopN: 10
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: timestamp), _col3 (type: double)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10003
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2,_col3
+ columns.types string,timestamp,float,double
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2,_col3
+ columns.types string,timestamp,float,double
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: timestamp), KEY.reducesinkkey1 (type: float), VALUE._col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:timestamp:float:double
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+