You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/13 13:08:36 UTC
hive git commit: HIVE-19604 : Incorrect Handling of Boolean in DruidSerde (Nishant Bangarwa via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 5e3b2e753 -> 16f57220f
HIVE-19604 : Incorrect Handling of Boolean in DruidSerde (Nishant Bangarwa via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/16f57220
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/16f57220
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/16f57220
Branch: refs/heads/master
Commit: 16f57220ff281a60bbfdaf6ee5a148efad48c229
Parents: 5e3b2e7
Author: Nishant Bangarwa <ni...@gmail.com>
Authored: Wed Jun 13 08:07:40 2018 -0500
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Jun 13 08:07:40 2018 -0500
----------------------------------------------------------------------
.../hadoop/hive/druid/serde/DruidSerDe.java | 6 ++-
.../hadoop/hive/druid/serde/TestDruidSerDe.java | 23 ++++-----
.../clientpositive/druidmini_expressions.q | 12 ++++-
.../druid/druidmini_expressions.q.out | 51 ++++++++++++++++++++
4 files changed, 78 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/16f57220/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
index 842a9fa..a9e7837 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
@@ -462,7 +462,11 @@ import static org.joda.time.format.ISODateTimeFormat.dateOptionalTimeParser;
output.add(new Text(value.toString()));
break;
case BOOLEAN:
- output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
+ if (value instanceof Number) {
+ output.add(new BooleanWritable(((Number) value).intValue() != 0));
+ } else {
+ output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
+ }
break;
default:
throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory());
http://git-wip-us.apache.org/repos/asf/hive/blob/16f57220/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
index e45de0f..922c1db 100644
--- a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
+++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
@@ -74,6 +74,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
@@ -462,7 +463,7 @@ public class TestDruidSerDe {
+ " \"offset\" : 0, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:00.000Z\", "
- + " \"robot\" : \"1\", "
+ + " \"robot\" : 1, "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
@@ -481,7 +482,7 @@ public class TestDruidSerDe {
+ " \"offset\" : 1, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:00.000Z\", "
- + " \"robot\" : \"0\", "
+ + " \"robot\" : 0, "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
@@ -500,7 +501,7 @@ public class TestDruidSerDe {
+ " \"offset\" : 2, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
- + " \"robot\" : \"0\", "
+ + " \"robot\" : 0, "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
@@ -519,7 +520,7 @@ public class TestDruidSerDe {
+ " \"offset\" : 3, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
- + " \"robot\" : \"0\", "
+ + " \"robot\" : 0, "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
@@ -538,7 +539,7 @@ public class TestDruidSerDe {
+ " \"offset\" : 4, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
- + " \"robot\" : \"0\", "
+ + " \"robot\" : 0, "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
@@ -556,30 +557,30 @@ public class TestDruidSerDe {
// Select query results as records (types defined by metastore)
private static final String SELECT_COLUMN_NAMES = "__time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted";
- private static final String SELECT_COLUMN_TYPES = "timestamp with local time zone,string,string,string,string,string,string,string,string,double,double,float,float,float";
+ private static final String SELECT_COLUMN_TYPES = "timestamp with local time zone,boolean,string,string,string,string,string,string,string,double,double,float,float,float";
private static final Object[][] SELECT_QUERY_RESULTS_RECORDS = new Object[][] {
- new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new Text("1"),
+ new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new BooleanWritable(true),
new Text("article"), new Text("0"), new Text("0"),
new Text("11._korpus_(NOVJ)"), new Text("sl"), new Text("0"),
new Text("EmausBot"),
new DoubleWritable(1.0d), new DoubleWritable(39.0d), new FloatWritable(39.0F),
new FloatWritable(39.0F), new FloatWritable(0.0F) },
- new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new Text("0"),
+ new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
new Text("article"), new Text("0"), new Text("0"),
new Text("112_U.S._580"), new Text("en"), new Text("1"), new Text("MZMcBride"),
new DoubleWritable(1.0d), new DoubleWritable(70.0d), new FloatWritable(70.0F),
new FloatWritable(70.0F), new FloatWritable(0.0F) },
- new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+ new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
new Text("article"), new Text("0"), new Text("0"),
new Text("113_U.S._243"), new Text("en"), new Text("1"), new Text("MZMcBride"),
new DoubleWritable(1.0d), new DoubleWritable(77.0d), new FloatWritable(77.0F),
new FloatWritable(77.0F), new FloatWritable(0.0F) },
- new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+ new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
new Text("article"), new Text("0"), new Text("0"),
new Text("113_U.S._73"), new Text("en"), new Text("1"), new Text("MZMcBride"),
new DoubleWritable(1.0d), new DoubleWritable(70.0d), new FloatWritable(70.0F),
new FloatWritable(70.0F), new FloatWritable(0.0F) },
- new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+ new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
new Text("article"), new Text("0"), new Text("0"),
new Text("113_U.S._756"), new Text("en"), new Text("1"), new Text("MZMcBride"),
new DoubleWritable(1.0d), new DoubleWritable(68.0d), new FloatWritable(68.0F),
http://git-wip-us.apache.org/repos/asf/hive/blob/16f57220/ql/src/test/queries/clientpositive/druidmini_expressions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q
index d88b281..f28b391 100644
--- a/ql/src/test/queries/clientpositive/druidmini_expressions.q
+++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q
@@ -117,5 +117,13 @@ GROUP BY CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DAT
explain SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1, DATE_SUB(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_2 from druid_table_n0 order by date_1, date_2 limit 3;
SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1, DATE_SUB(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_2 from druid_table_n0 order by date_1, date_2 limit 3;
-
-DROP TABLE druid_table_n0;
\ No newline at end of file
+ -- Boolean Values
+ SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2;
+
+ -- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319
+ -- It should get fixed once we upgrade calcite
+ SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2;
+
+ EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2;
+
+DROP TABLE druid_table_n0;
http://git-wip-us.apache.org/repos/asf/hive/blob/16f57220/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
index c1e54ea..8ccdeaa 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
@@ -1246,6 +1246,57 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
1969-02-26 1970-11-04
1969-03-19 1970-10-14
1969-11-13 1970-02-17
+PREHOOK: query: -- Boolean Values
+ SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table_n0
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: -- Boolean Values
+ SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table_n0
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL 8
+false 3140
+true 2957
+PREHOOK: query: -- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319
+ -- It should get fixed once we upgrade calcite
+ SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table_n0
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: -- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319
+ -- It should get fixed once we upgrade calcite
+ SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table_n0
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+false 2653
+false 3452
+PREHOOK: query: EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_table_n0
+ properties:
+ druid.fieldNames vc,$f1
+ druid.fieldTypes boolean,bigint
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"(\"ctinyint\" > 2)","outputType":"FLOAT"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"count","name":"$f1"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+ druid.query.type groupBy
+ Select Operator
+ expressions: vc (type: boolean), $f1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ ListSink
+
PREHOOK: query: DROP TABLE druid_table_n0
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@druid_table_n0