Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/13 13:08:36 UTC

hive git commit: HIVE-19604 : Incorrect Handling of Boolean in DruidSerde (Nishant Bangarwa via Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 5e3b2e753 -> 16f57220f


HIVE-19604 : Incorrect Handling of Boolean in DruidSerde (Nishant Bangarwa via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/16f57220
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/16f57220
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/16f57220

Branch: refs/heads/master
Commit: 16f57220ff281a60bbfdaf6ee5a148efad48c229
Parents: 5e3b2e7
Author: Nishant Bangarwa <ni...@gmail.com>
Authored: Wed Jun 13 08:07:40 2018 -0500
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Jun 13 08:07:40 2018 -0500

----------------------------------------------------------------------
 .../hadoop/hive/druid/serde/DruidSerDe.java     |  6 ++-
 .../hadoop/hive/druid/serde/TestDruidSerDe.java | 23 ++++-----
 .../clientpositive/druidmini_expressions.q      | 12 ++++-
 .../druid/druidmini_expressions.q.out           | 51 ++++++++++++++++++++
 4 files changed, 78 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/16f57220/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
index 842a9fa..a9e7837 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
@@ -462,7 +462,11 @@ import static org.joda.time.format.ISODateTimeFormat.dateOptionalTimeParser;
         output.add(new Text(value.toString()));
         break;
       case BOOLEAN:
-        output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
+        if (value instanceof Number) {
+          output.add(new BooleanWritable(((Number) value).intValue() != 0));
+        } else {
+          output.add(new BooleanWritable(Boolean.valueOf(value.toString())));
+        }
         break;
       default:
         throw new SerDeException("Unknown type: " + types[i].getPrimitiveCategory());

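The hunk above changes how DruidSerDe materializes a BOOLEAN column: Druid can return such a value either as a JSON number (1/0) or as a string, and Boolean.valueOf("1") is false, so numeric values must be tested against zero instead of being round-tripped through toString(). The following is a minimal, self-contained sketch (not part of the commit) of that conversion rule; the class name DruidBooleanConversionSketch and method toBooleanWritable are hypothetical and used only for illustration.

import org.apache.hadoop.io.BooleanWritable;

public class DruidBooleanConversionSketch {

  // Mirrors the rule introduced by the patch: numbers are compared against
  // zero, anything else falls back to Boolean.valueOf(String).
  static BooleanWritable toBooleanWritable(Object value) {
    if (value instanceof Number) {
      // Numeric representation: any non-zero value is treated as true.
      return new BooleanWritable(((Number) value).intValue() != 0);
    }
    // String representation: only "true" (case-insensitive) is true; note
    // that "1" would be false here, which is why the numeric branch exists.
    return new BooleanWritable(Boolean.valueOf(value.toString()));
  }

  public static void main(String[] args) {
    System.out.println(toBooleanWritable(1));       // true
    System.out.println(toBooleanWritable(0));       // false
    System.out.println(toBooleanWritable("true"));  // true
    System.out.println(toBooleanWritable("1"));     // false: Boolean.valueOf("1") is false
  }
}
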
http://git-wip-us.apache.org/repos/asf/hive/blob/16f57220/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
index e45de0f..922c1db 100644
--- a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
+++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
@@ -74,6 +74,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
@@ -462,7 +463,7 @@ public class TestDruidSerDe {
                   + "    \"offset\" : 0,  "
                   + "    \"event\" : {   "
                   + "     \"timestamp\" : \"2013-01-01T00:00:00.000Z\",   "
-                  + "     \"robot\" : \"1\",   "
+                  + "     \"robot\" : 1,   "
                   + "     \"namespace\" : \"article\",   "
                   + "     \"anonymous\" : \"0\",   "
                   + "     \"unpatrolled\" : \"0\",   "
@@ -481,7 +482,7 @@ public class TestDruidSerDe {
                   + "    \"offset\" : 1,  "
                   + "    \"event\" : {   "
                   + "     \"timestamp\" : \"2013-01-01T00:00:00.000Z\",   "
-                  + "     \"robot\" : \"0\",   "
+                  + "     \"robot\" : 0,   "
                   + "     \"namespace\" : \"article\",   "
                   + "     \"anonymous\" : \"0\",   "
                   + "     \"unpatrolled\" : \"0\",   "
@@ -500,7 +501,7 @@ public class TestDruidSerDe {
                   + "    \"offset\" : 2,  "
                   + "    \"event\" : {   "
                   + "     \"timestamp\" : \"2013-01-01T00:00:12.000Z\",   "
-                  + "     \"robot\" : \"0\",   "
+                  + "     \"robot\" : 0,   "
                   + "     \"namespace\" : \"article\",   "
                   + "     \"anonymous\" : \"0\",   "
                   + "     \"unpatrolled\" : \"0\",   "
@@ -519,7 +520,7 @@ public class TestDruidSerDe {
                   + "    \"offset\" : 3,  "
                   + "    \"event\" : {   "
                   + "     \"timestamp\" : \"2013-01-01T00:00:12.000Z\",   "
-                  + "     \"robot\" : \"0\",   "
+                  + "     \"robot\" : 0,   "
                   + "     \"namespace\" : \"article\",   "
                   + "     \"anonymous\" : \"0\",   "
                   + "     \"unpatrolled\" : \"0\",   "
@@ -538,7 +539,7 @@ public class TestDruidSerDe {
                   + "    \"offset\" : 4,  "
                   + "    \"event\" : {   "
                   + "     \"timestamp\" : \"2013-01-01T00:00:12.000Z\",   "
-                  + "     \"robot\" : \"0\",   "
+                  + "     \"robot\" : 0,   "
                   + "     \"namespace\" : \"article\",   "
                   + "     \"anonymous\" : \"0\",   "
                   + "     \"unpatrolled\" : \"0\",   "
@@ -556,30 +557,30 @@ public class TestDruidSerDe {
 
   // Select query results as records (types defined by metastore)
   private static final String SELECT_COLUMN_NAMES = "__time,robot,namespace,anonymous,unpatrolled,page,language,newpage,user,count,added,delta,variation,deleted";
-  private static final String SELECT_COLUMN_TYPES = "timestamp with local time zone,string,string,string,string,string,string,string,string,double,double,float,float,float";
+  private static final String SELECT_COLUMN_TYPES = "timestamp with local time zone,boolean,string,string,string,string,string,string,string,double,double,float,float,float";
   private static final Object[][] SELECT_QUERY_RESULTS_RECORDS = new Object[][] {
-          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new Text("1"),
+          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new BooleanWritable(true),
                   new Text("article"), new Text("0"), new Text("0"),
                   new Text("11._korpus_(NOVJ)"), new Text("sl"), new Text("0"),
                   new Text("EmausBot"),
                   new DoubleWritable(1.0d), new DoubleWritable(39.0d), new FloatWritable(39.0F),
                   new FloatWritable(39.0F), new FloatWritable(0.0F) },
-          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new Text("0"),
+          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998400000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
                   new Text("article"), new Text("0"), new Text("0"),
                   new Text("112_U.S._580"), new Text("en"), new Text("1"), new Text("MZMcBride"),
                   new DoubleWritable(1.0d), new DoubleWritable(70.0d), new FloatWritable(70.0F),
                   new FloatWritable(70.0F), new FloatWritable(0.0F) },
-          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
                   new Text("article"), new Text("0"), new Text("0"),
                   new Text("113_U.S._243"), new Text("en"), new Text("1"), new Text("MZMcBride"),
                   new DoubleWritable(1.0d), new DoubleWritable(77.0d), new FloatWritable(77.0F),
                   new FloatWritable(77.0F), new FloatWritable(0.0F) },
-          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
                   new Text("article"), new Text("0"), new Text("0"),
                   new Text("113_U.S._73"), new Text("en"), new Text("1"), new Text("MZMcBride"),
                   new DoubleWritable(1.0d), new DoubleWritable(70.0d), new FloatWritable(70.0F),
                   new FloatWritable(70.0F), new FloatWritable(0.0F) },
-          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new Text("0"),
+          new Object[] { new TimestampLocalTZWritable(new TimestampTZ(Instant.ofEpochMilli(1356998412000L).atZone(ZoneOffset.UTC))), new BooleanWritable(false),
                   new Text("article"), new Text("0"), new Text("0"),
                   new Text("113_U.S._756"), new Text("en"), new Text("1"), new Text("MZMcBride"),
                   new DoubleWritable(1.0d), new DoubleWritable(68.0d), new FloatWritable(68.0F),

http://git-wip-us.apache.org/repos/asf/hive/blob/16f57220/ql/src/test/queries/clientpositive/druidmini_expressions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q
index d88b281..f28b391 100644
--- a/ql/src/test/queries/clientpositive/druidmini_expressions.q
+++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q
@@ -117,5 +117,13 @@ GROUP BY CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DAT
 explain SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1,  DATE_SUB(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_2 from druid_table_n0  order by date_1, date_2 limit 3;
 SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1,  DATE_SUB(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_2 from druid_table_n0  order by date_1, date_2 limit 3;
 
-
-DROP TABLE druid_table_n0;
\ No newline at end of file
+  -- Boolean Values
+ SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2;
+  
+  -- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319
+  -- It should get fixed once we upgrade calcite
+ SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2;
+  
+ EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2;
+
+DROP TABLE druid_table_n0;

http://git-wip-us.apache.org/repos/asf/hive/blob/16f57220/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
index c1e54ea..8ccdeaa 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
@@ -1246,6 +1246,57 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 1969-02-26	1970-11-04
 1969-03-19	1970-10-14
 1969-11-13	1970-02-17
+PREHOOK: query: -- Boolean Values
+ SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table_n0
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: -- Boolean Values
+ SELECT cboolean2, count(*) from druid_table_n0 GROUP BY cboolean2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table_n0
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL	8
+false	3140
+true	2957
+PREHOOK: query: -- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319
+  -- It should get fixed once we upgrade calcite
+ SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table_n0
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: -- Expected results of this query are wrong due to https://issues.apache.org/jira/browse/CALCITE-2319
+  -- It should get fixed once we upgrade calcite
+ SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table_n0
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+false	2653
+false	3452
+PREHOOK: query: EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table_n0
+          properties:
+            druid.fieldNames vc,$f1
+            druid.fieldTypes boolean,bigint
+            druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_n0","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"(\"ctinyint\" > 2)","outputType":"FLOAT"}],"limitSpec":{"type":"default"},"aggregations":[{"type":"count","name":"$f1"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]}
+            druid.query.type groupBy
+          Select Operator
+            expressions: vc (type: boolean), $f1 (type: bigint)
+            outputColumnNames: _col0, _col1
+            ListSink
+
 PREHOOK: query: DROP TABLE druid_table_n0
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@druid_table_n0