Posted to commits@carbondata.apache.org by ak...@apache.org on 2019/06/24 11:08:07 UTC

[carbondata] branch master updated: [CARBONDATA-3441] Aggregate queries are failing on Reading from Hive

This is an automated email from the ASF dual-hosted git repository.

akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 43746ca  [CARBONDATA-3441] Aggregate queries are failing on Reading from Hive
43746ca is described below

commit 43746cab3e6abedb1f2ab154dd40d8f51c7ccf28
Author: dhatchayani <dh...@gmail.com>
AuthorDate: Tue Jun 18 10:39:27 2019 +0530

    [CARBONDATA-3441] Aggregate queries are failing on Reading from Hive
    
    Problem:
    Aggregate queries fail when reading from Hive because table_name and db_name are not set in the conf.
    
    Solution:
    Set table_name and db_name in the conf and handle the resulting NullPointerException.
    
    This closes #3292
---
 examples/spark2/src/main/resources/Test_Data1.csv  | 16 +++++++
 .../apache/carbondata/examples/HiveExample.scala   | 51 +++++++++++++++++++++-
 .../carbondata/hive/CarbonHiveRecordReader.java    |  2 +-
 .../carbondata/hive/MapredCarbonInputFormat.java   |  4 +-
 4 files changed, 70 insertions(+), 3 deletions(-)
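
The heart of the change is in the MapredCarbonInputFormat hunk below: before the QueryModel is built, the JobConf is seeded with placeholder database and table names so that downstream lookups never see an unset property. A minimal sketch of that pattern follows; the property keys here are assumptions standing in for the class's actual DATABASE_NAME and TABLE_NAME constants, and this is illustrative rather than the verbatim commit code:

    import java.util.UUID;
    import org.apache.hadoop.mapred.JobConf;

    // Sketch only: seed the conf with unique placeholder identifiers so the
    // Hive read path never dereferences a null database/table name. The
    // string keys below are stand-ins for the real DATABASE_NAME and
    // TABLE_NAME constants used by MapredCarbonInputFormat.
    static void seedDummyIdentifiers(JobConf jobConf) {
      jobConf.set("hive.carbon.database.name", "_dummyDb_" + UUID.randomUUID());
      jobConf.set("hive.carbon.table.name", "_dummyTable_" + UUID.randomUUID());
    }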

diff --git a/examples/spark2/src/main/resources/Test_Data1.csv b/examples/spark2/src/main/resources/Test_Data1.csv
new file mode 100644
index 0000000..809c043
--- /dev/null
+++ b/examples/spark2/src/main/resources/Test_Data1.csv
@@ -0,0 +1,16 @@
+1234,1234,0.123456789009876543211234567890098765432,1234,huawei,2017-07-01 12:07:28,Normal_values
+2345,2345,0.054,8765,honoor,2017-07-01 12:07:28,Normal_values
+3456,3456,0.1234567890098765432112345678900987654,4324,lenovo,2017-07-01 12:07:28,Normal_values
+4567,4567,.0001,4567,sony,2017-07-01 12:07:28,Normal_values
+2147483647,9223372036854775807,0.123456789009876543211234567890098765439,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of [...]
+-2147483648,-9223372036854775808,-0.0.12345678900987654321123456789009876538,4.9E-324,a,2017-07-01 12:07:28,Min_range_values
+-2147483647,-9223372036854775807,0.8,4.9E-323,b,2017-07-01 12:07:28,Min_range_values-1
+-2147483646,-9223372036854775806,0.9,4.9E-322,c,2017-07-01 12:07:28,Min_range_values-2
+-2147483645,-9223372036854775805,0,4.9E-321,d,2017-07-01 12:07:28,Min_range_values-3
+0,0,0,0,0,0,All_zeros_values
+2147483646,9223372036854775807,0.12345678900987654321123456789012345638,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of  [...]
+2147483646,9223372036854775807,12345678900987654321123456789012345678,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of da [...]
+2147483646,9223372036854775807,12345678900987654321123456789012345678,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of da [...]
+,,,,,,All_null_values
+2147483647,
+,,,,,
diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/HiveExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/HiveExample.scala
index c043076..6639e8e 100644
--- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/HiveExample.scala
+++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/HiveExample.scala
@@ -59,7 +59,7 @@ object HiveExample {
          | STORED BY 'carbondata'
        """.stripMargin)
 
-    val inputPath = FileFactory
+    var inputPath = FileFactory
       .getUpdatedFilePath(s"$rootPath/examples/spark2/src/main/resources/sample.csv")
 
     carbonSession.sql(
@@ -76,6 +76,24 @@ object HiveExample {
 
     carbonSession.sql("SELECT * FROM HIVE_CARBON_EXAMPLE").show()
 
+    carbonSession.sql("DROP TABLE IF EXISTS TEST_BOUNDARY")
+
+    carbonSession
+      .sql(
+        s"""CREATE TABLE TEST_BOUNDARY (c1_int int,c2_Bigint Bigint,c3_Decimal Decimal(38,30),
+           |c4_double double,c5_string string,c6_Timestamp Timestamp,c7_Datatype_Desc string)
+           |STORED BY 'org.apache.carbondata.format' TBLPROPERTIES
+           |('DICTIONARY_INCLUDE'='c6_Timestamp')""".stripMargin)
+
+    inputPath = FileFactory
+      .getUpdatedFilePath(s"$rootPath/examples/spark2/src/main/resources/Test_Data1.csv")
+
+    carbonSession
+      .sql(
+        s"LOAD DATA INPATH '$inputPath' INTO table TEST_BOUNDARY OPTIONS('DELIMITER'=','," +
+        "'QUOTECHAR'='\"', 'BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='c1_int,c2_Bigint," +
+        "c3_Decimal,c4_double,c5_string,c6_Timestamp,c7_Datatype_Desc')")
+
     carbonSession.close()
 
     // delete the already existing lock on metastore so that new derby instance
@@ -217,6 +235,37 @@ object HiveExample {
       s"$outOfOrderColFetched")
     assert(outOfOrderColFetched == 4)
 
+    val resultAggQuery = statement
+      .executeQuery(
+        "SELECT min(c3_Decimal) as min, max(c3_Decimal) as max, " +
+        "sum(c3_Decimal) as sum FROM TEST_BOUNDARY")
+
+    var resultAggQueryFetched = 0
+
+    var resultMin = ""
+    var resultMax = ""
+    var resultSum = ""
+
+    while (resultAggQuery.next) {
+      if (resultAggQueryFetched == 0) {
+        println("+-----+" + "+-------------------+" + "+--------------------------------+")
+        println("| min |" + "| max               |" + "| sum                            |")
+
+        println("+-----+" + "+-------------------+" + "+--------------------------------+")
+
+        resultMin = resultAggQuery.getString("min")
+        resultMax = resultAggQuery.getString("max")
+        resultSum = resultAggQuery.getString("sum")
+
+        println(s"| $resultMin   |" + s"| $resultMax               |" + s"| $resultSum|")
+        println("+-----+" + "+-------------------+" + "+--------------------------------+")
+      }
+      resultAggQueryFetched = resultAggQueryFetched + 1
+    }
+    println(" ********** Total Rows Fetched When Aggregate Query **********" +
+            s"$resultAggQueryFetched")
+    assert(resultAggQueryFetched == 1)
+
     hiveEmbeddedServer2.stop()
   }
 }
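
The Scala example above exercises the previously failing path end to end: it creates TEST_BOUNDARY through the CarbonSession, loads the boundary-value CSV, then runs the aggregate over the embedded HiveServer2's JDBC interface and asserts that exactly one row comes back. The same check can be run against any HiveServer2 endpoint with plain JDBC; here is a minimal sketch, where the connection URL and empty credentials are assumptions for a local, unsecured server:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class AggregateCheck {
      public static void main(String[] args) throws Exception {
        // Explicit driver load; newer hive-jdbc jars also auto-register it.
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        try (Connection conn = DriverManager
                 .getConnection("jdbc:hive2://localhost:10000/default", "", "");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                 "SELECT min(c3_Decimal) AS min_val, max(c3_Decimal) AS max_val, "
                     + "sum(c3_Decimal) AS sum_val FROM TEST_BOUNDARY")) {
          // Exactly one row is expected; before this fix the query itself
          // failed with a NullPointerException.
          while (rs.next()) {
            System.out.println(rs.getString("min_val") + " | "
                + rs.getString("max_val") + " | " + rs.getString("sum_val"));
          }
        }
      }
    }
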
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
index e93a794..8f500ff 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
@@ -105,7 +105,7 @@ class CarbonHiveRecordReader extends CarbonRecordReader<ArrayWritable>
       valueObj = new ArrayWritable(Writable.class, new Writable[columnTypes.size()]);
     }
 
-    if (!colIds.equals("")) {
+    if (null != colIds && !colIds.equals("")) {
       String[] arraySelectedColId = colIds.split(",");
       columnIds = new int[arraySelectedColId.length];
       int columnId = 0;
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java b/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
index 64edae2..9d2b918 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
@@ -131,6 +131,8 @@ public class MapredCarbonInputFormat extends CarbonTableInputFormat<ArrayWritabl
     }
     QueryModel queryModel = null;
     try {
+      jobConf.set(DATABASE_NAME, "_dummyDb_" + UUID.randomUUID().toString());
+      jobConf.set(TABLE_NAME, "_dummyTable_" + UUID.randomUUID().toString());
       queryModel = getQueryModel(jobConf, path);
     } catch (InvalidConfigurationException e) {
       LOGGER.error("Failed to create record reader: " + e.getMessage(), e);
@@ -181,7 +183,7 @@ public class MapredCarbonInputFormat extends CarbonTableInputFormat<ArrayWritabl
       allColumns.append(column.getColName() + ",");
     }
 
-    if (!projection.equals("")) {
+    if (null != projection && !projection.equals("")) {
       String[] columnNames = projection.split(",");
       //verify that the columns parsed by Hive exist in the table
       for (String col : columnNames) {
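
Both null guards in this commit follow the same defensive idiom: for aggregate-only queries, Hive may leave the projected-column properties unset, so the raw string can be null as well as empty. A self-contained sketch of the idiom (the method and variable names are illustrative, not the actual code):

    // Null-safe parsing of a comma-separated column-id string. An unset
    // property yields an empty array instead of a NullPointerException.
    static int[] parseColumnIds(String colIds) {
      if (colIds == null || colIds.isEmpty()) {
        return new int[0];
      }
      String[] parts = colIds.split(",");
      int[] ids = new int[parts.length];
      for (int i = 0; i < parts.length; i++) {
        ids[i] = Integer.parseInt(parts[i].trim());
      }
      return ids;
    }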