Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/03/08 01:42:10 UTC

[carbondata] branch master updated: [CARBONDATA-3301]Fix inserting null values to Array<date> columns in carbon file format data load

This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new a5fc19d  [CARBONDATA-3301]Fix inserting null values to Array<date> columns in carbon file format data load
a5fc19d is described below

commit a5fc19d39d1d111ba6b520c5cc00deb81e44020a
Author: akashrn5 <ak...@gmail.com>
AuthorDate: Fri Feb 22 17:11:57 2019 +0530

    [CARBONDATA-3301]Fix inserting null values to Array<date> columns in carbon file format data load
    
    Problem:
    When a carbon datasource table contains complex columns such as Array<date> or Array<timestamp>, inserting data and then querying those columns returns null.
    
    Solution:
    In the file format case, before the actual load we get the internal row object from Spark and convert it to an object that CarbonData understands; for a date column that object is of Integer type. While inserting, only the Long case was handled, so this Integer value was passed to SimpleDateFormat for parsing, which threw an exception and null was inserted. Now the Integer case is handled as well: the surrogate key is assigned directly from the input value.
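    
    For context, Spark's InternalRow encodes DateType values as an Integer (days since 1970-01-01) and TimestampType values as a Long (microseconds since the epoch), which is why a date element of an array arrives here as an Integer rather than a formatted string. A minimal illustrative sketch (plain Java, not CarbonData code; the class and variable names are made up):
    
        import java.time.LocalDate;
    
        public class SparkDateEncodingSketch {
          public static void main(String[] args) {
            // "1994-04-06" as Spark hands it to the load path for an array<date>
            // element: an int counting days since the epoch, not a date string.
            int daysSinceEpoch = (int) LocalDate.parse("1994-04-06").toEpochDay();
            System.out.println(daysSinceEpoch); // 8861
            // Passing such a value to SimpleDateFormat.parse() throws a
            // ParseException, which is why the column ended up as null before this
            // fix; the fix uses the Integer directly as the direct-dictionary
            // surrogate key.
          }
        }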
    
    This closes #3133
---
 .../sql/carbondata/datasource/SparkCarbonDataSourceTest.scala | 11 +++++++++++
 .../carbondata/processing/datatypes/PrimitiveDataType.java    |  7 ++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
index fa37548..d25e675 100644
--- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
+++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
@@ -1760,6 +1760,16 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll {
     spark.sql("drop table if exists fileformat_drop_hive")
   }
 
+  test("test complexdatype for date and timestamp datatype") {
+    spark.sql("drop table if exists fileformat_date")
+    spark.sql("drop table if exists fileformat_date_hive")
+    spark.sql("create table fileformat_date_hive(name string, age int, dob array<date>, joinTime array<timestamp>) using parquet")
+    spark.sql("create table fileformat_date(name string, age int, dob array<date>, joinTime array<timestamp>) using carbon")
+    spark.sql("insert into fileformat_date_hive select 'joey', 32, array('1994-04-06','1887-05-06'), array('1994-04-06 00:00:05','1887-05-06 00:00:08')")
+    spark.sql("insert into fileformat_date select 'joey', 32, array('1994-04-06','1887-05-06'), array('1994-04-06 00:00:05','1887-05-06 00:00:08')")
+    checkAnswer(spark.sql("select * from fileformat_date_hive"), spark.sql("select * from fileformat_date"))
+  }
+
   test("validate the columns not present in schema") {
     spark.sql("drop table if exists validate")
     spark.sql("create table validate (name string, age int, address string) using carbon options('inverted_index'='abc')")
@@ -1785,5 +1795,6 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll {
     spark.sql("drop table if exists par_table")
     spark.sql("drop table if exists sdkout")
     spark.sql("drop table if exists validate")
+    spark.sql("drop table if exists fileformat_date")
   }
 }
diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
index cfbaa11..18dc89d 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
@@ -344,7 +344,7 @@ public class PrimitiveDataType implements GenericDataType<Object> {
             byte[] value = null;
             if (isDirectDictionary) {
               int surrogateKey;
-              if (!(input instanceof Long)) {
+              if (!(input instanceof Long) && !(input instanceof Integer)) {
                 SimpleDateFormat parser = new SimpleDateFormat(getDateFormat(carbonDimension));
                 parser.parse(parsedValue);
               }
@@ -353,6 +353,11 @@ public class PrimitiveDataType implements GenericDataType<Object> {
               // using dictionaryGenerator.
               if (dictionaryGenerator instanceof DirectDictionary && input instanceof Long) {
                 surrogateKey = ((DirectDictionary) dictionaryGenerator).generateKey((long) input);
+              } else if (dictionaryGenerator instanceof DirectDictionary
+                  && input instanceof Integer) {
+                // In case of file format, for complex type date or time type, input data comes as a
+                // Integer object, so just assign the surrogate key with the input object value
+                surrogateKey = (int) input;
               } else {
                 surrogateKey = dictionaryGenerator.getOrGenerateKey(parsedValue);
               }
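
For readers skimming the two hunks above, the surrogate-key resolution after this patch can be summarized in one place. The following is a simplified, self-contained sketch with hypothetical names (resolveSurrogate is not a CarbonData method, and the Long branch uses placeholder arithmetic instead of the real DirectDictionary generator); it only mirrors the type-based branching that the patch introduces:

    import java.time.LocalDate;

    public class SurrogateKeySketch {
      // Long input    -> timestamp path, key generated from the long value
      // Integer input -> date from the file-format path, value usable as the key
      // otherwise     -> fall back to parsing the formatted string value
      static int resolveSurrogate(Object input, String parsedValue) {
        if (input instanceof Long) {
          return (int) (((Long) input) / (24L * 60L * 60L * 1000L * 1000L)); // placeholder only
        } else if (input instanceof Integer) {
          return (Integer) input;
        } else {
          return (int) LocalDate.parse(parsedValue).toEpochDay();
        }
      }

      public static void main(String[] args) {
        System.out.println(resolveSurrogate(8861, null));         // Integer date    -> 8861
        System.out.println(resolveSurrogate(null, "1994-04-06")); // string fallback -> 8861
      }
    }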