You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/01/02 05:50:56 UTC

carbondata git commit: [CARBONDATA-3205]Fix Get Local Dictionary for empty Array of String

Repository: carbondata
Updated Branches:
  refs/heads/master 28432e30b -> 7477527e9


[CARBONDATA-3205]Fix Get Local Dictionary for empty Array of String

Problem:
In case of Array data type, if the data is empty then the Local dictionary entry for the data will not be created.
So, while querying the data, dictionary value read from the local dictionary will go wrong.
In case we have 3 rows of data with the first row as empty data, then the number of local dictionary entries will be 2. Reading back the dictionary values from the local dictionary during the query throws ArrayIndexOutOfBoundsException.

Solution:
Check for the presence of dictionary values, and fill the default value only if dictionary values exist.

This closes #3028


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7477527e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7477527e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7477527e

Branch: refs/heads/master
Commit: 7477527e91cbcc3693e8bb49449fe7c2895f8b39
Parents: 28432e3
Author: Indhumathi27 <in...@gmail.com>
Authored: Thu Dec 27 17:32:48 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Wed Jan 2 11:20:45 2019 +0530

----------------------------------------------------------------------
 .../chunk/impl/DimensionRawColumnChunk.java     |  6 ++++--
 .../src/test/resources/test_json.json           |  1 +
 ...tCreateTableUsingSparkCarbonFileFormat.scala | 22 ++++++++++++++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/7477527e/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java
index 9d798bd..ccf0a0e 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java
@@ -208,8 +208,10 @@ public class DimensionRawColumnChunk extends AbstractRawColumnChunk {
         }
       }
       decode.freeMemory();
-      // as dictionary values starts from 1 setting null default value
-      dictionary[1] = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+      if (!usedDictionary.isEmpty()) {
+        // as dictionary values starts from 1 setting null default value
+        dictionary[1] = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+      }
       return new CarbonDictionaryImpl(dictionary, usedDictionary.cardinality());
     }
     return null;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/7477527e/integration/spark-datasource/src/test/resources/test_json.json
----------------------------------------------------------------------
diff --git a/integration/spark-datasource/src/test/resources/test_json.json b/integration/spark-datasource/src/test/resources/test_json.json
new file mode 100644
index 0000000..6884ecd
--- /dev/null
+++ b/integration/spark-datasource/src/test/resources/test_json.json
@@ -0,0 +1 @@
+{"username":"cust","age":20,"phone":"5367288","housenum":"A780","address": {"Address_Detail": {"Building_Detail": {"Society_name":"xxxyy","building_no":"A780","house_no":4,"Building_Type": {"Buildingname":"abcd","buildingarea":100.3,"Building_Criteria":{"f1": {"username1":"cust0","age1":20,"phone1":682973,"housenum1":"A899","address1":{"Address_Detail1": {"Building_Detail1": {"Society_name1":"xxxyy","building_no1":"A780","house_no1":4,"Building_Type1": {"Buildingname1":"abcd","buildingarea1":100.3,"Building_Criteria1":{"f11": 33,"f21":{"inner1":{"children":[]}}},"AR11":["abc","gdf","ehf"],"AR21":[3,4,5],"AR31":[27735,7981366,9873262],"AR41":[9.2436482,686263.09,3.48249824],"AR51":[9.463462333333E7,47.398759828E5,9.2846892E4],"AR61":[true,true]}}}}}},"f2":"er","AR1":["abc","gdf","ehf"],"AR2":[3,4,5],"AR3":[27735,7981366,9873262],"AR4":[9.2436482,686263.09,3.48249824],"AR5":[9.463462333333E7,47.398759828E5,9.2846892E4],"AR6":[true,true]}}}}}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/7477527e/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala
----------------------------------------------------------------------
diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala
index 0d2fcd8..7814878 100644
--- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala
+++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala
@@ -299,6 +299,28 @@ class TestCreateTableUsingSparkCarbonFileFormat extends FunSuite with BeforeAndA
     cleanTestData()
   }
 
+  test("Test complex json nested data with empty array of struct data") {
+    val rootPath = new File(this.getClass.getResource("/").getPath
+                            + "../../../..").getCanonicalPath
+    val resource = s"$rootPath/integration/spark-datasource/src/test/resources/test_json.json"
+    val path = writerPath + "_json"
+    FileUtils.deleteDirectory(new File(path))
+    val json = spark.read.json(s"$resource")
+    json.write.format("carbon").save(s"$path")
+    spark.sql("DROP TABLE IF EXISTS test_json")
+    if (SparkUtil.isSparkVersionEqualTo("2.1")) {
+      spark.sql(s"""CREATE TABLE test_json USING carbon OPTIONS (PATH '$path') """)
+    } else if (SparkUtil.isSparkVersionXandAbove("2.2")) {
+      spark.sql(
+        s"""CREATE TABLE test_json USING carbon LOCATION
+           |'$path' """.stripMargin)
+    } else {
+    }
+    assert(spark.sql("select age from test_json").collect().length == 1)
+    spark.sql("DROP TABLE IF EXISTS test_json")
+    FileUtils.deleteDirectory(new File(path))
+  }
+
   test("Read sdk writer output file without index file should not fail") {
     buildTestData()
     deleteIndexFile(writerPath, CarbonCommonConstants.UPDATE_INDEX_FILE_EXT)