You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/10/10 03:07:49 UTC

[02/50] [abbrv] carbondata git commit: [CARBONDATA-1491] Dictionary_exclude columns are not going into no_dictionary flow

[CARBONDATA-1491] Dictionary_exclude columns are not going into no_dictionary flow

(1) DICTIONARY_EXCLUDE columns are not considered as no_dictionary columns, because while parsing we are not setting them as no_dictionary columns.
(2) For reconstructing the defaultValue of a newly added no_dictionary measure column, the logic is changed, as the previous logic could throw an IllegalArgumentException for a wrong byte-array length.
(3) Test cases are added covering both changes.

This closes #1374


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1e7da59b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1e7da59b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1e7da59b

Branch: refs/heads/streaming_ingest
Commit: 1e7da59b466ae4f33e3c184db02f218f91a6a2ae
Parents: d4eabbe
Author: dhatchayani <dh...@gmail.com>
Authored: Wed Sep 20 12:03:18 2017 +0530
Committer: Ravindra Pesala <ra...@gmail.com>
Committed: Thu Sep 21 11:24:39 2017 +0530

----------------------------------------------------------------------
 .../scan/executor/util/RestructureUtil.java     |  7 ++++--
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |  5 +++-
 .../vectorreader/AddColumnTestCases.scala       | 24 ++++++++++++++++++++
 3 files changed, 33 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1e7da59b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
index 5e78741..6a281dd 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
@@ -215,13 +215,16 @@ public class RestructureUtil {
    */
   private static Object getNoDictionaryDefaultValue(DataType datatype, byte[] defaultValue) {
     Object noDictionaryDefaultValue = null;
+    String value = null;
     if (!isDefaultValueNull(defaultValue)) {
       switch (datatype) {
         case INT:
-          noDictionaryDefaultValue = ByteUtil.toInt(defaultValue, 0, defaultValue.length);
+          value = new String(defaultValue, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
+          noDictionaryDefaultValue = Integer.parseInt(value);
           break;
         case LONG:
-          noDictionaryDefaultValue = ByteUtil.toLong(defaultValue, 0, defaultValue.length);
+          value = new String(defaultValue, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
+          noDictionaryDefaultValue = Long.parseLong(value);
           break;
         case TIMESTAMP:
           long timestampValue = ByteUtil.toLong(defaultValue, 0, defaultValue.length);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1e7da59b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index acdec91..42070c4 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -617,7 +617,10 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
     // by default consider all String cols as dims and if any dictionary include isn't present then
     // add it to noDictionaryDims list. consider all dictionary excludes/include cols as dims
     fields.foreach { field =>
-      if (dictIncludeCols.exists(x => x.equalsIgnoreCase(field.column))) {
+      if (dictExcludeCols.exists(x => x.equalsIgnoreCase(field.column))) {
+        noDictionaryDims :+= field.column
+        dimFields += field
+      } else if (dictIncludeCols.exists(x => x.equalsIgnoreCase(field.column))) {
         dimFields += field
       } else if (DataTypeUtil.getDataType(field.dataType.get.toUpperCase) == DataType.TIMESTAMP &&
                  !dictIncludeCols.exists(x => x.equalsIgnoreCase(field.column))) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1e7da59b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala
index 30485d1..f5ff2e3 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala
@@ -418,6 +418,30 @@ class AddColumnTestCases extends Spark2QueryTest with BeforeAndAfterAll {
     sql("DROP TABLE IF EXISTS carbon_table")
   }
 
+  test("test to check if intField returns correct result - dictionary exclude") {
+    sqlContext.setConf("carbon.enable.vector.reader", "true")
+    sql("DROP TABLE IF EXISTS carbon_table")
+    sql("CREATE TABLE carbon_table(intField INT,stringField STRING,charField STRING,timestampField TIMESTAMP, decimalField DECIMAL(6,2)) STORED BY 'carbondata'")
+    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/restructure/data1.csv' INTO TABLE carbon_table OPTIONS('FILEHEADER'='intField,stringField,charField,timestampField,decimalField')")
+    sql(
+      "ALTER TABLE carbon_table ADD COLUMNS(newField INT) TBLPROPERTIES" +
+      "('DEFAULT.VALUE.newField'='67890', 'DICTIONARY_EXCLUDE'='newField')")
+    checkAnswer(sql("SELECT DISTINCT(newField) FROM carbon_table"), Row(67890))
+    sql("DROP TABLE IF EXISTS carbon_table")
+  }
+
+  test("test to check if bigintField returns correct result - dictionary exclude") {
+    sqlContext.setConf("carbon.enable.vector.reader", "true")
+    sql("DROP TABLE IF EXISTS carbon_table")
+    sql("CREATE TABLE carbon_table(intField INT,stringField STRING,charField STRING,timestampField TIMESTAMP, decimalField DECIMAL(6,2)) STORED BY 'carbondata'")
+    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/restructure/data1.csv' INTO TABLE carbon_table OPTIONS('FILEHEADER'='intField,stringField,charField,timestampField,decimalField')")
+    sql(
+      "ALTER TABLE carbon_table ADD COLUMNS(newField bigint) TBLPROPERTIES" +
+      "('DEFAULT.VALUE.newField'='67890', 'DICTIONARY_EXCLUDE'='newField')")
+    checkAnswer(sql("SELECT DISTINCT(newField) FROM carbon_table"), Row(67890))
+    sql("DROP TABLE IF EXISTS carbon_table")
+  }
+
   test("test to check if shortField returns correct result") {
     sql("DROP TABLE IF EXISTS carbon_table")
     sql("CREATE TABLE carbon_table(intField INT,stringField STRING,charField STRING,timestampField TIMESTAMP, decimalField DECIMAL(6,2)) STORED BY 'carbondata'")