You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/06/13 14:59:18 UTC
[carbondata] branch master updated: [CARBONDATA-3423] Validate dictionary for binary data type

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new afef232  [CARBONDATA-3423] Validate dictionary for binary data type
afef232 is described below

commit afef2327057fa3f1534441f8f942f37392d653b7
Author: xubo245 <xu...@huawei.com>
AuthorDate: Mon Jun 10 16:32:03 2019 +0800

    [CARBONDATA-3423] Validate dictionary for binary data type
    
    Add validation to reject DICTIONARY_INCLUDE on columns of binary data type, which is not supported.
    
    This closes #3271
---
 docs/ddl-of-carbondata.md                          |  2 +-
 .../testsuite/binary/TestBinaryDataType.scala      | 57 ++++++++++++++++++++++
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala    |  8 ++-
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md
index 387c46b..2495bf6 100644
--- a/docs/ddl-of-carbondata.md
+++ b/docs/ddl-of-carbondata.md
@@ -126,7 +126,7 @@ CarbonData DDL statements are documented here,which includes:
      ```
 
      **NOTE**: 
-      * Dictionary Include/Exclude for complex child columns is not supported.   
+      * Dictionary Include/Exclude for complex child columns is not supported. Dictionary Include doesn't support binary data type.  
       * Dictionary is global. Except global dictionary, there are local dictionary and non-dictionary in CarbonData.
       
    - ##### Local Dictionary Configuration
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala
index 390efe9..64c0ca6 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala
@@ -171,6 +171,63 @@ class TestBinaryDataType extends QueryTest with BeforeAndAfterAll {
         assert(true)
     }
 
+
+    test("Unsupport DICTIONARY_INCLUDE for binary") {
+
+        sql("DROP TABLE IF EXISTS binaryTable")
+        val exception = intercept[MalformedCarbonCommandException] {
+            sql(
+                """
+                  | CREATE TABLE binaryTable(
+                  |     id int,
+                  |     name string,
+                  |     city string,
+                  |     age int,
+                  |     binaryField binary)
+                  | STORED BY 'carbondata'
+                  | tblproperties('dictionary_include'='binaryField')
+                """.stripMargin)
+        }
+        assert(exception.getMessage.contains(
+            "DICTIONARY_INCLUDE is unsupported for binary data type column: binaryfield"))
+    }
+
+    test("Unsupport DICTIONARY_INCLUDE for binary, multiple column") {
+
+        sql("DROP TABLE IF EXISTS binaryTable")
+        val exception = intercept[MalformedCarbonCommandException] {
+            sql(
+                """
+                  | CREATE TABLE binaryTable(
+                  |     id int,
+                  |     name string,
+                  |     city string,
+                  |     age int,
+                  |     binaryField binary)
+                  | STORED BY 'carbondata'
+                  | tblproperties('dictionary_include'='name,binaryField')
+                """.stripMargin)
+        }
+        assert(exception.getMessage.contains(
+            "DICTIONARY_INCLUDE is unsupported for binary data type column: binaryfield"))
+    }
+
+    test("Supports DICTIONARY_EXCLUDE for binary") {
+        sql("DROP TABLE IF EXISTS binaryTable")
+        sql(
+            """
+              | CREATE TABLE binaryTable(
+              |     id int,
+              |     name string,
+              |     city string,
+              |     age int,
+              |     binaryField binary)
+              | STORED BY 'org.apache.carbondata.format'
+              | tblproperties('DICTIONARY_EXCLUDE'='binaryField')
+            """.stripMargin)
+        assert(true)
+    }
+
     test("Unsupport inverted_index for binary") {
         sql("DROP TABLE IF EXISTS binaryTable")
         val exception = intercept[MalformedCarbonCommandException] {
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 23ab806..3d3b89d 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -37,7 +37,7 @@ import org.apache.carbondata.common.logging.LogServiceFactory
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.constants.SortScopeOptions.SortScope
 import org.apache.carbondata.core.exception.InvalidConfigurationException
-import org.apache.carbondata.core.metadata.datatype.{DataType, DataTypes}
+import org.apache.carbondata.core.metadata.datatype.DataTypes
 import org.apache.carbondata.core.metadata.schema.PartitionInfo
 import org.apache.carbondata.core.metadata.schema.partition.PartitionType
 import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema
@@ -854,6 +854,12 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
                          "Please check the create table statement."
           throw new MalformedCarbonCommandException(errorMsg)
         }
+        val rangeField = fields.find(_.column.equalsIgnoreCase(distIncludeCol.trim))
+        if ("binary".equalsIgnoreCase(rangeField.get.dataType.get)) {
+          throw new MalformedCarbonCommandException(
+            "DICTIONARY_INCLUDE is unsupported for binary data type column: " +
+                    distIncludeCol.trim)
+        }
         if (varcharCols.exists(x => x.equalsIgnoreCase(distIncludeCol.trim))) {
           throw new MalformedCarbonCommandException(
             "DICTIONARY_INCLUDE is unsupported for long string datatype column: " +