You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/06/12 07:04:12 UTC
[carbondata] branch master updated: [CARBONDATA-3418] Inherit Column Compressor Property from parent table to its child table's

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 7678f37  [CARBONDATA-3418] Inherit Column Compressor Property from parent table to its child table's
7678f37 is described below

commit 7678f375d2fa82d59a70cbbc5abe39a5bb27b3e0
Author: Indhumathi27 <in...@gmail.com>
AuthorDate: Thu Jun 6 18:42:12 2019 +0530

    [CARBONDATA-3418] Inherit Column Compressor Property from parent table to its child table's
    
    Inherited the Column Compressor property from the parent table to its child tables.
    Fixed the Describe Formatted command to show the inverted_index columns even if sort_scope is 'no_sort'.
    Fixed inheriting sort_scope to child tables when sort_columns is provided and sort_scope is not provided.
    Alter set sort_columns="" is not supported when sort_scope is not no_sort. The same
    behavior is added for create table with sort_columns="" when sort_scope is not no_sort.
    
    This closes #3264
---
 .../mv/rewrite/TestAllOperationsOnMV.scala         | 49 ++++++++++++++++++++++
 docs/ddl-of-carbondata.md                          |  1 +
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala    | 13 ++++++
 .../table/CarbonDescribeFormattedCommand.scala     |  4 +-
 .../scala/org/apache/spark/util/DataMapUtil.scala  | 22 +++++++---
 .../booleantype/BooleanDataTypesLoadTest.scala     |  4 +-
 6 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
index e88a565..5f0a490 100644
--- a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
+++ b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
@@ -22,6 +22,8 @@ import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.BeforeAndAfterEach
 
 import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.metadata.CarbonMetadata
 import org.apache.carbondata.spark.exception.ProcessMetaDataException
 
 /**
@@ -460,5 +462,52 @@ class TestAllOperationsOnMV extends QueryTest with BeforeAndAfterEach {
     sql("drop table IF EXISTS maintable")
   }
 
+  test("test column compressor on preagg and mv") { // preaggregate and mv child tables must carry the parent's compressor
+    sql("drop table IF EXISTS maintable")
+    sql("create table maintable(name string, c_code int, price int) stored by 'carbondata' tblproperties('carbon.column.compressor'='zstd')")
+    sql("insert into table maintable select 'abc',21,2000")
+    sql("drop datamap if exists dm_pre ")
+    sql("create datamap dm_pre on table maintable using 'preaggregate' as select name, sum(price) from maintable group by name")
+    val preAggTable = CarbonMetadata.getInstance().getCarbonTable(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "maintable_dm_pre")
+    assert("zstd".equalsIgnoreCase(preAggTable.getTableInfo.getFactTable.getTableProperties.get(CarbonCommonConstants.COMPRESSOR))) // literal first: a missing property fails the assert instead of throwing NPE
+    sql("drop datamap if exists dm_mv ")
+    sql("create datamap dm_mv on table maintable using 'mv' as select name, sum(price) from maintable group by name")
+    val mvTable = CarbonMetadata.getInstance().getCarbonTable(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "dm_mv_table")
+    assert("zstd".equalsIgnoreCase(mvTable.getTableInfo.getFactTable.getTableProperties.get(CarbonCommonConstants.COMPRESSOR))) // same null-safe check for the mv child table
+    sql("drop table IF EXISTS maintable")
+  }
+
+  test("test sort_scope if sort_columns are provided") { // parent sets sort_columns but no sort_scope; both child tables must describe as LOCAL_SORT
+    sql("drop table IF EXISTS maintable")
+    sql("create table maintable(name string, c_code int, price int) stored by 'carbondata' tblproperties('sort_columns'='name')")
+    sql("insert into table maintable select 'abc',21,2000")
+    sql("drop datamap if exists dm_pre ")
+    sql("create datamap dm_pre on table maintable using 'preaggregate' as select name, sum(price) from maintable group by name")
+    checkExistence(sql("describe formatted maintable_dm_pre"), true, "Sort Scope LOCAL_SORT")
+    sql("create datamap dm_mv on table maintable using 'mv' as select name, sum(price) from maintable group by name")
+    checkExistence(sql("describe formatted dm_mv_table"), true, "Sort Scope LOCAL_SORT")
+    sql("drop table IF EXISTS maintable")
+  }
+
+  test("test inverted_index if sort_scope is provided") { // describe formatted must list inverted_index columns even though sort_scope is 'no_sort'
+    sql("drop table IF EXISTS maintable")
+    sql("create table maintable(name string, c_code int, price int) stored by 'carbondata' tblproperties('sort_scope'='no_sort','sort_columns'='name', 'inverted_index'='name')")
+    sql("insert into table maintable select 'abc',21,2000")
+    checkExistence(sql("describe formatted maintable"), true, "Inverted Index Columns name")
+    sql("drop datamap if exists dm_pre ")
+    sql("create datamap dm_pre on table maintable using 'preaggregate' as select name, sum(price) from maintable group by name")
+    checkExistence(sql("describe formatted maintable_dm_pre"), true, "Inverted Index Columns maintable_name") // child table is expected to list the column as maintable_name
+    sql("create datamap dm_mv on table maintable using 'mv' as select name, sum(price) from maintable group by name")
+    checkExistence(sql("describe formatted dm_mv_table"), true, "Inverted Index Columns maintable_name")
+    sql("drop table IF EXISTS maintable")
+  }
+
+  test("test sort column") { // create table with sort_columns='' and a sort_scope other than no_sort must be rejected
+    sql("drop table IF EXISTS maintable")
+    assert(intercept[MalformedCarbonCommandException] { // assert the message check; its Boolean result was previously discarded
+      sql("create table maintable(name string, c_code int, price int) stored by 'carbondata' tblproperties('sort_scope'='local_sort','sort_columns'='')")
+    }.getMessage.toUpperCase(java.util.Locale.ROOT).contains("CANNOT SET SORT_COLUMNS AS EMPTY WHEN SORT_SCOPE IS LOCAL_SORT")) // ignore case: the message echoes the user's sort_scope value
+  }
+
 }
 
diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md
index 2d43645..845d46d 100644
--- a/docs/ddl-of-carbondata.md
+++ b/docs/ddl-of-carbondata.md
@@ -811,6 +811,7 @@ Users can specify which columns to include and exclude for local dictionary gene
        new SORT_COLUMNS.  
        
        UNSET is not supported, but it can set SORT_COLUMNS to empty string instead of using UNSET.
+       NOTE: When SORT_SCOPE is not NO_SORT, then setting SORT_COLUMNS to empty string is not valid.
        ```
        ALTER TABLE tablename SET TBLPROPERTIES('SORT_COLUMNS'='')
        ```
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index d0e4ba7..23ab806 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -35,6 +35,7 @@ import org.apache.carbondata.common.constants.LoggerAction
 import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
 import org.apache.carbondata.common.logging.LogServiceFactory
 import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.constants.SortScopeOptions.SortScope
 import org.apache.carbondata.core.exception.InvalidConfigurationException
 import org.apache.carbondata.core.metadata.datatype.{DataType, DataTypes}
 import org.apache.carbondata.core.metadata.schema.PartitionInfo
@@ -295,6 +296,18 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
     fields.zipWithIndex.foreach { case (field, index) =>
       field.schemaOrdinal = index
     }
+
+    // If sort_scope is not no_sort && sort_columns specified by user is empty, then throw exception
+    if (tableProperties.get(CarbonCommonConstants.SORT_COLUMNS).isDefined
+        && tableProperties(CarbonCommonConstants.SORT_COLUMNS).equalsIgnoreCase("") &&
+        tableProperties.get(CarbonCommonConstants.SORT_SCOPE).isDefined &&
+        !tableProperties(CarbonCommonConstants.SORT_SCOPE)
+          .equalsIgnoreCase(SortScope.NO_SORT.name())) {
+      throw new MalformedCarbonCommandException(
+        s"Cannot set SORT_COLUMNS as empty when SORT_SCOPE is ${
+          tableProperties(CarbonCommonConstants.SORT_SCOPE)
+        } ")
+    }
     val (dims, msrs, noDictionaryDims, sortKeyDims, varcharColumns) = extractDimAndMsrFields(
       fields, tableProperties)
 
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
index ebe4daf..ad8ad2b 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
@@ -124,8 +124,8 @@ private[sql] case class CarbonDescribeFormattedCommand(
       ("## Index Information", "", ""),
       ("Sort Scope", sortScope, ""),
       ("Sort Columns", relation.metaData.carbonTable.getSortColumns.asScala.mkString(", "), ""),
-      ("Inverted Index Columns", carbonTable.getInvertedIndexColumns.asScala
-        .map(_.getColumnName).mkString(", "), ""),
+      ("Inverted Index Columns", carbonTable.getTableInfo.getFactTable.getTableProperties.asScala
+        .getOrElse(CarbonCommonConstants.INVERTED_INDEX, ""), ""),
       ("Cached Min/Max Index Columns",
         carbonTable.getMinMaxCachedColumnsInCreateOrder.asScala.mkString(", "), ""),
       ("Min/Max Index Cache Level",
diff --git a/integration/spark2/src/main/scala/org/apache/spark/util/DataMapUtil.scala b/integration/spark2/src/main/scala/org/apache/spark/util/DataMapUtil.scala
index faee180..7925798 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/util/DataMapUtil.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/util/DataMapUtil.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.util
 
 import java.io.IOException
-import java.util
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
@@ -28,8 +27,9 @@ import org.apache.spark.sql.execution.command.{DataMapField, Field}
 import org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datamap.DataMapStoreManager
+import org.apache.carbondata.core.datastore.compression.CompressorFactory
 import org.apache.carbondata.core.metadata.schema.datamap.DataMapClassProvider.MV
-import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, DataMapSchema}
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable
 
 /**
  * Utility class for keeping all the utility methods common for pre-aggregate and mv datamap
@@ -48,10 +48,14 @@ object DataMapUtil {
                            parentcol.equals(fieldRelationMap(col).
                              columnTableRelationList.get(0).parentColumnName))
         .map(cols => neworder :+= cols.column))
-    tableProperties.put(CarbonCommonConstants.SORT_COLUMNS, neworder.mkString(","))
-    tableProperties.put("sort_scope", parentTable.getTableInfo.getFactTable.
-      getTableProperties.asScala.getOrElse("sort_scope", CarbonCommonConstants
-      .LOAD_SORT_SCOPE_DEFAULT))
+    if (neworder.nonEmpty) {
+      tableProperties.put(CarbonCommonConstants.SORT_COLUMNS, neworder.mkString(","))
+    }
+    val sort_scope = parentTable.getTableInfo.getFactTable.getTableProperties.asScala
+      .get("sort_scope")
+    if (sort_scope.isDefined) {
+      tableProperties.put("sort_scope", sort_scope.get)
+    }
     tableProperties
       .put(CarbonCommonConstants.TABLE_BLOCKSIZE, parentTable.getBlockSizeInMB.toString)
     tableProperties.put(CarbonCommonConstants.FLAT_FOLDER,
@@ -98,6 +102,12 @@ object DataMapUtil {
       tableProperties.put(CarbonCommonConstants.LONG_STRING_COLUMNS,
         newLongStringColumn.mkString(","))
     }
+    // inherit compressor property
+    tableProperties
+      .put(CarbonCommonConstants.COMPRESSOR,
+        parentTable.getTableInfo.getFactTable.getTableProperties.asScala
+          .getOrElse(CarbonCommonConstants.COMPRESSOR,
+            CompressorFactory.getInstance().getCompressor.getName))
 
     // inherit the local dictionary properties of main parent table
     tableProperties
diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesLoadTest.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesLoadTest.scala
index 4050fd8..93255fa 100644
--- a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesLoadTest.scala
+++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesLoadTest.scala
@@ -247,7 +247,7 @@ class BooleanDataTypesLoadTest extends QueryTest with BeforeAndAfterEach with Be
          | booleanField2 BOOLEAN
          | )
          | STORED BY 'carbondata'
-         | TBLPROPERTIES('sort_columns'='','DICTIONARY_EXCLUDE'='charField','TABLE_BLOCKSIZE'='512','NO_INVERTED_INDEX'='charField', 'SORT_SCOPE'='GLOBAL_SORT')
+         | TBLPROPERTIES('DICTIONARY_EXCLUDE'='charField','TABLE_BLOCKSIZE'='512','NO_INVERTED_INDEX'='charField', 'SORT_SCOPE'='GLOBAL_SORT')
        """.stripMargin)
 
     val storeLocation = s"$rootPath/integration/spark2/src/test/resources/bool/supportBoolean.csv"
@@ -337,7 +337,7 @@ class BooleanDataTypesLoadTest extends QueryTest with BeforeAndAfterEach with Be
          | complexData ARRAY<STRING>
          | )
          | STORED BY 'carbondata'
-         | TBLPROPERTIES('sort_columns'='','DICTIONARY_EXCLUDE'='charField','TABLE_BLOCKSIZE'='512','NO_INVERTED_INDEX'='charField', 'SORT_SCOPE'='GLOBAL_SORT')
+         | TBLPROPERTIES('DICTIONARY_EXCLUDE'='charField','TABLE_BLOCKSIZE'='512','NO_INVERTED_INDEX'='charField', 'SORT_SCOPE'='GLOBAL_SORT')
        """.stripMargin)
 
     val storeLocation = s"$rootPath/integration/spark2/src/test/resources/bool/supportBooleanWithFileHeader.csv"