You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/06/12 07:04:12 UTC
[carbondata] branch master updated: [CARBONDATA-3418] Inherit
Column Compressor Property from parent table to its child table's
This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 7678f37 [CARBONDATA-3418] Inherit Column Compressor Property from parent table to its child table's
7678f37 is described below
commit 7678f375d2fa82d59a70cbbc5abe39a5bb27b3e0
Author: Indhumathi27 <in...@gmail.com>
AuthorDate: Thu Jun 6 18:42:12 2019 +0530
[CARBONDATA-3418] Inherit Column Compressor Property from parent table to its child table's
Inherited the Column Compressor property from the parent table to its child tables
Fixed Describe formatted command to show inverted_index column, even if sort_scope is 'no_sort'
Fixed inheriting sort_scope to child tables, when sort_columns is provided and sort_scope is not provided
Altering a table to set sort_columns="" is not supported when sort_scope is not no_sort. The same
behavior is now applied to create table with sort_columns="" when sort_scope is not no_sort
This closes #3264
---
.../mv/rewrite/TestAllOperationsOnMV.scala | 49 ++++++++++++++++++++++
docs/ddl-of-carbondata.md | 1 +
.../spark/sql/catalyst/CarbonDDLSqlParser.scala | 13 ++++++
.../table/CarbonDescribeFormattedCommand.scala | 4 +-
.../scala/org/apache/spark/util/DataMapUtil.scala | 22 +++++++---
.../booleantype/BooleanDataTypesLoadTest.scala | 4 +-
6 files changed, 83 insertions(+), 10 deletions(-)
diff --git a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
index e88a565..5f0a490 100644
--- a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
+++ b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
@@ -22,6 +22,8 @@ import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterEach
import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.metadata.CarbonMetadata
import org.apache.carbondata.spark.exception.ProcessMetaDataException
/**
@@ -460,5 +462,52 @@ class TestAllOperationsOnMV extends QueryTest with BeforeAndAfterEach {
sql("drop table IF EXISTS maintable")
}
+ test("test column compressor on preagg and mv") {
+ sql("drop table IF EXISTS maintable")
+ sql("create table maintable(name string, c_code int, price int) stored by 'carbondata' tblproperties('carbon.column.compressor'='zstd')")
+ sql("insert into table maintable select 'abc',21,2000")
+ sql("drop datamap if exists dm_pre ")
+ sql("create datamap dm_pre on table maintable using 'preaggregate' as select name, sum(price) from maintable group by name")
+ var dataMapTable = CarbonMetadata.getInstance().getCarbonTable(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "maintable_dm_pre")
+ assert(dataMapTable.getTableInfo.getFactTable.getTableProperties.get(CarbonCommonConstants.COMPRESSOR).equalsIgnoreCase("zstd"))
+ sql("drop datamap if exists dm_mv ")
+ sql("create datamap dm_mv on table maintable using 'mv' as select name, sum(price) from maintable group by name")
+ dataMapTable = CarbonMetadata.getInstance().getCarbonTable(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "dm_mv_table")
+ assert(dataMapTable.getTableInfo.getFactTable.getTableProperties.get(CarbonCommonConstants.COMPRESSOR).equalsIgnoreCase("zstd"))
+ sql("drop table IF EXISTS maintable")
+ }
+
+ test("test sort_scope if sort_columns are provided") {
+ sql("drop table IF EXISTS maintable")
+ sql("create table maintable(name string, c_code int, price int) stored by 'carbondata' tblproperties('sort_columns'='name')")
+ sql("insert into table maintable select 'abc',21,2000")
+ sql("drop datamap if exists dm_pre ")
+ sql("create datamap dm_pre on table maintable using 'preaggregate' as select name, sum(price) from maintable group by name")
+ checkExistence(sql("describe formatted maintable_dm_pre"), true, "Sort Scope LOCAL_SORT")
+ sql("create datamap dm_mv on table maintable using 'mv' as select name, sum(price) from maintable group by name")
+ checkExistence(sql("describe formatted dm_mv_table"), true, "Sort Scope LOCAL_SORT")
+ sql("drop table IF EXISTS maintable")
+ }
+
+ test("test inverted_index if sort_scope is provided") {
+ sql("drop table IF EXISTS maintable")
+ sql("create table maintable(name string, c_code int, price int) stored by 'carbondata' tblproperties('sort_scope'='no_sort','sort_columns'='name', 'inverted_index'='name')")
+ sql("insert into table maintable select 'abc',21,2000")
+ checkExistence(sql("describe formatted maintable"), true, "Inverted Index Columns name")
+ sql("drop datamap if exists dm_pre ")
+ sql("create datamap dm_pre on table maintable using 'preaggregate' as select name, sum(price) from maintable group by name")
+ checkExistence(sql("describe formatted maintable_dm_pre"), true, "Inverted Index Columns maintable_name")
+ sql("create datamap dm_mv on table maintable using 'mv' as select name, sum(price) from maintable group by name")
+ checkExistence(sql("describe formatted dm_mv_table"), true, "Inverted Index Columns maintable_name")
+ sql("drop table IF EXISTS maintable")
+ }
+
+ test("test sort column") {
+ sql("drop table IF EXISTS maintable")
+ intercept[MalformedCarbonCommandException] {
+ sql("create table maintable(name string, c_code int, price int) stored by 'carbondata' tblproperties('sort_scope'='local_sort','sort_columns'='')")
+ }.getMessage.contains("Cannot set SORT_COLUMNS as empty when SORT_SCOPE is LOCAL_SORT")
+ }
+
}
diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md
index 2d43645..845d46d 100644
--- a/docs/ddl-of-carbondata.md
+++ b/docs/ddl-of-carbondata.md
@@ -811,6 +811,7 @@ Users can specify which columns to include and exclude for local dictionary gene
new SORT_COLUMNS.
UNSET is not supported, but it can set SORT_COLUMNS to empty string instead of using UNSET.
+ NOTE: When SORT_SCOPE is not NO_SORT, then setting SORT_COLUMNS to empty string is not valid.
```
ALTER TABLE tablename SET TBLPROPERTIES('SORT_COLUMNS'='')
```
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index d0e4ba7..23ab806 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -35,6 +35,7 @@ import org.apache.carbondata.common.constants.LoggerAction
import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.constants.SortScopeOptions.SortScope
import org.apache.carbondata.core.exception.InvalidConfigurationException
import org.apache.carbondata.core.metadata.datatype.{DataType, DataTypes}
import org.apache.carbondata.core.metadata.schema.PartitionInfo
@@ -295,6 +296,18 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
fields.zipWithIndex.foreach { case (field, index) =>
field.schemaOrdinal = index
}
+
+ // If sort_scope is not no_sort && sort_columns specified by user is empty, then throw exception
+ if (tableProperties.get(CarbonCommonConstants.SORT_COLUMNS).isDefined
+ && tableProperties(CarbonCommonConstants.SORT_COLUMNS).equalsIgnoreCase("") &&
+ tableProperties.get(CarbonCommonConstants.SORT_SCOPE).isDefined &&
+ !tableProperties(CarbonCommonConstants.SORT_SCOPE)
+ .equalsIgnoreCase(SortScope.NO_SORT.name())) {
+ throw new MalformedCarbonCommandException(
+ s"Cannot set SORT_COLUMNS as empty when SORT_SCOPE is ${
+ tableProperties(CarbonCommonConstants.SORT_SCOPE)
+ } ")
+ }
val (dims, msrs, noDictionaryDims, sortKeyDims, varcharColumns) = extractDimAndMsrFields(
fields, tableProperties)
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
index ebe4daf..ad8ad2b 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
@@ -124,8 +124,8 @@ private[sql] case class CarbonDescribeFormattedCommand(
("## Index Information", "", ""),
("Sort Scope", sortScope, ""),
("Sort Columns", relation.metaData.carbonTable.getSortColumns.asScala.mkString(", "), ""),
- ("Inverted Index Columns", carbonTable.getInvertedIndexColumns.asScala
- .map(_.getColumnName).mkString(", "), ""),
+ ("Inverted Index Columns", carbonTable.getTableInfo.getFactTable.getTableProperties.asScala
+ .getOrElse(CarbonCommonConstants.INVERTED_INDEX, ""), ""),
("Cached Min/Max Index Columns",
carbonTable.getMinMaxCachedColumnsInCreateOrder.asScala.mkString(", "), ""),
("Min/Max Index Cache Level",
diff --git a/integration/spark2/src/main/scala/org/apache/spark/util/DataMapUtil.scala b/integration/spark2/src/main/scala/org/apache/spark/util/DataMapUtil.scala
index faee180..7925798 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/util/DataMapUtil.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/util/DataMapUtil.scala
@@ -18,7 +18,6 @@
package org.apache.spark.util
import java.io.IOException
-import java.util
import scala.collection.JavaConverters._
import scala.collection.mutable
@@ -28,8 +27,9 @@ import org.apache.spark.sql.execution.command.{DataMapField, Field}
import org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.datamap.DataMapStoreManager
+import org.apache.carbondata.core.datastore.compression.CompressorFactory
import org.apache.carbondata.core.metadata.schema.datamap.DataMapClassProvider.MV
-import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, DataMapSchema}
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable
/**
* Utility class for keeping all the utility methods common for pre-aggregate and mv datamap
@@ -48,10 +48,14 @@ object DataMapUtil {
parentcol.equals(fieldRelationMap(col).
columnTableRelationList.get(0).parentColumnName))
.map(cols => neworder :+= cols.column))
- tableProperties.put(CarbonCommonConstants.SORT_COLUMNS, neworder.mkString(","))
- tableProperties.put("sort_scope", parentTable.getTableInfo.getFactTable.
- getTableProperties.asScala.getOrElse("sort_scope", CarbonCommonConstants
- .LOAD_SORT_SCOPE_DEFAULT))
+ if (neworder.nonEmpty) {
+ tableProperties.put(CarbonCommonConstants.SORT_COLUMNS, neworder.mkString(","))
+ }
+ val sort_scope = parentTable.getTableInfo.getFactTable.getTableProperties.asScala
+ .get("sort_scope")
+ if (sort_scope.isDefined) {
+ tableProperties.put("sort_scope", sort_scope.get)
+ }
tableProperties
.put(CarbonCommonConstants.TABLE_BLOCKSIZE, parentTable.getBlockSizeInMB.toString)
tableProperties.put(CarbonCommonConstants.FLAT_FOLDER,
@@ -98,6 +102,12 @@ object DataMapUtil {
tableProperties.put(CarbonCommonConstants.LONG_STRING_COLUMNS,
newLongStringColumn.mkString(","))
}
+ // inherit compressor property
+ tableProperties
+ .put(CarbonCommonConstants.COMPRESSOR,
+ parentTable.getTableInfo.getFactTable.getTableProperties.asScala
+ .getOrElse(CarbonCommonConstants.COMPRESSOR,
+ CompressorFactory.getInstance().getCompressor.getName))
// inherit the local dictionary properties of main parent table
tableProperties
diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesLoadTest.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesLoadTest.scala
index 4050fd8..93255fa 100644
--- a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesLoadTest.scala
+++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/booleantype/BooleanDataTypesLoadTest.scala
@@ -247,7 +247,7 @@ class BooleanDataTypesLoadTest extends QueryTest with BeforeAndAfterEach with Be
| booleanField2 BOOLEAN
| )
| STORED BY 'carbondata'
- | TBLPROPERTIES('sort_columns'='','DICTIONARY_EXCLUDE'='charField','TABLE_BLOCKSIZE'='512','NO_INVERTED_INDEX'='charField', 'SORT_SCOPE'='GLOBAL_SORT')
+ | TBLPROPERTIES('DICTIONARY_EXCLUDE'='charField','TABLE_BLOCKSIZE'='512','NO_INVERTED_INDEX'='charField', 'SORT_SCOPE'='GLOBAL_SORT')
""".stripMargin)
val storeLocation = s"$rootPath/integration/spark2/src/test/resources/bool/supportBoolean.csv"
@@ -337,7 +337,7 @@ class BooleanDataTypesLoadTest extends QueryTest with BeforeAndAfterEach with Be
| complexData ARRAY<STRING>
| )
| STORED BY 'carbondata'
- | TBLPROPERTIES('sort_columns'='','DICTIONARY_EXCLUDE'='charField','TABLE_BLOCKSIZE'='512','NO_INVERTED_INDEX'='charField', 'SORT_SCOPE'='GLOBAL_SORT')
+ | TBLPROPERTIES('DICTIONARY_EXCLUDE'='charField','TABLE_BLOCKSIZE'='512','NO_INVERTED_INDEX'='charField', 'SORT_SCOPE'='GLOBAL_SORT')
""".stripMargin)
val storeLocation = s"$rootPath/integration/spark2/src/test/resources/bool/supportBooleanWithFileHeader.csv"