You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ma...@apache.org on 2018/10/24 09:17:39 UTC
carbondata git commit: [CARBONDATA-2998] Refresh column schema for
old store(before V3) for SORT_COLUMNS option
Repository: carbondata
Updated Branches:
refs/heads/master c429cee16 -> 278d17178
[CARBONDATA-2998] Refresh column schema for old store(before V3) for SORT_COLUMNS option
Problem:
For an old store (a store written before V3), the sort-column flag is not set in ColumnSchema, even though the columns are treated as SORT_COLUMNS. So, while refreshing
the table, the schema is read from the thrift and every column ends up as a non-sort column in ColumnSchema, because the flag was never set.
Solution:
While refreshing the table, check for the SORT_COLUMNS property in the table properties; if it is not set, then by default treat all the
dimension columns as SORT_COLUMNS.
This closes #2806
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/278d1717
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/278d1717
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/278d1717
Branch: refs/heads/master
Commit: 278d1717880541a052aa4a5ee96ba73423d650b1
Parents: c429cee
Author: dhatchayani <dh...@gmail.com>
Authored: Tue Oct 9 17:34:42 2018 +0530
Committer: manishgupta88 <to...@gmail.com>
Committed: Wed Oct 24 14:52:25 2018 +0530
----------------------------------------------------------------------
.../management/RefreshCarbonTableCommand.scala | 31 ++++++++++++++++++++
.../merger/CompactionResultSortProcessor.java | 6 ++--
2 files changed, 34 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/278d1717/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
index 39e85ba..c129194 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.command.management
import java.util
import scala.collection.JavaConverters._
+import scala.collection.mutable
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
@@ -75,6 +76,9 @@ case class RefreshCarbonTableCommand(
if (FileFactory.isFileExist(schemaFilePath, FileFactory.getFileType(schemaFilePath))) {
// read TableInfo
val tableInfo = SchemaReader.getTableInfo(identifier)
+ // refresh the column schema in case of store before V3
+ refreshColumnSchema(tableInfo)
+
// 2.2 register the table with the hive check if the table being registered has
// aggregate table then do the below steps
// 2.2.1 validate that all the aggregate tables are copied at the store location.
@@ -119,6 +123,33 @@ case class RefreshCarbonTableCommand(
}
/**
+ * Refresh the sort_column flag in column schema in case of old store. Before V3, sort_column
+ * option is not set but by default all dimension columns should be treated
+ * as sort columns if SORT_COLUMNS property is not defined in tblproperties.
+ *
+ * The column schemas of the fact table are updated in place; nothing is returned.
+ *
+ * @param tableInfo table info whose fact-table column schemas are refreshed; left untouched
+ *                  when the SORT_COLUMNS property is present in its table properties
+ */
+ def refreshColumnSchema(tableInfo: TableInfo): Unit = {
+   val tableProps: mutable.Map[String, String] = tableInfo.getFactTable.getTableProperties.asScala
+   // an explicit SORT_COLUMNS entry means the flags are already in the schema: don't do anything
+   if (!tableProps.contains(CarbonCommonConstants.SORT_COLUMNS)) {
+     // iterate over all the columns and make all the dimensions as sort columns true
+     // check for the complex data types parent and child columns to
+     // avoid adding them in SORT_COLUMNS
+     // foreach (not collect): the loop exists only for its side effect, so no result
+     // collection should be built
+     tableInfo.getFactTable.getListOfColumns.asScala.foreach { columnSchema =>
+       if (columnSchema.isDimensionColumn &&
+           !columnSchema.getDataType.isComplexType &&
+           columnSchema.getSchemaOrdinal != -1) {
+         columnSchema.setSortColumn(true)
+       }
+     }
+   }
+ }
+
+ /**
* the method prepare the data type for raw column
*
* @param column
http://git-wip-us.apache.org/repos/asf/carbondata/blob/278d1717/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
index e0a30da..8d28d45 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
@@ -415,13 +415,13 @@ public class CompactionResultSortProcessor extends AbstractResultProcessor {
isVarcharDimMapping = new boolean[dimensions.size()];
int i = 0;
for (CarbonDimension dimension : dimensions) {
+ if (dimension.isSortColumn()) {
+ sortColumnMapping[i] = true;
+ }
if (CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DICTIONARY)) {
i++;
continue;
}
- if (dimension.isSortColumn()) {
- sortColumnMapping[i] = true;
- }
noDictionaryColMapping[i] = true;
if (dimension.getColumnSchema().getDataType() == DataTypes.VARCHAR) {
isVarcharDimMapping[i] = true;