Posted to commits@carbondata.apache.org by ma...@apache.org on 2018/10/24 09:17:39 UTC

carbondata git commit: [CARBONDATA-2998] Refresh column schema for old store(before V3) for SORT_COLUMNS option

Repository: carbondata
Updated Branches:
  refs/heads/master c429cee16 -> 278d17178


[CARBONDATA-2998] Refresh column schema for old store(before V3) for SORT_COLUMNS option

Problem:
For old stores (written before V3), the sort_column flag is not set in ColumnSchema, yet all dimension columns were implicitly treated as SORT_COLUMNS. So, while refreshing the
table, the schema read back from the thrift file marks those columns as no-sort columns in ColumnSchema, because the flag was never persisted.

Solution:
While refreshing the table, check for the SORT_COLUMNS property in the table properties; if it is not set, treat all
dimension columns as SORT_COLUMNS by default. Additionally, in CompactionResultSortProcessor, fill the sort column mapping
before the dictionary-encoding check so that dictionary-encoded sort columns are also flagged.
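
The following is a minimal standalone sketch of the default rule applied here. It uses simplified stand-in types
instead of CarbonData's actual TableInfo/ColumnSchema classes; names such as Column and refreshSortColumns are
illustrative only, not part of the patch.

object SortColumnsDefaultSketch {

  // Simplified stand-in for CarbonData's ColumnSchema (illustrative only).
  final case class Column(
      name: String,
      isDimensionColumn: Boolean,
      isComplexType: Boolean,
      schemaOrdinal: Int,
      var isSortColumn: Boolean = false)

  // If the old table's tblproperties carry no SORT_COLUMNS entry, flag every
  // plain dimension column as a sort column, mirroring the pre-V3 default.
  // As in the patch, complex types and schemaOrdinal == -1 are skipped so that
  // complex parent/child columns are not picked up.
  def refreshSortColumns(tableProperties: Map[String, String], columns: Seq[Column]): Unit = {
    if (!tableProperties.contains("sort_columns")) {
      columns
        .filter(c => c.isDimensionColumn && !c.isComplexType && c.schemaOrdinal != -1)
        .foreach(c => c.isSortColumn = true)
    }
  }

  def main(args: Array[String]): Unit = {
    val cols = Seq(
      Column("name", isDimensionColumn = true, isComplexType = false, schemaOrdinal = 0),
      Column("struct_col", isDimensionColumn = true, isComplexType = true, schemaOrdinal = 1),
      Column("salary", isDimensionColumn = false, isComplexType = false, schemaOrdinal = 2))
    // No SORT_COLUMNS in the table properties -> only "name" becomes a sort column.
    refreshSortColumns(Map.empty, cols)
    cols.foreach(c => println(s"${c.name} -> sortColumn=${c.isSortColumn}"))
  }
}

The actual change applies the same rule via ColumnSchema.setSortColumn during table refresh, as shown in the diff below.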

This closes #2806


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/278d1717
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/278d1717
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/278d1717

Branch: refs/heads/master
Commit: 278d1717880541a052aa4a5ee96ba73423d650b1
Parents: c429cee
Author: dhatchayani <dh...@gmail.com>
Authored: Tue Oct 9 17:34:42 2018 +0530
Committer: manishgupta88 <to...@gmail.com>
Committed: Wed Oct 24 14:52:25 2018 +0530

----------------------------------------------------------------------
 .../management/RefreshCarbonTableCommand.scala  | 31 ++++++++++++++++++++
 .../merger/CompactionResultSortProcessor.java   |  6 ++--
 2 files changed, 34 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/278d1717/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
index 39e85ba..c129194 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.command.management
 import java.util
 
 import scala.collection.JavaConverters._
+import scala.collection.mutable
 
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
@@ -75,6 +76,9 @@ case class RefreshCarbonTableCommand(
       if (FileFactory.isFileExist(schemaFilePath, FileFactory.getFileType(schemaFilePath))) {
         // read TableInfo
         val tableInfo = SchemaReader.getTableInfo(identifier)
+        // refresh the column schema in case of store before V3
+        refreshColumnSchema(tableInfo)
+
         // 2.2 register the table with the hive check if the table being registered has
         // aggregate table then do the below steps
         // 2.2.1 validate that all the aggregate tables are copied at the store location.
@@ -119,6 +123,33 @@ case class RefreshCarbonTableCommand(
   }
 
   /**
+   * Refresh the sort_column flag in column schema in case of old store. Before V3, sort_column
+   * option is not set but by default all dimension columns should be treated
+   * as sort columns if SORT_COLUMNS property is not defined in tblproperties
+   *
+   * @param tableInfo
+   */
+  def refreshColumnSchema(tableInfo: TableInfo): Unit = {
+    val tableProps: mutable.Map[String, String] = tableInfo.getFactTable.getTableProperties.asScala
+    val sortColumns = tableProps.get(CarbonCommonConstants.SORT_COLUMNS)
+    sortColumns match {
+      case Some(sortColumn) =>
+      // don't do anything
+      case None =>
+        // iterate over all the columns and make all the dimensions as sort columns true
+        // check for the complex data types parent and child columns to
+        // avoid adding them in SORT_COLUMNS
+        tableInfo.getFactTable.getListOfColumns.asScala collect
+        ({
+          case columnSchema if columnSchema.isDimensionColumn &&
+                               !columnSchema.getDataType.isComplexType &&
+                               columnSchema.getSchemaOrdinal != -1 =>
+            columnSchema.setSortColumn(true)
+        })
+    }
+  }
+
+  /**
    * the method prepare the data type for raw column
    *
    * @param column

http://git-wip-us.apache.org/repos/asf/carbondata/blob/278d1717/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
index e0a30da..8d28d45 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
@@ -415,13 +415,13 @@ public class CompactionResultSortProcessor extends AbstractResultProcessor {
     isVarcharDimMapping = new boolean[dimensions.size()];
     int i = 0;
     for (CarbonDimension dimension : dimensions) {
+      if (dimension.isSortColumn()) {
+        sortColumnMapping[i] = true;
+      }
       if (CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DICTIONARY)) {
         i++;
         continue;
       }
-      if (dimension.isSortColumn()) {
-        sortColumnMapping[i] = true;
-      }
       noDictionaryColMapping[i] = true;
       if (dimension.getColumnSchema().getDataType() == DataTypes.VARCHAR) {
         isVarcharDimMapping[i] = true;