You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/05/16 19:05:43 UTC

[carbondata] 10/22: [HOTFIX] support compact segments with different sort_columns

This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit 9f23d2c1aeabbae0d9b53899e2f91d3ccd8888b9
Author: QiangCai <qi...@qq.com>
AuthorDate: Thu Apr 25 19:08:49 2019 +0800

    [HOTFIX] support compact segments with different sort_columns
    
    This closes #3190
---
 .../core/scan/executor/util/RestructureUtil.java   |  2 +-
 .../merger/CarbonCompactionExecutor.java           |  3 +-
 .../processing/merger/CarbonCompactionUtil.java    | 39 ++++++++++++++++++++--
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
index 11b7372..0f93227 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
@@ -160,7 +160,7 @@ public class RestructureUtil {
    * @param tableColumn
    * @return
    */
-  private static boolean isColumnMatches(boolean isTransactionalTable,
+  public static boolean isColumnMatches(boolean isTransactionalTable,
       CarbonColumn queryColumn, CarbonColumn tableColumn) {
     // If it is non transactional table just check the column names, no need to validate
     // column id as multiple sdk's output placed in a single folder doesn't have same
diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java
index 5961cd7..619b45a 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionExecutor.java
@@ -136,8 +136,7 @@ public class CarbonCompactionExecutor {
       Set<String> taskBlockListMapping = taskBlockInfo.getTaskSet();
       // Check if block needs sorting or not
       boolean sortingRequired =
-          CarbonCompactionUtil.isRestructured(listMetadata, carbonTable.getTableLastUpdatedTime())
-              || !CarbonCompactionUtil.isSorted(listMetadata.get(0));
+          !CarbonCompactionUtil.isSortedByCurrentSortColumns(carbonTable, listMetadata.get(0));
       for (String task : taskBlockListMapping) {
         tableBlockInfos = taskBlockInfo.getTableBlockInfoList(task);
         // during update there may be a chance that the cardinality may change within the segment
diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
index efd2559..c4b6843 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
@@ -35,6 +35,7 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
 import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+import org.apache.carbondata.core.scan.executor.util.RestructureUtil;
 import org.apache.carbondata.core.util.CarbonUtil;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
@@ -464,12 +465,44 @@ public class CarbonCompactionUtil {
    * Returns if the DataFileFooter containing carbondata file contains
    * sorted data or not.
    *
+   * @param table the table whose current sort_columns the file's sort columns are checked against
    * @param footer
    * @return
-   * @throws IOException
    */
-  public static boolean isSorted(DataFileFooter footer) throws IOException {
-    return footer.isSorted();
+  public static boolean isSortedByCurrentSortColumns(CarbonTable table, DataFileFooter footer) {
+    if (footer.isSorted()) {
+      // A file sorted by sort_columns that differ from the table's current ones is not sorted.
+      List<CarbonDimension> sortColumnsOfSegment = new ArrayList<>();
+      for (ColumnSchema column : footer.getColumnInTable()) {
+        if (column.isDimensionColumn() && column.isSortColumn()) {
+          sortColumnsOfSegment.add(new CarbonDimension(column, -1, -1, -1));
+        }
+      }
+      if (sortColumnsOfSegment.size() < table.getNumberOfSortColumns()) {
+        return false;
+      }
+      List<CarbonDimension> sortColumnsOfTable = new ArrayList<>();
+      for (CarbonDimension dimension : table.getDimensions()) {
+        if (dimension.isSortColumn()) {
+          sortColumnsOfTable.add(dimension);
+        }
+      }
+      int sortColumnNums = sortColumnsOfTable.size();
+      if (sortColumnsOfSegment.size() < sortColumnNums) {
+        return false;
+      }
+      // compare in order: the table's sort_columns must be a prefix of the file's sort columns
+      for (int i = 0; i < sortColumnNums; i++) {
+        if (!RestructureUtil
+            .isColumnMatches(table.isTransactionalTable(), sortColumnsOfTable.get(i),
+                sortColumnsOfSegment.get(i))) {
+          return false;
+        }
+      }
+      return true;
+    } else {
+      return false;
+    }
   }
 
 }