You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2018/09/25 16:08:20 UTC

carbondata git commit: [HOTFIX] Fixed data loading failure

Repository: carbondata
Updated Branches:
  refs/heads/master 6a2a94d05 -> d7913b1fd


[HOTFIX] Fixed data loading failure

Problem:

1.Data loading is failing with ArrayIndexOutOfBoundException when all columns are not considered in sort columns. this is because while filling the sort columns details in CarbonDataProcessorUtil.getNoDictSortColMapping it is not checking whether column is present on sort column or not.
2.Some time in CI testcases are failing because of Negative array exception , still it is not clear from the code the main root cause of failure, this failure may happen in customer actual deployment .
Solution:

1.Add only those columns which is present in sort columns
2.Currently disabling unsafe default value to false

This closes #2754


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d7913b1f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d7913b1f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d7913b1f

Branch: refs/heads/master
Commit: d7913b1fdbfb9eb080f4f14f2ebff984448710f4
Parents: 6a2a94d
Author: kumarvishal09 <ku...@gmail.com>
Authored: Mon Sep 24 19:34:59 2018 +0530
Committer: Jacky Li <ja...@qq.com>
Committed: Wed Sep 26 00:08:00 2018 +0800

----------------------------------------------------------------------
 .../core/constants/CarbonCommonConstants.java   |  2 +-
 ...feVariableLengthDimensionDataChunkStore.java |  2 +-
 .../load/DataLoadProcessBuilderOnSpark.scala    |  3 +-
 .../sort/sortdata/NewRowComparator.java         | 48 +++++++-------------
 .../processing/sort/sortdata/SortDataRows.java  |  7 ++-
 .../util/CarbonDataProcessorUtil.java           |  6 +--
 6 files changed, 26 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 21f1f34..faad0dc 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1298,7 +1298,7 @@ public final class CarbonCommonConstants {
   /**
    * default property of unsafe processing
    */
-  public static final String ENABLE_UNSAFE_IN_QUERY_EXECUTION_DEFAULTVALUE = "true";
+  public static final String ENABLE_UNSAFE_IN_QUERY_EXECUTION_DEFAULTVALUE = "false";
 
   /**
    * whether to prefetch data while loading.

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
index 196dc4c..8553506 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
@@ -139,7 +139,7 @@ public abstract class SafeVariableLengthDimensionDataChunkStore
       length = dataOffsets[rowId + 1] - (currentDataOffset + getLengthSize());
     } else {
       // for last record
-      length = (short) (this.data.length - currentDataOffset);
+      length = this.data.length - currentDataOffset;
     }
     DataType dt = vector.getType();
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
index e810829..2e74a94 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
@@ -86,8 +86,7 @@ object DataLoadProcessBuilderOnSpark {
     val sortParameters = SortParameters.createSortParameters(configuration)
     val rowComparator: Comparator[Array[AnyRef]] =
       if (sortParameters.getNoDictionaryCount > 0) {
-        new NewRowComparator(sortParameters.getNoDictionaryDimnesionColumn,
-          sortParameters.getNoDictionarySortColumn,
+        new NewRowComparator(sortParameters.getNoDictionarySortColumn,
           sortParameters.getNoDictDataType)
       } else {
         new NewRowComparatorForNormalDims(sortParameters.getDimColCount)

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java
index f213764..09b3a59 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java
@@ -28,21 +28,12 @@ import org.apache.carbondata.core.util.comparator.SerializableComparator;
 public class NewRowComparator implements Comparator<Object[]>, Serializable {
   private static final long serialVersionUID = -1739874611112709436L;
 
-  /**
-   * mapping of dictionary dimensions and no dictionary of sort_column.
-   */
-  private boolean[] noDicDimColMapping;
-
   private DataType[] noDicDataTypes;
 
   private boolean[] noDicSortColumnMapping;
 
-  /**
-   * @param noDicDimColMapping
-   */
-  public NewRowComparator(boolean[] noDicDimColMapping, boolean[] noDicSortColumnMapping,
+  public NewRowComparator(boolean[] noDicSortColumnMapping,
       DataType[] noDicDataTypes) {
-    this.noDicDimColMapping = noDicDimColMapping;
     this.noDicSortColumnMapping = noDicSortColumnMapping;
     this.noDicDataTypes = noDicDataTypes;
   }
@@ -55,27 +46,23 @@ public class NewRowComparator implements Comparator<Object[]>, Serializable {
     int index = 0;
     int dataTypeIdx = 0;
     int noDicSortIdx = 0;
+    for (int i = 0; i < noDicSortColumnMapping.length; i++) {
+      if (noDicSortColumnMapping[noDicSortIdx++]) {
+        if (DataTypeUtil.isPrimitiveColumn(noDicDataTypes[dataTypeIdx])) {
+          // use data types based comparator for the no dictionary measure columns
+          SerializableComparator comparator = org.apache.carbondata.core.util.comparator.Comparator
+              .getComparator(noDicDataTypes[dataTypeIdx]);
+          int difference = comparator.compare(rowA[index], rowB[index]);
+          if (difference != 0) {
+            return difference;
+          }
+        } else {
+          byte[] byteArr1 = (byte[]) rowA[index];
+          byte[] byteArr2 = (byte[]) rowB[index];
 
-    for (int i = 0; i < noDicDimColMapping.length; i++) {
-      if (noDicDimColMapping[i]) {
-        if (noDicSortColumnMapping[noDicSortIdx++]) {
-          if (DataTypeUtil.isPrimitiveColumn(noDicDataTypes[dataTypeIdx])) {
-            // use data types based comparator for the no dictionary measure columns
-            SerializableComparator comparator =
-                org.apache.carbondata.core.util.comparator.Comparator
-                    .getComparator(noDicDataTypes[dataTypeIdx]);
-            int difference = comparator.compare(rowA[index], rowB[index]);
-            if (difference != 0) {
-              return difference;
-            }
-          } else {
-            byte[] byteArr1 = (byte[]) rowA[index];
-            byte[] byteArr2 = (byte[]) rowB[index];
-
-            int difference = UnsafeComparer.INSTANCE.compareTo(byteArr1, byteArr2);
-            if (difference != 0) {
-              return difference;
-            }
+          int difference = UnsafeComparer.INSTANCE.compareTo(byteArr1, byteArr2);
+          if (difference != 0) {
+            return difference;
           }
         }
         dataTypeIdx++;
@@ -88,7 +75,6 @@ public class NewRowComparator implements Comparator<Object[]>, Serializable {
           return diff;
         }
       }
-
       index++;
     }
     return diff;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java
index 40ff2c9..637741c 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortDataRows.java
@@ -204,8 +204,7 @@ public class SortDataRows {
       toSort = new Object[entryCount][];
       System.arraycopy(recordHolderList, 0, toSort, 0, entryCount);
       if (parameters.getNumberOfNoDictSortColumns() > 0) {
-        Arrays.sort(toSort, new NewRowComparator(parameters.getNoDictionaryDimnesionColumn(),
-            parameters.getNoDictionarySortColumn(),
+        Arrays.sort(toSort, new NewRowComparator(parameters.getNoDictionarySortColumn(),
             parameters.getNoDictDataType()));
       } else {
         Arrays.sort(toSort, new NewRowComparatorForNormalDims(parameters.getNumberOfSortColumns()));
@@ -318,8 +317,8 @@ public class SortDataRows {
         long startTime = System.currentTimeMillis();
         if (parameters.getNumberOfNoDictSortColumns() > 0) {
           Arrays.sort(recordHolderArray,
-              new NewRowComparator(parameters.getNoDictionaryDimnesionColumn(),
-                  parameters.getNoDictionarySortColumn(), parameters.getNoDictDataType()));
+              new NewRowComparator(parameters.getNoDictionarySortColumn(),
+                  parameters.getNoDictDataType()));
         } else {
           Arrays.sort(recordHolderArray,
               new NewRowComparatorForNormalDims(parameters.getNumberOfSortColumns()));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java
index 3ba1e1d..525f7ee 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java
@@ -439,7 +439,7 @@ public final class CarbonDataProcessorUtil {
     List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName);
     List<DataType> type = new ArrayList<>();
     for (int i = 0; i < dimensions.size(); i++) {
-      if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) {
+      if (dimensions.get(i).isSortColumn() && !dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) {
         type.add(dimensions.get(i).getDataType());
       }
     }
@@ -458,8 +458,8 @@ public final class CarbonDataProcessorUtil {
     List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName);
     List<Boolean> noDicSortColMap = new ArrayList<>();
     for (int i = 0; i < dimensions.size(); i++) {
-      if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) {
-        if (dimensions.get(i).isSortColumn()) {
+      if (dimensions.get(i).isSortColumn()) {
+        if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) {
           noDicSortColMap.add(true);
         } else {
           noDicSortColMap.add(false);