You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/04/17 07:37:10 UTC

[carbondata] branch master updated: [CARBONDATA-3353 ]Fixed MinMax Based Pruning for Measure column in case of Legacy store

This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new a803304  [CARBONDATA-3353 ]Fixed MinMax Based Pruning for Measure column in case of Legacy store
a803304 is described below

commit a803304fc5c39dadf23c66c1b5412952ade1c9eb
Author: Indhumathi27 <in...@gmail.com>
AuthorDate: Fri Apr 12 12:25:38 2019 +0530

    [CARBONDATA-3353 ]Fixed MinMax Based Pruning for Measure column in case of Legacy store
    
    Why this PR needed?
    
    Problem:
    For table created and loaded with legacy store having a measure column, while building the page min max,
    min is written as max and viceversa, so blocklet level minmax is wrong. With current version, when we query with filter on measure column, measure filter pruning is skipping some blocks and giving wrong results.
    
    Solution:
    Skip MinMax based pruning in case of legacy store for measure column.
    
    This closes #3176
---
 .../indexstore/blockletindex/BlockDataMap.java     | 15 +------
 .../scan/executor/impl/AbstractQueryExecutor.java  | 15 +++----
 .../carbondata/core/scan/filter/FilterUtil.java    |  8 ++++
 .../filter/executer/IncludeFilterExecuterImpl.java | 11 +++--
 .../RowLevelRangeGrtThanFiterExecuterImpl.java     | 10 +++--
 ...LevelRangeGrtrThanEquaToFilterExecuterImpl.java | 10 +++--
 ...wLevelRangeLessThanEqualFilterExecuterImpl.java | 10 +++--
 .../RowLevelRangeLessThanFilterExecuterImpl.java   | 10 +++--
 .../apache/carbondata/core/util/CarbonUtil.java    | 47 ----------------------
 .../carbondata/core/util/CarbonUtilTest.java       | 46 ---------------------
 10 files changed, 51 insertions(+), 131 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
index 5b2132c..1fc5831 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
@@ -67,7 +67,6 @@ import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
 import org.apache.carbondata.core.scan.model.QueryModel;
 import org.apache.carbondata.core.util.BlockletDataMapUtil;
 import org.apache.carbondata.core.util.ByteUtil;
-import org.apache.carbondata.core.util.CarbonUtil;
 import org.apache.carbondata.core.util.DataFileFooterConverter;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
@@ -219,7 +218,7 @@ public class BlockDataMap extends CoarseGrainDataMap
     DataMapRowImpl summaryRow = null;
     CarbonRowSchema[] schema = getFileFooterEntrySchema();
     boolean[] minMaxFlag = new boolean[segmentProperties.getColumnsValueSize().length];
-    Arrays.fill(minMaxFlag, true);
+    FilterUtil.setMinMaxFlagForLegacyStore(minMaxFlag, segmentProperties);
     long totalRowCount = 0;
     for (DataFileFooter fileFooter : indexInfo) {
       TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo();
@@ -232,19 +231,9 @@ public class BlockDataMap extends CoarseGrainDataMap
       if (null != blockMetaInfo) {
         BlockletIndex blockletIndex = fileFooter.getBlockletIndex();
         BlockletMinMaxIndex minMaxIndex = blockletIndex.getMinMaxIndex();
-        byte[][] minValues =
-            BlockletDataMapUtil.updateMinValues(segmentProperties, minMaxIndex.getMinValues());
-        byte[][] maxValues =
-            BlockletDataMapUtil.updateMaxValues(segmentProperties, minMaxIndex.getMaxValues());
-        // update min max values in case of old store for measures as measure min/max in
-        // old stores in written opposite
-        byte[][] updatedMinValues =
-            CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true);
-        byte[][] updatedMaxValues =
-            CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
         summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, fileFooter, segmentProperties,
             getMinMaxCacheColumns(), blockInfo.getFilePath(), summaryRow,
-            blockMetaInfo, updatedMinValues, updatedMaxValues, minMaxFlag);
+            blockMetaInfo, minMaxIndex.getMinValues(), minMaxIndex.getMaxValues(), minMaxFlag);
         totalRowCount += fileFooter.getNumberOfRows();
       }
     }
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index f81a3dc..b15bdb5 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -238,6 +238,12 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> {
           LOGGER.warn("Skipping Direct Vector Filling as it is not Supported "
               + "for Legacy store prior to V3 store");
           queryModel.setDirectVectorFill(false);
+          // Skip minmax based pruning for measure column in case of legacy store
+          boolean[] minMaxFlag = new boolean[segmentProperties.getColumnsValueSize().length];
+          FilterUtil.setMinMaxFlagForLegacyStore(minMaxFlag, segmentProperties);
+          for (BlockletInfo blockletInfo : fileFooter.getBlockletList()) {
+            blockletInfo.getBlockletIndex().getMinMaxIndex().setIsMinMaxSet(minMaxFlag);
+          }
         }
         readAndFillBlockletInfo(tableBlockInfos, blockInfo,
             blockletDetailInfo, fileFooter, segmentProperties);
@@ -386,15 +392,6 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> {
     byte[][] maxValues = blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues();
     byte[][] minValues = blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues();
     if (blockletDetailInfo.isLegacyStore()) {
-      minValues = BlockletDataMapUtil.updateMinValues(segmentProperties,
-          blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues());
-      maxValues = BlockletDataMapUtil.updateMaxValues(segmentProperties,
-          blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues());
-      // update min and max values in case of old store for measures as min and max is written
-      // opposite for measures in old store ( store <= 1.1 version)
-      byte[][] tempMaxValues = maxValues;
-      maxValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
-      minValues = CarbonUtil.updateMinMaxValues(fileFooter, tempMaxValues, minValues, true);
       info.setDataBlockFromOldStore(true);
     }
     blockletInfo.getBlockletIndex().getMinMaxIndex().setMaxValues(maxValues);
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
index 6cc13e2..9d8fe8d 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
@@ -2326,4 +2326,12 @@ public final class FilterUtil {
     return defaultValue;
   }
 
+  public static void setMinMaxFlagForLegacyStore(boolean[] minMaxFlag,
+      SegmentProperties segmentProperties) {
+    int index = segmentProperties.getEachDimColumnValueSize().length + segmentProperties
+        .getEachComplexDimColumnValueSize().length;
+    Arrays.fill(minMaxFlag, 0, index, true);
+    Arrays.fill(minMaxFlag, index, minMaxFlag.length, false);
+  }
+
 }
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
index 4668242..33a337b 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
@@ -524,10 +524,13 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
           isMinMaxSet[chunkIndex]);
       }
     } else if (isMeasurePresentInCurrentBlock) {
-      chunkIndex = msrColumnEvaluatorInfo.getColumnIndexInMinMaxByteArray();
-      isScanRequired = isScanRequired(blkMaxVal[chunkIndex], blkMinVal[chunkIndex],
-          msrColumnExecutorInfo.getFilterKeys(),
-          msrColumnEvaluatorInfo.getType());
+      if (isMinMaxSet[chunkIndex]) {
+        chunkIndex = msrColumnEvaluatorInfo.getColumnIndexInMinMaxByteArray();
+        isScanRequired = isScanRequired(blkMaxVal[chunkIndex], blkMinVal[chunkIndex],
+            msrColumnExecutorInfo.getFilterKeys(), msrColumnEvaluatorInfo.getType());
+      } else {
+        isScanRequired = true;
+      }
     }
 
     if (isScanRequired) {
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
index d2c4b05..6b37e60 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
@@ -122,9 +122,13 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
     byte[] maxValue = null;
     if (isMeasurePresentInCurrentBlock[0] || isDimensionPresentInCurrentBlock[0]) {
       if (isMeasurePresentInCurrentBlock[0]) {
-        maxValue = blockMaxValue[measureChunkIndex[0]];
-        isScanRequired =
-            isScanRequired(maxValue, msrFilterRangeValues, msrColEvalutorInfoList.get(0).getType());
+        if (isMinMaxSet[measureChunkIndex[0]]) {
+          maxValue = blockMaxValue[measureChunkIndex[0]];
+          isScanRequired = isScanRequired(maxValue, msrFilterRangeValues,
+              msrColEvalutorInfoList.get(0).getType());
+        } else {
+          isScanRequired = true;
+        }
       } else {
         maxValue = blockMaxValue[dimensionChunkIndex[0]];
         DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType();
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
index cf31033..24c2e3c 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
@@ -120,9 +120,13 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
     byte[] maxValue = null;
     if (isMeasurePresentInCurrentBlock[0] || isDimensionPresentInCurrentBlock[0]) {
       if (isMeasurePresentInCurrentBlock[0]) {
-        maxValue = blockMaxValue[measureChunkIndex[0]];
-        isScanRequired =
-            isScanRequired(maxValue, msrFilterRangeValues, msrColEvalutorInfoList.get(0).getType());
+        if (isMinMaxSet[measureChunkIndex[0]]) {
+          maxValue = blockMaxValue[measureChunkIndex[0]];
+          isScanRequired = isScanRequired(maxValue, msrFilterRangeValues,
+              msrColEvalutorInfoList.get(0).getType());
+        } else {
+          isScanRequired = true;
+        }
       } else {
         maxValue = blockMaxValue[dimensionChunkIndex[0]];
         DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType();
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
index 8ea6e0d..0dbdf79 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
@@ -120,9 +120,13 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
     boolean isScanRequired = false;
     if (isMeasurePresentInCurrentBlock[0] || isDimensionPresentInCurrentBlock[0]) {
       if (isMeasurePresentInCurrentBlock[0]) {
-        minValue = blockMinValue[measureChunkIndex[0]];
-        isScanRequired =
-            isScanRequired(minValue, msrFilterRangeValues, msrColEvalutorInfoList.get(0).getType());
+        if (isMinMaxSet[measureChunkIndex[0]]) {
+          minValue = blockMinValue[measureChunkIndex[0]];
+          isScanRequired = isScanRequired(minValue, msrFilterRangeValues,
+              msrColEvalutorInfoList.get(0).getType());
+        } else {
+          isScanRequired = true;
+        }
       } else {
         minValue = blockMinValue[dimensionChunkIndex[0]];
         DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType();
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java
index df1afc4..acd918a 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java
@@ -120,9 +120,13 @@ public class RowLevelRangeLessThanFilterExecuterImpl extends RowLevelFilterExecu
     boolean isScanRequired = false;
     if (isMeasurePresentInCurrentBlock[0] || isDimensionPresentInCurrentBlock[0]) {
       if (isMeasurePresentInCurrentBlock[0]) {
-        minValue = blockMinValue[measureChunkIndex[0]];
-        isScanRequired =
-            isScanRequired(minValue, msrFilterRangeValues, msrColEvalutorInfoList.get(0).getType());
+        if (isMinMaxSet[measureChunkIndex[0]]) {
+          minValue = blockMinValue[measureChunkIndex[0]];
+          isScanRequired = isScanRequired(minValue, msrFilterRangeValues,
+              msrColEvalutorInfoList.get(0).getType());
+        } else {
+          isScanRequired = true;
+        }
       } else {
         minValue = blockMinValue[dimensionChunkIndex[0]];
         DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType();
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index d9f69e3..a4af9cc 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -88,8 +88,6 @@ import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
 import org.apache.carbondata.core.statusmanager.SegmentStatus;
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
 import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager;
-import org.apache.carbondata.core.util.comparator.Comparator;
-import org.apache.carbondata.core.util.comparator.SerializableComparator;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.format.BlockletHeader;
 import org.apache.carbondata.format.DataChunk2;
@@ -2826,51 +2824,6 @@ public final class CarbonUtil {
   }
 
   /**
-   * This method will be used to update the min and max values and this will be used in case of
-   * old store where min and max values for measures are written opposite
-   * (i.e max values in place of min and min in place of max values)
-   *
-   * @param dataFileFooter
-   * @param maxValues
-   * @param minValues
-   * @param isMinValueComparison
-   * @return
-   */
-  public static byte[][] updateMinMaxValues(DataFileFooter dataFileFooter, byte[][] maxValues,
-      byte[][] minValues, boolean isMinValueComparison) {
-    byte[][] updatedMinMaxValues = new byte[maxValues.length][];
-    if (isMinValueComparison) {
-      System.arraycopy(minValues, 0, updatedMinMaxValues, 0, minValues.length);
-    } else {
-      System.arraycopy(maxValues, 0, updatedMinMaxValues, 0, maxValues.length);
-    }
-    for (int i = 0; i < maxValues.length; i++) {
-      // update min and max values only for measures
-      if (!dataFileFooter.getColumnInTable().get(i).isDimensionColumn()) {
-        DataType dataType = dataFileFooter.getColumnInTable().get(i).getDataType();
-        SerializableComparator comparator = Comparator.getComparator(dataType);
-        int compare;
-        if (isMinValueComparison) {
-          compare = comparator
-              .compare(DataTypeUtil.getMeasureObjectFromDataType(maxValues[i], dataType),
-                  DataTypeUtil.getMeasureObjectFromDataType(minValues[i], dataType));
-          if (compare < 0) {
-            updatedMinMaxValues[i] = maxValues[i];
-          }
-        } else {
-          compare = comparator
-              .compare(DataTypeUtil.getMeasureObjectFromDataType(minValues[i], dataType),
-                  DataTypeUtil.getMeasureObjectFromDataType(maxValues[i], dataType));
-          if (compare > 0) {
-            updatedMinMaxValues[i] = minValues[i];
-          }
-        }
-      }
-    }
-    return updatedMinMaxValues;
-  }
-
-  /**
    * Generate the blockid as per the block path
    *
    * @param identifier
diff --git a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
index a82a8aa..a8d30b1 100644
--- a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
@@ -1011,52 +1011,6 @@ public class CarbonUtilTest {
     Assert.assertTrue(schemaString.length() > schema.length());
   }
 
-  @Test
-  public void testUpdateMinMaxValues() {
-    // create dimension and measure column schema
-    ColumnSchema dimensionColumnSchema = createColumnSchema(DataTypes.STRING, true);
-    ColumnSchema measureColumnSchema = createColumnSchema(DataTypes.DOUBLE, false);
-    List<ColumnSchema> columnSchemas = new ArrayList<>(2);
-    columnSchemas.add(dimensionColumnSchema);
-    columnSchemas.add(measureColumnSchema);
-    // create data file footer object
-    DataFileFooter fileFooter = new DataFileFooter();
-    fileFooter.setColumnInTable(columnSchemas);
-    // initialise the expected values
-    int expectedMaxValue = 5;
-    int expectedMinValue = 2;
-    double expectedMeasureMaxValue = 28.74;
-    double expectedMeasureMinValue = -21.46;
-    // initialise the minValues
-    byte[][] minValues = new byte[2][];
-    minValues[0] = new byte[] { 2 };
-    ByteBuffer buffer = ByteBuffer.allocate(8);
-    minValues[1] = (byte[]) buffer.putDouble(28.74).flip().array();
-    buffer = ByteBuffer.allocate(8);
-    // initialise the maxValues
-    byte[][] maxValues = new byte[2][];
-    maxValues[0] = new byte[] { 5 };
-    maxValues[1] = (byte[]) buffer.putDouble(-21.46).flip().array();
-    byte[][] updateMaxValues =
-        CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
-    byte[][] updateMinValues =
-        CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true);
-    // compare max values
-    assert (expectedMaxValue == ByteBuffer.wrap(updateMaxValues[0]).get());
-    assert (expectedMeasureMaxValue == ByteBuffer.wrap(updateMaxValues[1]).getDouble());
-
-    // compare min values
-    assert (expectedMinValue == ByteBuffer.wrap(updateMinValues[0]).get());
-    assert (expectedMeasureMinValue == ByteBuffer.wrap(updateMinValues[1]).getDouble());
-  }
-
-  private ColumnSchema createColumnSchema(DataType dataType, boolean isDimensionColumn) {
-    ColumnSchema columnSchema = new ColumnSchema();
-    columnSchema.setDataType(dataType);
-    columnSchema.setDimensionColumn(isDimensionColumn);
-    return columnSchema;
-  }
-
   private String generateString(int length) {
     StringBuilder builder = new StringBuilder();
     for (int i = 0; i < length; i++) {