You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ma...@apache.org on 2018/10/23 09:37:26 UTC

carbondata git commit: [CARBONDATA-3022] Refactor ColumnPageWrapper

Repository: carbondata
Updated Branches:
  refs/heads/master c7c83684b -> fa9a4eeeb


[CARBONDATA-3022] Refactor ColumnPageWrapper

Refactor ColumnPageWrapper for better filter query performance.
Remove unnecessary checks and loops.

This closes #2808


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fa9a4eee
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fa9a4eee
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fa9a4eee

Branch: refs/heads/master
Commit: fa9a4eeeb489b77b3040d54e7878bac93ccb12aa
Parents: c7c8368
Author: dhatchayani <dh...@gmail.com>
Authored: Wed Oct 10 13:18:01 2018 +0530
Committer: manishgupta88 <to...@gmail.com>
Committed: Tue Oct 23 15:12:21 2018 +0530

----------------------------------------------------------------------
 .../chunk/store/ColumnPageWrapper.java          | 126 +++++--------------
 .../core/scan/executor/util/QueryUtil.java      |  32 -----
 .../carbondata/core/util/DataTypeUtil.java      |  18 ---
 3 files changed, 32 insertions(+), 144 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9a4eee/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
index 627c75f..ba853f9 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
@@ -26,7 +26,6 @@ import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
 import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.scan.executor.util.QueryUtil;
 import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
 import org.apache.carbondata.core.scan.result.vector.CarbonDictionary;
 import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
@@ -83,31 +82,6 @@ public class ColumnPageWrapper implements DimensionColumnPage {
     return chunkIndex + 1;
   }
 
-  /**
-   * Fill the data to the vector
-   *
-   * @param rowId
-   * @param vector
-   * @param vectorRow
-   */
-  private void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
-    if (columnPage.getNullBits().get(rowId)
-        && columnPage.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
-      // if this row is null, return default null represent in byte array
-      byte[] value = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
-      QueryUtil.putDataToVector(vector, value, vectorRow, value.length);
-    } else if (columnPage.getNullBits().get(rowId)) {
-      // if this row is null, return default null represent in byte array
-      byte[] value = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
-      QueryUtil.putDataToVector(vector, value, vectorRow, value.length);
-    } else {
-      if (isExplicitSorted) {
-        rowId = invertedReverseIndex[rowId];
-      }
-      QueryUtil.putDataToVector(vector, getActualData(rowId, true), vectorRow);
-    }
-  }
-
   @Override
   public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int chunkIndex) {
     ColumnVectorInfo columnVectorInfo = vectorInfo[chunkIndex];
@@ -214,78 +188,42 @@ public class ColumnPageWrapper implements DimensionColumnPage {
     return null;
   }
 
-  private Object getActualData(int rowId, boolean isRowIdChanged) {
-    ColumnType columnType = columnPage.getColumnSpec().getColumnType();
-    DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
-    DataType targetDataType = columnPage.getDataType();
-    if (null != localDictionary) {
-      return localDictionary
-          .getDictionaryValue(CarbonUtil.getSurrogateInternal(columnPage.getBytes(rowId), 0, 3));
-    } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && this.isAdaptiveEncoded()) || (
-        columnType == ColumnType.PLAIN_VALUE && DataTypeUtil.isPrimitiveColumn(srcDataType))) {
-      if (!isRowIdChanged && columnPage.getNullBits().get(rowId)
-          && columnType == ColumnType.COMPLEX_PRIMITIVE) {
-        // if this row is null, return default null represent in byte array
-        return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
-      }
-      if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) {
-        // if this row is null, return default null represent in byte array
-        return CarbonCommonConstants.EMPTY_BYTE_ARRAY;
+  /**
+   * Fill the data to the vector
+   *
+   * @param rowId
+   * @param vector
+   * @param vectorRow
+   */
+  private void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
+    if (columnPage.getNullBits().get(rowId)) {
+      vector.putNull(vectorRow);
+    } else {
+      if (isExplicitSorted) {
+        rowId = invertedReverseIndex[rowId];
       }
-      if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
-        double doubleData = columnPage.getDouble(rowId);
-        if (srcDataType == DataTypes.FLOAT) {
-          return (float) doubleData;
-        } else {
-          return doubleData;
-        }
-      } else if (DataTypes.isDecimal(srcDataType)) {
-        throw new RuntimeException("unsupported type: " + srcDataType);
-      } else if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN) || (
-          srcDataType == DataTypes.SHORT) || (srcDataType == DataTypes.SHORT_INT) || (srcDataType
-          == DataTypes.INT) || (srcDataType == DataTypes.LONG) || (srcDataType
-          == DataTypes.TIMESTAMP)) {
-        long longData = columnPage.getLong(rowId);
-        if ((srcDataType == DataTypes.BYTE)) {
-          return (byte) longData;
-        } else if (srcDataType == DataTypes.BOOLEAN) {
-          byte out = (byte) longData;
-          return ByteUtil.toBoolean(out);
-        } else if (srcDataType == DataTypes.SHORT) {
-          return (short) longData;
-        } else if (srcDataType == DataTypes.SHORT_INT) {
-          return (int) longData;
-        } else if (srcDataType == DataTypes.INT) {
-          return (int) longData;
-        } else {
-          // timestamp and long
-          return longData;
+      DataType dt = vector.getType();
+      long longData = columnPage.getLong(rowId);
+      if (dt == DataTypes.BOOLEAN) {
+        vector.putBoolean(vectorRow, ByteUtil.toBoolean((byte) longData));
+      } else if (dt == DataTypes.BYTE) {
+        vector.putByte(vectorRow, (byte) longData);
+      } else if (dt == DataTypes.SHORT) {
+        vector.putShort(vectorRow, (short) longData);
+      } else if (dt == DataTypes.INT) {
+        vector.putInt(vectorRow, (int) longData);
+      } else if (dt == DataTypes.LONG) {
+        // retrieving the data after change in data type restructure operation
+        if (vector.getBlockDataType() == DataTypes.INT) {
+          vector.putLong(vectorRow, (int) longData);
+        } else if (vector.getBlockDataType() == DataTypes.LONG) {
+          vector.putLong(vectorRow, longData);
         }
-      } else if ((targetDataType == DataTypes.STRING) || (targetDataType == DataTypes.VARCHAR) || (
-          targetDataType == DataTypes.BYTE_ARRAY)) {
-        return columnPage.getBytes(rowId);
-      } else {
-        throw new RuntimeException("unsupported type: " + targetDataType);
-      }
-    } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && !this.isAdaptiveEncoded())) {
-      if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) {
-        return CarbonCommonConstants.EMPTY_BYTE_ARRAY;
-      }
-      if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) {
-        byte[] out = new byte[1];
-        out[0] = (columnPage.getByte(rowId));
-        return ByteUtil.toBoolean(out);
-      } else if (srcDataType == DataTypes.BYTE_ARRAY) {
-        return columnPage.getBytes(rowId);
-      } else if (srcDataType == DataTypes.DOUBLE) {
-        return columnPage.getDouble(rowId);
-      } else if (srcDataType == targetDataType) {
-        return columnPage.getBytes(rowId);
+      } else if (dt == DataTypes.TIMESTAMP) {
+        vector.putLong(vectorRow, longData * 1000L);
       } else {
-        throw new RuntimeException("unsupported type: " + targetDataType);
+        throw new RuntimeException("unsupported type: " + dt);
       }
-    } else {
-      return columnPage.getBytes(rowId);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9a4eee/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java
index 7849d10..22e1e72 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java
@@ -777,36 +777,4 @@ public class QueryUtil {
       }
     }
   }
-
-  /**
-   * Put the data to vector
-   *
-   * @param vector
-   * @param value
-   * @param vectorRow
-   */
-  public static void putDataToVector(CarbonColumnVector vector, Object value, int vectorRow) {
-    DataType dt = vector.getType();
-    if (value.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY) || value
-        .equals(CarbonCommonConstants.EMPTY_BYTE_ARRAY)) {
-      vector.putNull(vectorRow);
-    } else {
-      if (dt == DataTypes.STRING) {
-        vector.putBytes(vectorRow, (byte[]) value);
-      } else if (dt == DataTypes.BOOLEAN) {
-        vector.putBoolean(vectorRow, (boolean) value);
-      } else if (dt == DataTypes.BYTE) {
-        vector.putByte(vectorRow, (byte) value);
-      } else if (dt == DataTypes.SHORT) {
-        vector.putShort(vectorRow, (short) value);
-      } else if (dt == DataTypes.INT) {
-        vector.putInt(vectorRow, (int) value);
-      } else if (dt == DataTypes.LONG) {
-        vector.putLong(vectorRow,
-            DataTypeUtil.getDataBasedOnRestructuredDataType(value, vector.getBlockDataType()));
-      } else if (dt == DataTypes.TIMESTAMP) {
-        vector.putLong(vectorRow, (long) value * 1000L);
-      }
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9a4eee/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
index 66faf20..8f05f39 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
@@ -1085,24 +1085,6 @@ public final class DataTypeUtil {
   }
 
   /**
-   * Method to type case the data based on modified data type. This method will used for
-   * retrieving the data after change in data type restructure operation
-   *
-   * @param data
-   * @param restructureDataType
-   * @return
-   */
-  public static long getDataBasedOnRestructuredDataType(Object data, DataType restructureDataType) {
-    long value = 0L;
-    if (restructureDataType == DataTypes.INT) {
-      value = (int) data;
-    } else if (restructureDataType == DataTypes.LONG) {
-      value = (long) data;
-    }
-    return value;
-  }
-
-  /**
    * Check if the column is a no dictionary primitive column
    *
    * @param dataType