You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/10/10 03:07:52 UTC

[05/50] [abbrv] carbondata git commit: [CARBONDATA-1449]Fixed date and timestamp filter gc issue in case of direct dictionary

[CARBONDATA-1449]Fixed date and timestamp filter gc issue in case of direct dictionary

Problem: When a date or timestamp filter is evaluated at row level, there is a lot of GC, because a new direct dictionary generator object is created for the data type on every row.
Solution: Create one direct dictionary generator object per data type (date and timestamp) and reuse it for every row.
Improvement tested with 2 million rows:
~19 seconds to ~9 seconds

This closes #1381


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2cd22e1d
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2cd22e1d
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2cd22e1d

Branch: refs/heads/streaming_ingest
Commit: 2cd22e1d682f29c54a2491deec18314e65a03f4e
Parents: 7f6b08a
Author: kumarvishal <ku...@gmail.com>
Authored: Mon Sep 25 17:06:21 2017 +0530
Committer: Ravindra Pesala <ra...@gmail.com>
Committed: Tue Sep 26 20:06:50 2017 +0530

----------------------------------------------------------------------
 .../executer/RowLevelFilterExecuterImpl.java    | 27 +++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/2cd22e1d/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
index b79f18d..8f3eb93 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
@@ -100,6 +100,16 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
    */
   protected boolean isNaturalSorted;
 
+  /**
+   * date direct dictionary generator
+   */
+  private DirectDictionaryGenerator dateDictionaryGenerator;
+
+  /**
+   * timestamp direct dictionary generator
+   */
+  private DirectDictionaryGenerator timestampDictionaryGenerator;
+
   public RowLevelFilterExecuterImpl(List<DimColumnResolvedFilterInfo> dimColEvaluatorInfoList,
       List<MeasureColumnResolvedFilterInfo> msrColEvalutorInfoList, Expression exp,
       AbsoluteTableIdentifier tableIdentifier, SegmentProperties segmentProperties,
@@ -132,6 +142,10 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
     this.exp = exp;
     this.tableIdentifier = tableIdentifier;
     this.complexDimensionInfoMap = complexDimensionInfoMap;
+    this.dateDictionaryGenerator =
+        DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(DataType.DATE);
+    this.timestampDictionaryGenerator =
+        DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(DataType.TIMESTAMP);
     initDimensionBlockIndexes();
     initMeasureBlockIndexes();
   }
@@ -408,13 +422,14 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
    */
   private Object getFilterActualValueFromDirectDictionaryValue(
       DimColumnResolvedFilterInfo dimColumnEvaluatorInfo, int dictionaryValue) {
-    Object memberString = null;
-    DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
-        .getDirectDictionaryGenerator(dimColumnEvaluatorInfo.getDimension().getDataType());
-    if (null != directDictionaryGenerator) {
-      memberString = directDictionaryGenerator.getValueFromSurrogate(dictionaryValue);
+    switch (dimColumnEvaluatorInfo.getDimension().getDataType()) {
+      case DATE:
+        return dateDictionaryGenerator.getValueFromSurrogate(dictionaryValue);
+      case TIMESTAMP:
+        return timestampDictionaryGenerator.getValueFromSurrogate(dictionaryValue);
+      default:
+        throw new RuntimeException("Invalid data type for dierct dictionary");
     }
-    return memberString;
   }
 
   /**