You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/10/09 15:50:19 UTC

[17/45] carbondata git commit: [CARBONDATA-2974] Fixed multiple expressions issue on datamap chooser and bloom datamap

[CARBONDATA-2974] Fixed multiple expressions issue on datamap chooser and bloom datamap

The DataMap framework provides a mechanism to compose expressions and
forward them to the corresponding datamap; in this way, the datamap can
handle the pruning in batch. But currently the expressions the framework
forwards include ones that cannot be supported by the datamap, so
here we optimize the datamap chooser.

We will compose the expressions and wrap them into an AndExpression. These
expressions are exactly the ones the datamap wants. The bloomfilter datamap
is changed accordingly to handle the AndExpression.

This closes #2767


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8284d9ed
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8284d9ed
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8284d9ed

Branch: refs/heads/branch-1.5
Commit: 8284d9ed1fe60d8881788656b7f78c055f76e453
Parents: 8427771
Author: ravipesala <ra...@gmail.com>
Authored: Wed Sep 26 16:56:03 2018 +0530
Committer: xuchuanyin <xu...@hust.edu.cn>
Committed: Fri Sep 28 16:46:49 2018 +0800

----------------------------------------------------------------------
 .../carbondata/core/datamap/DataMapChooser.java | 76 ++++++++++----------
 .../datamap/bloom/BloomCoarseGrainDataMap.java  |  8 ++-
 .../bloom/BloomCoarseGrainDataMapSuite.scala    | 62 +++++++++++++++-
 3 files changed, 106 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8284d9ed/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
index 68696cf..3b6537c 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
@@ -39,6 +39,7 @@ import org.apache.carbondata.core.scan.expression.logical.AndExpression;
 import org.apache.carbondata.core.scan.expression.logical.OrExpression;
 import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
+import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.TrueConditionalResolverImpl;
 
 /**
  * This chooser does 2 jobs.
@@ -123,9 +124,11 @@ public class DataMapChooser {
     if (resolverIntf != null) {
       Expression expression = resolverIntf.getFilterExpression();
       List<TableDataMap> datamaps = level == DataMapLevel.CG ? cgDataMaps : fgDataMaps;
-      ExpressionTuple tuple = selectDataMap(expression, datamaps, resolverIntf);
-      if (tuple.dataMapExprWrapper != null) {
-        return tuple.dataMapExprWrapper;
+      if (datamaps.size() > 0) {
+        ExpressionTuple tuple = selectDataMap(expression, datamaps, resolverIntf);
+        if (tuple.dataMapExprWrapper != null) {
+          return tuple.dataMapExprWrapper;
+        }
       }
     }
     return null;
@@ -177,34 +180,35 @@ public class DataMapChooser {
           // If both left and right has datamap then we can either merge both datamaps to single
           // datamap if possible. Otherwise apply AND expression.
           if (left.dataMapExprWrapper != null && right.dataMapExprWrapper != null) {
-            filterExpressionTypes.add(
-                left.dataMapExprWrapper.getFilterResolverIntf().getFilterExpression()
-                    .getFilterExpressionType());
-            filterExpressionTypes.add(
-                right.dataMapExprWrapper.getFilterResolverIntf().getFilterExpression()
-                    .getFilterExpressionType());
+            filterExpressionTypes.addAll(left.filterExpressionTypes);
+            filterExpressionTypes.addAll(right.filterExpressionTypes);
             List<ColumnExpression> columnExpressions = new ArrayList<>();
             columnExpressions.addAll(left.columnExpressions);
             columnExpressions.addAll(right.columnExpressions);
             // Check if we can merge them to single datamap.
             TableDataMap dataMap =
                 chooseDataMap(allDataMap, columnExpressions, filterExpressionTypes);
+            TrueConditionalResolverImpl resolver = new TrueConditionalResolverImpl(
+                new AndExpression(left.expression, right.expression), false,
+                true);
             if (dataMap != null) {
               ExpressionTuple tuple = new ExpressionTuple();
               tuple.columnExpressions = columnExpressions;
-              tuple.dataMapExprWrapper = new DataMapExprWrapperImpl(dataMap, filterResolverIntf);
+              tuple.dataMapExprWrapper = new DataMapExprWrapperImpl(dataMap, resolver);
+              tuple.expression = resolver.getFilterExpression();
               return tuple;
             } else {
               // Apply AND expression.
               ExpressionTuple tuple = new ExpressionTuple();
               tuple.columnExpressions = columnExpressions;
               tuple.dataMapExprWrapper = new AndDataMapExprWrapper(left.dataMapExprWrapper,
-                  right.dataMapExprWrapper, filterResolverIntf);
+                  right.dataMapExprWrapper, resolver);
+              tuple.expression = resolver.getFilterExpression();
               return tuple;
             }
-          } else if (left.dataMapExprWrapper != null && right.dataMapExprWrapper == null) {
+          } else if (left.dataMapExprWrapper != null) {
             return left;
-          } else if (left.dataMapExprWrapper == null && right.dataMapExprWrapper != null) {
+          } else if (right.dataMapExprWrapper != null) {
             return right;
           } else {
             return left;
@@ -218,33 +222,21 @@ public class DataMapChooser {
               filterResolverIntf.getLeft());
           ExpressionTuple right = selectDataMap(orExpression.getRight(), allDataMap,
               filterResolverIntf.getRight());
-          Set<ExpressionType> filterExpressionTypes = new HashSet<>();
           // If both left and right has datamap then we can either merge both datamaps to single
           // datamap if possible. Otherwise apply OR expression.
           if (left.dataMapExprWrapper != null && right.dataMapExprWrapper != null) {
-            filterExpressionTypes.add(
-                left.dataMapExprWrapper.getFilterResolverIntf().getFilterExpression()
-                    .getFilterExpressionType());
-            filterExpressionTypes.add(
-                right.dataMapExprWrapper.getFilterResolverIntf().getFilterExpression()
-                    .getFilterExpressionType());
+            TrueConditionalResolverImpl resolver = new TrueConditionalResolverImpl(
+                new OrExpression(left.expression, right.expression), false,
+                true);
             List<ColumnExpression> columnExpressions = new ArrayList<>();
             columnExpressions.addAll(left.columnExpressions);
             columnExpressions.addAll(right.columnExpressions);
-            TableDataMap dataMap =
-                chooseDataMap(allDataMap, columnExpressions, filterExpressionTypes);
-            if (dataMap != null) {
-              ExpressionTuple tuple = new ExpressionTuple();
-              tuple.columnExpressions = columnExpressions;
-              tuple.dataMapExprWrapper = new DataMapExprWrapperImpl(dataMap, filterResolverIntf);
-              return tuple;
-            } else {
-              ExpressionTuple tuple = new ExpressionTuple();
-              tuple.columnExpressions = columnExpressions;
-              tuple.dataMapExprWrapper = new OrDataMapExprWrapper(left.dataMapExprWrapper,
-                  right.dataMapExprWrapper, filterResolverIntf);
-              return tuple;
-            }
+            ExpressionTuple tuple = new ExpressionTuple();
+            tuple.columnExpressions = columnExpressions;
+            tuple.dataMapExprWrapper = new OrDataMapExprWrapper(left.dataMapExprWrapper,
+                right.dataMapExprWrapper, resolver);
+            tuple.expression = resolver.getFilterExpression();
+            return tuple;
           } else {
             left.dataMapExprWrapper = null;
             return left;
@@ -256,16 +248,22 @@ public class DataMapChooser {
         extractColumnExpression(expression, tuple.columnExpressions);
         Set<ExpressionType> filterExpressionTypes = new HashSet<>();
         filterExpressionTypes.add(expression.getFilterExpressionType());
+        TrueConditionalResolverImpl resolver = new TrueConditionalResolverImpl(
+            filterResolverIntf.getFilterExpression(), false,
+            true);
         TableDataMap dataMap =
             chooseDataMap(allDataMap, tuple.columnExpressions, filterExpressionTypes);
         if (dataMap != null) {
-          tuple.dataMapExprWrapper = new DataMapExprWrapperImpl(dataMap, filterResolverIntf);
+          tuple.dataMapExprWrapper = new DataMapExprWrapperImpl(dataMap, resolver);
+          tuple.filterExpressionTypes.addAll(filterExpressionTypes);
+          tuple.expression = filterResolverIntf.getFilterExpression();
         }
         return tuple;
     }
     return new ExpressionTuple();
   }
 
+
   private void extractColumnExpression(Expression expression,
       List<ColumnExpression> columnExpressions) {
     if (expression instanceof ColumnExpression) {
@@ -282,7 +280,9 @@ public class DataMapChooser {
       List<Expression> children = expression.getChildren();
       if (children != null && children.size() > 0) {
         for (Expression exp : children) {
-          extractColumnExpression(exp, columnExpressions);
+          if (exp != null && exp.getFilterExpressionType() != ExpressionType.UNKNOWN) {
+            extractColumnExpression(exp, columnExpressions);
+          }
         }
       }
     }
@@ -332,6 +332,10 @@ public class DataMapChooser {
 
     List<ColumnExpression> columnExpressions = new ArrayList<>();
 
+    Set<ExpressionType> filterExpressionTypes = new HashSet<>();
+
+    Expression expression;
+
   }
 
   private static class DataMapTuple implements Comparable<DataMapTuple> {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8284d9ed/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
----------------------------------------------------------------------
diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index 344ec09..ee71142 100644
--- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -59,6 +59,7 @@ import org.apache.carbondata.core.scan.expression.LiteralExpression;
 import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
 import org.apache.carbondata.core.scan.expression.conditional.InExpression;
 import org.apache.carbondata.core.scan.expression.conditional.ListExpression;
+import org.apache.carbondata.core.scan.expression.logical.AndExpression;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
@@ -264,11 +265,12 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap {
         LOGGER.warn(errorMsg);
         throw new RuntimeException(errorMsg);
       }
+    }  else if (expression instanceof AndExpression) {
+      queryModels.addAll(createQueryModel(((AndExpression) expression).getLeft()));
+      queryModels.addAll(createQueryModel(((AndExpression) expression).getRight()));
+      return queryModels;
     }
 
-    for (Expression child : expression.getChildren()) {
-      queryModels.addAll(createQueryModel(child));
-    }
     return queryModels;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8284d9ed/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
index 0a22937..84edd73 100644
--- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
+++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
@@ -22,7 +22,7 @@ import java.util.UUID
 
 import scala.util.Random
 
-import org.apache.spark.sql.{CarbonSession, DataFrame}
+import org.apache.spark.sql.{CarbonSession, DataFrame, Row}
 import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
 
@@ -923,6 +923,66 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
       CarbonCommonConstants.CARBON_DATE_FORMAT, originDateFormat)
   }
 
+  test("test bloom datamap on multiple columns") {
+    sql("drop table if exists store")
+        sql(
+          s"""
+             |CREATE TABLE IF NOT EXISTS store(
+             | market_code STRING,
+             | device_code STRING,
+             | country_code STRING,
+             | category_id INTEGER,
+             | product_id string,
+             | date date,
+             | est_free_app_download LONG,
+             | est_paid_app_download LONG,
+             | est_revenue LONG
+             | )
+             | STORED BY 'carbondata'
+             | TBLPROPERTIES(
+             | 'SORT_COLUMNS'='market_code, device_code, country_code, category_id, date,product_id',
+             | 'NO_INVERTED_INDEX'='est_free_app_download, est_paid_app_download,est_revenue',
+             | 'DICTIONARY_INCLUDE' = 'market_code, device_code, country_code,category_id, product_id',
+             | 'SORT_SCOPE'='GLOBAL_SORT',
+             | 'CACHE_LEVEL'='BLOCKLET',  'TABLE_BLOCKSIZE'='256',
+             | 'GLOBAL_SORT_PARTITIONS'='2'
+             | )""".stripMargin)
+
+    sql(s"""insert into store values('a', 'ios-phone', 'EE', 100021, 590416158, '2016-09-01', 100, 200, 300)""")
+    sql(s"""insert into store values('b', 'ios-phone', 'EE', 100021, 590437560, '2016-09-03', 100, 200, 300)""")
+    sql(s"""insert into store values('a', 'ios-phone', 'EF', 100022, 590416159, '2016-09-04', 100, 200, 300)""")
+
+    sql(
+      s"""
+         |CREATE DATAMAP IF NOT EXISTS bloomfilter_all_dimensions ON TABLE store
+         | USING 'bloomfilter'
+         | DMPROPERTIES (
+         | 'INDEX_COLUMNS'='market_code, device_code, country_code, category_id, date,product_id',
+         | 'BLOOM_SIZE'='640000',
+         | 'BLOOM_FPP'='0.000001',
+         | 'BLOOM_COMPRESS'='true'
+         | )
+       """.stripMargin).show()
+
+    checkAnswer(sql(
+      s"""SELECT market_code, device_code, country_code,
+         |category_id, sum(est_free_app_download) FROM store WHERE date
+         |BETWEEN '2016-09-01' AND '2016-09-03' AND device_code='ios-phone'
+         |AND country_code='EE' AND category_id=100021 AND product_id IN (590416158, 590437560)
+         |GROUP BY date, market_code, device_code, country_code, category_id""".stripMargin),
+      Seq(Row("a", "ios-phone", "EE", 100021, 100), Row("b", "ios-phone", "EE", 100021, 100)))
+
+    assert(sql(
+      s"""SELECT market_code, device_code, country_code,
+         |category_id, sum(est_free_app_download) FROM store WHERE (device_code='ios-phone'
+         |AND country_code='EF') or (category_id=100021 AND product_id IN (590416158, 590437560))
+         |GROUP BY date, market_code, device_code, country_code, category_id""".stripMargin).collect().length == 3)
+
+    checkAnswer(sql("select device_code from store where product_id=590416158"), Seq(Row("ios-phone")))
+
+    sql("drop table if exists store")
+  }
+
   override protected def afterAll(): Unit = {
     // in case of search mode test case failed, stop search mode again
     if (carbonSession.isSearchModeEnabled) {