You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2018/06/22 01:34:45 UTC

[44/50] [abbrv] carbondata git commit: [CARBONDATA-2616][BloomDataMap] Fix bugs in querying bloom datamap with two index columns

[CARBONDATA-2616][BloomDataMap] Fix bugs in querying bloom datamap with two index columns

During pruning in bloomfilter datamap, the same blocklets has been added
to result more than once, thus causing explaining and querying returning
incorrect result.

This closes #2386


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6eb360e1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6eb360e1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6eb360e1

Branch: refs/heads/carbonstore
Commit: 6eb360e1f5f577a576d185efb7dcbf1cc6a302e8
Parents: 01b48fc
Author: xuchuanyin <xu...@hust.edu.cn>
Authored: Wed Jun 20 16:31:28 2018 +0800
Committer: Jacky Li <ja...@qq.com>
Committed: Thu Jun 21 12:03:48 2018 +0800

----------------------------------------------------------------------
 .../datamap/bloom/BloomCoarseGrainDataMap.java    |  5 +++--
 .../bloom/BloomCoarseGrainDataMapSuite.scala      | 18 +++++++++++++++++-
 2 files changed, 20 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/6eb360e1/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
----------------------------------------------------------------------
diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index e9af0ff..ed03256 100644
--- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -85,7 +86,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap {
   @Override
   public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
       List<PartitionSpec> partitions) {
-    List<Blocklet> hitBlocklets = new ArrayList<Blocklet>();
+    Set<Blocklet> hitBlocklets = new HashSet<>();
     if (filterExp == null) {
       // null is different from empty here. Empty means after pruning, no blocklet need to scan.
       return null;
@@ -111,7 +112,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap {
         }
       }
     }
-    return hitBlocklets;
+    return new ArrayList<>(hitBlocklets);
   }
 
   private byte[] convertValueToBytes(DataType dataType, Object value) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6eb360e1/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
index 7df3901..c9a4097 100644
--- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
+++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
@@ -71,7 +71,23 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
     checkAnswer(
       checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_999'", dataMapName, shouldHit),
       sql(s"select * from $normalTable where city = 'city_999'"))
-     checkAnswer(
+    // query with two index_columns
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id = 1 and city='city_1'", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where id = 1 and city='city_1'"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id = 999 and city='city_999'", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where id = 999 and city='city_999'"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_1' and id = 0", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city = 'city_1' and id = 0"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_999' and name='n999'", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city = 'city_999' and name='n999'"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_999' and name='n1'", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city = 'city_999' and name='n1'"))
+    checkAnswer(
       sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" +
           s" from $bloomDMSampleTable"),
       sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" +