Posted to commits@carbondata.apache.org by ra...@apache.org on 2017/04/18 12:04:05 UTC

[29/50] incubator-carbondata git commit: Problem: An "is null" query on a newly added measure column does not return correct results.

Problem: An "is null" query on a newly added measure column does not return correct results.

Analysis: When an "is null" query is executed on a newly added measure column, control reaches the RowLevelFilterExecuterImpl class, where the existence of the measure is checked. If the measure is not found, the bitset group is not populated with default values, so that block returns no results.

Solution: When the query runs on a restructured block in which the newly added column does not exist, create a default bitset group based on whether the column has a default value, and return results from that bitset group.
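
For context, the fix amounts to filling one bitset per page with the column's default value, and only creating a bitset for the final page when it actually holds rows. Below is a minimal, self-contained sketch of that idea (not the actual FilterUtil code); the List<BitSet> stand-in for CarbonData's BitSetGroup and the 32,000 rows-per-page figure are assumptions inferred from the unit test in the diff below.

    import java.util.ArrayList;
    import java.util.BitSet;
    import java.util.List;

    public class DefaultBitSetSketch {
      // Build one BitSet per page with every bit set to defaultValue.
      // A partial last page gets a BitSet only when some rows remain,
      // mirroring the guard this commit adds in FilterUtil.
      static List<BitSet> buildDefaultBitSets(int totalRows, int rowsPerPage, boolean defaultValue) {
        List<BitSet> pages = new ArrayList<>();  // stand-in for CarbonData's BitSetGroup
        int fullPages = totalRows / rowsPerPage;
        int rowsInLastPage = totalRows % rowsPerPage;
        for (int i = 0; i < fullPages; i++) {
          BitSet bitSet = new BitSet(rowsPerPage);
          bitSet.set(0, rowsPerPage, defaultValue);
          pages.add(bitSet);
        }
        if (rowsInLastPage > 0) {  // skip when rows divide evenly into pages
          BitSet bitSet = new BitSet(rowsInLastPage);
          bitSet.set(0, rowsInLastPage, defaultValue);
          pages.add(bitSet);
        }
        return pages;
      }

      public static void main(String[] args) {
        // 448000 rows at 32000 rows/page -> exactly 14 pages, no partial page
        System.out.println(buildDefaultBitSets(448000, 32000, true).size());  // 14
        // 448200 rows -> 14 full pages plus a partial page of 200 rows
        System.out.println(buildDefaultBitSets(448200, 32000, true).size());  // 15
      }
    }

With a bitset group prepared this way, an "is null" filter on the restructured block can be answered from the default bits (all true when the column's default value is null) instead of returning no rows.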


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/1e8d26c2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/1e8d26c2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/1e8d26c2

Branch: refs/heads/branch-1.1
Commit: 1e8d26c29d6025d083bde62a535b415993606219
Parents: 51f9629
Author: manishgupta88 <to...@gmail.com>
Authored: Tue Apr 11 16:54:03 2017 +0530
Committer: manishgupta88 <to...@gmail.com>
Committed: Thu Apr 13 23:26:52 2017 +0530

----------------------------------------------------------------------
 .../carbondata/core/scan/filter/FilterUtil.java | 10 +++--
 .../executer/RowLevelFilterExecuterImpl.java    |  8 ++++
 .../core/scan/filter/FilterUtilTest.java        | 12 +++++
 .../src/test/resources/restructure/data6.csv    |  3 ++
 .../vectorreader/AddColumnTestCases.scala       | 46 ++++++++++++++++++++
 5 files changed, 75 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/1e8d26c2/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
index 770aa7e..7799b6a 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
@@ -1447,10 +1447,12 @@ public final class FilterUtil {
       bitSet.set(0, numberOfRows, defaultValue);
       bitSetGroup.setBitSet(bitSet, i);
     }
-    // create and fill bitset for the last page
-    BitSet bitSet = new BitSet(rowCountForLastPage);
-    bitSet.set(0, rowCountForLastPage, defaultValue);
-    bitSetGroup.setBitSet(bitSet, pagesTobeFullFilled);
+    // create and fill bitset for the last page if any records are left
+    if (rowCountForLastPage > 0) {
+      BitSet bitSet = new BitSet(rowCountForLastPage);
+      bitSet.set(0, rowCountForLastPage, defaultValue);
+      bitSetGroup.setBitSet(bitSet, pagesTobeFullFilled);
+    }
     return bitSetGroup;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/1e8d26c2/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
index 10902ea..470de89 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
@@ -167,6 +167,10 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
             blockChunkHolder.getDimensionRawDataChunk()[dimensionBlocksIndex[0]].getPagesCount();
         numberOfRows =
             blockChunkHolder.getDimensionRawDataChunk()[dimensionBlocksIndex[0]].getRowCount();
+      } else {
+        // specific for restructure case where default values need to be filled
+        pageNumbers = blockChunkHolder.getDataBlock().numberOfPages();
+        numberOfRows = new int[] { blockChunkHolder.getDataBlock().nodeSize() };
       }
     }
     if (msrColEvalutorInfoList.size() > 0) {
@@ -175,6 +179,10 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
             blockChunkHolder.getMeasureRawDataChunk()[measureBlocksIndex[0]].getPagesCount();
         numberOfRows =
             blockChunkHolder.getMeasureRawDataChunk()[measureBlocksIndex[0]].getRowCount();
+      } else {
+        // specific for restructure case where default values need to be filled
+        pageNumbers = blockChunkHolder.getDataBlock().numberOfPages();
+        numberOfRows = new int[] { blockChunkHolder.getDataBlock().nodeSize() };
       }
     }
     BitSetGroup bitSetGroup = new BitSetGroup(pageNumbers);

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/1e8d26c2/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java b/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java
index 06063a4..fee3d3d 100644
--- a/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/scan/filter/FilterUtilTest.java
@@ -38,6 +38,7 @@ import org.apache.carbondata.core.scan.expression.Expression;
 import org.apache.carbondata.core.scan.expression.LiteralExpression;
 import org.apache.carbondata.core.scan.expression.conditional.ListExpression;
 import org.apache.carbondata.core.scan.filter.intf.RowImpl;
+import org.apache.carbondata.core.util.BitSetGroup;
 
 import mockit.Mock;
 import mockit.MockUp;
@@ -387,4 +388,15 @@ public class FilterUtilTest extends AbstractDictionaryCacheTest {
     SegmentProperties segmentProperties = new SegmentProperties(columnsInTable, columnCardinality);
     assertTrue(FilterUtil.prepareDefaultStartIndexKey(segmentProperties) instanceof IndexKey);
   }
+
+  @Test public void testCreateBitSetGroupWithDefaultValue() {
+    // test for exactly divisible values
+    BitSetGroup bitSetGroupWithDefaultValue =
+        FilterUtil.createBitSetGroupWithDefaultValue(14, 448000, true);
+    assertTrue(bitSetGroupWithDefaultValue.getNumberOfPages() == 14);
+    // test for remainder values
+    bitSetGroupWithDefaultValue =
+        FilterUtil.createBitSetGroupWithDefaultValue(15, 448200, true);
+    assertTrue(bitSetGroupWithDefaultValue.getNumberOfPages() == 15);
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/1e8d26c2/integration/spark-common-test/src/test/resources/restructure/data6.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/restructure/data6.csv b/integration/spark-common-test/src/test/resources/restructure/data6.csv
new file mode 100644
index 0000000..c7b4df2
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/restructure/data6.csv
@@ -0,0 +1,3 @@
+7,hello1
+8,welcome1
+bye,11
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/1e8d26c2/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala
index 3b4a25c..1c7cc2a 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AddColumnTestCases.scala
@@ -187,6 +187,25 @@ class AddColumnTestCases extends QueryTest with BeforeAndAfterAll {
     checkAnswer(sql("select distinct(CUST_NAME) from carbon_new"),Row("testuser"))
   }
 
+  test("test for checking newly added measure column for is null condition") {
+    sql("DROP TABLE IF EXISTS carbon_measure_is_null")
+    sql("CREATE TABLE carbon_measure_is_null (CUST_ID int,CUST_NAME String) STORED BY 'carbondata'")
+    sql(
+      s"LOAD DATA INPATH '$resourcesPath/restructure/data6.csv' into table carbon_measure_is_null" +
+      s" OPTIONS" +
+      s"('BAD_RECORDS_LOGGER_ENABLE'='TRUE', " +
+      s"'BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME')")
+    sql("ALTER TABLE carbon_measure_is_null ADD COLUMNS (a6 int)")
+    sql(
+      s"LOAD DATA INPATH '$resourcesPath/restructure/data6.csv' into table carbon_measure_is_null" +
+      s" OPTIONS" +
+      s"('BAD_RECORDS_LOGGER_ENABLE'='TRUE', " +
+      s"'BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,a6')")
+    checkAnswer(sql("select * from carbon_measure_is_null"),
+      sql("select * from carbon_measure_is_null where a6 is null"))
+    checkAnswer(sql("select count(*) from carbon_measure_is_null where a6 is not null"), Row(0))
+    sql("DROP TABLE IF EXISTS carbon_measure_is_null")
+  }
   test("test to check if intField returns correct result") {
     sql("DROP TABLE IF EXISTS carbon_table")
     sql("CREATE TABLE carbon_table(intField int,stringField string,charField string,timestampField timestamp, decimalField decimal(6,2)) STORED BY 'carbondata'")
@@ -232,6 +251,33 @@ class AddColumnTestCases extends QueryTest with BeforeAndAfterAll {
   }
 
 
+  test("test for checking newly added dictionary column for is null condition") {
+    sql("DROP TABLE IF EXISTS carbon_dictionary_is_null")
+    sql(
+      "CREATE TABLE carbon_dictionary_is_null (CUST_ID int,CUST_NAME String) STORED BY " +
+      "'carbondata'")
+    sql(
+      s"LOAD DATA INPATH '$resourcesPath/restructure/data6.csv' into table " +
+      s"carbon_dictionary_is_null" +
+      s" OPTIONS" +
+      s"('BAD_RECORDS_LOGGER_ENABLE'='TRUE', " +
+      s"'BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME')")
+    sql(
+      "ALTER TABLE carbon_dictionary_is_null ADD COLUMNS (a6 int) tblproperties" +
+      "('dictionary_include'='a6')")
+    sql(
+      s"LOAD DATA INPATH '$resourcesPath/restructure/data6.csv' into table " +
+      s"carbon_dictionary_is_null" +
+      s" OPTIONS" +
+      s"('BAD_RECORDS_LOGGER_ENABLE'='TRUE', " +
+      s"'BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,a6')")
+    checkAnswer(sql("select * from carbon_dictionary_is_null"),
+      sql("select * from carbon_dictionary_is_null where a6 is null"))
+    checkAnswer(sql("select count(*) from carbon_dictionary_is_null where a6 is not null"), Row(0))
+    sql("DROP TABLE IF EXISTS carbon_dictionary_is_null")
+  }
+
+
   override def afterAll {
     sql("DROP TABLE IF EXISTS addcolumntest")
     sql("drop table if exists hivetable")