You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@carbondata.apache.org by ku...@apache.org on 2017/05/31 08:57:08 UTC

[1/2] carbondata git commit: Problem: Wrong results returned by the query in case inverted index is not created on a column

Repository: carbondata
Updated Branches:
  refs/heads/master 7b7579b87 -> 59b06b631


Problem: Wrong results returned by the query in case inverted index is not created on a column

Fix: When inverted index does not exist for a column or column is not a sort column then
1. Block or blocklet cannot be pruned as data for that column is not sorted
2. While applying the filter linear search should be applied instead of binary search as binary search can be applied only on sorted data


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3ae44724
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3ae44724
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3ae44724

Branch: refs/heads/master
Commit: 3ae44724870226fedf2ad90911c9306d3e56bc87
Parents: 7b7579b
Author: manishgupta88 <to...@gmail.com>
Authored: Thu May 25 18:44:30 2017 +0530
Committer: kumarvishal <ku...@gmail.com>
Committed: Wed May 31 14:25:23 2017 +0530

----------------------------------------------------------------------
 .../executer/IncludeFilterExecuterImpl.java     |  40 +++-
 .../executer/RangeValueFilterExecuterImpl.java  | 198 +++++++++++++------
 .../RowLevelRangeGrtThanFiterExecuterImpl.java  |  68 ++++---
 ...elRangeGrtrThanEquaToFilterExecuterImpl.java |  62 +++---
 ...velRangeLessThanEqualFilterExecuterImpl.java |  90 +++++----
 .../RowLevelRangeLessThanFiterExecuterImpl.java |  98 +++++----
 .../TestNoInvertedIndexLoadAndQuery.scala       |  53 ++++-
 7 files changed, 405 insertions(+), 204 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3ae44724/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
index 7b8f084..804e598 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
@@ -137,19 +137,39 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
       int numerOfRows) {
     BitSet bitSet = new BitSet(numerOfRows);
     if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
+      int startIndex = 0;
       byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
-      if (filterValues.length > 1) {
-        for (int i = 0; i < numerOfRows; i++) {
-          int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
-              dimensionColumnDataChunk.getChunkData(i));
-          if (index >= 0) {
-            bitSet.set(i);
+      // binary search can only be applied if column is sorted and
+      // inverted index exists for that column
+      if (dimensionColumnDataChunk.isExplicitSorted()) {
+        for (int i = 0; i < filterValues.length; i++) {
+          if (startIndex >= numerOfRows) {
+            break;
+          }
+          int[] rangeIndex = CarbonUtil
+              .getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                  filterValues[i]);
+          for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
+            bitSet.set(j);
+          }
+          if (rangeIndex[1] >= 0) {
+            startIndex = rangeIndex[1] + 1;
           }
         }
-      } else if (filterValues.length == 1) {
-        for (int i = 0; i < numerOfRows; i++) {
-          if (dimensionColumnDataChunk.compareTo(i, filterValues[0]) == 0) {
-            bitSet.set(i);
+      } else {
+        if (filterValues.length > 1) {
+          for (int i = 0; i < numerOfRows; i++) {
+            int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
+                dimensionColumnDataChunk.getChunkData(i));
+            if (index >= 0) {
+              bitSet.set(i);
+            }
+          }
+        } else {
+          for (int j = 0; j < numerOfRows; j++) {
+            if (dimensionColumnDataChunk.compareTo(j, filterValues[0]) == 0) {
+              bitSet.set(j);
+            }
           }
         }
       }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3ae44724/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
index a20f414..40e0006 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
@@ -242,7 +242,7 @@ public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl {
     //                       Block Min <-----------------------> Block Max
     //         Filter Min <-----------------------------------------------> Filter Max
 
-    if (isDimensionPresentInCurrentBlock == true) {
+    if (isDimensionPresentInCurrentBlock) {
       if (((lessThanExp == true) && (
           ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]) >= 0)) || (
           (lessThanEqualExp == true) && (
@@ -474,80 +474,166 @@ public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl {
       int numerOfRows) {
     BitSet bitSet = new BitSet(numerOfRows);
     // if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
-    int start = 0;
-    int startMin = 0;
-    int endMax = 0;
-    int startIndex = 0;
     byte[][] filterValues = this.filterRangesValues;
-    // For Range expression we expect two values. The First is the Min Value and Second is the
-    // Max value.
-    if (startBlockMinIsDefaultStart == false) {
-
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk,
-              startIndex, numerOfRows - 1, filterValues[0], greaterThanExp);
+    if (dimensionColumnDataChunk.isExplicitSorted()) {
+      int start = 0;
+      int startMin = 0;
+      int endMax = 0;
+      int startIndex = 0;
+      // For Range expression we expect two values. The First is the Min Value and Second is the
+      // Max value.
+      if (startBlockMinIsDefaultStart == false) {
 
-      if (greaterThanExp == true && start >= 0) {
         start = CarbonUtil
-            .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[0],
-                numerOfRows);
-      }
+            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                filterValues[0], greaterThanExp);
 
-      if (start < 0) {
-        start = -(start + 1);
-        if (start == numerOfRows) {
-          start = start - 1;
+        if (greaterThanExp == true && start >= 0) {
+          start = CarbonUtil
+              .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[0],
+                  numerOfRows);
         }
-        // Method will compare the tentative index value after binary search, this tentative
-        // index needs to be compared by the filter member if its >= filter then from that
-        // index the bitset will be considered for filtering process.
-        if ((ByteUtil.compare(filterValues[0], dimensionColumnDataChunk.getChunkData(start)))
-            > 0) {
-          start = start + 1;
+
+        if (start < 0) {
+          start = -(start + 1);
+          if (start == numerOfRows) {
+            start = start - 1;
+          }
+          // Method will compare the tentative index value after binary search, this tentative
+          // index needs to be compared by the filter member if its >= filter then from that
+          // index the bitset will be considered for filtering process.
+          if ((ByteUtil.compare(filterValues[0], dimensionColumnDataChunk.getChunkData(start)))
+              > 0) {
+            start = start + 1;
+          }
         }
+        startMin = start;
+      } else {
+        startMin = startIndex;
       }
-      startMin = start;
-    } else {
-      startMin = startIndex;
-    }
-
-    if (endBlockMaxisDefaultEnd == false) {
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
-              filterValues[1], lessThanEqualExp);
 
-      if (lessThanExp == true && start >= 0) {
-        start =
-            CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[1]);
-      }
+      if (endBlockMaxisDefaultEnd == false) {
+        start = CarbonUtil
+            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                filterValues[1], lessThanEqualExp);
 
-      if (start < 0) {
-        start = -(start + 1);
-        if (start == numerOfRows) {
-          start = start - 1;
+        if (lessThanExp == true && start >= 0) {
+          start =
+              CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[1]);
         }
-        // In case the start is less than 0, then positive value of start is pointing to the next
-        // value of the searched key. So move to the previous one.
-        if ((ByteUtil.compare(filterValues[1], dimensionColumnDataChunk.getChunkData(start))
-            < 0)) {
-          start = start - 1;
+
+        if (start < 0) {
+          start = -(start + 1);
+          if (start == numerOfRows) {
+            start = start - 1;
+          }
+          // In case the start is less than 0, then positive value of start is pointing to the next
+          // value of the searched key. So move to the previous one.
+          if ((ByteUtil.compare(filterValues[1], dimensionColumnDataChunk.getChunkData(start))
+              < 0)) {
+            start = start - 1;
+          }
         }
+        endMax = start;
+      } else {
+        endMax = numerOfRows - 1;
+      }
+
+      for (int j = startMin; j <= endMax; j++) {
+        bitSet.set(j);
+      }
+
+      // Binary Search cannot be done on '@NU#LL$!", so need to check and compare for null on
+      // matching row.
+      if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
+        updateForNoDictionaryColumn(startMin, endMax, dimensionColumnDataChunk, bitSet);
       }
-      endMax = start;
     } else {
-      endMax = numerOfRows - 1;
+      // evaluate result for lower range value first and then perform and operation in the
+      // upper range value in order to compute the final result
+      bitSet = evaluateGreaterThanFilterForUnsortedColumn(dimensionColumnDataChunk, filterValues[0],
+          numerOfRows);
+      // remove null values from lower range selected bitSet values
+      removeNullValues(dimensionColumnDataChunk, bitSet);
+      BitSet upperRangeBitSet =
+          evaluateLessThanFilterForUnsortedColumn(dimensionColumnDataChunk, filterValues[1],
+              numerOfRows);
+      // remove null values from upper range selected bitSet values
+      removeNullValues(dimensionColumnDataChunk, upperRangeBitSet);
+      bitSet.and(upperRangeBitSet);
     }
+    return bitSet;
+  }
 
-    for (int j = startMin; j <= endMax; j++) {
-      bitSet.set(j);
+  /**
+   * This method will compare the selected data against null values and
+   * flip the bitSet if any null value is found
+   *
+   * @param dimensionColumnDataChunk
+   * @param bitSet
+   */
+  private void removeNullValues(DimensionColumnDataChunk dimensionColumnDataChunk, BitSet bitSet) {
+    if (!bitSet.isEmpty()) {
+      for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
+        if (dimensionColumnDataChunk.compareTo(i, CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY)
+            == 0) {
+          bitSet.flip(i);
+        }
+      }
     }
+  }
 
-    // Binary Search cannot be done on '@NU#LL$!", so need to check and compare for null on
-    // matching row.
-    if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
-      updateForNoDictionaryColumn(startMin, endMax, dimensionColumnDataChunk, bitSet);
+  /**
+   * This method will evaluate the result for filter column based on the lower range value
+   *
+   * @param dimensionColumnDataChunk
+   * @param filterValue
+   * @param numberOfRows
+   * @return
+   */
+  private BitSet evaluateGreaterThanFilterForUnsortedColumn(
+      DimensionColumnDataChunk dimensionColumnDataChunk, byte[] filterValue, int numberOfRows) {
+    BitSet bitSet = new BitSet(numberOfRows);
+    if (greaterThanExp) {
+      for (int i = 0; i < numberOfRows; i++) {
+        if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValue) > 0)) {
+          bitSet.set(i);
+        }
+      }
+    } else if (greaterThanEqualExp) {
+      for (int i = 0; i < numberOfRows; i++) {
+        if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValue) >= 0)) {
+          bitSet.set(i);
+        }
+      }
     }
+    return bitSet;
+  }
 
+  /**
+   * This method will evaluate the result for filter column based on the upper range value
+   *
+   * @param dimensionColumnDataChunk
+   * @param filterValue
+   * @param numberOfRows
+   * @return
+   */
+  private BitSet evaluateLessThanFilterForUnsortedColumn(
+      DimensionColumnDataChunk dimensionColumnDataChunk, byte[] filterValue, int numberOfRows) {
+    BitSet bitSet = new BitSet(numberOfRows);
+    if (lessThanExp) {
+      for (int i = 0; i < numberOfRows; i++) {
+        if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValue) < 0)) {
+          bitSet.set(i);
+        }
+      }
+    } else if (lessThanEqualExp) {
+      for (int i = 0; i < numberOfRows; i++) {
+        if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValue) <= 0)) {
+          bitSet.set(i);
+        }
+      }
+    }
     return bitSet;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3ae44724/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
index 6f8651a..daa7280 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
@@ -228,39 +228,51 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
   private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
       int numerOfRows) {
     BitSet bitSet = new BitSet(numerOfRows);
-    int start = 0;
-    int last = 0;
-    int startIndex = 0;
     byte[][] filterValues = this.filterRangeValues;
-    for (int k = 0; k < filterValues.length; k++) {
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
-              filterValues[k], true);
-      if (start >= 0) {
+    // binary search can only be applied if column is sorted and
+    // inverted index exists for that column
+    if (dimensionColumnDataChunk.isExplicitSorted()) {
+      int start = 0;
+      int last = 0;
+      int startIndex = 0;
+      for (int k = 0; k < filterValues.length; k++) {
         start = CarbonUtil
-            .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[k],
-                numerOfRows);
-      }
-      if (start < 0) {
-        start = -(start + 1);
-        if (start == numerOfRows) {
-          start = start - 1;
+            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                filterValues[k], true);
+        if (start >= 0) {
+          start = CarbonUtil
+              .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[k],
+                  numerOfRows);
         }
-        // Method will compare the tentative index value after binary search, this tentative
-        // index needs to be compared by the filter member if its > filter then from that
-        // index the bitset will be considered for filtering process.
-        if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) > 0) {
-          start = start + 1;
+        if (start < 0) {
+          start = -(start + 1);
+          if (start == numerOfRows) {
+            start = start - 1;
+          }
+          // Method will compare the tentative index value after binary search, this tentative
+          // index needs to be compared by the filter member if its > filter then from that
+          // index the bitset will be considered for filtering process.
+          if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) > 0) {
+            start = start + 1;
+          }
+        }
+        last = start;
+        for (int j = start; j < numerOfRows; j++) {
+          bitSet.set(j);
+          last++;
+        }
+        startIndex = last;
+        if (startIndex >= numerOfRows) {
+          break;
         }
       }
-      last = start;
-      for (int j = start; j < numerOfRows; j++) {
-        bitSet.set(j);
-        last++;
-      }
-      startIndex = last;
-      if (startIndex >= numerOfRows) {
-        break;
+    } else {
+      for (int k = 0; k < filterValues.length; k++) {
+        for (int i = 0; i < numerOfRows; i++) {
+          if (ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValues[k]) > 0) {
+            bitSet.set(i);
+          }
+        }
       }
     }
     return bitSet;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3ae44724/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
index fbc9b30..7c2a65a 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
@@ -218,35 +218,47 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
   private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
       int numerOfRows) {
     BitSet bitSet = new BitSet(numerOfRows);
-    int start = 0;
-    int last = 0;
-    int startIndex = 0;
     byte[][] filterValues = this.filterRangeValues;
-    for (int k = 0; k < filterValues.length; k++) {
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
-              filterValues[k], false);
-      if (start < 0) {
-        start = -(start + 1);
-        if (start == numerOfRows) {
-          start = start - 1;
-        }
-        // Method will compare the tentative index value after binary search, this tentative
-        // index needs to be compared by the filter member if its >= filter then from that
-        // index the bitset will be considered for filtering process.
-        if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) > 0) {
-          start = start + 1;
+    // binary search can only be applied if column is sorted and
+    // inverted index exists for that column
+    if (dimensionColumnDataChunk.isExplicitSorted()) {
+      int start = 0;
+      int last = 0;
+      int startIndex = 0;
+      for (int k = 0; k < filterValues.length; k++) {
+        start = CarbonUtil
+            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                filterValues[k], false);
+        if (start < 0) {
+          start = -(start + 1);
+          if (start == numerOfRows) {
+            start = start - 1;
+          }
+          // Method will compare the tentative index value after binary search, this tentative
+          // index needs to be compared by the filter member if its >= filter then from that
+          // index the bitset will be considered for filtering process.
+          if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) > 0) {
+            start = start + 1;
+          }
         }
-      }
 
-      last = start;
-      for (int j = start; j < numerOfRows; j++) {
-        bitSet.set(j);
-        last++;
+        last = start;
+        for (int j = start; j < numerOfRows; j++) {
+          bitSet.set(j);
+          last++;
+        }
+        startIndex = last;
+        if (startIndex >= numerOfRows) {
+          break;
+        }
       }
-      startIndex = last;
-      if (startIndex >= numerOfRows) {
-        break;
+    } else {
+      for (int k = 0; k < filterValues.length; k++) {
+        for (int i = 0; i < numerOfRows; i++) {
+          if (ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValues[k]) >= 0) {
+            bitSet.set(i);
+          }
+        }
       }
     }
     return bitSet;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3ae44724/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
index 6341c3b..7a8d152 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
@@ -242,51 +242,63 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
   private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
       int numerOfRows, byte[] defaultValue) {
     BitSet bitSet = new BitSet(numerOfRows);
-    int start = 0;
-    int last = 0;
-    int startIndex = 0;
     byte[][] filterValues = this.filterRangeValues;
-    int skip = 0;
-    //find the number of default values to skip the null value in case of direct dictionary
-    if (null != defaultValue) {
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
-              defaultValue, true);
-      if (start < 0) {
-        skip = -(start + 1);
-        // end of block
-        if (skip == numerOfRows) {
-          return bitSet;
+    // binary search can only be applied if column is sorted and
+    // inverted index exists for that column
+    if (dimensionColumnDataChunk.isExplicitSorted()) {
+      int start = 0;
+      int last = 0;
+      int startIndex = 0;
+      int skip = 0;
+      //find the number of default values to skip the null value in case of direct dictionary
+      if (null != defaultValue) {
+        start = CarbonUtil
+            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                defaultValue, true);
+        if (start < 0) {
+          skip = -(start + 1);
+          // end of block
+          if (skip == numerOfRows) {
+            return bitSet;
+          }
+        } else {
+          skip = start;
         }
-      } else {
-        skip = start;
+        startIndex = skip;
       }
-      startIndex = skip;
-    }
-    for (int k = 0; k < filterValues.length; k++) {
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
-              filterValues[k], true);
-      if (start < 0) {
-        start = -(start + 1);
-        if (start >= numerOfRows) {
-          start = start - 1;
+      for (int k = 0; k < filterValues.length; k++) {
+        start = CarbonUtil
+            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                filterValues[k], true);
+        if (start < 0) {
+          start = -(start + 1);
+          if (start >= numerOfRows) {
+            start = start - 1;
+          }
+          // When negative value of start is returned from getFirstIndexUsingBinarySearch the Start
+          // will be pointing to the next consecutive position. So compare it again and point to the
+          // previous value returned from getFirstIndexUsingBinarySearch.
+          if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) < 0) {
+            start = start - 1;
+          }
         }
-        // When negative value of start is returned from getFirstIndexUsingBinarySearch the Start
-        // will be pointing to the next consecutive position. So compare it again and point to the
-        // previous value returned from getFirstIndexUsingBinarySearch.
-        if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) < 0) {
-          start = start - 1;
+        last = start;
+        for (int j = start; j >= skip; j--) {
+          bitSet.set(j);
+          last--;
+        }
+        startIndex = last;
+        if (startIndex <= 0) {
+          break;
         }
       }
-      last = start;
-      for (int j = start; j >= skip; j--) {
-        bitSet.set(j);
-        last--;
-      }
-      startIndex = last;
-      if (startIndex <= 0) {
-        break;
+    } else {
+      for (int k = 0; k < filterValues.length; k++) {
+        for (int i = 0; i < numerOfRows; i++) {
+          if (ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValues[k]) <= 0) {
+            bitSet.set(i);
+          }
+        }
       }
     }
     return bitSet;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3ae44724/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
index 47ddfc4..97d4cf5 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
@@ -251,56 +251,68 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
   private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
       int numerOfRows, byte[] defaultValue) {
     BitSet bitSet = new BitSet(numerOfRows);
-    int start = 0;
-    int last = 0;
-    int startIndex = 0;
-    int skip = 0;
     byte[][] filterValues = this.filterRangeValues;
-    //find the number of default values to skip the null value in case of direct dictionary
-    if (null != defaultValue) {
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
-              defaultValue, false);
-      if (start < 0) {
-        skip = -(start + 1);
-        // end of block
-        if (skip == numerOfRows) {
-          return bitSet;
+    // binary search can only be applied if column is sorted and
+    // inverted index exists for that column
+    if (dimensionColumnDataChunk.isExplicitSorted()) {
+      int start = 0;
+      int last = 0;
+      int startIndex = 0;
+      int skip = 0;
+      //find the number of default values to skip the null value in case of direct dictionary
+      if (null != defaultValue) {
+        start = CarbonUtil
+            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                defaultValue, false);
+        if (start < 0) {
+          skip = -(start + 1);
+          // end of block
+          if (skip == numerOfRows) {
+            return bitSet;
+          }
+        } else {
+          skip = start;
         }
-      } else {
-        skip = start;
+        startIndex = skip;
       }
-      startIndex = skip;
-    }
-    for (int k = 0; k < filterValues.length; k++) {
-      start = CarbonUtil
-          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
-              filterValues[k], false);
-      if (start >= 0) {
-        start =
-            CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[k]);
-      }
-      if (start < 0) {
-        start = -(start + 1);
+      for (int k = 0; k < filterValues.length; k++) {
+        start = CarbonUtil
+            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+                filterValues[k], false);
+        if (start >= 0) {
+          start =
+              CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[k]);
+        }
+        if (start < 0) {
+          start = -(start + 1);
 
-        if (start >= numerOfRows) {
-          start = numerOfRows - 1;
+          if (start >= numerOfRows) {
+            start = numerOfRows - 1;
+          }
+          // When negative value of start is returned from getFirstIndexUsingBinarySearch the Start
+          // will be pointing to the next consecutive position. So compare it again and point to the
+          // previous value returned from getFirstIndexUsingBinarySearch.
+          if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) < 0) {
+            start = start - 1;
+          }
         }
-        // When negative value of start is returned from getFirstIndexUsingBinarySearch the Start
-        // will be pointing to the next consecutive position. So compare it again and point to the
-        // previous value returned from getFirstIndexUsingBinarySearch.
-        if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) < 0) {
-          start = start - 1;
+        last = start;
+        for (int j = start; j >= skip; j--) {
+          bitSet.set(j);
+          last--;
+        }
+        startIndex = last;
+        if (startIndex <= 0) {
+          break;
         }
       }
-      last = start;
-      for (int j = start; j >= skip; j--) {
-        bitSet.set(j);
-        last--;
-      }
-      startIndex = last;
-      if (startIndex <= 0) {
-        break;
+    } else {
+      for (int k = 0; k < filterValues.length; k++) {
+        for (int i = 0; i < numerOfRows; i++) {
+          if (ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValues[k]) < 0) {
+            bitSet.set(i);
+          }
+        }
       }
     }
     return bitSet;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3ae44724/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
index 5704dc8..702af21 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
@@ -23,7 +23,6 @@ import org.scalatest.BeforeAndAfterAll
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
-import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
 
 /**
  * Test Class for no inverted index load and query
@@ -36,8 +35,21 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
   val testData2 = s"$resourcesPath/source.csv"
 
   override def beforeAll {
+    clean
+    sql("""
+           CREATE TABLE hiveNoInvertedIndexTable
+           (id Int, name String, city String) row format delimited fields terminated by ','
+        """)
+    sql(s"""
+           LOAD DATA LOCAL INPATH '$testData1' into table hiveNoInvertedIndexTable
+           """)
+  }
+
+  def clean = {
     sql("DROP TABLE IF EXISTS index1")
     sql("DROP TABLE IF EXISTS index2")
+    sql("DROP TABLE IF EXISTS hiveNoInvertedIndexTable")
+    sql("DROP TABLE IF EXISTS carbonNoInvertedIndexTable")
   }
 
   test("no inverted index load and point query") {
@@ -212,11 +224,46 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
         """),
       Seq(Row(19.0, "Emily", "Bangalore")))
   }
+  
+  test("no inverted index test for row level filter queries") {
+    sql("""
+           CREATE TABLE IF NOT EXISTS carbonNoInvertedIndexTable
+           (id Int, name String, city String)
+           STORED BY 'org.apache.carbondata.format'
+           TBLPROPERTIES('NO_INVERTED_INDEX'='name,city', 'DICTIONARY_EXCLUDE'='city')
+        """)
+    sql(s"""
+           LOAD DATA LOCAL INPATH '$testData1' into table carbonNoInvertedIndexTable
+           OPTIONS('FILEHEADER'='id,name,city', 'BAD_RECORDS_ACTION'='FORCE')
+           """)
+    // row level filter evaluation test
+    checkAnswer(
+      sql("SELECT * FROM hiveNoInvertedIndexTable WHERE city <= 'Shanghai'"),
+      sql("SELECT * FROM carbonNoInvertedIndexTable WHERE city <= 'Shanghai'"))
+    checkAnswer(
+      sql("SELECT * FROM hiveNoInvertedIndexTable WHERE city >= 'Shanghai'"),
+      sql("SELECT * FROM carbonNoInvertedIndexTable WHERE city >= 'Shanghai'"))
+    checkAnswer(
+      sql("SELECT * FROM hiveNoInvertedIndexTable WHERE city < 'Shanghai'"),
+      sql("SELECT * FROM carbonNoInvertedIndexTable WHERE city < 'Shanghai'"))
+    checkAnswer(
+      sql("SELECT * FROM hiveNoInvertedIndexTable WHERE city > 'Shanghai'"),
+      sql("SELECT * FROM carbonNoInvertedIndexTable WHERE city > 'Shanghai'"))
+    // range filter test
+    checkAnswer(
+      sql("SELECT * FROM hiveNoInvertedIndexTable WHERE city > 'Shanghai' and city < 'Washington'"),
+      sql("SELECT * FROM carbonNoInvertedIndexTable WHERE city > 'Shanghai' and city < 'Washington'"))
+    checkAnswer(
+      sql("SELECT * FROM hiveNoInvertedIndexTable WHERE city >= 'Shanghai' and city < 'Washington'"),
+      sql("SELECT * FROM carbonNoInvertedIndexTable WHERE city >= 'Shanghai' and city < 'Washington'"))
+    checkAnswer(
+      sql("SELECT * FROM hiveNoInvertedIndexTable WHERE city > 'Shanghai' and city <= 'Washington'"),
+      sql("SELECT * FROM carbonNoInvertedIndexTable WHERE city > 'Shanghai' and city <= 'Washington'"))
+  }
 
 
   override def afterAll {
-    sql("drop table index1")
-    sql("drop table index2")
+    clean
   }
 
 }

[2/2] carbondata git commit: [CARBONDATA-1094] Wrong results returned by the query in case inverted index is not created on a column This closes #952

Posted by ku...@apache.org.

[CARBONDATA-1094] Wrong results returned by the query in case inverted index is not created on a column This closes #952


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/59b06b63
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/59b06b63
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/59b06b63

Branch: refs/heads/master
Commit: 59b06b631f92ffea0258f10d90fe59622dc5a1cc
Parents: 7b7579b 3ae4472
Author: kumarvishal <ku...@gmail.com>
Authored: Wed May 31 14:26:46 2017 +0530
Committer: kumarvishal <ku...@gmail.com>
Committed: Wed May 31 14:26:46 2017 +0530

----------------------------------------------------------------------
 .../executer/IncludeFilterExecuterImpl.java     |  40 +++-
 .../executer/RangeValueFilterExecuterImpl.java  | 198 +++++++++++++------
 .../RowLevelRangeGrtThanFiterExecuterImpl.java  |  68 ++++---
 ...elRangeGrtrThanEquaToFilterExecuterImpl.java |  62 +++---
 ...velRangeLessThanEqualFilterExecuterImpl.java |  90 +++++----
 .../RowLevelRangeLessThanFiterExecuterImpl.java |  98 +++++----
 .../TestNoInvertedIndexLoadAndQuery.scala       |  53 ++++-
 7 files changed, 405 insertions(+), 204 deletions(-)
----------------------------------------------------------------------