You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ch...@apache.org on 2016/06/30 17:42:26 UTC
[39/50] [abbrv] incubator-carbondata git commit: [issue-777] Filter
query issue for >, <, <= than filter for timestamp(#778)
[issue-777] Filter query issue for >, <, <= than filter for timestamp(#778)
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/167d5279
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/167d5279
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/167d5279
Branch: refs/heads/master
Commit: 167d52799a87b2d17cd3aa2fb846cfa0a8337e63
Parents: fe7acdc
Author: Mohammad Shahid Khan <mo...@gmail.com>
Authored: Thu Jun 30 04:57:03 2016 +0530
Committer: Venkata Ramana G <g....@gmail.com>
Committed: Thu Jun 30 04:57:03 2016 +0530
----------------------------------------------------------------------
.../org/carbondata/core/util/CarbonUtil.java | 41 ++---
.../impl/AggregateQueryBlockProcessor.java | 2 +-
.../executer/ExcludeFilterExecuterImpl.java | 4 +-
.../executer/IncludeFilterExecuterImpl.java | 4 +-
.../RowLevelRangeGrtThanFiterExecuterImpl.java | 11 +-
...elRangeGrtrThanEquaToFilterExecuterImpl.java | 6 +-
...velRangeLessThanEqualFilterExecuterImpl.java | 66 ++++++-
.../RowLevelRangeLessThanFiterExecuterImpl.java | 69 ++++++--
.../filters/measurefilter/util/FilterUtil.java | 25 +++
.../spark/src/test/resources/filter/emp2.csv | 9 +
.../src/test/resources/filter/emp2allnull.csv | 9 +
.../src/test/resources/filter/emp2nonull.csv | 8 +
.../GrtLtFilterProcessorTestCase.scala | 176 +++++++++++++++++++
13 files changed, 375 insertions(+), 55 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/carbondata/core/util/CarbonUtil.java
index 12cbb6b..b19f8d3 100644
--- a/core/src/main/java/org/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/carbondata/core/util/CarbonUtil.java
@@ -20,27 +20,11 @@
package org.carbondata.core.util;
-import java.io.Closeable;
-import java.io.DataInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.OutputStream;
+import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -866,7 +850,7 @@ public final class CarbonUtil {
}
public static int getFirstIndexUsingBinarySearch(FixedLengthDimensionDataChunk dimColumnDataChunk,
- int low, int high, byte[] compareValue) {
+ int low, int high, byte[] compareValue, boolean matchUpLimit) {
int cmpResult = 0;
while (high >= low) {
int mid = (low + high) / 2;
@@ -879,11 +863,20 @@ public final class CarbonUtil {
high = mid - 1;
} else {
int currentIndex = mid;
- while (currentIndex - 1 >= 0 && ByteUtil.UnsafeComparer.INSTANCE
- .compareTo(dimColumnDataChunk.getCompleteDataChunk(),
- (currentIndex - 1) * compareValue.length, compareValue.length, compareValue, 0,
- compareValue.length) == 0) {
- --currentIndex;
+ if(!matchUpLimit) {
+ while (currentIndex - 1 >= 0 && ByteUtil.UnsafeComparer.INSTANCE
+ .compareTo(dimColumnDataChunk.getCompleteDataChunk(),
+ (currentIndex - 1) * compareValue.length, compareValue.length, compareValue, 0,
+ compareValue.length) == 0) {
+ --currentIndex;
+ }
+ } else {
+ while (currentIndex + 1 <= high && ByteUtil.UnsafeComparer.INSTANCE
+ .compareTo(dimColumnDataChunk.getCompleteDataChunk(),
+ (currentIndex + 1) * compareValue.length, compareValue.length, compareValue, 0,
+ compareValue.length) == 0) {
+ currentIndex++;
+ }
}
return currentIndex;
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/query/carbon/processor/impl/AggregateQueryBlockProcessor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/query/carbon/processor/impl/AggregateQueryBlockProcessor.java b/core/src/main/java/org/carbondata/query/carbon/processor/impl/AggregateQueryBlockProcessor.java
index 260b894..3a9f04c 100644
--- a/core/src/main/java/org/carbondata/query/carbon/processor/impl/AggregateQueryBlockProcessor.java
+++ b/core/src/main/java/org/carbondata/query/carbon/processor/impl/AggregateQueryBlockProcessor.java
@@ -32,7 +32,7 @@ public class AggregateQueryBlockProcessor extends AbstractDataBlockProcessor {
/**
* AggregateQueryScanner constructor
*
- * @param blockExecutionInfos
+ * @param tableBlockExecutionInfos
*/
public AggregateQueryBlockProcessor(BlockExecutionInfo tableBlockExecutionInfos,
FileHolder fileReader) {
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/query/filter/executer/ExcludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/query/filter/executer/ExcludeFilterExecuterImpl.java b/core/src/main/java/org/carbondata/query/filter/executer/ExcludeFilterExecuterImpl.java
index 1f620c8..241e2ee 100644
--- a/core/src/main/java/org/carbondata/query/filter/executer/ExcludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/carbondata/query/filter/executer/ExcludeFilterExecuterImpl.java
@@ -140,7 +140,7 @@ public class ExcludeFilterExecuterImpl implements FilterExecuter {
for (int i = 0; i < filterValues.length; i++) {
startKey = CarbonUtil
.getFirstIndexUsingBinarySearch(dimColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[i]);
+ filterValues[i], false);
if (startKey < 0) {
continue;
}
@@ -175,7 +175,7 @@ public class ExcludeFilterExecuterImpl implements FilterExecuter {
for (int k = 0; k < filterValues.length; k++) {
startKey = CarbonUtil
.getFirstIndexUsingBinarySearch(dimColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k]);
+ filterValues[k], false);
if (startKey < 0) {
continue;
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/query/filter/executer/IncludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/query/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/carbondata/query/filter/executer/IncludeFilterExecuterImpl.java
index 5123ce7..ae43a29 100644
--- a/core/src/main/java/org/carbondata/query/filter/executer/IncludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/carbondata/query/filter/executer/IncludeFilterExecuterImpl.java
@@ -130,7 +130,7 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
for (int i = 0; i < filterValues.length; i++) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[i]);
+ filterValues[i], false);
if (start < 0) {
continue;
}
@@ -167,7 +167,7 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil.getFirstIndexUsingBinarySearch(
(FixedLengthDimensionDataChunk) dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k]);
+ filterValues[k], false);
if (start < 0) {
continue;
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
index 0efca00..45aac01 100644
--- a/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
@@ -117,10 +117,11 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
for (int i = 0; i < filterValues.length; i++) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[i]);
- start = CarbonUtil
- .nextGreaterValueToTarget(start, (FixedLengthDimensionDataChunk) dimensionColumnDataChunk,
- filterValues[i], numerOfRows);
+ filterValues[i], true);
+ if (start >= 0) {
+ start = CarbonUtil.nextGreaterValueToTarget(start,
+ (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, filterValues[i], numerOfRows);
+ }
// Logic will handle the case where the range filter member is not present in block
// in this case the binary search will return the index from where the bit sets will be
// set inorder to apply filters. this is greater than filter so the range will be taken
@@ -175,7 +176,7 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil.getFirstIndexUsingBinarySearch(
(FixedLengthDimensionDataChunk) dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k]);
+ filterValues[k], true);
start = CarbonUtil.nextGreaterValueToTarget(start,
(FixedLengthDimensionDataChunk) dimensionColumnDataChunk, filterValues[k], numerOfRows);
if (start < 0) {
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
index 935c2ce..df3c843 100644
--- a/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
+++ b/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
@@ -118,7 +118,7 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
for (int i = 0; i < filterValues.length; i++) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[i]);
+ filterValues[i], false);
if (start < 0) {
start = -(start + 1);
if (start == numerOfRows) {
@@ -168,7 +168,7 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil.getFirstIndexUsingBinarySearch(
(FixedLengthDimensionDataChunk) dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k]);
+ filterValues[k], false);
if (start < 0) {
start = -(start + 1);
if (start == numerOfRows) {
@@ -177,7 +177,7 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
// Method will compare the tentative index value after binary search, this tentative
// index needs to be compared by the filter member if its >= filter then from that
// index the bitset will be considered for filtering process.
- if (ByteUtil.compare(filterValues[k],dimensionColumnDataChunk.getChunkData(start))
+ if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start))
>= 0) {
start = start + 1;
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
index 27f7935..5319d6f 100644
--- a/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
+++ b/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
@@ -26,6 +26,8 @@ import org.carbondata.core.carbon.datastore.block.SegmentProperties;
import org.carbondata.core.carbon.datastore.chunk.DimensionColumnDataChunk;
import org.carbondata.core.carbon.datastore.chunk.impl.FixedLengthDimensionDataChunk;
import org.carbondata.core.carbon.metadata.encoder.Encoding;
+import org.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
+import org.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.carbondata.core.util.ByteUtil;
import org.carbondata.core.util.CarbonUtil;
import org.carbondata.query.carbon.processor.BlocksChunkHolder;
@@ -33,6 +35,7 @@ import org.carbondata.query.expression.Expression;
import org.carbondata.query.expression.exception.FilterUnsupportedException;
import org.carbondata.query.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
import org.carbondata.query.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
+import org.carbondata.query.filters.measurefilter.util.FilterUtil;
public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilterExecuterImpl {
protected byte[][] filterRangeValues;
@@ -88,12 +91,23 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows) {
+ byte[] defaultValue = null;
+ if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
+ DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
+ .getDirectDictionaryGenerator(
+ dimColEvaluatorInfoList.get(0).getDimension().getDataType());
+ int key = directDictionaryGenerator.generateDirectSurrogateKey(null) + 1;
+ defaultValue = FilterUtil.getMaskKey(key, dimColEvaluatorInfoList.get(0).getDimension(),
+ this.segmentProperties.getDimensionKeyGenerator());
+ }
if (null != dimensionColumnDataChunk.getAttributes().getInvertedIndexes()
&& dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
+
return setFilterdIndexToBitSetWithColumnIndex(
- (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows);
+ (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows, defaultValue);
+
}
- return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows);
+ return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, defaultValue);
}
/**
@@ -107,17 +121,35 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
* @return BitSet.
*/
private BitSet setFilterdIndexToBitSetWithColumnIndex(
- FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows) {
+ FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows,
+ byte[] defaultValue) {
BitSet bitSet = new BitSet(numerOfRows);
int[] columnIndex = dimensionColumnDataChunk.getAttributes().getInvertedIndexes();
int start = 0;
int last = 0;
+ int skip = 0;
int startIndex = 0;
byte[][] filterValues = this.filterRangeValues;
+ //find the number of default values to skip the null value in case of direct dictionary
+ if (null != defaultValue) {
+ start = CarbonUtil
+ .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+ defaultValue, true);
+ if (start < 0) {
+ skip = -(start + 1);
+ // end of block
+ if (skip == numerOfRows) {
+ return bitSet;
+ }
+ } else {
+ skip = start;
+ }
+ startIndex = skip;
+ }
for (int i = 0; i < filterValues.length; i++) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[i]);
+ filterValues[i], true);
if (start < 0) {
start = -(start + 1);
if (start == numerOfRows) {
@@ -133,7 +165,7 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
}
}
last = start;
- for (int j = start; j >= 0; j--) {
+ for (int j = start; j >= skip; j--) {
bitSet.set(columnIndex[j]);
last--;
}
@@ -153,20 +185,38 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
*
* @param dimensionColumnDataChunk
* @param numerOfRows
+ * @param defaultValue
* @return BitSet.
*/
private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
- int numerOfRows) {
+ int numerOfRows, byte[] defaultValue) {
BitSet bitSet = new BitSet(numerOfRows);
if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
int start = 0;
int last = 0;
int startIndex = 0;
byte[][] filterValues = this.filterRangeValues;
+ int skip = 0;
+ //find the number of default values to skip the null value in case of direct dictionary
+ if (null != defaultValue) {
+ start = CarbonUtil.getFirstIndexUsingBinarySearch(
+ (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+ defaultValue, true);
+ if (start < 0) {
+ skip = -(start + 1);
+ // end of block
+ if (skip == numerOfRows) {
+ return bitSet;
+ }
+ } else {
+ skip = start;
+ }
+ startIndex = skip;
+ }
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil.getFirstIndexUsingBinarySearch(
(FixedLengthDimensionDataChunk) dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k]);
+ filterValues[k], true);
if (start < 0) {
start = -(start + 1);
if (start == numerOfRows) {
@@ -181,7 +231,7 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
}
}
last = start;
- for (int j = start; j >= 0; j--) {
+ for (int j = start; j >= skip; j--) {
bitSet.set(j);
last--;
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java b/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
index 1786553..19213fa 100644
--- a/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/carbondata/query/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
@@ -26,6 +26,8 @@ import org.carbondata.core.carbon.datastore.block.SegmentProperties;
import org.carbondata.core.carbon.datastore.chunk.DimensionColumnDataChunk;
import org.carbondata.core.carbon.datastore.chunk.impl.FixedLengthDimensionDataChunk;
import org.carbondata.core.carbon.metadata.encoder.Encoding;
+import org.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
+import org.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.carbondata.core.util.ByteUtil;
import org.carbondata.core.util.CarbonUtil;
import org.carbondata.query.carbon.processor.BlocksChunkHolder;
@@ -33,6 +35,7 @@ import org.carbondata.query.expression.Expression;
import org.carbondata.query.expression.exception.FilterUnsupportedException;
import org.carbondata.query.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
import org.carbondata.query.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
+import org.carbondata.query.filters.measurefilter.util.FilterUtil;
public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecuterImpl {
private byte[][] filterRangeValues;
@@ -88,12 +91,21 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows) {
+ byte[] defaultValue = null;
+ if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
+ DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
+ .getDirectDictionaryGenerator(
+ dimColEvaluatorInfoList.get(0).getDimension().getDataType());
+ int key = directDictionaryGenerator.generateDirectSurrogateKey(null) + 1;
+ defaultValue = FilterUtil.getMaskKey(key, dimColEvaluatorInfoList.get(0).getDimension(),
+ this.segmentProperties.getDimensionKeyGenerator());
+ }
if (null != dimensionColumnDataChunk.getAttributes().getInvertedIndexes()
&& dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
return setFilterdIndexToBitSetWithColumnIndex(
- (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows);
+ (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows, defaultValue);
}
- return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows);
+ return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, defaultValue);
}
/**
@@ -107,17 +119,37 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
* @return BitSet.
*/
private BitSet setFilterdIndexToBitSetWithColumnIndex(
- FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows) {
+ FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows,
+ byte[] defaultValue) {
BitSet bitSet = new BitSet(numerOfRows);
int[] columnIndex = dimensionColumnDataChunk.getAttributes().getInvertedIndexes();
int start = 0;
int last = 0;
int startIndex = 0;
+ int skip = 0;
byte[][] filterValues = this.filterRangeValues;
+
+ //find the number of default values to skip the null value in case of direct dictionary
+ if (null != defaultValue) {
+ start = CarbonUtil
+ .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+ defaultValue, false);
+ if (start < 0) {
+ skip = -(start + 1);
+ // end of block
+ if (skip == numerOfRows) {
+ return bitSet;
+ }
+ } else {
+ skip = start;
+ }
+ startIndex = skip;
+ }
+
for (int i = 0; i < filterValues.length; i++) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[i]);
+ filterValues[i], false);
// Logic will handle the case where the range filter member is not present in block
// in this case the binary search will return the index from where the bit sets will be
// set inorder to apply filters. this is Lesser than filter so the range will be taken
@@ -138,7 +170,7 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
}
}
last = start;
- for (int j = start; j >= 0; j--) {
+ for (int j = start; j >= skip; j--) {
bitSet.set(columnIndex[j]);
last--;
}
@@ -161,23 +193,40 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
* @return BitSet.
*/
private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
- int numerOfRows) {
+ int numerOfRows, byte[] defaultValue) {
BitSet bitSet = new BitSet(numerOfRows);
if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
int start = 0;
int last = 0;
int startIndex = 0;
+ int skip = 0;
byte[][] filterValues = this.filterRangeValues;
+ //find the number of default values to skip the null value in case of direct dictionary
+ if (null != defaultValue) {
+ start = CarbonUtil.getFirstIndexUsingBinarySearch(
+ (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+ defaultValue, false);
+ if (start < 0) {
+ skip = -(start + 1);
+ // end of block
+ if (skip == numerOfRows) {
+ return bitSet;
+ }
+ } else {
+ skip = start;
+ }
+ startIndex = skip;
+ }
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil.getFirstIndexUsingBinarySearch(
(FixedLengthDimensionDataChunk) dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k]);
+ filterValues[k], false);
start = CarbonUtil.nextLesserValueToTarget(start,
(FixedLengthDimensionDataChunk) dimensionColumnDataChunk, filterValues[k]);
if (start < 0) {
start = -(start + 1);
- if (start == numerOfRows) {
- start = start - 1;
+ if (start >= numerOfRows) {
+ start = numerOfRows - 1;
}
// Method will compare the tentative index value after binary search, this tentative
// index needs to be compared by the filter member if its < filter then from that
@@ -187,7 +236,7 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
}
}
last = start;
- for (int j = start; j >= 0; j--) {
+ for (int j = start; j >= skip; j--) {
bitSet.set(j);
last--;
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/core/src/main/java/org/carbondata/query/filters/measurefilter/util/FilterUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/query/filters/measurefilter/util/FilterUtil.java b/core/src/main/java/org/carbondata/query/filters/measurefilter/util/FilterUtil.java
index 8bae3fc..a7a78b0 100644
--- a/core/src/main/java/org/carbondata/query/filters/measurefilter/util/FilterUtil.java
+++ b/core/src/main/java/org/carbondata/query/filters/measurefilter/util/FilterUtil.java
@@ -605,12 +605,37 @@ public final class FilterUtil {
LOGGER.error(e.getMessage());
}
}
+
}
return filterValuesList.toArray(new byte[filterValuesList.size()][]);
}
/**
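+ * Builds the masked key for a single dictionary surrogate of the given dimension.
+ * @param surrogate surrogate key placed at the dimension's key ordinal; all other keys are 0
+ * @param carbonDimension dimension whose key ordinal selects the key slot and the mask ranges
+ * @param blockLevelKeyGenerator key generator used to build the full key before masking
+ * @return the masked key bytes, or null if key generation fails (the error is only logged)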
+ */
+ public static byte[] getMaskKey(int surrogate, CarbonDimension carbonDimension,
+ KeyGenerator blockLevelKeyGenerator) {
+
+ int[] keys = new int[blockLevelKeyGenerator.getDimCount()];
+ byte[] maskedKey = null;
+ Arrays.fill(keys, 0);
+ int[] rangesForMaskedByte =
+ getRangesForMaskedByte((carbonDimension.getKeyOrdinal()), blockLevelKeyGenerator);
+ try {
+ keys[carbonDimension.getKeyOrdinal()] = surrogate;
+ maskedKey = getMaskedKey(rangesForMaskedByte, blockLevelKeyGenerator.generateKey(keys));
+ } catch (KeyGenException e) {
+ LOGGER.error(e.getMessage());
+ }
+ return maskedKey;
+ }
+
+ /**
* Method will return the start key based on KeyGenerator for the respective
* filter resolved instance.
*
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/integration/spark/src/test/resources/filter/emp2.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/filter/emp2.csv b/integration/spark/src/test/resources/filter/emp2.csv
new file mode 100644
index 0000000..171a3c9
--- /dev/null
+++ b/integration/spark/src/test/resources/filter/emp2.csv
@@ -0,0 +1,9 @@
+empid,ename,sal,deptno,mgr,gender,dob,comm,desc
+1,abc,1233,10,2,,2014-07-01 12:07:28,1234.191,string_null
+2,bcd,1322,,3,f,2014-07-01 12:07:28,19.99,int_null
+3,cde,4322,,4,m,,16.996,date_null
+4, ,43243,,5,m,,999.117,string_space
+5,,43242,20,6,m,2017-07-01 12:07:28,99.999,string_null
+6,ijk,,20,6,m,2017-07-01 12:07:28,50089,double_null
+7,pqr,2422,20,6,m,2017-07-01 12:07:28,32.339,decimal_null
+8
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/integration/spark/src/test/resources/filter/emp2allnull.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/filter/emp2allnull.csv b/integration/spark/src/test/resources/filter/emp2allnull.csv
new file mode 100644
index 0000000..9a1962c
--- /dev/null
+++ b/integration/spark/src/test/resources/filter/emp2allnull.csv
@@ -0,0 +1,9 @@
+empid,ename,sal,deptno,mgr,gender,dob,comm,desc
+1,abc,1233,10,2,,,1234.191,string_null
+2,bcd,1322,,3,f,,19.99,int_null
+3,cde,4322,,4,m,,16.996,date_null
+4, ,43243,,5,m,,999.117,string_space
+5,,43242,20,6,m,,99.999,string_null
+6,ijk,,20,6,m,,50089,double_null
+7,pqr,2422,20,6,m,,32.339,decimal_null
+8
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/integration/spark/src/test/resources/filter/emp2nonull.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/filter/emp2nonull.csv b/integration/spark/src/test/resources/filter/emp2nonull.csv
new file mode 100644
index 0000000..db3bd2c
--- /dev/null
+++ b/integration/spark/src/test/resources/filter/emp2nonull.csv
@@ -0,0 +1,8 @@
+empid,ename,sal,deptno,mgr,gender,dob,comm,desc
+1,abc,1233,10,2,,2014-07-01 12:07:28,1234.191,string_null
+2,bcd,1322,,3,f,2014-07-01 12:07:28,19.99,int_null
+3,cde,4322,,4,m,2014-07-01 12:07:28,16.996,date_null
+4, ,43243,,5,m,2014-07-01 12:07:28,999.117,string_space
+5,,43242,20,6,m,2017-07-01 12:07:28,99.999,string_null
+6,ijk,,20,6,m,2017-07-01 12:07:28,50089,double_null
+7,pqr,2422,20,6,m,2017-07-01 12:07:28,32.339,decimal_null
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/167d5279/integration/spark/src/test/scala/org/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
new file mode 100644
index 0000000..8c51106
--- /dev/null
+++ b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.carbondata.spark.testsuite.filterexpr
+
+import java.io.File
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.CarbonHiveContext._
+import org.apache.spark.sql.common.util.QueryTest
+import org.carbondata.core.constants.CarbonCommonConstants
+import org.carbondata.core.util.CarbonProperties
+import org.scalatest.BeforeAndAfterAll
+
+/**
+ * Test class for <, <=, > and >= filter queries on a timestamp column (dob), covering mixed, all-null and no-null data
+ *
+ */
+class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
+
+ override def beforeAll {
+ sql("drop table if exists a12")
+ sql("drop table if exists a12_allnull")
+ sql("drop table if exists a12_no_null")
+
+ sql(
+ "create table a12(empid String,ename String,sal double,deptno int,mgr string,gender string," +
+ "dob timestamp,comm decimal(4,2),desc string) stored by 'org.apache.carbondata.format'"
+ )
+ sql(
+ "create table a12_allnull(empid String,ename String,sal double,deptno int,mgr string,gender" +
+ " string," +
+ "dob timestamp,comm decimal(4,2),desc string) stored by 'org.apache.carbondata.format'"
+ )
+ sql(
+ "create table a12_no_null(empid String,ename String,sal double,deptno int,mgr string,gender" +
+ " string," +
+ "dob timestamp,comm decimal(4,2),desc string) stored by 'org.apache.carbondata.format'"
+ )
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss")
+ val basePath = new File(this.getClass.getResource("/").getPath + "/../../")
+ .getCanonicalPath
+ var testData = basePath + "/src/test/resources/filter/emp2.csv"
+ sql(
+ s"""LOAD DATA LOCAL INPATH '$testData' into table a12 OPTIONS('DELIMITER'=',',
+ 'QUOTECHAR'='"','FILEHEADER'='empid,ename,sal,deptno,mgr,gender,dob,comm,desc')"""
+ .stripMargin
+ )
+ testData = basePath + "/src/test/resources/filter/emp2allnull.csv"
+
+ sql(
+ s"""LOAD DATA LOCAL INPATH '$testData' into table a12_allnull OPTIONS('DELIMITER'=',',
+ 'QUOTECHAR'='"','FILEHEADER'='empid,ename,sal,deptno,mgr,gender,dob,comm,desc')"""
+ .stripMargin
+ )
+ testData = basePath + "/src/test/resources/filter/emp2nonull.csv"
+
+ sql(
+ s"""LOAD DATA LOCAL INPATH '$testData' into table a12_no_null OPTIONS('DELIMITER'=',',
+ 'QUOTECHAR'='"')"""
+ .stripMargin
+ )
+ }
+ //mixed value test
+ test("Less Than Filter") {
+ checkAnswer(
+ sql("select count(empid) from a12 where dob < '2014-07-01 12:07:28'"),
+ Seq(Row(0))
+ )
+ }
+
+ test("Les Than equal Filter") {
+ checkAnswer(
+ sql("select count (empid) from a12 where dob <= '2014-07-01 12:07:28'"),
+ Seq(Row(2))
+ )
+ }
+
+ test("Greater Than Filter") {
+ checkAnswer(
+ sql("select count (empid) from a12 where dob > '2014-07-01 12:07:28'"),
+ Seq(Row(3))
+ )
+ }
+
+ test("Greater Than equal to Filter") {
+ sql("select count (empid) from a12 where dob >= '2014-07-01 12:07:28'").show()
+ checkAnswer(
+ sql("select count (empid) from a12 where dob >= '2014-07-01 12:07:28'"),
+ Seq(Row(5))
+ )
+ }
+ //all null test cases
+
+ test("Less Than Filter all null") {
+ checkAnswer(
+ sql("select count(empid) from a12_allnull where dob < '2014-07-01 12:07:28'"),
+ Seq(Row(0))
+ )
+ }
+
+ test("Les Than equal Filter all null") {
+ checkAnswer(
+ sql("select count (empid) from a12_allnull where dob <= '2014-07-01 12:07:28'"),
+ Seq(Row(0))
+ )
+ }
+
+ test("Greater Than Filter all null") {
+ checkAnswer(
+ sql("select count (empid) from a12_allnull where dob > '2014-07-01 12:07:28'"),
+ Seq(Row(0))
+ )
+ }
+
+ test("Greater Than equal to Filter all null") {
+ checkAnswer(
+ sql("select count (empid) from a12_allnull where dob >= '2014-07-01 12:07:28'"),
+ Seq(Row(0))
+ )
+ }
+
+ //no null test cases
+
+ test("Less Than Filter no null") {
+ checkAnswer(
+ sql("select count(empid) from a12_no_null where dob < '2014-07-01 12:07:28'"),
+ Seq(Row(0))
+ )
+ }
+
+ test("Les Than equal Filter no null") {
+ sql("select empid from a12_no_null where dob <= '2014-07-01 12:07:28'").show()
+ checkAnswer(
+ sql("select count(empid) from a12_no_null where dob <= '2014-07-01 12:07:28'"),
+ Seq(Row(4))
+ )
+ }
+
+ test("Greater Than Filter no null") {
+ checkAnswer(
+ sql("select count (empid) from a12_no_null where dob > '2014-07-01 12:07:28'"),
+ Seq(Row(3))
+ )
+ }
+
+ test("Greater Than equal to Filter no null") {
+ checkAnswer(
+ sql("select count (empid) from a12_no_null where dob >= '2014-07-01 12:07:28'"),
+ Seq(Row(7))
+ )
+ }
+
+ override def afterAll {
+ sql("drop table a12")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+ }
+}