You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2017/06/08 10:13:09 UTC
[1/2] carbondata git commit: Improve No dictionary column Include And
Exclude filter; Fixed Data mismatch issue; Fixed Alter with Caps Decimal issue
Repository: carbondata
Updated Branches:
refs/heads/master da3a68374 -> 8ba562ab2
Improve No dictionary column Include And Exclude filter
Fixed Data mismatch issue
Fixed Alter with Caps Decimal issue
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d963a706
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d963a706
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d963a706
Branch: refs/heads/master
Commit: d963a706c39c97993df84081a30837e4b78c7115
Parents: da3a683
Author: kumarvishal <ku...@gmail.com>
Authored: Wed May 31 15:49:54 2017 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu Jun 8 15:41:50 2017 +0530
----------------------------------------------------------------------
.../schema/table/column/CarbonDimension.java | 7 ++
.../carbondata/core/scan/filter/FilterUtil.java | 18 +++
.../executer/ExcludeFilterExecuterImpl.java | 120 ++++++++-----------
.../executer/IncludeFilterExecuterImpl.java | 117 +++++++-----------
.../executer/RangeValueFilterExecuterImpl.java | 24 ++--
.../executer/RowLevelFilterExecuterImpl.java | 5 +
.../RowLevelRangeGrtThanFiterExecuterImpl.java | 24 ++--
...elRangeGrtrThanEquaToFilterExecuterImpl.java | 23 ++--
...velRangeLessThanEqualFilterExecuterImpl.java | 28 +++--
.../RowLevelRangeLessThanFiterExecuterImpl.java | 27 +++--
10 files changed, 208 insertions(+), 185 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/CarbonDimension.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/CarbonDimension.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/CarbonDimension.java
index bdc7a4c..8d02512 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/CarbonDimension.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/CarbonDimension.java
@@ -122,6 +122,13 @@ public class CarbonDimension extends CarbonColumn {
}
/**
+ * @return true if this column participates in sorting, false otherwise
+ */
+ public boolean isSortColumn() {
+ return this.columnSchema.isSortColumn();
+ }
+
+ /**
* to generate the hash code for this class
*/
@Override public int hashCode() {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
index d816c69..5434a5f 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
@@ -49,6 +49,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants;
import org.apache.carbondata.core.datastore.IndexKey;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.keygenerator.KeyGenException;
import org.apache.carbondata.core.keygenerator.KeyGenerator;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
@@ -1482,4 +1483,21 @@ public final class FilterUtil {
return bitSetGroup;
}
+ /**
+ * This method compares each selected row against the given default (null) value and
+ * clears that row's bit in the bitSet when it matches, removing null rows from the result
+ *
+ * @param dimensionColumnDataChunk
+ * @param bitSet
+ */
+ public static void removeNullValues(DimensionColumnDataChunk dimensionColumnDataChunk,
+ BitSet bitSet, byte[] defaultValue) {
+ if (!bitSet.isEmpty()) {
+ for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
+ if (dimensionColumnDataChunk.compareTo(i, defaultValue) == 0) {
+ bitSet.flip(i);
+ }
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
index 7449781..23209ed 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
@@ -22,8 +22,6 @@ import java.util.BitSet;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
-import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk;
-import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk;
import org.apache.carbondata.core.scan.filter.FilterUtil;
import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
@@ -35,7 +33,10 @@ public class ExcludeFilterExecuterImpl implements FilterExecuter {
protected DimColumnResolvedFilterInfo dimColEvaluatorInfo;
protected DimColumnExecuterFilterInfo dimColumnExecuterInfo;
protected SegmentProperties segmentProperties;
-
+ /**
+ * whether the dimension column data is naturally sorted
+ */
+ private boolean isNaturalSorted;
public ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo,
SegmentProperties segmentProperties) {
this.dimColEvaluatorInfo = dimColEvaluatorInfo;
@@ -43,6 +44,8 @@ public class ExcludeFilterExecuterImpl implements FilterExecuter {
this.segmentProperties = segmentProperties;
FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties,
dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo);
+ isNaturalSorted = dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo
+ .getDimension().isSortColumn();
}
@Override public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws IOException {
@@ -69,96 +72,71 @@ public class ExcludeFilterExecuterImpl implements FilterExecuter {
protected BitSet getFilteredIndexes(DimensionColumnDataChunk dimColumnDataChunk,
int numerOfRows) {
- // For high cardinality dimensions.
- if (dimColumnDataChunk.isNoDicitionaryColumn()
- && dimColumnDataChunk instanceof VariableLengthDimensionDataChunk) {
- return setDirectKeyFilterIndexToBitSet((VariableLengthDimensionDataChunk) dimColumnDataChunk,
- numerOfRows);
- }
- if (dimColumnDataChunk.isExplicitSorted()
- && dimColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
- return setFilterdIndexToBitSetWithColumnIndex(
- (FixedLengthDimensionDataChunk) dimColumnDataChunk, numerOfRows);
+ if (dimColumnDataChunk.isExplicitSorted()) {
+ return setFilterdIndexToBitSetWithColumnIndex(dimColumnDataChunk, numerOfRows);
}
- return setFilterdIndexToBitSet((FixedLengthDimensionDataChunk) dimColumnDataChunk, numerOfRows);
- }
-
- private BitSet setDirectKeyFilterIndexToBitSet(
- VariableLengthDimensionDataChunk dimColumnDataChunk, int numerOfRows) {
- BitSet bitSet = new BitSet(numerOfRows);
- bitSet.flip(0, numerOfRows);
- byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
- for (int i = 0; i < filterValues.length; i++) {
- byte[] filterVal = filterValues[i];
- if (dimColumnDataChunk.isExplicitSorted()) {
- for (int index = 0; index < numerOfRows; index++) {
- if (dimColumnDataChunk.compareTo(index, filterVal) == 0) {
- bitSet.flip(dimColumnDataChunk.getInvertedIndex(index));
- }
- }
- } else {
- for (int index = 0; index < numerOfRows; index++) {
- if (dimColumnDataChunk.compareTo(index, filterVal) == 0) {
- bitSet.flip(index);
- }
- }
- }
- }
- return bitSet;
-
+ return setFilterdIndexToBitSet(dimColumnDataChunk, numerOfRows);
}
private BitSet setFilterdIndexToBitSetWithColumnIndex(
- FixedLengthDimensionDataChunk dimColumnDataChunk, int numerOfRows) {
- int startKey = 0;
- int last = 0;
- int startIndex = 0;
+ DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows) {
BitSet bitSet = new BitSet(numerOfRows);
bitSet.flip(0, numerOfRows);
+ int startIndex = 0;
byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
for (int i = 0; i < filterValues.length; i++) {
- startKey = CarbonUtil
- .getFirstIndexUsingBinarySearch(dimColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[i], false);
- if (startKey < 0) {
- continue;
- }
- bitSet.flip(dimColumnDataChunk.getInvertedIndex(startKey));
- last = startKey;
- for (int j = startKey + 1; j < numerOfRows; j++) {
- if (dimColumnDataChunk.compareTo(j, filterValues[i]) == 0) {
- bitSet.flip(dimColumnDataChunk.getInvertedIndex(j));
- last++;
- } else {
- break;
- }
- }
- startIndex = last;
if (startIndex >= numerOfRows) {
break;
}
+ int[] rangeIndex = CarbonUtil
+ .getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+ filterValues[i]);
+ for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
+ bitSet.flip(dimensionColumnDataChunk.getInvertedIndex(j));
+ }
+ if (rangeIndex[1] >= 0) {
+ startIndex = rangeIndex[1] + 1;
+ }
}
return bitSet;
}
- // use binary search to replace for clause
- private BitSet setFilterdIndexToBitSet(FixedLengthDimensionDataChunk dimColumnDataChunk,
+ private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows) {
BitSet bitSet = new BitSet(numerOfRows);
bitSet.flip(0, numerOfRows);
byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
- if (filterValues.length > 1) {
- for (int j = 0; j < numerOfRows; j++) {
- int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
- dimColumnDataChunk.getChunkData(j));
- if (index >= 0) {
+ // binary search can only be applied if column is sorted
+ if (isNaturalSorted) {
+ int startIndex = 0;
+ for (int i = 0; i < filterValues.length; i++) {
+ if (startIndex >= numerOfRows) {
+ break;
+ }
+ int[] rangeIndex = CarbonUtil
+ .getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+ filterValues[i]);
+ for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
bitSet.flip(j);
}
+ if (rangeIndex[1] >= 0) {
+ startIndex = rangeIndex[1] + 1;
+ }
}
- } else if (filterValues.length == 1) {
- for (int j = 0; j < numerOfRows; j++) {
- if (dimColumnDataChunk.compareTo(j, filterValues[0]) == 0) {
- bitSet.flip(j);
+ } else {
+ if (filterValues.length > 1) {
+ for (int i = 0; i < numerOfRows; i++) {
+ int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
+ dimensionColumnDataChunk.getChunkData(i));
+ if (index >= 0) {
+ bitSet.flip(i);
+ }
+ }
+ } else {
+ for (int j = 0; j < numerOfRows; j++) {
+ if (dimensionColumnDataChunk.compareTo(j, filterValues[0]) == 0) {
+ bitSet.flip(j);
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
index 804e598..8704496 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
@@ -22,8 +22,6 @@ import java.util.BitSet;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
-import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk;
-import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk;
import org.apache.carbondata.core.scan.filter.FilterUtil;
import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
@@ -36,6 +34,10 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
protected DimColumnResolvedFilterInfo dimColumnEvaluatorInfo;
protected DimColumnExecuterFilterInfo dimColumnExecuterInfo;
protected SegmentProperties segmentProperties;
+ /**
+ * whether the dimension column data is naturally sorted
+ */
+ private boolean isNaturalSorted;
public IncludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColumnEvaluatorInfo,
SegmentProperties segmentProperties) {
@@ -44,7 +46,9 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
dimColumnExecuterInfo = new DimColumnExecuterFilterInfo();
FilterUtil.prepareKeysFromSurrogates(dimColumnEvaluatorInfo.getFilterValues(),
segmentProperties, dimColumnEvaluatorInfo.getDimension(), dimColumnExecuterInfo);
-
+ isNaturalSorted =
+ dimColumnEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColumnEvaluatorInfo
+ .getDimension().isSortColumn();
}
@Override public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws IOException {
@@ -76,58 +80,29 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
protected BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows) {
- if (dimensionColumnDataChunk.isNoDicitionaryColumn()
- && dimensionColumnDataChunk instanceof VariableLengthDimensionDataChunk) {
- return setDirectKeyFilterIndexToBitSet(
- (VariableLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows);
- } else if (dimensionColumnDataChunk.isExplicitSorted()
- && dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
- return setFilterdIndexToBitSetWithColumnIndex(
- (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows);
+ if (dimensionColumnDataChunk.isExplicitSorted()) {
+ return setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows);
}
-
return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows);
}
- private BitSet setDirectKeyFilterIndexToBitSet(
- VariableLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows) {
- BitSet bitSet = new BitSet(numerOfRows);
- byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
- for (int i = 0; i < filterValues.length; i++) {
- byte[] filterVal = filterValues[i];
- if (dimensionColumnDataChunk.isExplicitSorted()) {
- for (int index = 0; index < numerOfRows; index++) {
- if (dimensionColumnDataChunk.compareTo(index, filterVal) == 0) {
- bitSet.set(dimensionColumnDataChunk.getInvertedIndex(index));
- }
- }
- } else {
- for (int index = 0; index < numerOfRows; index++) {
- if (dimensionColumnDataChunk.compareTo(index, filterVal) == 0) {
- bitSet.set(index);
- }
- }
- }
- }
- return bitSet;
-
- }
-
private BitSet setFilterdIndexToBitSetWithColumnIndex(
- FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows) {
+ DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows) {
BitSet bitSet = new BitSet(numerOfRows);
int startIndex = 0;
byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
for (int i = 0; i < filterValues.length; i++) {
- int[] rangeIndex = CarbonUtil.getRangeIndexUsingBinarySearch(dimensionColumnDataChunk,
- startIndex, numerOfRows - 1, filterValues[i]);
+ if (startIndex >= numerOfRows) {
+ break;
+ }
+ int[] rangeIndex = CarbonUtil
+ .getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+ filterValues[i]);
for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
-
bitSet.set(dimensionColumnDataChunk.getInvertedIndex(j));
}
-
if (rangeIndex[1] >= 0) {
- startIndex = rangeIndex[1];
+ startIndex = rangeIndex[1] + 1;
}
}
return bitSet;
@@ -136,40 +111,38 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows) {
BitSet bitSet = new BitSet(numerOfRows);
- if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
+ byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
+ // binary search can only be applied if column is sorted and
+ // inverted index exists for that column
+ if (isNaturalSorted) {
int startIndex = 0;
- byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
- // binary search can only be applied if column is sorted and
- // inverted index exists for that column
- if (dimensionColumnDataChunk.isExplicitSorted()) {
- for (int i = 0; i < filterValues.length; i++) {
- if (startIndex >= numerOfRows) {
- break;
- }
- int[] rangeIndex = CarbonUtil
- .getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[i]);
- for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
- bitSet.set(j);
- }
- if (rangeIndex[1] >= 0) {
- startIndex = rangeIndex[1] + 1;
+ for (int i = 0; i < filterValues.length; i++) {
+ if (startIndex >= numerOfRows) {
+ break;
+ }
+ int[] rangeIndex = CarbonUtil
+ .getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
+ filterValues[i]);
+ for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
+ bitSet.set(j);
+ }
+ if (rangeIndex[1] >= 0) {
+ startIndex = rangeIndex[1] + 1;
+ }
+ }
+ } else {
+ if (filterValues.length > 1) {
+ for (int i = 0; i < numerOfRows; i++) {
+ int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
+ dimensionColumnDataChunk.getChunkData(i));
+ if (index >= 0) {
+ bitSet.set(i);
}
}
} else {
- if (filterValues.length > 1) {
- for (int i = 0; i < numerOfRows; i++) {
- int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
- dimensionColumnDataChunk.getChunkData(i));
- if (index >= 0) {
- bitSet.set(i);
- }
- }
- } else {
- for (int j = 0; j < numerOfRows; j++) {
- if (dimensionColumnDataChunk.compareTo(j, filterValues[0]) == 0) {
- bitSet.set(j);
- }
+ for (int j = 0; j < numerOfRows; j++) {
+ if (dimensionColumnDataChunk.compareTo(j, filterValues[0]) == 0) {
+ bitSet.set(j);
}
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
index 40e0006..6823531 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
@@ -24,7 +24,10 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
+import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
+import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression;
@@ -48,8 +51,6 @@ import org.apache.carbondata.core.util.CarbonUtil;
public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl {
private DimColumnResolvedFilterInfo dimColEvaluatorInfo;
- private MeasureColumnResolvedFilterInfo msrColEvalutorInfo;
- private AbsoluteTableIdentifier tableIdentifier;
private Expression exp;
private byte[][] filterRangesValues;
private SegmentProperties segmentProperties;
@@ -78,10 +79,8 @@ public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl {
SegmentProperties segmentProperties) {
this.dimColEvaluatorInfo = dimColEvaluatorInfo;
- this.msrColEvalutorInfo = msrColEvaluatorInfo;
this.exp = exp;
this.segmentProperties = segmentProperties;
- this.tableIdentifier = tableIdentifier;
this.filterRangesValues = filterRangeValues;
this.lessThanExp = isLessThan();
this.lessThanEqualExp = isLessThanEqualTo();
@@ -549,18 +548,27 @@ public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl {
updateForNoDictionaryColumn(startMin, endMax, dimensionColumnDataChunk, bitSet);
}
} else {
+ byte[] defaultValue = null;
+ if (dimColEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
+ DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
+ .getDirectDictionaryGenerator(dimColEvaluatorInfo.getDimension().getDataType());
+ int key = directDictionaryGenerator.generateDirectSurrogateKey(null) + 1;
+ CarbonDimension currentBlockDimension =
+ segmentProperties.getDimensions().get(dimensionBlocksIndex);
+ defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
+ this.segmentProperties.getSortColumnsGenerator());
+ } else {
+ defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+ }
// evaluate result for lower range value first and then perform and operation in the
// upper range value in order to compute the final result
bitSet = evaluateGreaterThanFilterForUnsortedColumn(dimensionColumnDataChunk, filterValues[0],
numerOfRows);
- // remove null values from lower range selected bitSet values
- removeNullValues(dimensionColumnDataChunk, bitSet);
BitSet upperRangeBitSet =
evaluateLessThanFilterForUnsortedColumn(dimensionColumnDataChunk, filterValues[1],
numerOfRows);
- // remove null values from upper range selected bitSet values
- removeNullValues(dimensionColumnDataChunk, upperRangeBitSet);
bitSet.and(upperRangeBitSet);
+ FilterUtil.removeNullValues(dimensionColumnDataChunk, bitSet, defaultValue);
}
return bitSet;
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
index e4f5dbd..35825ef 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java
@@ -95,6 +95,11 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter {
*/
protected boolean[] isMeasurePresentInCurrentBlock;
+ /**
+ * whether the dimension column data is naturally sorted
+ */
+ protected boolean isNaturalSorted;
+
public RowLevelFilterExecuterImpl(List<DimColumnResolvedFilterInfo> dimColEvaluatorInfoList,
List<MeasureColumnResolvedFilterInfo> msrColEvalutorInfoList, Expression exp,
AbsoluteTableIdentifier tableIdentifier, SegmentProperties segmentProperties,
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
index daa7280..be82be7 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.BitSet;
import java.util.List;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
@@ -43,7 +44,6 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
* flag to check whether default values is present in the filter value list
*/
private boolean isDefaultValuePresentInFilter;
-
public RowLevelRangeGrtThanFiterExecuterImpl(
List<DimColumnResolvedFilterInfo> dimColEvaluatorInfoList,
List<MeasureColumnResolvedFilterInfo> msrColEvalutorInfoList, Expression exp,
@@ -52,6 +52,8 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
super(dimColEvaluatorInfoList, msrColEvalutorInfoList, exp, tableIdentifier, segmentProperties,
null);
this.filterRangeValues = filterRangeValues;
+ isNaturalSorted = dimColEvaluatorInfoList.get(0).getDimension().isUseInvertedIndex()
+ && dimColEvaluatorInfoList.get(0).getDimension().isSortColumn();
ifDefaultValueMatchesFilter();
}
@@ -150,10 +152,17 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows) {
+ BitSet bitSet = null;
if (dimensionColumnDataChunk.isExplicitSorted()) {
- return setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows);
+ bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows);
+ } else {
+ bitSet = setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows);
+ }
+ if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
+ FilterUtil.removeNullValues(dimensionColumnDataChunk, bitSet,
+ CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
}
- return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows);
+ return bitSet;
}
/**
@@ -229,16 +238,15 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
int numerOfRows) {
BitSet bitSet = new BitSet(numerOfRows);
byte[][] filterValues = this.filterRangeValues;
- // binary search can only be applied if column is sorted and
- // inverted index exists for that column
- if (dimensionColumnDataChunk.isExplicitSorted()) {
+ // binary search can only be applied if column is sorted
+ if (isNaturalSorted) {
int start = 0;
int last = 0;
int startIndex = 0;
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil
- .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k], true);
+ .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
+ numerOfRows - 1, filterValues[k], true);
if (start >= 0) {
start = CarbonUtil
.nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[k],
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
index 7c2a65a..53da6c5 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.BitSet;
import java.util.List;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
@@ -53,6 +54,8 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
super(dimColEvaluatorInfoList, msrColEvalutorInfoList, exp, tableIdentifier, segmentProperties,
null);
this.filterRangeValues = filterRangeValues;
+ isNaturalSorted = dimColEvaluatorInfoList.get(0).getDimension().isUseInvertedIndex()
+ && dimColEvaluatorInfoList.get(0).getDimension().isSortColumn();
ifDefaultValueMatchesFilter();
}
@@ -151,10 +154,17 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows) {
+ BitSet bitSet = null;
if (dimensionColumnDataChunk.isExplicitSorted()) {
- return setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows);
+ bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows);
+ } else {
+ bitSet = setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows);
+ }
+ if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
+ FilterUtil.removeNullValues(dimensionColumnDataChunk, bitSet,
+ CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
}
- return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows);
+ return bitSet;
}
/**
@@ -219,16 +229,15 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
int numerOfRows) {
BitSet bitSet = new BitSet(numerOfRows);
byte[][] filterValues = this.filterRangeValues;
- // binary search can only be applied if column is sorted and
- // inverted index exists for that column
- if (dimensionColumnDataChunk.isExplicitSorted()) {
+ // binary search can only be applied if column is sorted
+ if (isNaturalSorted) {
int start = 0;
int last = 0;
int startIndex = 0;
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil
- .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k], false);
+ .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
+ numerOfRows - 1, filterValues[k], false);
if (start < 0) {
start = -(start + 1);
if (start == numerOfRows) {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
index 7a8d152..d694960 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.BitSet;
import java.util.List;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
@@ -45,7 +46,6 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
* flag to check whether default values is present in the filter value list
*/
private boolean isDefaultValuePresentInFilter;
-
public RowLevelRangeLessThanEqualFilterExecuterImpl(
List<DimColumnResolvedFilterInfo> dimColEvaluatorInfoList,
List<MeasureColumnResolvedFilterInfo> msrColEvalutorInfoList, Expression exp,
@@ -55,6 +55,8 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
null);
this.filterRangeValues = filterRangeValues;
ifDefaultValueMatchesFilter();
+ isNaturalSorted = dimColEvaluatorInfoList.get(0).getDimension().isUseInvertedIndex()
+ && dimColEvaluatorInfoList.get(0).getDimension().isSortColumn();
}
/**
@@ -155,11 +157,18 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
}
+ BitSet bitSet = null;
if (dimensionColumnDataChunk.isExplicitSorted()) {
- return setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows,
+ bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows,
defaultValue);
+ } else {
+ bitSet = setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, defaultValue);
+ }
+ if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
+ FilterUtil.removeNullValues(dimensionColumnDataChunk, bitSet,
+ CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
}
- return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, defaultValue);
+ return bitSet;
}
/**
@@ -243,9 +252,8 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
int numerOfRows, byte[] defaultValue) {
BitSet bitSet = new BitSet(numerOfRows);
byte[][] filterValues = this.filterRangeValues;
- // binary search can only be applied if column is sorted and
- // inverted index exists for that column
- if (dimensionColumnDataChunk.isExplicitSorted()) {
+ // binary search can only be applied if column is sorted
+ if (isNaturalSorted) {
int start = 0;
int last = 0;
int startIndex = 0;
@@ -253,8 +261,8 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
//find the number of default values to skip the null value in case of direct dictionary
if (null != defaultValue) {
start = CarbonUtil
- .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- defaultValue, true);
+ .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
+ numerOfRows - 1, defaultValue, true);
if (start < 0) {
skip = -(start + 1);
// end of block
@@ -268,8 +276,8 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
}
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil
- .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k], true);
+ .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
+ numerOfRows - 1, filterValues[k], true);
if (start < 0) {
start = -(start + 1);
if (start >= numerOfRows) {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d963a706/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
index 97d4cf5..b3dd921 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.BitSet;
import java.util.List;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
@@ -55,6 +56,8 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
null);
this.filterRangeValues = filterRangeValues;
ifDefaultValueMatchesFilter();
+ isNaturalSorted = dimColEvaluatorInfoList.get(0).getDimension().isUseInvertedIndex()
+ && dimColEvaluatorInfoList.get(0).getDimension().isSortColumn();
}
/**
@@ -155,11 +158,18 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
}
+ BitSet bitSet = null;
if (dimensionColumnDataChunk.isExplicitSorted()) {
- return setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows,
+ bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows,
defaultValue);
+ } else {
+ bitSet = setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, defaultValue);
+ }
+ if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
+ FilterUtil.removeNullValues(dimensionColumnDataChunk, bitSet,
+ CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
}
- return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, defaultValue);
+ return bitSet;
}
/**
@@ -252,9 +262,8 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
int numerOfRows, byte[] defaultValue) {
BitSet bitSet = new BitSet(numerOfRows);
byte[][] filterValues = this.filterRangeValues;
- // binary search can only be applied if column is sorted and
- // inverted index exists for that column
- if (dimensionColumnDataChunk.isExplicitSorted()) {
+ // binary search can only be applied if column is sorted
+ if (isNaturalSorted) {
int start = 0;
int last = 0;
int startIndex = 0;
@@ -262,8 +271,8 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
//find the number of default values to skip the null value in case of direct dictionary
if (null != defaultValue) {
start = CarbonUtil
- .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- defaultValue, false);
+ .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
+ numerOfRows - 1, defaultValue, false);
if (start < 0) {
skip = -(start + 1);
// end of block
@@ -277,8 +286,8 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
}
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil
- .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
- filterValues[k], false);
+ .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
+ numerOfRows - 1, filterValues[k], false);
if (start >= 0) {
start =
CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[k]);
[2/2] carbondata git commit: [CARBONDATA-1111] Improve No dictionary
column Include And Exclude filter And Fixed Data Mismatch Issue In case of
range Filter. This closes #981
Posted by ra...@apache.org.
[CARBONDATA-1111] Improve No dictionary column Include And Exclude filter And Fixed Data Mismatch Issue In case of range Filter. This closes #981
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8ba562ab
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8ba562ab
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8ba562ab
Branch: refs/heads/master
Commit: 8ba562ab2692fa3c646d568bcb8210156f68f5ea
Parents: da3a683 d963a70
Author: ravipesala <ra...@gmail.com>
Authored: Thu Jun 8 15:42:39 2017 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu Jun 8 15:42:39 2017 +0530
----------------------------------------------------------------------
.../schema/table/column/CarbonDimension.java | 7 ++
.../carbondata/core/scan/filter/FilterUtil.java | 18 +++
.../executer/ExcludeFilterExecuterImpl.java | 120 ++++++++-----------
.../executer/IncludeFilterExecuterImpl.java | 117 +++++++-----------
.../executer/RangeValueFilterExecuterImpl.java | 24 ++--
.../executer/RowLevelFilterExecuterImpl.java | 5 +
.../RowLevelRangeGrtThanFiterExecuterImpl.java | 24 ++--
...elRangeGrtrThanEquaToFilterExecuterImpl.java | 23 ++--
...velRangeLessThanEqualFilterExecuterImpl.java | 28 +++--
.../RowLevelRangeLessThanFiterExecuterImpl.java | 27 +++--
10 files changed, 208 insertions(+), 185 deletions(-)
----------------------------------------------------------------------