You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/03/21 01:27:12 UTC
[07/50] [abbrv] incubator-kylin git commit: KYLIN-625,
add minimal inverted index
KYLIN-625, add minimal inverted index
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/05c2d096
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/05c2d096
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/05c2d096
Branch: refs/heads/streaming-localdict
Commit: 05c2d096868f468c361225301901ae0631b008dc
Parents: abca45d
Author: Li, Yang <ya...@ebay.com>
Authored: Wed Mar 18 18:22:45 2015 +0800
Committer: Li, Yang <ya...@ebay.com>
Committed: Wed Mar 18 18:22:45 2015 +0800
----------------------------------------------------------------------
.../org/apache/kylin/common/util/ByteArray.java | 6 +
.../apache/kylin/storage/gridtable/GTInfo.java | 57 +++++-
.../storage/gridtable/GTInvertedIndex.java | 201 +++++++++++++++++++
.../gridtable/GTInvertedIndexOfColumn.java | 116 +++++++++++
.../kylin/storage/gridtable/GTRawScanner.java | 16 +-
.../kylin/storage/gridtable/GTRecord.java | 12 +-
.../storage/gridtable/GTRowBlockIndex.java | 5 -
.../storage/gridtable/GTSampleCodeSystem.java | 8 +-
.../apache/kylin/storage/gridtable/GTUtil.java | 23 +++
.../storage/gridtable/GTInvertedIndexTest.java | 165 +++++++++++++++
.../kylin/storage/gridtable/GridTableTest.java | 9 +-
11 files changed, 581 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
index 8c9554a..3f096c6 100644
--- a/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
+++ b/common/src/main/java/org/apache/kylin/common/util/ByteArray.java
@@ -82,6 +82,12 @@ public class ByteArray implements Comparable<ByteArray> {
public void setLength(int length) {
this.length = length;
}
+
+ public ByteArray copy() {
+ ByteArray copy = new ByteArray(length);
+ copy.copyFrom(this);
+ return copy;
+ }
public void copyFrom(ByteArray other) {
System.arraycopy(other.array(), other.offset, data, offset, other.length);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInfo.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInfo.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInfo.java
index 36d3a47..97cac87 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInfo.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInfo.java
@@ -6,9 +6,10 @@ import java.util.Iterator;
import java.util.LinkedList;
import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.model.TblColRef;
public class GTInfo {
-
+
public static Builder builder() {
return new Builder();
}
@@ -20,6 +21,8 @@ public class GTInfo {
int nColumns;
DataType[] colTypes;
BitSet colAll;
+ BitSet colPreferIndex;
+ transient TblColRef[] colRefs;
// grid info
BitSet primaryKey; // columns sorted and unique
@@ -42,6 +45,34 @@ public class GTInfo {
return rowBlockSize > 0;
}
+ public int getRowBlockSize() {
+ return rowBlockSize;
+ }
+
+ public BitSet selectColumnBlocks(BitSet columns) {
+ if (columns == null)
+ columns = colAll;
+
+ BitSet result = new BitSet();
+ for (int i = 0; i < colBlocks.length; i++) {
+ BitSet cb = colBlocks[i];
+ if (cb.intersects(columns)) {
+ result.set(i);
+ }
+ }
+ return result;
+ }
+
+ public TblColRef colRef(int i) {
+ if (colRefs == null) {
+ colRefs = new TblColRef[nColumns];
+ }
+ if (colRefs[i] == null) {
+ colRefs[i] = GTUtil.tblColRef(i, colTypes[i].toString());
+ }
+ return colRefs[i];
+ }
+
void validate() {
if (codeSystem == null)
@@ -51,7 +82,7 @@ public class GTInfo {
throw new IllegalStateException();
codeSystem.init(this);
-
+
validateColumnBlocks();
}
@@ -60,7 +91,10 @@ public class GTInfo {
colAll.flip(0, nColumns);
colBlocksAll = new BitSet();
colBlocksAll.flip(0, colBlocks.length);
-
+
+ if (colPreferIndex == null)
+ colPreferIndex = new BitSet();
+
// column blocks must not overlap
for (int i = 0; i < colBlocks.length; i++) {
for (int j = i + 1; j < colBlocks.length; j++) {
@@ -68,7 +102,7 @@ public class GTInfo {
throw new IllegalStateException();
}
}
-
+
// column block must cover all columns
BitSet merge = new BitSet();
for (int i = 0; i < colBlocks.length; i++) {
@@ -120,7 +154,7 @@ public class GTInfo {
}
return this;
}
-
+
/** required */
public Builder setPrimaryKey(BitSet primaryKey) {
info.primaryKey = primaryKey;
@@ -138,22 +172,29 @@ public class GTInfo {
info.colBlocks = columnBlocks;
return this;
}
-
+
/** optional */
public Builder enableRowBlock(int rowBlockSize) {
info.rowBlockSize = rowBlockSize;
return this;
}
-
+
/** optional */
public Builder enableSharding(int nShards) {
info.nShards = nShards;
return this;
}
-
+
+ /** optional */
+ public Builder setColumnPreferIndex(BitSet colPreferIndex) {
+ info.colPreferIndex = colPreferIndex;
+ return this;
+ }
+
public GTInfo build() {
info.validate();
return info;
}
}
+
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInvertedIndex.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInvertedIndex.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInvertedIndex.java
new file mode 100644
index 0000000..3c8d862
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInvertedIndex.java
@@ -0,0 +1,201 @@
+package org.apache.kylin.storage.gridtable;
+
+import it.uniroma3.mat.extendedset.intset.ConciseSet;
+
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+
+/**
+ * A thread-safe inverted index of row blocks in memory.
+ *
+ * Note function not() must return all blocks, because index only know what block contains a value,
+ * but not sure what block does not contain a value.
+ *
+ * @author yangli9
+ */
+public class GTInvertedIndex {
+
+ private final GTInfo info;
+ private final BitSet colPreferIndex;
+ private final BitSet colBlocks;
+ private final GTInvertedIndexOfColumn[] index; // for each column
+
+ private volatile int nIndexedBlocks;
+
+ public GTInvertedIndex(GTInfo info) {
+ this.info = info;
+ this.colPreferIndex = info.colPreferIndex;
+ this.colBlocks = info.selectColumnBlocks(colPreferIndex);
+
+ index = new GTInvertedIndexOfColumn[info.nColumns];
+ for (int i = colPreferIndex.nextSetBit(0); i >= 0; i = colPreferIndex.nextSetBit(i + 1)) {
+ index[i] = new GTInvertedIndexOfColumn(info.codeSystem.getFilterCodeSystem());
+ }
+ }
+
+ public void add(GTRowBlock block) {
+
+ @SuppressWarnings("unchecked")
+ Set<ByteArray>[] distinctValues = new Set[info.nColumns];
+ for (int i = colPreferIndex.nextSetBit(0); i >= 0; i = colPreferIndex.nextSetBit(i + 1)) {
+ distinctValues[i] = new HashSet<ByteArray>();
+ }
+
+ GTRowBlock.Reader reader = block.getReader(colBlocks);
+ GTRecord record = new GTRecord(info);
+ while (reader.hasNext()) {
+ reader.fetchNext(record);
+ for (int i = colPreferIndex.nextSetBit(0); i >= 0; i = colPreferIndex.nextSetBit(i + 1)) {
+ distinctValues[i].add(record.get(i));
+ }
+ }
+
+ for (int i = colPreferIndex.nextSetBit(0); i >= 0; i = colPreferIndex.nextSetBit(i + 1)) {
+ index[i].add(distinctValues[i], block.sequenceId());
+ }
+
+ nIndexedBlocks = Math.max(nIndexedBlocks, block.seqId + 1);
+ }
+
+ public ConciseSet filter(TupleFilter filter) {
+ return filter(filter, nIndexedBlocks);
+ }
+
+ public ConciseSet filter(TupleFilter filter, int totalBlocks) {
+ // number of indexed blocks may increase as we do evaluation
+ int indexedBlocks = nIndexedBlocks;
+
+ Evaluator evaluator = new Evaluator(indexedBlocks);
+ ConciseSet r = evaluator.evaluate(filter);
+
+ // add blocks that have not been indexed
+ for (int i = indexedBlocks; i < totalBlocks; i++) {
+ r.add(i);
+ }
+
+ return r;
+ }
+
+ private class Evaluator {
+ private int indexedBlocks;
+
+ Evaluator(int indexedBlocks) {
+ this.indexedBlocks = indexedBlocks;
+ }
+
+ public ConciseSet evaluate(TupleFilter filter) {
+ if (filter == null) {
+ return all();
+ }
+
+ if (filter instanceof LogicalTupleFilter)
+ return evalLogical((LogicalTupleFilter) filter);
+
+ if (filter instanceof CompareTupleFilter)
+ return evalCompare((CompareTupleFilter) filter);
+
+ // unable to evaluate
+ return all();
+ }
+
+ @SuppressWarnings("unchecked")
+ private ConciseSet evalCompare(CompareTupleFilter filter) {
+ int col = col(filter);
+ if (index[col] == null)
+ return all();
+
+ switch (filter.getOperator()) {
+ case ISNULL:
+ return index[col].getNull();
+ case ISNOTNULL:
+ return all();
+ case EQ:
+ return index[col].getEquals((ByteArray) filter.getFirstValue());
+ case NEQ:
+ return all();
+ case IN:
+ return index[col].getIn((Iterable<ByteArray>) filter.getValues());
+ case NOTIN:
+ return all();
+ case LT:
+ return index[col].getRange(null, false, (ByteArray) filter.getFirstValue(), false);
+ case LTE:
+ return index[col].getRange(null, false, (ByteArray) filter.getFirstValue(), true);
+ case GT:
+ return index[col].getRange((ByteArray) filter.getFirstValue(), false, null, false);
+ case GTE:
+ return index[col].getRange((ByteArray) filter.getFirstValue(), true, null, false);
+ default:
+ throw new IllegalStateException("Unsupported operator " + filter.getOperator());
+ }
+ }
+
+ private ConciseSet evalLogical(LogicalTupleFilter filter) {
+ List<? extends TupleFilter> children = filter.getChildren();
+
+ switch (filter.getOperator()) {
+ case AND:
+ return evalLogicalAnd(children);
+ case OR:
+ return evalLogicalOr(children);
+ case NOT:
+ return evalLogicalNot(children);
+ default:
+ throw new IllegalStateException("Unsupported operator " + filter.getOperator());
+ }
+ }
+
+ private ConciseSet evalLogicalAnd(List<? extends TupleFilter> children) {
+ ConciseSet set = all();
+
+ for (TupleFilter c : children) {
+ ConciseSet t = evaluate(c);
+ if (t == null)
+ continue; // because it's AND
+
+ set.retainAll(t);
+ }
+ return set;
+ }
+
+ private ConciseSet evalLogicalOr(List<? extends TupleFilter> children) {
+ ConciseSet set = new ConciseSet();
+
+ for (TupleFilter c : children) {
+ ConciseSet t = evaluate(c);
+ if (t == null)
+ return null; // because it's OR
+
+ set.addAll(t);
+ }
+ return set;
+ }
+
+ private ConciseSet evalLogicalNot(List<? extends TupleFilter> children) {
+ return all();
+ }
+
+ private ConciseSet all() {
+ return not(new ConciseSet());
+ }
+
+ private ConciseSet not(ConciseSet set) {
+ set.add(indexedBlocks);
+ set.complement();
+ return set;
+ }
+
+ private int col(CompareTupleFilter filter) {
+ return filter.getColumn().getColumn().getZeroBasedIndex();
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInvertedIndexOfColumn.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInvertedIndexOfColumn.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInvertedIndexOfColumn.java
new file mode 100644
index 0000000..810a7a3
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTInvertedIndexOfColumn.java
@@ -0,0 +1,116 @@
+package org.apache.kylin.storage.gridtable;
+
+import it.uniroma3.mat.extendedset.intset.ConciseSet;
+
+import java.util.NavigableMap;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.IFilterCodeSystem;
+
+import com.google.common.collect.Maps;
+
+public class GTInvertedIndexOfColumn {
+
+ final private IFilterCodeSystem<ByteArray> codeSystem;
+ final private ReentrantReadWriteLock rwLock;
+
+ private int nBlocks;
+ private NavigableMap<ByteArray, ConciseSet> rangeIndex;
+ private ConciseSet nullIndex;
+
+ public GTInvertedIndexOfColumn(IFilterCodeSystem<ByteArray> codeSystem) {
+ this.codeSystem = codeSystem;
+ this.rwLock = new ReentrantReadWriteLock();
+ this.rangeIndex = Maps.newTreeMap(codeSystem);
+ this.nullIndex = new ConciseSet();
+ }
+
+ public void add(Iterable<ByteArray> codes, int blockId) {
+ rwLock.writeLock().lock();
+ try {
+ for (ByteArray code : codes) {
+ if (codeSystem.isNull(code)) {
+ nullIndex.add(blockId);
+ continue;
+ }
+ ConciseSet set = rangeIndex.get(code);
+ if (set == null) {
+ set = new ConciseSet();
+ rangeIndex.put(code.copy(), set);
+ }
+ set.add(blockId);
+ }
+
+ if (blockId >= nBlocks) {
+ nBlocks = blockId + 1;
+ }
+
+ } finally {
+ rwLock.writeLock().unlock();
+ }
+ }
+
+ public ConciseSet getNull() {
+ rwLock.readLock().lock();
+ try {
+ return nullIndex.clone();
+ } finally {
+ rwLock.readLock().unlock();
+ }
+ }
+
+ public ConciseSet getEquals(ByteArray code) {
+ rwLock.readLock().lock();
+ try {
+ ConciseSet set = rangeIndex.get(code);
+ if (set == null)
+ return new ConciseSet();
+ else
+ return set.clone();
+ } finally {
+ rwLock.readLock().unlock();
+ }
+ }
+
+ public ConciseSet getIn(Iterable<ByteArray> codes) {
+ rwLock.readLock().lock();
+ try {
+ ConciseSet r = new ConciseSet();
+ for (ByteArray code : codes) {
+ ConciseSet set = rangeIndex.get(code);
+ if (set != null)
+ r.addAll(set);
+ }
+ return r;
+ } finally {
+ rwLock.readLock().unlock();
+ }
+ }
+
+ public ConciseSet getRange(ByteArray from, boolean fromInclusive, ByteArray to, boolean toInclusive) {
+ rwLock.readLock().lock();
+ try {
+ ConciseSet r = new ConciseSet();
+ if (from == null && to == null) {
+ r.add(nBlocks);
+ r.complement();
+ return r;
+ }
+ NavigableMap<ByteArray, ConciseSet> subMap;
+ if (from == null) {
+ subMap = rangeIndex.headMap(to, toInclusive);
+ } else if (to == null) {
+ subMap = rangeIndex.tailMap(from, fromInclusive);
+ } else {
+ subMap = rangeIndex.subMap(from, fromInclusive, to, toInclusive);
+ }
+ for (ConciseSet set : subMap.values()) {
+ r.addAll(set);
+ }
+ return r;
+ } finally {
+ rwLock.readLock().unlock();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRawScanner.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRawScanner.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRawScanner.java
index f45f126..978e3cf 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRawScanner.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRawScanner.java
@@ -36,27 +36,13 @@ class GTRawScanner implements IGTScanner {
ByteArray start = pkStart == null ? null : pkStart.exportColumns(info.primaryKey);
ByteArray endEx = pkEndExclusive == null ? null : pkEndExclusive.exportColumns(info.primaryKey);
- this.selectedColBlocks = computeHitColumnBlocks(columns);
+ this.selectedColBlocks = info.selectColumnBlocks(columns);
this.storeScanner = store.scan(start, endEx, selectedColBlocks, filterPushDown);
this.oneRecord = new GTRecord(info);
this.oneTuple = new TupleAdapter(oneRecord);
}
- private BitSet computeHitColumnBlocks(BitSet columns) {
- if (columns == null)
- columns = info.colAll;
-
- BitSet result = new BitSet();
- for (int i = 0; i < info.colBlocks.length; i++) {
- BitSet cb = info.colBlocks[i];
- if (cb.intersects(columns)) {
- result.set(i);
- }
- }
- return result;
- }
-
@Override
public int getScannedRowCount() {
return scannedRowCount;
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
index 3f1ba91..2950fad 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRecord.java
@@ -20,6 +20,14 @@ public class GTRecord implements Comparable<GTRecord> {
this.cols[i] = new ByteArray();
this.maskForEqualHashComp = info.colAll;
}
+
+ public ByteArray get(int i) {
+ return cols[i];
+ }
+
+ public void set(int i, ByteArray data) {
+ cols[i].set(data.array(), data.offset(), data.length());
+ }
/** set record to the codes of specified values, new space allocated to hold the codes */
public GTRecord setValues(Object... values) {
@@ -32,10 +40,6 @@ public class GTRecord implements Comparable<GTRecord> {
ByteBuffer buf = space.asBuffer();
int pos = buf.position();
for (int i = 0; i < info.nColumns; i++) {
- if (values[i] == null) {
- cols[i].set(null, 0, 0);
- continue;
- }
info.codeSystem.encodeColumnValue(i, values[i], buf);
int newPos = buf.position();
cols[i].set(buf.array(), buf.arrayOffset() + pos, newPos - pos);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRowBlockIndex.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRowBlockIndex.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRowBlockIndex.java
deleted file mode 100644
index 21b0a9a..0000000
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTRowBlockIndex.java
+++ /dev/null
@@ -1,5 +0,0 @@
-package org.apache.kylin.storage.gridtable;
-
-public class GTRowBlockIndex {
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
index edd70e4..b7f12df 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTSampleCodeSystem.java
@@ -38,7 +38,13 @@ public class GTSampleCodeSystem implements IGTCodeSystem {
this.filterCS = new IFilterCodeSystem<ByteArray>() {
@Override
public boolean isNull(ByteArray code) {
- return (code == null || code.length() == 0);
+ // all 0xff is null
+ byte[] array = code.array();
+ for (int i = 0, j = code.offset(), n = code.length(); i < n; i++, j++) {
+ if (array[j] != (byte) 0xff)
+ return false;
+ }
+ return true;
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
new file mode 100644
index 0000000..b9f2a73
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTUtil.java
@@ -0,0 +1,23 @@
+package org.apache.kylin.storage.gridtable;
+
+import org.apache.kylin.metadata.model.ColumnDesc;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+
+public class GTUtil {
+
+ static final TableDesc MOCKUP_TABLE = new TableDesc();
+ static {
+ MOCKUP_TABLE.setName("GT_MOCKUP_TABLE");
+ }
+
+ public static TblColRef tblColRef(int col, String datatype) {
+ ColumnDesc desc = new ColumnDesc();
+ String id = "" + (col + 1);
+ desc.setId(id);
+ desc.setName(id);
+ desc.setDatatype(datatype);
+ desc.init(MOCKUP_TABLE);
+ return new TblColRef(desc);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
new file mode 100644
index 0000000..1460039
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/GTInvertedIndexTest.java
@@ -0,0 +1,165 @@
+package org.apache.kylin.storage.gridtable;
+
+import static org.junit.Assert.*;
+import it.uniroma3.mat.extendedset.intset.ConciseSet;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.metadata.filter.ColumnTupleFilter;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.serializer.StringSerializer;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class GTInvertedIndexTest {
+
+ GTInfo info;
+ GTInvertedIndex index;
+ ArrayList<CompareTupleFilter> basicFilters = Lists.newArrayList();
+ ArrayList<ConciseSet> basicResults = Lists.newArrayList();
+
+ public GTInvertedIndexTest() {
+
+ info = GridTableTest.advancedInfo();
+ TblColRef colA = info.colRef(0);
+
+ // block i contains value "i", the last is NULL
+ index = new GTInvertedIndex(info);
+ GTRowBlock mockBlock = GTRowBlock.allocate(info);
+ GTRowBlock.Writer writer = mockBlock.getWriter();
+ GTRecord record = new GTRecord(info);
+ for (int i = 0; i < 10; i++) {
+ record.setValues(i < 9 ? "" + i : null, "", "", new LongWritable(0), new BigDecimal(0));
+ for (int j = 0; j < info.getRowBlockSize(); j++) {
+ writer.append(record);
+ }
+ writer.readyForFlush();
+ index.add(mockBlock);
+
+ writer.clearForNext();
+ }
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.ISNULL));
+ basicResults.add(set(9));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.ISNOTNULL));
+ basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.EQ, 0));
+ basicResults.add(set(0));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.NEQ, 0));
+ basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.IN, 0, 5));
+ basicResults.add(set(0, 5));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.NOTIN, 0, 5));
+ basicResults.add(set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.LT, 3));
+ basicResults.add(set(0, 1, 2));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.LTE, 3));
+ basicResults.add(set(0, 1, 2, 3));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.GT, 3));
+ basicResults.add(set(4, 5, 6, 7, 8));
+
+ basicFilters.add(compare(colA, FilterOperatorEnum.GTE, 3));
+ basicResults.add(set(3, 4, 5, 6, 7, 8));
+ }
+
+ @Test
+ public void testBasics() {
+ for (int i = 0; i < basicFilters.size(); i++) {
+ assertEquals(basicResults.get(i), index.filter(basicFilters.get(i)));
+ }
+ }
+
+ @Test
+ public void testLogicalAnd() {
+ for (int i = 0; i < basicFilters.size(); i++) {
+ for (int j = 0; j < basicFilters.size(); j++) {
+ LogicalTupleFilter f = logical(FilterOperatorEnum.AND, basicFilters.get(i), basicFilters.get(j));
+ ConciseSet r = basicResults.get(i).clone();
+ r.retainAll(basicResults.get(j));
+ assertEquals(r, index.filter(f));
+ }
+ }
+ }
+
+ @Test
+ public void testLogicalOr() {
+ for (int i = 0; i < basicFilters.size(); i++) {
+ for (int j = 0; j < basicFilters.size(); j++) {
+ LogicalTupleFilter f = logical(FilterOperatorEnum.OR, basicFilters.get(i), basicFilters.get(j));
+ ConciseSet r = basicResults.get(i).clone();
+ r.addAll(basicResults.get(j));
+ assertEquals(r, index.filter(f));
+ }
+ }
+ }
+
+ @Test
+ public void testNotEvaluable() {
+ ConciseSet all = set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+
+ CompareTupleFilter notEvaluable = compare(info.colRef(1), FilterOperatorEnum.EQ, 0);
+ assertEquals(all, index.filter(notEvaluable));
+
+ LogicalTupleFilter or = logical(FilterOperatorEnum.OR, basicFilters.get(0), notEvaluable);
+ assertEquals(all, index.filter(or));
+
+ LogicalTupleFilter and = logical(FilterOperatorEnum.AND, basicFilters.get(0), notEvaluable);
+ assertEquals(basicResults.get(0), index.filter(and));
+ }
+
+ public static CompareTupleFilter compare(TblColRef col, TupleFilter.FilterOperatorEnum op, int... ids) {
+ CompareTupleFilter filter = new CompareTupleFilter(op);
+ filter.addChild(columnFilter(col));
+ for (int i : ids) {
+ filter.addChild(constFilter(i));
+ }
+ return filter;
+ }
+
+ public static LogicalTupleFilter logical(TupleFilter.FilterOperatorEnum op, TupleFilter... filters) {
+ LogicalTupleFilter filter = new LogicalTupleFilter(op);
+ for (TupleFilter f : filters)
+ filter.addChild(f);
+ return filter;
+ }
+
+ public static ColumnTupleFilter columnFilter(TblColRef col) {
+ return new ColumnTupleFilter(col);
+ }
+
+ public static ConstantTupleFilter constFilter(int id) {
+ byte[] space = new byte[10];
+ ByteBuffer buf = ByteBuffer.wrap(space);
+ StringSerializer stringSerializer = new StringSerializer();
+ stringSerializer.serialize("" + id, buf);
+ ByteArray data = new ByteArray(buf.array(), buf.arrayOffset(), buf.position());
+ return new ConstantTupleFilter(data);
+ }
+
+ public static ConciseSet set(int... ints) {
+ ConciseSet set = new ConciseSet();
+ for (int i : ints)
+ set.add(i);
+ return set;
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/05c2d096/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
index 899eb50..95f675f 100644
--- a/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
+++ b/storage/src/test/java/org/apache/kylin/storage/gridtable/GridTableTest.java
@@ -168,13 +168,13 @@ public class GridTableTest {
System.out.println("Written Row Count: " + builder.getWrittenRowCount());
}
- private GTInfo basicInfo() {
+ public static GTInfo basicInfo() {
Builder builder = infoBuilder();
GTInfo info = builder.build();
return info;
}
- private GTInfo advancedInfo() {
+ public static GTInfo advancedInfo() {
Builder builder = infoBuilder();
builder.enableColumnBlock(new BitSet[] { setOf(0, 1, 2), setOf(3, 4) });
builder.enableRowBlock(4);
@@ -182,7 +182,7 @@ public class GridTableTest {
return info;
}
- private Builder infoBuilder() {
+ private static Builder infoBuilder() {
Builder builder = GTInfo.builder();
builder.setCodeSystem(new GTSampleCodeSystem());
builder.setColumns( //
@@ -193,10 +193,11 @@ public class GridTableTest {
DataType.getInstance("decimal") //
);
builder.setPrimaryKey(setOf(0));
+ builder.setColumnPreferIndex(setOf(0));
return builder;
}
- private BitSet setOf(int... values) {
+ private static BitSet setOf(int... values) {
BitSet set = new BitSet();
for (int i : values)
set.set(i);