You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/05/20 02:23:13 UTC
incubator-kylin git commit: KYLIN-775,
cube scanning related refactor done
Repository: incubator-kylin
Updated Branches:
refs/heads/0.8.0 da67e9555 -> 35e0b9d56
KYLIN-775, cube scanning related refactor done
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/35e0b9d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/35e0b9d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/35e0b9d5
Branch: refs/heads/0.8.0
Commit: 35e0b9d56b84220a1fe94f2220c074976f834175
Parents: da67e95
Author: Yang Li <li...@apache.org>
Authored: Wed May 20 08:22:53 2015 +0800
Committer: Yang Li <li...@apache.org>
Committed: Wed May 20 08:22:53 2015 +0800
----------------------------------------------------------------------
.../kylin/storage/cube/CubeGridTable.java | 47 ++-----
.../apache/kylin/storage/cube/CubeScanner.java | 41 +++---
.../storage/cube/CuboidToGridTableMapping.java | 131 +++++++++++++++++++
3 files changed, 157 insertions(+), 62 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/35e0b9d5/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java b/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java
index 6ed07b6..bc33213 100644
--- a/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java
+++ b/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java
@@ -1,7 +1,5 @@
package org.apache.kylin.storage.cube;
-import java.util.ArrayList;
-import java.util.BitSet;
import java.util.List;
import java.util.Map;
@@ -9,11 +7,7 @@ import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.cube.model.HBaseColumnDesc;
-import org.apache.kylin.cube.model.HBaseColumnFamilyDesc;
import org.apache.kylin.dict.Dictionary;
-import org.apache.kylin.metadata.model.DataType;
-import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.storage.gridtable.GTInfo;
@@ -45,54 +39,33 @@ public class CubeGridTable {
public static GTInfo newGTInfo(CubeDesc cubeDesc, long cuboidId, Map<TblColRef, Dictionary<?>> dictionaryMap) {
Cuboid cuboid = Cuboid.findById(cubeDesc, cuboidId);
- List<TblColRef> dimCols = cuboid.getColumns();
-
- int nColumns = dimCols.size() + cubeDesc.getMeasures().size();
- ArrayList<BitSet> colBlocks = new ArrayList<BitSet>();
- BitSet dimensions = new BitSet();
- dimensions.set(0, dimCols.size());
- colBlocks.add(dimensions);
-
- DataType[] dataTypes = new DataType[nColumns];
+ CuboidToGridTableMapping mapping = new CuboidToGridTableMapping(cuboid);
+
Map<Integer, Dictionary> dictionaryByColIdx = Maps.newHashMap();
Map<Integer, Integer> fixLenByColIdx = Maps.newHashMap();
- int colIndex = 0;
- for (TblColRef col : dimCols) {
- dataTypes[colIndex] = col.getType();
- if (cubeDesc.getRowkey().isUseDictionary(col)) {
- Dictionary dict = dictionaryMap.get(col);
+ for (TblColRef dim : cuboid.getColumns()) {
+ int colIndex = mapping.getIndexOf(dim);
+ if (cubeDesc.getRowkey().isUseDictionary(dim)) {
+ Dictionary dict = dictionaryMap.get(dim);
if (dict == null)
throw new IllegalStateException();
dictionaryByColIdx.put(colIndex, dict);
} else {
- int len = cubeDesc.getRowkey().getColumnLength(col);
+ int len = cubeDesc.getRowkey().getColumnLength(dim);
if (len == 0)
throw new IllegalStateException();
fixLenByColIdx.put(colIndex, len);
}
- colIndex++;
}
- for (HBaseColumnFamilyDesc familyDesc : cubeDesc.getHbaseMapping().getColumnFamily()) {
- for (HBaseColumnDesc hbaseColDesc : familyDesc.getColumns()) {
- BitSet colBlock = new BitSet();
- for (MeasureDesc measure : hbaseColDesc.getMeasures()) {
- dataTypes[colIndex] = measure.getFunction().getReturnDataType();
- colBlock.set(colIndex);
- colIndex++;
- }
- colBlocks.add(colBlock);
- }
- }
-
GTInfo.Builder builder = GTInfo.builder();
builder.setCodeSystem(new CubeCodeSystem(dictionaryByColIdx, fixLenByColIdx));
- builder.setColumns(dataTypes);
- builder.setPrimaryKey(dimensions);
- builder.enableColumnBlock((BitSet[]) colBlocks.toArray(new BitSet[colBlocks.size()]));
+ builder.setColumns(mapping.getDataTypes());
+ builder.setPrimaryKey(mapping.getPrimaryKey());
+ builder.enableColumnBlock(mapping.getColumnBlocks());
return builder.build();
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/35e0b9d5/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java b/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java
index 2bf9a85..5782b30 100644
--- a/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java
+++ b/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java
@@ -10,10 +10,8 @@ import java.util.Set;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.Cuboid;
-import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.metadata.filter.TupleFilter;
import org.apache.kylin.metadata.model.FunctionDesc;
-import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.storage.gridtable.GTInfo;
import org.apache.kylin.storage.gridtable.GTRawScanner;
@@ -42,10 +40,11 @@ public class CubeScanner implements IGTScanner {
this.info = CubeGridTable.newGTInfo(cubeSeg, cuboid.getId());
this.store = new CubeHBaseReadonlyStore(info, cubeSeg, cuboid);
- TupleFilter gtFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, cuboid.getColumns(), groups);
- BitSet gtDimensions = makeGridTableColumns(cuboid, dimensions);
- BitSet gtAggrGroups = makeGridTableColumns(cuboid, groups);
- BitSet gtAggrMetrics = makeGridTableColumns(cubeSeg.getCubeDesc(), cuboid, metrics);
+ CuboidToGridTableMapping mapping = new CuboidToGridTableMapping(cuboid);
+ TupleFilter gtFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, mapping.getCuboidDimensionsInGTOrder(), groups);
+ BitSet gtDimensions = makeGridTableColumns(mapping, dimensions);
+ BitSet gtAggrGroups = makeGridTableColumns(mapping, groups);
+ BitSet gtAggrMetrics = makeGridTableColumns(mapping, metrics);
String[] gtAggrFuncs = makeAggrFuncs(metrics);
GTScanRangePlanner scanRangePlanner = new GTScanRangePlanner(info);
@@ -59,32 +58,24 @@ public class CubeScanner implements IGTScanner {
scanner = new Scanner();
}
- private BitSet makeGridTableColumns(Cuboid cuboid, Set<TblColRef> dimensions) {
+ private BitSet makeGridTableColumns(CuboidToGridTableMapping mapping, Set<TblColRef> dimensions) {
BitSet result = new BitSet();
- List<TblColRef> dimCols = cuboid.getColumns();
- for (int i = 0; i < dimCols.size(); i++) {
- if (dimensions.contains(dimCols.get(i))) {
- result.set(i);
- }
+ for (TblColRef dim : dimensions) {
+ int idx = mapping.getIndexOf(dim);
+ if (idx < 0)
+ throw new IllegalStateException(dim + " not found in " + mapping);
+ result.set(idx);
}
return result;
}
- private BitSet makeGridTableColumns(CubeDesc cubeDesc, Cuboid cuboid, Collection<FunctionDesc> metrics) {
+ private BitSet makeGridTableColumns(CuboidToGridTableMapping mapping, Collection<FunctionDesc> metrics) {
BitSet result = new BitSet();
- int metricsIndexStart = cuboid.getColumns().size();
for (FunctionDesc metric : metrics) {
- int index = 0;
- for (MeasureDesc measure : cubeDesc.getMeasures()) {
- if (metric.equals(measure.getFunction())) {
- break;
- }
- index++;
- }
- if (index == cubeDesc.getMeasures().size())
- throw new IllegalStateException(metric + " not found in " + cubeDesc);
-
- result.set(metricsIndexStart + index);
+ int idx = mapping.getIndexOf(metric);
+ if (idx < 0)
+ throw new IllegalStateException(metric + " not found in " + mapping);
+ result.set(idx);
}
return result;
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/35e0b9d5/storage/src/main/java/org/apache/kylin/storage/cube/CuboidToGridTableMapping.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/cube/CuboidToGridTableMapping.java b/storage/src/main/java/org/apache/kylin/storage/cube/CuboidToGridTableMapping.java
new file mode 100644
index 0000000..bf5aa68
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/cube/CuboidToGridTableMapping.java
@@ -0,0 +1,131 @@
+package org.apache.kylin.storage.cube;
+
+import java.util.BitSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.model.HBaseColumnDesc;
+import org.apache.kylin.cube.model.HBaseColumnFamilyDesc;
+import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.model.FunctionDesc;
+import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.LinkedListMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public class CuboidToGridTableMapping {
+
+ final private Cuboid cuboid;
+
+ private List<DataType> gtDataTypes;
+ private List<BitSet> gtColBlocks;
+
+ private int nDimensions;
+ private Map<TblColRef, Integer> dim2gt;
+ private BitSet gtPrimaryKey;
+
+ private int nMetrics;
+ private ListMultimap<FunctionDesc, Integer> metrics2gt; // because count distinct may have a holistic version
+
+ public CuboidToGridTableMapping(Cuboid cuboid) {
+ this.cuboid = cuboid;
+ init();
+ }
+
+ private void init() {
+ int gtColIdx = 0;
+ gtDataTypes = Lists.newArrayList();
+ gtColBlocks = Lists.newArrayList();
+
+ // dimensions
+ dim2gt = Maps.newHashMap();
+ gtPrimaryKey = new BitSet();
+ for (TblColRef dimension : cuboid.getColumns()) {
+ gtDataTypes.add(dimension.getType());
+ dim2gt.put(dimension, gtColIdx);
+ gtPrimaryKey.set(gtColIdx);
+ gtColIdx++;
+ }
+ gtColBlocks.add(gtPrimaryKey);
+
+ nDimensions = gtColIdx;
+ assert nDimensions == cuboid.getColumns().size();
+
+ // metrics
+ metrics2gt = LinkedListMultimap.create();
+ for (HBaseColumnFamilyDesc familyDesc : cuboid.getCube().getHbaseMapping().getColumnFamily()) {
+ for (HBaseColumnDesc hbaseColDesc : familyDesc.getColumns()) {
+ BitSet colBlock = new BitSet();
+ for (MeasureDesc measure : hbaseColDesc.getMeasures()) {
+ // count distinct and holistic count distinct are equals() but occupy different columns
+ // assert that the holistic version, if it exists, always comes after the normal one
+ FunctionDesc func = measure.getFunction();
+ if (func.isHolisticCountDistinct()) {
+ if (metrics2gt.get(func).size() != 1)
+ throw new IllegalStateException();
+ }
+ gtDataTypes.add(func.getReturnDataType());
+ metrics2gt.put(func, gtColIdx);
+ colBlock.set(gtColIdx);
+ gtColIdx++;
+ }
+ gtColBlocks.add(colBlock);
+ }
+ }
+ nMetrics = gtColIdx - nDimensions;
+ assert nMetrics == cuboid.getCube().getMeasures().size();
+ }
+
+ public int getColumnCount() {
+ return nDimensions + nMetrics;
+ }
+
+ public int getDimensionCount() {
+ return nDimensions;
+ }
+
+ public int getMetricsCount() {
+ return nMetrics;
+ }
+
+ public DataType[] getDataTypes() {
+ return (DataType[]) gtDataTypes.toArray(new DataType[gtDataTypes.size()]);
+ }
+
+ public BitSet getPrimaryKey() {
+ return gtPrimaryKey;
+ }
+
+ public BitSet[] getColumnBlocks() {
+ return (BitSet[]) gtColBlocks.toArray(new BitSet[gtColBlocks.size()]);
+ }
+
+ public int getIndexOf(TblColRef dimension) {
+ Integer i = dim2gt.get(dimension);
+ return i == null ? -1 : i.intValue();
+ }
+
+ public int getIndexOf(FunctionDesc metric) {
+ List<Integer> list = metrics2gt.get(metric);
+ // normal case
+ if (list.size() == 1) {
+ return list.get(0);
+ }
+ // count distinct & its holistic version; the holistic one is the second entry
+ else if (list.size() == 2) {
+ assert metric.isCountDistinct();
+ return metric.isHolisticCountDistinct() ? list.get(1) : list.get(0);
+ }
+ // unexpected: no mapping at all, or more than two mappings
+ else
+ return -1;
+ }
+
+ public List<TblColRef> getCuboidDimensionsInGTOrder() {
+ return cuboid.getColumns();
+ }
+}