You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/05/20 02:23:13 UTC

incubator-kylin git commit: KYLIN-775, cube scanning related refactor done

Repository: incubator-kylin
Updated Branches:
  refs/heads/0.8.0 da67e9555 -> 35e0b9d56


KYLIN-775, cube scanning related refactor done


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/35e0b9d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/35e0b9d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/35e0b9d5

Branch: refs/heads/0.8.0
Commit: 35e0b9d56b84220a1fe94f2220c074976f834175
Parents: da67e95
Author: Yang Li <li...@apache.org>
Authored: Wed May 20 08:22:53 2015 +0800
Committer: Yang Li <li...@apache.org>
Committed: Wed May 20 08:22:53 2015 +0800

----------------------------------------------------------------------
 .../kylin/storage/cube/CubeGridTable.java       |  47 ++-----
 .../apache/kylin/storage/cube/CubeScanner.java  |  41 +++---
 .../storage/cube/CuboidToGridTableMapping.java  | 131 +++++++++++++++++++
 3 files changed, 157 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/35e0b9d5/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java b/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java
index 6ed07b6..bc33213 100644
--- a/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java
+++ b/storage/src/main/java/org/apache/kylin/storage/cube/CubeGridTable.java
@@ -1,7 +1,5 @@
 package org.apache.kylin.storage.cube;
 
-import java.util.ArrayList;
-import java.util.BitSet;
 import java.util.List;
 import java.util.Map;
 
@@ -9,11 +7,7 @@ import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.cube.cuboid.Cuboid;
 import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.cube.model.HBaseColumnDesc;
-import org.apache.kylin.cube.model.HBaseColumnFamilyDesc;
 import org.apache.kylin.dict.Dictionary;
-import org.apache.kylin.metadata.model.DataType;
-import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.storage.gridtable.GTInfo;
 
@@ -45,54 +39,33 @@ public class CubeGridTable {
     
     public static GTInfo newGTInfo(CubeDesc cubeDesc, long cuboidId, Map<TblColRef, Dictionary<?>> dictionaryMap) {
         Cuboid cuboid = Cuboid.findById(cubeDesc, cuboidId);
-        List<TblColRef> dimCols = cuboid.getColumns();
-
-        int nColumns = dimCols.size() + cubeDesc.getMeasures().size();
-        ArrayList<BitSet> colBlocks = new ArrayList<BitSet>();
-        BitSet dimensions = new BitSet();
-        dimensions.set(0, dimCols.size());
-        colBlocks.add(dimensions);
-
-        DataType[] dataTypes = new DataType[nColumns];
+        CuboidToGridTableMapping mapping = new CuboidToGridTableMapping(cuboid);
+        
         Map<Integer, Dictionary> dictionaryByColIdx = Maps.newHashMap();
         Map<Integer, Integer> fixLenByColIdx = Maps.newHashMap();
 
-        int colIndex = 0;
-        for (TblColRef col : dimCols) {
-            dataTypes[colIndex] = col.getType();
-            if (cubeDesc.getRowkey().isUseDictionary(col)) {
-                Dictionary dict = dictionaryMap.get(col);
+        for (TblColRef dim : cuboid.getColumns()) {
+            int colIndex = mapping.getIndexOf(dim);
+            if (cubeDesc.getRowkey().isUseDictionary(dim)) {
+                Dictionary dict = dictionaryMap.get(dim);
                 if (dict == null)
                     throw new IllegalStateException();
 
                 dictionaryByColIdx.put(colIndex, dict);
             } else {
-                int len = cubeDesc.getRowkey().getColumnLength(col);
+                int len = cubeDesc.getRowkey().getColumnLength(dim);
                 if (len == 0)
                     throw new IllegalStateException();
                 
                 fixLenByColIdx.put(colIndex,  len);
             }
-            colIndex++;
         }
 
-        for (HBaseColumnFamilyDesc familyDesc : cubeDesc.getHbaseMapping().getColumnFamily()) {
-            for (HBaseColumnDesc hbaseColDesc : familyDesc.getColumns()) {
-                BitSet colBlock = new BitSet();
-                for (MeasureDesc measure : hbaseColDesc.getMeasures()) {
-                    dataTypes[colIndex] = measure.getFunction().getReturnDataType();
-                    colBlock.set(colIndex);
-                    colIndex++;
-                }
-                colBlocks.add(colBlock);
-            }
-        }
-        
         GTInfo.Builder builder = GTInfo.builder();
         builder.setCodeSystem(new CubeCodeSystem(dictionaryByColIdx, fixLenByColIdx));
-        builder.setColumns(dataTypes);
-        builder.setPrimaryKey(dimensions);
-        builder.enableColumnBlock((BitSet[]) colBlocks.toArray(new BitSet[colBlocks.size()]));
+        builder.setColumns(mapping.getDataTypes());
+        builder.setPrimaryKey(mapping.getPrimaryKey());
+        builder.enableColumnBlock(mapping.getColumnBlocks());
         return builder.build();
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/35e0b9d5/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java b/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java
index 2bf9a85..5782b30 100644
--- a/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java
+++ b/storage/src/main/java/org/apache/kylin/storage/cube/CubeScanner.java
@@ -10,10 +10,8 @@ import java.util.Set;
 
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.cube.cuboid.Cuboid;
-import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.metadata.filter.TupleFilter;
 import org.apache.kylin.metadata.model.FunctionDesc;
-import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.storage.gridtable.GTInfo;
 import org.apache.kylin.storage.gridtable.GTRawScanner;
@@ -42,10 +40,11 @@ public class CubeScanner implements IGTScanner {
         this.info = CubeGridTable.newGTInfo(cubeSeg, cuboid.getId());
         this.store = new CubeHBaseReadonlyStore(info, cubeSeg, cuboid);
 
-        TupleFilter gtFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, cuboid.getColumns(), groups);
-        BitSet gtDimensions = makeGridTableColumns(cuboid, dimensions);
-        BitSet gtAggrGroups = makeGridTableColumns(cuboid, groups);
-        BitSet gtAggrMetrics = makeGridTableColumns(cubeSeg.getCubeDesc(), cuboid, metrics);
+        CuboidToGridTableMapping mapping = new CuboidToGridTableMapping(cuboid);
+        TupleFilter gtFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, mapping.getCuboidDimensionsInGTOrder(), groups);
+        BitSet gtDimensions = makeGridTableColumns(mapping, dimensions);
+        BitSet gtAggrGroups = makeGridTableColumns(mapping, groups);
+        BitSet gtAggrMetrics = makeGridTableColumns(mapping, metrics);
         String[] gtAggrFuncs = makeAggrFuncs(metrics);
 
         GTScanRangePlanner scanRangePlanner = new GTScanRangePlanner(info);
@@ -59,32 +58,24 @@ public class CubeScanner implements IGTScanner {
         scanner = new Scanner();
     }
 
-    private BitSet makeGridTableColumns(Cuboid cuboid, Set<TblColRef> dimensions) {
+    private BitSet makeGridTableColumns(CuboidToGridTableMapping mapping, Set<TblColRef> dimensions) {
         BitSet result = new BitSet();
-        List<TblColRef> dimCols = cuboid.getColumns();
-        for (int i = 0; i < dimCols.size(); i++) {
-            if (dimensions.contains(dimCols.get(i))) {
-                result.set(i);
-            }
+        for (TblColRef dim : dimensions) {
+            int idx = mapping.getIndexOf(dim);
+            if (idx < 0)
+                throw new IllegalStateException(dim + " not found in " + mapping);
+            result.set(idx);
         }
         return result;
     }
 
-    private BitSet makeGridTableColumns(CubeDesc cubeDesc, Cuboid cuboid, Collection<FunctionDesc> metrics) {
+    private BitSet makeGridTableColumns(CuboidToGridTableMapping mapping, Collection<FunctionDesc> metrics) {
         BitSet result = new BitSet();
-        int metricsIndexStart = cuboid.getColumns().size();
         for (FunctionDesc metric : metrics) {
-            int index = 0;
-            for (MeasureDesc measure : cubeDesc.getMeasures()) {
-                if (metric.equals(measure.getFunction())) {
-                    break;
-                }
-                index++;
-            }
-            if (index == cubeDesc.getMeasures().size())
-                throw new IllegalStateException(metric + " not found in " + cubeDesc);
-
-            result.set(metricsIndexStart + index);
+            int idx = mapping.getIndexOf(metric);
+            if (idx < 0)
+                throw new IllegalStateException(metric + " not found in " + mapping);
+            result.set(idx);
         }
         return result;
     }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/35e0b9d5/storage/src/main/java/org/apache/kylin/storage/cube/CuboidToGridTableMapping.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/cube/CuboidToGridTableMapping.java b/storage/src/main/java/org/apache/kylin/storage/cube/CuboidToGridTableMapping.java
new file mode 100644
index 0000000..bf5aa68
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/cube/CuboidToGridTableMapping.java
@@ -0,0 +1,176 @@
+package org.apache.kylin.storage.cube;
+
+import java.util.BitSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.model.HBaseColumnDesc;
+import org.apache.kylin.cube.model.HBaseColumnFamilyDesc;
+import org.apache.kylin.metadata.model.DataType;
+import org.apache.kylin.metadata.model.FunctionDesc;
+import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.google.common.collect.LinkedListMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * Maps the columns of a {@link Cuboid} onto grid table (GT) column indexes.
+ * <p>
+ * GT columns are numbered from 0: first the cuboid's dimensions, in the order
+ * returned by {@link Cuboid#getColumns()}, then the measures, in the order they
+ * appear in the cube's HBase column family mapping. The dimensions together form
+ * the primary key and the first column block; each HBase column contributes one
+ * further column block holding its measures.
+ */
+public class CuboidToGridTableMapping {
+
+    final private Cuboid cuboid;
+
+    private List<DataType> gtDataTypes;
+    private List<BitSet> gtColBlocks;
+
+    private int nDimensions;
+    private Map<TblColRef, Integer> dim2gt;
+    private BitSet gtPrimaryKey;
+
+    private int nMetrics;
+    private ListMultimap<FunctionDesc, Integer> metrics2gt; // because count distinct may have a holistic version
+
+    /**
+     * Builds the mapping for the given cuboid.
+     *
+     * @param cuboid the cuboid whose dimensions and measures are to be mapped
+     * @throws IllegalStateException if a holistic count distinct measure is not
+     *             preceded by exactly one measure whose function equals() it
+     */
+    public CuboidToGridTableMapping(Cuboid cuboid) {
+        this.cuboid = cuboid;
+        init();
+    }
+
+    /** Assigns a GT column index to every dimension and measure, filling all lookup structures. */
+    private void init() {
+        int gtColIdx = 0;
+        gtDataTypes = Lists.newArrayList();
+        gtColBlocks = Lists.newArrayList();
+
+        // dimensions: one GT column each, all of them part of the primary key
+        dim2gt = Maps.newHashMap();
+        gtPrimaryKey = new BitSet();
+        for (TblColRef dimension : cuboid.getColumns()) {
+            gtDataTypes.add(dimension.getType());
+            dim2gt.put(dimension, gtColIdx);
+            gtPrimaryKey.set(gtColIdx);
+            gtColIdx++;
+        }
+        // the primary key doubles as the first column block
+        gtColBlocks.add(gtPrimaryKey);
+
+        nDimensions = gtColIdx;
+        assert nDimensions == cuboid.getColumns().size();
+
+        // metrics: one GT column per measure, one column block per HBase column
+        metrics2gt = LinkedListMultimap.create();
+        for (HBaseColumnFamilyDesc familyDesc : cuboid.getCube().getHbaseMapping().getColumnFamily()) {
+            for (HBaseColumnDesc hbaseColDesc : familyDesc.getColumns()) {
+                BitSet colBlock = new BitSet();
+                for (MeasureDesc measure : hbaseColDesc.getMeasures()) {
+                    // count distinct & holistic count distinct are equals() but occupy different columns;
+                    // the holistic version, if it exists, must come after its equal counterpart
+                    FunctionDesc func = measure.getFunction();
+                    if (func.isHolisticCountDistinct()) {
+                        int nBefore = metrics2gt.get(func).size();
+                        if (nBefore != 1)
+                            throw new IllegalStateException("Holistic count distinct " + func
+                                    + " must follow exactly one equal measure, but " + nBefore + " found before it");
+                    }
+                    gtDataTypes.add(func.getReturnDataType());
+                    metrics2gt.put(func, gtColIdx);
+                    colBlock.set(gtColIdx);
+                    gtColIdx++;
+                }
+                gtColBlocks.add(colBlock);
+            }
+        }
+        nMetrics = gtColIdx - nDimensions;
+        assert nMetrics == cuboid.getCube().getMeasures().size();
+    }
+
+    /** @return the total number of GT columns, dimensions plus metrics */
+    public int getColumnCount() {
+        return nDimensions + nMetrics;
+    }
+
+    /** @return the number of GT columns that hold dimensions */
+    public int getDimensionCount() {
+        return nDimensions;
+    }
+
+    /** @return the number of GT columns that hold metrics */
+    public int getMetricsCount() {
+        return nMetrics;
+    }
+
+    /** @return a new array with the data type of every GT column, indexed by GT column index */
+    public DataType[] getDataTypes() {
+        return gtDataTypes.toArray(new DataType[gtDataTypes.size()]);
+    }
+
+    /** @return the GT columns forming the primary key; this is the internal instance, do not modify it */
+    public BitSet getPrimaryKey() {
+        return gtPrimaryKey;
+    }
+
+    /** @return a new array of the column blocks, primary key first; the BitSets are internal instances */
+    public BitSet[] getColumnBlocks() {
+        return gtColBlocks.toArray(new BitSet[gtColBlocks.size()]);
+    }
+
+    /**
+     * Looks up the GT column of a dimension.
+     *
+     * @param dimension the dimension column to look up
+     * @return the GT column index, or -1 if the dimension is not part of this cuboid
+     */
+    public int getIndexOf(TblColRef dimension) {
+        Integer i = dim2gt.get(dimension);
+        return i == null ? -1 : i.intValue();
+    }
+
+    /**
+     * Looks up the GT column of a metric.
+     *
+     * @param metric the aggregation function to look up
+     * @return the GT column index, or -1 if the metric maps to no column or to more than two
+     */
+    public int getIndexOf(FunctionDesc metric) {
+        List<Integer> list = metrics2gt.get(metric);
+        // normal case
+        if (list.size() == 1) {
+            return list.get(0);
+        }
+        // count distinct & its holistic version: the holistic one was registered second
+        else if (list.size() == 2) {
+            assert metric.isCountDistinct();
+            return metric.isHolisticCountDistinct() ? list.get(1) : list.get(0);
+        }
+        // unexpected
+        else
+            return -1;
+    }
+
+    /** @return the cuboid's dimensions, in the same order as their GT column indexes */
+    public List<TblColRef> getCuboidDimensionsInGTOrder() {
+        return cuboid.getColumns();
+    }
+
+    /** Describes the mapping; useful when the mapping is concatenated into error messages. */
+    @Override
+    public String toString() {
+        return "CuboidToGridTableMapping [cuboid=" + cuboid.getId() + ", dim2gt=" + dim2gt + ", metrics2gt=" + metrics2gt + "]";
+    }
+}