You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ni...@apache.org on 2019/05/27 03:34:32 UTC

[kylin] 02/02: KYLIN-3986 Add hint about the absent measures after a successful query. Generate the GridTable according to MeasureInstance metadata

This is an automated email from the ASF dual-hosted git repository.

nic pushed a commit to branch dynamic-measure
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 9f21f0c3de4cd4de2288655990e20ad34a4bb47c
Author: yuzhang <sh...@163.com>
AuthorDate: Tue May 7 21:04:23 2019 +0800

    KYLIN-3986 Add hint about the absent measures after a successful query. Generate the GridTable according to MeasureInstance metadata
---
 .../apache/kylin/common/MissingMeasureSegment.java |  88 +++++++++++++
 .../java/org/apache/kylin/common/QueryContext.java |   8 ++
 .../cube/gridtable/CuboidToGridTableMapping.java   |   4 +
 .../CuboidToGridTableMappingFilterNullCol.java     | 143 +++++++++++++++++++++
 .../storage/gtrecord/CubeScanRangePlanner.java     |  48 ++++++-
 .../kylin/storage/hbase/cube/v2/CubeHBaseRPC.java  |  21 ++-
 6 files changed, 305 insertions(+), 7 deletions(-)

diff --git a/core-common/src/main/java/org/apache/kylin/common/MissingMeasureSegment.java b/core-common/src/main/java/org/apache/kylin/common/MissingMeasureSegment.java
new file mode 100644
index 0000000..6a6f9d2
--- /dev/null
+++ b/core-common/src/main/java/org/apache/kylin/common/MissingMeasureSegment.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.common;
+
+import com.google.common.collect.Sets;
+
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Records which measures are missing on one segment of a cube, identified by
+ * project name, cube name and segment name.
+ */
+public class MissingMeasureSegment {
+
+    /**
+     * Builds the identifying key {@code project/cube/segment}.
+     */
+    public static String getKey(String projectName, String cubeName, String segmentName) {
+        return projectName + "/" + cubeName + "/" + segmentName;
+    }
+
+    private String projectName;
+
+    private String cubeName;
+
+    private String segmentName;
+
+    // Created lazily by getMissMeasures(); may be null until then.
+    private Set<String> missMeasures;
+
+    public MissingMeasureSegment(String projectName, String cubeName, String segmentName) {
+        this.projectName = projectName;
+        this.cubeName = cubeName;
+        this.segmentName = segmentName;
+    }
+
+    /**
+     * @return the key of this instance, in the same format as {@link #getKey(String, String, String)}
+     */
+    public String getKey() {
+        return getKey(projectName, cubeName, segmentName);
+    }
+
+    /**
+     * Renders a message of the form {@code cube-segment missing measures: m1, m2}.
+     *
+     * @return the message; the measure list is empty when no measure has been recorded
+     */
+    public String getMissingMsg() {
+        // Go through the getter: the backing field is null until the set is first requested or assigned.
+        return cubeName + "-" + segmentName + " missing measures: " + getMissMeasures().stream().collect(Collectors.joining(", "));
+    }
+
+    public String getProjectName() {
+        return projectName;
+    }
+
+    public void setProjectName(String projectName) {
+        this.projectName = projectName;
+    }
+
+    public String getCubeName() {
+        return cubeName;
+    }
+
+    public void setCubeName(String cubeName) {
+        this.cubeName = cubeName;
+    }
+
+    public String getSegmentName() {
+        return segmentName;
+    }
+
+    public void setSegmentName(String segmentName) {
+        this.segmentName = segmentName;
+    }
+
+    /**
+     * @return the mutable set of missing measure names, created on first access; never null
+     */
+    public Set<String> getMissMeasures() {
+        if (missMeasures == null) {
+            missMeasures = Sets.newHashSet();
+        }
+        return missMeasures;
+    }
+
+    public void setMissMeasures(Set<String> missMeasures) {
+        this.missMeasures = missMeasures;
+    }
+}
diff --git a/core-common/src/main/java/org/apache/kylin/common/QueryContext.java b/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
index 000f7bf..a776095 100644
--- a/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
+++ b/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
@@ -65,6 +65,7 @@ public class QueryContext {
 
     private List<RPCStatistics> rpcStatisticsList = Lists.newCopyOnWriteArrayList();
     private Map<Integer, CubeSegmentStatisticsResult> cubeSegmentStatisticsResultMap = Maps.newConcurrentMap();
+    private List<MissingMeasureSegment> missingMeasureSegments;
 
     QueryContext() {
         this(System.currentTimeMillis());
@@ -575,6 +576,13 @@ public class QueryContext {
         }
     }
 
+    /**
+     * Returns the list of segments that reported missing measures for this query.
+     * The list is created on the first call (initial capacity 5) and the same instance is
+     * returned afterwards; no synchronization is performed here.
+     */
+    public List<MissingMeasureSegment> getMissingMeasureSegments() {
+        if (missingMeasureSegments != null) {
+            return missingMeasureSegments;
+        }
+        missingMeasureSegments = Lists.newArrayListWithCapacity(5);
+        return missingMeasureSegments;
+    }
+
     public static class CubeSegmentStatisticsResult implements Serializable {
         protected static final long serialVersionUID = 1L;
 
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java
index 05256cc..2e6f898 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java
@@ -56,6 +56,10 @@ public class CuboidToGridTableMapping {
     private int nMetrics;
     private Map<FunctionDesc, Integer> metrics2gt; // because count distinct may have a holistic version
 
+    /**
+     * Package-private constructor that leaves {@code cuboid} null and, unlike the public
+     * constructor, does not call {@code init()}.
+     * NOTE(review): presumably meant for subclasses in this package that delegate to another
+     * mapping - confirm that no inherited method reads the uninitialized state.
+     */
+    CuboidToGridTableMapping() {
+        this.cuboid = null;
+    }
+
     public CuboidToGridTableMapping(Cuboid cuboid) {
         this.cuboid = cuboid;
         init();
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMappingFilterNullCol.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMappingFilterNullCol.java
new file mode 100644
index 0000000..6d06ed4
--- /dev/null
+++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMappingFilterNullCol.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.cube.gridtable;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.kylin.common.util.BitSets;
+import org.apache.kylin.common.util.ImmutableBitSet;
+import org.apache.kylin.dimension.DimensionEncoding;
+import org.apache.kylin.dimension.IDimensionEncodingMap;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.model.FunctionDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * A {@link CuboidToGridTableMapping} that wraps another mapping and hides a given set of
+ * metrics (and, through the private constructor, dimensions) from it.
+ *
+ * Index lookups for a hidden metric or dimension return -1, hidden metrics are cleared from the
+ * non-primary-key column blocks, and the column count is reduced by the number of hidden columns.
+ * The instance is built through the parent's no-arg constructor, so it carries no cuboid of its own.
+ */
+public class CuboidToGridTableMappingFilterNullCol extends CuboidToGridTableMapping {
+
+    private final CuboidToGridTableMapping internalMapping;
+    private final Collection<FunctionDesc> nullMetrics;
+    private final Collection<TblColRef> nullDimensions;
+    private final int nullMetricsNum;
+    private final int nullDimensionsNum;
+    // Indexes of the hidden metrics as reported by the wrapped mapping.
+    private final ImmutableBitSet nullMetricsIdxSet;
+
+    /**
+     * Wraps {@code internalMapping} and hides {@code nullMetrics}; no dimension is hidden.
+     *
+     * @param internalMapping the mapping to delegate to
+     * @param nullMetrics     the metrics to hide
+     */
+    public CuboidToGridTableMappingFilterNullCol(CuboidToGridTableMapping internalMapping, Collection<FunctionDesc> nullMetrics) {
+        this(internalMapping, nullMetrics, Collections.<TblColRef> emptyList());
+    }
+
+    private CuboidToGridTableMappingFilterNullCol(CuboidToGridTableMapping internalMapping, Collection<FunctionDesc> nullMetrics, Collection<TblColRef> nullDimensions) {
+        this.internalMapping = internalMapping;
+        this.nullMetrics = nullMetrics;
+        this.nullDimensions = nullDimensions;
+        this.nullMetricsNum = nullMetrics.size();
+        this.nullDimensionsNum = nullDimensions.size();
+        int[] nullMetricsIdx = internalMapping.getMetricsIndexes(nullMetrics);
+        this.nullMetricsIdxSet = new ImmutableBitSet(BitSets.valueOf(nullMetricsIdx));
+    }
+
+    /**
+     * @return the wrapped mapping's column count minus the number of hidden metrics and dimensions
+     */
+    @Override
+    public int getColumnCount() {
+        return internalMapping.getColumnCount() - nullMetricsNum - nullDimensionsNum;
+    }
+
+    /** Delegates to the wrapped mapping without filtering. */
+    @Override
+    public DataType[] getDataTypes() {
+        return internalMapping.getDataTypes();
+    }
+
+    @Override
+    // TODO-yuzhang filter null dimensions
+    public ImmutableBitSet getPrimaryKey() {
+        return internalMapping.getPrimaryKey();
+    }
+
+    /**
+     * @return the wrapped mapping's column blocks with block 0 set to {@link #getPrimaryKey()} and
+     *         the hidden metric indexes removed from every other block
+     */
+    @Override
+    public ImmutableBitSet[] getColumnBlocks() {
+        // Work on a copy: writing into the array handed out by the wrapped mapping could alter
+        // that mapping for every later caller if it exposes its own backing array.
+        ImmutableBitSet[] result = internalMapping.getColumnBlocks().clone();
+        result[0] = getPrimaryKey();
+        for (int i = 1; i < result.length; i++) {
+            result[i] = result[i].andNot(nullMetricsIdxSet);
+        }
+        return result;
+    }
+
+    /**
+     * @return -1 for a hidden dimension, otherwise the index reported by the wrapped mapping
+     */
+    @Override
+    public int getIndexOf(TblColRef dimension) {
+        if (this.nullDimensions.contains(dimension)) {
+            return -1;
+        }
+        return internalMapping.getIndexOf(dimension);
+    }
+
+    /**
+     * @return -1 for a hidden metric or when the wrapped mapping reports a negative index,
+     *         otherwise the index reported by the wrapped mapping
+     */
+    @Override
+    public int getIndexOf(FunctionDesc metric) {
+        int idx = internalMapping.getIndexOf(metric);
+        // A negative index must not reach the bit-set lookup; presumably it rejects negative
+        // indexes the way java.util.BitSet does.
+        if (idx < 0 || nullMetricsIdxSet.get(idx)) {
+            return -1;
+        }
+        return idx;
+    }
+
+    /**
+     * @return a new list holding the wrapped mapping's dimensions without the hidden ones
+     */
+    @Override
+    public List<TblColRef> getCuboidDimensionsInGTOrder() {
+        List<TblColRef> dimsInGTOrder = Lists.newArrayList(internalMapping.getCuboidDimensionsInGTOrder());
+        dimsInGTOrder.removeAll(nullDimensions);
+        return dimsInGTOrder;
+    }
+
+    /**
+     * @return the encodings looked up in {@code dimEncMap} for each dimension of
+     *         {@link #getCuboidDimensionsInGTOrder()}, in that order
+     */
+    @Override
+    public DimensionEncoding[] getDimensionEncodings(IDimensionEncodingMap dimEncMap) {
+        List<TblColRef> dims = getCuboidDimensionsInGTOrder();
+        DimensionEncoding[] dimEncs = new DimensionEncoding[dims.size()];
+        for (int i = 0; i < dimEncs.length; i++) {
+            dimEncs[i] = dimEncMap.get(dims.get(i));
+        }
+        return dimEncs;
+    }
+
+    /** Always returns an empty map; the wrapped mapping is not consulted. */
+    @Override
+    public Map<Integer, Integer> getDependentMetricsMap() {
+        return Collections.<Integer, Integer> emptyMap();
+    }
+
+    /**
+     * @return a new map copied from the wrapped mapping with the hidden dimensions removed
+     */
+    @Override
+    public Map<TblColRef, Integer> getDim2gt() {
+        Map<TblColRef, Integer> result = Maps.newHashMap(internalMapping.getDim2gt());
+        for (TblColRef hidden : nullDimensions) {
+            result.remove(hidden);
+        }
+        return result;
+    }
+
+    /**
+     * Drops every metric for which {@link #getIndexOf(FunctionDesc)} is negative and hands the
+     * remaining ones, in iteration order, to the parent implementation.
+     */
+    @Override
+    public String[] makeAggrFuncs(Collection<FunctionDesc> metrics) {
+        List<FunctionDesc> metricList = Lists.newArrayListWithCapacity(metrics.size());
+        for (FunctionDesc m : metrics) {
+            if (getIndexOf(m) >= 0) {
+                metricList.add(m);
+            }
+        }
+        return super.makeAggrFuncs(metricList);
+    }
+}
diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeScanRangePlanner.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeScanRangePlanner.java
index 3095c8f..bd12209 100644
--- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeScanRangePlanner.java
+++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeScanRangePlanner.java
@@ -27,8 +27,12 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.MissingMeasureSegment;
+import org.apache.kylin.common.QueryContext;
+import org.apache.kylin.common.QueryContextFacade;
 import org.apache.kylin.common.debug.BackdoorToggles;
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.ImmutableBitSet;
@@ -37,6 +41,7 @@ import org.apache.kylin.cube.common.FuzzyValueCombination;
 import org.apache.kylin.cube.cuboid.Cuboid;
 import org.apache.kylin.cube.gridtable.CubeGridTable;
 import org.apache.kylin.cube.gridtable.CuboidToGridTableMapping;
+import org.apache.kylin.cube.gridtable.CuboidToGridTableMappingFilterNullCol;
 import org.apache.kylin.cube.gridtable.RecordComparators;
 import org.apache.kylin.cube.gridtable.ScanRangePlannerBase;
 import org.apache.kylin.cube.kv.CubeDimEncMap;
@@ -48,10 +53,13 @@ import org.apache.kylin.gridtable.GTScanRequest;
 import org.apache.kylin.gridtable.GTScanRequestBuilder;
 import org.apache.kylin.gridtable.GTUtil;
 import org.apache.kylin.gridtable.IGTComparator;
+import org.apache.kylin.measure.MeasureInstance;
+import org.apache.kylin.measure.MeasureManager;
 import org.apache.kylin.metadata.expression.TupleExpression;
 import org.apache.kylin.metadata.filter.TupleFilter;
 import org.apache.kylin.metadata.model.DynamicFunctionDesc;
 import org.apache.kylin.metadata.model.FunctionDesc;
+import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.storage.StorageContext;
 import org.slf4j.Logger;
@@ -88,7 +96,7 @@ public class CubeScanRangePlanner extends ScanRangePlannerBase {
         this.cubeDesc = cubeSegment.getCubeDesc();
         this.cuboid = cuboid;
 
-        final CuboidToGridTableMapping mapping = context.getMapping();
+        final CuboidToGridTableMapping mapping = filterNullGTColumn(context.getMapping(), this.cuboid, this.cubeDesc, this.cubeSegment);
 
         this.gtInfo = CubeGridTable.newGTInfo(cuboid, new CubeDimEncMap(cubeSegment), mapping);
 
@@ -387,4 +395,42 @@ public class CubeScanRangePlanner extends ScanRangePlannerBase {
         this.maxScanRanges = maxScanRanges;
     }
 
+    /**
+     * Chooses the grid-table mapping for this segment. When the editable-metric-cube switch is
+     * off the given mapping is returned untouched; otherwise it is wrapped so that the measures
+     * reported by {@code getNullMeasure} for this segment are filtered out.
+     * The {@code cuboid} argument is not used here.
+     */
+    private CuboidToGridTableMapping filterNullGTColumn(CuboidToGridTableMapping mapping, Cuboid cuboid, CubeDesc cubeDesc, CubeSegment cubeSegment) {
+        boolean editableMetricCube = KylinConfig.getInstanceFromEnv().isEditableMetricCube();
+        if (!editableMetricCube) {
+            return mapping;
+        }
+        List<FunctionDesc> missingOnSegment = getNullMeasure(cubeDesc, cubeSegment);
+        return new CuboidToGridTableMappingFilterNullCol(mapping, missingOnSegment);
+    }
+
+    // TODO-yuzhang find null metric according to the timestamp
+    /**
+     * Collects the functions of all cube measures whose name is not among the measures that
+     * {@code MeasureManager} reports for the given segment.
+     * When at least one measure is missing and a current {@code QueryContext} exists, a
+     * {@code MissingMeasureSegment} listing the missing measure names is added to that context.
+     *
+     * @param cubeDesc    the cube whose measures are checked
+     * @param cubeSegment the segment being queried
+     * @return the functions of the measures missing on the segment; empty when none is missing
+     */
+    private List<FunctionDesc> getNullMeasure(CubeDesc cubeDesc, CubeSegment cubeSegment) {
+        List<FunctionDesc> nullMetrics = Lists.newArrayListWithCapacity(cubeDesc.getMeasures().size());
+        MeasureManager measureManager = MeasureManager.getInstance(KylinConfig.getInstanceFromEnv());
+        Set<String> measuresOnSegment = measureManager.getMeasuresOnSegment(cubeSegment.getProject(), cubeSegment.getCubeDesc().getName(), cubeSegment.getName())
+                .stream()
+                .map(m -> m.getName())
+                .collect(Collectors.toSet());
+        MissingMeasureSegment mms = new MissingMeasureSegment(cubeDesc.getProject(), cubeDesc.getName(), cubeSegment.getName());
+        for (MeasureDesc measure : cubeDesc.getMeasures()) {
+            if (measuresOnSegment.contains(measure.getName())) {
+                continue;
+            }
+            // The instance is only needed for the log line. Guard against a failed lookup so that
+            // a query is not aborted just to build a hint (assumes getMeasure may return null - TODO confirm).
+            MeasureInstance measureInstance = measureManager.getMeasure(cubeDesc.getName(), measure.getName());
+            Object segmentsOfMeasure = measureInstance == null ? null : measureInstance.getSegmentsName();
+            logger.debug("Current segment[{}-{}] doesn't have measure[{}], you can refresh this segment to caculate this measure. Measure[{}]'s segments: {}",
+                    cubeDesc.getName(), cubeSegment.getName(), measure.getName(), measure.getName(), segmentsOfMeasure);
+            // Record the same name that was used for the membership test above.
+            mms.getMissMeasures().add(measure.getName());
+            nullMetrics.add(measure.getFunction());
+        }
+        if (!mms.getMissMeasures().isEmpty()) {
+            QueryContext ctx = QueryContextFacade.current();
+            if (ctx != null) {
+                ctx.getMissingMeasureSegments().add(mms);
+            }
+        }
+
+        return nullMetrics;
+    }
+
 }
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseRPC.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseRPC.java
index 634a3cd..f030225 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseRPC.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseRPC.java
@@ -18,9 +18,10 @@
 
 package org.apache.kylin.storage.hbase.cube.v2;
 
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HConstants;
@@ -49,6 +50,8 @@ import org.apache.kylin.gridtable.GTInfo;
 import org.apache.kylin.gridtable.GTRecord;
 import org.apache.kylin.gridtable.GTScanRange;
 import org.apache.kylin.gridtable.IGTStorage;
+import org.apache.kylin.measure.MeasureInstance;
+import org.apache.kylin.measure.MeasureManager;
 import org.apache.kylin.metadata.model.ISegment;
 import org.apache.kylin.storage.StorageContext;
 import org.apache.kylin.storage.hbase.HBaseConnection;
@@ -202,17 +205,23 @@ public abstract class CubeHBaseRPC implements IGTStorage {
 
         int colBlkIndex = 1;
         int metricOffset = fullGTInfo.getPrimaryKey().trueBitCount();
-
+        Set<String> measuresOnSegment = MeasureManager.getInstance(cubeSeg.getConfig()).getMeasuresOnSegment(cubeSeg.getProject(), cubeSeg.getCubeDesc().getName(), cubeSeg.getName())
+                .stream()
+                .map(MeasureInstance::getName)
+                .collect(Collectors.toSet());
         HBaseMappingDesc hbaseMapping = cubeSeg.getCubeDesc().getHbaseMapping();
         for (HBaseColumnFamilyDesc familyDesc : hbaseMapping.getColumnFamily()) {
             for (HBaseColumnDesc hbaseColDesc : familyDesc.getColumns()) {
                 if (selectedColBlocks.get(colBlkIndex)) {
                     int[] metricIndexes = hbaseColDesc.getMeasureIndex();
-                    Integer[] gtIndexes = new Integer[metricIndexes.length];
-                    for (int i = 0; i < gtIndexes.length; i++) {
-                        gtIndexes[i] = metricIndexes[i] + metricOffset;
+                    List<Integer> gtIndexList = Lists.newArrayListWithCapacity(metricIndexes.length);
+                    for (int i = 0; i < metricIndexes.length; i++) {
+                        String measureName = cubeSeg.getCubeDesc().getMeasures().get(metricIndexes[i]).getName();
+                        if (measuresOnSegment.contains(measureName)) {
+                            gtIndexList.add(metricIndexes[i] + metricOffset);
+                        }
                     }
-                    ret.add(Arrays.asList(gtIndexes));
+                    ret.add(gtIndexList);
                 }
                 colBlkIndex++;
             }