You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2017/02/14 08:45:22 UTC

kylin git commit: KYLIN-2243 TopN memory estimation is inaccurate in some cases

Repository: kylin
Updated Branches:
  refs/heads/master c93ea9c9c -> ab657d1f7


KYLIN-2243 TopN memory estimation is inaccurate in some cases


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/ab657d1f
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/ab657d1f
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/ab657d1f

Branch: refs/heads/master
Commit: ab657d1f77a0a5edd0e6b12f496b5c2d86c74849
Parents: c93ea9c
Author: shaofengshi <sh...@apache.org>
Authored: Tue Feb 14 16:45:16 2017 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Tue Feb 14 16:45:16 2017 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/cube/CubeDescManager.java  | 54 ++++++++++++++++++++
 .../measure/topn/TopNCounterSerializer.java     | 10 +++-
 .../kylin/metadata/datatype/DataType.java       |  1 +
 .../kylin/metadata/model/FunctionDesc.java      |  4 ++
 .../template/cube_desc/kylin_sales_cube.json    |  2 +-
 .../localmeta/cube_desc/ci_inner_join_cube.json |  2 +-
 .../localmeta/cube_desc/ci_left_join_cube.json  |  2 +-
 7 files changed, 70 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java
index 50312bf..dfc8f73 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java
@@ -21,8 +21,10 @@ package org.apache.kylin.cube;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.JsonSerializer;
 import org.apache.kylin.common.persistence.ResourceStore;
@@ -31,10 +33,17 @@ import org.apache.kylin.cube.cuboid.Cuboid;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.validation.CubeMetadataValidator;
 import org.apache.kylin.cube.model.validation.ValidateContext;
+import org.apache.kylin.dimension.DictionaryDimEnc;
+import org.apache.kylin.dimension.DimensionEncoding;
+import org.apache.kylin.dimension.DimensionEncodingFactory;
+import org.apache.kylin.measure.topn.TopNMeasureType;
 import org.apache.kylin.metadata.MetadataConstants;
 import org.apache.kylin.metadata.cachesync.Broadcaster;
 import org.apache.kylin.metadata.cachesync.Broadcaster.Event;
 import org.apache.kylin.metadata.cachesync.CaseInsensitiveStringCache;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.ParameterDesc;
 import org.apache.kylin.metadata.project.ProjectInstance;
 import org.apache.kylin.metadata.project.ProjectManager;
 import org.apache.kylin.metadata.realization.IRealization;
@@ -207,6 +216,7 @@ public class CubeDescManager {
             logger.warn("Broken cube desc " + cubeDesc, e);
             cubeDesc.addError(e.getMessage());
         }
+        postProcessCubeDesc(cubeDesc);
         // Check base validation
         if (!cubeDesc.getError().isEmpty()) {
             return cubeDesc;
@@ -227,6 +237,49 @@ public class CubeDescManager {
         return cubeDesc;
     }
 
+
+    /**
+     * Apply here any changes that are needed after a cubeDesc is received from the front-end.
+     * @param cubeDesc
+     */
+    private void postProcessCubeDesc(CubeDesc cubeDesc) {
+        for (MeasureDesc measureDesc : cubeDesc.getMeasures()) {
+            if (TopNMeasureType.FUNC_TOP_N.equalsIgnoreCase(measureDesc.getFunction().getExpression())) {
+                // update return type scale with the estimated key length
+                Map<String, String> configuration = measureDesc.getFunction().getConfiguration();
+                ParameterDesc parameter = measureDesc.getFunction().getParameter();
+                parameter = parameter.getNextParameter();
+                int keyLength = 0;
+                while (parameter != null) {
+                    String encoding = configuration.get(TopNMeasureType.CONFIG_ENCODING_PREFIX + parameter.getValue());
+                    String encodingVersionStr = configuration.get(TopNMeasureType.CONFIG_ENCODING_VERSION_PREFIX + parameter.getValue());
+                    if (StringUtils.isEmpty(encoding) || DictionaryDimEnc.ENCODING_NAME.equals(encoding)) {
+                        keyLength += 6; // estimation for dict encoding
+                    } else {
+                        // non-dict encoding
+                        int encodingVersion = 1;
+                        if (!StringUtils.isEmpty(encodingVersionStr)) {
+                            try {
+                                encodingVersion = Integer.parseInt(encodingVersionStr);
+                            } catch (NumberFormatException e) {
+                                throw new RuntimeException("invalid encoding version: " + encodingVersionStr);
+                            }
+                        }
+                        Object[] encodingConf = DimensionEncoding.parseEncodingConf(encoding);
+                        DimensionEncoding dimensionEncoding = DimensionEncodingFactory.create((String) encodingConf[0], (String[]) encodingConf[1], encodingVersion);
+                        keyLength += dimensionEncoding.getLengthOfEncoding();
+                    }
+
+                    parameter = parameter.getNextParameter();
+                }
+
+                DataType returnType = DataType.getType(measureDesc.getFunction().getReturnType());
+                DataType newReturnType = new DataType(returnType.getName(), returnType.getPrecision(), keyLength);
+                measureDesc.getFunction().setReturnType(newReturnType.toString());
+            }
+        }
+    }
+
     // remove cubeDesc
     public void removeCubeDesc(CubeDesc cubeDesc) throws IOException {
         String path = cubeDesc.getResourcePath();
@@ -291,6 +344,7 @@ public class CubeDescManager {
             return desc;
         }
 
+        postProcessCubeDesc(desc);
         // Semantic validation
         CubeMetadataValidator validator = new CubeMetadataValidator();
         ValidateContext context = validator.validate(desc);

http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java
index cef9177..c35bd30 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java
@@ -35,8 +35,14 @@ public class TopNCounterSerializer extends DataTypeSerializer<TopNCounter<ByteAr
 
     private int precision;
 
+    private int scale;
+
     public TopNCounterSerializer(DataType dataType) {
         this.precision = dataType.getPrecision();
+        this.scale = dataType.getScale();
+        if (scale < 0) {
+            scale = 6;
+        }
     }
 
     @Override
@@ -54,12 +60,12 @@ public class TopNCounterSerializer extends DataTypeSerializer<TopNCounter<ByteAr
 
     @Override
     public int maxLength() {
-        return Math.max(precision * TopNCounter.EXTRA_SPACE_RATE * (4 + 8), 1024 * 1024); // use at least 1M
+        return Math.max(precision * TopNCounter.EXTRA_SPACE_RATE * (scale + 8), 1024 * 1024); // use at least 1M
     }
 
     @Override
     public int getStorageBytesEstimate() {
-        return precision * TopNCounter.EXTRA_SPACE_RATE * 8;
+        return precision * TopNCounter.EXTRA_SPACE_RATE * (scale + 8);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
index d3756b8..83b2391 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
@@ -186,6 +186,7 @@ public class DataType implements Serializable {
             precision = 19;
             scale = 4;
         }
+
     }
 
     private String replaceLegacy(String str) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
index a49d982..cbd7574 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
@@ -228,6 +228,10 @@ public class FunctionDesc implements Serializable {
         return returnType;
     }
 
+    public void setReturnType(String returnType) {
+        this.returnType = returnType;
+    }
+
     public DataType getReturnDataType() {
         return returnDataType;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/examples/sample_cube/template/cube_desc/kylin_sales_cube.json
----------------------------------------------------------------------
diff --git a/examples/sample_cube/template/cube_desc/kylin_sales_cube.json b/examples/sample_cube/template/cube_desc/kylin_sales_cube.json
index 2a27305..1ad73bb 100644
--- a/examples/sample_cube/template/cube_desc/kylin_sales_cube.json
+++ b/examples/sample_cube/template/cube_desc/kylin_sales_cube.json
@@ -159,7 +159,7 @@
           "next_parameter" : null
         }
       },
-      "returntype" : "topn(100)"
+      "returntype" : "topn(100, 6)"
     }
   } ],
   "rowkey" : {

http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
index 99013ce..7e14d51 100644
--- a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
+++ b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
@@ -182,7 +182,7 @@
           "value" : "TEST_KYLIN_FACT.SELLER_ID"
         }
       },
-      "returntype" : "topn(100)",
+      "returntype" : "topn(100, 4)",
       "configuration": {"topn.encoding.SELLER_ID" : "int:4"}
     }
   }, {

http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
index 51139ae..b59d6df 100644
--- a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
+++ b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
@@ -182,7 +182,7 @@
           "value" : "TEST_KYLIN_FACT.SELLER_ID"
         }
       },
-      "returntype" : "topn(100)",
+      "returntype" : "topn(100, 4)",
       "configuration": {"topn.encoding.SELLER_ID" : "int:4"}
     }
   }, {