You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2015/12/16 11:08:00 UTC

kylin git commit: KYLIN-1237 Revisit on cube size estimation

Repository: kylin
Updated Branches:
  refs/heads/2.0-rc 996bbe221 -> e6b55540a


KYLIN-1237 Revisit on cube size estimation


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e6b55540
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e6b55540
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e6b55540

Branch: refs/heads/2.0-rc
Commit: e6b55540a3624cca82c2b01d07877e28cff839c0
Parents: 996bbe2
Author: honma <ho...@ebay.com>
Authored: Wed Dec 16 18:10:09 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Wed Dec 16 18:13:30 2015 +0800

----------------------------------------------------------------------
 .../measure/serializer/HLLCSerializer.java      | 194 +++++++++----------
 .../storage/hbase/steps/CreateHTableJob.java    |  14 +-
 2 files changed, 107 insertions(+), 101 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/e6b55540/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java b/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java
index 7aed458..ad7cdf9 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java
@@ -1,97 +1,97 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.metadata.measure.serializer;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
-import org.apache.kylin.metadata.model.DataType;
-
-/**
- * @author yangli9
- * 
- */
-public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounter> {
-
-    // be thread-safe and avoid repeated obj creation
-    private ThreadLocal<HyperLogLogPlusCounter> current = new ThreadLocal<HyperLogLogPlusCounter>();
-
-    private int precision;
-
-    public HLLCSerializer(DataType type) {
-        this.precision = type.getPrecision();
-    }
-
-    @Override
-    public void serialize(HyperLogLogPlusCounter value, ByteBuffer out) {
-        try {
-            value.writeRegisters(out);
-        } catch (IOException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    private HyperLogLogPlusCounter current() {
-        HyperLogLogPlusCounter hllc = current.get();
-        if (hllc == null) {
-            hllc = new HyperLogLogPlusCounter(precision);
-            current.set(hllc);
-        }
-        return hllc;
-    }
-
-    @Override
-    public HyperLogLogPlusCounter deserialize(ByteBuffer in) {
-        HyperLogLogPlusCounter hllc = current();
-        try {
-            hllc.readRegisters(in);
-        } catch (IOException e) {
-            throw new RuntimeException(e);
-        }
-        return hllc;
-    }
-
-    @Override
-    public int peekLength(ByteBuffer in) {
-        return current().peekLength(in);
-    }
-
-    @Override
-    public int maxLength() {
-        return current().maxLength();
-    }
-
-    @Override
-    public int getStorageBytesEstimate() {
-        return current().maxLength();
-    }
-
-    @Override
-    public HyperLogLogPlusCounter valueOf(byte[] value) {
-        HyperLogLogPlusCounter hllc = current();
-        hllc.clear();
-        if (value == null)
-            hllc.add("__nUlL__");
-        else
-            hllc.add(value);
-        return hllc;
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.metadata.measure.serializer;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
+import org.apache.kylin.metadata.model.DataType;
+
+/**
+ * Serializer between HyperLogLogPlusCounter values and ByteBuffers; reuses one counter instance per thread.
+ * @author yangli9
+ */
+public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounter> {
+
+    // per-thread reusable counter: keeps threads isolated and avoids repeated object creation
+    private ThreadLocal<HyperLogLogPlusCounter> current = new ThreadLocal<HyperLogLogPlusCounter>();
+
+    private int precision; // taken from the DataType; passed to every counter created by current()
+
+    public HLLCSerializer(DataType type) {
+        this.precision = type.getPrecision();
+    }
+
+    @Override
+    public void serialize(HyperLogLogPlusCounter value, ByteBuffer out) { // writes value's registers into out
+        try {
+            value.writeRegisters(out);
+        } catch (IOException e) {
+            throw new RuntimeException(e); // rethrown unchecked, original exception kept as the cause
+        }
+    }
+
+    private HyperLogLogPlusCounter current() { // returns this thread's counter, creating it on first use
+        HyperLogLogPlusCounter hllc = current.get();
+        if (hllc == null) {
+            hllc = new HyperLogLogPlusCounter(precision);
+            current.set(hllc);
+        }
+        return hllc;
+    }
+
+    @Override
+    public HyperLogLogPlusCounter deserialize(ByteBuffer in) { // NOTE: result is the shared per-thread counter
+        HyperLogLogPlusCounter hllc = current();
+        try {
+            hllc.readRegisters(in);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        return hllc; // same instance is reused by later deserialize/valueOf calls on this thread
+    }
+
+    @Override
+    public int peekLength(ByteBuffer in) { // delegates to the per-thread counter
+        return current().peekLength(in);
+    }
+
+    @Override
+    public int maxLength() { // delegates to the per-thread counter
+        return current().maxLength();
+    }
+
+    @Override
+    public int getStorageBytesEstimate() { // the estimate is the counter's maxLength(), same value as maxLength() above
+        return current().maxLength();
+    }
+
+    @Override
+    public HyperLogLogPlusCounter valueOf(byte[] value) { // builds a counter holding exactly one added value
+        HyperLogLogPlusCounter hllc = current();
+        hllc.clear(); // reset first, because the per-thread counter is reused across calls
+        if (value == null)
+            hllc.add("__nUlL__"); // a null input is added as this fixed placeholder string
+        else
+            hllc.add(value);
+        return hllc;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e6b55540/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
index e8b21d3..62554b2 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
@@ -372,18 +372,24 @@ public class CreateHTableJob extends AbstractHadoopJob {
 
         // add the measure length
         int space = 0;
+        boolean isMemoryHungry = false;
         for (MeasureDesc measureDesc : cubeSegment.getCubeDesc().getMeasures()) {
             DataType returnType = measureDesc.getFunction().getReturnDataType();
             if (returnType.isHLLC()) {
-                // for HLL, it will be compressed when export to bytes
-                space += returnType.getStorageBytesEstimate() * 0.75;
-            } else {
-                space += returnType.getStorageBytesEstimate();
+                isMemoryHungry = true;
             }
+            space += returnType.getStorageBytesEstimate();
         }
         bytesLength += space;
 
         double ret = 1.0 * bytesLength * rowCount / (1024L * 1024L);
+        if (isMemoryHungry) {
+            logger.info("Cube is memory hungry, storage size estimation multiply 0.05");
+            ret *= 0.05;
+        } else {
+            logger.info("Cube is not memory hungry, storage size estimation multiply 0.25");
+            ret *= 0.25;
+        }
         logger.info("Cuboid " + cuboidId + " has " + rowCount + " rows, each row size is " + bytesLength + " bytes." + " Total size is " + ret + "M.");
         return ret;
     }