You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2015/12/16 11:08:00 UTC
kylin git commit: KYLIN-1237 Revisit on cube size estimation
Repository: kylin
Updated Branches:
refs/heads/2.0-rc 996bbe221 -> e6b55540a
KYLIN-1237 Revisit on cube size estimation
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e6b55540
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e6b55540
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e6b55540
Branch: refs/heads/2.0-rc
Commit: e6b55540a3624cca82c2b01d07877e28cff839c0
Parents: 996bbe2
Author: honma <ho...@ebay.com>
Authored: Wed Dec 16 18:10:09 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Wed Dec 16 18:13:30 2015 +0800
----------------------------------------------------------------------
.../measure/serializer/HLLCSerializer.java | 194 +++++++++----------
.../storage/hbase/steps/CreateHTableJob.java | 14 +-
2 files changed, 107 insertions(+), 101 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6b55540/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java b/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java
index 7aed458..ad7cdf9 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/measure/serializer/HLLCSerializer.java
@@ -1,97 +1,97 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.metadata.measure.serializer;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
-import org.apache.kylin.metadata.model.DataType;
-
-/**
- * @author yangli9
- *
- */
-public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounter> {
-
- // be thread-safe and avoid repeated obj creation
- private ThreadLocal<HyperLogLogPlusCounter> current = new ThreadLocal<HyperLogLogPlusCounter>();
-
- private int precision;
-
- public HLLCSerializer(DataType type) {
- this.precision = type.getPrecision();
- }
-
- @Override
- public void serialize(HyperLogLogPlusCounter value, ByteBuffer out) {
- try {
- value.writeRegisters(out);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- private HyperLogLogPlusCounter current() {
- HyperLogLogPlusCounter hllc = current.get();
- if (hllc == null) {
- hllc = new HyperLogLogPlusCounter(precision);
- current.set(hllc);
- }
- return hllc;
- }
-
- @Override
- public HyperLogLogPlusCounter deserialize(ByteBuffer in) {
- HyperLogLogPlusCounter hllc = current();
- try {
- hllc.readRegisters(in);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- return hllc;
- }
-
- @Override
- public int peekLength(ByteBuffer in) {
- return current().peekLength(in);
- }
-
- @Override
- public int maxLength() {
- return current().maxLength();
- }
-
- @Override
- public int getStorageBytesEstimate() {
- return current().maxLength();
- }
-
- @Override
- public HyperLogLogPlusCounter valueOf(byte[] value) {
- HyperLogLogPlusCounter hllc = current();
- hllc.clear();
- if (value == null)
- hllc.add("__nUlL__");
- else
- hllc.add(value);
- return hllc;
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.metadata.measure.serializer;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.kylin.common.hll.HyperLogLogPlusCounter;
+import org.apache.kylin.metadata.model.DataType;
+
+/**
+ * @author yangli9
+ *
+ */
+public class HLLCSerializer extends DataTypeSerializer<HyperLogLogPlusCounter> {
+
+ // be thread-safe and avoid repeated obj creation
+ private ThreadLocal<HyperLogLogPlusCounter> current = new ThreadLocal<HyperLogLogPlusCounter>();
+
+ private int precision;
+
+ public HLLCSerializer(DataType type) {
+ this.precision = type.getPrecision();
+ }
+
+ @Override
+ public void serialize(HyperLogLogPlusCounter value, ByteBuffer out) {
+ try {
+ value.writeRegisters(out);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private HyperLogLogPlusCounter current() {
+ HyperLogLogPlusCounter hllc = current.get();
+ if (hllc == null) {
+ hllc = new HyperLogLogPlusCounter(precision);
+ current.set(hllc);
+ }
+ return hllc;
+ }
+
+ @Override
+ public HyperLogLogPlusCounter deserialize(ByteBuffer in) {
+ HyperLogLogPlusCounter hllc = current();
+ try {
+ hllc.readRegisters(in);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return hllc;
+ }
+
+ @Override
+ public int peekLength(ByteBuffer in) {
+ return current().peekLength(in);
+ }
+
+ @Override
+ public int maxLength() {
+ return current().maxLength();
+ }
+
+ @Override
+ public int getStorageBytesEstimate() {
+ return current().maxLength();
+ }
+
+ @Override
+ public HyperLogLogPlusCounter valueOf(byte[] value) {
+ HyperLogLogPlusCounter hllc = current();
+ hllc.clear();
+ if (value == null)
+ hllc.add("__nUlL__");
+ else
+ hllc.add(value);
+ return hllc;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/e6b55540/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
----------------------------------------------------------------------
diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
index e8b21d3..62554b2 100644
--- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
+++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java
@@ -372,18 +372,24 @@ public class CreateHTableJob extends AbstractHadoopJob {
// add the measure length
int space = 0;
+ boolean isMemoryHungry = false;
for (MeasureDesc measureDesc : cubeSegment.getCubeDesc().getMeasures()) {
DataType returnType = measureDesc.getFunction().getReturnDataType();
if (returnType.isHLLC()) {
- // for HLL, it will be compressed when export to bytes
- space += returnType.getStorageBytesEstimate() * 0.75;
- } else {
- space += returnType.getStorageBytesEstimate();
+ isMemoryHungry = true;
}
+ space += returnType.getStorageBytesEstimate();
}
bytesLength += space;
double ret = 1.0 * bytesLength * rowCount / (1024L * 1024L);
+ if (isMemoryHungry) {
+ logger.info("Cube is memory hungry, storage size estimation multiply 0.05");
+ ret *= 0.05;
+ } else {
+ logger.info("Cube is not memory hungry, storage size estimation multiply 0.25");
+ ret *= 0.25;
+ }
logger.info("Cuboid " + cuboidId + " has " + rowCount + " rows, each row size is " + bytesLength + " bytes." + " Total size is " + ret + "M.");
return ret;
}