You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2015/05/15 05:06:48 UTC
[36/50] [abbrv] incubator-kylin git commit: KYLIN-728 use more accurate memory estimation to reduce disk io, accelerate build speed
KYLIN-728 use more accurate memory estimation to reduce disk io, accelerate build speed
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9e58ae87
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9e58ae87
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9e58ae87
Branch: refs/heads/streaming-localdict
Commit: 9e58ae8795bfc44b38f04dff47f0d9be4c221e02
Parents: 0d87e8f
Author: qianhao.zhou <qi...@ebay.com>
Authored: Tue May 12 16:46:03 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Tue May 12 16:46:03 2015 +0800
----------------------------------------------------------------------
.../job/hadoop/cubev2/InMemCubeBuilder.java | 40 +++++++++-----------
.../kylin/storage/gridtable/GTComboStore.java | 2 -
.../storage/gridtable/diskstore/FileSystem.java | 2 +
.../gridtable/diskstore/GTDiskStore.java | 9 ++++-
.../gridtable/diskstore/HadoopFileSystem.java | 14 ++++++-
.../gridtable/diskstore/LocalFileSystem.java | 12 +++++-
6 files changed, 48 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java
index 87ad2d3..f869caa 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java
@@ -69,6 +69,7 @@ import java.util.concurrent.BlockingQueue;
@SuppressWarnings("rawtypes")
public class InMemCubeBuilder implements Runnable {
+ //estimation of (size of aggregation cache) / (size of mem store)
private static final double AGGREGATION_CACHE_FACTOR = 3;
private static Logger logger = LoggerFactory.getLogger(InMemCubeBuilder.class);
@@ -380,48 +381,41 @@ public class InMemCubeBuilder implements Runnable {
record.setValues(recordValues);
}
- private boolean checkMemory(long threshold) {
+ private long checkMemory(long threshold) {
final long freeMemory = Runtime.getRuntime().freeMemory();
- logger.info("available memory:" + (freeMemory>>10) + " KB");
- if (freeMemory >= threshold) {
- logger.info("no need to flush to disk");
- return true;
- } else {
- return false;
- }
+ logger.info("available memory:" + (freeMemory >> 10) + " KB, memory needed:" + (threshold >> 10) + " KB");
+ return freeMemory - threshold;
}
private boolean gc(TreeNode<GridTable> parentNode) {
final long parentCuboidMem = SizeOfUtil.deepSizeOf(parentNode.data.getStore());
long threshold = (long) (parentCuboidMem * (AGGREGATION_CACHE_FACTOR + 1));
- logger.info((threshold >> 10) + " KB is needed to create " + parentNode.id + "'s child");
- if (checkMemory(threshold)) {
- return true;
- }
final List<TreeNode<GridTable>> gridTables = parentNode.getAncestorList();
+ long memoryLeft = checkMemory(threshold);
for (TreeNode<GridTable> gridTable : gridTables) {
- logger.info("wait 10 seconds for gc");
- try {
- Thread.sleep(10 * 1000);
- } catch (InterruptedException e) {
- logger.error("this should not happen", e);
- }
- if (checkMemory(threshold)) {
+ if (memoryLeft >= 0) {
return true;
} else {
logger.info("memory is low, try to select one node to flush to disk from:" + StringUtils.join(",", gridTables));
final IGTStore store = gridTable.data.getStore();
assert store instanceof GTComboStore;
if (store.memoryUsage() > 0) {
- logger.info("cuboid id:" + gridTable.id + " selected, memory used:" + (SizeOfUtil.deepSizeOf(store)>>10) + " KB");
+ final long storeSize = SizeOfUtil.deepSizeOf(store);
+ memoryLeft += storeSize;
+ logger.info("cuboid id:" + gridTable.id + " selected, memory used:" + (storeSize >> 10) + " KB");
long t = System.currentTimeMillis();
((GTComboStore) store).switchToDiskStore();
logger.info("switch to disk store cost:" + (System.currentTimeMillis() - t) + "ms");
}
}
}
- logger.info("no store has been flushed to disk");
- return true;
+ if (memoryLeft >= 0) {
+ return true;
+ } else {
+ logger.warn("all ancestor nodes of " + parentNode.id + " has been flushed to disk, memory is still insufficient, usually due to jvm gc not finished, forced to use memory store");
+ return true;
+ }
+
}
private void createNDCuboidGT(SimpleGridTableTree parentNode, long parentCuboidId, long cuboidId) throws IOException {
@@ -431,7 +425,7 @@ public class InMemCubeBuilder implements Runnable {
if (parentNode.data.getStore().memoryUsage() <= 0) {
long t = System.currentTimeMillis();
((GTComboStore) parentNode.data.getStore()).switchToMemStore();
- logger.info("switch to mem store cost:" + (System.currentTimeMillis() - t) + "ms");
+ logger.info("node " + parentNode.id + " switch to mem store cost:" + (System.currentTimeMillis() - t) + "ms");
}
boolean inMem = gc(parentNode);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java
index c7d0c2b..a6476ca 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java
@@ -62,7 +62,6 @@ public class GTComboStore implements IGTStore {
logger.error("fail to switch to mem store", e);
throw new RuntimeException(e);
}
- logger.info("switch to mem store");
}
public void switchToDiskStore() {
@@ -77,7 +76,6 @@ public class GTComboStore implements IGTStore {
logger.error("fail to switch to disk store", e);
throw new RuntimeException(e);
}
- logger.info("switch to disk store");
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java
index 2ab2c7e..bebc1a2 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java
@@ -12,6 +12,8 @@ interface FileSystem {
boolean delete(String path);
+ void deleteOnExit(String path);
+
boolean createDirectory(String path);
boolean createFile(String path);
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java
index f48fce3..fec0d13 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java
@@ -33,6 +33,7 @@ public class GTDiskStore implements IGTStore {
this.identifier = generateIdentifier(fileSystem);
logger.info("disk store created, identifier:" + identifier);
this.writer = new DiskStoreWriter(fileSystem.getWriter(getRowBlockFile(identifier)));
+ deleteTmpFilesOnExit();
}
private String generateIdentifier(FileSystem fs) {
@@ -153,8 +154,12 @@ public class GTDiskStore implements IGTStore {
} catch (Exception e) {
logger.error("error to close writer", e);
}
- fileSystem.delete(getRowBlockFile(identifier));
- fileSystem.delete(getRootDirectory(identifier));
+ deleteTmpFilesOnExit();
+ }
+
+ private void deleteTmpFilesOnExit() {
+ fileSystem.deleteOnExit(getRowBlockFile(identifier));
+ fileSystem.deleteOnExit(getRootDirectory(identifier));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java
index e1efd1b..7a6450d 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java
@@ -12,13 +12,13 @@ import java.io.OutputStream;
/**
* Created by qianzhou on 5/6/15.
*/
-public class HadoopFileSystem implements FileSystem {
+class HadoopFileSystem implements FileSystem {
private static final Logger logger = LoggerFactory.getLogger(HadoopFileSystem.class);
final org.apache.hadoop.fs.FileSystem fileSystem;
- public HadoopFileSystem() {
+ HadoopFileSystem() {
try {
fileSystem = org.apache.hadoop.fs.FileSystem.get(HadoopUtil.getCurrentConfiguration());
} catch (IOException e) {
@@ -47,6 +47,16 @@ public class HadoopFileSystem implements FileSystem {
}
@Override
+ public void deleteOnExit(String path) {
+ try {
+ fileSystem.deleteOnExit(new Path(path));
+ } catch (IOException e) {
+ logger.error("error deleteOnExit, path:" + path, e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
public boolean createDirectory(String path) {
try {
return fileSystem.mkdirs(new Path(path));
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java
index 1c14e3f..d512552 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java
@@ -11,6 +11,9 @@ import java.io.*;
class LocalFileSystem implements FileSystem {
private static Logger logger = LoggerFactory.getLogger(LocalFileSystem.class);
+
+ LocalFileSystem(){}
+
@Override
public boolean checkExistence(String path) {
return new File(path).exists();
@@ -22,6 +25,11 @@ class LocalFileSystem implements FileSystem {
}
@Override
+ public void deleteOnExit(String path) {
+ new File(path).deleteOnExit();
+ }
+
+ @Override
public boolean createDirectory(String path) {
return new File(path).mkdirs();
}
@@ -42,7 +50,7 @@ class LocalFileSystem implements FileSystem {
return new FileOutputStream(path);
} catch (FileNotFoundException e) {
//should not happen
- logger.error("path:" + path + " nout found");
+ logger.error("path:" + path + " out found");
throw new RuntimeException(e);
}
}
@@ -53,7 +61,7 @@ class LocalFileSystem implements FileSystem {
return new FileInputStream(path);
} catch (FileNotFoundException e) {
//should not happen
- logger.error("path:" + path + " nout found");
+ logger.error("path:" + path + " out found");
throw new RuntimeException(e);
}
}