You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2015/05/15 05:06:48 UTC

[36/50] [abbrv] incubator-kylin git commit: KYLIN-728 use more accurate memory estimation to reduce disk I/O and accelerate build speed

KYLIN-728 use more accurate memory estimation to reduce disk I/O and accelerate build speed


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9e58ae87
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9e58ae87
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9e58ae87

Branch: refs/heads/streaming-localdict
Commit: 9e58ae8795bfc44b38f04dff47f0d9be4c221e02
Parents: 0d87e8f
Author: qianhao.zhou <qi...@ebay.com>
Authored: Tue May 12 16:46:03 2015 +0800
Committer: qianhao.zhou <qi...@ebay.com>
Committed: Tue May 12 16:46:03 2015 +0800

----------------------------------------------------------------------
 .../job/hadoop/cubev2/InMemCubeBuilder.java     | 40 +++++++++-----------
 .../kylin/storage/gridtable/GTComboStore.java   |  2 -
 .../storage/gridtable/diskstore/FileSystem.java |  2 +
 .../gridtable/diskstore/GTDiskStore.java        |  9 ++++-
 .../gridtable/diskstore/HadoopFileSystem.java   | 14 ++++++-
 .../gridtable/diskstore/LocalFileSystem.java    | 12 +++++-
 6 files changed, 48 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java
----------------------------------------------------------------------
diff --git a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java
index 87ad2d3..f869caa 100644
--- a/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java
+++ b/job/src/main/java/org/apache/kylin/job/hadoop/cubev2/InMemCubeBuilder.java
@@ -69,6 +69,7 @@ import java.util.concurrent.BlockingQueue;
 @SuppressWarnings("rawtypes")
 public class InMemCubeBuilder implements Runnable {
 
+    //estimation of (size of aggregation cache) / (size of mem store)
     private static final double AGGREGATION_CACHE_FACTOR = 3;
     private static Logger logger = LoggerFactory.getLogger(InMemCubeBuilder.class);
 
@@ -380,48 +381,41 @@ public class InMemCubeBuilder implements Runnable {
         record.setValues(recordValues);
     }
 
-    private boolean checkMemory(long threshold) {
+    private long checkMemory(long threshold) {
         final long freeMemory = Runtime.getRuntime().freeMemory();
-        logger.info("available memory:" + (freeMemory>>10) + " KB");
-        if (freeMemory >= threshold) {
-            logger.info("no need to flush to disk");
-            return true;
-        } else {
-            return false;
-        }
+        logger.info("available memory:" + (freeMemory >> 10) + " KB, memory needed:" + (threshold >> 10) + " KB");
+        return freeMemory - threshold;
     }
 
     private boolean gc(TreeNode<GridTable> parentNode) {
         final long parentCuboidMem = SizeOfUtil.deepSizeOf(parentNode.data.getStore());
         long threshold = (long) (parentCuboidMem * (AGGREGATION_CACHE_FACTOR + 1));
-        logger.info((threshold >> 10) + " KB is needed to create " + parentNode.id + "'s child");
-        if (checkMemory(threshold)) {
-            return true;
-        }
         final List<TreeNode<GridTable>> gridTables = parentNode.getAncestorList();
+        long memoryLeft = checkMemory(threshold);
         for (TreeNode<GridTable> gridTable : gridTables) {
-            logger.info("wait 10 seconds for gc");
-            try {
-                Thread.sleep(10 * 1000);
-            } catch (InterruptedException e) {
-                logger.error("this should not happen", e);
-            }
-            if (checkMemory(threshold)) {
+            if (memoryLeft >= 0) {
                 return true;
             } else {
                 logger.info("memory is low, try to select one node to flush to disk from:" + StringUtils.join(",", gridTables));
                 final IGTStore store = gridTable.data.getStore();
                 assert store instanceof GTComboStore;
                 if (store.memoryUsage() > 0) {
-                    logger.info("cuboid id:" + gridTable.id + " selected, memory used:" + (SizeOfUtil.deepSizeOf(store)>>10) + " KB");
+                    final long storeSize = SizeOfUtil.deepSizeOf(store);
+                    memoryLeft += storeSize;
+                    logger.info("cuboid id:" + gridTable.id + " selected, memory used:" + (storeSize >> 10) + " KB");
                     long t = System.currentTimeMillis();
                     ((GTComboStore) store).switchToDiskStore();
                     logger.info("switch to disk store cost:" + (System.currentTimeMillis() - t) + "ms");
                 }
             }
         }
-        logger.info("no store has been flushed to disk");
-        return true;
+        if (memoryLeft >= 0) {
+            return true;
+        } else {
+            logger.warn("all ancestor nodes of " + parentNode.id + " has been flushed to disk, memory is still insufficient, usually due to jvm gc not finished, forced to use memory store");
+            return true;
+        }
+
     }
 
     private void createNDCuboidGT(SimpleGridTableTree parentNode, long parentCuboidId, long cuboidId) throws IOException {
@@ -431,7 +425,7 @@ public class InMemCubeBuilder implements Runnable {
         if (parentNode.data.getStore().memoryUsage() <= 0) {
             long t = System.currentTimeMillis();
             ((GTComboStore) parentNode.data.getStore()).switchToMemStore();
-            logger.info("switch to mem store cost:" + (System.currentTimeMillis() - t) + "ms");
+            logger.info("node " + parentNode.id + " switch to mem store cost:" + (System.currentTimeMillis() - t) + "ms");
         }
 
         boolean inMem = gc(parentNode);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java
index c7d0c2b..a6476ca 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/GTComboStore.java
@@ -62,7 +62,6 @@ public class GTComboStore implements IGTStore {
             logger.error("fail to switch to mem store", e);
             throw new RuntimeException(e);
         }
-        logger.info("switch to mem store");
     }
 
     public void switchToDiskStore() {
@@ -77,7 +76,6 @@ public class GTComboStore implements IGTStore {
             logger.error("fail to switch to disk store", e);
             throw new RuntimeException(e);
         }
-        logger.info("switch to disk store");
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java
index 2ab2c7e..bebc1a2 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/FileSystem.java
@@ -12,6 +12,8 @@ interface FileSystem {
 
     boolean delete(String path);
 
+    void deleteOnExit(String path);
+
     boolean createDirectory(String path);
 
     boolean createFile(String path);

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java
index f48fce3..fec0d13 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/GTDiskStore.java
@@ -33,6 +33,7 @@ public class GTDiskStore implements IGTStore {
         this.identifier = generateIdentifier(fileSystem);
         logger.info("disk store created, identifier:" + identifier);
         this.writer = new DiskStoreWriter(fileSystem.getWriter(getRowBlockFile(identifier)));
+        deleteTmpFilesOnExit();
     }
 
     private String generateIdentifier(FileSystem fs) {
@@ -153,8 +154,12 @@ public class GTDiskStore implements IGTStore {
         } catch (Exception e) {
             logger.error("error to close writer", e);
         }
-        fileSystem.delete(getRowBlockFile(identifier));
-        fileSystem.delete(getRootDirectory(identifier));
+        deleteTmpFilesOnExit();
+    }
+
+    private void deleteTmpFilesOnExit() {
+        fileSystem.deleteOnExit(getRowBlockFile(identifier));
+        fileSystem.deleteOnExit(getRootDirectory(identifier));
     }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java
index e1efd1b..7a6450d 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/HadoopFileSystem.java
@@ -12,13 +12,13 @@ import java.io.OutputStream;
 /**
  * Created by qianzhou on 5/6/15.
  */
-public class HadoopFileSystem implements FileSystem {
+class HadoopFileSystem implements FileSystem {
 
     private static final Logger logger = LoggerFactory.getLogger(HadoopFileSystem.class);
 
     final org.apache.hadoop.fs.FileSystem fileSystem;
 
-    public HadoopFileSystem() {
+    HadoopFileSystem() {
         try {
             fileSystem = org.apache.hadoop.fs.FileSystem.get(HadoopUtil.getCurrentConfiguration());
         } catch (IOException e) {
@@ -47,6 +47,16 @@ public class HadoopFileSystem implements FileSystem {
     }
 
     @Override
+    public void deleteOnExit(String path) {
+        try {
+            fileSystem.deleteOnExit(new Path(path));
+        } catch (IOException e) {
+            logger.error("error deleteOnExit, path:" + path, e);
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Override
     public boolean createDirectory(String path) {
         try {
             return fileSystem.mkdirs(new Path(path));

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9e58ae87/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java
index 1c14e3f..d512552 100644
--- a/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java
+++ b/storage/src/main/java/org/apache/kylin/storage/gridtable/diskstore/LocalFileSystem.java
@@ -11,6 +11,9 @@ import java.io.*;
 class LocalFileSystem implements FileSystem {
 
     private static Logger logger = LoggerFactory.getLogger(LocalFileSystem.class);
+
+    LocalFileSystem(){}
+
     @Override
     public boolean checkExistence(String path) {
         return new File(path).exists();
@@ -22,6 +25,11 @@ class LocalFileSystem implements FileSystem {
     }
 
     @Override
+    public void deleteOnExit(String path) {
+        new File(path).deleteOnExit();
+    }
+
+    @Override
     public boolean createDirectory(String path) {
         return new File(path).mkdirs();
     }
@@ -42,7 +50,7 @@ class LocalFileSystem implements FileSystem {
             return new FileOutputStream(path);
         } catch (FileNotFoundException e) {
             //should not happen
-            logger.error("path:" + path + " nout found");
+            logger.error("path:" + path + " out found");
             throw new RuntimeException(e);
         }
     }
@@ -53,7 +61,7 @@ class LocalFileSystem implements FileSystem {
             return new FileInputStream(path);
         } catch (FileNotFoundException e) {
             //should not happen
-            logger.error("path:" + path + " nout found");
+            logger.error("path:" + path + " out found");
             throw new RuntimeException(e);
         }
     }