Posted to commits@kylin.apache.org by sh...@apache.org on 2020/03/26 07:39:01 UTC

[kylin] branch 3.0.x updated: KYLIN-4425 Refactor Diagnosis Module

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch 3.0.x
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/3.0.x by this push:
     new e2aaec4  KYLIN-4425 Refactor Diagnosis Module
e2aaec4 is described below

commit e2aaec4219d4cd7e37d87d5f0d002c791815c3cc
Author: XiaoxiangYu <hi...@126.com>
AuthorDate: Thu Mar 12 17:16:20 2020 +0800

    KYLIN-4425 Refactor Diagnosis Module
---
 build/bin/diag.sh                                  |  17 ++
 .../org/apache/kylin/common/util/HadoopUtil.java   |   1 -
 .../kylin/rest/service/DiagnosisService.java       |   5 +-
 tool-assembly/pom.xml                              |  10 ++
 tool/pom.xml                                       |   6 +
 .../org/apache/kylin/tool/DiagnosisInfoCLI.java    |  53 ++++--
 .../org/apache/kylin/tool/JobDiagnosisInfoCLI.java | 112 +++++-------
 .../{ => extractor}/AbstractInfoExtractor.java     |  51 +++++-
 .../tool/{ => extractor}/ClientEnvExtractor.java   |  34 ++--
 .../tool/{ => extractor}/CubeMetaExtractor.java    |  69 +++++---
 .../tool/{ => extractor}/HBaseUsageExtractor.java  |  56 ++++--
 .../{common => extractor}/HadoopConfExtractor.java |  18 +-
 .../kylin/tool/{ => extractor}/HiveConfigCLI.java  |   2 +-
 .../kylin/tool/extractor/JStackExtractor.java      |  59 +++++++
 .../tool/{ => extractor}/JobInstanceExtractor.java |   4 +-
 .../tool/{ => extractor}/KylinLogExtractor.java    |  10 +-
 .../tool/{ => extractor}/MrJobInfoExtractor.java   |  14 +-
 .../tool/extractor/SparkEnvInfoExtractor.java      |  91 ++++++++++
 .../kylin/tool/extractor/YarnLogExtractor.java     | 194 +++++++++++++++++++++
 .../java/org/apache/kylin/tool/util/ToolUtil.java  |  13 --
 .../apache/kylin/tool/ClientEnvExtractorTest.java  |   2 +-
 .../apache/kylin/tool/CubeMetaExtractorTest.java   |   1 +
 22 files changed, 644 insertions(+), 178 deletions(-)

diff --git a/build/bin/diag.sh b/build/bin/diag.sh
index bb1bdc4..6774a80 100755
--- a/build/bin/diag.sh
+++ b/build/bin/diag.sh
@@ -67,6 +67,23 @@ then
     fi
 
     exit 0
+elif [ $# -gt 2 ] # with more than two arguments, the user can pass more flexible options
+then
+    source ${dir}/find-hive-dependency.sh
+
+    if [ -f "${dir}/setenv-tool.sh" ]
+        then source ${dir}/setenv-tool.sh
+    fi
+    mkdir -p ${KYLIN_HOME}/ext
+    export HBASE_CLASSPATH_PREFIX=${KYLIN_HOME}/conf:${KYLIN_HOME}/tool/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH_PREFIX}
+    export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency}
+
+    hbase ${KYLIN_EXTRA_START_OPTS} \
+      -Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-tools-log4j.properties \
+      -Dcatalina.home=${tomcat_root} \
+      "$@"
+
+    exit 0
 else
     echo "usage: diag.sh Project|JobId [target_path]"
     exit 1
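
With the new branch above, any invocation of diag.sh with more than two arguments is forwarded
verbatim to the hbase launcher, so a single extractor class can be run directly from the command
line, e.g. something like "diag.sh org.apache.kylin.tool.extractor.ClientEnvExtractor -destDir
/tmp/diag -compress true" (a hypothetical invocation; the accepted options depend on the extractor
being run).
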
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java
index 5187361..98c7b30 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java
@@ -75,7 +75,6 @@ public class HadoopUtil {
         }
         //  https://issues.apache.org/jira/browse/KYLIN-3064
         conf.set("yarn.timeline-service.enabled", "false");
-
         return conf;
     }
 
diff --git a/server-base/src/main/java/org/apache/kylin/rest/service/DiagnosisService.java b/server-base/src/main/java/org/apache/kylin/rest/service/DiagnosisService.java
index 528858b..e046c3d 100644
--- a/server-base/src/main/java/org/apache/kylin/rest/service/DiagnosisService.java
+++ b/server-base/src/main/java/org/apache/kylin/rest/service/DiagnosisService.java
@@ -98,9 +98,8 @@ public class DiagnosisService extends BasicService {
         Message msg = MsgPicker.getMsg();
 
         File cwd = new File("");
-        logger.debug("Current path: " + cwd.getAbsolutePath());
-
-        logger.debug("DiagnosisInfoCLI args: " + Arrays.toString(args));
+        logger.debug("Current path: {}", cwd.getAbsolutePath());
+        logger.debug("DiagnosisInfoCLI args: {}", Arrays.toString(args));
         File script = new File(KylinConfig.getKylinHome() + File.separator + "bin", "diag.sh");
         if (!script.exists()) {
             throw new BadRequestException(
diff --git a/tool-assembly/pom.xml b/tool-assembly/pom.xml
index 4d5626c..c87b405 100644
--- a/tool-assembly/pom.xml
+++ b/tool-assembly/pom.xml
@@ -63,6 +63,11 @@
             <scope>compile</scope>
         </dependency>
         <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-compress</artifactId>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-log4j12</artifactId>
             <scope>compile</scope>
@@ -98,6 +103,7 @@
                                     <include>log4j:log4j</include>
                                     <include>commons-io:commons-io</include>
                                     <include>commons-lang:commons-lang</include>
+                                    <include>org.apache.commons:commons-compress</include>
                                     <include>org.apache.commons:commons-lang3</include>
                                     <include>org.apache.commons:commons-email</include>
                                     <include>com.google.guava:guava</include>
@@ -119,6 +125,10 @@
                                     <shadedPattern>${shadeBase}.org.apache.commons.io</shadedPattern>
                                 </relocation>
                                 <relocation>
+                                    <pattern>org.apache.commons.compress</pattern>
+                                    <shadedPattern>${shadeBase}.org.apache.commons.compress</shadedPattern>
+                                </relocation>
+                                <relocation>
                                     <pattern>org.apache.commons.lang</pattern>
                                     <shadedPattern>${shadeBase}.org.apache.commons.lang</shadedPattern>
                                 </relocation>
diff --git a/tool/pom.xml b/tool/pom.xml
index 166893b..2f23229 100644
--- a/tool/pom.xml
+++ b/tool/pom.xml
@@ -63,6 +63,12 @@
             <groupId>org.apache.kylin</groupId>
             <artifactId>kylin-server-base</artifactId>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.kylin</groupId>
+            <artifactId>kylin-stream-core</artifactId>
+        </dependency>
+
         <!--Env-->
         <dependency>
             <groupId>org.apache.hbase</groupId>
diff --git a/tool/src/main/java/org/apache/kylin/tool/DiagnosisInfoCLI.java b/tool/src/main/java/org/apache/kylin/tool/DiagnosisInfoCLI.java
index 9063e9e..12077c0 100644
--- a/tool/src/main/java/org/apache/kylin/tool/DiagnosisInfoCLI.java
+++ b/tool/src/main/java/org/apache/kylin/tool/DiagnosisInfoCLI.java
@@ -37,6 +37,14 @@ import org.apache.kylin.common.util.OptionsHelper;
 import org.apache.kylin.metadata.project.ProjectInstance;
 import org.apache.kylin.metadata.project.ProjectManager;
 import org.apache.kylin.tool.util.ToolUtil;
+import org.apache.kylin.stream.core.util.NamedThreadFactory;
+import org.apache.kylin.tool.extractor.AbstractInfoExtractor;
+import org.apache.kylin.tool.extractor.ClientEnvExtractor;
+import org.apache.kylin.tool.extractor.CubeMetaExtractor;
+import org.apache.kylin.tool.extractor.JStackExtractor;
+import org.apache.kylin.tool.extractor.JobInstanceExtractor;
+import org.apache.kylin.tool.extractor.KylinLogExtractor;
+import org.apache.kylin.tool.extractor.SparkEnvInfoExtractor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -142,7 +150,7 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor {
                 : DEFAULT_PERIOD;
 
         logger.info("Start diagnosis info extraction in {} threads.", threadsNum);
-        executorService = Executors.newFixedThreadPool(threadsNum);
+        executorService = Executors.newFixedThreadPool(threadsNum, new NamedThreadFactory("GeneralDiagnosis"));
 
         // export cube metadata
         executorService.execute(new Runnable() {
@@ -150,9 +158,9 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor {
             public void run() {
                 logger.info("Start to extract metadata.");
                 try {
-                    String[] cubeMetaArgs = { "-packagetype", "cubemeta", "-destDir",
+                    String[] cubeMetaArgs = {"-packagetype", "cubemeta", "-destDir",
                             new File(exportDir, "metadata").getAbsolutePath(), "-project", projectNames, "-compress",
-                            "false", "-includeJobs", "false", "-submodule", "true" };
+                            "false", "-includeJobs", "false", "-submodule", "true"};
                     CubeMetaExtractor cubeMetaExtractor = new CubeMetaExtractor();
                     logger.info("CubeMetaExtractor args: " + Arrays.toString(cubeMetaArgs));
                     cubeMetaExtractor.execute(cubeMetaArgs);
@@ -169,8 +177,8 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor {
                 public void run() {
                     logger.info("Start to extract jobs.");
                     try {
-                        String[] jobArgs = { "-destDir", new File(exportDir, "jobs").getAbsolutePath(), "-period",
-                                Integer.toString(period), "-compress", "false", "-submodule", "true" };
+                        String[] jobArgs = {"-destDir", new File(exportDir, "jobs").getAbsolutePath(), "-period",
+                                Integer.toString(period), "-compress", "false", "-submodule", "true"};
                         JobInstanceExtractor jobInstanceExtractor = new JobInstanceExtractor();
                         jobInstanceExtractor.execute(jobArgs);
                     } catch (Exception e) {
@@ -188,13 +196,13 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor {
                     logger.info("Start to extract HBase usage.");
                     try {
                         // use reflection to isolate NoClassDef errors when HBase is not available
-                        String[] hbaseArgs = { "-destDir", new File(exportDir, "hbase").getAbsolutePath(), "-project",
-                                projectNames, "-compress", "false", "-submodule", "true" };
+                        String[] hbaseArgs = {"-destDir", new File(exportDir, "hbase").getAbsolutePath(), "-project",
+                                projectNames, "-compress", "false", "-submodule", "true"};
                         logger.info("HBaseUsageExtractor args: " + Arrays.toString(hbaseArgs));
-                        Object extractor = ClassUtil.newInstance("org.apache.kylin.tool.HBaseUsageExtractor");
+                        Object extractor = ClassUtil.newInstance("org.apache.kylin.tool.extractor.HBaseUsageExtractor");
                         Method execute = extractor.getClass().getMethod("execute", String[].class);
                         execute.invoke(extractor, (Object) hbaseArgs);
-                    } catch (Throwable e) {
+                    } catch (Exception e) {
                         logger.error("Error in export HBase usage.", e);
                     }
                 }
@@ -232,8 +240,8 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor {
                 @Override
                 public void run() {
                     try {
-                        String[] clientArgs = { "-destDir", new File(exportDir, "client").getAbsolutePath(),
-                                "-compress", "false", "-submodule", "true" };
+                        String[] clientArgs = {"-destDir", new File(exportDir, "client").getAbsolutePath(),
+                                "-compress", "false", "-submodule", "true"};
                         ClientEnvExtractor clientEnvExtractor = new ClientEnvExtractor();
                         logger.info("ClientEnvExtractor args: " + Arrays.toString(clientArgs));
                         clientEnvExtractor.execute(clientArgs);
@@ -250,8 +258,8 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor {
             public void run() {
                 logger.info("Start to extract logs.");
                 try {
-                    String[] logsArgs = { "-destDir", new File(exportDir, "logs").getAbsolutePath(), "-logPeriod",
-                            Integer.toString(period), "-compress", "false", "-submodule", "true" };
+                    String[] logsArgs = {"-destDir", new File(exportDir, "logs").getAbsolutePath(), "-logPeriod",
+                            Integer.toString(period), "-compress", "false", "-submodule", "true"};
                     KylinLogExtractor logExtractor = new KylinLogExtractor();
                     logger.info("KylinLogExtractor args: " + Arrays.toString(logsArgs));
                     logExtractor.execute(logsArgs);
@@ -261,8 +269,27 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor {
             }
         });
 
+        // dump jstack
+        String[] jstackDumpArgs = {"-destDir", exportDir.getAbsolutePath(), "-compress", "false", "-submodule",
+                "true"};
+        logger.info("JStackExtractor args: {}", Arrays.toString(jstackDumpArgs));
+        try {
+            new JStackExtractor().execute(jstackDumpArgs);
+        } catch (Exception e) {
+            logger.error("Error executing jstack dump extractor", e);
+        }
+
+        // export spark conf
+        String[] sparkEnvArgs = {"-destDir", new File(exportDir, "spark").getAbsolutePath(), "-compress", "false", "-submodule", "true"};
+        try {
+            new SparkEnvInfoExtractor().execute(sparkEnvArgs);
+        } catch (Exception e) {
+            logger.error("Error executing spark env extractor", e);
+        }
+
         executorService.shutdown();
         try {
+            logger.info("Waiting for extraction tasks to complete.");
             executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES);
         } catch (InterruptedException e) {
             Thread.currentThread().interrupt();
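
The thread pool above is now created with a NamedThreadFactory from kylin-stream-core (hence the new
dependency in tool/pom.xml). A minimal sketch of what such a factory provides, assuming only the
standard java.util.concurrent.ThreadFactory contract (the real implementation may differ in detail):

    import java.util.concurrent.ThreadFactory;
    import java.util.concurrent.atomic.AtomicInteger;

    // Illustrative sketch only; the real NamedThreadFactory ships in kylin-stream-core.
    public class NamedThreadFactorySketch implements ThreadFactory {
        private final String prefix;
        private final AtomicInteger count = new AtomicInteger(0);

        public NamedThreadFactorySketch(String prefix) {
            this.prefix = prefix;
        }

        @Override
        public Thread newThread(Runnable r) {
            // Threads show up as "GeneralDiagnosis-1", "GeneralDiagnosis-2", ...,
            // which makes the jstack dump collected by JStackExtractor easier to read.
            return new Thread(r, prefix + "-" + count.incrementAndGet());
        }
    }
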
diff --git a/tool/src/main/java/org/apache/kylin/tool/JobDiagnosisInfoCLI.java b/tool/src/main/java/org/apache/kylin/tool/JobDiagnosisInfoCLI.java
index 8fec48e..cbb32bb 100644
--- a/tool/src/main/java/org/apache/kylin/tool/JobDiagnosisInfoCLI.java
+++ b/tool/src/main/java/org/apache/kylin/tool/JobDiagnosisInfoCLI.java
@@ -19,10 +19,8 @@
 package org.apache.kylin.tool;
 
 import java.io.File;
-import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.OptionBuilder;
@@ -32,16 +30,20 @@ import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.persistence.ResourceTool;
 import org.apache.kylin.common.util.OptionsHelper;
-import org.apache.kylin.common.util.StringUtil;
-import org.apache.kylin.job.constant.ExecutableConstants;
 import org.apache.kylin.job.dao.ExecutableDao;
 import org.apache.kylin.job.dao.ExecutablePO;
 import org.apache.kylin.tool.util.ToolUtil;
+import org.apache.kylin.tool.extractor.AbstractInfoExtractor;
+import org.apache.kylin.tool.extractor.ClientEnvExtractor;
+import org.apache.kylin.tool.extractor.CubeMetaExtractor;
+import org.apache.kylin.tool.extractor.JStackExtractor;
+import org.apache.kylin.tool.extractor.KylinLogExtractor;
+import org.apache.kylin.tool.extractor.SparkEnvInfoExtractor;
+import org.apache.kylin.tool.extractor.YarnLogExtractor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 
 public class JobDiagnosisInfoCLI extends AbstractInfoExtractor {
     private static final Logger logger = LoggerFactory.getLogger(JobDiagnosisInfoCLI.class);
@@ -73,7 +75,6 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor {
             .create("includeConf");
 
     List<String> requiredResources = Lists.newArrayList();
-    List<String> yarnLogsResources = Lists.newArrayList();
     private KylinConfig kylinConfig;
     private ExecutableDao executableDao;
 
@@ -121,9 +122,6 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor {
         for (ExecutablePO kylinTask : executablePO.getTasks()) {
             addRequired(ResourceStore.EXECUTE_RESOURCE_ROOT + "/" + kylinTask.getUuid());
             addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + kylinTask.getUuid());
-            if (includeYarnLogs) {
-                yarnLogsResources.add(kylinTask.getUuid());
-            }
         }
         extractResources(exportDir);
 
@@ -133,30 +131,30 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor {
             if (!StringUtils.isEmpty(cubeName)) {
                 File metaDir = new File(exportDir, "cube");
                 FileUtils.forceMkdir(metaDir);
-                String[] cubeMetaArgs = { "-packagetype", "cubemeta", "-cube", cubeName, "-destDir",
+                String[] cubeMetaArgs = {"-packagetype", "cubemeta", "-cube", cubeName, "-destDir",
                         new File(metaDir, cubeName).getAbsolutePath(), "-includeJobs", "false", "-compress", "false",
-                        "-submodule", "true" };
-                logger.info("Start to extract related cube: " + StringUtils.join(cubeMetaArgs));
+                        "-submodule", "true"};
+                logger.info("Start to extract related cube: {}", StringUtils.join(cubeMetaArgs));
                 CubeMetaExtractor cubeMetaExtractor = new CubeMetaExtractor();
-                logger.info("CubeMetaExtractor args: " + Arrays.toString(cubeMetaArgs));
+                logger.info("CubeMetaExtractor args: {}", Arrays.toString(cubeMetaArgs));
                 cubeMetaExtractor.execute(cubeMetaArgs);
             }
         }
 
         // dump mr job info
         if (includeYarnLogs) {
-            logger.info("Start to dump mr job info: " + kylinJobId);
+            YarnLogExtractor yarnLogExtractor = new YarnLogExtractor();
+            logger.info("Start to dump mr job info: {}", kylinJobId);
             File yarnDir = new File(exportDir, "yarn");
             FileUtils.forceMkdir(yarnDir);
-            for (String stepId : yarnLogsResources) {
-                extractJobInfo(stepId, new File(yarnDir, stepId));
-                extractJobLog(stepId, new File(yarnDir, stepId), true);
-            }
+            yarnLogExtractor.extractYarnLogAndMRJob(kylinJobId, new File(yarnDir, kylinJobId));
         }
 
+        // host info
         if (includeClient) {
-            String[] clientArgs = { "-destDir", new File(exportDir, "client").getAbsolutePath(), "-compress", "false",
-                    "-submodule", "true" };
+            logger.info("Start to extract client info.");
+            String[] clientArgs = {"-destDir", new File(exportDir, "client").getAbsolutePath(), "-compress", "false",
+                    "-submodule", "true"};
             ClientEnvExtractor clientEnvExtractor = new ClientEnvExtractor();
             logger.info("ClientEnvExtractor args: " + Arrays.toString(clientArgs));
             clientEnvExtractor.execute(clientArgs);
@@ -172,9 +170,28 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor {
             }
         }
 
+        // dump jstack
+        String[] jstackDumpArgs = {"-destDir", exportDir.getAbsolutePath(), "-compress", "false", "-submodule",
+                "true"};
+        logger.info("JStackExtractor args: {}", Arrays.toString(jstackDumpArgs));
+        try {
+            new JStackExtractor().execute(jstackDumpArgs);
+        } catch (Exception e) {
+            logger.error("Error executing jstack dump extractor", e);
+        }
+
+        // export spark conf
+        String[] sparkEnvArgs = {"-destDir", exportDir.getAbsolutePath(), "-compress", "false", "-submodule",
+                "true"};
+        try {
+            new SparkEnvInfoExtractor().execute(sparkEnvArgs);
+        } catch (Exception e) {
+            logger.error("Error executing spark env extractor", e);
+        }
+
         // export kylin logs
-        String[] logsArgs = { "-destDir", new File(exportDir, "logs").getAbsolutePath(), "-compress", "false",
-                "-submodule", "true" };
+        String[] logsArgs = {"-destDir", new File(exportDir, "logs").getAbsolutePath(), "-compress", "false",
+                "-submodule", "true"};
         KylinLogExtractor logExtractor = new KylinLogExtractor();
         logger.info("KylinLogExtractor args: " + Arrays.toString(logsArgs));
         logExtractor.execute(logsArgs);
@@ -195,57 +212,6 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor {
         }
     }
 
-    private void extractJobLog(String taskId, File destDir, boolean onlyFail) throws Exception {
-        final Map<String, String> jobInfo = executableDao.getJobOutput(taskId).getInfo();
-        FileUtils.forceMkdir(destDir);
-        if (jobInfo.containsKey(ExecutableConstants.MR_JOB_ID)) {
-            String applicationId = jobInfo.get(ExecutableConstants.MR_JOB_ID).replace("job", "application");
-            if (!onlyFail || !isYarnAppSucc(applicationId)) {
-                File destFile = new File(destDir, applicationId + ".log");
-                String yarnCmd = "yarn logs -applicationId " + applicationId + " > " + destFile.getAbsolutePath();
-                logger.debug(yarnCmd);
-                try {
-                    kylinConfig.getCliCommandExecutor().execute(yarnCmd);
-                } catch (Exception ex) {
-                    logger.warn("Failed to get yarn logs. ", ex);
-                }
-            }
-        }
-    }
-
-    private void extractJobInfo(String taskId, File destDir) throws Exception {
-        final Map<String, String> jobInfo = executableDao.getJobOutput(taskId).getInfo();
-        if (jobInfo.containsKey(ExecutableConstants.MR_JOB_ID)) {
-            String mrJobId = jobInfo.get(ExecutableConstants.MR_JOB_ID);
-            FileUtils.forceMkdir(destDir);
-            String[] mrJobArgs = { "-mrJobId", mrJobId, "-destDir", destDir.getAbsolutePath(), "-compress", "false",
-                    "-submodule", "true" };
-            new MrJobInfoExtractor().execute(mrJobArgs);
-        }
-    }
-
-    private boolean isYarnAppSucc(String applicationId) throws IOException {
-        final String yarnCmd = "yarn application -status " + applicationId;
-        final String cmdOutput = kylinConfig.getCliCommandExecutor().execute(yarnCmd).getSecond();
-        final Map<String, String> params = Maps.newHashMap();
-        final String[] cmdOutputLines = StringUtil.split(cmdOutput, "\n");
-        for (String cmdOutputLine : cmdOutputLines) {
-            String[] pair = StringUtil.split(cmdOutputLine, ":");
-            if (pair.length >= 2) {
-                params.put(pair[0].trim(), pair[1].trim());
-            }
-        }
-        for (Map.Entry<String, String> e : params.entrySet()) {
-            logger.info(e.getKey() + ":" + e.getValue());
-        }
-
-        if (params.containsKey("State") && params.get("State").equals("RUNNING")) {
-            return true;
-        }
-
-        return params.containsKey("Final-State") && params.get("Final-State").equals("SUCCEEDED");
-    }
-
     private void addRequired(String record) {
         logger.info("adding required resource {}", record);
         requiredResources.add(record);
diff --git a/tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/AbstractInfoExtractor.java
similarity index 81%
rename from tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractor.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/AbstractInfoExtractor.java
index 994e4d6..965912e 100644
--- a/tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractor.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/AbstractInfoExtractor.java
@@ -17,7 +17,7 @@
  *
  */
 
-package org.apache.kylin.tool;
+package org.apache.kylin.tool.extractor;
 
 import java.io.File;
 import java.io.IOException;
@@ -34,6 +34,7 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.KylinVersion;
 import org.apache.kylin.common.util.AbstractApplication;
+import org.apache.kylin.common.util.CliCommandExecutor;
 import org.apache.kylin.common.util.OptionsHelper;
 import org.apache.kylin.common.util.ZipFileUtils;
 import org.apache.kylin.tool.util.ToolUtil;
@@ -60,7 +61,8 @@ public abstract class AbstractInfoExtractor extends AbstractApplication {
             .withDescription("specify the package type").create("packagetype");
 
     private static final String DEFAULT_PACKAGE_TYPE = "base";
-    private static final String[] COMMIT_SHA1_FILES = { "commit_SHA1", "commit.sha1" };
+    private static final String[] COMMIT_SHA1_FILES = {"commit_SHA1", "commit.sha1"};
+    protected CliCommandExecutor cmdExecutor;
 
     protected final Options options;
 
@@ -74,6 +76,8 @@ public abstract class AbstractInfoExtractor extends AbstractApplication {
         options.addOption(OPTION_SUBMODULE);
         options.addOption(OPTION_PACKAGETYPE);
         packageType = DEFAULT_PACKAGE_TYPE;
+
+        cmdExecutor = KylinConfig.getInstanceFromEnv().getCliCommandExecutor();
     }
 
     @Override
@@ -179,4 +183,47 @@ public abstract class AbstractInfoExtractor extends AbstractApplication {
     public String getExportDest() {
         return exportDir.getAbsolutePath();
     }
+
+    public static String getKylinPid() {
+        File pidFile = new File(getKylinHome(), "pid");
+        if (pidFile.exists()) {
+            try {
+                return FileUtils.readFileToString(pidFile).trim();
+            } catch (IOException e) {
+                throw new RuntimeException("Error reading KYLIN PID file.", e);
+            }
+        } else {
+            throw new RuntimeException("Cannot find KYLIN PID file.");
+        }
+    }
+
+    public static String getKylinHome() {
+        String path = System.getProperty(KylinConfig.KYLIN_CONF);
+        if (StringUtils.isNotEmpty(path)) {
+            return path;
+        }
+        path = KylinConfig.getKylinHome();
+        if (StringUtils.isNotEmpty(path)) {
+            return path;
+        }
+        throw new RuntimeException("Cannot find KYLIN_HOME.");
+    }
+
+    public void addFile(File srcFile, File destDir) {
+        logger.info("copy file {}", srcFile.getName());
+        try {
+            FileUtils.forceMkdir(destDir);
+        } catch (IOException e) {
+            logger.error("Cannot create {}", destDir, e);
+        }
+
+        File destFile = new File(destDir, srcFile.getName());
+        String copyCmd = String.format(Locale.ROOT, "cp -rL %s %s", srcFile.getAbsolutePath(), destFile.getAbsolutePath());
+        logger.info("The command is: {}", copyCmd);
+        try {
+            cmdExecutor.execute(copyCmd);
+        } catch (Exception e) {
+            logger.debug("Failed to execute copyCmd", e);
+        }
+    }
 }
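
AbstractInfoExtractor is the template every diagnosis extractor follows: set a packageType in the
constructor, override executeExtract, and let the base class handle option parsing, export-directory
creation and optional compression. A minimal sketch of a custom extractor written against the
contract visible above (the class name and output file are hypothetical):

    package org.apache.kylin.tool.extractor;

    import java.io.File;
    import java.nio.charset.Charset;

    import org.apache.commons.io.FileUtils;
    import org.apache.kylin.common.util.OptionsHelper;

    public class PidInfoExtractor extends AbstractInfoExtractor {
        public PidInfoExtractor() {
            super();
            packageType = "pid"; // names the exported sub-package
        }

        @Override
        protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception {
            // getKylinPid() is the helper introduced by this commit.
            FileUtils.writeStringToFile(new File(exportDir, "kylin.pid"),
                    getKylinPid(), Charset.defaultCharset());
        }
    }
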
diff --git a/tool/src/main/java/org/apache/kylin/tool/ClientEnvExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/ClientEnvExtractor.java
similarity index 82%
rename from tool/src/main/java/org/apache/kylin/tool/ClientEnvExtractor.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/ClientEnvExtractor.java
index 80741ea..4c8597b 100644
--- a/tool/src/main/java/org/apache/kylin/tool/ClientEnvExtractor.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/ClientEnvExtractor.java
@@ -17,7 +17,7 @@
  *
  */
 
-package org.apache.kylin.tool;
+package org.apache.kylin.tool.extractor;
 
 import java.io.File;
 import java.io.IOException;
@@ -35,21 +35,25 @@ import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.CliCommandExecutor;
 import org.apache.kylin.common.util.OptionsHelper;
 import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.stream.core.util.NamedThreadFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.io.Files;
 
+/**
+ * Host information collection
+ */
 public class ClientEnvExtractor extends AbstractInfoExtractor {
     private static final Logger logger = LoggerFactory.getLogger(ClientEnvExtractor.class);
     private KylinConfig kylinConfig;
     private CliCommandExecutor cmdExecutor;
     private ExecutorService executorService;
-    int maxWaitSeconds = 120;
+    public int maxWaitSeconds = 150;
 
-    public ClientEnvExtractor() throws IOException {
+    public ClientEnvExtractor() {
         super();
-        executorService = Executors.newFixedThreadPool(1);
+        executorService = Executors.newFixedThreadPool(3, new NamedThreadFactory("ClientEnv"));
         packageType = "client";
         kylinConfig = KylinConfig.getInstanceFromEnv();
         cmdExecutor = kylinConfig.getCliCommandExecutor();
@@ -57,6 +61,7 @@ public class ClientEnvExtractor extends AbstractInfoExtractor {
 
     @Override
     protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception {
+
         // dump os info
         addLocalFile("/sys/kernel/mm/transparent_hugepage/defrag", "linux/transparent_hugepage");
         addLocalFile("/proc/sys/vm/swappiness", "linux/swappiness");
@@ -71,8 +76,11 @@ public class ClientEnvExtractor extends AbstractInfoExtractor {
         addShellOutput("hadoop version", "hadoop", "version");
         addShellOutput("hbase version", "hbase", "version");
         addShellOutput("hive --version", "hive", "version");
+        addShellOutput("klist", "kerberos", "klist");
         addShellOutput("beeline --version", "hive", "beeline_version");
-        executorService.shutdownNow();
+
+        logger.info("ClientEnvExtractor is shutting down.");
+        executorService.shutdown();
+        executorService.awaitTermination(maxWaitSeconds, TimeUnit.SECONDS);
     }
 
     private void addLocalFile(String src, String destDir) {
@@ -92,7 +100,7 @@ public class ClientEnvExtractor extends AbstractInfoExtractor {
         }
     }
 
-    void addShellOutput(String cmd, String destDir, String filename) {
+    public void addShellOutput(String cmd, String destDir, String filename) {
         Future f = executorService.submit(() -> {
             try {
                 File destDirFile = null;
@@ -102,8 +110,10 @@ public class ClientEnvExtractor extends AbstractInfoExtractor {
                 } else {
                     destDirFile = exportDir;
                 }
+                logger.debug("Will execute {}", cmd);
                 Pair<Integer, String> result = cmdExecutor.execute(cmd);
                 String output = result.getSecond();
+                logger.debug("Execute command {} return {}", cmd, result.getFirst());
                 FileUtils.writeStringToFile(new File(destDirFile, filename), output, Charset.defaultCharset());
             } catch (IOException e) {
                 logger.warn("Failed to run command: " + cmd + ".", e);
@@ -113,14 +123,18 @@ public class ClientEnvExtractor extends AbstractInfoExtractor {
         try {
             // assume most shell should return in two minutes
             f.get(maxWaitSeconds, TimeUnit.SECONDS);
-        } catch (TimeoutException timeoutException) {
+        } catch (TimeoutException | InterruptedException timeoutException) {
             logger.error("Timeout for \"{}\" in {} seconds.", cmd, maxWaitSeconds);
+            f.cancel(true);
             executorService.shutdownNow();
-            executorService = Executors.newFixedThreadPool(1);
         } catch (ExecutionException runtimeException) {
             logger.error("Runtime error: {}", runtimeException.getLocalizedMessage());
-        } catch (InterruptedException otherException) {
-            // Ignore
+            executorService.shutdownNow();
         }
     }
+
+    public static void main(String[] args) {
+        ClientEnvExtractor extractor = new ClientEnvExtractor();
+        extractor.execute(args);
+    }
 }
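
The reworked addShellOutput bounds every shell command with Future.get plus explicit cancellation,
instead of recreating the pool as the old code did. A self-contained sketch of that pattern, with a
sleeping task standing in for cmdExecutor.execute(cmd):

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;

    public class BoundedCommandDemo {
        public static void main(String[] args) throws Exception {
            ExecutorService pool = Executors.newFixedThreadPool(1);
            Future<?> f = pool.submit(() -> {
                try {
                    Thread.sleep(5000); // stand-in for a hung shell command
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt(); // honor f.cancel(true)
                }
            });
            try {
                f.get(2, TimeUnit.SECONDS); // wait at most maxWaitSeconds
            } catch (TimeoutException e) {
                f.cancel(true); // interrupt the stuck task instead of abandoning it
            }
            pool.shutdown();
        }
    }
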
diff --git a/tool/src/main/java/org/apache/kylin/tool/CubeMetaExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/CubeMetaExtractor.java
similarity index 92%
rename from tool/src/main/java/org/apache/kylin/tool/CubeMetaExtractor.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/CubeMetaExtractor.java
index 6b661ca..0789011 100644
--- a/tool/src/main/java/org/apache/kylin/tool/CubeMetaExtractor.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/CubeMetaExtractor.java
@@ -16,11 +16,13 @@
  * limitations under the License.
  */
 
-package org.apache.kylin.tool;
+package org.apache.kylin.tool.extractor;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Collection;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
 
 import org.apache.commons.cli.Option;
@@ -58,6 +60,8 @@ import org.apache.kylin.metadata.streaming.StreamingManager;
 import org.apache.kylin.source.kafka.config.KafkaConfig;
 import org.apache.kylin.storage.hybrid.HybridInstance;
 import org.apache.kylin.storage.hybrid.HybridManager;
+import org.apache.kylin.stream.core.source.StreamingSourceConfig;
+import org.apache.kylin.stream.core.source.StreamingSourceConfigManager;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -85,7 +89,7 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
             .withDescription("Specify which hybrid to extract").create("hybrid");
     @SuppressWarnings("static-access")
     private static final Option OPTION_PROJECT = OptionBuilder.withArgName("project").hasArg().isRequired(false)
-            .withDescription("Specify realizations in which project to extract").create("project");
+            .withDescription("Specify realizations in which project to extract; multiple project names can be separated by commas.").create("project");
     @SuppressWarnings("static-access")
     private static final Option OPTION_All_PROJECT = OptionBuilder.withArgName("allProjects").hasArg(false)
             .isRequired(false).withDescription("Specify realizations in all projects to extract").create("allProjects");
@@ -112,14 +116,14 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
 
     @SuppressWarnings("static-access")
     private static final Option OPTION_INCLUDE_ONLY_JOB_OUTPUT = OptionBuilder.withArgName("onlyOutput").hasArg()
-            .isRequired(false).withDescription("when include jobs, onlt extract output of job. Default true")
+            .isRequired(false).withDescription("when include jobs, only extract output of job. Default true")
             .create("onlyOutput");
 
     @SuppressWarnings("static-access")
     private static final Option OPTION_INCLUDE_SEGMENT_DETAILS = OptionBuilder.withArgName("includeSegmentDetails")
             .hasArg().isRequired(false)
             .withDescription(
-                    "set this to true if want to extract segment details too, such as dict, tablesnapshot. Default false")
+                    "set this to true if want to extract segment details too, such as dict, table snapshot. Default false")
             .create("includeSegmentDetails");
 
     private KylinConfig kylinConfig;
@@ -128,6 +132,7 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
     private HybridManager hybridManager;
     private CubeManager cubeManager;
     private StreamingManager streamingManager;
+    private StreamingSourceConfigManager streamingSourceConfigManager;
     private CubeDescManager cubeDescManager;
     private ExecutableDao executableDao;
     private RealizationRegistry realizationRegistry;
@@ -193,6 +198,7 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
         executableDao = ExecutableDao.getInstance(kylinConfig);
         realizationRegistry = RealizationRegistry.getInstance(kylinConfig);
         badQueryHistoryManager = BadQueryHistoryManager.getInstance(kylinConfig);
+        streamingSourceConfigManager = StreamingSourceConfigManager.getInstance(kylinConfig);
 
         addRequired(ResourceStore.METASTORE_UUID_TAG);
 
@@ -254,10 +260,10 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
     private void executeExtraction(String dest) {
         logger.info("The resource paths going to be extracted:");
         for (String s : requiredResources) {
-            logger.info(s + "(required)");
+            logger.info("{} is a required resource.", s);
         }
         for (String s : optionalResources) {
-            logger.info(s + "(optional)");
+            logger.info("{} is an optional resource.", s);
         }
         for (CubeInstance cube : cubesToTrimAndSave) {
             logger.info("Cube {} will be trimmed and extracted", cube);
@@ -294,7 +300,7 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
 
     private void engineOverwrite(File dest) throws IOException {
         if (engineType != null || storageType != null) {
-            for (File f : dest.listFiles()) {
+            for (File f : Objects.requireNonNull(dest.listFiles())) {
                 if (f.isDirectory()) {
                     engineOverwrite(f);
                 } else {
@@ -330,17 +336,6 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
         return realizationRegistry.getRealization(realizationEntry.getType(), realizationEntry.getRealization());
     }
 
-    private void addStreamingConfig(CubeInstance cube) {
-        streamingManager = StreamingManager.getInstance(kylinConfig);
-        for (StreamingConfig streamingConfig : streamingManager.listAllStreaming()) {
-            if (streamingConfig.getName() != null
-                    && streamingConfig.getName().equalsIgnoreCase(cube.getRootFactTable())) {
-                addRequired(StreamingConfig.concatResourcePath(streamingConfig.getName()));
-                addRequired(KafkaConfig.concatResourcePath(streamingConfig.getName()));
-            }
-        }
-    }
-
     private void retrieveResourcePath(IRealization realization) throws IOException {
         if (realization == null) {
             return;
@@ -354,6 +349,8 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
             addTables(modelDesc);
             // add streaming stuff
             addStreamingConfig(cube);
+            // add streamingV2
+            addStreamingV2Config(cube);
             // add cube
             addRequired(CubeDesc.concatResourcePath(cubeDesc.getName()));
             // add project
@@ -376,14 +373,14 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
         }
     }
 
-    private void addTables(DataModelDesc modelDesc) throws IOException {
+    private void addTables(DataModelDesc modelDesc) {
         if (modelDesc != null) {
             //fixme should get all tbls in prj not only in cubes when back up by prj.
             for (TableRef tableRef : modelDesc.getAllTables()) {
                 addRequired(tableRef.getTableDesc().getResourcePath());
-                addOptional(TableMetadataManager.getInstance(KylinConfig.getInstanceFromEnv()) //
-                        .getTableExt(tableRef.getTableDesc()) //
-                        .getResourcePath()); //
+                addOptional(TableMetadataManager.getInstance(KylinConfig.getInstanceFromEnv())
+                        .getTableExt(tableRef.getTableDesc())
+                        .getResourcePath());
             }
             addRequired(DataModelDesc.concatResourcePath(modelDesc.getName()));
         }
@@ -410,7 +407,6 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
                     } else {
                         try {
                             if (onlyJobOutput) {
-                                ExecutablePO executablePO = executableDao.getJob(lastJobId);
                                 addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + lastJobId);
                             } else {
                                 ExecutablePO executablePO = executableDao.getJob(lastJobId);
@@ -436,6 +432,33 @@ public class CubeMetaExtractor extends AbstractInfoExtractor {
         }
     }
 
+    private void addStreamingConfig(CubeInstance cube) {
+        streamingManager = StreamingManager.getInstance(kylinConfig);
+        for (StreamingConfig streamingConfig : streamingManager.listAllStreaming()) {
+            if (streamingConfig.getName() != null
+                    && streamingConfig.getName().equalsIgnoreCase(cube.getRootFactTable())) {
+                addRequired(StreamingConfig.concatResourcePath(streamingConfig.getName()));
+                addRequired(KafkaConfig.concatResourcePath(streamingConfig.getName()));
+            }
+        }
+    }
+
+    private void addStreamingV2Config(CubeInstance cube) {
+        Collection<StreamingSourceConfig> streamingConfigs;
+        try {
+            streamingConfigs = streamingSourceConfigManager.listAllStreaming();
+        } catch (IOException ioe) {
+            logger.error("Failed to list streaming source configs.", ioe);
+            return;
+        }
+        for (StreamingSourceConfig streamingConfig : streamingConfigs) {
+            if (streamingConfig.getName() != null
+                    && streamingConfig.getName().equalsIgnoreCase(cube.getRootFactTable())) {
+                addRequired(StreamingSourceConfig.concatResourcePath(streamingConfig.getName()));
+            }
+        }
+    }
+
     private void addRequired(String record) {
         logger.info("adding required resource {}", record);
         requiredResources.add(record);
diff --git a/tool/src/main/java/org/apache/kylin/tool/HBaseUsageExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/HBaseUsageExtractor.java
similarity index 80%
rename from tool/src/main/java/org/apache/kylin/tool/HBaseUsageExtractor.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/HBaseUsageExtractor.java
index 8ffa473..e545ba6 100644
--- a/tool/src/main/java/org/apache/kylin/tool/HBaseUsageExtractor.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/HBaseUsageExtractor.java
@@ -16,13 +16,14 @@
  * limitations under the License.
  */
 
-package org.apache.kylin.tool;
+package org.apache.kylin.tool.extractor;
 
 import java.io.File;
 import java.io.IOException;
 import java.net.URL;
 import java.nio.charset.Charset;
 import java.util.List;
+import java.util.Locale;
 
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.OptionBuilder;
@@ -58,6 +59,10 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
     @SuppressWarnings("static-access")
     private static final Option OPTION_PROJECT = OptionBuilder.withArgName("project").hasArg().isRequired(false).withDescription("Specify realizations in which project to extract").create("project");
 
+    public static final String HDFS_CHECK_COMMAND = "hadoop fs -ls -R %s/data/%s/%s*";
+    private final String hbaseRootDir;
+    private final String cachedHMasterUrl;
+
     private List<String> htables = Lists.newArrayList();
     private Configuration conf;
     private CubeManager cubeManager;
@@ -76,6 +81,8 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
 
         options.addOptionGroup(realizationOrProject);
         conf = HBaseConfiguration.create();
+        hbaseRootDir = conf.get("hbase.rootdir");
+        cachedHMasterUrl = getHBaseMasterUrl();
     }
 
     public static void main(String[] args) {
@@ -83,18 +90,24 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
         extractor.execute(args);
     }
 
-    private String getHBaseMasterUrl() throws IOException, KeeperException {
+    private String getHBaseMasterUrl() {
         String host = conf.get("hbase.master.info.bindAddress");
         if (host.equals("0.0.0.0")) {
-            host = MasterAddressTracker.getMasterAddress(new ZooKeeperWatcher(conf, null, null)).getHostname();
+            try {
+                host = MasterAddressTracker.getMasterAddress(new ZooKeeperWatcher(conf, null, null)).getHostname();
+            } catch (IOException | KeeperException io) {
+                return null;
+            }
         }
-
         String port = conf.get("hbase.master.info.port");
         return "http://" + host + ":" + port + "/";
     }
 
     @Override
     protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception {
+        if (cachedHMasterUrl == null) {
+            logger.warn("Cannot locate the HBase master, skip HBase usage extraction.");
+            return;
+        }
         kylinConfig = KylinConfig.getInstanceFromEnv();
         cubeManager = CubeManager.getInstance(kylinConfig);
         realizationRegistry = RealizationRegistry.getInstance(kylinConfig);
@@ -124,14 +137,14 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
             }
         }
 
-        extractCommonInfo(exportDir);
+        extractCommonInfo(exportDir, kylinConfig);
         extractHTables(exportDir);
     }
 
     private void extractHTables(File dest) throws IOException {
         logger.info("These htables are going to be extracted:");
         for (String htable : htables) {
-            logger.info(htable + "(required)");
+            logger.info("{} is required", htable);
         }
 
         File tableDir = new File(dest, "table");
@@ -139,7 +152,7 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
 
         for (String htable : htables) {
             try {
-                URL srcUrl = new URL(getHBaseMasterUrl() + "table.jsp?name=" + htable);
+                URL srcUrl = new URL(cachedHMasterUrl + "table.jsp?name=" + htable);
                 File destFile = new File(tableDir, htable + ".html");
                 FileUtils.copyURLToFile(srcUrl, destFile);
             } catch (Exception e) {
@@ -148,14 +161,16 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
         }
     }
 
-    private void extractCommonInfo(File dest) throws IOException {
+    private void extractCommonInfo(File dest, KylinConfig config) throws IOException {
         logger.info("The hbase master info/conf are going to be extracted...");
+        String hbaseNamespace = config.getHBaseStorageNameSpace();
+        String tableNamePrefix = config.getHBaseTableNamePrefix();
 
         // hbase master page
         try {
             File masterDir = new File(dest, "master");
             FileUtils.forceMkdir(masterDir);
-            URL srcMasterUrl = new URL(getHBaseMasterUrl() + "master-status");
+            URL srcMasterUrl = new URL(cachedHMasterUrl + "master-status");
             File masterDestFile = new File(masterDir, "master-status.html");
             FileUtils.copyURLToFile(srcMasterUrl, masterDestFile);
         } catch (Exception e) {
@@ -166,7 +181,7 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
         try {
             File confDir = new File(dest, "conf");
             FileUtils.forceMkdir(confDir);
-            URL srcConfUrl = new URL(getHBaseMasterUrl() + "conf");
+            URL srcConfUrl = new URL(cachedHMasterUrl + "conf");
             File destConfFile = new File(confDir, "hbase-conf.xml");
             FileUtils.copyURLToFile(srcConfUrl, destConfFile);
         } catch (Exception e) {
@@ -177,22 +192,33 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
         try {
             File jmxDir = new File(dest, "jmx");
             FileUtils.forceMkdir(jmxDir);
-            URL srcJmxUrl = new URL(getHBaseMasterUrl() + "jmx");
+            URL srcJmxUrl = new URL(cachedHMasterUrl + "jmx");
             File jmxDestFile = new File(jmxDir, "jmx.html");
             FileUtils.copyURLToFile(srcJmxUrl, jmxDestFile);
         } catch (Exception e) {
             logger.warn("HBase JMX fetch failed: ", e);
         }
 
+        // dump page
+        try {
+            File dumpDir = new File(dest, "dump");
+            FileUtils.forceMkdir(dumpDir);
+            URL srcDumpUrl = new URL(cachedHMasterUrl + "dump");
+            File dumpDestFile = new File(dumpDir, "dump");
+            FileUtils.copyURLToFile(srcDumpUrl, dumpDestFile);
+        } catch (Exception e) {
+            logger.warn("HBase Dump fetch failed: ", e);
+        }
+
         // hbase hdfs status
         try {
             File hdfsDir = new File(dest, "hdfs");
             FileUtils.forceMkdir(hdfsDir);
             CliCommandExecutor cliCommandExecutor = kylinConfig.getCliCommandExecutor();
-            String output = cliCommandExecutor.execute("hadoop fs -ls -R " + conf.get("hbase.rootdir") + "/data/default/KYLIN_*").getSecond();
+            String command = String.format(Locale.ROOT, HDFS_CHECK_COMMAND, hbaseRootDir, hbaseNamespace, tableNamePrefix);
+            logger.info("Execute command {}", command);
+            String output = cliCommandExecutor.execute(command).getSecond();
             FileUtils.writeStringToFile(new File(hdfsDir, "hdfs-files.list"), output, Charset.defaultCharset());
-            output = cliCommandExecutor.execute("hadoop fs -ls -R " + conf.get("hbase.rootdir") + "/data/default/kylin_*").getSecond();
-            FileUtils.writeStringToFile(new File(hdfsDir, "hdfs-files.list"), output, Charset.defaultCharset(), true);
         } catch (Exception e) {
             logger.warn("HBase hdfs status fetch failed: ", e);
         }
@@ -212,7 +238,7 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor {
                 addHTable(segment.getStorageLocationIdentifier());
             }
         } else {
-            logger.warn("Unknown realization type: " + realization.getType());
+            logger.warn("Unknown realization type: {}", realization.getType());
         }
     }
 
diff --git a/tool/src/main/java/org/apache/kylin/tool/common/HadoopConfExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/HadoopConfExtractor.java
similarity index 82%
rename from tool/src/main/java/org/apache/kylin/tool/common/HadoopConfExtractor.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/HadoopConfExtractor.java
index d8c8e2e..532478b 100644
--- a/tool/src/main/java/org/apache/kylin/tool/common/HadoopConfExtractor.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/HadoopConfExtractor.java
@@ -17,7 +17,7 @@
  *
  */
 
-package org.apache.kylin.tool.common;
+package org.apache.kylin.tool.extractor;
 
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -35,20 +35,22 @@ import com.google.common.base.Preconditions;
 
 public class HadoopConfExtractor {
     private static final Logger logger = LoggerFactory.getLogger(HadoopConfExtractor.class);
+    public static final String MR_JOB_HISTORY_URL_CONF_KEY = "mapreduce.jobhistory.webapp.address";
 
     public static String extractYarnMasterUrl(Configuration conf) {
         KylinConfig config = KylinConfig.getInstanceFromEnv();
         final String yarnStatusCheckUrl = config.getYarnStatusCheckUrl();
-        Pattern pattern = Pattern.compile("(http(s)?://)([^:]*):([^/])*.*");
+        Pattern pattern = Pattern.compile("(https?://)([^:]*):([^/]*).*");
         if (yarnStatusCheckUrl != null) {
+            logger.info("Using user-defined RM url {} from configuration.", yarnStatusCheckUrl);
             Matcher m = pattern.matcher(yarnStatusCheckUrl);
             if (m.matches()) {
                 return m.group(1) + m.group(2) + ":" + m.group(3);
             }
+        } else {
+            logger.info("kylin.engine.mr.yarn-check-status-url is not set, reading from the Hadoop configuration");
         }
 
-        logger.info("kylin.engine.mr.yarn-check-status-url" + " is not set, read from hadoop configuration");
-
         String webappConfKey, defaultAddr;
         if (YarnConfiguration.useHttps(conf)) {
             webappConfKey = YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS;
@@ -76,15 +78,15 @@ public class HadoopConfExtractor {
         }
         Matcher m = pattern.matcher(rmWebHost);
         Preconditions.checkArgument(m.matches(), "Yarn master URL not found.");
-        logger.info("yarn master url: " + rmWebHost);
+        logger.info("yarn master url: {}", rmWebHost);
         return rmWebHost;
     }
 
     public static String extractJobHistoryUrl(String yarnWebapp, Configuration conf) {
-        Pattern pattern = Pattern.compile("(http(s)?://)([^:]*):([^/])*.*");
+        Pattern pattern = Pattern.compile("(https?://)([^:]*):([^/]*).*");
         Matcher m = pattern.matcher(yarnWebapp);
         Preconditions.checkArgument(m.matches(), "Yarn master URL" + yarnWebapp + " not right.");
-        return m.group(1)
-                + HAUtil.getConfValueForRMInstance("mapreduce.jobhistory.webapp.address", m.group(2) + ":19888", conf);
+        String defaultHistoryUrl = m.group(2) + ":19888";
+        return m.group(1) + HAUtil.getConfValueForRMInstance(MR_JOB_HISTORY_URL_CONF_KEY, defaultHistoryUrl, conf);
     }
 }
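
For reference, the pattern reduces a full resource-manager URL to scheme, host and port via its three
capture groups; a small sketch with a hypothetical URL:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class YarnUrlDemo {
        public static void main(String[] args) {
            Pattern p = Pattern.compile("(https?://)([^:]*):([^/]*).*");
            Matcher m = p.matcher("https://rm-host.example.com:8090/ws/v1/cluster");
            if (m.matches()) {
                // group(1) = scheme, group(2) = host, group(3) = port
                System.out.println(m.group(1) + m.group(2) + ":" + m.group(3));
                // prints: https://rm-host.example.com:8090
            }
        }
    }
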
diff --git a/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java b/tool/src/main/java/org/apache/kylin/tool/extractor/HiveConfigCLI.java
similarity index 98%
rename from tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/HiveConfigCLI.java
index e5b300a..b56f17c 100644
--- a/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/HiveConfigCLI.java
@@ -16,7 +16,7 @@
  * limitations under the License.
 */
 
-package org.apache.kylin.tool;
+package org.apache.kylin.tool.extractor;
 
 import java.io.File;
 import java.io.IOException;
diff --git a/tool/src/main/java/org/apache/kylin/tool/extractor/JStackExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/JStackExtractor.java
new file mode 100644
index 0000000..7bf5a34
--- /dev/null
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/JStackExtractor.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.kylin.tool.extractor;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.kylin.common.util.CliCommandExecutor;
+import org.apache.kylin.common.util.OptionsHelper;
+import org.apache.kylin.common.util.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Locale;
+
+/**
+ * https://docs.oracle.com/javase/8/docs/technotes/tools/unix/jstack.html
+ */
+public class JStackExtractor extends AbstractInfoExtractor {
+    private static final Logger logger = LoggerFactory.getLogger(JStackExtractor.class);
+
+    public JStackExtractor() {
+        super();
+        packageType = "jstack";
+    }
+
+    protected void executeExtract(OptionsHelper optionsHelper, File exportDir) {
+        try {
+            File logDir = new File(exportDir, "logs");
+            File jstackDumpFile = new File(logDir, String.format(Locale.ROOT, "jstack.log.%s", System.currentTimeMillis()));
+            dumpKylinJStack(jstackDumpFile);
+        } catch (IOException e) {
+            logger.error("IO Error on dump jstack", e);
+        }
+    }
+
+    private static void dumpKylinJStack(File outputFile) throws IOException {
+        String jstackDumpCmd = String.format(Locale.ROOT, "jstack -l %s", getKylinPid());
+        Pair<Integer, String> result = new CliCommandExecutor().execute(jstackDumpCmd, null);
+        FileUtils.writeStringToFile(outputFile, result.getSecond());
+    }
+}
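
For reference, the dump produced above lands under logs/jstack.log.<timestamp> inside the export
package; jstack -l is used because the long listing additionally reports held locks and ownable
synchronizers, which is usually what matters when diagnosing a stuck Kylin instance.
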
diff --git a/tool/src/main/java/org/apache/kylin/tool/JobInstanceExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/JobInstanceExtractor.java
similarity index 98%
rename from tool/src/main/java/org/apache/kylin/tool/JobInstanceExtractor.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/JobInstanceExtractor.java
index bd03aaa..7ebc3e0 100644
--- a/tool/src/main/java/org/apache/kylin/tool/JobInstanceExtractor.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/JobInstanceExtractor.java
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-package org.apache.kylin.tool;
+package org.apache.kylin.tool.extractor;
 
 import java.io.File;
 import java.util.List;
@@ -84,7 +84,7 @@ public class JobInstanceExtractor extends AbstractInfoExtractor {
         String cube = optionsHelper.hasOption(OPTION_CUBE) ? optionsHelper.getOptionValue(OPTION_CUBE) : null;
         String project = optionsHelper.hasOption(OPTION_PROJECT) ? optionsHelper.getOptionValue(OPTION_PROJECT) : null;
         int period = optionsHelper.hasOption(OPTION_PERIOD) ? Integer.parseInt(optionsHelper.getOptionValue(OPTION_PERIOD)) : DEFAULT_PERIOD;
-
+        // maybe an explicit start/end time range would be better than a period in days
         long endTime = System.currentTimeMillis();
         long startTime = endTime - period * 24 * 3600 * 1000; // time in Millis
         List<JobInstance> jobInstances = listJobInstances(project, cube, startTime, endTime);
diff --git a/tool/src/main/java/org/apache/kylin/tool/KylinLogExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/KylinLogExtractor.java
similarity index 91%
rename from tool/src/main/java/org/apache/kylin/tool/KylinLogExtractor.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/KylinLogExtractor.java
index a84345b..4ec10e6 100644
--- a/tool/src/main/java/org/apache/kylin/tool/KylinLogExtractor.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/KylinLogExtractor.java
@@ -17,7 +17,7 @@
  *
  */
 
-package org.apache.kylin.tool;
+package org.apache.kylin.tool.extractor;
 
 import java.io.File;
 import java.util.ArrayList;
@@ -32,7 +32,6 @@ import org.apache.kylin.cube.CubeDescManager;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.metadata.model.DataModelManager;
 import org.apache.kylin.metadata.project.ProjectManager;
-import org.apache.kylin.tool.util.ToolUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -83,11 +82,8 @@ public class KylinLogExtractor extends AbstractInfoExtractor {
         logger.info("Start to extract kylin logs in {} days", logPeriod);
 
         List<File> logDirs = Lists.newArrayList();
-        logDirs.add(new File(KylinConfig.getKylinHome(), "logs"));
-        String kylinVersion = ToolUtil.decideKylinMajorVersionFromCommitFile();
-        if (kylinVersion != null && kylinVersion.equals("1.3")) {
-            logDirs.add(new File(KylinConfig.getKylinHome(), "tomcat/logs"));
-        }
+        logDirs.add(new File(getKylinHome(), "logs"));
+        logDirs.add(new File(getKylinHome(), "tomcat/logs"));
 
         final ArrayList<File> requiredLogFiles = Lists.newArrayList();
         final long logThresholdTime = System.currentTimeMillis() - logPeriod * 24 * 3600 * 1000;
diff --git a/tool/src/main/java/org/apache/kylin/tool/MrJobInfoExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/MrJobInfoExtractor.java
similarity index 97%
rename from tool/src/main/java/org/apache/kylin/tool/MrJobInfoExtractor.java
rename to tool/src/main/java/org/apache/kylin/tool/extractor/MrJobInfoExtractor.java
index 31c0a5c..a4b516d 100644
--- a/tool/src/main/java/org/apache/kylin/tool/MrJobInfoExtractor.java
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/MrJobInfoExtractor.java
@@ -16,7 +16,7 @@
  * limitations under the License.
 */
 
-package org.apache.kylin.tool;
+package org.apache.kylin.tool.extractor;
 
 import java.io.File;
 import java.io.IOException;
@@ -35,7 +35,6 @@ import org.apache.http.impl.client.DefaultHttpClient;
 import org.apache.http.util.EntityUtils;
 import org.apache.kylin.common.util.HadoopUtil;
 import org.apache.kylin.common.util.OptionsHelper;
-import org.apache.kylin.tool.common.HadoopConfExtractor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -45,6 +44,9 @@ import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 
+/**
+ * http://hadoop.apache.org/docs/r2.7.3/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html
+ */
 public class MrJobInfoExtractor extends AbstractInfoExtractor {
     private static final Logger logger = LoggerFactory.getLogger(MrJobInfoExtractor.class);
 
@@ -83,9 +85,9 @@ public class MrJobInfoExtractor extends AbstractInfoExtractor {
     private String getHttpResponse(String url) {
         DefaultHttpClient client = new DefaultHttpClient();
         String msg = null;
-        int retry_times = 0;
-        while (msg == null && retry_times < HTTP_RETRY) {
-            retry_times++;
+        int retryTimes = 0;
+        while (msg == null && retryTimes < HTTP_RETRY) {
+            retryTimes++;
 
             HttpGet request = new HttpGet(url);
             try {
@@ -93,7 +95,7 @@ public class MrJobInfoExtractor extends AbstractInfoExtractor {
                 HttpResponse response = client.execute(request);
                 msg = EntityUtils.toString(response.getEntity());
             } catch (Exception e) {
-                logger.warn("Failed to fetch http response. Retry={}", retry_times, e);
+                logger.warn("Failed to fetch http response. Retry={}", retryTimes, e);
             } finally {
                 request.releaseConnection();
             }
diff --git a/tool/src/main/java/org/apache/kylin/tool/extractor/SparkEnvInfoExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/SparkEnvInfoExtractor.java
new file mode 100644
index 0000000..6b45548
--- /dev/null
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/SparkEnvInfoExtractor.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.kylin.tool.extractor;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.io.FileUtils;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.OptionsHelper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+
+public class SparkEnvInfoExtractor extends AbstractInfoExtractor {
+    private static final Logger logger = LoggerFactory.getLogger(SparkEnvInfoExtractor.class);
+
+    public SparkEnvInfoExtractor() {
+        super();
+        packageType = "spark";
+    }
+
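+    // SPARK_HOME is resolved via KylinConfig; fail fast if it points nowhere.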
+    private File getSparkConfDir() {
+        String sparkHome = KylinConfig.getSparkHome();
+        File sparkHomeDir = new File(sparkHome);
+        Preconditions.checkArgument(sparkHomeDir.exists(), "Your SPARK_HOME does not exist.");
+        return new File(sparkHomeDir, "conf");
+    }
+
+    private File getHadoopConfDir() {
+        String hadoopConf = System.getenv("HADOOP_CONF_DIR");
+        // maybe a user-defined setting in kylin.properties could be added as a fallback
+        Preconditions.checkNotNull(hadoopConf, "Cannot find HADOOP_CONF_DIR in the environment.");
+        File hadoopConfDir = new File(hadoopConf);
+        Preconditions.checkArgument(hadoopConfDir.exists(), "Your HADOOP_CONF_DIR does not exist: " + hadoopConf);
+        return hadoopConfDir;
+    }
+
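+    // Copies every file from the given conf dir into the diagnosis package, skipping the shipped *.template samples.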
+    private void extractConfDir(File from, File to) {
+        File[] confFiles = from.listFiles();
+        if (confFiles != null) {
+            for (File confFile : confFiles) {
+                if (!confFile.getName().endsWith(".template")) {
+                    addFile(confFile, to);
+                }
+            }
+        }
+    }
+
+    @Override
+    protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception {
+        StringBuilder envStrBuilder = new StringBuilder();
+
+        // extract spark configurations
+        try {
+            File sparkHome = getSparkConfDir();
+            extractConfDir(sparkHome, new File(exportDir, "spark-conf"));
+            envStrBuilder.append("SPARK_HOME=").append(sparkHome.getAbsolutePath()).append("\n");
+        } catch (Exception e) {
+            logger.error("Failed to extract spark conf.", e);
+        }
+
+        // extract hadoop configurations for spark
+        try {
+            File hadoopConf = getHadoopConfDir();
+            extractConfDir(hadoopConf, new File(exportDir, "hadoop-conf"));
+            envStrBuilder.append("HADOOP_CONF_DIR=").append(hadoopConf.getAbsolutePath()).append("\n");
+        } catch (Exception e) {
+            logger.error("Failed to extract hadoop conf.", e);
+        }
+
+        // extract spark env variables
+        FileUtils.write(new File(exportDir, "env"), envStrBuilder.toString());
+    }
+}
diff --git a/tool/src/main/java/org/apache/kylin/tool/extractor/YarnLogExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/YarnLogExtractor.java
new file mode 100644
index 0000000..658872d
--- /dev/null
+++ b/tool/src/main/java/org/apache/kylin/tool/extractor/YarnLogExtractor.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.kylin.tool.extractor;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.commons.io.FileUtils;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.job.constant.ExecutableConstants;
+import org.apache.kylin.job.dao.ExecutableDao;
+import org.apache.kylin.job.dao.ExecutablePO;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * http://hadoop.apache.org/docs/r2.7.3/hadoop-yarn/hadoop-yarn-site/YarnCommands.html#logs
+ */
+public class YarnLogExtractor {
+    private static final Logger logger = LoggerFactory.getLogger(YarnLogExtractor.class);
+    List<String> yarnLogsResources = Lists.newArrayList();
+    private KylinConfig kylinConfig;
+    private ExecutableDao executableDao;
+
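+    // For every step (task) of the given job, pull the aggregated YARN container
+    // logs and, where an MR job id is known, the MR counters as well.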
+    public void extractYarnLogAndMRJob(String jobId, File yarnLogDir) throws Exception {
+        logger.info("Collecting Yarn logs and MR counters for the Job {}", jobId);
+        kylinConfig = KylinConfig.getInstanceFromEnv();
+        executableDao = ExecutableDao.getInstance(kylinConfig);
+        ExecutablePO executablePO = executableDao.getJob(jobId);
+
+        if (executablePO == null) {
+            logger.error("Can not find executablePO.");
+            return;
+        }
+
+        for (ExecutablePO task : executablePO.getTasks()) {
+            yarnLogsResources.add(task.getUuid());
+        }
+
+        for (String stepId : yarnLogsResources) {
+            logger.info("Checking step {}", stepId);
+            extractYarnLog(stepId, new File(yarnLogDir, stepId));
+            extractMRJob(stepId, new File(yarnLogDir, stepId));
+        }
+    }
+
+    protected void extractMRJob(String taskId, File destDir) {
+        try {
+            final Map<String, String> jobInfo = executableDao.getJobOutput(taskId).getInfo();
+            String jobId = null;
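+            // Step ids are assumed to end in a sequence number; "...00" is the first
+            // step (Create Intermediate Flat Hive Table), whose output embeds the YARN application id.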
+            if (jobInfo.containsKey(ExecutableConstants.MR_JOB_ID)) {
+                jobId = jobInfo.get(ExecutableConstants.MR_JOB_ID);
+            } else if (taskId.endsWith("00")) {
+                logger.info("Create Intermediate Flat Hive Table's taskId: " + taskId);
+                final String jobContent = executableDao.getJobOutput(taskId).getContent();
+                if (jobContent != null) {
+                    String applicationId = extractApplicationId(jobContent);
+                    if (applicationId != null) {
+                        jobId = applicationId.replace("application", "job");
+                        logger.info("jobId is: " + jobId);
+                    }
+                }
+            }
+
+            if (jobId != null) {
+                FileUtils.forceMkdir(destDir);
+                String[] mrJobArgs = {"-mrJobId", jobId, "-destDir", destDir.getAbsolutePath(), "-compress", "false",
+                        "-submodule", "true"};
+                new MrJobInfoExtractor().execute(mrJobArgs);
+            }
+
+        } catch (Exception e) {
+            logger.error("Failed to extract MR job.", e);
+        }
+    }
+
+    protected void extractYarnLog(String taskId, File destDir) {
+        try {
+            final Map<String, String> jobInfo = executableDao.getJobOutput(taskId).getInfo();
+            FileUtils.forceMkdir(destDir);
+            String appId = null;
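+            // Prefer the MR job id (mapped to its application id); a Spark step
+            // records the YARN application id directly.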
+            if (jobInfo.containsKey(ExecutableConstants.MR_JOB_ID)) {
+                appId = jobInfo.get(ExecutableConstants.MR_JOB_ID).replace("job", "application");
+            } else if (jobInfo.containsKey(ExecutableConstants.SPARK_JOB_ID)) {
+                appId = jobInfo.get(ExecutableConstants.SPARK_JOB_ID);
+            }
+
+            if (appId != null) {
+                extractYarnLogByApplicationId(appId, destDir);
+            } else if (taskId.endsWith("00")) {
+                extractFlatStepInfo(taskId, destDir);
+            }
+        } catch (Exception e) {
+            logger.error("Failed to extract yarn log.", e);
+        }
+    }
+
+    private void extractFlatStepInfo(String taskId, File destDir) {
+        try {
+            logger.info("Create Intermediate Flat Hive Table's taskId: " + taskId);
+            final String jobContent = executableDao.getJobOutput(taskId).getContent();
+            if (jobContent != null) {
+                String applicationId = extractApplicationId(jobContent);
+
+                logger.info("applicationId is: " + applicationId);
+                if (applicationId != null && applicationId.startsWith("application")) {
+                    logger.info("Create Intermediate Flat Hive Table's applicationId: " + applicationId);
+                    extractYarnLogByApplicationId(applicationId, destDir);
+                }
+            }
+        } catch (Exception e) {
+            logger.error("Failed to extract FlatStepInfo.", e);
+        }
+    }
+
+    private String extractApplicationId(String jobContent) {
+        Matcher matcher = Pattern.compile("application_[0-9]+_[0-9]+").matcher(jobContent);
+
+        if (matcher.find()) {
+            return matcher.group(0);
+        }
+        return null;
+    }
+
+    private void extractYarnLogByApplicationId(String applicationId, File destDir) throws Exception {
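+        // "yarn logs" only returns output once log aggregation has finished, so running applications are skipped.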
+        if (shouldDoLogCollection(applicationId, kylinConfig)) {
+            File destFile = new File(destDir, applicationId + ".log");
+            String yarnCmd = "yarn logs -applicationId " + applicationId + " > " + destFile.getAbsolutePath();
+            logger.info(yarnCmd);
+            try {
+                kylinConfig.getCliCommandExecutor().execute(yarnCmd);
+            } catch (Exception ex) {
+                logger.warn("Failed to get yarn logs. ", ex);
+            }
+        } else {
+            logger.info("Skip this application {}.", applicationId);
+        }
+    }
+
+    /**
+     * Only applications that have finished and did not succeed should have their logs collected.
+     */
+    public static boolean shouldDoLogCollection(String applicationId, KylinConfig kylinConfig) throws IOException {
+        final String yarnCmd = "yarn application -status " + applicationId;
+        final String cmdOutput = kylinConfig.getCliCommandExecutor().execute(yarnCmd).getSecond();
+        final Map<String, String> params = Maps.newHashMap();
+        final String[] cmdOutputLines = cmdOutput.split("\n");
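+        // Each status line looks like "Key : Value"; keeping the first two ":"-separated
+        // fields is enough for the "State" / "Final-State" keys checked below.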
+        for (String cmdOutputLine : cmdOutputLines) {
+            String[] pair = cmdOutputLine.split(":");
+            if (pair.length >= 2) {
+                params.put(pair[0].trim(), pair[1].trim());
+            }
+        }
+        for (Map.Entry<String, String> e : params.entrySet()) {
+            logger.info("Status of {}  {} : {}", applicationId, e.getKey(), e.getValue());
+        }
+
+        // Skip running application because log agg is not completed
+        if (params.containsKey("State") && params.get("State").equals("RUNNING")) {
+            return false;
+        }
+        // Skip succeed application
+        return params.containsKey("Final-State") && !params.get("Final-State").equals("SUCCEEDED");
+    }
+}
diff --git a/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java b/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java
index 842beb2..388e569 100644
--- a/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java
+++ b/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java
@@ -19,7 +19,6 @@
 
 package org.apache.kylin.tool.util;
 
-import com.google.common.collect.Maps;
 import org.apache.commons.lang.StringUtils;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
@@ -28,7 +27,6 @@ import java.io.File;
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
-import java.util.Map;
 
 public class ToolUtil {
 
@@ -55,17 +53,6 @@ public class ToolUtil {
         return store.getMetaStoreUUID();
     }
 
-    public static String decideKylinMajorVersionFromCommitFile() {
-        Map<String, String> majorVersionCommitMap = Maps.newHashMap();
-        majorVersionCommitMap.put("1.3", "commit.sha1");
-        majorVersionCommitMap.put("1.5", "commit_SHA1");
-        for (Map.Entry<String, String> majorVersionEntry : majorVersionCommitMap.entrySet()) {
-            if (new File(KylinConfig.getKylinHome(), majorVersionEntry.getValue()).exists()) {
-                return majorVersionEntry.getKey();
-            }
-        }
-        return null;
-    }
 
     public static String getHostName() {
         String hostname = System.getenv("COMPUTERNAME");
diff --git a/tool/src/test/java/org/apache/kylin/tool/ClientEnvExtractorTest.java b/tool/src/test/java/org/apache/kylin/tool/ClientEnvExtractorTest.java
index d2b31b0..37e7c17 100644
--- a/tool/src/test/java/org/apache/kylin/tool/ClientEnvExtractorTest.java
+++ b/tool/src/test/java/org/apache/kylin/tool/ClientEnvExtractorTest.java
@@ -19,6 +19,7 @@
 package org.apache.kylin.tool;
 
 import org.apache.kylin.common.util.HBaseMetadataTestCase;
+import org.apache.kylin.tool.extractor.ClientEnvExtractor;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Rule;
@@ -56,7 +57,6 @@ public class ClientEnvExtractorTest extends HBaseMetadataTestCase {
         ClientEnvExtractor executor = new ClientEnvExtractor();
         executor.maxWaitSeconds = 2;
         executor.addShellOutput("sleep 1000", f.getAbsolutePath(), "testTimeout");
-        executor.addShellOutput("pwd", f.getAbsolutePath(), "pwd");
     }
 
     @Test
diff --git a/tool/src/test/java/org/apache/kylin/tool/CubeMetaExtractorTest.java b/tool/src/test/java/org/apache/kylin/tool/CubeMetaExtractorTest.java
index d87bfd8..2343301 100644
--- a/tool/src/test/java/org/apache/kylin/tool/CubeMetaExtractorTest.java
+++ b/tool/src/test/java/org/apache/kylin/tool/CubeMetaExtractorTest.java
@@ -28,6 +28,7 @@ import java.util.Set;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceTool;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.tool.extractor.CubeMetaExtractor;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;