You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by qh...@apache.org on 2015/07/27 11:21:53 UTC

[51/52] [abbrv] incubator-kylin git commit: KYLIN-883 Using configurable option for Hive intermediate tables

KYLIN-883 Using configurable option for Hive intermediate tables 


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/39566040
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/39566040
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/39566040

Branch: refs/heads/0.8
Commit: 39566040870050b356146a2aead43858c8de2eee
Parents: 3d04704
Author: shaofengshi <sh...@apache.org>
Authored: Mon Jul 27 14:47:43 2015 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon Jul 27 14:47:43 2015 +0800

----------------------------------------------------------------------
 conf/kylin.properties                                         | 3 +++
 .../src/main/java/org/apache/kylin/common/KylinConfig.java    | 7 +++++++
 .../org/apache/kylin/engine/mr/steps/MetadataCleanupJob.java  | 6 ++++--
 .../main/java/org/apache/kylin/source/hive/HiveMRInput.java   | 5 ++++-
 4 files changed, 18 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/39566040/conf/kylin.properties
----------------------------------------------------------------------
diff --git a/conf/kylin.properties b/conf/kylin.properties
index e55f49d..7da9a9d 100644
--- a/conf/kylin.properties
+++ b/conf/kylin.properties
@@ -36,6 +36,9 @@ kylin.job.concurrent.max.limit=10
 # Time interval to check hadoop job status
 kylin.job.yarn.app.rest.check.interval.seconds=10
 
+# Name of the Hive database in which the intermediate flat tables are created
+kylin.job.hive.database.for.intermediatetable=default
+
 # Whether calculate cube in mem in each mapper;
 kylin.job.cubing.inMem=true
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/39566040/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
index 89a7fcc..1352214 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java
@@ -99,6 +99,8 @@ public class KylinConfig {
 
     public static final String KYLIN_HDFS_WORKING_DIR = "kylin.hdfs.working.dir";
 
+    public static final String HIVE_DATABASE_FOR_INTERMEDIATE_TABLE = "kylin.job.hive.database.for.intermediatetable";
+    
     public static final String HIVE_PASSWORD = "hive.password";
 
     public static final String HIVE_USER = "hive.user";
@@ -657,6 +659,11 @@ public class KylinConfig {
         this.storageUrl = storageUrl;
     }
 
+
+    public String getHiveDatabaseForIntermediateTable() {
+        return this.getOptional(HIVE_DATABASE_FOR_INTERMEDIATE_TABLE, "default");
+    }
+
     public void setRunAsRemoteCommand(String v) {
         kylinConfig.setProperty(KYLIN_JOB_RUN_AS_REMOTE_CMD, v);
     }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/39566040/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MetadataCleanupJob.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MetadataCleanupJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MetadataCleanupJob.java
index 62dd343..c82ceb6 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MetadataCleanupJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MetadataCleanupJob.java
@@ -20,6 +20,7 @@ package org.apache.kylin.engine.mr.steps;
 
 import com.google.common.collect.Lists;
 
+import com.google.common.collect.Sets;
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
@@ -34,6 +35,7 @@ import org.slf4j.LoggerFactory;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Set;
 
 /**
  */
@@ -48,7 +50,7 @@ public class MetadataCleanupJob extends AbstractHadoopJob {
 
     private KylinConfig config = null;
 
-    public static final long TIME_THREADSHOLD = 2 * 26 * 3600 * 1000l; // 2 days
+    public static final long TIME_THREADSHOLD = 2 * 24 * 3600 * 1000l; // 2 days
 
     /*
      * (non-Javadoc)
@@ -94,7 +96,7 @@ public class MetadataCleanupJob extends AbstractHadoopJob {
     public void cleanup() throws Exception {
         CubeManager cubeManager = CubeManager.getInstance(config);
 
-        List<String> activeResourceList = Lists.newArrayList();
+        Set<String> activeResourceList = Sets.newHashSet();
         for (org.apache.kylin.cube.CubeInstance cube : cubeManager.listAllCubes()) {
             for (org.apache.kylin.cube.CubeSegment segment : cube.getSegments()) {
                 activeResourceList.addAll(segment.getSnapshotPaths());

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/39566040/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
index c40924e..28e23b9 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
@@ -101,6 +101,7 @@ public class HiveMRInput implements IMRInput {
         
         public static AbstractExecutable createFlatHiveTableStep(JobEngineConfig conf, IJoinedFlatTableDesc flatTableDesc, String jobId) {
 
+            final String useDatabaseHql = "USE " + conf.getConfig().getHiveDatabaseForIntermediateTable() + ";";
             final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatTableDesc);
             final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatTableDesc, JobBuilderSupport.getJobWorkingDir(conf, jobId));
             String insertDataHqls;
@@ -147,7 +148,9 @@ public class HiveMRInput implements IMRInput {
 
             final String hiveTable = this.getOldHiveTable();
             if (StringUtils.isNotEmpty(hiveTable)) {
-                final String dropHiveCMD = "hive -e \"DROP TABLE IF EXISTS  " + hiveTable + ";\"";
+                final String dropSQL = "USE " + context.getConfig().getHiveDatabaseForIntermediateTable() + ";"
+                        + " DROP TABLE IF EXISTS  " + hiveTable + ";";
+                final String dropHiveCMD = "hive -e \"" + dropSQL + "\"";
                 ShellCmdOutput shellCmdOutput = new ShellCmdOutput();
                 try {
                     context.getConfig().getCliCommandExecutor().execute(dropHiveCMD, shellCmdOutput);