You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2016/04/20 04:39:33 UTC
[1/3] kylin git commit: KYLIN-1566 use a separate kylin_job_conf.xml
for in-mem cubing
Repository: kylin
Updated Branches:
refs/heads/master fbf2b3814 -> dc1671ca5
KYLIN-1566 use a separate kylin_job_conf.xml for in-mem cubing
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/209068b9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/209068b9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/209068b9
Branch: refs/heads/master
Commit: 209068b943bf4a90efe4df618e1aaf5cbfe49cde
Parents: 1b54a40
Author: shaofengshi <sh...@apache.org>
Authored: Fri Apr 15 16:11:44 2016 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Sat Apr 16 09:05:40 2016 +0800
----------------------------------------------------------------------
build/conf/kylin_job_conf_inmem.xml | 98 ++++++++++++++++++++
.../apache/kylin/common/KylinConfigBase.java | 19 ----
.../kylin/job/engine/JobEngineConfig.java | 44 ++++++---
.../kylin/engine/mr/BatchCubingJobBuilder2.java | 3 +-
.../kylin/engine/mr/JobBuilderSupport.java | 13 ++-
.../kylin/engine/mr/steps/InMemCuboidJob.java | 11 ---
.../cardinality/HiveColumnCardinalityJob.java | 2 +-
7 files changed, 140 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/209068b9/build/conf/kylin_job_conf_inmem.xml
----------------------------------------------------------------------
diff --git a/build/conf/kylin_job_conf_inmem.xml b/build/conf/kylin_job_conf_inmem.xml
new file mode 100644
index 0000000..55bf9ed
--- /dev/null
+++ b/build/conf/kylin_job_conf_inmem.xml
@@ -0,0 +1,98 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+
+ <property>
+ <name>mapreduce.job.split.metainfo.maxsize</name>
+ <value>-1</value>
+ <description>The maximum permissible size of the split metainfo file.
+ The JobTracker won't attempt to read split metainfo files bigger than
+ the configured value. No limits if set to -1.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.compress.map.output</name>
+ <value>true</value>
+ <description>Compress map outputs</description>
+ </property>
+
+ <property>
+ <name>mapred.map.output.compression.codec</name>
+ <value>org.apache.hadoop.io.compress.SnappyCodec</value>
+ <description>The compression codec to use for map outputs
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.output.compress</name>
+ <value>true</value>
+ <description>Compress the output of a MapReduce job</description>
+ </property>
+
+ <property>
+ <name>mapred.output.compression.codec</name>
+ <value>org.apache.hadoop.io.compress.SnappyCodec</value>
+ <description>The compression codec to use for job outputs
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.output.compression.type</name>
+ <value>BLOCK</value>
+ <description>The compression type to use for job outputs</description>
+ </property>
+
+
+ <property>
+ <name>mapreduce.job.max.split.locations</name>
+ <value>2000</value>
+ <description>No description</description>
+ </property>
+
+ <property>
+ <name>dfs.replication</name>
+ <value>2</value>
+ <description>Block replication</description>
+ </property>
+
+ <property>
+ <name>mapred.task.timeout</name>
+ <value>3600000</value>
+ <description>Set task timeout to 1 hour</description>
+ </property>
+
+ <!--Additional config for in-mem cubing, giving mapper more memory -->
+ <property>
+ <name>mapreduce.map.memory.mb</name>
+ <value>3072</value>
+ <description></description>
+ </property>
+
+ <property>
+ <name>mapreduce.map.java.opts</name>
+ <value>-Xmx2700m</value>
+ <description></description>
+ </property>
+
+ <property>
+ <name>mapreduce.task.io.sort.mb</name>
+ <value>200</value>
+ <description></description>
+ </property>
+
+</configuration>
http://git-wip-us.apache.org/repos/asf/kylin/blob/209068b9/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 51aa8aa..4d65c1d 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -531,25 +531,6 @@ abstract public class KylinConfigBase implements Serializable {
return percent;
}
- public Map<String, String> getCubingInMemMRJobConfOverride() {
- // in-mem cubing requires big memory, however dev env (sandbox) may not have that much
- String defaultOverride = isDevEnv() ? "" : "mapreduce.map.java.opts=-Xmx2700m; mapreduce.map.memory.mb=3072; mapreduce.task.io.sort.mb=200";
- String override = getOptional("kylin.job.cubing.inmem.mrjob_conf_override", defaultOverride);
-
- Map<String, String> result = Maps.newHashMap();
- for (String pair : override.split(";")) {
- int cut = pair.indexOf('=');
- if (cut < 0)
- continue;
- String k = pair.substring(0, cut).trim();
- String v = pair.substring(cut + 1).trim();
- if (k.isEmpty() || v.isEmpty())
- continue;
- result.put(k, v);
- }
- return result;
- }
-
public String getHbaseDefaultCompressionCodec() {
return getOptional("kylin.hbase.default.compression.codec", "");
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/209068b9/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java
----------------------------------------------------------------------
diff --git a/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java b/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java
index 546c033..fb4ce68 100644
--- a/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java
+++ b/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java
@@ -33,8 +33,10 @@ import org.slf4j.LoggerFactory;
*/
public class JobEngineConfig {
private static final Logger logger = LoggerFactory.getLogger(JobEngineConfig.class);
- public static String HADOOP_JOB_CONF_FILENAME = "kylin_job_conf";
- public static String HIVE_CONF_FILENAME = "kylin_hive_conf";
+ public static final String HADOOP_JOB_CONF_FILENAME = "kylin_job_conf";
+ public static final String HIVE_CONF_FILENAME = "kylin_hive_conf";
+ public static final String DEFAUL_JOB_CONF_SUFFIX = "";
+ public static final String IN_MEM_JOB_CONF_SUFFIX = "inmem";
private static File getJobConfig(String fileName) {
String path = System.getProperty(KylinConfig.KYLIN_CONF);
@@ -49,10 +51,10 @@ public class JobEngineConfig {
return null;
}
- private String getHadoopJobConfFilePath(RealizationCapacity capaticy, boolean appendSuffix) throws IOException {
+ private String getHadoopJobConfFilePath(String suffix, boolean appendSuffix) throws IOException {
String hadoopJobConfFile;
- if (capaticy != null && appendSuffix) {
- hadoopJobConfFile = (HADOOP_JOB_CONF_FILENAME + "_" + capaticy.toString().toLowerCase() + ".xml");
+ if (suffix != null && appendSuffix) {
+ hadoopJobConfFile = (HADOOP_JOB_CONF_FILENAME + "_" + suffix.toLowerCase() + ".xml");
} else {
hadoopJobConfFile = (HADOOP_JOB_CONF_FILENAME + ".xml");
}
@@ -69,19 +71,31 @@ public class JobEngineConfig {
return OptionsHelper.convertToFileURL(jobConfig.getAbsolutePath());
}
- public String getHadoopJobConfFilePath(RealizationCapacity capaticy) throws IOException {
- String path = getHadoopJobConfFilePath(capaticy, true);
- if (!StringUtils.isEmpty(path)) {
- logger.info("Chosen job conf is : " + path);
- return path;
+ /**
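+ * @param jobType job type used as the config file suffix (e.g. "inmem"); if null or empty, only the capacity suffix is tried before falling back to the default job conf
+ * @param capacity capacity name; combined with jobType as "jobType_capacity" to look for the most specific job conf first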
+ * @return the job config file path
+ * @throws IOException
+ */
+ public String getHadoopJobConfFilePath(String jobType, String capacity) throws IOException {
+ String suffix;
+ if(!StringUtils.isEmpty(jobType)) {
+ suffix = jobType + "_" + capacity;
} else {
- path = getHadoopJobConfFilePath(capaticy, false);
- if (!StringUtils.isEmpty(path)) {
- logger.info("Chosen job conf is : " + path);
- return path;
+ suffix = capacity;
+ }
+ String path = getHadoopJobConfFilePath(suffix, true);
+ if (StringUtils.isEmpty(path)) {
+ path = getHadoopJobConfFilePath(jobType, true);
+ if (StringUtils.isEmpty(path)) {
+ path = getHadoopJobConfFilePath(jobType, false);
+ if (StringUtils.isEmpty(path)) {
+ path = "";
+ }
}
}
- return "";
+ logger.info("Chosen job conf is : " + path);
+ return path;
}
public String getHiveConfFilePath() throws IOException {
http://git-wip-us.apache.org/repos/asf/kylin/blob/209068b9/engine-mr/src/main/java/org/apache/kylin/engine/mr/BatchCubingJobBuilder2.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/BatchCubingJobBuilder2.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/BatchCubingJobBuilder2.java
index 0b1bd90..a1c9cd9 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/BatchCubingJobBuilder2.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/BatchCubingJobBuilder2.java
@@ -30,6 +30,7 @@ import org.apache.kylin.engine.mr.steps.InMemCuboidJob;
import org.apache.kylin.engine.mr.steps.NDCuboidJob;
import org.apache.kylin.engine.mr.steps.SaveStatisticsStep;
import org.apache.kylin.job.constant.ExecutableConstants;
+import org.apache.kylin.job.engine.JobEngineConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -108,7 +109,7 @@ public class BatchCubingJobBuilder2 extends JobBuilderSupport {
MapReduceExecutable cubeStep = new MapReduceExecutable();
StringBuilder cmd = new StringBuilder();
- appendMapReduceParameters(cmd, ((CubeSegment) seg).getCubeDesc().getModel());
+ appendMapReduceParameters(cmd, JobEngineConfig.IN_MEM_JOB_CONF_SUFFIX, ((CubeSegment) seg).getCubeDesc().getModel());
cubeStep.setName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE);
http://git-wip-us.apache.org/repos/asf/kylin/blob/209068b9/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
index c4fc6b9..841c402 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
@@ -21,7 +21,6 @@ package org.apache.kylin.engine.mr;
import java.io.IOException;
import java.util.List;
-import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.HadoopShellExecutable;
@@ -162,9 +161,17 @@ public class JobBuilderSupport {
return getRealizationRootPath(jobId) + "/secondary_index/";
}
- public void appendMapReduceParameters(StringBuilder buf, DataModelDesc modelDesc) {
+ public void appendMapReduceParameters(StringBuilder buf, DataModelDesc dataModelDesc) {
+ appendMapReduceParameters(buf, JobEngineConfig.DEFAUL_JOB_CONF_SUFFIX, dataModelDesc.getCapacity().toString());
+ }
+
+ public void appendMapReduceParameters(StringBuilder buf, String jobType, DataModelDesc dataModelDesc) {
+ appendMapReduceParameters(buf, jobType, dataModelDesc.getCapacity().toString());
+ }
+
+ public void appendMapReduceParameters(StringBuilder buf, String jobType, String capacity) {
try {
- String jobConf = config.getHadoopJobConfFilePath(modelDesc.getCapacity());
+ String jobConf = config.getHadoopJobConfFilePath(jobType, capacity);
if (jobConf != null && jobConf.length() > 0) {
buf.append(" -conf ").append(jobConf);
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/209068b9/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/InMemCuboidJob.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/InMemCuboidJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/InMemCuboidJob.java
index f440b22..e7bbdf1 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/InMemCuboidJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/InMemCuboidJob.java
@@ -101,9 +101,6 @@ public class InMemCuboidJob extends AbstractHadoopJob {
job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
logger.info("Starting: " + job.getJobName());
- // some special tuning for in-mem MR job
- overrideJobConf(job.getConfiguration(), config);
-
setJobClasspath(job);
// add metadata to distributed cache
@@ -112,8 +109,6 @@ public class InMemCuboidJob extends AbstractHadoopJob {
// set job configuration
job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
- long timeout = 1000 * 60 * 60L; // 1 hour
- job.getConfiguration().set("mapred.task.timeout", String.valueOf(timeout));
// set input
IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
@@ -149,12 +144,6 @@ public class InMemCuboidJob extends AbstractHadoopJob {
}
}
- private void overrideJobConf(Configuration jobConf, KylinConfig kylinConfig) {
- for (Entry<String, String> entry : kylinConfig.getCubingInMemMRJobConfOverride().entrySet()) {
- jobConf.set(entry.getKey(), entry.getValue());
- }
- }
-
private int calculateReducerNum(CubeSegment cubeSeg) throws IOException {
KylinConfig kylinConfig = cubeSeg.getConfig();
http://git-wip-us.apache.org/repos/asf/kylin/blob/209068b9/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/HiveColumnCardinalityJob.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/HiveColumnCardinalityJob.java b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/HiveColumnCardinalityJob.java
index 9162208..3ce0ab2 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/HiveColumnCardinalityJob.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/cardinality/HiveColumnCardinalityJob.java
@@ -71,7 +71,7 @@ public class HiveColumnCardinalityJob extends AbstractHadoopJob {
Configuration conf = getConf();
JobEngineConfig jobEngineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
- conf.addResource(jobEngineConfig.getHadoopJobConfFilePath(null));
+ conf.addResource(jobEngineConfig.getHadoopJobConfFilePath(null, null));
job = Job.getInstance(conf, jobName);
[3/3] kylin git commit: KYLIN-1418 Memory hungry cube should select
LAYER and INMEM cubing smartly
Posted by sh...@apache.org.
KYLIN-1418 Memory hungry cube should select LAYER and INMEM cubing smartly
Signed-off-by: shaofengshi <sh...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/dc1671ca
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/dc1671ca
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/dc1671ca
Branch: refs/heads/master
Commit: dc1671ca509586d7fecb81e24fddab56d5380dae
Parents: 39becdc
Author: shaofengshi <sh...@apache.org>
Authored: Wed Apr 20 10:08:09 2016 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Wed Apr 20 10:11:16 2016 +0800
----------------------------------------------------------------------
.../org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/dc1671ca/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
index e1070f4..7d5e178 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
@@ -92,16 +92,15 @@ public class SaveStatisticsStep extends AbstractExecutable {
} else if (AlgorithmEnum.LAYER.name().equalsIgnoreCase(algPref)) {
alg = AlgorithmEnum.LAYER;
} else {
- boolean memoryHungry = false;
+ int memoryHungryMeasures = 0;
for (MeasureDesc measure : seg.getCubeDesc().getMeasures()) {
if (measure.getFunction().getMeasureType().isMemoryHungry()) {
logger.info("This cube has memory-hungry measure " + measure.getFunction().getExpression());
- memoryHungry = true;
- break;
+ memoryHungryMeasures++;
}
}
- if (memoryHungry == true) {
+ if (memoryHungryMeasures > 4 || (kylinConf.isDevEnv() && memoryHungryMeasures > 0)) {
alg = AlgorithmEnum.LAYER;
} else if ("random".equalsIgnoreCase(algPref)) { // for testing
alg = new Random().nextBoolean() ? AlgorithmEnum.INMEM : AlgorithmEnum.LAYER;
[2/3] kylin git commit: Merge branch 'master' of
https://git-wip-us.apache.org/repos/asf/kylin
Posted by sh...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/kylin
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/39becdcd
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/39becdcd
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/39becdcd
Branch: refs/heads/master
Commit: 39becdcd4a5678ab899ab85977ae0ffffc19e541
Parents: 209068b fbf2b38
Author: shaofengshi <sh...@apache.org>
Authored: Wed Apr 20 10:09:55 2016 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Wed Apr 20 10:09:55 2016 +0800
----------------------------------------------------------------------
README.md | 2 +-
.../apache/kylin/common/KylinConfigBase.java | 10 +-
.../kylin/cube/model/HBaseColumnFamilyDesc.java | 13 +
.../apache/kylin/gridtable/GTScanRequest.java | 3 +-
.../kylin/cube/common/RowKeySplitterTest.java | 6 +-
.../apache/kylin/cube/kv/RowKeyDecoderTest.java | 2 +-
.../apache/kylin/cube/kv/RowKeyEncoderTest.java | 3 +-
.../gridtable/AggregationCacheSpillTest.java | 4 +-
.../test_kylin_cube_with_slr_desc.json | 2 +-
...test_kylin_cube_with_slr_left_join_desc.json | 407 ++--
.../test_kylin_cube_without_slr_desc.json | 2 +-
.../invertedindex/InvertedIndexLocalTest.java | 7 +-
.../apache/kylin/query/ITMassInQueryTest.java | 31 +-
.../query/sql_massin_distinct/query01.sql | 31 +
pom.xml | 2 +-
.../storage/hbase/cube/v2/CubeStorageQuery.java | 2 +-
.../storage/hbase/steps/CubeHTableUtil.java | 9 +-
.../storage/hbase/util/CubeMigrationCLI.java | 3 +-
webapp/app/index.html | 3 +-
webapp/app/js/controllers/cubeEdit.js | 27 +-
webapp/app/js/controllers/cubeMeasures.js | 4 +-
webapp/app/js/controllers/cubeOverwriteProp.js | 71 +
webapp/app/js/controllers/cubeSchema.js | 49 +-
webapp/app/js/directives/select.js | 1926 ++++++++++++++++++
.../cubeDesigner/cubeOverwriteProp.html | 70 +
webapp/app/partials/cubeDesigner/measures.html | 16 +-
26 files changed, 2470 insertions(+), 235 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/39becdcd/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------