You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2020/09/06 15:09:51 UTC
[kylin] 01/01: KYLIN-4660 Remove some module from maven project
This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 97db36c0fc8480b1dbb89bf34eab845e95dc739f
Author: XiaoxiangYu <hi...@126.com>
AuthorDate: Fri Jul 24 22:12:34 2020 +0800
KYLIN-4660 Remove some module from maven project
Some features are planned to be removed in Kylin 4.X, so let us keep the source code clean.
---
assembly/pom.xml | 10 +-
{engine-mr => build-engine}/pom.xml | 17 +-
.../mr/BatchOptimizeJobCheckpointBuilder.java | 90 +
.../apache/kylin/engine/mr/ByteArrayWritable.java | 185 ++
.../java/org/apache/kylin/engine/mr/CubingJob.java | 413 ++++
.../org/apache/kylin/engine/mr/DFSFileTable.java | 112 ++
.../apache/kylin/engine/mr/DFSFileTableReader.java | 253 +++
.../java/org/apache/kylin/engine/mr/IInput.java | 47 +
.../kylin/engine/mr/ILookupMaterializer.java | 40 +
.../java/org/apache/kylin/engine/mr/IMRInput.java | 68 +
.../java/org/apache/kylin/engine/mr/IMROutput.java | 79 +
.../org/apache/kylin/engine/mr/IMROutput2.java | 143 ++
.../apache/kylin/engine/mr/JobBuilderSupport.java | 221 +++
.../org/apache/kylin/engine/mr/KylinMapper.java | 123 ++
.../org/apache/kylin/engine/mr/KylinReducer.java | 123 ++
.../kylin/engine/mr/LookupMaterializeContext.java | 85 +
.../kylin/engine/mr/LookupSnapshotBuildJob.java | 93 +
.../java/org/apache/kylin/engine/mr/MRUtil.java | 94 +
.../kylin/engine/mr/SortedColumnDFSFile.java | 135 ++
.../kylin/engine/mr/SortedColumnDFSFileReader.java | 136 ++
.../kylin/engine/mr/common/AbstractHadoopJob.java | 753 +++++++
.../kylin/engine/mr/common/BatchConstants.java | 128 ++
.../kylin/engine/mr/common/CubeJobLockUtil.java | 39 +-
.../kylin/engine/mr/common/CubeStatsReader.java | 544 +++++
.../kylin/engine/mr/common/CubeStatsWriter.java | 102 +
.../engine/mr/common/CuboidRecommenderUtil.java | 135 ++
.../engine/mr/common/CuboidSchedulerUtil.java | 60 +
.../kylin/engine/mr/common/CuboidShardUtil.java | 57 +
.../engine/mr/common/CuboidStatsReaderUtil.java | 169 ++
.../mr/common/DefaultSslProtocolSocketFactory.java | 150 ++
.../engine/mr/common/DefaultX509TrustManager.java | 114 ++
.../kylin/engine/mr/common/HadoopCmdOutput.java | 159 ++
.../engine/mr/common/HadoopJobStatusChecker.java | 66 +
.../engine/mr/common/HadoopShellExecutable.java | 99 +
.../kylin/engine/mr/common/JobInfoConverter.java | 244 +++
.../kylin/engine/mr/common/JobRelatedMetaUtil.java | 105 +
.../engine/mr/common/MapReduceExecutable.java | 541 +++++
.../kylin/engine/mr/common/MapReduceUtil.java | 164 ++
.../kylin/engine/mr/common/NDCuboidBuilder.java | 117 ++
.../mr/common/SerializableConfiguration.java | 50 +
.../engine/mr/common/StatisticsDecisionUtil.java | 134 ++
.../engine/mr/exception/HadoopShellException.java | 34 +-
.../engine/mr/exception/MapReduceException.java | 30 +-
.../mr/exception/SegmentNotFoundException.java | 33 +-
.../engine/mr/steps/CubingExecutableUtil.java | 159 ++
.../steps/UpdateCubeInfoAfterCheckpointStep.java | 69 +
.../src/test/resources/data/8d_cuboid/part-r-00000 | Bin 0 -> 913974 bytes
.../test/resources/data/base_cuboid/part-r-00000 | Bin 0 -> 243816 bytes
build-engine/src/test/resources/dict/DW_SITES | Bin 0 -> 5020 bytes
.../src/test/resources/multi_file_double/data_0 | 2006 +++++++++++++++++++
.../src/test/resources/multi_file_double/data_1 | 2093 ++++++++++++++++++++
.../src/test/resources/multi_file_double/data_2 | 2006 +++++++++++++++++++
.../src/test/resources/multi_file_double/data_3 | 1947 ++++++++++++++++++
.../src/test/resources/multi_file_double/data_4 | 1948 ++++++++++++++++++
.../test/resources/multi_file_empty_file/empty.txt | 0
.../resources/multi_file_empty_file/has_value.txt | 2 +
.../src/test/resources/multi_file_int/data_0 | 1987 +++++++++++++++++++
.../src/test/resources/multi_file_int/data_1 | 1989 +++++++++++++++++++
.../src/test/resources/multi_file_int/data_2 | 1998 +++++++++++++++++++
.../src/test/resources/multi_file_int/data_3 | 1996 +++++++++++++++++++
.../src/test/resources/multi_file_int/data_4 | 2030 +++++++++++++++++++
.../src/test/resources/multi_file_str/data_0 | 2029 +++++++++++++++++++
.../src/test/resources/multi_file_str/data_1 | 1990 +++++++++++++++++++
.../src/test/resources/multi_file_str/data_2 | 1992 +++++++++++++++++++
.../src/test/resources/multi_file_str/data_3 | 1948 ++++++++++++++++++
.../src/test/resources/multi_file_str/data_4 | 2041 +++++++++++++++++++
build/bin/download-flink.sh | 81 -
build/bin/find-flink-dependency.sh | 52 -
build/script/download-flink.sh | 71 -
.../org/apache/kylin/common/KylinConfigBase.java | 222 ++-
.../java/org/apache/kylin/common/QueryContext.java | 5 +
.../kylin/common/annotation/Clarification.java | 31 +-
.../apache/kylin/common/annotation/ConfigTag.java | 74 +
.../apache/kylin/common/livy/LivyRestBuilder.java | 5 +-
.../java/org/apache/kylin/cube/CubeManager.java | 18 +-
.../kylin/cube/cli/DictionaryGeneratorCLI.java | 6 +-
.../java/org/apache/kylin/cube/model/CubeDesc.java | 18 -
.../apache/kylin/cube/model/SnapshotTableDesc.java | 12 -
.../org/apache/kylin/dict/ShrunkenDictionary.java | 160 --
.../kylin/dict/ShrunkenDictionaryBuilder.java | 49 -
.../dict/lookup/AbstractLookupRowEncoder.java | 122 --
.../kylin/dict/lookup/ExtTableSnapshotInfo.java | 149 --
.../dict/lookup/ExtTableSnapshotInfoManager.java | 213 --
.../kylin/dict/lookup/LookupProviderFactory.java | 82 -
.../dict/lookup/cache/RocksDBLookupBuilder.java | 83 -
.../dict/lookup/cache/RocksDBLookupRowEncoder.java | 70 -
.../dict/lookup/cache/RocksDBLookupTable.java | 116 --
.../dict/lookup/cache/RocksDBLookupTableCache.java | 430 ----
.../apache/kylin/dict/ShrunkenDictionaryTest.java | 144 --
.../lookup/cache/RocksDBLookupRowEncoderTest.java | 80 -
.../lookup/cache/RocksDBLookupTableCacheTest.java | 231 ---
.../dict/lookup/cache/RocksDBLookupTableTest.java | 161 --
.../org/apache/kylin/job/JobBuilderSupport.java | 184 ++
engine-flink/pom.xml | 10 +-
engine-mr/pom.xml | 21 +-
engine-spark/pom.xml | 10 +-
.../kylin/source/hive/ITHiveTableReaderTest.java | 28 +-
.../kylin-spark-classloader/pom.xml | 2 +-
kylin-spark-project/kylin-spark-common/pom.xml | 2 +-
kylin-spark-project/kylin-spark-engine/pom.xml | 40 +-
.../engine/spark/job/CubingExecutableUtil.java | 159 ++
.../apache/kylin/engine/spark/job/CubingJob.java | 365 ++++
.../kylin/engine/spark/job/JobStepFactory.java | 1 -
.../kylin/engine/spark/job/NSparkCubingJob.java | 2 -
.../kylin/engine/spark/job/NSparkMergingJob.java | 2 -
.../NSparkUpdateMetaAndCleanupAfterMergeStep.java | 1 -
.../merger/AfterMergeOrRefreshResourceMerger.java | 2 +-
.../org/apache/spark/conf/rule/SparkConfRule.scala | 8 +-
kylin-spark-project/kylin-spark-metadata/pom.xml | 2 +-
kylin-spark-project/kylin-spark-query/pom.xml | 50 +-
kylin-spark-project/kylin-spark-test/pom.xml | 28 +-
kylin-spark-project/pom.xml | 2 +-
{storage-hbase => metastore-hbase}/pom.xml | 145 +-
.../kylin/storage/hbase/HBaseConnection.java | 411 ++++
.../kylin/storage/hbase/HBaseResourceStore.java | 509 +++++
.../storage/hbase/steps/SandboxMetastoreCLI.java | 73 +
.../src/test/resources/data/8d_cuboid/part-r-00000 | Bin 0 -> 1476517 bytes
.../test/resources/data/base_cuboid/part-r-00000 | Bin 0 -> 394644 bytes
.../test/resources/partition_list/_partition.lst | Bin 0 -> 326 bytes
.../src/test/resources/partition_list/part-r-00000 | Bin 0 -> 15004 bytes
metrics-reporter-hive/pom.xml | 8 -
.../lib/impl}/hive/HiveMetaStoreClientFactory.java | 2 +-
.../kylin/metrics/lib/impl/hive/HiveProducer.java | 1 -
parquet-assembly/pom.xml | 28 +-
pom.xml | 187 +-
server-base/pom.xml | 88 +-
.../kylin/rest/controller/CubeController.java | 140 +-
.../kylin/rest/controller/JobController.java | 40 +-
.../kylin/rest/controller/StreamingController.java | 564 +++---
.../controller/StreamingCoordinatorController.java | 302 ++-
.../rest/controller/StreamingV2Controller.java | 964 +++++----
.../kylin/rest/controller/TableController.java | 62 +-
.../apache/kylin/rest/job/KylinHealthCheckJob.java | 4 +-
.../apache/kylin/rest/job/MetadataCleanupJob.java | 2 +-
.../kylin/rest/job/StorageCleanJobHbaseUtil.java | 35 -
.../apache/kylin/rest/service/BasicService.java | 9 +-
.../org/apache/kylin/rest/service/CubeService.java | 59 +-
.../apache/kylin/rest/service/HBaseInfoUtil.java | 59 +-
.../org/apache/kylin/rest/service/JobService.java | 95 +-
.../kylin/rest/service/KafkaConfigService.java | 129 +-
.../apache/kylin/rest/service/QueryService.java | 27 +-
.../rest/service/StreamingCoordinatorService.java | 173 +-
.../kylin/rest/service/StreamingV2Service.java | 1057 +++++-----
.../apache/kylin/rest/service/TableService.java | 202 +-
.../kylin/rest/job/MetadataCleanupJobTest.java | 2 +-
.../kylin/rest/job/StorageCleanupJobTest.java | 1 -
server/ServiceConfig.md | 37 -
server/pom.xml | 3 +-
.../kylin/rest/service/AdminServiceTest.java | 2 +-
source-jdbc/pom.xml | 10 +-
source-kafka/pom.xml | 10 +-
storage-hbase/pom.xml | 25 +-
storage-stream/pom.xml | 10 +-
stream-coordinator/pom.xml | 10 +-
stream-core/pom.xml | 10 +-
stream-receiver/pom.xml | 10 +-
stream-source-kafka/pom.xml | 10 +-
tool/pom.xml | 19 +-
.../org/apache/kylin/tool/CubeMigrationCLI.java | 9 +-
.../org/apache/kylin/tool/DiagnosisInfoCLI.java | 31 +-
.../kylin/tool/extractor/ClientEnvExtractor.java | 1 -
.../kylin/tool/extractor/CubeMetaExtractor.java | 64 +-
.../kylin/tool/extractor/HBaseUsageExtractor.java | 249 ---
.../kylin/tool/extractor/NamedThreadFactory.java | 34 +-
.../kylin/tool/metrics/systemcube/SCCreator.java | 27 +-
.../streamingv2/StreamingMetadataCreator.java | 100 -
166 files changed, 40804 insertions(+), 5573 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 4934308..041b2c2 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -23,11 +23,11 @@
<artifactId>kylin-assembly</artifactId>
<name>Apache Kylin - Assembly</name>
- <parent>
- <artifactId>kylin</artifactId>
- <groupId>org.apache.kylin</groupId>
- <version>4.0.0-SNAPSHOT</version>
- </parent>
+<!-- <parent>-->
+<!-- <artifactId>kylin</artifactId>-->
+<!-- <groupId>org.apache.kylin</groupId>-->
+<!-- <version>4.0.0-SNAPSHOT</version>-->
+<!-- </parent>-->
<properties>
<shadeBase>org.apache.kylin.job.shaded</shadeBase>
diff --git a/engine-mr/pom.xml b/build-engine/pom.xml
similarity index 87%
copy from engine-mr/pom.xml
copy to build-engine/pom.xml
index 69ab4ab..a23ec18 100644
--- a/engine-mr/pom.xml
+++ b/build-engine/pom.xml
@@ -20,10 +20,10 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
- <artifactId>kylin-engine-mr</artifactId>
+ <artifactId>kylin-build-engine</artifactId>
<packaging>jar</packaging>
- <name>Apache Kylin - MapReduce Engine</name>
- <description>Apache Kylin - MapReduce Engine</description>
+ <name>Apache Kylin - Build Engine</name>
+ <description>Apache Kylin - Build Engine</description>
<parent>
<groupId>org.apache.kylin</groupId>
@@ -54,10 +54,6 @@
<groupId>org.apache.kylin</groupId>
<artifactId>kylin-core-metrics</artifactId>
</dependency>
- <dependency>
- <groupId>org.apache.kylin</groupId>
- <artifactId>kylin-stream-core</artifactId>
- </dependency>
<!-- Env & Test -->
<dependency>
@@ -113,13 +109,6 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.apache.kylin</groupId>
- <artifactId>kylin-stream-core</artifactId>
- <type>test-jar</type>
- <version>${project.parent.version}</version>
- <scope>test</scope>
- </dependency>
</dependencies>
</project>
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/BatchOptimizeJobCheckpointBuilder.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/BatchOptimizeJobCheckpointBuilder.java
new file mode 100644
index 0000000..9704f66
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/BatchOptimizeJobCheckpointBuilder.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
+import org.apache.kylin.engine.mr.steps.UpdateCubeInfoAfterCheckpointStep;
+import org.apache.kylin.job.constant.ExecutableConstants;
+import org.apache.kylin.job.execution.CheckpointExecutable;
+import org.apache.kylin.metadata.project.ProjectInstance;
+import org.apache.kylin.metadata.project.ProjectManager;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Builds the checkpoint job that concludes a batch optimization of a cube: the job first
+ * updates the cube information and then runs the cleanup step contributed by the output side.
+ */
+public class BatchOptimizeJobCheckpointBuilder {
+
+    /** Layout of the timestamp appended to the generated job name. */
+    protected SimpleDateFormat format = new SimpleDateFormat("z yyyy-MM-dd HH:mm:ss", Locale.ROOT);
+
+    final protected CubeInstance cube;
+    final protected String submitter;
+
+    private final IMROutput2.IMRBatchOptimizeOutputSide2 outputSide;
+
+    /**
+     * @param cube      cube to build the checkpoint job for; its first segment must not be null
+     * @param submitter submitter set on the checkpoint job
+     */
+    public BatchOptimizeJobCheckpointBuilder(CubeInstance cube, String submitter) {
+        this.cube = cube;
+        this.submitter = submitter;
+
+        // The output side is resolved from the first segment, so an empty cube is rejected here.
+        Preconditions.checkNotNull(cube.getFirstSegment(), "Cube " + cube + " is empty!!!");
+        this.outputSide = MRUtil.getBatchOptimizeOutputSide2(cube.getFirstSegment());
+    }
+
+    /**
+     * Creates the checkpoint job.
+     *
+     * @return the configured checkpoint job
+     * @throws RuntimeException if the cube belongs to no project or to more than one project
+     */
+    public CheckpointExecutable build() {
+        final KylinConfig config = cube.getConfig();
+        final List<ProjectInstance> owners = ProjectManager.getInstance(config).findProjects(cube.getType(),
+                cube.getName());
+        if (owners == null || owners.isEmpty()) {
+            throw new RuntimeException("Cannot find the project containing the cube " + cube.getName() + "!!!");
+        }
+        if (owners.size() >= 2) {
+            throw new RuntimeException("Find more than one project containing the cube " + cube.getName()
+                    + ". It does't meet the uniqueness requirement!!! ");
+        }
+
+        final CheckpointExecutable job = new CheckpointExecutable();
+        job.setSubmitter(submitter);
+        CubingExecutableUtil.setCubeName(cube.getName(), job.getParams());
+        job.setName(cube.getName() + " - OPTIMIZE CHECKPOINT - " + format.format(new Date()));
+        job.setDeployEnvName(config.getDeployEnv());
+        job.setProjectName(owners.get(0).getName());
+
+        // Phase 1: Update cube information
+        job.addTask(createUpdateCubeInfoAfterCheckpointStep());
+
+        // Phase 2: Garbage collection
+        outputSide.addStepPhase5_Cleanup(job);
+
+        return job;
+    }
+
+    /** Creates the step that updates the cube information, tagged with this cube's name. */
+    private UpdateCubeInfoAfterCheckpointStep createUpdateCubeInfoAfterCheckpointStep() {
+        final UpdateCubeInfoAfterCheckpointStep step = new UpdateCubeInfoAfterCheckpointStep();
+        step.setName(ExecutableConstants.STEP_NAME_UPDATE_CUBE_INFO);
+
+        CubingExecutableUtil.setCubeName(cube.getName(), step.getParams());
+        return step;
+    }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/ByteArrayWritable.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/ByteArrayWritable.java
new file mode 100644
index 0000000..62f20b0
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/ByteArrayWritable.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.engine.mr;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.kylin.common.util.Bytes;
+
+/**
+ * A {@link WritableComparable} over a slice (array, offset, length) of a byte array.
+ *
+ * <p>The constructors and setters keep a reference to the given array; no copy is made.
+ * The serialized form is the slice length as an int followed by the bytes of the slice.
+ */
+public class ByteArrayWritable implements WritableComparable<ByteArrayWritable> {
+
+    private byte[] data;
+    private int offset;
+    private int length;
+
+    /** Creates an empty instance with no backing array. */
+    public ByteArrayWritable() {
+        this(null, 0, 0);
+    }
+
+    /** Creates an instance backed by a new array of {@code capacity} bytes. */
+    public ByteArrayWritable(int capacity) {
+        this(new byte[capacity], 0, capacity);
+    }
+
+    /** Wraps the whole array; a null array yields an empty instance. */
+    public ByteArrayWritable(byte[] data) {
+        this(data, 0, data == null ? 0 : data.length);
+    }
+
+    /** Wraps {@code length} bytes of {@code data} starting at {@code offset}. */
+    public ByteArrayWritable(byte[] data, int offset, int length) {
+        this.data = data;
+        this.offset = offset;
+        this.length = length;
+    }
+
+    /** @return the backing array, possibly null */
+    public byte[] array() {
+        return data;
+    }
+
+    /** @return index of the first byte of the slice within the backing array */
+    public int offset() {
+        return offset;
+    }
+
+    /** @return number of bytes in the slice */
+    public int length() {
+        return length;
+    }
+
+    /**
+     * Re-points this instance at the whole given array. A null array resets the instance to
+     * empty, which matches {@link #ByteArrayWritable(byte[])}.
+     */
+    public void set(byte[] array) {
+        set(array, 0, array == null ? 0 : array.length);
+    }
+
+    /** Re-points this instance at {@code length} bytes of {@code array} starting at {@code offset}. */
+    public void set(byte[] array, int offset, int length) {
+        this.data = array;
+        this.offset = offset;
+        this.length = length;
+    }
+
+    /**
+     * @return a buffer over the slice, or null when there is no backing array; a partial
+     *         slice is returned as a sliced buffer
+     */
+    public ByteBuffer asBuffer() {
+        if (data == null)
+            return null;
+        else if (offset == 0 && length == data.length)
+            return ByteBuffer.wrap(data);
+        else
+            return ByteBuffer.wrap(data, offset, length).slice();
+    }
+
+    @Override
+    public int hashCode() {
+        if (data == null)
+            return 0;
+        else
+            return Bytes.hashCode(data, offset, length);
+    }
+
+    /**
+     * Writes the slice length followed by the slice bytes.
+     */
+    @Override
+    public void write(DataOutput out) throws IOException {
+        out.writeInt(this.length);
+        // DataOutput.write rejects a null array even for zero bytes, so an empty instance
+        // (e.g. one made by the no-arg constructor) must skip the payload write.
+        if (this.length > 0) {
+            out.write(this.data, this.offset, this.length);
+        }
+    }
+
+    /**
+     * Reads a length and that many bytes into a newly allocated array; the offset becomes 0.
+     */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        this.length = in.readInt();
+        this.data = new byte[this.length];
+        in.readFully(this.data, 0, this.length);
+        this.offset = 0;
+    }
+
+    // Below methods copied from BytesWritable
+    /**
+     * Define the sort order of the BytesWritable.
+     * @param that The other bytes writable
+     * @return Positive if left is bigger than right, 0 if they are equal, and
+     *         negative if left is smaller than right.
+     */
+    @Override
+    public int compareTo(ByteArrayWritable that) {
+        return WritableComparator.compareBytes(this.data, this.offset, this.length, that.data, that.offset,
+                that.length);
+    }
+
+    /**
+     * Compares the bytes in this object to the specified byte array
+     * @param that the array to compare against; must not be null
+     * @return Positive if left is bigger than right, 0 if they are equal, and
+     *         negative if left is smaller than right.
+     */
+    public int compareToByteArray(final byte[] that) {
+        return WritableComparator.compareBytes(this.data, this.offset, this.length, that, 0, that.length);
+    }
+
+    /**
+     * Compares by content against another {@code ByteArrayWritable} or a raw {@code byte[]}.
+     * NOTE(review): the {@code byte[]} case is one-directional, since an array never equals
+     * this object.
+     *
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (other instanceof byte[]) {
+            return compareToByteArray((byte[]) other) == 0;
+        }
+        if (other instanceof ByteArrayWritable) {
+            return compareTo((ByteArrayWritable) other) == 0;
+        }
+        return false;
+    }
+
+    /**
+     * Renders the slice as space-separated two-digit lower-case hex values.
+     *
+     * @see java.lang.Object#toString()
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder(3 * this.length);
+        final int endIdx = this.offset + this.length;
+        for (int idx = this.offset; idx < endIdx; idx++) {
+            sb.append(' ');
+            String num = Integer.toHexString(0xff & this.data[idx]);
+            // if it is only one digit, add a leading 0.
+            if (num.length() < 2) {
+                sb.append('0');
+            }
+            sb.append(num);
+        }
+        // Drop the separator that precedes the first byte.
+        return sb.length() > 0 ? sb.substring(1) : "";
+    }
+
+    /** A Comparator optimized for ByteArrayWritable.
+     */
+    public static class Comparator extends WritableComparator {
+        private BytesWritable.Comparator instance = new BytesWritable.Comparator();
+
+        /** constructor */
+        public Comparator() {
+            super(ByteArrayWritable.class);
+        }
+
+        /**
+         * Delegates the raw comparison to {@link BytesWritable.Comparator}.
+         *
+         * @see org.apache.hadoop.io.WritableComparator#compare(byte[], int, int, byte[], int, int)
+         */
+        @Override
+        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+            return instance.compare(b1, s1, l1, b2, s2, l2);
+        }
+    }
+
+    static { // register this comparator
+        WritableComparator.define(ByteArrayWritable.class, new Comparator());
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/CubingJob.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/CubingJob.java
new file mode 100644
index 0000000..dc6350f
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/CubingJob.java
@@ -0,0 +1,413 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.regex.Matcher;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.common.util.StringUtil;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.engine.mr.common.CubeStatsReader;
+import org.apache.kylin.engine.mr.common.MapReduceExecutable;
+import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
+import org.apache.kylin.job.constant.ExecutableConstants;
+import org.apache.kylin.job.engine.JobEngineConfig;
+import org.apache.kylin.job.execution.AbstractExecutable;
+import org.apache.kylin.job.execution.DefaultChainedExecutable;
+import org.apache.kylin.job.execution.ExecutableContext;
+import org.apache.kylin.job.execution.ExecutableState;
+import org.apache.kylin.job.execution.ExecuteResult;
+import org.apache.kylin.job.execution.Output;
+import org.apache.kylin.job.metrics.JobMetricsFacade;
+import org.apache.kylin.job.util.MailNotificationUtil;
+import org.apache.kylin.metadata.project.ProjectInstance;
+import org.apache.kylin.metadata.project.ProjectManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+import com.google.common.collect.Maps;
+
+/**
+ */
+public class CubingJob extends DefaultChainedExecutable {
+
+ private static final Logger logger = LoggerFactory.getLogger(CubingJob.class);
+
+    /** Cubing algorithm of a job; {@code setAlgorithm} stores the constant's name as extra info. */
+    public enum AlgorithmEnum {
+        LAYER,
+        INMEM
+    }
+    /** Type of a cubing job; each type carries a display name and a default priority. */
+    public enum CubingJobTypeEnum {
+        BUILD("BUILD", 20),
+        OPTIMIZE("OPTIMIZE", 5),
+        MERGE("MERGE", 25),
+        STREAM("STREAM", 30);
+
+        private final String name;
+        private final int defaultPriority;
+
+        CubingJobTypeEnum(String name, int priority) {
+            this.name = name;
+            this.defaultPriority = priority;
+        }
+
+        /** @return the priority used for jobs of this type */
+        public int getDefaultPriority() {
+            return defaultPriority;
+        }
+
+        /** @return the display name of this job type */
+        @Override
+        public String toString() {
+            return name;
+        }
+
+        /**
+         * Looks a job type up by display name, ignoring case.
+         *
+         * @param name display name; may be null or empty
+         * @return the matching type, or null when {@code name} is null, empty or unknown
+         */
+        public static CubingJobTypeEnum getByName(String name) {
+            if (Strings.isNullOrEmpty(name)) {
+                return null;
+            }
+            // Upper-case once with a fixed locale instead of once per candidate.
+            final String wanted = name.toUpperCase(Locale.ROOT);
+            for (CubingJobTypeEnum candidate : values()) {
+                if (wanted.equals(candidate.name)) {
+                    return candidate;
+                }
+            }
+            return null;
+        }
+    }
+
+    // 32MB per block created by the first step
+    private static final long MIN_SOURCE_SIZE = 32L * 1024 * 1024;
+
+    // KEYS of Output.extraInfo map, info passed across job steps
+    public static final String SOURCE_RECORD_COUNT = "sourceRecordCount";
+    public static final String SOURCE_SIZE_BYTES = "sourceSizeBytes";
+    public static final String CUBE_SIZE_BYTES = "byteSizeBytes";
+    public static final String MAP_REDUCE_WAIT_TIME = "mapReduceWaitTime";
+
+    // Names of the job parameters read and written through getParam()/setParam()
+    private static final String DEPLOY_ENV_NAME = "envName";
+    private static final String JOB_TYPE = "jobType";
+    private static final String SEGMENT_NAME = "segmentName";
+
+    /** Creates a cubing job of type {@link CubingJobTypeEnum#BUILD} for the given segment. */
+    public static CubingJob createBuildJob(CubeSegment seg, String submitter, JobEngineConfig config) {
+        final String jobType = CubingJobTypeEnum.BUILD.toString();
+        return initCubingJob(seg, jobType, submitter, config);
+    }
+
+    /** Creates a cubing job of type {@link CubingJobTypeEnum#OPTIMIZE} for the given segment. */
+    public static CubingJob createOptimizeJob(CubeSegment seg, String submitter, JobEngineConfig config) {
+        final String jobType = CubingJobTypeEnum.OPTIMIZE.toString();
+        return initCubingJob(seg, jobType, submitter, config);
+    }
+
+    /** Creates a cubing job of type {@link CubingJobTypeEnum#MERGE} for the given segment. */
+    public static CubingJob createMergeJob(CubeSegment seg, String submitter, JobEngineConfig config) {
+        final String jobType = CubingJobTypeEnum.MERGE.toString();
+        return initCubingJob(seg, jobType, submitter, config);
+    }
+
+    /** Creates a cubing job of type {@link CubingJobTypeEnum#STREAM} for the given segment. */
+    public static CubingJob createStreamJob(CubeSegment seg, String submitter, JobEngineConfig config) {
+        final String jobType = CubingJobTypeEnum.STREAM.toString();
+        return initCubingJob(seg, jobType, submitter, config);
+    }
+
+    /**
+     * Creates a cubing job for a segment and fills in its parameters, name, submitter and
+     * notification list.
+     *
+     * @param seg       segment the job works on
+     * @param jobType   job type name, also used as the prefix of the job name
+     * @param submitter submitter set on the job
+     * @param config    job engine configuration; supplies the Kylin config and the time zone
+     * @return the initialized job
+     * @throws RuntimeException if no project contains the cube, or if several do and the
+     *                          configuration does not allow that
+     */
+    private static CubingJob initCubingJob(CubeSegment seg, String jobType, String submitter, JobEngineConfig config) {
+        final KylinConfig kylinConfig = config.getConfig();
+        final CubeInstance cube = seg.getCubeInstance();
+        final List<ProjectInstance> owners = ProjectManager.getInstance(kylinConfig).findProjects(cube.getType(),
+                cube.getName());
+        if (owners == null || owners.isEmpty()) {
+            throw new RuntimeException("Cannot find the project containing the cube " + cube.getName() + "!!!");
+        }
+        if (owners.size() >= 2) {
+            final String complaint = "Find more than one project containing the cube " + cube.getName()
+                    + ". It does't meet the uniqueness requirement!!! ";
+            if (config.getConfig().allowCubeAppearInMultipleProjects()) {
+                // Tolerated by configuration: warn and continue with the first project.
+                logger.warn(complaint);
+            } else {
+                throw new RuntimeException(complaint);
+            }
+        }
+
+        final CubingJob job = new CubingJob();
+        final SimpleDateFormat timestampFormat = new SimpleDateFormat("z yyyy-MM-dd HH:mm:ss", Locale.ROOT);
+        timestampFormat.setTimeZone(TimeZone.getTimeZone(config.getTimeZone()));
+
+        job.setDeployEnvName(kylinConfig.getDeployEnv());
+        job.setProjectName(owners.get(0).getName());
+        job.setJobType(jobType);
+        CubingExecutableUtil.setCubeName(seg.getCubeInstance().getName(), job.getParams());
+        CubingExecutableUtil.setSegmentId(seg.getUuid(), job.getParams());
+        CubingExecutableUtil.setSegmentName(seg.getName(), job.getParams());
+
+        final String jobName = jobType + " CUBE - " + seg.getCubeInstance().getDisplayName() + " - " + seg.getName()
+                + " - " + timestampFormat.format(new Date());
+        job.setName(jobName);
+        job.setSubmitter(submitter);
+        job.setNotifyList(seg.getCubeInstance().getDescriptor().getNotifyList());
+        return job;
+    }
+
+    /** Creates an empty cubing job; the superclass constructor is invoked implicitly. */
+    public CubingJob() {
+    }
+
+    /**
+     * @return the default priority of this job's type, or the superclass default when the
+     *         job type is not a known {@link CubingJobTypeEnum} name
+     */
+    @Override
+    public int getDefaultPriority() {
+        final CubingJobTypeEnum type = CubingJobTypeEnum.getByName(getJobType());
+        return type == null ? super.getDefaultPriority() : type.getDefaultPriority();
+    }
+
+    /** Stores the deploy environment name as a job parameter. */
+    protected void setDeployEnvName(String name) {
+        this.setParam(DEPLOY_ENV_NAME, name);
+    }
+
+    /** @return the deploy environment name stored in the job parameters */
+    public String getDeployEnvName() {
+        final String envName = getParam(DEPLOY_ENV_NAME);
+        return envName;
+    }
+
+    /** @return the job type name stored in the job parameters */
+    public String getJobType() {
+        final String jobType = getParam(JOB_TYPE);
+        return jobType;
+    }
+
+    /** @return the segment name stored in the job parameters */
+    public String getSegmentName() {
+        final String segmentName = getParam(SEGMENT_NAME);
+        return segmentName;
+    }
+
+    /** Stores the job type name as a job parameter. */
+    void setJobType(String jobType) {
+        this.setParam(JOB_TYPE, jobType);
+    }
+
+    /**
+     * Builds the title and content of the notification mail for a job state change.
+     *
+     * @return a (title, content) pair, or null when the state needs no notification or no
+     *         mail template exists for it
+     */
+    @Override
+    protected Pair<String, String> formatNotifications(ExecutableContext context, ExecutableState state) {
+        final String cubeName = CubingExecutableUtil.getCubeName(this.getParams());
+        final CubeInstance cubeInstance = CubeManager.getInstance(context.getConfig()).getCube(cubeName);
+        final Output jobOutput = getManager().getOutput(getId());
+
+        // Errors are always reported; other states only when the cube descriptor lists them.
+        final boolean mustNotify = state == ExecutableState.ERROR
+                || cubeInstance.getDescriptor().getStatusNeedNotify().contains(state.toString());
+        if (!mustNotify) {
+            logger.info("state:" + state + " no need to notify users");
+            return null;
+        }
+
+        if (!MailNotificationUtil.hasMailNotification(state)) {
+            logger.info("Cannot find email template for job state: " + state);
+            return null;
+        }
+
+        final Map<String, Object> dataMap = Maps.newHashMap();
+        dataMap.put("job_name", getName());
+        dataMap.put("env_name", getDeployEnvName());
+        dataMap.put("submitter", StringUtil.noBlank(getSubmitter(), "missing submitter"));
+        dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
+        dataMap.put("project_name", getProjectName());
+        dataMap.put("cube_name", cubeInstance.getName());
+        dataMap.put("source_records_count", String.valueOf(findSourceRecordCount()));
+        dataMap.put("start_time", new Date(getStartTime()).toString());
+        dataMap.put("duration", getDuration() / 60000 + "mins");
+        dataMap.put("mr_waiting", getMapReduceWaitTime() / 60000 + "mins");
+        dataMap.put("last_update_time", new Date(getLastModified()).toString());
+
+        if (state == ExecutableState.ERROR) {
+            // Find the first sub task whose output is in ERROR state.
+            AbstractExecutable failedTask = null;
+            Output failedOutput = null;
+            for (AbstractExecutable candidate : getTasks()) {
+                final Output candidateOutput = getManager().getOutput(candidate.getId());
+                if (candidateOutput.getState() == ExecutableState.ERROR) {
+                    failedTask = candidate;
+                    failedOutput = candidateOutput;
+                    break;
+                }
+            }
+            Preconditions.checkNotNull(failedTask,
+                    "None of the sub tasks of cubing job " + getId() + " is error and this job should become success.");
+
+            dataMap.put("error_step", failedTask.getName());
+            if (failedTask instanceof MapReduceExecutable) {
+                final String mrJobId = failedOutput.getExtra().get(ExecutableConstants.MR_JOB_ID);
+                dataMap.put("mr_job_id", StringUtil.noBlank(mrJobId, "Not initialized"));
+            } else {
+                dataMap.put("mr_job_id", MailNotificationUtil.NA);
+            }
+            final String errorLog = StringUtil.noBlank(jobOutput.getVerboseMsg(), "no error message");
+            dataMap.put("error_log", Matcher.quoteReplacement(errorLog));
+        }
+
+        final String content = MailNotificationUtil.getMailContent(state, dataMap);
+        final String title = MailNotificationUtil.getMailTitle("JOB", state.toString(), getDeployEnvName(),
+                getProjectName(), cubeInstance.getName());
+        return Pair.newPair(title, content);
+    }
+
+    /** Installs the cube-specific config as the thread-local config, then delegates to the superclass. */
+    @Override
+    protected void onExecuteStart(ExecutableContext executableContext) {
+        // Must run before the superclass hook, as in the original statement order.
+        KylinConfig.setAndUnsetThreadLocalConfig(this.getCubeSpecificConfig());
+        super.onExecuteStart(executableContext);
+    }
+
+    /**
+     * Sums the MapReduce wait time of the leading run of succeeded tasks, stores it on the
+     * job, then delegates to the superclass.
+     */
+    @Override
+    protected void onExecuteFinished(ExecuteResult result, ExecutableContext executableContext) {
+        long totalWait = 0L;
+        for (AbstractExecutable task : getTasks()) {
+            // Stop at the first task that did not succeed; later tasks are not counted.
+            if (task.getStatus() != ExecutableState.SUCCEED) {
+                break;
+            }
+            if (task instanceof MapReduceExecutable) {
+                final MapReduceExecutable mrTask = (MapReduceExecutable) task;
+                totalWait += mrTask.getMapReduceWaitTime();
+            }
+        }
+        setMapReduceWaitTime(totalWait);
+        super.onExecuteFinished(result, executableContext);
+    }
+
+ /**
+  * Runs the parent status-change handling and then reports job metrics for the new state.
+  *
+  * @param context the execution context passed through to the parent and to {@code updateMetrics}
+  * @param result the execution result; its throwable is reported when the state is ERROR
+  * @param state the state the job has changed to
+  */
+ @Override
+ protected void onStatusChange(ExecutableContext context, ExecuteResult result, ExecutableState state) {
+     super.onStatusChange(context, result, state);
+
+     updateMetrics(context, result, state);
+ }
+
+ protected void updateMetrics(ExecutableContext context, ExecuteResult result, ExecutableState state) {
+ JobMetricsFacade.JobStatisticsResult jobStats = new JobMetricsFacade.JobStatisticsResult();
+ jobStats.setWrapper(getSubmitter(), getProjectName(), CubingExecutableUtil.getCubeName(getParams()), getId(),
+ getJobType(), getAlgorithm() == null ? "NULL" : getAlgorithm().toString());
+
+ if (state == ExecutableState.SUCCEED) {
+ jobStats.setJobStats(findSourceSizeBytes(), findCubeSizeBytes(), getDuration(), getMapReduceWaitTime(),
+ getPerBytesTimeCost(findSourceSizeBytes(), getDuration()));
+ if (CubingJobTypeEnum.getByName(getJobType()) == CubingJobTypeEnum.BUILD) {
+ jobStats.setJobStepStats(getTaskDurationByName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS),
+ getTaskDurationByName(ExecutableConstants.STEP_NAME_BUILD_DICTIONARY),
+ getTaskDurationByName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE),
+ getTaskDurationByName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE));
+ }
+ } else if (state == ExecutableState.ERROR) {
+ jobStats.setJobException(result.getThrowable() != null ? result.getThrowable() : new Exception());
+ }
+ JobMetricsFacade.updateMetrics(jobStats);
+ }
+
+ private long getTaskDurationByName(String name) {
+ AbstractExecutable task = getTaskByName(name);
+ if (task != null) {
+ return task.getDuration();
+ } else {
+ return 0;
+ }
+ }
+
+ private static double getPerBytesTimeCost(long size, long timeCost) {
+ if (size <= 0) {
+ return 0;
+ }
+ if (size < MIN_SOURCE_SIZE) {
+ size = MIN_SOURCE_SIZE;
+ }
+ return timeCost * 1.0 / size;
+ }
+
+ /** Records the name of the given algorithm in this job's extra info under the key "algorithm". */
+ public void setAlgorithm(AlgorithmEnum alg) {
+     final String algorithmName = alg.name();
+     addExtraInfo("algorithm", algorithmName);
+ }
+
+ /** Returns the algorithm stored in the extra info under "algorithm", or null when none is stored. */
+ public AlgorithmEnum getAlgorithm() {
+     final String storedName = getExtraInfo().get("algorithm");
+     if (storedName == null) {
+         return null;
+     }
+     return AlgorithmEnum.valueOf(storedName);
+ }
+
+ /** Tells whether the stored algorithm is {@code AlgorithmEnum.LAYER}. */
+ public boolean isLayerCubing() {
+     final AlgorithmEnum current = getAlgorithm();
+     return current == AlgorithmEnum.LAYER;
+ }
+
+ /** Tells whether the stored algorithm is {@code AlgorithmEnum.INMEM}. */
+ public boolean isInMemCubing() {
+     final AlgorithmEnum current = getAlgorithm();
+     return current == AlgorithmEnum.INMEM;
+ }
+
+ /** Parses the {@code SOURCE_RECORD_COUNT} extra info as a long, using "0" when it is absent. */
+ public long findSourceRecordCount() {
+     final String recordCount = findExtraInfo(SOURCE_RECORD_COUNT, "0");
+     return Long.parseLong(recordCount);
+ }
+
+ /** Parses the {@code SOURCE_SIZE_BYTES} extra info as a long, using "0" when it is absent. */
+ public long findSourceSizeBytes() {
+     final String sourceSize = findExtraInfo(SOURCE_SIZE_BYTES, "0");
+     return Long.parseLong(sourceSize);
+ }
+
+ /** Parses the {@code CUBE_SIZE_BYTES} extra info as a long, using "0" when it is absent. */
+ public long findCubeSizeBytes() {
+     // search BACKWARD so that the last step claiming a cube size wins
+     final String cubeSize = findExtraInfoBackward(CUBE_SIZE_BYTES, "0");
+     return Long.parseLong(cubeSize);
+ }
+
+ /**
+  * Compares, level by level, the real cuboid output size of this job with the size estimated
+  * from the segment's cube statistics.
+  *
+  * @param seg the segment whose cuboid layers are inspected
+  * @param config config used to read the cube statistics and to resolve the HDFS working directory
+  * @return one entry per level from 0 to the scheduler's build level (inclusive), holding
+  *         {@code real / estimated}, or {@code -1.0} when either size is zero; {@code null} when
+  *         the statistics cannot be read
+  */
+ public List<Double> findEstimateRatio(CubeSegment seg, KylinConfig config) {
+     CubeInstance cubeInstance = seg.getCubeInstance();
+     CuboidScheduler cuboidScheduler = cubeInstance.getCuboidScheduler();
+     List<List<Long>> layeredCuboids = cuboidScheduler.getCuboidsByLayer();
+     int totalLevels = cuboidScheduler.getBuildLevel();
+
+     String cuboidRootPath = getCuboidRootPath(seg, config);
+
+     Map<Long, Double> estimatedSizeMap;
+     try {
+         estimatedSizeMap = new CubeStatsReader(seg, config).getCuboidSizeMap(true);
+     } catch (IOException e) {
+         // pass the exception as the trailing argument so the cause and stack trace are logged
+         logger.warn("Cannot get segment {} estimated size map", seg.getName(), e);
+         // null return is kept for backward compatibility with existing callers
+         return null;
+     }
+
+     List<Double> result = Lists.newArrayList();
+     for (int level = 0; level <= totalLevels; level++) {
+         double levelEstimatedSize = 0;
+         for (Long cuboidId : layeredCuboids.get(level)) {
+             // single lookup; cuboids without an estimate contribute nothing
+             Double estimated = estimatedSizeMap.get(cuboidId);
+             if (estimated != null) {
+                 levelEstimatedSize += estimated;
+             }
+         }
+
+         double levelRealSize = getRealSizeByLevel(cuboidRootPath, level);
+
+         if (levelEstimatedSize == 0.0 || levelRealSize == 0.0) {
+             // -1.0 marks a level for which no meaningful ratio can be computed
+             result.add(level, -1.0);
+         } else {
+             result.add(level, levelRealSize / levelEstimatedSize);
+         }
+     }
+
+     return result;
+ }
+
+
+ /**
+  * Returns the total length of the cuboid output of one level, in megabytes.
+  *
+  * @param rootPath the cuboid root path of the job
+  * @param level the cuboid level whose output directory is measured
+  * @return the content length divided by 1024 * 1024, keeping the fractional part; 0 when the
+  *         size cannot be determined
+  */
+ private double getRealSizeByLevel(String rootPath, int level) {
+     try {
+         String levelPath = JobBuilderSupport.getCuboidOutputPathsByLevel(rootPath, level);
+         FileSystem fs = HadoopUtil.getFileSystem(levelPath);
+         long lengthInBytes = fs.getContentSummary(new Path(levelPath)).getLength();
+         // floating-point division: integer division would report every level below 1 MB as 0
+         return lengthInBytes / (1024.0 * 1024.0);
+     } catch (Exception e) {
+         // best effort: log with the stack trace and report "unknown" as 0
+         logger.warn("get level real size failed.", e);
+         return 0L;
+     }
+ }
+
+ private String getCuboidRootPath(CubeSegment seg, KylinConfig kylinConfig) {
+ String rootDir = kylinConfig.getHdfsWorkingDirectory();
+ if (!rootDir.endsWith("/")) {
+ rootDir = rootDir + "/";
+ }
+ String jobID = this.getId();
+ return rootDir + "kylin-" + jobID + "/" + seg.getRealization().getName() + "/cuboid/";
+ }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/DFSFileTable.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/DFSFileTable.java
new file mode 100644
index 0000000..c036445
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/DFSFileTable.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.source.IReadableTable;
+
+/**
+ * A readable table backed by a file, or a directory of files, on a Hadoop file system.
+ */
+public class DFSFileTable implements IReadableTable {
+
+    public static final String DELIM_AUTO = "auto";
+    public static final String DELIM_COMMA = ",";
+
+    String path;
+    String delim;
+    int nColumns;
+
+    /** Creates a table that uses {@link #DELIM_AUTO} as its delimiter setting. */
+    public DFSFileTable(String path, int nColumns) {
+        this(path, DELIM_AUTO, nColumns);
+    }
+
+    /** Creates a table for the given path, delimiter setting and column count. */
+    public DFSFileTable(String path, String delim, int nColumns) {
+        this.path = path;
+        this.delim = delim;
+        this.nColumns = nColumns;
+    }
+
+    /** Returns the delimiter setting this table was created with. */
+    public String getColumnDelimeter() {
+        return delim;
+    }
+
+    /** Opens a new {@link DFSFileTableReader} over this table's path. */
+    @Override
+    public TableReader getReader() throws IOException {
+        return new DFSFileTableReader(path, delim, nColumns);
+    }
+
+    /**
+     * Builds a signature from the path, total size and last-modified time.
+     * A missing path yields size -1 and last-modified 0.
+     */
+    @Override
+    public TableSignature getSignature() throws IOException {
+        try {
+            Pair<Long, Long> stats = getSizeAndLastModified(path);
+            return new TableSignature(path, stats.getFirst(), stats.getSecond());
+        } catch (FileNotFoundException missing) {
+            return new TableSignature(path, -1L, 0L);
+        }
+    }
+
+    /** Returns false when looking up the path raises {@link FileNotFoundException}, true otherwise. */
+    @Override
+    public boolean exists() throws IOException {
+        boolean found;
+        try {
+            getSizeAndLastModified(path);
+            found = true;
+        } catch (FileNotFoundException missing) {
+            found = false;
+        }
+        return found;
+    }
+
+    @Override
+    public String toString() {
+        return path;
+    }
+
+    /**
+     * Returns the summed length and the latest modification time of the file at {@code path},
+     * or of the entries listed for it when it is not a plain file.
+     */
+    public static Pair<Long, Long> getSizeAndLastModified(String path) throws IOException {
+        FileSystem fs = HadoopUtil.getFileSystem(path);
+
+        // a plain file stands for itself; otherwise take whatever the listing returns
+        FileStatus rootStatus = fs.getFileStatus(new Path(path));
+        ArrayList<FileStatus> candidates = new ArrayList<>();
+        if (!rootStatus.isFile()) {
+            candidates.addAll(Arrays.asList(fs.listStatus(new Path(path))));
+        } else {
+            candidates.add(rootStatus);
+        }
+
+        long totalLength = 0;
+        long newestModification = 0;
+        for (FileStatus candidate : candidates) {
+            totalLength += candidate.getLen();
+            if (candidate.getModificationTime() > newestModification) {
+                newestModification = candidate.getModificationTime();
+            }
+        }
+
+        return Pair.newPair(totalLength, newestModification);
+    }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/DFSFileTableReader.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/DFSFileTableReader.java
new file mode 100644
index 0000000..0c9c3fc
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/DFSFileTableReader.java
@@ -0,0 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.io.BufferedReader;
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.StringSplitter;
+import org.apache.kylin.source.IReadableTable.TableReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tables are typically CSV or SEQ file.
+ *
+ * @author yangli9
+ */
+public class DFSFileTableReader implements TableReader {
+
+ private static final Logger logger = LoggerFactory.getLogger(DFSFileTableReader.class);
+ private static final char CSV_QUOTE = '"';
+ private static final String[] DETECT_DELIMS = new String[] { "\177", "|", "\t", "," };
+
+ private String filePath;
+ private String delim;
+ private List<RowReader> readerList;
+
+ private String curLine;
+ private String[] curColumns;
+ private int expectedColumnNumber = -1; // helps delimiter detection
+
+ public DFSFileTableReader(String filePath, int expectedColumnNumber) throws IOException {
+ this(filePath, DFSFileTable.DELIM_AUTO, expectedColumnNumber);
+ }
+
+ public DFSFileTableReader(String filePath, String delim, int expectedColumnNumber) throws IOException {
+ filePath = HadoopUtil.fixWindowsPath(filePath);
+ this.filePath = filePath;
+ this.delim = delim;
+ this.expectedColumnNumber = expectedColumnNumber;
+ this.readerList = new ArrayList<RowReader>();
+
+ FileSystem fs = HadoopUtil.getFileSystem(filePath);
+
+ ArrayList<FileStatus> allFiles = new ArrayList<>();
+ FileStatus status = fs.getFileStatus(new Path(filePath));
+ if (status.isFile()) {
+ allFiles.add(status);
+ } else {
+ FileStatus[] listStatus = fs.listStatus(new Path(filePath));
+ allFiles.addAll(Arrays.asList(listStatus));
+ }
+
+ try {
+ for (FileStatus f : allFiles) {
+ RowReader rowReader = new SeqRowReader(HadoopUtil.getCurrentConfiguration(), f.getPath().toString());
+ this.readerList.add(rowReader);
+ }
+ } catch (IOException e) {
+ if (isExceptionSayingNotSeqFile(e) == false)
+ throw e;
+
+ this.readerList = new ArrayList<RowReader>();
+ for (FileStatus f : allFiles) {
+ RowReader rowReader = new CsvRowReader(fs, f.getPath().toString());
+ this.readerList.add(rowReader);
+ }
+ }
+ }
+
+ private boolean isExceptionSayingNotSeqFile(IOException e) {
+ if (e.getMessage() != null && e.getMessage().contains("not a SequenceFile"))
+ return true;
+
+ if (e instanceof EOFException) // in case the file is very very small
+ return true;
+
+ return false;
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ int curReaderIndex = -1;
+ RowReader curReader;
+
+ while (++curReaderIndex < readerList.size()) {
+ curReader = readerList.get(curReaderIndex);
+ curLine = curReader.nextLine();
+ curColumns = null;
+
+ if (curLine != null) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public String getLine() {
+ return curLine;
+ }
+
+ @Override
+ public String[] getRow() {
+ if (curColumns == null) {
+ if (DFSFileTable.DELIM_AUTO.equals(delim))
+ delim = autoDetectDelim(curLine);
+
+ if (delim == null)
+ curColumns = new String[] { curLine };
+ else
+ curColumns = split(curLine, delim);
+ }
+ return curColumns;
+ }
+
+ private String[] split(String line, String delim) {
+ // FIXME CVS line should be parsed considering escapes
+ String[] str = StringSplitter.split(line, delim);
+
+ // un-escape CSV
+ if (DFSFileTable.DELIM_COMMA.equals(delim)) {
+ for (int i = 0; i < str.length; i++) {
+ str[i] = unescapeCsv(str[i]);
+ }
+ }
+
+ return str;
+ }
+
+ private String unescapeCsv(String str) {
+ if (str == null || str.length() < 2)
+ return str;
+
+ str = StringEscapeUtils.unescapeCsv(str);
+
+ // unescapeCsv may not remove the outer most quotes
+ if (str.charAt(0) == CSV_QUOTE && str.charAt(str.length() - 1) == CSV_QUOTE)
+ str = str.substring(1, str.length() - 1);
+
+ return str;
+ }
+
+ @Override
+ public void close() {
+ for (RowReader reader : readerList) {
+ IOUtils.closeQuietly(reader);
+ }
+ }
+
+ private String autoDetectDelim(String line) {
+ if (expectedColumnNumber > 0) {
+ for (String delim : DETECT_DELIMS) {
+ if (StringSplitter.split(line, delim).length == expectedColumnNumber) {
+ logger.info("Auto detect delim to be '" + delim + "', split line to " + expectedColumnNumber + " columns -- " + line);
+ return delim;
+ }
+ }
+ }
+
+ logger.info("Auto detect delim to be null, will take THE-WHOLE-LINE as a single value, for " + filePath);
+ return null;
+ }
+
+ // ============================================================================
+
+ private interface RowReader extends Closeable {
+ String nextLine() throws IOException; // return null on EOF
+ }
+
+ private class SeqRowReader implements RowReader {
+ Reader reader;
+ Writable key;
+ Text value;
+
+ SeqRowReader(Configuration hconf, String path) throws IOException {
+ reader = new Reader(hconf, SequenceFile.Reader.file(new Path(path)));
+ key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
+ value = new Text();
+ }
+
+ @Override
+ public String nextLine() throws IOException {
+ boolean hasNext = reader.next(key, value);
+ if (hasNext)
+ return Bytes.toString(value.getBytes(), 0, value.getLength());
+ else
+ return null;
+ }
+
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+ }
+
+ private class CsvRowReader implements RowReader {
+ BufferedReader reader;
+
+ CsvRowReader(FileSystem fs, String path) throws IOException {
+ FSDataInputStream in = fs.open(new Path(path));
+ reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
+ }
+
+ @Override
+ public String nextLine() throws IOException {
+ return reader.readLine();
+ }
+
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+
+ }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/IInput.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/IInput.java
new file mode 100644
index 0000000..758b081
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/IInput.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.engine.mr;
+
+import org.apache.kylin.job.execution.DefaultChainedExecutable;
+import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
+import org.apache.kylin.metadata.model.ISegment;
+
+public interface IInput {
+
+    /** Returns the helper that takes part in the batch cubing job flow for the given flat table. */
+    IBatchCubingInputSide getBatchCubingInputSide(IJoinedFlatTableDesc flatDesc);
+
+    /** Returns the helper that takes part in the batch merge job flow for the given segment. */
+    IBatchMergeInputSide getBatchMergeInputSide(ISegment seg);
+
+    /** Input-side steps of the batch cubing job flow. */
+    interface IBatchCubingInputSide {
+        /** Adds the step that creates the intermediate flat table defined by CubeJoinedFlatTableDesc. */
+        void addStepPhase1_CreateFlatTable(DefaultChainedExecutable jobFlow);
+
+        /** Adds the step that does the necessary clean-up, such as deleting the intermediate flat table. */
+        void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow);
+    }
+
+    /** Input-side steps of the batch merge job flow. */
+    interface IBatchMergeInputSide {
+
+        /** Adds the step that runs before the dictionaries and the cube are merged. */
+        void addStepPhase1_MergeDictionary(DefaultChainedExecutable jobFlow);
+
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/ILookupMaterializer.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/ILookupMaterializer.java
new file mode 100644
index 0000000..6ec4857
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/ILookupMaterializer.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import org.apache.kylin.cube.CubeInstance;
+
+public interface ILookupMaterializer {
+ /**
+ * Materialize a single lookup table.
+ * @param context materialize context, the snapshotPath of lookup table should be put into context
+ * via {@code LookupMaterializeContext.addLookupSnapshotPath} method
+ * @param cube the cube instance passed along with the table name (presumably the cube that uses the lookup table; confirm against implementations)
+ * @param lookupTableName name of the lookup table to materialize
+ */
+ void materializeLookupTable(LookupMaterializeContext context, CubeInstance cube, String lookupTableName);
+
+ /**
+ * Materialize all ext lookup tables in the cube.
+ * @param context materialize context, the snapshotPath of lookup table should be put into context
+ * via {@code LookupMaterializeContext.addLookupSnapshotPath} method
+ * @param cube the cube whose ext lookup tables are materialized
+ */
+ void materializeLookupTablesForCube(LookupMaterializeContext context, CubeInstance cube);
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/IMRInput.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/IMRInput.java
new file mode 100644
index 0000000..74153e0
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/IMRInput.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.util.Collection;
+
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.kylin.metadata.model.TableDesc;
+
+/**
+ * Contract an ISource must adapt to in order to serve as input of the MapReduce build engine.
+ */
+public interface IMRInput extends IInput {
+
+    /** Returns an input format helper that reads from the specified table. */
+    IMRTableInputFormat getTableInputFormat(TableDesc table, String uuid);
+
+    /**
+     * Utility that configures a mapper to read from a table.
+     */
+    interface IMRTableInputFormat {
+
+        /** Configures the InputFormat of the given job. */
+        void configureJob(Job job);
+
+        /** Parses a mapper input object into column values. */
+        Collection<String[]> parseMapperInput(Object mapperInput);
+
+        /** Returns the signature of the given input split. */
+        String getInputSplitSignature(InputSplit inputSplit);
+    }
+
+    /**
+     * Participates in the batch cubing flow as the input side. Responsible for creating the
+     * intermediate flat table (Phase 1) and cleaning up any leftover (Phase 4).
+     *
+     * - Phase 1: Create Flat Table
+     * - Phase 2: Build Dictionary (with FlatTableInputFormat)
+     * - Phase 3: Build Cube (with FlatTableInputFormat)
+     * - Phase 4: Update Metadata & Cleanup
+     */
+    interface IMRBatchCubingInputSide extends IBatchCubingInputSide {
+
+        /** Returns an input format helper that reads from the intermediate flat table. */
+        IMRTableInputFormat getFlatTableInputFormat();
+    }
+
+    /** Input side of the batch merge flow; adds nothing to {@code IBatchMergeInputSide}. */
+    interface IMRBatchMergeInputSide extends IBatchMergeInputSide {
+
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/IMROutput.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/IMROutput.java
new file mode 100644
index 0000000..beebb4e
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/IMROutput.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.job.execution.DefaultChainedExecutable;
+
+public interface IMROutput {
+
+    /** Returns the helper that takes part in the batch cubing job flow. */
+    IMRBatchCubingOutputSide getBatchCubingOutputSide(CubeSegment seg);
+
+    /**
+     * Participates in the batch cubing flow as the output side. Responsible for saving
+     * the cuboid output to storage (Phase 3).
+     *
+     * - Phase 1: Create Flat Table
+     * - Phase 2: Build Dictionary
+     * - Phase 3: Build Cube
+     * - Phase 4: Update Metadata & Cleanup
+     */
+    interface IMRBatchCubingOutputSide {
+
+        /**
+         * Adds the step that saves the cuboid output from HDFS to storage.
+         *
+         * The cuboid output is a directory of sequence files, where key is CUBOID+D1+D2+..+Dn,
+         * value is M1+M2+..+Mm. CUBOID is 8 bytes cuboid ID; Dx is dimension value with
+         * dictionary encoding; Mx is measure value serialization form.
+         */
+        void addStepPhase3_BuildCube(DefaultChainedExecutable jobFlow, String cuboidRootPath);
+
+        /** Adds the step that does any necessary clean-up. */
+        void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow);
+    }
+
+    /** Returns the helper that takes part in the batch merge job flow. */
+    IMRBatchMergeOutputSide getBatchMergeOutputSide(CubeSegment seg);
+
+    /**
+     * Participates in the batch merge flow as the output side. Responsible for saving
+     * the cuboid output to storage (Phase 2).
+     *
+     * - Phase 1: Merge Dictionary
+     * - Phase 2: Merge Cube
+     * - Phase 3: Update Metadata & Cleanup
+     */
+    interface IMRBatchMergeOutputSide {
+
+        /**
+         * Adds the step that saves the cuboid output from HDFS to storage.
+         *
+         * The cuboid output is a directory of sequence files, where key is CUBOID+D1+D2+..+Dn,
+         * value is M1+M2+..+Mm. CUBOID is 8 bytes cuboid ID; Dx is dimension value with
+         * dictionary encoding; Mx is measure value serialization form.
+         */
+        void addStepPhase2_BuildCube(DefaultChainedExecutable jobFlow, String cuboidRootPath);
+
+        /** Adds the step that does any necessary clean-up. */
+        void addStepPhase3_Cleanup(DefaultChainedExecutable jobFlow);
+    }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/IMROutput2.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/IMROutput2.java
new file mode 100644
index 0000000..2334f5e
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/IMROutput2.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.util.List;
+
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.job.execution.DefaultChainedExecutable;
+
+public interface IMROutput2 {
+
+    /** Returns the helper that takes part in the batch cubing job flow. */
+    IMRBatchCubingOutputSide2 getBatchCubingOutputSide(CubeSegment seg);
+
+    /**
+     * Participates in the batch cubing flow as the output side. Responsible for saving
+     * the cuboid output to storage at the end of Phase 3.
+     *
+     * - Phase 1: Create Flat Table
+     * - Phase 2: Build Dictionary
+     * - Phase 3: Build Cube
+     * - Phase 4: Update Metadata & Cleanup
+     */
+    interface IMRBatchCubingOutputSide2 {
+
+        /** Adds the step that runs after the dictionary build and before the cube build. */
+        void addStepPhase2_BuildDictionary(DefaultChainedExecutable jobFlow);
+
+        /**
+         * Adds the step that saves cuboids from HDFS to storage.
+         *
+         * The cuboid output is a directory of sequence files, where key is CUBOID+D1+D2+..+Dn,
+         * value is M1+M2+..+Mm. CUBOID is 8 bytes cuboid ID; Dx is dimension value with
+         * dictionary encoding; Mx is measure value serialization form.
+         */
+        void addStepPhase3_BuildCube(DefaultChainedExecutable jobFlow);
+
+        /** Adds the step that does any necessary clean-up. */
+        void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow);
+
+        /** Returns the format helper used to configure job input and output for cubing. */
+        IMROutputFormat getOutputFormat();
+
+    }
+
+    /** Configures the input and output formats of a cubing job. */
+    interface IMROutputFormat {
+
+        /** Configures the InputFormat of the given job. */
+        void configureJobInput(Job job, String input) throws Exception;
+
+        /** Configures the OutputFormat of the given job. */
+        void configureJobOutput(Job job, String output, CubeSegment segment, CuboidScheduler cuboidScheduler, int level) throws Exception;
+
+    }
+
+    /** Returns the helper that takes part in the batch merge job flow. */
+    IMRBatchMergeOutputSide2 getBatchMergeOutputSide(CubeSegment seg);
+
+    /**
+     * Participates in the batch merge flow as the output side. Responsible for saving
+     * the cuboid output to storage at the end of Phase 2.
+     *
+     * - Phase 1: Merge Dictionary
+     * - Phase 2: Merge Cube
+     * - Phase 3: Update Metadata & Cleanup
+     */
+    interface IMRBatchMergeOutputSide2 {
+
+        /** Adds the step that runs after the dictionary merge and before the cube merge. */
+        void addStepPhase1_MergeDictionary(DefaultChainedExecutable jobFlow);
+
+        /**
+         * Adds the step that saves the cuboid output from HDFS to storage.
+         *
+         * The cuboid output is a directory of sequence files, where key is CUBOID+D1+D2+..+Dn,
+         * value is M1+M2+..+Mm. CUBOID is 8 bytes cuboid ID; Dx is dimension value with
+         * dictionary encoding; Mx is measure value serialization form.
+         */
+        void addStepPhase2_BuildCube(CubeSegment set, List<CubeSegment> mergingSegments, DefaultChainedExecutable jobFlow);
+
+        /** Adds the step that does any necessary clean-up. */
+        void addStepPhase3_Cleanup(DefaultChainedExecutable jobFlow);
+
+        /** Returns the format helper used to configure job input and output for merging. */
+        IMRMergeOutputFormat getOutputFormat();
+    }
+
+    /** Configures the input and output formats of a merge job. */
+    interface IMRMergeOutputFormat {
+
+        /** Configures the InputFormat of the given job. */
+        void configureJobInput(Job job, String input) throws Exception;
+
+        /** Configures the OutputFormat of the given job. */
+        void configureJobOutput(Job job, String output, CubeSegment segment) throws Exception;
+
+        /** Finds the segment of the given cube that the file split comes from. */
+        CubeSegment findSourceSegment(FileSplit fileSplit, CubeInstance cube);
+    }
+
+    /** Returns the helper that takes part in the batch optimize job flow. */
+    IMRBatchOptimizeOutputSide2 getBatchOptimizeOutputSide(CubeSegment seg);
+
+    /**
+     * Participates in the batch optimize flow as the output side. Responsible for saving
+     * the cuboid output to storage at the end of Phase 3.
+     *
+     * - Phase 1: Filter Recommended Cuboid Data
+     * - Phase 2: Copy Dictionary & Calculate Statistics & Update Reused Cuboid Shard
+     * - Phase 3: Build Cube
+     * - Phase 4: Cleanup Optimize
+     * - Phase 5: Update Metadata & Cleanup
+     */
+    interface IMRBatchOptimizeOutputSide2 {
+
+        /** Creates the HTable based on recommended cuboids and statistics. */
+        void addStepPhase2_CreateHTable(DefaultChainedExecutable jobFlow);
+
+        /** Builds only the missing cuboids. */
+        void addStepPhase3_BuildCube(DefaultChainedExecutable jobFlow);
+
+        /** Cleans up intermediate cuboid data on HDFS. */
+        void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow);
+
+        /** Invoked by the checkpoint job; cleans up old segments' HTables and related working directory. */
+        void addStepPhase5_Cleanup(DefaultChainedExecutable jobFlow);
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
new file mode 100644
index 0000000..299a2bc
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/JobBuilderSupport.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.StorageURL;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.job.engine.JobEngineConfig;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ * Hold reusable steps for builders.
+ */
+public class JobBuilderSupport {
+
+ final protected JobEngineConfig config;
+ final protected CubeSegment seg;
+ final protected String submitter;
+ final protected Integer priorityOffset;
+
+ final public static String LayeredCuboidFolderPrefix = "level_";
+
+ final public static String PathNameCuboidBase = "base_cuboid";
+ final public static String PathNameCuboidOld = "old";
+ final public static String PathNameCuboidInMem = "in_memory";
+ final public static Pattern JOB_NAME_PATTERN = Pattern.compile("kylin-([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})");
+
+ public JobBuilderSupport(CubeSegment seg, String submitter) {
+ this(seg, submitter, 0);
+ }
+
+ public JobBuilderSupport(CubeSegment seg, String submitter, Integer priorityOffset) {
+ Preconditions.checkNotNull(seg, "segment cannot be null");
+ this.config = new JobEngineConfig(seg.getConfig());
+ this.seg = seg;
+ this.submitter = submitter;
+ this.priorityOffset = priorityOffset;
+ }
+
+ // ============================================================================
+
+ public String getJobWorkingDir(String jobId) {
+ return getJobWorkingDir(config, jobId);
+ }
+
+ public String getRealizationRootPath(String jobId) {
+ return getJobWorkingDir(jobId) + "/" + seg.getRealization().getName();
+ }
+
+ public String getCuboidRootPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/cuboid/";
+ }
+
+ public String getCuboidRootPath(CubeSegment seg) {
+ return getCuboidRootPath(seg.getLastBuildJobID());
+ }
+
+ public void appendMapReduceParameters(StringBuilder buf) {
+ appendMapReduceParameters(buf, JobEngineConfig.DEFAULT_JOB_CONF_SUFFIX);
+ }
+
+ public void appendMapReduceParameters(StringBuilder buf, String jobType) {
+ try {
+ String jobConf = config.getHadoopJobConfFilePath(jobType);
+ if (jobConf != null && jobConf.length() > 0) {
+ buf.append(" -conf ").append(jobConf);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public String getFactDistinctColumnsPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/fact_distinct_columns";
+ }
+
+ public String getStatisticsPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS;
+ }
+
+ public String getShrunkenDictionaryPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/dictionary_shrunken";
+ }
+
+ public String getDictRootPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/dict";
+ }
+
+ public String getDictInfoPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/dict_info";
+ }
+
+ public String getOptimizationRootPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/optimize";
+ }
+
+ public String getOptimizationStatisticsPath(String jobId) {
+ return getOptimizationRootPath(jobId) + "/statistics";
+ }
+
+ public String getOptimizationCuboidPath(String jobId) {
+ return getOptimizationRootPath(jobId) + "/cuboid/";
+ }
+
+ public String getHBaseConfFilePath(String jobId) {
+ return getJobWorkingDir(jobId) + "/hbase-conf.xml";
+ }
+
+ public String getCounterOutputPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/counter";
+ }
+
+ // ============================================================================
+ // static methods also shared by other job flow participant
+ // ----------------------------------------------------------------------------
+
+ public static String getJobWorkingDir(JobEngineConfig conf, String jobId) {
+ return getJobWorkingDir(conf.getHdfsWorkingDirectory(), jobId);
+ }
+
+ public static String getJobWorkingDir(String hdfsDir, String jobId) {
+ if (!hdfsDir.endsWith("/")) {
+ hdfsDir = hdfsDir + "/";
+ }
+ return hdfsDir + "kylin-" + jobId;
+ }
+
+ public static StringBuilder appendExecCmdParameters(StringBuilder buf, String paraName, String paraValue) {
+ return buf.append(" -").append(paraName).append(" ").append(paraValue);
+ }
+
+ public static String getCuboidOutputPathsByLevel(String cuboidRootPath, int level) {
+ if (level == 0) {
+ return cuboidRootPath + LayeredCuboidFolderPrefix + PathNameCuboidBase;
+ } else {
+ return cuboidRootPath + LayeredCuboidFolderPrefix + level + "_cuboid";
+ }
+ }
+
+ public static String getBaseCuboidPath(String cuboidRootPath) {
+ return cuboidRootPath + PathNameCuboidBase;
+ }
+
+ public static String getInMemCuboidPath(String cuboidRootPath) {
+ return cuboidRootPath + PathNameCuboidInMem;
+ }
+
+ public String getDumpMetadataPath(String jobId) {
+ return getRealizationRootPath(jobId) + "/metadata";
+ }
+
+ public static String extractJobIDFromPath(String path) {
+ Matcher matcher = JOB_NAME_PATTERN.matcher(path);
+ // check the first occurrence
+ if (matcher.find()) {
+ return matcher.group(1);
+ } else {
+ throw new IllegalStateException("Can not extract job ID from file path : " + path);
+ }
+ }
+
+ public String getSegmentMetadataUrl(KylinConfig kylinConfig, String jobId) {
+ Map<String, String> param = new HashMap<>();
+ param.put("path", getDumpMetadataPath(jobId));
+ return new StorageURL(kylinConfig.getMetadataUrl().getIdentifier(), "hdfs", param).toString();
+ }
+
+ public static void scanFiles(String input, FileSystem fs, List<FileStatus> outputs) throws IOException {
+ Path path = new Path(input);
+ if (!fs.exists(path)) {
+ return;
+ }
+ FileStatus[] fileStatuses = fs.listStatus(path, p -> !p.getName().startsWith("_"));
+ for (FileStatus stat : fileStatuses) {
+ if (stat.isDirectory()) {
+ scanFiles(stat.getPath().toString(), fs, outputs);
+ } else {
+ outputs.add(stat);
+ }
+ }
+ }
+
+ public static long getFileSize(String input, FileSystem fs) throws IOException {
+ List<FileStatus> outputs = Lists.newArrayList();
+ scanFiles(input, fs, outputs);
+ long size = 0L;
+ for (FileStatus stat: outputs) {
+ size += stat.getLen();
+ }
+ return size;
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
new file mode 100644
index 0000000..81ca5d5
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.MemoryBudgetController;
+import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ */
+public class KylinMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
+ private static final Logger logger = LoggerFactory.getLogger(KylinMapper.class);
+
+ protected int mapCounter = 0;
+
+ protected void bindCurrentConfiguration(Configuration conf) {
+ logger.info("The conf for current mapper will be " + System.identityHashCode(conf));
+ HadoopUtil.setCurrentConfiguration(conf);
+ }
+
+ @Override
+ final protected void setup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ try {
+ logger.info("Do setup, available memory: {}m", MemoryBudgetController.getSystemAvailMB());
+ doSetup(context);
+ } catch (IOException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (InterruptedException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (RuntimeException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (Error ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ }
+ }
+
+ protected void doSetup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ // NOTHING
+ }
+
+ @Override
+ final public void map(KEYIN key, VALUEIN value, Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ try {
+ if (mapCounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
+ logger.info("Accepting Mapper Key with ordinal: " + mapCounter);
+ logger.info("Do map, available memory: {}m", MemoryBudgetController.getSystemAvailMB());
+ }
+ doMap(key, value, context);
+ } catch (IOException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (InterruptedException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (RuntimeException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (Error ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ }
+ }
+
+ protected void doMap(KEYIN key, VALUEIN value, Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ super.map(key, value, context);
+ }
+
+ @Override
+ final protected void cleanup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ try {
+ logger.info("Do cleanup, available memory: {}m", MemoryBudgetController.getSystemAvailMB());
+ doCleanup(context);
+ logger.info("Total rows: {}", mapCounter);
+ } catch (IOException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (InterruptedException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (RuntimeException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (Error ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ }
+ }
+
+ protected void doCleanup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
new file mode 100644
index 0000000..28c62db
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.MemoryBudgetController;
+import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ */
+public class KylinReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
+ private static final Logger logger = LoggerFactory.getLogger(KylinReducer.class);
+
+ protected int reduceCounter = 0;
+
+ protected void bindCurrentConfiguration(Configuration conf) {
+ HadoopUtil.setCurrentConfiguration(conf);
+ }
+
+ @Override
+ final protected void setup(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ try {
+ logger.info("Do setup, available memory: {}m", MemoryBudgetController.getSystemAvailMB());
+ doSetup(context);
+ } catch (IOException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (InterruptedException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (RuntimeException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (Error ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ }
+ }
+
+ protected void doSetup(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ // NOTHING
+ }
+
+ @Override
+ final public void reduce(KEYIN key, Iterable<VALUEIN> values,
+ Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
+ try {
+ if (reduceCounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
+ logger.info("Accepting Reducer Key with ordinal: " + reduceCounter);
+ logger.info("Do reduce, available memory: {}m", MemoryBudgetController.getSystemAvailMB());
+ }
+
+ doReduce(key, values, context);
+ } catch (IOException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (InterruptedException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (RuntimeException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (Error ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ }
+ }
+
+ protected void doReduce(KEYIN key, Iterable<VALUEIN> values,
+ Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
+ super.reduce(key, values, context);
+ }
+
+ @Override
+ final protected void cleanup(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ try {
+ logger.info("Do cleanup, available memory: {}m", MemoryBudgetController.getSystemAvailMB());
+ doCleanup(context);
+ logger.info("Total rows: " + reduceCounter);
+ } catch (IOException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (InterruptedException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (RuntimeException ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ } catch (Error ex) { // KYLIN-2170
+ logger.error("", ex);
+ throw ex;
+ }
+ }
+
+ protected void doCleanup(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
+ throws IOException, InterruptedException {
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/LookupMaterializeContext.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/LookupMaterializeContext.java
new file mode 100644
index 0000000..4fa9126
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/LookupMaterializeContext.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.kylin.common.util.StringUtil;
+import org.apache.kylin.job.execution.DefaultChainedExecutable;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Carries a job flow together with the snapshot path recorded for each lookup table,
+ * and converts that mapping to and from its {@code "table=path,table=path"} string form.
+ */
+public class LookupMaterializeContext {
+    private final DefaultChainedExecutable jobFlow;
+    private final Map<String, String> lookupSnapshotMap;
+
+    public LookupMaterializeContext(DefaultChainedExecutable jobFlow) {
+        this.jobFlow = jobFlow;
+        this.lookupSnapshotMap = Maps.newHashMap();
+    }
+
+    public DefaultChainedExecutable getJobFlow() {
+        return jobFlow;
+    }
+
+    /**
+     * add snapshot path info into the context; an existing entry for the same table is replaced
+     * @param lookupTable name of the lookup table
+     * @param snapshotPath snapshot path recorded for that table
+     */
+    public void addLookupSnapshotPath(String lookupTable, String snapshotPath) {
+        lookupSnapshotMap.put(lookupTable, snapshotPath);
+    }
+
+    /**
+     *
+     * @return string format of lookup snapshotPath info, it will return like: "lookup1=/path/uuid1,lookup2=/path/uuid2";
+     *         the empty string when no snapshot has been added. Entry order follows the backing map.
+     *
+     */
+    public String getAllLookupSnapshotsInString() {
+        StringBuilder result = new StringBuilder();
+        boolean first = true;
+        for (Entry<String, String> lookupSnapshotEntry : lookupSnapshotMap.entrySet()) {
+            if (!first) {
+                result.append(",");
+            }
+            first = false;
+            result.append(lookupSnapshotEntry.getKey());
+            result.append("=");
+            result.append(lookupSnapshotEntry.getValue());
+        }
+        return result.toString();
+    }
+
+    /**
+     * parse the lookup snapshot string to lookup snapshot path map. This is the inverse of
+     * {@link #getAllLookupSnapshotsInString()}: a null or empty string yields an empty map,
+     * empty entries are skipped, and every other entry is cut at its FIRST '=' so that a
+     * path which itself contains '=' is kept intact.
+     *
+     * @param snapshotsString comma separated "table=path" entries, may be null or empty
+     * @return a new mutable map from lookup table to snapshot path
+     * @throws IllegalArgumentException if a non-empty entry contains no '='
+     */
+    public static Map<String, String> parseLookupSnapshots(String snapshotsString) {
+        Map<String, String> lookupSnapshotMap = Maps.newHashMap();
+        // An empty context serializes to "", which must parse back to an empty map.
+        if (snapshotsString == null || snapshotsString.isEmpty()) {
+            return lookupSnapshotMap;
+        }
+        for (String lookupSnapshotEntryStr : StringUtil.splitByComma(snapshotsString)) {
+            if (lookupSnapshotEntryStr == null || lookupSnapshotEntryStr.isEmpty()) {
+                continue;
+            }
+            int separator = lookupSnapshotEntryStr.indexOf('=');
+            if (separator < 0) {
+                throw new IllegalArgumentException("Invalid lookup snapshot entry '" + lookupSnapshotEntryStr
+                        + "', expected <table>=<path> in: " + snapshotsString);
+            }
+            lookupSnapshotMap.put(lookupSnapshotEntryStr.substring(0, separator),
+                    lookupSnapshotEntryStr.substring(separator + 1));
+        }
+        return lookupSnapshotMap;
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/LookupSnapshotBuildJob.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/LookupSnapshotBuildJob.java
new file mode 100644
index 0000000..c142ace
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/LookupSnapshotBuildJob.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.TimeZone;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
+import org.apache.kylin.job.execution.DefaultChainedExecutable;
+import org.apache.kylin.metadata.project.ProjectInstance;
+import org.apache.kylin.metadata.project.ProjectManager;
+
+/**
+ * Chained job that carries the parameters (deploy environment, project, cube, name,
+ * submitter, notify list) of a lookup snapshot build for one table of a cube.
+ */
+public class LookupSnapshotBuildJob extends DefaultChainedExecutable {
+
+    public static final Integer DEFAULT_PRIORITY = 30;
+
+    private static final String DEPLOY_ENV_NAME = "envName";
+    private static final String PROJECT_INSTANCE_NAME = "projectName";
+
+    private static final String JOB_TYPE = "Lookup ";
+
+    /**
+     * Creates a job for the given cube and lookup table.
+     *
+     * @throws RuntimeException when the cube is not contained in exactly one project
+     */
+    public static LookupSnapshotBuildJob createJob(CubeInstance cube, String tableName, String submitter,
+            KylinConfig kylinConfig) {
+        return initJob(cube, tableName, submitter, kylinConfig);
+    }
+
+    /** Resolves the owning project and fills in every parameter of a fresh job instance. */
+    private static LookupSnapshotBuildJob initJob(CubeInstance cube, String tableName, String submitter,
+            KylinConfig kylinConfig) {
+        final ProjectInstance owner = findOwningProject(cube, kylinConfig);
+
+        final LookupSnapshotBuildJob job = new LookupSnapshotBuildJob();
+        final SimpleDateFormat timestampFormat = new SimpleDateFormat("z yyyy-MM-dd HH:mm:ss", Locale.ROOT);
+        timestampFormat.setTimeZone(TimeZone.getTimeZone(kylinConfig.getTimeZone()));
+
+        job.setDeployEnvName(kylinConfig.getDeployEnv());
+        job.setProjectName(owner.getName());
+        CubingExecutableUtil.setCubeName(cube.getName(), job.getParams());
+
+        String jobName = JOB_TYPE + " CUBE - " + cube.getName() + " - " + " TABLE - " + tableName + " - ";
+        jobName += timestampFormat.format(new Date());
+        job.setName(jobName);
+
+        job.setSubmitter(submitter);
+        job.setNotifyList(cube.getDescriptor().getNotifyList());
+        return job;
+    }
+
+    /** Returns the single project containing the cube; fails when there is none or more than one. */
+    private static ProjectInstance findOwningProject(CubeInstance cube, KylinConfig kylinConfig) {
+        final List<ProjectInstance> candidates = ProjectManager.getInstance(kylinConfig)
+                .findProjects(cube.getType(), cube.getName());
+        final int matches = (candidates == null) ? 0 : candidates.size();
+        if (matches == 0) {
+            throw new RuntimeException("Cannot find the project containing the cube " + cube.getName() + "!!!");
+        }
+        if (matches >= 2) {
+            String msg = "Find more than one project containing the cube " + cube.getName()
+                    + ". It does't meet the uniqueness requirement!!! ";
+            throw new RuntimeException(msg);
+        }
+        return candidates.get(0);
+    }
+
+    protected void setDeployEnvName(String name) {
+        setParam(DEPLOY_ENV_NAME, name);
+    }
+
+    public String getDeployEnvName() {
+        return getParam(DEPLOY_ENV_NAME);
+    }
+
+    public String getProjectName() {
+        return getParam(PROJECT_INSTANCE_NAME);
+    }
+
+    public void setProjectName(String name) {
+        setParam(PROJECT_INSTANCE_NAME, name);
+    }
+
+    /** @return {@link #DEFAULT_PRIORITY} */
+    @Override
+    public int getDefaultPriority() {
+        return DEFAULT_PRIORITY;
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/MRUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/MRUtil.java
new file mode 100644
index 0000000..49fe7bf
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/MRUtil.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.engine.mr;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.engine.EngineFactory;
+import org.apache.kylin.engine.mr.IMRInput.IMRBatchCubingInputSide;
+import org.apache.kylin.engine.mr.IMRInput.IMRBatchMergeInputSide;
+import org.apache.kylin.engine.mr.IMRInput.IMRTableInputFormat;
+import org.apache.kylin.engine.mr.IMROutput2.IMRBatchCubingOutputSide2;
+import org.apache.kylin.engine.mr.IMROutput2.IMRBatchMergeOutputSide2;
+import org.apache.kylin.metadata.TableMetadataManager;
+import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.apache.kylin.source.SourceManager;
+import org.apache.kylin.storage.StorageFactory;
+
+/**
+ * Static helpers that obtain the input/output sides of a segment from the source and
+ * storage engine adapters, plus a replacement for {@code ToolRunner.run()}.
+ */
+public class MRUtil {
+
+    public static IMRBatchCubingInputSide getBatchCubingInputSide(CubeSegment seg) {
+        final IJoinedFlatTableDesc flatDesc = EngineFactory.getJoinedFlatTableDesc(seg);
+        final IMRInput input = SourceManager.createEngineAdapter(seg, IMRInput.class);
+        return (IMRBatchCubingInputSide) input.getBatchCubingInputSide(flatDesc);
+    }
+
+    /** Looks up the table by name within the project, then delegates to the {@code TableDesc} overload's adapter call. */
+    public static IMRTableInputFormat getTableInputFormat(String tableName, String prj, String uuid) {
+        final TableDesc table = getTableDesc(tableName, prj);
+        final IMRInput input = SourceManager.createEngineAdapter(table, IMRInput.class);
+        return input.getTableInputFormat(table, uuid);
+    }
+
+    public static IMRTableInputFormat getTableInputFormat(TableDesc tableDesc, String uuid) {
+        final IMRInput input = SourceManager.createEngineAdapter(tableDesc, IMRInput.class);
+        return input.getTableInputFormat(tableDesc, uuid);
+    }
+
+    /** Reads the table description from the metadata manager of the environment's KylinConfig. */
+    private static TableDesc getTableDesc(String tableName, String prj) {
+        final KylinConfig envConfig = KylinConfig.getInstanceFromEnv();
+        return TableMetadataManager.getInstance(envConfig).getTableDesc(tableName, prj);
+    }
+
+    public static IMRBatchCubingOutputSide2 getBatchCubingOutputSide2(CubeSegment seg) {
+        final IMROutput2 output = StorageFactory.createEngineAdapter(seg, IMROutput2.class);
+        return output.getBatchCubingOutputSide(seg);
+    }
+
+    public static IMRBatchMergeOutputSide2 getBatchMergeOutputSide2(CubeSegment seg) {
+        final IMROutput2 output = StorageFactory.createEngineAdapter(seg, IMROutput2.class);
+        return output.getBatchMergeOutputSide(seg);
+    }
+
+    public static IMRBatchMergeInputSide getBatchMergeInputSide(CubeSegment seg) {
+        final IMRInput input = SourceManager.createEngineAdapter(seg, IMRInput.class);
+        return (IMRBatchMergeInputSide) input.getBatchMergeInputSide(seg);
+    }
+
+    public static IMROutput2.IMRBatchOptimizeOutputSide2 getBatchOptimizeOutputSide2(CubeSegment seg) {
+        final IMROutput2 output = StorageFactory.createEngineAdapter(seg, IMROutput2.class);
+        return output.getBatchOptimizeOutputSide(seg);
+    }
+
+    /**
+     * Runs {@code tool} with the non-generic part of {@code args}.
+     *
+     * Use this method instead of ToolRunner.run() because ToolRunner.run() is not thread-safe.
+     * Refer to: http://stackoverflow.com/questions/22462665/is-hadoops-toorunner-thread-safe
+     *
+     * @return the value returned by {@code tool.run(...)}
+     */
+    public static int runMRJob(Tool tool, String[] args) throws Exception {
+        final Configuration existing = tool.getConf();
+        final Configuration conf = (existing != null) ? existing : new Configuration();
+
+        // the generic options are parsed into conf under the class lock, see getParser()
+        final GenericOptionsParser parser = getParser(conf, args);
+        // hand the configuration back, so that the Tool can configure itself
+        tool.setConf(conf);
+
+        // only the arguments left over after the generic hadoop ones reach the tool
+        return tool.run(parser.getRemainingArgs());
+    }
+
+    /** Creates the parser while holding the class lock, so concurrent callers do not parse at the same time. */
+    private static synchronized GenericOptionsParser getParser(Configuration conf, String[] args) throws Exception {
+        return new GenericOptionsParser(conf, args);
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/SortedColumnDFSFile.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/SortedColumnDFSFile.java
new file mode 100644
index 0000000..bcf4b98
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/SortedColumnDFSFile.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.engine.mr;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.dict.ByteComparator;
+import org.apache.kylin.dict.StringBytesConverter;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.source.IReadableTable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Created by xiefan on 16-11-22.
+ *
+ * Reads values from multiple column files and keeps them ordered; the readers of all
+ * files are handed to a {@link SortedColumnDFSFileReader} together with a comparator
+ * chosen from the column's data type.
+ *
+ * You need to ensure that the values inside each file are sorted.
+ */
+public class SortedColumnDFSFile implements IReadableTable {
+
+    private static final Logger logger = LoggerFactory.getLogger(SortedColumnDFSFile.class);
+
+    private final String dfsPath;
+
+    private final DFSFileTable dfsFileTable;
+
+    private final DataType dataType;
+
+    /**
+     * @param path     a single column file or a directory of column files
+     * @param dataType data type of the column, used to pick the value comparator
+     */
+    public SortedColumnDFSFile(String path, DataType dataType) {
+        this.dfsPath = path;
+        this.dfsFileTable = new DFSFileTable(path, -1);
+        this.dataType = dataType;
+    }
+
+    /**
+     * Opens one reader per file (the path itself when it is a file, otherwise its direct
+     * child files) and wraps them in a merging reader. Readers that were already opened
+     * are closed again when opening a later one, or building the merging reader, fails.
+     */
+    @Override
+    public TableReader getReader() throws IOException {
+        final Comparator<String> comparator = getComparatorByType(dataType);
+
+        String filePath = HadoopUtil.fixWindowsPath(dfsPath);
+        FileSystem fs = HadoopUtil.getFileSystem(filePath);
+        ArrayList<FileStatus> allFiles = new ArrayList<>();
+        FileStatus status = fs.getFileStatus(new Path(filePath));
+        if (status.isFile()) {
+            allFiles.add(status);
+        } else {
+            for (FileStatus f : fs.listStatus(new Path(filePath))) {
+                if (f.isFile()) {
+                    allFiles.add(f);
+                }
+            }
+        }
+
+        ArrayList<TableReader> readers = new ArrayList<>();
+        boolean handedOver = false;
+        try {
+            for (FileStatus f : allFiles) {
+                readers.add(new DFSFileTableReader(f.getPath().toString(), -1));
+            }
+            TableReader merged = new SortedColumnDFSFileReader(readers, comparator);
+            handedOver = true;
+            return merged;
+        } finally {
+            // On failure nobody else owns the readers opened so far, so release them here.
+            if (!handedOver) {
+                closeQuietly(readers);
+            }
+        }
+    }
+
+    /** Best-effort close of every reader; close failures are logged and never mask the original error. */
+    private static void closeQuietly(ArrayList<TableReader> opened) {
+        for (TableReader reader : opened) {
+            try {
+                reader.close();
+            } catch (Exception e) {
+                logger.warn("Failed to close reader after getReader() was aborted", e);
+            }
+        }
+    }
+
+    @Override
+    public TableSignature getSignature() throws IOException {
+        return dfsFileTable.getSignature();
+    }
+
+    @Override
+    public boolean exists() throws IOException {
+        return dfsFileTable.exists();
+    }
+
+    /**
+     * Picks the comparator for the column values: byte order for non-numeric types,
+     * {@code long} order for the integer family and {@code double} order for all other
+     * numeric types. Values that cannot be parsed are logged (with the exception) and
+     * treated as equal, as before.
+     */
+    private Comparator<String> getComparatorByType(DataType type) {
+        if (!type.isNumberFamily()) {
+            return new ByteComparator<>(new StringBytesConverter());
+        }
+        if (type.isIntegerFamily()) {
+            return (str1, str2) -> {
+                try {
+                    return Long.compare(Long.parseLong(str1), Long.parseLong(str2));
+                } catch (NumberFormatException e) {
+                    logger.error("NumberFormatException when parse integer family number.str1:" + str1 + " str2:" + str2, e);
+                    return 0;
+                }
+            };
+        }
+        return (str1, str2) -> {
+            try {
+                return Double.compare(Double.parseDouble(str1), Double.parseDouble(str2));
+            } catch (NumberFormatException e) {
+                logger.error("NumberFormatException when parse doul family number.str1:" + str1 + " str2:" + str2, e);
+                return 0;
+            }
+        };
+    }
+
+    @Override
+    public String toString() {
+        return dfsPath;
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/SortedColumnDFSFileReader.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/SortedColumnDFSFileReader.java
new file mode 100644
index 0000000..bb00442
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/SortedColumnDFSFileReader.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package org.apache.kylin.engine.mr;
+
+import org.apache.kylin.source.IReadableTable;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.PriorityQueue;
+
+/**
+ * Created by xiefan on 16-11-22.
+ */
+public class SortedColumnDFSFileReader implements IReadableTable.TableReader {
+ private Collection<IReadableTable.TableReader> readers;
+
+ @SuppressWarnings("unused")
+ private Comparator<String> comparator;
+
+ private PriorityQueue<ReaderBuffer> pq;
+
+ private String[] row;
+
+ public SortedColumnDFSFileReader(Collection<IReadableTable.TableReader> readers, final Comparator<String> comparator) {
+ this.readers = readers;
+ this.comparator = comparator;
+ pq = new PriorityQueue<ReaderBuffer>(11, new Comparator<ReaderBuffer>() {
+ @Override
+ public int compare(ReaderBuffer i, ReaderBuffer j) {
+ boolean isEmpty1 = i.empty();
+ boolean isEmpty2 = j.empty();
+ if (isEmpty1 && isEmpty2)
+ return 0;
+ if (isEmpty1 && !isEmpty2)
+ return 1;
+ if (!isEmpty1 && isEmpty2)
+ return -1;
+ return comparator.compare(i.peek()[0], j.peek()[0]);
+ }
+ });
+ for (IReadableTable.TableReader reader : readers) {
+ if (reader != null) {
+ try {
+ pq.add(new ReaderBuffer(reader));
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ while (pq.size() > 0) {
+ ReaderBuffer buffer = pq.poll();
+ String[] minEntry = buffer.pop();
+ this.row = minEntry;
+ if (buffer.empty()) {
+ pq.remove(buffer);
+ } else {
+ pq.add(buffer); // add it back
+ }
+ if (this.row == null) { //avoid the case of empty file
+ return false;
+ }
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public String[] getRow() {
+ return this.row;
+ }
+
+ @Override
+ public void close() throws IOException {
+ for (IReadableTable.TableReader reader : readers)
+ reader.close();
+ }
+
+ static class ReaderBuffer {
+ private IReadableTable.TableReader reader;
+
+ private String[] row;
+
+ public ReaderBuffer(IReadableTable.TableReader reader) throws IOException {
+ this.reader = reader;
+ reload();
+ }
+
+ public void close() throws IOException {
+ if (this.reader != null)
+ reader.close();
+ }
+
+ public boolean empty() {
+ return (this.row == null);
+ }
+
+ public String[] peek() {
+ return this.row;
+ }
+
+ public String[] pop() throws IOException {
+ String[] result = this.row;
+ reload();
+ return result;
+ }
+
+ private void reload() throws IOException {
+ if (reader.next()) {
+ row = reader.getRow();
+ } else {
+ this.row = null;
+ }
+ }
+
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/AbstractHadoopJob.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/AbstractHadoopJob.java
new file mode 100644
index 0000000..fd4d413
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/AbstractHadoopJob.java
@@ -0,0 +1,753 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+/**
+ * @author George Song (ysong1)
+ *
+ */
+
+import static org.apache.hadoop.util.StringUtils.formatTime;
+import static org.apache.kylin.engine.mr.common.JobRelatedMetaUtil.collectCubeMetadata;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.util.ClassUtil;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.KylinConfig.SetAndUnsetThreadLocalConfig;
+import org.apache.kylin.common.KylinConfigExt;
+import org.apache.kylin.common.StorageURL;
+import org.apache.kylin.common.util.CliCommandExecutor;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.OptionsHelper;
+import org.apache.kylin.common.util.StringSplitter;
+import org.apache.kylin.common.util.StringUtil;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.model.CubeDescTiretreeGlobalDomainDictUtil;
+import org.apache.kylin.job.JobInstance;
+import org.apache.kylin.job.exception.JobException;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.apache.kylin.metadata.project.ProjectManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Maps;
+
+@SuppressWarnings("static-access")
+public abstract class AbstractHadoopJob extends Configured implements Tool {
+ private static final Logger logger = LoggerFactory.getLogger(AbstractHadoopJob.class);
+
+ // Command-line options shared by the Hadoop jobs built on this class. Each option uses the
+ // matching BatchConstants.ARG_* value both as its argument name and as its option name.
+ protected static final Option OPTION_PROJECT = OptionBuilder.withArgName(BatchConstants.ARG_PROJECT).hasArg()
+         .isRequired(true).withDescription("Project name.").create(BatchConstants.ARG_PROJECT);
+ protected static final Option OPTION_JOB_NAME = OptionBuilder.withArgName(BatchConstants.ARG_JOB_NAME).hasArg()
+         .isRequired(true).withDescription("Job name. For example, Kylin_Cuboid_Builder-clsfd_v2_Step_22-D")
+         .create(BatchConstants.ARG_JOB_NAME);
+ protected static final Option OPTION_CUBE_NAME = OptionBuilder.withArgName(BatchConstants.ARG_CUBE_NAME).hasArg()
+         .isRequired(true).withDescription("Cube name. For example, flat_item_cube")
+         .create(BatchConstants.ARG_CUBE_NAME);
+ protected static final Option OPTION_CUBING_JOB_ID = OptionBuilder.withArgName(BatchConstants.ARG_CUBING_JOB_ID)
+         .hasArg().isRequired(false).withDescription("ID of cubing job executable")
+         .create(BatchConstants.ARG_CUBING_JOB_ID);
+ // @Deprecated
+ protected static final Option OPTION_SEGMENT_NAME = OptionBuilder.withArgName(BatchConstants.ARG_SEGMENT_NAME)
+         .hasArg().isRequired(true).withDescription("Cube segment name").create(BatchConstants.ARG_SEGMENT_NAME);
+ protected static final Option OPTION_SEGMENT_ID = OptionBuilder.withArgName(BatchConstants.ARG_SEGMENT_ID).hasArg()
+         .isRequired(true).withDescription("Cube segment id").create(BatchConstants.ARG_SEGMENT_ID);
+ protected static final Option OPTION_INPUT_PATH = OptionBuilder.withArgName(BatchConstants.ARG_INPUT).hasArg()
+         .isRequired(true).withDescription("Input path").create(BatchConstants.ARG_INPUT);
+ protected static final Option OPTION_INPUT_FORMAT = OptionBuilder.withArgName(BatchConstants.ARG_INPUT_FORMAT)
+         .hasArg().isRequired(false).withDescription("Input format").create(BatchConstants.ARG_INPUT_FORMAT);
+ protected static final Option OPTION_OUTPUT_PATH = OptionBuilder.withArgName(BatchConstants.ARG_OUTPUT).hasArg()
+         .isRequired(true).withDescription("Output path").create(BatchConstants.ARG_OUTPUT);
+ protected static final Option OPTION_DICT_PATH = OptionBuilder.withArgName(BatchConstants.ARG_DICT_PATH).hasArg()
+         .isRequired(false).withDescription("Dict path").create(BatchConstants.ARG_DICT_PATH);
+ protected static final Option OPTION_NCUBOID_LEVEL = OptionBuilder.withArgName(BatchConstants.ARG_LEVEL).hasArg()
+         .isRequired(true).withDescription("N-Cuboid build level, e.g. 1, 2, 3...").create(BatchConstants.ARG_LEVEL);
+ protected static final Option OPTION_PARTITION_FILE_PATH = OptionBuilder.withArgName(BatchConstants.ARG_PARTITION)
+         .hasArg().isRequired(true).withDescription("Partition file path.").create(BatchConstants.ARG_PARTITION);
+ protected static final Option OPTION_HTABLE_NAME = OptionBuilder.withArgName(BatchConstants.ARG_HTABLE_NAME)
+         .hasArg().isRequired(true).withDescription("HTable name").create(BatchConstants.ARG_HTABLE_NAME);
+ protected static final Option OPTION_DICTIONARY_SHRUNKEN_PATH = OptionBuilder
+         .withArgName(BatchConstants.ARG_SHRUNKEN_DICT_PATH).hasArg().isRequired(false)
+         .withDescription("Dictionary shrunken path").create(BatchConstants.ARG_SHRUNKEN_DICT_PATH);
+
+ protected static final Option OPTION_STATISTICS_OUTPUT = OptionBuilder.withArgName(BatchConstants.ARG_STATS_OUTPUT)
+         .hasArg().isRequired(false).withDescription("Statistics output").create(BatchConstants.ARG_STATS_OUTPUT);
+ protected static final Option OPTION_STATISTICS_SAMPLING_PERCENT = OptionBuilder
+         .withArgName(BatchConstants.ARG_STATS_SAMPLING_PERCENT).hasArg().isRequired(false)
+         .withDescription("Statistics sampling percentage").create(BatchConstants.ARG_STATS_SAMPLING_PERCENT);
+ protected static final Option OPTION_CUBOID_MODE = OptionBuilder.withArgName(BatchConstants.ARG_CUBOID_MODE)
+         .hasArg().isRequired(false).withDescription("Cuboid Mode").create(BatchConstants.ARG_CUBOID_MODE);
+ protected static final Option OPTION_NEED_UPDATE_BASE_CUBOID_SHARD = OptionBuilder
+         .withArgName(BatchConstants.ARG_UPDATE_SHARD).hasArg().isRequired(false)
+         .withDescription("If need to update base cuboid shard").create(BatchConstants.ARG_UPDATE_SHARD);
+ protected static final Option OPTION_TABLE_NAME = OptionBuilder.withArgName(BatchConstants.ARG_TABLE_NAME).hasArg()
+         .isRequired(true).withDescription("Table name. For example, default.table1")
+         .create(BatchConstants.ARG_TABLE_NAME);
+ protected static final Option OPTION_LOOKUP_SNAPSHOT_ID = OptionBuilder.withArgName(BatchConstants.ARG_LOOKUP_SNAPSHOT_ID).hasArg()
+         .isRequired(true).withDescription("Lookup table snapshotID")
+         .create(BatchConstants.ARG_LOOKUP_SNAPSHOT_ID);
+ protected static final Option OPTION_META_URL = OptionBuilder.withArgName(BatchConstants.ARG_META_URL)
+         .hasArg().isRequired(true).withDescription("HDFS metadata url").create(BatchConstants.ARG_META_URL);
+ public static final Option OPTION_HBASE_CONF_PATH = OptionBuilder.withArgName(BatchConstants.ARG_HBASE_CONF_PATH).hasArg()
+         .isRequired(true).withDescription("HBase config file path").create(BatchConstants.ARG_HBASE_CONF_PATH);
+
+ /** Hadoop configuration key holding the MapReduce application classpath. */
+ private static final String MAP_REDUCE_CLASSPATH = "mapreduce.application.classpath";
+
+ /** KylinConfig instances already loaded by loadKylinConfigFromHdfs(String), keyed by metadata url. */
+ private static final Map<String, KylinConfig> kylinConfigCache = Maps.newConcurrentMap();
+
+ protected static void runJob(Tool job, String[] args) {
+ try {
+ int exitCode = ToolRunner.run(job, args);
+ System.exit(exitCode);
+ } catch (Exception e) {
+ e.printStackTrace(System.err);
+ System.exit(5);
+ }
+ }
+
+ // ============================================================================
+
+ // NOTE(review): not read or written anywhere in this class; presumably maintained by subclasses.
+ protected String name;
+ // When true, waitForCompletion(Job) only submits the job instead of blocking until it finishes.
+ protected boolean isAsync = false;
+ // Backs parseOptions/printUsage/getOptions/getOptionValue/hasOption below.
+ protected OptionsHelper optionsHelper = new OptionsHelper();
+
+ // The Hadoop job that kill(), getInfo(), getCounters() and the input-size helpers operate on.
+ // It is never assigned in this class -- presumably subclasses set it; verify against them.
+ protected Job job;
+
+ /** Initialises the {@code Configured} base with {@code HadoopUtil.getCurrentConfiguration()}. */
+ public AbstractHadoopJob() {
+ super(HadoopUtil.getCurrentConfiguration());
+ }
+
+ /**
+  * Hands the command-line arguments to {@code optionsHelper} for parsing against {@code options}.
+  *
+  * @throws ParseException propagated from the helper
+  */
+ protected void parseOptions(Options options, String[] args) throws ParseException {
+ optionsHelper.parseOptions(options, args);
+ }
+
+ /** Asks {@code optionsHelper} to print usage for {@code options}, labelled with this class's simple name. */
+ public void printUsage(Options options) {
+ optionsHelper.printUsage(getClass().getSimpleName(), options);
+ }
+
+ /** Returns the options held by {@code optionsHelper}. */
+ public Option[] getOptions() {
+ return optionsHelper.getOptions();
+ }
+
+ /** Returns the string form of the options as produced by {@code optionsHelper}. */
+ public String getOptionsAsString() {
+ return optionsHelper.getOptionsAsString();
+ }
+
+ /** Returns the value {@code optionsHelper} holds for the given option. */
+ protected String getOptionValue(Option option) {
+ return optionsHelper.getOptionValue(option);
+ }
+
+ /** Reports whether {@code optionsHelper} has the given option. */
+ protected boolean hasOption(Option option) {
+ return optionsHelper.hasOption(option);
+ }
+
+ protected int waitForCompletion(Job job) throws IOException, InterruptedException, ClassNotFoundException {
+ int retVal = 0;
+ long start = System.nanoTime();
+ if (isAsync) {
+ job.submit();
+ } else {
+ job.waitForCompletion(true);
+ retVal = job.isSuccessful() ? 0 : 1;
+ logger.debug("Job '" + job.getJobName() + "' finished "
+ + (job.isSuccessful() ? "successfully in " : "with failures. Time taken ")
+ + formatTime((System.nanoTime() - start) / 1000000L));
+ }
+ return retVal;
+ }
+
+ /**
+  * Configures the jar and the extra dependencies of a MapReduce job.
+  * <ol>
+  * <li>Uses the jar at {@code kylinConf.getKylinJobJarPath()} when that file exists locally,
+  * otherwise the jar containing this class.</li>
+  * <li>If {@code kylinConf.isUseLocalClasspathEnabled()}, makes sure
+  * {@code mapreduce.application.classpath} is set, falling back to the output of
+  * {@code mapred classpath}.</li>
+  * <li>Collects Hive, Kafka and MR-lib-dir dependencies into one comma-separated list and hands it
+  * to {@code setJobTmpJarsAndFiles}.</li>
+  * </ol>
+  */
+ protected void setJobClasspath(Job job, KylinConfig kylinConf) {
+ String jarPath = kylinConf.getKylinJobJarPath();
+ File jarFile = new File(jarPath);
+ if (jarFile.exists()) {
+ job.setJar(jarPath);
+ logger.trace("append job jar: " + jarPath);
+ } else {
+ // configured jar is not on the local disk: fall back to the jar that holds this class
+ job.setJarByClass(this.getClass());
+ }
+
+ // both are read from JVM system properties, not from KylinConfig
+ String kylinHiveDependency = System.getProperty("kylin.hive.dependency");
+ String kylinKafkaDependency = System.getProperty("kylin.kafka.dependency");
+
+ Configuration jobConf = job.getConfiguration();
+
+ if (kylinConf.isUseLocalClasspathEnabled()) {
+ String classpath = jobConf.get(MAP_REDUCE_CLASSPATH);
+ if (classpath == null || classpath.length() == 0) {
+ logger.info("Didn't find " + MAP_REDUCE_CLASSPATH
+ + " in job configuration, will run 'mapred classpath' to get the default value.");
+ classpath = getDefaultMapRedClasspath();
+ logger.info("The default mapred classpath is: " + classpath);
+ }
+
+ jobConf.set(MAP_REDUCE_CLASSPATH, classpath);
+ }
+ logger.trace("Hadoop job classpath is: " + job.getConfiguration().get(MAP_REDUCE_CLASSPATH));
+
+ /*
+ * set extra dependencies as tmpjars & tmpfiles if configured
+ */
+ StringBuilder kylinDependency = new StringBuilder();
+
+ // for hive dependencies
+ if (kylinHiveDependency != null) {
+ // yarn classpath is comma separated
+ // NOTE(review): this also rewrites any ':' inside an entry (e.g. a URI scheme) -- confirm
+ // the property only ever carries plain paths
+ kylinHiveDependency = kylinHiveDependency.replace(":", ",");
+
+ logger.trace("Hive Dependencies Before Filtered: " + kylinHiveDependency);
+ String filteredHive = filterKylinHiveDependency(kylinHiveDependency, kylinConf);
+ logger.trace("Hive Dependencies After Filtered: " + filteredHive);
+
+ StringUtil.appendWithSeparator(kylinDependency, filteredHive);
+ } else {
+
+ logger.debug("No hive dependency jars set in the environment, will find them from classpath:");
+
+ // locate the jars through three well-known Hive classes; the first class that cannot be
+ // loaded aborts the lookup and the jars found so far are kept
+ try {
+ String hiveExecJarPath = ClassUtil.findContainingJar(Class.forName("org.apache.hadoop.hive.ql.Driver"));
+
+ StringUtil.appendWithSeparator(kylinDependency, hiveExecJarPath);
+ logger.debug("hive-exec jar file: " + hiveExecJarPath);
+
+ String hiveHCatJarPath = ClassUtil
+ .findContainingJar(Class.forName("org.apache.hive.hcatalog.mapreduce.HCatInputFormat"));
+ StringUtil.appendWithSeparator(kylinDependency, hiveHCatJarPath);
+ logger.debug("hive-catalog jar file: " + hiveHCatJarPath);
+
+ String hiveMetaStoreJarPath = ClassUtil
+ .findContainingJar(Class.forName("org.apache.hadoop.hive.metastore.api.Table"));
+ StringUtil.appendWithSeparator(kylinDependency, hiveMetaStoreJarPath);
+ logger.debug("hive-metastore jar file: " + hiveMetaStoreJarPath);
+ } catch (ClassNotFoundException e) {
+ logger.error("Cannot found hive dependency jars: " + e);
+ }
+ }
+
+ // for kafka dependencies
+ if (kylinKafkaDependency != null) {
+ kylinKafkaDependency = kylinKafkaDependency.replace(":", ",");
+ logger.trace("Kafka Dependencies: " + kylinKafkaDependency);
+ StringUtil.appendWithSeparator(kylinDependency, kylinKafkaDependency);
+ } else {
+ logger.debug("No Kafka dependency jar set in the environment, will find them from classpath:");
+ try {
+ String kafkaClientJarPath = ClassUtil
+ .findContainingJar(Class.forName("org.apache.kafka.clients.consumer.KafkaConsumer"));
+ StringUtil.appendWithSeparator(kylinDependency, kafkaClientJarPath);
+ logger.debug("kafka jar file: " + kafkaClientJarPath);
+
+ } catch (ClassNotFoundException e) {
+ // a missing Kafka client is only logged at debug level, unlike the Hive case above
+ logger.debug("Not found kafka client jar from classpath, it is optional for normal build: " + e);
+ }
+ }
+
+ // for KylinJobMRLibDir
+ String mrLibDir = kylinConf.getKylinJobMRLibDir();
+ logger.trace("MR additional lib dir: " + mrLibDir);
+ StringUtil.appendWithSeparator(kylinDependency, mrLibDir);
+
+ setJobTmpJarsAndFiles(job, kylinDependency.toString());
+ }
+
+
+
+ private String filterKylinHiveDependency(String kylinHiveDependency, KylinConfig config) {
+ if (StringUtils.isBlank(kylinHiveDependency))
+ return "";
+
+ StringBuilder jarList = new StringBuilder();
+
+ Pattern hivePattern = Pattern.compile(config.getHiveDependencyFilterList());
+ Matcher matcher = hivePattern.matcher(kylinHiveDependency);
+
+ while (matcher.find()) {
+ if (jarList.length() > 0)
+ jarList.append(",");
+ jarList.append(matcher.group());
+ }
+
+ return jarList.toString();
+ }
+
+ /**
+  * Registers the given comma-separated dependency entries on the job: entries whose name ends in
+  * {@code .jar} are appended to {@code tmpjars}, everything else to {@code tmpfiles}, and
+  * directories are expanded recursively through {@code appendTmpDir}.
+  * <p>
+  * Each entry is looked up on the working file system first and on the local file system second.
+  * Entries that are not absolute, or that exist on neither file system, are skipped with a warning.
+  *
+  * @throws RuntimeException wrapping any {@link IOException} raised while inspecting the entries
+  */
+ private void setJobTmpJarsAndFiles(Job job, String kylinDependency) {
+     if (StringUtils.isBlank(kylinDependency))
+         return;
+
+     logger.trace("setJobTmpJarsAndFiles: " + kylinDependency);
+
+     try {
+         Configuration jobConf = job.getConfiguration();
+         FileSystem localfs = FileSystem.getLocal(jobConf);
+         FileSystem hdfs = HadoopUtil.getWorkingFileSystem(jobConf);
+
+         StringBuilder jarList = new StringBuilder();
+         StringBuilder fileList = new StringBuilder();
+
+         for (String fileName : StringUtil.splitAndTrim(kylinDependency, ",")) {
+             Path p = new Path(fileName);
+             if (!p.isAbsolute()) {
+                 logger.warn("The directory of kylin dependency '" + fileName + "' is not absolute, skip");
+                 continue;
+             }
+             FileSystem fs;
+             if (exists(hdfs, p)) {
+                 fs = hdfs;
+             } else if (exists(localfs, p)) {
+                 fs = localfs;
+             } else {
+                 logger.warn("The directory of kylin dependency '" + fileName + "' does not exist, skip");
+                 continue;
+             }
+
+             // one status lookup serves both the directory check and the path that gets recorded
+             FileStatus status = fs.getFileStatus(p);
+             if (status.isDirectory()) {
+                 logger.trace("Expanding depedency directory: " + p);
+                 appendTmpDir(job, fs, p, jarList, fileList);
+                 continue;
+             }
+
+             StringBuilder list = (p.getName().endsWith(".jar")) ? jarList : fileList;
+             if (list.length() > 0)
+                 list.append(",");
+             list.append(status.getPath());
+         }
+
+         appendTmpFiles(fileList.toString(), jobConf);
+         appendTmpJars(jarList.toString(), jobConf);
+     } catch (IOException e) {
+         throw new RuntimeException(e);
+     }
+ }
+
+ /**
+  * Walks {@code tmpDir} recursively and appends every file found to {@code jarList} (names ending
+  * in {@code .jar}) or to {@code fileList} (everything else), comma separated.
+  * <p>
+  * The {@link FileStatus} entries returned by {@code listStatus} are used directly instead of
+  * being fetched again per entry.
+  * NOTE(review): this assumes {@code listStatus} reports the same directory flag and path as a
+  * separate {@code getFileStatus} call would -- confirm for file systems with symlink support.
+  *
+  * @param job unused here; only passed along on recursion
+  * @throws RuntimeException wrapping any {@link IOException} raised while listing
+  */
+ private void appendTmpDir(Job job, FileSystem fs, Path tmpDir, StringBuilder jarList, StringBuilder fileList) {
+     try {
+         for (FileStatus file : fs.listStatus(tmpDir)) {
+             Path p = file.getPath();
+             if (file.isDirectory()) {
+                 appendTmpDir(job, fs, p, jarList, fileList);
+                 continue;
+             }
+
+             StringBuilder list = (p.getName().endsWith(".jar")) ? jarList : fileList;
+             if (list.length() > 0)
+                 list.append(",");
+             list.append(p.toString());
+         }
+     } catch (IOException e) {
+         throw new RuntimeException(e);
+     }
+ }
+
+ private void appendTmpJars(String jarList, Configuration conf) {
+ if (StringUtils.isBlank(jarList))
+ return;
+
+ String tmpJars = conf.get("tmpjars", null);
+ if (tmpJars == null) {
+ tmpJars = jarList;
+ } else {
+ tmpJars += "," + jarList;
+ }
+ conf.set("tmpjars", tmpJars);
+ logger.trace("Job 'tmpjars' updated -- " + tmpJars);
+ }
+
+ private void appendTmpFiles(String fileList, Configuration conf) {
+ if (StringUtils.isBlank(fileList))
+ return;
+
+ String tmpFiles = conf.get("tmpfiles", null);
+ if (tmpFiles == null) {
+ tmpFiles = fileList;
+ } else {
+ tmpFiles += "," + fileList;
+ }
+ conf.set("tmpfiles", tmpFiles);
+ logger.trace("Job 'tmpfiles' updated -- " + tmpFiles);
+ }
+
+ private String getDefaultMapRedClasspath() {
+
+ String classpath = "";
+ try {
+ CliCommandExecutor executor = KylinConfig.getInstanceFromEnv().getCliCommandExecutor();
+ String output = executor.execute("mapred classpath").getSecond();
+ classpath = output.trim().replace(':', ',');
+ } catch (IOException e) {
+ logger.error("Failed to run: 'mapred classpath'.", e);
+ }
+
+ return classpath;
+ }
+
+ /**
+  * Like {@code fs.exists(p)}, but answers {@code false} instead of failing when the file system
+  * rejects the path with an {@link IllegalArgumentException}.
+  */
+ private static boolean exists(FileSystem fs, Path p) throws IOException {
+     boolean present;
+     try {
+         present = fs.exists(p);
+     } catch (IllegalArgumentException mismatch) {
+         // can happen when FS mismatch
+         present = false;
+     }
+     return present;
+ }
+
+ /**
+  * Splits {@code input} on commas and adds each entry as job input through
+  * {@link #addInputDirs(String[], Job)}.
+  *
+  * @return the number of input paths added
+  */
+ public static int addInputDirs(String input, Job job) throws IOException {
+     final String[] entries = StringSplitter.split(input, ",");
+     final int added = addInputDirs(entries, job);
+     logger.info("Number of added folders:" + added);
+     return added;
+ }
+
+ /**
+  * Adds every entry of {@code inputs} as an input path of the job.
+  * <p>
+  * A plain entry is added as-is. An entry ending in {@code /*} is expanded: each sub-directory
+  * whose name does not start with {@code _} is processed recursively; if the directory has
+  * children but none of them qualifies, the directory itself is added. A wildcard entry whose
+  * directory does not exist is skipped with a warning.
+  *
+  * @return the number of input paths added
+  */
+ public static int addInputDirs(String[] inputs, Job job) throws IOException {
+     int added = 0;
+     for (String raw : inputs) {
+         final String entry = raw.trim();
+         if (!entry.endsWith("/*")) {
+             logger.trace("Add input " + entry);
+             FileInputFormat.addInputPath(job, new Path(entry));
+             added++;
+             continue;
+         }
+
+         // wildcard form: strip the trailing "/*" and look at the directory's children
+         final String dirName = entry.substring(0, entry.length() - 2);
+         final FileSystem fs = HadoopUtil.getWorkingFileSystem(job.getConfiguration());
+         final Path dir = new Path(dirName);
+         if (!exists(fs, dir)) {
+             logger.warn("Path not exist:" + dir.toString());
+             continue;
+         }
+
+         final FileStatus[] children = fs.listStatus(dir);
+         boolean sawSubDir = false;
+         for (FileStatus child : children) {
+             if (!child.isDirectory() || child.getPath().getName().startsWith("_")) {
+                 continue;
+             }
+             sawSubDir = true;
+             added += addInputDirs(new String[] { child.getPath().toString() }, job);
+         }
+         if (!sawSubDir && children.length > 0) {
+             added += addInputDirs(new String[] { dir.toString() }, job);
+         }
+     }
+     return added;
+ }
+
+ /**
+  * Returns the environment KylinConfig, first pointing it at the {@code meta} directory under
+  * the current working directory if that has not happened yet.
+  * <p>
+  * When the {@code KylinConfig.KYLIN_CONF} system property does not already equal the absolute
+  * path of {@code meta}, the property is set to it and the config's metadata url is replaced by
+  * an {@code ifile} url whose {@code path} parameter is that directory.
+  */
+ public static KylinConfig loadKylinPropsAndMetadata() throws IOException {
+     final File metaDir = new File("meta");
+     final boolean alreadyConfigured = metaDir.getAbsolutePath().equals(System.getProperty(KylinConfig.KYLIN_CONF));
+     if (alreadyConfigured) {
+         return KylinConfig.getInstanceFromEnv();
+     }
+
+     System.setProperty(KylinConfig.KYLIN_CONF, metaDir.getAbsolutePath());
+     logger.info("The absolute path for meta dir is " + metaDir.getAbsolutePath());
+     final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
+     final Map<String, String> urlParams = new HashMap<>();
+     urlParams.put("path", metaDir.getAbsolutePath());
+     final StorageURL ifileUrl = new StorageURL(kylinConfig.getMetadataUrl().getIdentifier(), "ifile", urlParams);
+     kylinConfig.setMetadataUrl(ifileUrl.toString());
+     return kylinConfig;
+ }
+
+ /**
+  * Installs {@code conf} as the current Hadoop configuration, loads the KylinConfig for
+  * {@code uri} through {@link #loadKylinConfigFromHdfs(String)} and sets it as the thread-local
+  * KylinConfig. The thread-local is intentionally never unset (see the note in the body).
+  *
+  * @return the loaded config
+  */
+ public static KylinConfig loadKylinConfigFromHdfs(SerializableConfiguration conf, String uri) {
+ HadoopUtil.setCurrentConfiguration(conf.get());
+ KylinConfig config = loadKylinConfigFromHdfs(uri);
+
+ // This is a bad example where the thread local KylinConfig cannot be auto-closed due to
+ // limitation of MR API. It works because MR task runs its own process. Do not copy.
+ @SuppressWarnings("unused")
+ SetAndUnsetThreadLocalConfig shouldAutoClose = KylinConfig.setAndUnsetThreadLocalConfig(config);
+
+ return config;
+ }
+
+ /**
+  * Loads the KylinConfig stored under an {@code @hdfs} metadata url, caching the result per url.
+  * <p>
+  * The properties file is read from the url's {@code path} parameter plus
+  * {@code KylinConfig.KYLIN_CONF_PROPERTIES_FILE}.
+  *
+  * @param uri metadata url; must be non-null and contain {@code @hdfs}
+  * @return the cached or freshly loaded config
+  * @throws IllegalArgumentException if {@code uri} is null or does not contain {@code @hdfs}
+  * @throws RuntimeException         wrapping any {@link IOException} raised while reading the file
+  */
+ public static KylinConfig loadKylinConfigFromHdfs(String uri) {
+     if (uri == null)
+         throw new IllegalArgumentException("meta url should not be null");
+
+     if (!uri.contains("@hdfs"))
+         throw new IllegalArgumentException("meta url should like @hdfs schema");
+
+     // single lookup: the value that was checked is the value that is returned
+     KylinConfig cached = kylinConfigCache.get(uri);
+     if (cached != null) {
+         logger.info("KylinConfig cached for : {}", uri);
+         return cached;
+     }
+
+     logger.info("Ready to load KylinConfig from uri: {}", uri);
+     String realHdfsPath = StorageURL.valueOf(uri).getParameter("path") + "/" + KylinConfig.KYLIN_CONF_PROPERTIES_FILE;
+     KylinConfig config;
+     try {
+         FileSystem fs = HadoopUtil.getFileSystem(realHdfsPath);
+         // try-with-resources guarantees the stream is released even when reading fails;
+         // closing again is harmless should streamToProps already close it
+         try (InputStream is = fs.open(new Path(realHdfsPath))) {
+             Properties prop = KylinConfig.streamToProps(is);
+             config = KylinConfig.createKylinConfig(prop);
+         }
+     } catch (IOException e) {
+         throw new RuntimeException(e);
+     }
+
+     kylinConfigCache.put(uri, config);
+     return config;
+ }
+
+ protected void attachTableMetadata(TableDesc table, Configuration conf) throws IOException {
+ Set<String> dumpList = new LinkedHashSet<>();
+ dumpList.add(table.getResourcePath());
+ dumpKylinPropsAndMetadata(table.getProject(), dumpList, KylinConfig.getInstanceFromEnv(), conf);
+ }
+
+ protected void attachCubeMetadata(CubeInstance cube, Configuration conf) throws IOException {
+ dumpKylinPropsAndMetadata(cube.getProject(), collectCubeMetadata(cube), cube.getConfig(),
+ conf);
+ }
+
+ protected void attachCubeMetadataWithDict(CubeInstance cube, Configuration conf) throws IOException {
+ Set<String> dumpList = new LinkedHashSet<>(collectCubeMetadata(cube));
+ for (CubeSegment segment : cube.getSegments()) {
+ dumpList.addAll(segment.getDictionaryPaths());
+ }
+ dumpKylinPropsAndMetadata(cube.getProject(), dumpList, cube.getConfig(), conf);
+ }
+
+ protected void attachSegmentsMetadataWithDict(List<CubeSegment> segments, Configuration conf) throws IOException {
+ CubeInstance cube = segments.get(0).getCubeInstance();
+ Set<String> dumpList = new LinkedHashSet<>(collectCubeMetadata(cube));
+ for (CubeSegment segment : segments) {
+ dumpList.addAll(segment.getDictionaryPaths());
+ }
+ dumpKylinPropsAndMetadata(cube.getProject(), dumpList, cube.getConfig(), conf);
+ }
+
+ protected void attachSegmentsMetadataWithDict(List<CubeSegment> segments, String metaUrl) throws IOException {
+ Set<String> dumpList = new LinkedHashSet<>(JobRelatedMetaUtil.collectCubeMetadata(segments.get(0).getCubeInstance()));
+ for (CubeSegment segment : segments) {
+ dumpList.addAll(segment.getDictionaryPaths());
+ dumpList.add(segment.getStatisticsResourcePath());
+ }
+ JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segments.get(0).getConfig(), metaUrl);
+ }
+
+ /** Calls {@code attachSegmentMetadata} with dictionaries included and statistics excluded. */
+ protected void attachSegmentMetadataWithDict(CubeSegment segment, Configuration conf) throws IOException {
+ attachSegmentMetadata(segment, conf, true, false);
+ }
+
+ /** Calls {@code attachSegmentMetadata} with both dictionaries and statistics included. */
+ protected void attachSegmentMetadataWithAll(CubeSegment segment, Configuration conf) throws IOException {
+ attachSegmentMetadata(segment, conf, true, true);
+ }
+
+ protected void attachSegmentMetadata(CubeSegment segment, Configuration conf, boolean ifDictIncluded,
+ boolean ifStatsIncluded) throws IOException {
+ Set<String> dumpList = new LinkedHashSet<>(collectCubeMetadata(segment.getCubeInstance()));
+ if (ifDictIncluded) {
+ dumpList.addAll(segment.getDictionaryPaths());
+ }
+ if (ifStatsIncluded) {
+ dumpList.add(segment.getStatisticsResourcePath());
+ }
+ //tiretree global domain dic
+ CubeDescTiretreeGlobalDomainDictUtil.cuboidJob(segment.getCubeDesc(), dumpList);
+
+ dumpKylinPropsAndMetadata(segment.getProject(), dumpList, segment.getConfig(), conf);
+ }
+
+ /**
+  * Writes {@code kylin.properties} and the listed metadata resources into a fresh local
+  * {@code <tmp>/meta} directory and registers that directory in the job's {@code tmpfiles}.
+  *
+  * @param prj         project name; when non-null the project's resource path is added to {@code dumpList}
+  * @param dumpList    resource paths to dump; may be extended by this method
+  * @param kylinConfig config to export and to read the resources with
+  * @param conf        job configuration that receives the {@code tmpfiles} entry
+  * @throws IOException if the temp location or the meta directory cannot be created, or the dump fails
+  */
+ protected void dumpKylinPropsAndMetadata(String prj, Set<String> dumpList, KylinConfig kylinConfig,
+         Configuration conf) throws IOException {
+     File tmp = File.createTempFile("kylin_job_meta", "");
+     FileUtils.forceDelete(tmp); // we need a directory, so delete the file first
+
+     File metaDir = new File(tmp, "meta");
+     // mkdirs() answers false both on failure and when the directory already exists
+     if (!metaDir.mkdirs() && !metaDir.isDirectory()) {
+         throw new IOException("Failed to create job metadata dir " + metaDir.getAbsolutePath());
+     }
+
+     // write kylin.properties
+     File kylinPropsFile = new File(metaDir, "kylin.properties");
+     kylinConfig.exportToFile(kylinPropsFile);
+
+     if (prj != null) {
+         dumpList.add(ProjectManager.getInstance(kylinConfig).getProject(prj).getResourcePath());
+     }
+
+     // write resources
+     JobRelatedMetaUtil.dumpResources(kylinConfig, metaDir, dumpList);
+
+     // hadoop distributed cache
+     String hdfsMetaDir = OptionsHelper.convertToFileURL(metaDir.getAbsolutePath());
+     if (hdfsMetaDir.startsWith("/")) // note Path on windows is like "d:/../..."
+         hdfsMetaDir = "file://" + hdfsMetaDir;
+     else
+         hdfsMetaDir = "file:///" + hdfsMetaDir;
+     logger.info("HDFS meta dir is: " + hdfsMetaDir);
+
+     appendTmpFiles(hdfsMetaDir, conf);
+ }
+
+ protected void cleanupTempConfFile(Configuration conf) {
+ String[] tempfiles = StringUtils.split(conf.get("tmpfiles"), ",");
+ if (tempfiles == null) {
+ return;
+ }
+ for (String tempMetaFileString : tempfiles) {
+ logger.trace("tempMetaFileString is : " + tempMetaFileString);
+ if (tempMetaFileString != null) {
+ if (tempMetaFileString.startsWith("file://")) {
+ tempMetaFileString = tempMetaFileString.substring("file://".length());
+ File tempMetaFile = new File(tempMetaFileString);
+ if (tempMetaFile.exists()) {
+ try {
+ FileUtils.forceDelete(tempMetaFile.getParentFile());
+
+ } catch (IOException e) {
+ logger.warn("error when deleting " + tempMetaFile, e);
+ }
+ } else {
+ logger.info("" + tempMetaFileString + " does not exist");
+ }
+ } else {
+ logger.info("tempMetaFileString is not starting with file:// :" + tempMetaFileString);
+ }
+ }
+ }
+ }
+
+ /** Deletes {@code path} by delegating to {@code HadoopUtil.deletePath(conf, path)}. */
+ protected void deletePath(Configuration conf, Path path) throws IOException {
+ HadoopUtil.deletePath(conf, path);
+ }
+
+ /**
+  * Sums the length of all input splits of the job and converts the total to megabytes.
+  * A total of zero bytes is logged as a warning but still returned.
+  *
+  * @throws JobException if {@code job} is null
+  */
+ public static double getTotalMapInputMB(Job job)
+         throws ClassNotFoundException, IOException, InterruptedException, JobException {
+     if (job == null) {
+         throw new JobException("Job is null");
+     }
+
+     final InputFormat<?, ?> inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(),
+             job.getConfiguration());
+     long totalBytes = 0;
+     for (InputSplit split : inputFormat.getSplits(job)) {
+         totalBytes += split.getLength();
+     }
+
+     // 0 input bytes is possible when the segment range hits no partition on a partitioned hive table (KYLIN-2470)
+     if (totalBytes == 0) {
+         logger.warn("Map input splits are 0 bytes, something is wrong?");
+     }
+
+     return (double) totalBytes / 1024 / 1024;
+ }
+
+ /** Returns {@link #getTotalMapInputMB(Job)} for this instance's {@code job} field. */
+ protected double getTotalMapInputMB()
+ throws ClassNotFoundException, IOException, InterruptedException, JobException {
+ return getTotalMapInputMB(job);
+ }
+
+ protected int getMapInputSplitCount()
+ throws ClassNotFoundException, JobException, IOException, InterruptedException {
+ if (job == null) {
+ throw new JobException("Job is null");
+ }
+ InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
+ return input.getSplits(job).size();
+ }
+
+ /**
+  * Kills the Hadoop job if one is set; does nothing otherwise.
+  *
+  * @throws JobException wrapping the {@link IOException} raised by {@code killJob()}
+  */
+ public void kill() throws JobException {
+     if (job == null) {
+         return;
+     }
+     try {
+         job.killJob();
+     } catch (IOException e) {
+         throw new JobException(e);
+     }
+ }
+
+ /**
+  * Reports the job id and tracking url of the Hadoop job, each only when available, keyed by
+  * {@code JobInstance.MR_JOB_ID} and {@code JobInstance.YARN_APP_URL}.
+  *
+  * @throws JobException if the {@code job} field is null
+  */
+ public Map<String, String> getInfo() throws JobException {
+     if (job == null) {
+         throw new JobException("Job is null");
+     }
+
+     final Map<String, String> info = new HashMap<String, String>();
+     if (job.getJobID() != null) {
+         info.put(JobInstance.MR_JOB_ID, job.getJobID().toString());
+     }
+     if (job.getTrackingURL() != null) {
+         info.put(JobInstance.YARN_APP_URL, job.getTrackingURL().toString());
+     }
+     return info;
+ }
+
+ /**
+  * Returns the counters of the Hadoop job.
+  *
+  * @throws JobException if the {@code job} field is null, or wrapping the {@link IOException}
+  *                      raised while fetching the counters
+  */
+ public Counters getCounters() throws JobException {
+     if (job == null) {
+         throw new JobException("Job is null");
+     }
+     try {
+         return job.getCounters();
+     } catch (IOException e) {
+         throw new JobException(e);
+     }
+ }
+
+ /** Sets the flag that makes {@code waitForCompletion(Job)} submit the job without waiting for it. */
+ public void setAsync(boolean isAsync) {
+ this.isAsync = isAsync;
+ }
+
+ /** Returns the {@code job} field; may be null since this class never assigns it. */
+ public Job getJob() {
+ return this.job;
+ }
+
+ // tells MapReduceExecutable to skip this job
+ // This base implementation always answers false.
+ public boolean isSkipped() {
+ return false;
+ }
+
+ /**
+  * Stores the configuration after passing it through {@code HadoopUtil.healSickConfig}.
+  */
+ @Override
+ public void setConf(Configuration conf) {
+     super.setConf(HadoopUtil.healSickConfig(conf));
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
new file mode 100644
index 0000000..3fffad2
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+/**
+ * Shared constants for batch jobs: configuration entry names, command line argument names,
+ * default delimiters and paths, and counter/logging settings.
+ */
+public interface BatchConstants {
+
+ /**
+ * source data config
+ */
+ char INTERMEDIATE_TABLE_ROW_DELIMITER = 127; // code point 127 (0x7F)
+
+
+ String SEQUENCE_FILE_DEFAULT_DELIMITER = "\01"; // octal escape: the single character with code point 1
+
+ /**
+ * ConFiGuration entry names for MR jobs
+ */
+
+ String CFG_UPDATE_SHARD = "update.shard";
+ String CFG_CUBOID_MODE = "cuboid.mode";
+ String CFG_CUBE_NAME = "cube.name";
+ String CFG_CUBE_SEGMENT_NAME = "cube.segment.name";
+ String CFG_CUBE_SEGMENT_ID = "cube.segment.id";
+ String CFG_CUBE_CUBOID_LEVEL = "cube.cuboid.level";
+
+ String CFG_II_NAME = "ii.name";
+ String CFG_II_SEGMENT_NAME = "ii.segment.name";
+
+ String CFG_OUTPUT_PATH = "output.path";
+ String CFG_PROJECT_NAME = "project.name";
+ String CFG_TABLE_NAME = "table.name";
+ String CFG_IS_MERGE = "is.merge";
+ String CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER = "cube.intermediate.table.row.delimiter";
+ String CFG_REGION_NUMBER_MIN = "region.number.min";
+ String CFG_REGION_NUMBER_MAX = "region.number.max";
+ String CFG_REGION_SPLIT_SIZE = "region.split.size";
+ String CFG_HFILE_SIZE_GB = "hfile.size.gb";
+ String CFG_STATS_JOB_ID = "stats.job.id";
+ String CFG_STATS_JOB_FREQUENCY = "stats.sample.frequency";
+
+ // NOTE: despite the CFG_ prefix, the next three constants hold directory paths, not entry names.
+ String CFG_KYLIN_LOCAL_TEMP_DIR = "/tmp/kylin/";
+ String CFG_KYLIN_HDFS_TEMP_DIR = "/tmp/kylin/";
+
+ String CFG_STATISTICS_LOCAL_DIR = CFG_KYLIN_LOCAL_TEMP_DIR + "cuboidstatistics/";
+ String CFG_STATISTICS_ENABLED = "statistics.enabled";
+ String CFG_STATISTICS_OUTPUT = "statistics.ouput"; // "ouput" is misspelled; kept unchanged for compatibility
+ String CFG_STATISTICS_SAMPLING_PERCENT = "statistics.sampling.percent";
+ // File name (not an entry name) of the cuboid statistics sequence file.
+ String CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME = "cuboid_statistics.seq";
+
+ String CFG_MAPRED_OUTPUT_COMPRESS = "mapred.output.compress";
+
+ String CFG_OUTPUT_COLUMN = "column";
+ String CFG_OUTPUT_DICT = "dict";
+ String CFG_OUTPUT_STATISTICS = "statistics";
+ String CFG_OUTPUT_PARTITION = "partition";
+ String CFG_MR_SPARK_JOB = "mr.spark.job";
+ String CFG_SPARK_META_URL = "spark.meta.url";
+ String CFG_GLOBAL_DICT_BASE_DIR = "global.dict.base.dir";
+
+ String CFG_HLL_REDUCER_NUM = "cuboidHLLCounterReducerNum";
+
+ String CFG_SHARD_NUM = "shard.num";
+
+ String CFG_CONVERGE_CUBOID_PARTITION_PARAM = "converge.cuboid.partition.param";
+
+ /**
+ * command line ARGuments
+ */
+ String ARG_INPUT = "input";
+ String ARG_OUTPUT = "output";
+ String ARG_PROJECT = "project";
+ String ARG_CUBOID_MODE = "cuboidMode";
+ String ARG_UPDATE_SHARD = "updateShard"; // indicates whether the base cuboid shard needs updating
+ String ARG_JOB_NAME = "jobname";
+ String ARG_CUBING_JOB_ID = "cubingJobId";
+ String ARG_CUBE_NAME = "cubename";
+ String ARG_II_NAME = "iiname";
+ String ARG_SEGMENT_NAME = "segmentname";
+ String ARG_SEGMENT_ID = "segmentid";
+ String ARG_PARTITION = "partitions";
+ String ARG_STATS_ENABLED = "statisticsenabled";
+ String ARG_STATS_OUTPUT = "statisticsoutput";
+ String ARG_STATS_SAMPLING_PERCENT = "statisticssamplingpercent";
+ String ARG_HTABLE_NAME = "htablename";
+ String ARG_INPUT_FORMAT = "inputformat";
+ String ARG_LEVEL = "level";
+ String ARG_CONF = "conf";
+ String ARG_DICT_PATH = "dictPath";
+ String ARG_TABLE_NAME = "tableName";
+ String ARG_LOOKUP_SNAPSHOT_ID = "snapshotID";
+ String ARG_EXT_LOOKUP_SNAPSHOTS_INFO = "extlookupsnapshots";
+ String ARG_META_URL = "metadataUrl";
+ String ARG_HBASE_CONF_PATH = "hbaseConfPath";
+ String ARG_SHRUNKEN_DICT_PATH = "shrunkenDictPath";
+ String ARG_COUNTER_OUTPUT = "counterOutput";
+ String ARG_BASE64_ENCODED_STEP_NAME = "base64StepName";
+ String ARG_SQL_COUNT = "sqlCount";
+ String ARG_BASE64_ENCODED_SQL = "base64EncodedSql";
+
+ /**
+ * logger and counter
+ */
+ String MAPREDUCE_COUNTER_GROUP_NAME = "Cube Builder";
+ int NORMAL_RECORD_LOG_THRESHOLD = 100000;
+
+ /**
+ * dictionaries builder class
+ */
+ // NOTE: the constant name is misspelled ("DICTIONNARY"); renaming it would break existing references.
+ String GLOBAL_DICTIONNARY_CLASS = "org.apache.kylin.dict.GlobalDictionaryBuilder";
+
+ // Prefix only (note the trailing dot); presumably a suffix is appended by callers - TODO confirm.
+ String LOOKUP_EXT_SNAPSHOT_SRC_RECORD_CNT_PFX = "lookup.ext.snapshot.src.record.cnt.";
+}
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupTableCache.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeJobLockUtil.java
similarity index 50%
copy from core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupTableCache.java
copy to build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeJobLockUtil.java
index f473059..f3bd3e6 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupTableCache.java
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeJobLockUtil.java
@@ -14,26 +14,33 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
-package org.apache.kylin.dict.lookup;
+package org.apache.kylin.engine.mr.common;
-import org.apache.kylin.metadata.model.TableDesc;
+public class CubeJobLockUtil {
+ public enum LockType {
+ CubeJobLockPath("/cube_job_lock/"), CubeJobEphemeralLockPath("/cube_job_ephemeral_lock/");
+ private String name;
-public interface IExtLookupTableCache {
- enum CacheState {NONE, IN_BUILDING, AVAILABLE}
+ LockType(String name) {
+ this.name = name;
+ }
- /**
- * @param tableDesc
- * @param extTableSnapshotInfo
- * @param buildIfNotExist if true, when the cached lookup table not exist, build it.
- * @return null if no cached lookup table exist
- */
- ILookupTable getCachedLookupTable(TableDesc tableDesc, ExtTableSnapshotInfo extTableSnapshotInfo, boolean buildIfNotExist);
+ public String getName() {
+ return name;
+ }
+ }
- void buildSnapshotCache(TableDesc tableDesc, ExtTableSnapshotInfo extTableSnapshotInfo, ILookupTable sourceTable);
+ /**
+ * Builds the job lock path of a cube, optionally narrowed to one job.
+ *
+ * @param cubeName name appended to the {@code CubeJobLockPath} prefix
+ * @param jobId job id appended after a "/" separator; when null only the cube-level path is returned
+ * @return the lock path string
+ */
+ public static String getLockPath(String cubeName, String jobId) {
+     final String cubeLockPath = LockType.CubeJobLockPath.getName() + cubeName;
+     return jobId == null ? cubeLockPath : cubeLockPath + "/" + jobId;
+ }
- void removeSnapshotCache(ExtTableSnapshotInfo extTableSnapshotInfo);
-
- CacheState getCacheState(ExtTableSnapshotInfo extTableSnapshotInfo);
+ /** Returns the {@code CubeJobEphemeralLockPath} prefix followed by {@code cubeName}. */
+ public static String getEphemeralLockPath(String cubeName) {
+ return LockType.CubeJobEphemeralLockPath.getName() + cubeName;
+ }
}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
new file mode 100644
index 0000000..3c93d05
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -0,0 +1,544 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.nio.charset.StandardCharsets;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.io.SequenceFile.Reader.Option;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.persistence.RawResource;
+import org.apache.kylin.common.persistence.ResourceStore;
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.SumHelper;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.cube.kv.CubeDimEncMap;
+import org.apache.kylin.cube.kv.RowKeyEncoder;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.measure.hllc.HLLCounter;
+import org.apache.kylin.measure.topn.TopNMeasureType;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.model.FunctionDesc;
+import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * This should be in cube module. It's here in engine-mr because currently stats
+ * are saved as sequence files thus a hadoop dependency.
+ */
+public class CubeStatsReader {
+
+ private static final Logger logger = LoggerFactory.getLogger(CubeStatsReader.class);
+
+ final CubeSegment seg;
+ final int samplingPercentage;
+ final int mapperNumberOfFirstBuild; // becomes meaningless after merge
+ final double mapperOverlapRatioOfFirstBuild; // becomes meaningless after merge
+ final Map<Long, HLLCounter> cuboidRowEstimatesHLL;
+ final CuboidScheduler cuboidScheduler;
+ final long sourceRowCount;
+
+ /**
+ * Reads the statistics of {@code cubeSegment}, using the cuboid scheduler returned by the segment itself.
+ */
+ public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
+ this(cubeSegment, cubeSegment.getCuboidScheduler(), kylinConfig);
+ }
+
+ /**
+ * Loads the statistics stored in the resource store for {@code cubeSegment}.
+ * <p>
+ * The stored resource is copied to a local temporary sequence file and parsed from there. The
+ * temporary file is removed afterwards, whether or not parsing succeeded.
+ *
+ * @param cubeSegment segment whose statistics resource is read
+ * @param cuboidScheduler if it's null, the methods that need a scheduler throw
+ *        {@link UnsupportedOperationException}
+ * @param kylinConfig used to obtain the resource store and the HLL precision
+ * @throws IOException if the resource cannot be copied or parsed
+ * @throws IllegalStateException if the segment has no statistics resource
+ */
+ public CubeStatsReader(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler, KylinConfig kylinConfig)
+         throws IOException {
+     ResourceStore store = ResourceStore.getStore(kylinConfig);
+     String statsKey = cubeSegment.getStatisticsResourcePath();
+     RawResource resource = store.getResource(statsKey);
+     if (resource == null)
+         throw new IllegalStateException("Missing resource at " + statsKey);
+
+     File tmpSeqFile = writeTmpSeqFile(resource.content());
+     CubeStatsResult cubeStatsResult;
+     try {
+         Path path = new Path(HadoopUtil.fixWindowsPath("file://" + tmpSeqFile.getAbsolutePath()));
+         cubeStatsResult = new CubeStatsResult(path, kylinConfig.getCubeStatsHLLPrecision());
+     } finally {
+         // Remove the local copy even when parsing fails, so failed reads do not pile up temp files.
+         if (!tmpSeqFile.delete()) {
+             logger.warn("Failed to delete temporary statistics file {}", tmpSeqFile);
+         }
+     }
+
+     this.seg = cubeSegment;
+     this.cuboidScheduler = cuboidScheduler;
+     this.samplingPercentage = cubeStatsResult.getPercentage();
+     this.mapperNumberOfFirstBuild = cubeStatsResult.getMapperNumber();
+     this.mapperOverlapRatioOfFirstBuild = cubeStatsResult.getMapperOverlapRatio();
+     this.cuboidRowEstimatesHLL = cubeStatsResult.getCounterMap();
+     this.sourceRowCount = cubeStatsResult.getSourceRecordCount();
+ }
+
+ /**
+ * Reads the statistics from the sequence file at {@code path} rather than from the statistics
+ * resource of {@code cubeSegment}.
+ *
+ * @param cubeSegment segment kept as the owner of the statistics
+ * @param cuboidScheduler scheduler kept for the layer-based methods; since the statistics come from
+ *        {@code path}, it should be provided by the caller
+ * @param kylinConfig supplies the HLL precision used when parsing the file
+ * @param path location of the statistics sequence file
+ * @throws IOException if the sequence file cannot be read
+ */
+ public CubeStatsReader(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler, KylinConfig kylinConfig, Path path)
+ throws IOException {
+ CubeStatsResult cubeStatsResult = new CubeStatsResult(path, kylinConfig.getCubeStatsHLLPrecision());
+
+ this.seg = cubeSegment;
+ this.cuboidScheduler = cuboidScheduler;
+ this.samplingPercentage = cubeStatsResult.getPercentage();
+ this.mapperNumberOfFirstBuild = cubeStatsResult.getMapperNumber();
+ this.mapperOverlapRatioOfFirstBuild = cubeStatsResult.getMapperOverlapRatio();
+ this.cuboidRowEstimatesHLL = cubeStatsResult.getCounterMap();
+ this.sourceRowCount = cubeStatsResult.getSourceRecordCount();
+ }
+
+ /**
+ * Copies {@code inputStream} into a newly created temporary file and closes the stream.
+ * <p>
+ * If the copy fails, the temporary file is deleted before the exception propagates. A failure to
+ * close the output file is reported instead of swallowed, since it can mean the data was not
+ * fully written.
+ *
+ * @param inputStream content to copy; always closed by this method once the temp file exists
+ * @return the temporary file holding the copied content; the caller must delete it
+ * @throws IOException if the temporary file cannot be created or written
+ */
+ private File writeTmpSeqFile(InputStream inputStream) throws IOException {
+     File tempFile = File.createTempFile("kylin_stats_tmp", ".seq");
+     boolean copied = false;
+     try {
+         try (FileOutputStream out = new FileOutputStream(tempFile)) {
+             org.apache.commons.io.IOUtils.copy(inputStream, out);
+         }
+         copied = true;
+     } finally {
+         IOUtils.closeStream(inputStream);
+         // Do not leave a half-written temp file behind when the copy did not complete.
+         if (!copied && !tempFile.delete()) {
+             logger.warn("Failed to delete temporary statistics file {}", tempFile);
+         }
+     }
+     return tempFile;
+ }
+
+ /** Returns the cuboid id to HLL counter map read from the statistics. */
+ public Map<Long, HLLCounter> getCuboidRowHLLCounters() {
+ return this.cuboidRowEstimatesHLL;
+ }
+
+ /** Returns the sampling percentage read from the statistics. */
+ public int getSamplingPercentage() {
+ return samplingPercentage;
+ }
+
+ /** Returns the source record count read from the statistics. */
+ public long getSourceRowCount() {
+ return sourceRowCount;
+ }
+
+ /** Returns a map of cuboid id to the count estimate of its HLL counter. */
+ public Map<Long, Long> getCuboidRowEstimatesHLL() {
+ return getCuboidRowCountMapFromSampling(cuboidRowEstimatesHLL, samplingPercentage);
+ }
+
+ // return map of Cuboid ID => MB
+ // Delegates with origin=false, so the historical-ratio optimization may be applied.
+ public Map<Long, Double> getCuboidSizeMap() {
+ return getCuboidSizeMap(false);
+ }
+
+ /**
+ * Returns a map of cuboid id to estimated size in MB.
+ *
+ * @param origin when true the raw estimate is returned; when false the estimate is additionally
+ *        optimized if the segment config enables job cuboid size optimization
+ */
+ public Map<Long, Double> getCuboidSizeMap(boolean origin) {
+ return getCuboidSizeMapFromRowCount(seg, getCuboidRowEstimatesHLL(), sourceRowCount, origin);
+ }
+
+ /** Returns the sum of all values of {@link #getCuboidSizeMap()}. */
+ public double estimateCubeSize() {
+ return SumHelper.sumDouble(getCuboidSizeMap().values());
+ }
+
+ /** Returns the mapper number read from the statistics; becomes meaningless after merge. */
+ public int getMapperNumberOfFirstBuild() {
+ return mapperNumberOfFirstBuild;
+ }
+
+ /** Returns the mapper overlap ratio read from the statistics; becomes meaningless after merge. */
+ public double getMapperOverlapRatioOfFirstBuild() {
+ return mapperOverlapRatioOfFirstBuild;
+ }
+
+ /**
+ * Converts per-cuboid HLL counters into per-cuboid row count estimates.
+ *
+ * @param hllcMap cuboid id to HLL counter
+ * @param samplingPercentage not used: the estimates are deliberately not scaled by it
+ * @return a new map of cuboid id to the counter's count estimate
+ */
+ public static Map<Long, Long> getCuboidRowCountMapFromSampling(Map<Long, HLLCounter> hllcMap,
+         int samplingPercentage) {
+     final Map<Long, Long> rowCounts = Maps.newHashMap();
+     // No scaling by the sampling percentage: the data set is assumed to be far larger than the
+     // cardinality, so even a sample should already have seen all distinct values.
+     hllcMap.forEach((cuboidId, counter) -> rowCounts.put(cuboidId, counter.getCountEstimate()));
+     return rowCounts;
+ }
+
+ /**
+ * Estimates the size in MB of each cuboid in {@code rowCountMap}.
+ * Delegates with origin=true, so the historical-ratio optimization is never applied here.
+ */
+ public static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
+ long sourceRowCount) {
+ return getCuboidSizeMapFromRowCount(cubeSegment, rowCountMap, sourceRowCount, true);
+ }
+
+ /**
+ * Estimates the storage size in MB of every cuboid listed in {@code rowCountMap}.
+ *
+ * @param cubeSegment segment supplying the cube description, dimension encodings and config
+ * @param rowCountMap cuboid id to estimated row count; must contain the base cuboid unless empty
+ * @param sourceRowCount source record count passed on to the per-cuboid estimate
+ * @param origin when false, and the segment config enables job cuboid size optimization, the
+ *        estimates are additionally scaled by historical ratios
+ * @return a new map of cuboid id to estimated size in MB
+ * @throws IllegalStateException if {@code rowCountMap} is not empty but lacks the base cuboid
+ */
+ private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
+         long sourceRowCount, boolean origin) {
+     final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
+     final Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
+     final CubeDimEncMap dimEncMap = cubeSegment.getDimensionEncodingMap();
+     final Long baseCuboidRowCount = rowCountMap.get(baseCuboid.getId());
+
+     // Encoded length of each rowkey column, in the column order of the base cuboid.
+     final List<Integer> rowkeyColumnSize = Lists.newArrayList();
+     for (TblColRef column : baseCuboid.getColumns()) {
+         rowkeyColumnSize.add(dimEncMap.get(column).getLengthOfEncoding());
+     }
+
+     // The per-cuboid estimate takes the base cuboid count as a primitive long. Fail with a clear
+     // message instead of a bare NullPointerException from auto-unboxing.
+     if (baseCuboidRowCount == null && !rowCountMap.isEmpty()) {
+         throw new IllegalStateException("Missing row count of base cuboid " + baseCuboid.getId()
+                 + " in statistics of " + cubeSegment);
+     }
+
+     Map<Long, Double> sizeMap = Maps.newHashMap();
+     for (Map.Entry<Long, Long> entry : rowCountMap.entrySet()) {
+         sizeMap.put(entry.getKey(), estimateCuboidStorageSize(cubeSegment, entry.getKey(), entry.getValue(),
+                 baseCuboid.getId(), baseCuboidRowCount, rowkeyColumnSize, sourceRowCount));
+     }
+
+     if (!origin && cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
+         optimizeSizeMap(sizeMap, cubeSegment);
+     }
+
+     return sizeMap;
+ }
+
+ /**
+ * Computes the harmonic mean of {@code data}.
+ *
+ * @param data values to average
+ * @return the element count divided by the sum of reciprocals, or 1.0 when {@code data} is null or empty
+ */
+ private static Double harmonicMean(List<Double> data) {
+     if (data == null || data.isEmpty()) {
+         return 1.0;
+     }
+     double reciprocalSum = 0.0;
+     for (Double value : data) {
+         reciprocalSum += 1.0 / value;
+     }
+     return data.size() / reciprocalSum;
+ }
+
+ private static List<Double> getHistoricalRating(CubeSegment cubeSegment,
+ CubeInstance cubeInstance,
+ int totalLevels) {
+ boolean isMerged = cubeSegment.isMerged();
+
+ Map<Integer, List<Double>> layerRatio = Maps.newHashMap();
+ List<Double> result = Lists.newArrayList();
+
+ for (CubeSegment seg : cubeInstance.getSegments(SegmentStatusEnum.READY)) {
+ if (seg.isMerged() != isMerged || seg.getEstimateRatio() == null) {
+ continue;
+ }
+
+ logger.info("get ratio from {} with: {}", seg.getName(), StringUtils.join(seg.getEstimateRatio(), ","));
+
+ for(int level = 0; level <= totalLevels; level++) {
+ if (seg.getEstimateRatio().get(level) <= 0) {
+ continue;
+ }
+
+ List<Double> temp = layerRatio.get(level) == null ? Lists.newArrayList() : layerRatio.get(level);
+
+ temp.add(seg.getEstimateRatio().get(level));
+ layerRatio.put(level, temp);
+ }
+ }
+
+ if (layerRatio.size() == 0) {
+ logger.info("Fail to get historical rating.");
+ return null;
+ } else {
+ for(int level = 0; level <= totalLevels; level++) {
+ logger.debug("level {}: {}", level, StringUtils.join(layerRatio.get(level), ","));
+ result.add(level, harmonicMean(layerRatio.get(level)));
+ }
+
+ logger.info("Finally estimate ratio is {}", StringUtils.join(result, ","));
+
+ return result;
+ }
+ }
+
+ private static void optimizeSizeMap(Map<Long, Double> sizeMap, CubeSegment cubeSegment) {
+ CubeInstance cubeInstance = cubeSegment.getCubeInstance();
+ int totalLevels = cubeInstance.getCuboidScheduler().getBuildLevel();
+ List<List<Long>> layeredCuboids = cubeInstance.getCuboidScheduler().getCuboidsByLayer();
+
+ logger.info("cube size is {} before optimize", SumHelper.sumDouble(sizeMap.values()));
+
+ List<Double> levelRating = getHistoricalRating(cubeSegment, cubeInstance, totalLevels);
+
+ if (levelRating == null) {
+ logger.info("Fail to optimize, use origin.");
+ return;
+ }
+
+ for (int level = 0; level <= totalLevels; level++) {
+ Double rate = levelRating.get(level);
+
+ for (Long cuboidId : layeredCuboids.get(level)) {
+ double oriValue = (sizeMap.get(cuboidId) == null ? 0.0 : sizeMap.get(cuboidId));
+ sizeMap.put(cuboidId, oriValue * rate);
+ }
+ }
+
+ logger.info("cube size is {} after optimize", SumHelper.sumDouble(sizeMap.values()));
+
+ return;
+ }
+
+
+ /**
+ * Estimate the cuboid's size
+ *
+ * @param cubeSegment segment supplying the rowkey preamble size, the measures and the size ratios
+ * @param cuboidId cuboid whose size is estimated
+ * @param rowCount estimated number of rows of the cuboid
+ * @param baseCuboidId id of the base cuboid; its bits are walked from the highest one down
+ * @param baseCuboidCount estimated number of rows of the base cuboid (used for percentile measures)
+ * @param rowKeyColumnLength encoded length per rowkey column; index i pairs with the i-th highest bit
+ * @param sourceRowCount source record count (used for count-distinct and top-N measures)
+ * @return the cuboid size in megabytes
+ */
+ private static double estimateCuboidStorageSize(CubeSegment cubeSegment, long cuboidId, long rowCount,
+ long baseCuboidId, long baseCuboidCount, List<Integer> rowKeyColumnLength, long sourceRowCount) {
+
+ int rowkeyLength = cubeSegment.getRowKeyPreambleSize();
+ KylinConfig kylinConf = cubeSegment.getConfig();
+
+ // Walk the bits of the base cuboid id from high to low; each bit also set in cuboidId
+ // contributes the encoded length of the matching column to the rowkey length.
+ long mask = Long.highestOneBit(baseCuboidId);
+ long parentCuboidIdActualLength = (long) Long.SIZE - Long.numberOfLeadingZeros(baseCuboidId);
+ for (int i = 0; i < parentCuboidIdActualLength; i++) {
+ if ((mask & cuboidId) > 0) {
+ rowkeyLength += rowKeyColumnLength.get(i);
+ }
+ mask = mask >> 1;
+ }
+
+ // add the measure length
+ // Per-row bytes are accumulated in separate buckets because each bucket is weighted differently below.
+ int normalSpace = rowkeyLength;
+ int countDistinctSpace = 0;
+ double percentileSpace = 0;
+ int topNSpace = 0;
+ for (MeasureDesc measureDesc : cubeSegment.getCubeDesc().getMeasures()) {
+ // An empty cuboid skips all measures; this also guards the divisions by rowCount below.
+ if (rowCount == 0)
+ break;
+ DataType returnType = measureDesc.getFunction().getReturnDataType();
+ if (measureDesc.getFunction().getExpression().equals(FunctionDesc.FUNC_COUNT_DISTINCT)) {
+ // Source rows per cuboid row (integer division), but at least 1.
+ long estimateDistinctCount = sourceRowCount / rowCount;
+ estimateDistinctCount = estimateDistinctCount == 0 ? 1L : estimateDistinctCount;
+ countDistinctSpace += returnType.getStorageBytesEstimate(estimateDistinctCount);
+ } else if (measureDesc.getFunction().getExpression().equals(FunctionDesc.FUNC_PERCENTILE)) {
+ percentileSpace += returnType.getStorageBytesEstimate(baseCuboidCount * 1.0 / rowCount);
+ } else if (measureDesc.getFunction().getExpression().equals(TopNMeasureType.FUNC_TOP_N)) {
+ // Same source-rows-per-cuboid-row estimate as for count distinct.
+ long estimateTopNCount = sourceRowCount / rowCount;
+ estimateTopNCount = estimateTopNCount == 0 ? 1L : estimateTopNCount;
+ topNSpace += returnType.getStorageBytesEstimate(estimateTopNCount);
+ } else {
+ normalSpace += returnType.getStorageBytesEstimate();
+ }
+ }
+
+ double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio();
+ double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeCountDistinctRatio();
+ double cuboidSizeTopNRatio = kylinConf.getJobCuboidSizeTopNRatio();
+
+ // Bytes = rows * weighted per-row bytes; the percentile bucket has no ratio applied.
+ // The division by 1024 * 1024 converts bytes to MB.
+ double ret = (1.0 * normalSpace * rowCount * cuboidSizeRatio
+ + 1.0 * countDistinctSpace * rowCount * cuboidSizeMemHungryRatio + 1.0 * percentileSpace * rowCount
+ + 1.0 * topNSpace * rowCount * cuboidSizeTopNRatio) / (1024L * 1024L);
+ return ret;
+ }
+
+ /**
+ * Writes a report of this segment's statistics to {@code out}: summary figures, the
+ * length of each dimension, and the cuboid tree.
+ */
+ private void print(PrintWriter out) {
+ Map<Long, Long> cuboidRows = getCuboidRowEstimatesHLL();
+ Map<Long, Double> cuboidSizes = getCuboidSizeMap();
+ List<Long> cuboids = new ArrayList<Long>(cuboidRows.keySet());
+ Collections.sort(cuboids);
+
+ out.println("============================================================================");
+ out.println("Statistics of " + seg);
+ out.println();
+ // NOTE(review): iterator().next() throws NoSuchElementException when the counter map is empty.
+ out.println(
+ "Cube statistics hll precision: " + cuboidRowEstimatesHLL.values().iterator().next().getPrecision());
+ out.println("Total cuboids: " + cuboidRows.size());
+ out.println("Total estimated rows: " + SumHelper.sumLong(cuboidRows.values()));
+ out.println("Total estimated size(MB): " + SumHelper.sumDouble(cuboidSizes.values()));
+ out.println("Sampling percentage: " + samplingPercentage);
+ out.println("Mapper overlap ratio: " + mapperOverlapRatioOfFirstBuild);
+ out.println("Mapper number: " + mapperNumberOfFirstBuild);
+ printKVInfo(out);
+ printCuboidInfoTreeEntry(cuboidRows, cuboidSizes, out);
+ out.println("----------------------------------------------------------------------------");
+ }
+
+ /**
+ * Sums the estimated sizes of all cuboids in one layer.
+ *
+ * @param level index of the layer in the scheduler's layered cuboid list
+ * @return the layer size in MB; cuboids without a size estimate count as 0
+ * @throws UnsupportedOperationException if this reader was created without a cuboid scheduler
+ */
+ public double estimateLayerSize(int level) {
+     if (cuboidScheduler == null) {
+         throw new UnsupportedOperationException("cuboid scheduler is null");
+     }
+     final List<List<Long>> cuboidsByLayer = cuboidScheduler.getCuboidsByLayer();
+     final Map<Long, Double> sizeByCuboid = getCuboidSizeMap();
+
+     double layerSize = 0;
+     for (Long cuboidId : cuboidsByLayer.get(level)) {
+         final Double cuboidSize = sizeByCuboid.get(cuboidId);
+         if (cuboidSize != null) {
+             layerSize += cuboidSize;
+         }
+     }
+
+     logger.info("Estimating size for layer {}, all cuboids are {}, total size is {}", level,
+             StringUtils.join(cuboidsByLayer.get(level), ","), layerSize);
+     return layerSize;
+ }
+
+ /**
+ * Returns the cuboid ids of one layer.
+ *
+ * @param level index of the layer in the scheduler's layered cuboid list
+ * @throws UnsupportedOperationException if this reader was created without a cuboid scheduler
+ */
+ public List<Long> getCuboidsByLayer(int level) {
+     if (cuboidScheduler == null) {
+         throw new UnsupportedOperationException("cuboid scheduler is null");
+     }
+     return cuboidScheduler.getCuboidsByLayer().get(level);
+ }
+
+ /**
+ * Prints the cuboid tree, starting from the base cuboid of this segment's cube.
+ *
+ * @throws UnsupportedOperationException if this reader was created without a cuboid scheduler
+ */
+ private void printCuboidInfoTreeEntry(Map<Long, Long> cuboidRows, Map<Long, Double> cuboidSizes, PrintWriter out) {
+ if (cuboidScheduler == null) {
+ throw new UnsupportedOperationException("cuboid scheduler is null");
+ }
+ long baseCuboid = Cuboid.getBaseCuboidId(seg.getCubeDesc());
+ // The number of set bits in the base cuboid id is used as the dimension count.
+ int dimensionCount = Long.bitCount(baseCuboid);
+ // -1 as parent marks the root: no shrink percentage is printed for it.
+ printCuboidInfoTree(-1L, baseCuboid, cuboidScheduler, cuboidRows, cuboidSizes, dimensionCount, 0, out);
+ }
+
+ /** Prints one line per column of the base cuboid with the column length reported by the rowkey encoder. */
+ private void printKVInfo(PrintWriter writer) {
+ Cuboid cuboid = Cuboid.getBaseCuboid(seg.getCubeDesc());
+ RowKeyEncoder encoder = new RowKeyEncoder(seg, cuboid);
+ for (TblColRef col : cuboid.getColumns()) {
+ writer.println("Length of dimension " + col + " is " + encoder.getColumnLength(col));
+ }
+ }
+
+ /**
+ * Prints {@code cuboidID} and then, recursively, each of its spanning cuboids in ascending id
+ * order, one depth level deeper.
+ */
+ private static void printCuboidInfoTree(long parent, long cuboidID, final CuboidScheduler scheduler,
+ Map<Long, Long> cuboidRows, Map<Long, Double> cuboidSizes, int dimensionCount, int depth, PrintWriter out) {
+ printOneCuboidInfo(parent, cuboidID, cuboidRows, cuboidSizes, dimensionCount, depth, out);
+
+ List<Long> children = scheduler.getSpanningCuboid(cuboidID);
+ // NOTE(review): this sorts the list returned by the scheduler in place - confirm that the
+ // scheduler hands out a mutable list it does not rely on being unsorted.
+ Collections.sort(children);
+
+ for (Long child : children) {
+ printCuboidInfoTree(cuboidID, child, scheduler, cuboidRows, cuboidSizes, dimensionCount, depth + 1, out);
+ }
+ }
+
+ /**
+ * Prints one line describing a cuboid: indentation by depth, display name, estimated rows,
+ * estimated size in MB and, unless it is the root, its row count as a percentage of the parent's.
+ *
+ * @param parent id of the parent cuboid, or -1 for the root
+ * @param cuboidID cuboid to describe; must be present in both maps
+ * @param cuboidRows cuboid id to estimated row count
+ * @param cuboidSizes cuboid id to estimated size in MB
+ * @param dimensionCount passed to {@code Cuboid.getDisplayName}
+ * @param depth number of indentation units written before the entry
+ * @param out destination of the line
+ */
+ private static void printOneCuboidInfo(long parent, long cuboidID, Map<Long, Long> cuboidRows,
+         Map<Long, Double> cuboidSizes, int dimensionCount, int depth, PrintWriter out) {
+     // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
+     StringBuilder sb = new StringBuilder();
+     for (int i = 0; i < depth; i++) {
+         sb.append(" ");
+     }
+     String cuboidName = Cuboid.getDisplayName(cuboidID, dimensionCount);
+     sb.append("|---- Cuboid ").append(cuboidName);
+
+     long rowCount = cuboidRows.get(cuboidID);
+     double size = cuboidSizes.get(cuboidID);
+     sb.append(", est row: ").append(rowCount).append(", est MB: ").append(formatDouble(size));
+
+     if (parent != -1) {
+         sb.append(", shrink: ").append(formatDouble(100.0 * rowCount / cuboidRows.get(parent)))
+                 .append("%");
+     }
+
+     out.println(sb.toString());
+ }
+
+ private static String formatDouble(double input) {
+ return new DecimalFormat("#.##", DecimalFormatSymbols.getInstance(Locale.ROOT)).format(input);
+ }
+
+ /**
+ * Parsed content of one cuboid statistics sequence file.
+ * <p>
+ * Records are told apart by their key: 0 holds the sampling percentage, -1 the mapper overlap
+ * ratio, -2 the mapper number, -3 the source record count, and every positive key is a cuboid id
+ * whose value holds the registers of its HLL counter. Other keys are ignored.
+ */
+ public static class CubeStatsResult {
+     private int percentage = 100;
+     private double mapperOverlapRatio = 0;
+     private long sourceRecordCount = 0;
+     private int mapperNumber = 0;
+     private Map<Long, HLLCounter> counterMap = Maps.newHashMap();
+
+     /**
+      * Reads all records of the sequence file at {@code path}.
+      *
+      * @param path location of the statistics sequence file
+      * @param precision precision used to create each HLL counter
+      * @throws IOException if the file cannot be opened or read
+      */
+     public CubeStatsResult(Path path, int precision) throws IOException {
+         final Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
+         final Option seqInput = SequenceFile.Reader.file(path);
+         try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
+             final LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
+             final BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(),
+                     hadoopConf);
+             while (reader.next(key, value)) {
+                 final long recordKey = key.get();
+                 if (recordKey > 0) {
+                     // Positive keys are cuboid ids carrying the registers of an HLL counter.
+                     final HLLCounter hll = new HLLCounter(precision);
+                     final ByteArray registers = new ByteArray(value.getBytes());
+                     hll.readRegisters(registers.asBuffer());
+                     counterMap.put(recordKey, hll);
+                 } else if (recordKey == 0L) {
+                     percentage = Bytes.toInt(value.getBytes());
+                 } else if (recordKey == -1) {
+                     mapperOverlapRatio = Bytes.toDouble(value.getBytes());
+                 } else if (recordKey == -2) {
+                     mapperNumber = Bytes.toInt(value.getBytes());
+                 } else if (recordKey == -3) {
+                     sourceRecordCount = Bytes.toLong(value.getBytes());
+                 }
+             }
+         }
+     }
+
+     /** Returns the sampling percentage; 100 when the file had no such record. */
+     public int getPercentage() {
+         return percentage;
+     }
+
+     /** Returns the mapper overlap ratio; 0 when the file had no such record. */
+     public double getMapperOverlapRatio() {
+         return mapperOverlapRatio;
+     }
+
+     /** Returns the mapper number; 0 when the file had no such record. */
+     public int getMapperNumber() {
+         return mapperNumber;
+     }
+
+     /** Returns an unmodifiable view of the cuboid id to HLL counter map. */
+     public Map<Long, HLLCounter> getCounterMap() {
+         return Collections.unmodifiableMap(counterMap);
+     }
+
+     /** Returns the source record count; 0 when the file had no such record. */
+     public long getSourceRecordCount() {
+         return sourceRecordCount;
+     }
+ }
+
+ /**
+ * Command line entry point: prints the statistics of every segment of one cube to standard
+ * output. A segment whose statistics cannot be read is logged and skipped.
+ *
+ * @param args {@code args[0]} is the name of the cube to inspect
+ * @throws IOException kept in the signature for compatibility with existing callers
+ * @throws IllegalArgumentException if the cube name is missing or the cube manager returns no cube for it
+ */
+ public static void main(String[] args) throws IOException {
+     System.out.println("CubeStatsReader is used to read cube statistic saved in metadata store");
+     if (args == null || args.length < 1) {
+         throw new IllegalArgumentException("Usage: CubeStatsReader <cube name>");
+     }
+
+     KylinConfig config = KylinConfig.getInstanceFromEnv();
+     CubeInstance cube = CubeManager.getInstance(config).getCube(args[0]);
+     if (cube == null) {
+         throw new IllegalArgumentException("Cube not found: " + args[0]);
+     }
+     List<CubeSegment> segments = cube.getSegments();
+
+     PrintWriter out = new PrintWriter(
+             new BufferedWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8)));
+     for (CubeSegment seg : segments) {
+         try {
+             new CubeStatsReader(seg, config).print(out);
+         } catch (Exception e) {
+             // Pass the exception as the last argument so the cause is logged, not silently dropped.
+             logger.warn("CubeStatsReader for Segment {} failed, skip it.", seg.getName(), e);
+         }
+     }
+     out.flush();
+ }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
new file mode 100644
index 0000000..0945908
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.measure.BufferedMeasureCodec;
+import org.apache.kylin.measure.hllc.HLLCounter;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Writes cuboid statistics as a sequence file of {@code LongWritable} keys and
+ * {@code BytesWritable} values.
+ * <p>
+ * Key 0 holds the sampling percentage, -1 the mapper overlap ratio, -2 the mapper number,
+ * -3 the source record count, and every cuboid id holds the registers of its HLL counter.
+ */
+public class CubeStatsWriter {
+
+    /** Writes the statistics with mapper number, mapper overlap ratio and source record count all 0. */
+    public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
+            Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage) throws IOException {
+        writeCuboidStatistics(conf, outputPath, cuboidHLLMap, samplingPercentage, 0, 0, 0);
+    }
+
+    /** Writes the statistics with mapper number and mapper overlap ratio both 0. */
+    public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
+            Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, long sourceRecordCount) throws IOException {
+        writeCuboidStatistics(conf, outputPath, cuboidHLLMap, samplingPercentage, 0, 0, sourceRecordCount);
+    }
+
+    /**
+     * Writes the statistics to the file named
+     * {@code BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME} under {@code outputPath}.
+     */
+    public static void writeCuboidStatistics(Configuration conf, Path outputPath, //
+            Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio,
+            long sourceRecordCount) throws IOException {
+        Path seqFilePath = new Path(outputPath, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
+        writeCuboidStatisticsInner(conf, seqFilePath, cuboidHLLMap, samplingPercentage, mapperNumber,
+                mapperOverlapRatio, sourceRecordCount);
+    }
+
+    //Be careful: the file name for partial cuboid statistics must start with BatchConstants.CFG_OUTPUT_STATISTICS,
+    //because later statistics merging only uses files starting with BatchConstants.CFG_OUTPUT_STATISTICS
+    /** Writes partial statistics of one shard, with a source record count of 0. */
+    public static void writePartialCuboidStatistics(Configuration conf, Path outputPath, //
+            Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio,
+            int shard) throws IOException {
+        Path seqFilePath = new Path(outputPath, BatchConstants.CFG_OUTPUT_STATISTICS + "_" + shard);
+        writeCuboidStatisticsInner(conf, seqFilePath, cuboidHLLMap, samplingPercentage, mapperNumber,
+                mapperOverlapRatio, 0);
+    }
+
+    /**
+     * Writes the header records followed by one record per cuboid, in ascending cuboid id order.
+     * <p>
+     * The writer is closed by try-with-resources, so a failure while closing (which can mean the
+     * file is incomplete) is reported to the caller instead of being swallowed.
+     *
+     * @throws IOException if the file cannot be created, written or closed
+     */
+    private static void writeCuboidStatisticsInner(Configuration conf, Path outputFilePath, //
+            Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio,
+            long sourceRecordCount) throws IOException {
+        List<Long> allCuboids = Lists.newArrayList();
+        allCuboids.addAll(cuboidHLLMap.keySet());
+        Collections.sort(allCuboids);
+
+        ByteBuffer valueBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
+        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(outputFilePath),
+                SequenceFile.Writer.keyClass(LongWritable.class),
+                SequenceFile.Writer.valueClass(BytesWritable.class))) {
+            // mapper overlap ratio at key -1
+            writer.append(new LongWritable(-1), new BytesWritable(Bytes.toBytes(mapperOverlapRatio)));
+
+            // mapper number at key -2
+            writer.append(new LongWritable(-2), new BytesWritable(Bytes.toBytes(mapperNumber)));
+
+            // sampling percentage at key 0
+            writer.append(new LongWritable(0L), new BytesWritable(Bytes.toBytes(samplingPercentage)));
+
+            // flat table source_count at key -3
+            writer.append(new LongWritable(-3), new BytesWritable(Bytes.toBytes(sourceRecordCount)));
+
+            for (long i : allCuboids) {
+                // The buffer is reused for every cuboid; only the first limit() bytes are written.
+                valueBuf.clear();
+                cuboidHLLMap.get(i).writeRegisters(valueBuf);
+                valueBuf.flip();
+                writer.append(new LongWritable(i), new BytesWritable(valueBuf.array(), valueBuf.limit()));
+            }
+        }
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
new file mode 100644
index 0000000..6d9b748
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.cube.cuboid.algorithm.CuboidRecommender;
+import org.apache.kylin.cube.cuboid.algorithm.CuboidStats;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+ /**
+  * Entry points that build a {@link CuboidStats} from cube or segment statistics and
+  * hand it to {@link CuboidRecommender}.
+  *
+  * <p>Every method returns {@code null} when the required statistics are missing or the
+  * base cuboid row count is zero.
+  */
+ public class CuboidRecommenderUtil {
+
+     private static final Logger logger = LoggerFactory.getLogger(CuboidRecommenderUtil.class);
+     private static final String BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO = "Base cuboid count in cuboid statistics is 0.";
+
+     /**
+      * Trigger cube planner phase one.
+      *
+      * @param segment segment whose statistics are read; may be null
+      * @return the recommended cuboids, or null when the segment is null, has no statistics,
+      *         or its base cuboid count is missing or zero
+      * @throws IOException if the segment statistics cannot be read
+      */
+     public static Map<Long, Long> getRecommendCuboidList(CubeSegment segment) throws IOException {
+         if (segment == null) {
+             return null;
+         }
+
+         CubeStatsReader cubeStatsReader = new CubeStatsReader(segment, null, segment.getConfig());
+         // read the estimates once so every check below and the builder see the same map
+         Map<Long, Long> rowEstimates = cubeStatsReader.getCuboidRowEstimatesHLL();
+         if (rowEstimates == null || rowEstimates.isEmpty()) {
+             logger.info("Cuboid Statistics is not enabled.");
+             return null;
+         }
+         CubeInstance cube = segment.getCubeInstance();
+         long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
+         Long baseCuboidRowCount = rowEstimates.get(baseCuboid);
+         if (baseCuboidRowCount == null || baseCuboidRowCount == 0L) {
+             logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
+             return null;
+         }
+
+         Set<Long> mandatoryCuboids = segment.getCubeDesc().getMandatoryCuboids();
+
+         String key = cube.getName();
+         CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid, rowEstimates,
+                 cubeStatsReader.getCuboidSizeMap()) //
+                         .setMandatoryCuboids(mandatoryCuboids) //
+                         .setBPUSMinBenefitRatio(segment.getConfig().getCubePlannerBPUSMinBenefitRatio()) //
+                         .build();
+         // the third argument is true whenever mandatory cuboids are configured
+         return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
+                 !mandatoryCuboids.isEmpty());
+     }
+
+     /**
+      * Trigger cube planner phase two for optimization.
+      *
+      * @param cube                    cube whose merged statistics are used
+      * @param hitFrequencyMap         passed through to the {@link CuboidStats} builder
+      * @param rollingUpCountSourceMap passed through to the {@link CuboidStats} builder
+      * @return the recommended cuboids, or null when the base cuboid count is missing or zero
+      * @throws IOException if the cube statistics cannot be read
+      */
+     public static Map<Long, Long> getRecommendCuboidList(CubeInstance cube, Map<Long, Long> hitFrequencyMap,
+             Map<Long, Map<Long, Pair<Long, Long>>> rollingUpCountSourceMap) throws IOException {
+
+         CuboidScheduler cuboidScheduler = cube.getCuboidScheduler();
+         Set<Long> currentCuboids = cuboidScheduler.getAllCuboidIds();
+         Pair<Map<Long, Long>, Map<Long, Double>> statsPair = CuboidStatsReaderUtil
+                 .readCuboidStatsAndSizeFromCube(currentCuboids, cube);
+         Map<Long, Long> statistics = statsPair.getFirst();
+         long baseCuboid = cuboidScheduler.getBaseCuboidId();
+         Long baseCuboidRowCount = statistics.get(baseCuboid);
+         if (baseCuboidRowCount == null || baseCuboidRowCount == 0L) {
+             logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
+             return null;
+         }
+
+         KylinConfig config = cube.getConfig();
+         String key = cube.getName();
+         double queryUncertaintyRatio = config.getCubePlannerQueryUncertaintyRatio();
+         double bpusMinBenefitRatio = config.getCubePlannerBPUSMinBenefitRatio();
+         CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid, statistics, statsPair.getSecond()) {
+             // size estimation is delegated to the cube's own segment statistics
+             @Override
+             public Map<Long, Double> estimateCuboidsSize(Map<Long, Long> statistics) {
+                 try {
+                     return CuboidStatsReaderUtil.readCuboidSizeFromCube(statistics, cube);
+                 } catch (IOException e) {
+                     logger.warn("Fail to get cuboid size from cube due to ", e);
+                     return null;
+                 }
+             }
+         }.setQueryUncertaintyRatio(queryUncertaintyRatio) //
+                 .setBPUSMinBenefitRatio(bpusMinBenefitRatio) //
+                 .setHitFrequencyMap(hitFrequencyMap) //
+                 .setRollingUpCountSourceMap(rollingUpCountSourceMap) //
+                 .build();
+         return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, config);
+     }
+
+     /**
+      * For future segment level recommend.
+      *
+      * @param segment                 segment whose statistics are read; may be null
+      * @param hitFrequencyMap         passed through to the {@link CuboidStats} builder
+      * @param rollingUpCountSourceMap passed through to the {@link CuboidStats} builder
+      * @param ifForceRecommend        forwarded unchanged to the recommender
+      * @return the recommended cuboids, or null when the segment is null, has no statistics,
+      *         or its base cuboid count is missing or zero
+      * @throws IOException if the segment statistics cannot be read
+      */
+     public static Map<Long, Long> getRecommendCuboidList(CubeSegment segment, Map<Long, Long> hitFrequencyMap,
+             Map<Long, Map<Long, Pair<Long, Long>>> rollingUpCountSourceMap, boolean ifForceRecommend)
+             throws IOException {
+         if (segment == null) {
+             return null;
+         }
+
+         CubeStatsReader cubeStatsReader = new CubeStatsReader(segment, null, segment.getConfig());
+         // read the estimates once so every check below and the builder see the same map
+         Map<Long, Long> rowEstimates = cubeStatsReader.getCuboidRowEstimatesHLL();
+         if (rowEstimates == null || rowEstimates.isEmpty()) {
+             logger.info("Cuboid Statistics is not enabled.");
+             return null;
+         }
+         CubeInstance cube = segment.getCubeInstance();
+         long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
+         Long baseCuboidRowCount = rowEstimates.get(baseCuboid);
+         if (baseCuboidRowCount == null || baseCuboidRowCount == 0L) {
+             logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
+             return null;
+         }
+
+         String key = cube.getName() + "-" + segment.getName();
+         CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid, rowEstimates,
+                 cubeStatsReader.getCuboidSizeMap()) //
+                         .setHitFrequencyMap(hitFrequencyMap) //
+                         .setRollingUpCountSourceMap(rollingUpCountSourceMap) //
+                         .build();
+         return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
+                 ifForceRecommend);
+     }
+ }
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidSchedulerUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidSchedulerUtil.java
new file mode 100644
index 0000000..0e56287
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidSchedulerUtil.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.cuboid.CuboidModeEnum;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.cube.cuboid.TreeCuboidScheduler;
+
+import com.google.common.collect.Lists;
+
+ /**
+  * Resolves the {@link CuboidScheduler} to use for a segment, either the segment's own
+  * scheduler or a {@link TreeCuboidScheduler} built for a given set of cuboids.
+  */
+ public class CuboidSchedulerUtil {
+
+     /**
+      * Resolves a scheduler from a cuboid mode name.
+      *
+      * @param segment        segment the scheduler is for
+      * @param cuboidModeName mode name; when null the segment's own scheduler is returned
+      * @return the scheduler for the requested mode
+      */
+     public static CuboidScheduler getCuboidSchedulerByMode(CubeSegment segment, String cuboidModeName) {
+         if (cuboidModeName == null) {
+             return segment.getCuboidScheduler();
+         }
+         return getCuboidSchedulerByMode(segment, CuboidModeEnum.getByModeName(cuboidModeName));
+     }
+
+     /**
+      * Resolves a scheduler from a cuboid mode.
+      *
+      * @param segment    segment the scheduler is for
+      * @param cuboidMode mode; null and {@code CURRENT} both yield the segment's own scheduler
+      * @return the scheduler for the requested mode
+      */
+     public static CuboidScheduler getCuboidSchedulerByMode(CubeSegment segment, CuboidModeEnum cuboidMode) {
+         if (cuboidMode == CuboidModeEnum.CURRENT || cuboidMode == null) {
+             return segment.getCuboidScheduler();
+         }
+         return getCuboidScheduler(segment, segment.getCubeInstance().getCuboidsByMode(cuboidMode));
+     }
+
+     /**
+      * Builds a {@link TreeCuboidScheduler} over the given cuboids.
+      *
+      * <p>When row count statistics are available for the segment the cuboids are ordered by
+      * {@link TreeCuboidScheduler.CuboidCostComparator}; otherwise
+      * {@code Cuboid.cuboidSelectComparator} is used.
+      *
+      * @param segment   segment whose description and statistics are used
+      * @param cuboidSet cuboids the scheduler should cover
+      * @return a scheduler over {@code cuboidSet}
+      * @throws RuntimeException wrapping the {@link IOException} raised while reading statistics
+      */
+     public static CuboidScheduler getCuboidScheduler(CubeSegment segment, Set<Long> cuboidSet) {
+         try {
+             Map<Long, Long> cuboidsWithRowCnt = CuboidStatsReaderUtil.readCuboidStatsFromSegment(cuboidSet, segment);
+             Comparator<Long> comparator = cuboidsWithRowCnt == null ? Cuboid.cuboidSelectComparator
+                     : new TreeCuboidScheduler.CuboidCostComparator(cuboidsWithRowCnt);
+             return new TreeCuboidScheduler(segment.getCubeDesc(), Lists.newArrayList(cuboidSet), comparator);
+         } catch (IOException e) {
+             // keep the IOException as the cause so its stack trace is not lost
+             throw new RuntimeException("Fail to read cube stats for segment " + segment + " due to " + e, e);
+         }
+     }
+ }
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidShardUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidShardUtil.java
new file mode 100644
index 0000000..cf1b94a
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidShardUtil.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.CubeUpdate;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Maps;
+
+ /**
+  * Helper that records the shard layout of a cube segment in the cube metadata.
+  */
+ public class CuboidShardUtil {
+     protected static final Logger logger = LoggerFactory.getLogger(CuboidShardUtil.class);
+
+     /**
+      * Stores per-cuboid shard numbers and the total shard count on a segment and
+      * submits the change through {@link CubeManager#updateCube}.
+      *
+      * <p>Only cuboids with more than one shard are kept in the stored map.
+      *
+      * @param segment      segment to update; its cube is copied before being modified
+      * @param cuboidShards shard number per cuboid id
+      * @param totalShards  total shard count to store on the segment
+      * @throws IOException if the cube update fails
+      */
+     public static void saveCuboidShards(CubeSegment segment, Map<Long, Short> cuboidShards, int totalShards) throws IOException {
+         CubeManager manager = CubeManager.getInstance(segment.getConfig());
+
+         // drop every cuboid that has a single shard (or fewer)
+         Map<Long, Short> multiShardCuboids = Maps.newHashMap();
+         for (Map.Entry<Long, Short> shardEntry : cuboidShards.entrySet()) {
+             Short shardNum = shardEntry.getValue();
+             if (shardNum <= 1) {
+                 continue;
+             }
+             multiShardCuboids.put(shardEntry.getKey(), shardNum);
+         }
+
+         // modify a writable copy, never the cached cube / segment objects
+         CubeInstance writableCube = segment.getCubeInstance().latestCopyForWrite();
+         CubeSegment writableSeg = writableCube.getSegmentById(segment.getUuid());
+         writableSeg.setCuboidShardNums(multiShardCuboids);
+         writableSeg.setTotalShards(totalShards);
+
+         CubeUpdate cubeUpdate = new CubeUpdate(writableCube);
+         cubeUpdate.setToUpdateSegs(writableSeg);
+         manager.updateCube(cubeUpdate);
+     }
+ }
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java
new file mode 100644
index 0000000..a5fbe2b
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.measure.hllc.HLLCounter;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+public class CuboidStatsReaderUtil {
+
+ private static final Logger logger = LoggerFactory.getLogger(CuboidStatsReaderUtil.class);
+
+ public static Map<Long, Long> readCuboidStatsFromCube(Set<Long> cuboidIds, CubeInstance cubeInstance) {
+ Map<Long, Long> statisticsMerged = null;
+ try {
+ statisticsMerged = readCuboidStatsAndSizeFromCube(cuboidIds, cubeInstance).getFirst();
+ } catch (IOException e) {
+ logger.warn("Fail to read statistics for cube " + cubeInstance.getName() + " due to " + e);
+ }
+ return statisticsMerged == null ? Collections.emptyMap() : statisticsMerged;
+ }
+
+ public static Pair<Map<Long, Long>, Map<Long, Double>> readCuboidStatsAndSizeFromCube(Set<Long> cuboidIds,
+ CubeInstance cube) throws IOException {
+ Preconditions.checkNotNull(cuboidIds, "The cuboid set can not be null");
+ Preconditions.checkNotNull(cube, "The cube instance can not be null");
+
+ List<CubeSegment> segmentList = cube.getSegments(SegmentStatusEnum.READY);
+ Map<Long, Long> statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
+ Map<Long, Double> sizeMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
+ readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged, sizeMerged);
+ return new Pair<>(statisticsMerged, sizeMerged);
+ }
+
+ public static Map<Long, Long> readCuboidStatsFromSegments(Set<Long> cuboidIds, List<CubeSegment> segmentList)
+ throws IOException {
+ Map<Long, Long> statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
+ readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged,
+ Maps.<Long, Double> newHashMapWithExpectedSize(cuboidIds.size()));
+ return statisticsMerged.isEmpty() ? null : statisticsMerged;
+ }
+
+ public static Map<Long, Double> readCuboidSizeFromCube(Map<Long, Long> statistics, CubeInstance cube)
+ throws IOException {
+ List<CubeSegment> segmentList = cube.getSegments(SegmentStatusEnum.READY);
+ Map<Long, Double> sizeMerged = Maps.newHashMapWithExpectedSize(statistics.size());
+ for (CubeSegment pSegment : segmentList) {
+ CubeStatsReader pReader = new CubeStatsReader(pSegment, null, pSegment.getConfig());
+ Map<Long, Double> pSizeMap = CubeStatsReader.getCuboidSizeMapFromRowCount(pSegment, statistics,
+ pReader.sourceRowCount);
+ for (Long pCuboid : statistics.keySet()) {
+ Double pSize = sizeMerged.get(pCuboid);
+ sizeMerged.put(pCuboid, pSize == null ? pSizeMap.get(pCuboid) : pSize + pSizeMap.get(pCuboid));
+ }
+ }
+ int nSegment = segmentList.size();
+ if (nSegment <= 1) {
+ return sizeMerged;
+ }
+ for (Long pCuboid : statistics.keySet()) {
+ sizeMerged.put(pCuboid, sizeMerged.get(pCuboid) / nSegment);
+ }
+ return sizeMerged;
+ }
+
+ private static void readCuboidStatsFromSegments(Set<Long> cuboidSet, List<CubeSegment> segmentList,
+ final Map<Long, Long> statisticsMerged, final Map<Long, Double> sizeMerged) throws IOException {
+ if (segmentList == null || segmentList.isEmpty()) {
+ return;
+ }
+ int nSegment = segmentList.size();
+
+ Map<Long, HLLCounter> cuboidHLLMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size());
+ Map<Long, Double> sizeMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size());
+ for (CubeSegment pSegment : segmentList) {
+ CubeStatsReader pReader = new CubeStatsReader(pSegment, null, pSegment.getConfig());
+ Map<Long, HLLCounter> pHLLMap = pReader.getCuboidRowHLLCounters();
+ if (pHLLMap == null || pHLLMap.isEmpty()) {
+ logger.info("Cuboid Statistics for segment " + pSegment.getName() + " is not enabled.");
+ nSegment--;
+ continue;
+ }
+ Map<Long, Double> pSizeMap = pReader.getCuboidSizeMap();
+ for (Long pCuboid : cuboidSet) {
+ HLLCounter pInnerHLL = pHLLMap.get(pCuboid);
+ Preconditions.checkNotNull(pInnerHLL, "statistics should exist for cuboid " + pCuboid + " of segment "
+ + pSegment.getCubeDesc().getName() + "[" + pSegment.getName() + "]");
+ if (cuboidHLLMapMerged.get(pCuboid) != null) {
+ cuboidHLLMapMerged.get(pCuboid).merge(pInnerHLL);
+ } else {
+ cuboidHLLMapMerged.put(pCuboid, pInnerHLL);
+ }
+
+ Double pSize = sizeMapMerged.get(pCuboid);
+ sizeMapMerged.put(pCuboid, pSize == null ? pSizeMap.get(pCuboid) : pSizeMap.get(pCuboid) + pSize);
+ }
+ }
+
+ if (nSegment < 1) {
+ return;
+ }
+ for (Long pCuboid : cuboidSet) {
+ statisticsMerged.put(pCuboid, cuboidHLLMapMerged.get(pCuboid).getCountEstimate());
+ sizeMerged.put(pCuboid, sizeMapMerged.get(pCuboid));
+ }
+ }
+
+ public static Map<Long, Long> readCuboidStatsFromSegment(Set<Long> cuboidIds, CubeSegment cubeSegment)
+ throws IOException {
+ Pair<Map<Long, Long>, Long> stats = readCuboidStatsWithSourceFromSegment(cuboidIds, cubeSegment);
+ return stats == null ? null : stats.getFirst();
+ }
+
+ public static Pair<Map<Long, Long>, Long> readCuboidStatsWithSourceFromSegment(Set<Long> cuboidIds,
+ CubeSegment cubeSegment) throws IOException {
+ if (cubeSegment == null) {
+ logger.warn("The cube segment can not be " + null);
+ return null;
+ }
+
+ CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, null, cubeSegment.getConfig());
+ if (cubeStatsReader.getCuboidRowEstimatesHLL() == null
+ || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) {
+ logger.info("Cuboid Statistics is not enabled.");
+ return null;
+ }
+
+ Map<Long, Long> cuboidsWithStatsAll = cubeStatsReader.getCuboidRowEstimatesHLL();
+ Map<Long, Long> cuboidsWithStats = Maps.newHashMapWithExpectedSize(cuboidIds.size());
+ for (Long cuboid : cuboidIds) {
+ Long rowEstimate = cuboidsWithStatsAll.get(cuboid);
+ if (rowEstimate == null) {
+ logger.warn("Cannot get the row count stats for cuboid " + cuboid);
+ } else {
+ cuboidsWithStats.put(cuboid, rowEstimate);
+ }
+ }
+ return new Pair<>(cuboidsWithStats, cubeStatsReader.sourceRowCount);
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/DefaultSslProtocolSocketFactory.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/DefaultSslProtocolSocketFactory.java
new file mode 100644
index 0000000..d66e4eb
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/DefaultSslProtocolSocketFactory.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.Socket;
+import java.net.UnknownHostException;
+
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.TrustManager;
+
+import org.apache.commons.httpclient.ConnectTimeoutException;
+import org.apache.commons.httpclient.HttpClientError;
+import org.apache.commons.httpclient.params.HttpConnectionParams;
+import org.apache.commons.httpclient.protocol.ControllerThreadSocketFactory;
+import org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * @author xduo
+ *
+ */
+public class DefaultSslProtocolSocketFactory implements SecureProtocolSocketFactory {
+ /** Log object for this class. */
+ private static Logger logger = LoggerFactory.getLogger(DefaultSslProtocolSocketFactory.class);
+ private SSLContext sslcontext = null;
+
+ /**
+ * Constructor for DefaultSslProtocolSocketFactory.
+ */
+ public DefaultSslProtocolSocketFactory() {
+ super();
+ }
+
+ /**
+ * @see SecureProtocolSocketFactory#createSocket(java.lang.String,int,java.net.InetAddress,int)
+ */
+ public Socket createSocket(String host, int port, InetAddress clientHost, int clientPort) throws IOException, UnknownHostException {
+ return getSSLContext().getSocketFactory().createSocket(host, port, clientHost, clientPort);
+ }
+
+ /**
+ * Attempts to get a new socket connection to the given host within the
+ * given time limit.
+ *
+ * <p>
+ * To circumvent the limitations of older JREs that do not support connect
+ * timeout a controller thread is executed. The controller thread attempts
+ * to create a new socket within the given limit of time. If socket
+ * constructor does not return until the timeout expires, the controller
+ * terminates and throws an {@link ConnectTimeoutException}
+ * </p>
+ *
+ * @param host
+ * the host name/IP
+ * @param port
+ * the port on the host
+ * @param localAddress
+ * the local host name/IP to bind the socket to
+ * @param localPort
+ * the port on the local machine
+ * @param params
+ * {@link HttpConnectionParams Http connection parameters}
+ *
+ * @return Socket a new socket
+ *
+ * @throws IOException
+ * if an I/O error occurs while creating the socket
+ * @throws UnknownHostException
+ * if the IP address of the host cannot be determined
+ * @throws ConnectTimeoutException
+ * DOCUMENT ME!
+ * @throws IllegalArgumentException
+ * DOCUMENT ME!
+ */
+ public Socket createSocket(final String host, final int port, final InetAddress localAddress, final int localPort, final HttpConnectionParams params) throws IOException, UnknownHostException, ConnectTimeoutException {
+ if (params == null) {
+ throw new IllegalArgumentException("Parameters may not be null");
+ }
+
+ int timeout = params.getConnectionTimeout();
+
+ if (timeout == 0) {
+ return createSocket(host, port, localAddress, localPort);
+ } else {
+ // To be eventually deprecated when migrated to Java 1.4 or above
+ return ControllerThreadSocketFactory.createSocket(this, host, port, localAddress, localPort, timeout);
+ }
+ }
+
+ /**
+ * @see SecureProtocolSocketFactory#createSocket(java.lang.String,int)
+ */
+ public Socket createSocket(String host, int port) throws IOException, UnknownHostException {
+ return getSSLContext().getSocketFactory().createSocket(host, port);
+ }
+
+ /**
+ * @see SecureProtocolSocketFactory#createSocket(java.net.Socket,java.lang.String,int,boolean)
+ */
+ public Socket createSocket(Socket socket, String host, int port, boolean autoClose) throws IOException, UnknownHostException {
+ return getSSLContext().getSocketFactory().createSocket(socket, host, port, autoClose);
+ }
+
+ public boolean equals(Object obj) {
+ return ((obj != null) && obj.getClass().equals(DefaultX509TrustManager.class));
+ }
+
+ public int hashCode() {
+ return DefaultX509TrustManager.class.hashCode();
+ }
+
+ private static SSLContext createEasySSLContext() {
+ try {
+ SSLContext context = SSLContext.getInstance("TLS");
+ context.init(null, new TrustManager[] { new DefaultX509TrustManager(null) }, null);
+
+ return context;
+ } catch (Exception e) {
+ logger.error(e.getMessage(), e);
+ throw new HttpClientError(e.toString());
+ }
+ }
+
+ private SSLContext getSSLContext() {
+ if (this.sslcontext == null) {
+ this.sslcontext = createEasySSLContext();
+ }
+
+ return this.sslcontext;
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/DefaultX509TrustManager.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/DefaultX509TrustManager.java
new file mode 100644
index 0000000..4a8cfb6
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/DefaultX509TrustManager.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.security.KeyStore;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+
+import javax.net.ssl.KeyManagerFactory;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.TrustManagerFactory;
+import javax.net.ssl.X509TrustManager;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * @author xduo
+ *
+ */
+public class DefaultX509TrustManager implements X509TrustManager {
+
+ /** Log object for this class. */
+ private static Logger logger = LoggerFactory.getLogger(DefaultX509TrustManager.class);
+ private X509TrustManager standardTrustManager = null;
+
+ /**
+ * Constructor for DefaultX509TrustManager.
+ *
+ */
+ public DefaultX509TrustManager(KeyStore keystore) throws NoSuchAlgorithmException, KeyStoreException {
+ super();
+
+ TrustManagerFactory factory = TrustManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm());
+ factory.init(keystore);
+
+ TrustManager[] trustmanagers = factory.getTrustManagers();
+
+ if (trustmanagers.length == 0) {
+ throw new NoSuchAlgorithmException("SunX509 trust manager not supported");
+ }
+
+ this.standardTrustManager = (X509TrustManager) trustmanagers[0];
+ }
+
+ public X509Certificate[] getAcceptedIssuers() {
+ return this.standardTrustManager.getAcceptedIssuers();
+ }
+
+ public boolean isClientTrusted(X509Certificate[] certificates) {
+ return true;
+ // return this.standardTrustManager.isClientTrusted(certificates);
+ }
+
+ public boolean isServerTrusted(X509Certificate[] certificates) {
+ if ((certificates != null) && logger.isDebugEnabled()) {
+ logger.debug("Server certificate chain:");
+
+ for (int i = 0; i < certificates.length; i++) {
+ if (logger.isDebugEnabled()) {
+ logger.debug("X509Certificate[" + i + "]=" + certificates[i]);
+ }
+ }
+ }
+
+ if ((certificates != null) && (certificates.length == 1)) {
+ X509Certificate certificate = certificates[0];
+
+ try {
+ certificate.checkValidity();
+ } catch (CertificateException e) {
+ logger.error(e.toString());
+
+ return false;
+ }
+
+ return true;
+ } else {
+ return true;
+ // return this.standardTrustManager.isServerTrusted(certificates);
+ }
+ }
+
+ @Override
+ public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
+ // TODO Auto-generated method stub
+
+ }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopCmdOutput.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopCmdOutput.java
new file mode 100644
index 0000000..3edbe3e
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopCmdOutput.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.FileSystemCounter;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobStatus;
+import org.apache.hadoop.mapreduce.TaskCompletionEvent;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.job.constant.ExecutableConstants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * @author xduo
+ *
+ */
+public class HadoopCmdOutput {
+
+ protected static final Logger logger = LoggerFactory.getLogger(HadoopCmdOutput.class);
+
+ private final StringBuilder output;
+ private final Job job;
+
+ public HadoopCmdOutput(Job job, StringBuilder output) {
+ super();
+ this.job = job;
+ this.output = output;
+ }
+
+ public String getMrJobId() {
+ return getInfo().get(ExecutableConstants.MR_JOB_ID);
+ }
+
+ public Map<String, String> getInfo() {
+ if (job != null) {
+ Map<String, String> status = new HashMap<String, String>();
+ if (null != job.getJobID()) {
+ status.put(ExecutableConstants.MR_JOB_ID, job.getJobID().toString());
+ }
+ if (null != job.getTrackingURL()) {
+ status.put(ExecutableConstants.YARN_APP_URL, job.getTrackingURL().toString());
+ }
+ return status;
+ } else {
+ return Collections.emptyMap();
+ }
+ }
+
+ private String mapInputRecords;
+ private String hdfsBytesWritten;
+ private String rawInputBytesRead;
+
+ public String getMapInputRecords() {
+ return mapInputRecords;
+ }
+
+ public String getHdfsBytesWritten() {
+ return hdfsBytesWritten;
+ }
+
+ public String getRawInputBytesRead() {
+ return rawInputBytesRead;
+ }
+
+ public void updateJobCounter() {
+ try {
+ Counters counters = job.getCounters();
+ if (counters == null) {
+ String errorMsg = "no counters for job " + getMrJobId();
+ logger.warn(errorMsg);
+ output.append(errorMsg);
+ } else {
+ this.output.append(counters.toString()).append("\n");
+ logger.debug(counters.toString());
+
+ mapInputRecords = String.valueOf(counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue());
+ rawInputBytesRead = "";
+
+ String outputFolder = job.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir",
+ KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory());
+ logger.debug("outputFolder is " + outputFolder);
+ Path outputPath = new Path(outputFolder);
+ String fsScheme = outputPath.getFileSystem(job.getConfiguration()).getScheme();
+ long bytesWritten = counters.findCounter(fsScheme, FileSystemCounter.BYTES_WRITTEN).getValue();
+ if (bytesWritten == 0) {
+ logger.debug("Seems no counter found for " + fsScheme);
+ bytesWritten = counters.findCounter("FileSystemCounters", "HDFS_BYTES_WRITTEN").getValue();
+ }
+ hdfsBytesWritten = String.valueOf(bytesWritten);
+ }
+ JobStatus jobStatus = job.getStatus();
+ if (jobStatus.getState() == JobStatus.State.FAILED) {
+ logger.warn("Job Diagnostics:" + jobStatus.getFailureInfo());
+ output.append("Job Diagnostics:").append(jobStatus.getFailureInfo()).append("\n");
+ TaskCompletionEvent taskEvent = getOneTaskFailure(job);
+ if (taskEvent != null) {
+ String[] fails = job.getTaskDiagnostics(taskEvent.getTaskAttemptId());
+ logger.warn("Failure task Diagnostics:");
+ output.append("Failure task Diagnostics:").append("\n");
+ for (String failure : fails) {
+ logger.warn(failure);
+ output.append(failure).append("\n");
+ }
+ }
+ }
+ } catch (Exception e) {
+ logger.error(e.getLocalizedMessage(), e);
+ output.append(e.getLocalizedMessage());
+ }
+ }
+
+ private TaskCompletionEvent getOneTaskFailure(Job job) throws IOException, InterruptedException {
+ TaskCompletionEvent lastEvent = null;
+ int index = 0;
+ int failCount = 0;
+ TaskCompletionEvent[] events = job.getTaskCompletionEvents(index);
+ //This returns either nothing (if no task executions or no exceptions at all) or the last failure event within a subset of the exceptions from the first
+ //index at which exceptions are found in the task completion events
+ if (events == null) {
+ return lastEvent;
+ }
+ while (events.length > 0 && failCount == 0) {
+ for (TaskCompletionEvent event : events) {
+ if (event.getStatus().equals(TaskCompletionEvent.Status.FAILED)) {
+ failCount++;
+ lastEvent = event;
+ }
+ }
+ index += 10;
+ events = job.getTaskCompletionEvents(index);
+ }
+ return lastEvent;
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopJobStatusChecker.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopJobStatusChecker.java
new file mode 100644
index 0000000..6165652
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopJobStatusChecker.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.kylin.job.constant.JobStepStatusEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HadoopJobStatusChecker {
+
+ protected static final Logger logger = LoggerFactory.getLogger(HadoopJobStatusChecker.class);
+
+ public static JobStepStatusEnum checkStatus(Job job, StringBuilder output) {
+ if (job == null || job.getJobID() == null) {
+ output.append("Skip status check with empty job id..\n");
+ return JobStepStatusEnum.WAITING;
+ }
+
+ JobStepStatusEnum status = null;
+ try {
+ switch (job.getStatus().getState()) {
+ case SUCCEEDED:
+ status = JobStepStatusEnum.FINISHED;
+ break;
+ case FAILED:
+ status = JobStepStatusEnum.ERROR;
+ break;
+ case KILLED:
+ status = JobStepStatusEnum.KILLED;
+ break;
+ case RUNNING:
+ status = JobStepStatusEnum.RUNNING;
+ break;
+ case PREP:
+ status = JobStepStatusEnum.WAITING;
+ break;
+ default:
+ throw new IllegalStateException();
+ }
+ } catch (Exception e) {
+ logger.error("error check status", e);
+ output.append("Exception: ").append(e.getLocalizedMessage()).append("\n");
+ status = JobStepStatusEnum.ERROR;
+ }
+
+ return status;
+ }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopShellExecutable.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopShellExecutable.java
new file mode 100644
index 0000000..f6609c2
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/HadoopShellExecutable.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.lang.reflect.Constructor;
+
+import org.apache.kylin.common.util.ClassUtil;
+import org.apache.kylin.engine.mr.MRUtil;
+import org.apache.kylin.engine.mr.exception.HadoopShellException;
+import org.apache.kylin.job.exception.ExecuteException;
+import org.apache.kylin.job.execution.AbstractExecutable;
+import org.apache.kylin.job.execution.ExecutableContext;
+import org.apache.kylin.job.execution.ExecuteResult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Job step that instantiates an {@link AbstractHadoopJob} by class name and runs it through
+ * {@link MRUtil#runMRJob}, turning the numeric result code into an {@link ExecuteResult}.
+ */
+public class HadoopShellExecutable extends AbstractExecutable {
+    private static final Logger logger = LoggerFactory.getLogger(HadoopShellExecutable.class);
+
+    private static final String KEY_MR_JOB = "HADOOP_SHELL_JOB_CLASS";
+    private static final String KEY_PARAMS = "HADOOP_SHELL_JOB_PARAMS";
+
+    public HadoopShellExecutable() {
+        super();
+    }
+
+    /**
+     * Creates the configured job via its no-arg constructor, runs it with the whitespace-split
+     * parameters and reports success only when the result code is 0.
+     */
+    @Override
+    protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
+        final String jobClassName = getJobClass();
+        final String jobParams = getJobParams();
+        Preconditions.checkNotNull(jobClassName);
+        Preconditions.checkNotNull(jobParams);
+        try {
+            final Constructor<? extends AbstractHadoopJob> ctor = ClassUtil
+                    .forName(jobClassName, AbstractHadoopJob.class).getConstructor();
+            final AbstractHadoopJob hadoopJob = ctor.newInstance();
+            final String[] jobArgs = jobParams.trim().split("\\s+");
+            logger.info("parameters of the HadoopShellExecutable: {}", jobParams);
+
+            final StringBuilder log = new StringBuilder();
+            final int resultCode = runAndCaptureFailure(hadoopJob, jobArgs, log);
+            log.append("result code:").append(resultCode);
+
+            if (resultCode == 0) {
+                return new ExecuteResult(ExecuteResult.State.SUCCEED, log.toString());
+            }
+            return ExecuteResult.createFailed(new HadoopShellException(log.toString()));
+        } catch (ReflectiveOperationException e) {
+            logger.error("error getMapReduceJobClass, class name:" + getParam(KEY_MR_JOB), e);
+            return ExecuteResult.createError(e);
+        } catch (Exception e) {
+            logger.error("error execute " + this.toString(), e);
+            return ExecuteResult.createError(e);
+        }
+    }
+
+    /**
+     * Runs the job and returns its result code; when the run throws, the stack trace is appended
+     * to {@code log} and the result code 2 is returned instead.
+     */
+    private int runAndCaptureFailure(AbstractHadoopJob hadoopJob, String[] jobArgs, StringBuilder log) {
+        try {
+            return MRUtil.runMRJob(hadoopJob, jobArgs);
+        } catch (Exception ex) {
+            logger.error("error execute " + this.toString(), ex);
+            final StringWriter trace = new StringWriter();
+            ex.printStackTrace(new PrintWriter(trace));
+            log.append(trace.toString()).append("\n");
+            return 2;
+        }
+    }
+
+    /** Stores the fully qualified name of the job class to run. */
+    public void setJobClass(Class<? extends AbstractHadoopJob> clazzName) {
+        setParam(KEY_MR_JOB, clazzName.getName());
+    }
+
+    /** Returns the job class name stored by {@link #setJobClass}. */
+    public String getJobClass() throws ExecuteException {
+        return getParam(KEY_MR_JOB);
+    }
+
+    /** Stores the job's parameters as a single whitespace-separated string. */
+    public void setJobParams(String param) {
+        setParam(KEY_PARAMS, param);
+    }
+
+    /** Returns the parameter string stored by {@link #setJobParams}. */
+    public String getJobParams() {
+        return getParam(KEY_PARAMS);
+    }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/JobInfoConverter.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/JobInfoConverter.java
new file mode 100644
index 0000000..9c19065
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/JobInfoConverter.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.util.Map;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.model.CubeBuildTypeEnum;
+import org.apache.kylin.engine.mr.CubingJob;
+import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
+import org.apache.kylin.job.JobInstance;
+import org.apache.kylin.job.JobSearchResult;
+import org.apache.kylin.job.common.ShellExecutable;
+import org.apache.kylin.job.constant.JobStatusEnum;
+import org.apache.kylin.job.constant.JobStepStatusEnum;
+import org.apache.kylin.job.dao.ExecutableOutputPO;
+import org.apache.kylin.job.execution.AbstractExecutable;
+import org.apache.kylin.job.execution.CheckpointExecutable;
+import org.apache.kylin.job.execution.DefaultChainedExecutable;
+import org.apache.kylin.job.execution.ExecutableState;
+import org.apache.kylin.job.execution.Output;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class JobInfoConverter {
+ private static final Logger logger = LoggerFactory.getLogger(JobInfoConverter.class);
+
+ public static JobInstance parseToJobInstanceQuietly(CubingJob job, Map<String, Output> outputs) {
+ try {
+ return parseToJobInstance(job, outputs);
+ } catch (Exception e) {
+ logger.error("Failed to parse job instance: uuid={}", job, e);
+ return null;
+ }
+ }
+
+ public static JobInstance parseToJobInstanceQuietly(CheckpointExecutable job, Map<String, Output> outputs) {
+ try {
+ return parseToJobInstance(job, outputs);
+ } catch (Exception e) {
+ logger.error("Failed to parse job instance: uuid={}", job, e);
+ return null;
+ }
+ }
+
+ public static JobInstance parseToJobInstance(CubingJob job, Map<String, Output> outputs) {
+ if (job == null) {
+ logger.warn("job is null.");
+ return null;
+ }
+
+ Output output = outputs.get(job.getId());
+ if (output == null) {
+ logger.warn("job output is null.");
+ return null;
+ }
+
+ CubingJob cubeJob = (CubingJob) job;
+ CubeInstance cube = CubeManager.getInstance(KylinConfig.getInstanceFromEnv())
+ .getCube(CubingExecutableUtil.getCubeName(cubeJob.getParams()));
+
+ final JobInstance result = new JobInstance();
+ result.setName(job.getName());
+ result.setProjectName(cubeJob.getProjectName());
+ result.setRelatedCube(cube != null ? cube.getName() : CubingExecutableUtil.getCubeName(cubeJob.getParams()));
+ result.setDisplayCubeName(cube != null ? cube.getDisplayName() : CubingExecutableUtil.getCubeName(cubeJob.getParams()));
+ result.setRelatedSegment(CubingExecutableUtil.getSegmentId(cubeJob.getParams()));
+ result.setRelatedSegmentName(CubingExecutableUtil.getSegmentName(cubeJob.getParams()));
+ result.setLastModified(output.getLastModified());
+ result.setSubmitter(job.getSubmitter());
+ result.setUuid(job.getId());
+ result.setType(CubeBuildTypeEnum.BUILD);
+ result.setStatus(parseToJobStatus(output.getState()));
+ result.setBuildInstance(AbstractExecutable.getBuildInstance(output));
+ result.setMrWaiting(AbstractExecutable.getExtraInfoAsLong(output, CubingJob.MAP_REDUCE_WAIT_TIME, 0L) / 1000);
+ result.setExecStartTime(AbstractExecutable.getStartTime(output));
+ result.setExecEndTime(AbstractExecutable.getEndTime(output));
+ result.setExecInterruptTime(AbstractExecutable.getInterruptTime(output));
+ result.setDuration(AbstractExecutable.getDuration(result.getExecStartTime(), result.getExecEndTime(),
+ result.getExecInterruptTime()) / 1000);
+ for (int i = 0; i < job.getTasks().size(); ++i) {
+ AbstractExecutable task = job.getTasks().get(i);
+ result.addStep(parseToJobStep(task, i, outputs.get(task.getId())));
+ }
+ return result;
+ }
+
+ public static JobInstance parseToJobInstance(CheckpointExecutable job, Map<String, Output> outputs) {
+ if (job == null) {
+ logger.warn("job is null.");
+ return null;
+ }
+
+ Output output = outputs.get(job.getId());
+ if (output == null) {
+ logger.warn("job output is null.");
+ return null;
+ }
+
+ final JobInstance result = new JobInstance();
+ result.setName(job.getName());
+ result.setProjectName(job.getProjectName());
+ result.setRelatedCube(CubingExecutableUtil.getCubeName(job.getParams()));
+ result.setDisplayCubeName(CubingExecutableUtil.getCubeName(job.getParams()));
+ result.setLastModified(output.getLastModified());
+ result.setSubmitter(job.getSubmitter());
+ result.setUuid(job.getId());
+ result.setType(CubeBuildTypeEnum.CHECKPOINT);
+ result.setStatus(parseToJobStatus(output.getState()));
+ result.setBuildInstance(AbstractExecutable.getBuildInstance(output));
+ result.setExecStartTime(AbstractExecutable.getStartTime(output));
+ result.setExecEndTime(AbstractExecutable.getEndTime(output));
+ result.setExecInterruptTime(AbstractExecutable.getInterruptTime(output));
+ result.setDuration(AbstractExecutable.getDuration(result.getExecStartTime(), result.getExecEndTime(),
+ result.getExecInterruptTime()) / 1000);
+ for (int i = 0; i < job.getTasks().size(); ++i) {
+ AbstractExecutable task = job.getTasks().get(i);
+ result.addStep(parseToJobStep(task, i, outputs.get(task.getId())));
+ }
+ return result;
+ }
+
+ public static JobInstance.JobStep parseToJobStep(AbstractExecutable task, int i, Output stepOutput) {
+ JobInstance.JobStep result = new JobInstance.JobStep();
+ result.setId(task.getId());
+ result.setName(task.getName());
+ result.setSequenceID(i);
+
+ if (stepOutput == null) {
+ logger.warn("Cannot found output for task: id={}", task.getId());
+ return result;
+ }
+
+ result.setStatus(parseToJobStepStatus(stepOutput.getState()));
+ for (Map.Entry<String, String> entry : stepOutput.getExtra().entrySet()) {
+ if (entry.getKey() != null && entry.getValue() != null) {
+ result.putInfo(entry.getKey(), entry.getValue());
+ }
+ }
+ result.setExecStartTime(AbstractExecutable.getStartTime(stepOutput));
+ result.setExecEndTime(AbstractExecutable.getEndTime(stepOutput));
+ if (task instanceof ShellExecutable) {
+ result.setExecCmd(((ShellExecutable) task).getCmd());
+ }
+ if (task instanceof MapReduceExecutable) {
+ result.setExecCmd(((MapReduceExecutable) task).getMapReduceParams());
+ result.setExecWaitTime(
+ AbstractExecutable.getExtraInfoAsLong(stepOutput, MapReduceExecutable.MAP_REDUCE_WAIT_TIME, 0L)
+ / 1000);
+ }
+ if (task instanceof HadoopShellExecutable) {
+ result.setExecCmd(((HadoopShellExecutable) task).getJobParams());
+ }
+ return result;
+ }
+
+ public static JobStatusEnum parseToJobStatus(ExecutableState state) {
+ switch (state) {
+ case READY:
+ return JobStatusEnum.PENDING;
+ case RUNNING:
+ return JobStatusEnum.RUNNING;
+ case ERROR:
+ return JobStatusEnum.ERROR;
+ case DISCARDED:
+ return JobStatusEnum.DISCARDED;
+ case SUCCEED:
+ return JobStatusEnum.FINISHED;
+ case STOPPED:
+ return JobStatusEnum.STOPPED;
+ default:
+ throw new RuntimeException("invalid state:" + state);
+ }
+ }
+
+ public static JobStepStatusEnum parseToJobStepStatus(ExecutableState state) {
+ switch (state) {
+ case READY:
+ return JobStepStatusEnum.PENDING;
+ case RUNNING:
+ return JobStepStatusEnum.RUNNING;
+ case ERROR:
+ return JobStepStatusEnum.ERROR;
+ case DISCARDED:
+ return JobStepStatusEnum.DISCARDED;
+ case SUCCEED:
+ return JobStepStatusEnum.FINISHED;
+ case STOPPED:
+ return JobStepStatusEnum.STOPPED;
+ default:
+ throw new RuntimeException("invalid state:" + state);
+ }
+ }
+
+ public static JobSearchResult parseToJobSearchResult(DefaultChainedExecutable job, Map<String, ExecutableOutputPO> outputs) {
+ if (job == null) {
+ logger.warn("job is null.");
+ return null;
+ }
+
+ ExecutableOutputPO output = outputs.get(job.getId());
+ if (output == null) {
+ logger.warn("job output is null.");
+ return null;
+ }
+
+ final JobSearchResult result = new JobSearchResult();
+
+ String cubeName = CubingExecutableUtil.getCubeName(job.getParams());
+
+ if (cubeName == null) {
+ cubeName = job.getParam("model_name");
+ } else {
+ CubeInstance cube = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
+ if (cube != null) {
+ cubeName = cube.getDisplayName();
+ }
+ }
+ result.setCubeName(cubeName);
+ result.setId(job.getId());
+ result.setJobName(job.getName());
+ result.setLastModified(output.getLastModified());
+ result.setJobStatus(JobInfoConverter.parseToJobStatus(job.getStatus()));
+ return result;
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/JobRelatedMetaUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/JobRelatedMetaUtil.java
new file mode 100644
index 0000000..906d501
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/JobRelatedMetaUtil.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.KylinConfigExt;
+import org.apache.kylin.common.persistence.AutoDeleteDirectory;
+import org.apache.kylin.common.persistence.RawResource;
+import org.apache.kylin.common.persistence.ResourceStore;
+import org.apache.kylin.common.persistence.ResourceTool;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.apache.kylin.metadata.model.TableRef;
+import org.apache.kylin.source.SourceManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.LinkedHashSet;
+import java.util.Properties;
+import java.util.Set;
+
+/**
+ * Helpers for collecting the metadata resources a cube job depends on and copying them to
+ * another metadata store.
+ */
+public class JobRelatedMetaUtil {
+    private static final Logger logger = LoggerFactory.getLogger(JobRelatedMetaUtil.class);
+
+    private JobRelatedMetaUtil() {
+    }
+
+    /**
+     * Collects the resource paths of the cube, its model, its descriptor, its project and every
+     * table of the model (plus each table's MR-dependent resources).
+     *
+     * @param cube the cube whose metadata is collected
+     * @return the resource paths in insertion order, without duplicates
+     */
+    public static Set<String> collectCubeMetadata(CubeInstance cube) {
+        // cube, model_desc, cube_desc, project, table
+        Set<String> dumpList = new LinkedHashSet<>();
+        dumpList.add(cube.getResourcePath());
+        dumpList.add(cube.getDescriptor().getModel().getResourcePath());
+        dumpList.add(cube.getDescriptor().getResourcePath());
+        dumpList.add(cube.getProjectInstance().getResourcePath());
+
+        for (TableRef tableRef : cube.getDescriptor().getModel().getAllTables()) {
+            TableDesc table = tableRef.getTableDesc();
+            dumpList.add(table.getResourcePath());
+            dumpList.addAll(SourceManager.getMRDependentResources(table));
+        }
+
+        return dumpList;
+    }
+
+    /**
+     * Copies the listed resources from the store of {@code kylinConfig} into a store rooted at
+     * {@code metaDir}.
+     *
+     * @param kylinConfig config identifying the source resource store
+     * @param metaDir     local directory used as the destination store
+     * @param dumpList    resource paths to copy
+     * @throws IOException           when reading or writing a resource fails
+     * @throws IllegalStateException when a listed resource does not exist in the source store
+     */
+    public static void dumpResources(KylinConfig kylinConfig, File metaDir, Set<String> dumpList) throws IOException {
+        long startTime = System.currentTimeMillis();
+
+        ResourceStore from = ResourceStore.getStore(kylinConfig);
+        KylinConfig localConfig = KylinConfig.createInstanceFromUri(metaDir.getAbsolutePath());
+        ResourceStore to = ResourceStore.getStore(localConfig);
+        for (String path : dumpList) {
+            RawResource res = from.getResource(path);
+            if (res == null)
+                throw new IllegalStateException("No resource found at -- " + path);
+            try {
+                to.putResource(path, res.content(), res.lastModified());
+            } finally {
+                // Close the source stream even when the copy fails.
+                res.content().close();
+            }
+        }
+
+        logger.debug("Dump resources to {} took {} ms", metaDir, System.currentTimeMillis() - startTime);
+    }
+
+    /**
+     * Dumps the listed resources and a kylin.properties file into a temporary directory, then
+     * copies that directory's content to the store addressed by {@code metadataUrl}. The
+     * temporary directory is removed afterwards.
+     *
+     * @param dumpList    resource paths to dump
+     * @param kylinConfig config identifying the source store; also exported as kylin.properties
+     * @param metadataUrl value written to "kylin.metadata.url" in the exported properties and
+     *                    used as the upload destination
+     * @throws IOException when dumping, writing the properties file or uploading fails
+     */
+    public static void dumpAndUploadKylinPropsAndMetadata(Set<String> dumpList, KylinConfigExt kylinConfig, String metadataUrl)
+            throws IOException {
+
+        try (AutoDeleteDirectory tmpDir = new AutoDeleteDirectory("kylin_job_meta", "");
+                AutoDeleteDirectory metaDir = tmpDir.child("meta")) {
+            // dump metadata (once; a second dump would only rewrite the same files)
+            dumpResources(kylinConfig, metaDir.getFile(), dumpList);
+
+            // write kylin.properties
+            Properties props = kylinConfig.exportToProperties();
+            props.setProperty("kylin.metadata.url", metadataUrl);
+            File kylinPropsFile = new File(metaDir.getFile(), "kylin.properties");
+            try (FileOutputStream os = new FileOutputStream(kylinPropsFile)) {
+                props.store(os, kylinPropsFile.getAbsolutePath());
+            }
+
+            KylinConfig dstConfig = KylinConfig.createKylinConfig(props);
+            //upload metadata
+            new ResourceTool().copy(KylinConfig.createInstanceFromUri(metaDir.getAbsolutePath()), dstConfig);
+        }
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/MapReduceExecutable.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/MapReduceExecutable.java
new file mode 100755
index 0000000..a33f171
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/MapReduceExecutable.java
@@ -0,0 +1,541 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.lang.reflect.Constructor;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import com.google.common.base.Strings;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Cluster;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.JobStatus;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.lock.DistributedLock;
+import org.apache.kylin.common.util.ClassUtil;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.engine.mr.exception.MapReduceException;
+import org.apache.kylin.job.constant.ExecutableConstants;
+import org.apache.kylin.job.constant.JobStepStatusEnum;
+import org.apache.kylin.job.exception.ExecuteException;
+import org.apache.kylin.job.execution.AbstractExecutable;
+import org.apache.kylin.job.execution.ExecutableContext;
+import org.apache.kylin.job.execution.ExecutableManager;
+import org.apache.kylin.job.execution.ExecutableState;
+import org.apache.kylin.job.execution.ExecuteResult;
+import org.apache.kylin.job.execution.Output;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ */
+public class MapReduceExecutable extends AbstractExecutable {
+
+ // Extra-info key under which the MapReduce wait time is recorded (see setMapReduceWaitTime).
+ public static final String MAP_REDUCE_WAIT_TIME = "mapReduceWaitTime";
+ // Param key holding the class name of the AbstractHadoopJob to run (see setMapReduceJobClass).
+ private static final String KEY_MR_JOB = "MR_JOB_CLASS";
+ // Param key holding the job arguments as one string, split on whitespace before use.
+ private static final String KEY_PARAMS = "MR_JOB_PARAMS";
+ // Param key holding a comma-separated list of extra names to save counters under (see readCounters).
+ private static final String KEY_COUNTER_SAVEAS = "MR_COUNTER_SAVEAS";
+ // Locked in getLock before attempting to take the ephemeral distributed lock.
+ private final Lock threadLock = new ReentrantLock();
+
+ protected static final Logger logger = LoggerFactory.getLogger(MapReduceExecutable.class);
+
+ /** Creates the executable; only delegates to the superclass constructor. */
+ public MapReduceExecutable() {
+ super();
+ }
+
+ @Override
+ protected void onExecuteStart(ExecutableContext executableContext) {
+ final Output output = getOutput();
+ if (output.getExtra().containsKey(START_TIME)) {
+ final String mrJobId = output.getExtra().get(ExecutableConstants.MR_JOB_ID);
+ if (mrJobId == null) {
+ getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
+ return;
+ }
+ try {
+ Configuration conf = new Configuration(HadoopUtil.getCurrentConfiguration());
+ overwriteJobConf(conf, executableContext.getConfig(), getMapReduceParams().trim().split("\\s+"));
+ Job job = new Cluster(conf).getJob(JobID.forName(mrJobId));
+ if (job == null || job.getJobState() == JobStatus.State.FAILED) {
+ //remove previous mr job info
+ super.onExecuteStart(executableContext);
+ } else {
+ getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
+ }
+ } catch (IOException | ParseException e) {
+ logger.warn("error get hadoop status");
+ super.onExecuteStart(executableContext);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ logger.warn("error get hadoop status");
+ super.onExecuteStart(executableContext);
+ }
+ } else {
+ super.onExecuteStart(executableContext);
+ }
+ }
+
+ /**
+ * Submits the configured MapReduce job, or re-attaches to an already submitted one, and polls
+ * its status until it completes or this step is discarded or paused.
+ *
+ * If the step's extra info contains an MR job id, the job is looked up on the cluster;
+ * otherwise the job class is instantiated reflectively and run asynchronously. On every
+ * discarded exit path the distributed lock is released when getIsNeedLock() is true.
+ *
+ * @param context the execution context, providing the Kylin config
+ * @return SUCCEED when the job finished, DISCARDED when the step was discarded, STOPPED when
+ * it was paused, and a failed/error result otherwise
+ */
+ @Override
+ protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
+ final String mapReduceJobClass = getMapReduceJobClass();
+ DistributedLock lock = null;
+
+ Preconditions.checkNotNull(mapReduceJobClass);
+ try {
+
+ if (getIsNeedLock()) {
+ lock = KylinConfig.getInstanceFromEnv().getDistributedLockFactory().lockForCurrentThread();
+ getLock(lock);
+ }
+
+ Job job;
+ ExecutableManager mgr = getManager();
+ Configuration conf = new Configuration(HadoopUtil.getCurrentConfiguration());
+ String[] jobArgs = overwriteJobConf(conf, context.getConfig(), getMapReduceParams().trim().split("\\s+"));
+ final Map<String, String> extra = mgr.getOutput(getId()).getExtra();
+ // A recorded MR job id means the job was submitted by an earlier run: re-attach to it.
+ if (extra.containsKey(ExecutableConstants.MR_JOB_ID)) {
+ job = new Cluster(conf).getJob(JobID.forName(extra.get(ExecutableConstants.MR_JOB_ID)));
+ logger.info("mr_job_id:" + extra.get(ExecutableConstants.MR_JOB_ID) + " resumed");
+ } else {
+ final Constructor<? extends AbstractHadoopJob> constructor = ClassUtil
+ .forName(mapReduceJobClass, AbstractHadoopJob.class).getConstructor();
+ final AbstractHadoopJob hadoopJob = constructor.newInstance();
+ hadoopJob.setConf(conf);
+ hadoopJob.setAsync(true); // so the ToolRunner.run() returns right away
+ logger.info("parameters of the MapReduceExecutable: {}", getMapReduceParams());
+ try {
+
+ hadoopJob.run(jobArgs);
+
+ if (hadoopJob.isSkipped()) {
+ if (isDiscarded()) {
+ if (getIsNeedLock()) {
+ releaseLock(lock);
+ }
+ return new ExecuteResult(ExecuteResult.State.DISCARDED, "skipped");
+ } else {
+ return new ExecuteResult(ExecuteResult.State.SUCCEED, "skipped");
+ }
+
+ }
+ } catch (Exception ex) {
+ // Submission failed: report the stack trace with result code 2.
+ StringBuilder log = new StringBuilder();
+ logger.error("error execute " + this.toString(), ex);
+ StringWriter stringWriter = new StringWriter();
+ ex.printStackTrace(new PrintWriter(stringWriter));
+ log.append(stringWriter.toString()).append("\n");
+ log.append("result code:").append(2);
+ if (isDiscarded()) {
+ if (getIsNeedLock()) {
+ releaseLock(lock);
+ }
+ return new ExecuteResult(ExecuteResult.State.DISCARDED, log.toString());
+ } else {
+ return new ExecuteResult(ExecuteResult.State.ERROR, log.toString(), ex);
+ }
+ }
+ job = hadoopJob.getJob();
+ }
+ final StringBuilder output = new StringBuilder();
+ final HadoopCmdOutput hadoopCmdOutput = new HadoopCmdOutput(job, output);
+
+ // Poll the job status until it completes or this step is discarded or paused.
+ JobStepStatusEnum status = JobStepStatusEnum.NEW;
+ while (!isDiscarded() && !isPaused()) {
+
+ JobStepStatusEnum newStatus = HadoopJobStatusChecker.checkStatus(job, output);
+ // NOTE(review): this tests the status seen on the previous poll (status), not newStatus,
+ // so a KILLED job is handled one polling interval after it is first observed - confirm intended.
+ if (status == JobStepStatusEnum.KILLED) {
+ mgr.updateJobOutput(getId(), ExecutableState.ERROR, hadoopCmdOutput.getInfo(), "killed by admin");
+ if (isDiscarded()) {
+ if (getIsNeedLock()) {
+ releaseLock(lock);
+ }
+ return new ExecuteResult(ExecuteResult.State.DISCARDED, "killed by admin");
+ } else {
+ return new ExecuteResult(ExecuteResult.State.FAILED, "killed by admin");
+ }
+
+ }
+ // Record the wait time on the transition out of WAITING.
+ if (status == JobStepStatusEnum.WAITING && (newStatus == JobStepStatusEnum.FINISHED
+ || newStatus == JobStepStatusEnum.ERROR || newStatus == JobStepStatusEnum.RUNNING)) {
+ final long waitTime = System.currentTimeMillis() - getStartTime();
+ setMapReduceWaitTime(waitTime);
+ }
+ mgr.addJobInfo(getId(), hadoopCmdOutput.getInfo());
+ status = newStatus;
+ if (status.isComplete()) {
+ final Map<String, String> info = hadoopCmdOutput.getInfo();
+ readCounters(hadoopCmdOutput, info);
+ mgr.addJobInfo(getId(), info);
+
+ if (status == JobStepStatusEnum.FINISHED) {
+ if (isDiscarded()) {
+ if (getIsNeedLock()) {
+ releaseLock(lock);
+ }
+ return new ExecuteResult(ExecuteResult.State.DISCARDED, output.toString());
+ } else {
+ return new ExecuteResult(ExecuteResult.State.SUCCEED, output.toString());
+ }
+
+ } else {
+ if (isDiscarded()) {
+ if (getIsNeedLock()) {
+ releaseLock(lock);
+ }
+ return new ExecuteResult(ExecuteResult.State.DISCARDED, output.toString());
+ } else {
+ return ExecuteResult.createFailed(new MapReduceException(output.toString()));
+ }
+ }
+ }
+ Thread.sleep(context.getConfig().getYarnStatusCheckIntervalSeconds() * 1000L);
+ }
+
+ // Reached only when the loop ended because the step was discarded or paused.
+ // try to kill running map-reduce job to release resources.
+ if (job != null) {
+ try {
+ job.killJob();
+ } catch (Exception e) {
+ logger.warn("failed to kill hadoop job: " + job.getJobID(), e);
+ }
+ }
+
+ if (isDiscarded()) {
+ if (getIsNeedLock()) {
+ releaseLock(lock);
+ }
+ return new ExecuteResult(ExecuteResult.State.DISCARDED, output.toString());
+ } else {
+ return new ExecuteResult(ExecuteResult.State.STOPPED, output.toString());
+ }
+
+ } catch (ReflectiveOperationException e) {
+ logger.error("error getMapReduceJobClass, class name:" + getParam(KEY_MR_JOB), e);
+ if (isDiscarded()) {
+ if (getIsNeedLock()) {
+ releaseLock(lock);
+ }
+ return new ExecuteResult(ExecuteResult.State.DISCARDED, e.getMessage());
+ } else {
+ return ExecuteResult.createError(e);
+ }
+ } catch (Exception e) {
+ // NOTE(review): an InterruptedException from Thread.sleep also lands here and the
+ // thread's interrupt flag is not restored - confirm whether that is intended.
+ logger.error("error execute " + this.toString(), e);
+ if (isDiscarded()) {
+ if (getIsNeedLock()) {
+ releaseLock(lock);
+ }
+ return new ExecuteResult(ExecuteResult.State.DISCARDED, e.getMessage());
+ } else {
+ return ExecuteResult.createError(e);
+ }
+ }
+ }
+
+ private void readCounters(final HadoopCmdOutput hadoopCmdOutput, final Map<String, String> info) {
+ hadoopCmdOutput.updateJobCounter();
+ info.put(ExecutableConstants.SOURCE_RECORDS_COUNT, hadoopCmdOutput.getMapInputRecords());
+ info.put(ExecutableConstants.SOURCE_RECORDS_SIZE, hadoopCmdOutput.getRawInputBytesRead());
+ info.put(ExecutableConstants.HDFS_BYTES_WRITTEN, hadoopCmdOutput.getHdfsBytesWritten());
+
+ String saveAs = getParam(KEY_COUNTER_SAVEAS);
+ if (saveAs != null) {
+ String[] saveAsNames = saveAs.split(",");
+ saveCounterAs(hadoopCmdOutput.getMapInputRecords(), saveAsNames, 0, info);
+ saveCounterAs(hadoopCmdOutput.getRawInputBytesRead(), saveAsNames, 1, info);
+ saveCounterAs(hadoopCmdOutput.getHdfsBytesWritten(), saveAsNames, 2, info);
+ }
+ }
+
+ private void saveCounterAs(String counter, String[] saveAsNames, int i, Map<String, String> info) {
+ if (saveAsNames.length > i && StringUtils.isBlank(saveAsNames[i]) == false) {
+ info.put(saveAsNames[i].trim(), counter);
+ }
+ }
+
+ /** Reads the extra info stored under MAP_REDUCE_WAIT_TIME as a long, passing 0 as the default value. */
+ public long getMapReduceWaitTime() {
+ return getExtraInfoAsLong(MAP_REDUCE_WAIT_TIME, 0L);
+ }
+
+ /** Records {@code t}, in its decimal string form, as extra info under MAP_REDUCE_WAIT_TIME. */
+ public void setMapReduceWaitTime(long t) {
+     addExtraInfo(MAP_REDUCE_WAIT_TIME, String.valueOf(t));
+ }
+
+ /** Returns the value of the MR_JOB_CLASS param, i.e. the class name stored by setMapReduceJobClass. */
+ public String getMapReduceJobClass() throws ExecuteException {
+ return getParam(KEY_MR_JOB);
+ }
+
+ /** Stores the fully qualified name of the given job class under the MR_JOB_CLASS param. */
+ public void setMapReduceJobClass(Class<? extends AbstractHadoopJob> clazzName) {
+ setParam(KEY_MR_JOB, clazzName.getName());
+ }
+
+ /** Returns the value of the MR_JOB_PARAMS param; callers in this class split it on whitespace. */
+ public String getMapReduceParams() {
+ return getParam(KEY_PARAMS);
+ }
+
+ /** Stores the job's argument string under the MR_JOB_PARAMS param. */
+ public void setMapReduceParams(String param) {
+ setParam(KEY_PARAMS, param);
+ }
+
+ /**
+ * Stores the comma-separated alias list read by readCounters: position 0 names the map input
+ * records, position 1 the raw input bytes read and position 2 the HDFS bytes written.
+ */
+ public void setCounterSaveAs(String value) {
+ setParam(KEY_COUNTER_SAVEAS, value);
+ }
+
+ /** Stores the flag, in its string form, under the "isNeedLock" param. */
+ public void setIsNeedLock(Boolean isNeedLock) {
+ setParam("isNeedLock", String.valueOf(isNeedLock));
+ }
+
+ /** Returns the "isNeedLock" param parsed as a boolean; false when the param is null or empty. */
+ public boolean getIsNeedLock() {
+     final String flag = getParam("isNeedLock");
+     return !Strings.isNullOrEmpty(flag) && Boolean.parseBoolean(flag);
+ }
+
+    /** Stores the string form of {@code isNeedReleaseLock} under the {@code "isNeedReleaseLock"} parameter. */
+    public void setIsNeedReleaseLock(Boolean isNeedReleaseLock) {
+        final String flagText = String.valueOf(isNeedReleaseLock);
+        setParam("isNeedReleaseLock", flagText);
+    }
+
+    /** Returns {@code true} only when the {@code "isNeedReleaseLock"} parameter holds the text "true" (case-insensitive). */
+    public boolean getIsNeedReleaseLock() {
+        // Boolean.parseBoolean already yields false for null and for the empty string.
+        return Boolean.parseBoolean(getParam("isNeedReleaseLock"));
+    }
+
+    /**
+     * Stores {@code pathName} under the {@code "lockPathName"} parameter.
+     *
+     * @param pathName name from which the lock path helpers of this class derive their paths
+     */
+    public void setLockPathName(String pathName) {
+        setParam("lockPathName", pathName);
+    }
+
+    /** Returns the value stored under the {@code "lockPathName"} parameter. */
+    public String getLockPathName() {
+        final String lockPathName = getParam("lockPathName");
+        return lockPathName;
+    }
+
+    /**
+     * Stores {@code jobId} under the {@code "jobFlowJobId"} parameter.
+     *
+     * @param jobId id later returned by {@link #getJobFlowJobId()}
+     */
+    public void setJobFlowJobId(String jobId) {
+        setParam("jobFlowJobId", jobId);
+    }
+
+    /** Returns the value stored under the {@code "jobFlowJobId"} parameter. */
+    public String getJobFlowJobId() {
+        final String flowJobId = getParam("jobFlowJobId");
+        return flowJobId;
+    }
+
+    /**
+     * Blocks until this step holds the cube job lock.
+     * <p>
+     * If the job's own lock path is already locked, only the ephemeral lock is taken again. Otherwise
+     * the method loops, sleeping one minute between attempts, until neither the parent lock path nor
+     * the ephemeral lock path is locked; it then takes the ephemeral lock and, on success, the
+     * permanent lock on the job's own path. If the permanent lock cannot be obtained, the ephemeral
+     * lock is released again and the loop retries.
+     *
+     * @param lock distributed lock client used for every lock operation
+     * @throws InterruptedException if the thread is interrupted while sleeping between attempts
+     */
+    private void getLock(DistributedLock lock) throws InterruptedException {
+        logger.info("{} try to get zk lock, zk client {} ", getId(), lock.getClient());
+        String ephemeralLockPath = getEphemeralLockPathName();
+        String fullLockPath = getCubeJobLockPathName();
+        boolean isLockedByOther = true;
+        boolean getLocked = false;
+        long lockStartTime = System.currentTimeMillis();
+
+        boolean isLockedByTheJob = lock.isLocked(fullLockPath);
+        logger.info("cube job {} zk lock is isLockedByTheJob:{}", getId(), isLockedByTheJob);
+        if (!isLockedByTheJob) {//not lock by the job
+            while (isLockedByOther) {
+                isLockedByOther = lock.isLocked(getCubeJobLockParentPathName());//other job global lock
+
+                if (!isLockedByOther) {//not lock by other job
+                    isLockedByOther = lock.isLocked(ephemeralLockPath);//check the ephemeral current lock
+                    logger.info("zookeeper lock path :{}, is locked by other job result is {}", ephemeralLockPath,
+                            isLockedByOther);
+
+                    if (!isLockedByOther) {//the ephemeral lock not lock by other job
+                        //try to get ephemeral lock
+                        logger.debug("{} before start to get lock ephemeralLockPath {}", getId(),
+                                ephemeralLockPath);
+                        // Acquire outside the try block: if lock() itself fails, the finally clause must not
+                        // call unlock() on a lock this thread never obtained.
+                        threadLock.lock();
+                        try {
+                            logger.debug("{} start to get lock ephemeralLockPath {}", getId(), ephemeralLockPath);
+                            getLocked = lock.lock(ephemeralLockPath);
+                            logger.debug("{} finish get lock ephemeralLockPath {},getLocked {}", getId(),
+                                    ephemeralLockPath, getLocked);
+                        } finally {
+                            threadLock.unlock();
+                            logger.debug("{} finish unlock the thread lock ,ephemeralLockPath {} ", getId(),
+                                    ephemeralLockPath);
+                        }
+
+                        if (getLocked) {//get ephemeral lock success
+                            try {
+                                getLocked = lock.globalPermanentLock(fullLockPath);//add the fullLockPath lock in case of the server crash then the other server can run the same job can get the lock
+                                if (getLocked) {
+                                    break;
+                                } else {
+                                    if (lock.isLocked(ephemeralLockPath)) {
+                                        lock.unlock(ephemeralLockPath);
+                                    }
+                                }
+                            } catch (Exception e) {
+                                // Still best effort (release and retry), but leave a trace of the failure.
+                                logger.warn("{} failed to get permanent lock {}, release ephemeral lock {} and retry",
+                                        getId(), fullLockPath, ephemeralLockPath, e);
+                                if (lock.isLocked(ephemeralLockPath)) {
+                                    lock.unlock(ephemeralLockPath);
+                                }
+                            }
+                        }
+                        isLockedByOther = true;//get lock fail,will try again
+                    }
+                }
+                // wait 1 min and try again
+                logger.info(
+                        "{}, parent lock path({}) is locked by other job result is {} ,ephemeral lock path :{} is locked by other job result is {},will try after one minute",
+                        getId(), getCubeJobLockParentPathName(), isLockedByOther, ephemeralLockPath, isLockedByOther);
+                Thread.sleep(60000);
+            }
+        } else {
+            lock.lock(ephemeralLockPath);
+        }
+
+        long useSec = ((System.currentTimeMillis() - lockStartTime) / 1000);
+        logger.info("job {} get zookeeper lock path:{} success,zookeeper get lock costTime : {} s", getId(),
+                fullLockPath, useSec);
+    }
+
+ private void releaseLock(DistributedLock lock) {
+ String parentLockPath = getCubeJobLockParentPathName();
+ String ephemeralLockPath = getEphemeralLockPathName();
+ if (lock.isLocked(getCubeJobLockPathName())) {//release cube job dict lock if exists
+ lock.purgeLocks(parentLockPath);
+ logger.info("{} unlock cube job dict lock path({}) success", getJobFlowJobId(), parentLockPath);
+
+ if (lock.isLocked(ephemeralLockPath)) {//release cube job Ephemeral lock if exists
+ lock.purgeLocks(ephemeralLockPath);
+ logger.info("{} unlock cube job ephemeral lock path({}) success", getJobFlowJobId(), ephemeralLockPath);
+ }
+ }
+ }
+
+ private String getEphemeralLockPathName() {
+ String pathName = getLockPathName();
+ if (Strings.isNullOrEmpty(pathName)) {
+ throw new IllegalArgumentException("cube job lock path name is null");
+ }
+
+ return CubeJobLockUtil.getEphemeralLockPath(pathName);
+ }
+
+ private String getCubeJobLockPathName() {
+ String pathName = getLockPathName();
+ if (Strings.isNullOrEmpty(pathName)) {
+ throw new IllegalArgumentException("cube job lock path name is null");
+ }
+
+ String flowJobId = getJobFlowJobId();
+ if (Strings.isNullOrEmpty(flowJobId)) {
+ throw new IllegalArgumentException("cube job lock path flowJobId is null");
+ }
+ return CubeJobLockUtil.getLockPath(pathName, flowJobId);
+ }
+
+ private String getCubeJobLockParentPathName() {
+ String pathName = getLockPathName();
+ if (Strings.isNullOrEmpty(pathName)) {
+ throw new IllegalArgumentException(" create mr hive dict lock path name is null");
+ }
+ return CubeJobLockUtil.getLockPath(pathName, null);
+ }
+
+
+ @SuppressWarnings("static-access") // the OptionBuilder calls below are chained through the returned instance, hence the suppression
+ private static final Option OPTION_JOB_CONF = OptionBuilder.withArgName(BatchConstants.ARG_CONF).hasArg() // non-required option that takes one argument; its value is read in overwriteJobConf
+ .isRequired(false).create(BatchConstants.ARG_CONF);
+
+ @SuppressWarnings("static-access") // same chained OptionBuilder usage as for OPTION_JOB_CONF
+ private static final Option OPTION_CUBE_NAME = OptionBuilder.withArgName(BatchConstants.ARG_CUBE_NAME).hasArg() // non-required option that takes one argument; its value is read in overwriteJobConf
+ .isRequired(false).create(BatchConstants.ARG_CUBE_NAME);
+
+    /**
+     * Applies Kylin's MapReduce configuration overrides to {@code conf} and returns the job arguments
+     * that are left for the job itself.
+     * <p>
+     * The configuration file option and the cube name option are parsed out of {@code jobParams}. A
+     * non-blank configuration file is added to {@code conf} as a resource. The overrides come from the
+     * named cube's configuration when a cube name is given, otherwise from {@code config}. The
+     * memory-hungry overrides are applied on top when {@code mapreduce.job.is-mem-hungry} parses as true.
+     *
+     * @param conf      Hadoop configuration to modify in place
+     * @param config    Kylin configuration used to look up the cube and as the fallback override source
+     * @param jobParams raw job arguments
+     * @return the cube name option (when not blank) followed by every argument the parser did not recognise
+     * @throws ParseException           if the arguments cannot be parsed
+     * @throws IllegalArgumentException if a cube name is given but no such cube exists
+     */
+    private String[] overwriteJobConf(Configuration conf, KylinConfig config, String[] jobParams)
+            throws ParseException {
+        Options options = new Options();
+        options.addOption(OPTION_JOB_CONF);
+        options.addOption(OPTION_CUBE_NAME);
+        CustomParser parser = new CustomParser();
+        CommandLine commandLine = parser.parse(options, jobParams);
+
+        String confFile = commandLine.getOptionValue(BatchConstants.ARG_CONF);
+        String cubeName = commandLine.getOptionValue(BatchConstants.ARG_CUBE_NAME);
+        List<String> remainingArgs = Lists.newArrayList();
+
+        if (StringUtils.isNotBlank(confFile)) {
+            conf.addResource(new Path(confFile));
+        }
+
+        KylinConfig configOverride = config;
+        if (cubeName != null) {
+            // Fully qualified because this block cannot see whether the file imports CubeInstance.
+            org.apache.kylin.cube.CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
+            if (cube == null) {
+                // Fail with a message naming the cube instead of an anonymous NullPointerException.
+                throw new IllegalArgumentException("Cube not found: " + cubeName);
+            }
+            configOverride = cube.getConfig();
+        }
+
+        for (Map.Entry<String, String> entry : configOverride.getMRConfigOverride().entrySet()) {
+            conf.set(entry.getKey(), entry.getValue());
+        }
+        if (conf.get("mapreduce.job.is-mem-hungry") != null
+                && Boolean.parseBoolean(conf.get("mapreduce.job.is-mem-hungry"))) {
+            for (Map.Entry<String, String> entry : configOverride.getMemHungryConfigOverride().entrySet()) {
+                conf.set(entry.getKey(), entry.getValue());
+            }
+        }
+
+        // The cube name is consumed by the parser above, so hand it back to the job explicitly.
+        if (StringUtils.isNotBlank(cubeName)) {
+            remainingArgs.add("-" + BatchConstants.ARG_CUBE_NAME);
+            remainingArgs.add(cubeName);
+        }
+
+        remainingArgs.addAll(parser.getRemainingArgs());
+        return remainingArgs.toArray(new String[remainingArgs.size()]);
+    }
+
+    /**
+     * {@code GnuParser} that does not reject options missing from the configured {@code Options}.
+     * An unknown option and the token following it (treated as its value) are collected and exposed
+     * through {@link #getRemainingArgs()}.
+     */
+    private static class CustomParser extends GnuParser {
+        private final List<String> remainingArgs = Lists.newArrayList();
+
+        /**
+         * Delegates known options to the parent parser; collects an unknown option together with the
+         * next token, if there is one.
+         */
+        @Override
+        protected void processOption(final String arg, final ListIterator iter) throws ParseException {
+            if (getOptions().hasOption(arg)) {
+                super.processOption(arg, iter);
+                return;
+            }
+
+            remainingArgs.add(arg);
+            // An unknown option may be the very last token; do not run past the end of the argument list.
+            if (iter.hasNext()) {
+                remainingArgs.add(iter.next().toString());
+            }
+        }
+
+        /** Returns the unknown options and their values in the order they were encountered. */
+        public List<String> getRemainingArgs() {
+            return remainingArgs;
+        }
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
new file mode 100644
index 0000000..ecde4aa
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.job.exception.JobException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Sets;
+
+public class MapReduceUtil {
+
+ private static final Logger logger = LoggerFactory.getLogger(MapReduceUtil.class);
+
+ /**
+ * @return reducer number for calculating hll
+ */
+ public static int getCuboidHLLCounterReducerNum(CubeInstance cube) {
+ int nCuboids = cube.getCuboidScheduler().getAllCuboidIds().size();
+ int shardBase = (nCuboids - 1) / cube.getConfig().getHadoopJobPerReducerHLLCuboidNumber() + 1;
+
+ int hllMaxReducerNumber = cube.getConfig().getHadoopJobHLLMaxReducerNumber();
+ if (shardBase > hllMaxReducerNumber) {
+ shardBase = hllMaxReducerNumber;
+ }
+ return shardBase;
+ }
+
+ /**
+ * @param cuboidScheduler specified can provide more flexibility
+ * */
+ public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler,
+ double totalMapInputMB, int level)
+ throws ClassNotFoundException, IOException, InterruptedException, JobException {
+ CubeDesc cubeDesc = cubeSegment.getCubeDesc();
+ KylinConfig kylinConfig = cubeDesc.getConfig();
+
+ double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
+ double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();
+ logger.info("Having per reduce MB " + perReduceInputMB + ", reduce count ratio " + reduceCountRatio + ", level "
+ + level);
+
+ CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cuboidScheduler, kylinConfig);
+
+ double parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst;
+
+ if (level == -1) {
+ //merge case
+ double estimatedSize = cubeStatsReader.estimateCubeSize();
+ adjustedCurrentLayerSizeEst = estimatedSize > totalMapInputMB ? totalMapInputMB : estimatedSize;
+ logger.debug("estimated size {}, input size {}, adjustedCurrentLayerSizeEst: {}", estimatedSize,
+ totalMapInputMB, adjustedCurrentLayerSizeEst);
+ } else if (level == 0) {
+ //base cuboid case TODO: the estimation could be very WRONG because it has no correction
+ adjustedCurrentLayerSizeEst = cubeStatsReader.estimateLayerSize(0);
+ logger.debug("adjustedCurrentLayerSizeEst: {}", adjustedCurrentLayerSizeEst);
+ } else {
+ parentLayerSizeEst = cubeStatsReader.estimateLayerSize(level - 1);
+ currentLayerSizeEst = cubeStatsReader.estimateLayerSize(level);
+ adjustedCurrentLayerSizeEst = totalMapInputMB / parentLayerSizeEst * currentLayerSizeEst;
+ logger.debug(
+ "totalMapInputMB: {}, parentLayerSizeEst: {}, currentLayerSizeEst: {}, adjustedCurrentLayerSizeEst: {}",
+ totalMapInputMB, parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst);
+ }
+
+ // number of reduce tasks
+ int numReduceTasks = (int) Math.round(adjustedCurrentLayerSizeEst / perReduceInputMB * reduceCountRatio + 0.99);
+
+ // adjust reducer number for cube which has DISTINCT_COUNT measures for better performance
+ if (cubeDesc.hasMemoryHungryMeasures()) {
+ logger.debug("Multiply reducer num by 4 to boost performance for memory hungry measures");
+ numReduceTasks = numReduceTasks * 4;
+ }
+
+ // at least 1 reducer by default
+ numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
+ // no more than 500 reducer by default
+ numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);
+
+ return numReduceTasks;
+ }
+
+ public static int getInmemCubingReduceTaskNum(CubeSegment cubeSeg, CuboidScheduler cuboidScheduler)
+ throws IOException {
+ KylinConfig kylinConfig = cubeSeg.getConfig();
+
+ Map<Long, Double> cubeSizeMap = new CubeStatsReader(cubeSeg, cuboidScheduler, kylinConfig).getCuboidSizeMap();
+ double totalSizeInM = 0;
+ for (Double cuboidSize : cubeSizeMap.values()) {
+ totalSizeInM += cuboidSize;
+ }
+ return getReduceTaskNum(totalSizeInM, kylinConfig);
+ }
+
+ // @return the first indicates the total reducer number, the second indicates the reducer number for base cuboid
+ public static Pair<Integer, Integer> getConvergeCuboidDataReduceTaskNums(CubeSegment cubeSeg) throws IOException {
+ long baseCuboidId = cubeSeg.getCuboidScheduler().getBaseCuboidId();
+
+ Set<Long> overlapCuboids = Sets.newHashSet(cubeSeg.getCuboidScheduler().getAllCuboidIds());
+ overlapCuboids.retainAll(cubeSeg.getCubeInstance().getCuboidsRecommend());
+ overlapCuboids.add(baseCuboidId);
+
+ Pair<Map<Long, Long>, Long> cuboidStats = CuboidStatsReaderUtil
+ .readCuboidStatsWithSourceFromSegment(overlapCuboids, cubeSeg);
+ Map<Long, Double> cubeSizeMap = CubeStatsReader.getCuboidSizeMapFromRowCount(cubeSeg, cuboidStats.getFirst(),
+ cuboidStats.getSecond());
+ double totalSizeInM = 0;
+ for (Double cuboidSize : cubeSizeMap.values()) {
+ totalSizeInM += cuboidSize;
+ }
+
+ double baseSizeInM = cubeSizeMap.get(baseCuboidId);
+
+ KylinConfig kylinConfig = cubeSeg.getConfig();
+ int nBase = getReduceTaskNum(baseSizeInM, kylinConfig);
+ int nOther = getReduceTaskNum(totalSizeInM - baseSizeInM, kylinConfig);
+ return new Pair<>(nBase + nOther, nBase);
+ }
+
+ private static int getReduceTaskNum(double totalSizeInM, KylinConfig kylinConfig) {
+ double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
+ double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();
+
+ // number of reduce tasks
+ int numReduceTasks = (int) Math.round(totalSizeInM / perReduceInputMB * reduceCountRatio);
+
+ // at least 1 reducer by default
+ numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
+ // no more than 500 reducer by default
+ numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);
+
+ logger.info("Having total map input MB " + Math.round(totalSizeInM));
+ logger.info("Having per reduce MB " + perReduceInputMB);
+ logger.info("Setting " + Reducer.Context.NUM_REDUCES + "=" + numReduceTasks);
+ return numReduceTasks;
+ }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/NDCuboidBuilder.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/NDCuboidBuilder.java
new file mode 100644
index 0000000..6ad2619
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/NDCuboidBuilder.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.Serializable;
+
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.common.RowKeySplitter;
+import org.apache.kylin.cube.cuboid.Cuboid;
+import org.apache.kylin.cube.kv.RowConstants;
+import org.apache.kylin.cube.kv.RowKeyEncoder;
+import org.apache.kylin.cube.kv.RowKeyEncoderProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Builds the row key of a child cuboid out of the split row key of its parent cuboid: the column
+ * values the child keeps are copied in parent column order and the key header is filled in by the
+ * child's {@link RowKeyEncoder}.
+ */
+@SuppressWarnings("serial")
+public class NDCuboidBuilder implements Serializable {
+
+    protected static final Logger logger = LoggerFactory.getLogger(NDCuboidBuilder.class);
+    protected String cubeName;
+    protected String segmentID;
+    protected CubeSegment cubeSegment;
+    private RowKeySplitter rowKeySplitter;
+    private RowKeyEncoderProvider rowKeyEncoderProvider;
+    // buffer reused across buildKey() calls; replaced only when a larger key is needed
+    private ByteArray newKeyBodyBuf = null;
+
+    public NDCuboidBuilder(CubeSegment cubeSegment) {
+        this(cubeSegment, new RowKeyEncoderProvider(cubeSegment));
+    }
+
+    public NDCuboidBuilder(CubeSegment cubeSegment, RowKeyEncoderProvider rowKeyEncoderProvider) {
+        this.cubeSegment = cubeSegment;
+        this.rowKeyEncoderProvider = rowKeyEncoderProvider;
+        this.rowKeySplitter = new RowKeySplitter(cubeSegment);
+    }
+
+    /**
+     * Build the new key, return a reused ByteArray object. Suitable for MR
+     *
+     * @param parentCuboid cuboid the split buffers belong to
+     * @param childCuboid cuboid whose key is built
+     * @param splitBuffers split parts of the parent row key
+     * @return the key length in bytes and the shared buffer holding the key; the buffer may be longer
+     *         than the key and is overwritten by the next call
+     */
+    public Pair<Integer, ByteArray> buildKey(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers) {
+        RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);
+        int fullKeySize = rowkeyEncoder.getBytesLength();
+        if (newKeyBodyBuf == null || newKeyBodyBuf.length() < fullKeySize) {
+            newKeyBodyBuf = new ByteArray(fullKeySize);
+        }
+
+        buildKeyInternal(parentCuboid, childCuboid, splitBuffers, rowkeyEncoder, newKeyBodyBuf);
+        return new Pair<>(Integer.valueOf(fullKeySize), newKeyBodyBuf);
+
+    }
+
+    /**
+     * Build the new key, return a new ByteArray object each time. Suitable for spark
+     *
+     * @param parentCuboid cuboid the split buffers belong to
+     * @param childCuboid cuboid whose key is built
+     * @param splitBuffers split parts of the parent row key
+     * @return a freshly allocated buffer of exactly the child's key length
+     */
+    public ByteArray buildKey2(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers) {
+        RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);
+        int fullKeySize = rowkeyEncoder.getBytesLength();
+        ByteArray newKey = new ByteArray(fullKeySize);
+        buildKeyInternal(parentCuboid, childCuboid, splitBuffers, rowkeyEncoder, newKey);
+        return newKey;
+    }
+
+    /**
+     * Copies the column values shared by parent and child into {@code keyBuf} and fills the key header.
+     * The child's encoder is passed in so that it is looked up only once per key.
+     */
+    private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers,
+            RowKeyEncoder rowkeyEncoder, ByteArray keyBuf) {
+        // rowkey columns
+        final long parentCuboidId = parentCuboid.getId();
+        final long childCuboidId = childCuboid.getId();
+        int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
+        int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId
+
+        // Walk the parent's bits from its highest set bit down to bit 0. The unsigned shift and the
+        // != 0 tests keep the walk correct even when the sign bit is part of the id.
+        for (long mask = Long.highestOneBit(parentCuboidId); mask != 0; mask >>>= 1) {
+            if ((mask & parentCuboidId) != 0) {// the parent has this column
+                if ((mask & childCuboidId) != 0) {// the child keeps this column
+                    System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), keyBuf.array(),
+                            offset, splitBuffers[index].length());
+                    offset += splitBuffers[index].length();
+                }
+                index++;
+            }
+        }
+
+        rowkeyEncoder.fillHeader(keyBuf.array());
+    }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/SerializableConfiguration.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/SerializableConfiguration.java
new file mode 100644
index 0000000..b390432
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/SerializableConfiguration.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+//https://stackoverflow.com/questions/38224132/use-sparkcontext-hadoop-configuration-within-rdd-methods-closures-like-foreachp
+/**
+ * {@link Serializable} holder for a Hadoop {@link Configuration}. Java serialization of the holder
+ * is delegated to {@code Configuration.write} and {@code Configuration.readFields}.
+ */
+public class SerializableConfiguration implements Serializable {
+    Configuration conf;
+
+    /** Wraps the given configuration without copying it. */
+    public SerializableConfiguration(Configuration hadoopConf) {
+        this.conf = hadoopConf;
+    }
+
+    /** Wraps a newly created {@link Configuration}. */
+    public SerializableConfiguration() {
+        this(new Configuration());
+    }
+
+    /** Returns the wrapped configuration. */
+    public Configuration get() {
+        return conf;
+    }
+
+    // Serialization hook: the stream content is exactly what Configuration.write produces.
+    private void writeObject(java.io.ObjectOutputStream out) throws IOException {
+        conf.write(out);
+    }
+
+    // Deserialization hook: start from a new Configuration and load the fields written above.
+    private void readObject(java.io.ObjectInputStream in) throws IOException {
+        conf = new Configuration();
+        conf.readFields(in);
+    }
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
new file mode 100644
index 0000000..3890e38
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.CubeUpdate;
+import org.apache.kylin.engine.mr.CubingJob;
+import org.apache.kylin.metadata.model.MeasureDesc;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Decisions taken from cube statistics: which cubing algorithm a job uses and whether the cuboid
+ * planner may rewrite the cube's cuboid list.
+ */
+public class StatisticsDecisionUtil {
+    protected static final Logger logger = LoggerFactory.getLogger(StatisticsDecisionUtil.class);
+
+    /**
+     * Chooses the cubing algorithm from the segment's first-build mapper statistics and sets it on
+     * {@code cubingJob}.
+     */
+    public static void decideCubingAlgorithm(CubingJob cubingJob, CubeSegment seg) throws IOException {
+        CubeStatsReader statsReader = new CubeStatsReader(seg, null, seg.getConfig());
+        decideCubingAlgorithm(cubingJob, seg, statsReader.getMapperOverlapRatioOfFirstBuild(),
+                statsReader.getMapperNumberOfFirstBuild());
+    }
+
+    /**
+     * Chooses the cubing algorithm from the given mapper statistics and the configured algorithm
+     * preference, logs the choice and sets it on {@code cubingJob}.
+     */
+    public static void decideCubingAlgorithm(CubingJob cubingJob, CubeSegment seg, double mapperOverlapRatio,
+            int mapperNumber) throws IOException {
+        CubingJob.AlgorithmEnum alg = chooseAlgorithm(cubingJob, seg, mapperOverlapRatio, mapperNumber);
+        logger.info("The cube algorithm for " + seg + " is " + alg);
+
+        cubingJob.setAlgorithm(alg);
+    }
+
+    /** Evaluates the selection rules in order; the first rule that applies wins. */
+    private static CubingJob.AlgorithmEnum chooseAlgorithm(CubingJob cubingJob, CubeSegment seg,
+            double mapperOverlapRatio, int mapperNumber) {
+        KylinConfig kylinConf = seg.getConfig();
+        String algPref = kylinConf.getCubeAlgorithm();
+
+        if (mapperOverlapRatio == 0 && kylinConf.isAutoInmemToOptimize()) { // no source records
+            return CubingJob.AlgorithmEnum.INMEM;
+        }
+        if (CubingJob.AlgorithmEnum.INMEM.name().equalsIgnoreCase(algPref)) {
+            // a BUILD job on a streaming cube uses LAYER even when INMEM is preferred
+            boolean streamingBuild = seg.getCubeDesc().isStreamingCube() && CubingJob.CubingJobTypeEnum
+                    .getByName(cubingJob.getJobType()) == CubingJob.CubingJobTypeEnum.BUILD;
+            return streamingBuild ? CubingJob.AlgorithmEnum.LAYER : CubingJob.AlgorithmEnum.INMEM;
+        }
+        if (CubingJob.AlgorithmEnum.LAYER.name().equalsIgnoreCase(algPref)) {
+            return CubingJob.AlgorithmEnum.LAYER;
+        }
+
+        if (countMemoryHungryMeasures(seg) > 0) {
+            return CubingJob.AlgorithmEnum.LAYER;
+        }
+        if ("random".equalsIgnoreCase(algPref)) { // for testing
+            return new Random().nextBoolean() ? CubingJob.AlgorithmEnum.INMEM : CubingJob.AlgorithmEnum.LAYER;
+        }
+
+        // the default
+        int mapperNumLimit = kylinConf.getCubeAlgorithmAutoMapperLimit();
+        double overlapThreshold = kylinConf.getCubeAlgorithmAutoThreshold();
+        logger.info("mapperNumber for " + seg + " is " + mapperNumber + " and threshold is " + mapperNumLimit);
+        logger.info("mapperOverlapRatio for " + seg + " is " + mapperOverlapRatio + " and threshold is "
+                + overlapThreshold);
+
+        // in-mem cubing is good when
+        // 1) the cluster has enough mapper slots to run in parallel
+        // 2) the mapper overlap ratio is small, meaning the shuffle of in-mem MR has advantage
+        boolean inMemFits = mapperNumber <= mapperNumLimit && mapperOverlapRatio <= overlapThreshold;
+        return inMemFits ? CubingJob.AlgorithmEnum.INMEM : CubingJob.AlgorithmEnum.LAYER;
+    }
+
+    /** Counts the measures whose measure type is memory hungry, logging each of them. */
+    private static int countMemoryHungryMeasures(CubeSegment seg) {
+        int memoryHungryMeasures = 0;
+        for (MeasureDesc measure : seg.getCubeDesc().getMeasures()) {
+            if (measure.getFunction().getMeasureType().isMemoryHungry()) {
+                logger.info("This cube has memory-hungry measure " + measure.getFunction().getExpression());
+                memoryHungryMeasures++;
+            }
+        }
+        return memoryHungryMeasures;
+    }
+
+    // For triggering cube planner phase one
+    public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
+        if (!isAbleToOptimizeCubingPlan(segment)) {
+            return;
+        }
+        logger.info("It's able to trigger cuboid planner algorithm.");
+
+        Map<Long, Long> recommendCuboidsWithStats = CuboidRecommenderUtil.getRecommendCuboidList(segment);
+        if (recommendCuboidsWithStats == null || recommendCuboidsWithStats.isEmpty()) {
+            return;
+        }
+
+        // store the recommended cuboids on a writable copy of the cube
+        CubeInstance cube = segment.getCubeInstance();
+        CubeUpdate update = new CubeUpdate(cube.latestCopyForWrite());
+        update.setCuboids(recommendCuboidsWithStats);
+        CubeManager.getInstance(cube.getConfig()).updateCube(update);
+    }
+
+    /**
+     * Tells whether the cube planner may run for {@code segment}: the planner must be enabled, the
+     * cube must have no READY_PENDING segment and at most one NEW segment, and it must have either
+     * no READY segment or, when the planner is enabled for existing cubes, exactly one READY segment
+     * whose range equals the range of {@code segment}.
+     */
+    public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
+        CubeInstance cube = segment.getCubeInstance();
+        if (!cube.getConfig().isCubePlannerEnabled()) {
+            return false;
+        }
+
+        if (cube.getSegments(SegmentStatusEnum.READY_PENDING).size() > 0) {
+            logger.info("Has read pending segments and will not enable cube planner.");
+            return false;
+        }
+        List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY);
+        List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
+        if (newSegments.size() > 1) {
+            return false;
+        }
+        if (readySegments.size() == 0) {
+            return true;
+        }
+        return cube.getConfig().isCubePlannerEnabledForExistingCube() && readySegments.size() == 1
+                && readySegments.get(0).getSegRange().equals(segment.getSegRange());
+    }
+}
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupProvider.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/exception/HadoopShellException.java
similarity index 56%
rename from core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupProvider.java
rename to build-engine/src/main/java/org/apache/kylin/engine/mr/exception/HadoopShellException.java
index a09a439..23d4a3a 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupProvider.java
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/exception/HadoopShellException.java
@@ -16,21 +16,29 @@
* limitations under the License.
*/
-package org.apache.kylin.dict.lookup;
+package org.apache.kylin.engine.mr.exception;
-import org.apache.kylin.metadata.model.TableDesc;
+/**
+ */
+public class HadoopShellException extends Exception {
-public interface IExtLookupProvider {
- ILookupTable getLookupTable(TableDesc tableDesc, ExtTableSnapshotInfo extTableSnapshot);
+ public HadoopShellException() {
+ }
- /**
- * @return the local cache if the provider has, return null if no local cache exist
- */
- IExtLookupTableCache getLocalCache();
+ public HadoopShellException(String message) {
+ super(message);
+ }
- /**
- * Return an adaptor that implements specified interface as requested by the build engine.
- * The ILookupMaterializer in particular, is required by the MR build engine.
- */
- <I> I adaptToBuildEngine(Class<I> engineInterface);
+ public HadoopShellException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public HadoopShellException(Throwable cause) {
+ super(cause);
+ }
+
+ public HadoopShellException(String message, Throwable cause, boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
}
diff --git a/server/src/main/java/org/apache/kylin/rest/DebugDeployCLI.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/exception/MapReduceException.java
similarity index 57%
copy from server/src/main/java/org/apache/kylin/rest/DebugDeployCLI.java
copy to build-engine/src/main/java/org/apache/kylin/engine/mr/exception/MapReduceException.java
index 28c1b12..fc047fe 100644
--- a/server/src/main/java/org/apache/kylin/rest/DebugDeployCLI.java
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/exception/MapReduceException.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,16 +16,28 @@
* limitations under the License.
*/
-package org.apache.kylin.rest;
+package org.apache.kylin.engine.mr.exception;
-import java.io.IOException;
+/**
+ */
+public class MapReduceException extends Exception {
-import org.apache.kylin.storage.hbase.util.DeployCoprocessorCLI;
+ public MapReduceException() {
+ }
+
+ public MapReduceException(String message) {
+ super(message);
+ }
-public class DebugDeployCLI {
+ public MapReduceException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public MapReduceException(Throwable cause) {
+ super(cause);
+ }
- public static void main(String[] args) throws IOException {
- DebugTomcat.setupDebugEnv();
- DeployCoprocessorCLI.main(new String[] { "default", "all" });
+ public MapReduceException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
}
}
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupTableCache.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/exception/SegmentNotFoundException.java
similarity index 50%
copy from core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupTableCache.java
copy to build-engine/src/main/java/org/apache/kylin/engine/mr/exception/SegmentNotFoundException.java
index f473059..3e8b59e 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/IExtLookupTableCache.java
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/exception/SegmentNotFoundException.java
@@ -16,24 +16,29 @@
* limitations under the License.
*/
-package org.apache.kylin.dict.lookup;
+package org.apache.kylin.engine.mr.exception;
-import org.apache.kylin.metadata.model.TableDesc;
+/**
+ * Checked exception type of the {@code org.apache.kylin.engine.mr.exception} package.
+ * NOTE(review): by its name it presumably signals that a requested segment could not
+ * be located; no throw site is visible here, so confirm against the callers.
+ * Every constructor simply forwards its arguments to the matching {@link Exception}
+ * constructor.
+ */
+public class SegmentNotFoundException extends Exception {
-public interface IExtLookupTableCache {
- enum CacheState {NONE, IN_BUILDING, AVAILABLE}
+ public SegmentNotFoundException() {
+ }
- /**
- * @param tableDesc
- * @param extTableSnapshotInfo
- * @param buildIfNotExist if true, when the cached lookup table not exist, build it.
- * @return null if no cached lookup table exist
- */
- ILookupTable getCachedLookupTable(TableDesc tableDesc, ExtTableSnapshotInfo extTableSnapshotInfo, boolean buildIfNotExist);
+ public SegmentNotFoundException(String message) {
+ super(message);
+ }
- void buildSnapshotCache(TableDesc tableDesc, ExtTableSnapshotInfo extTableSnapshotInfo, ILookupTable sourceTable);
+ public SegmentNotFoundException(String message, Throwable cause) {
+ super(message, cause);
+ }
- void removeSnapshotCache(ExtTableSnapshotInfo extTableSnapshotInfo);
+ public SegmentNotFoundException(Throwable cause) {
+ super(cause);
+ }
- CacheState getCacheState(ExtTableSnapshotInfo extTableSnapshotInfo);
+ public SegmentNotFoundException(String message, Throwable cause, boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/steps/CubingExecutableUtil.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/steps/CubingExecutableUtil.java
new file mode 100644
index 0000000..216f9ff
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/steps/CubingExecutableUtil.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.engine.mr.steps;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import javax.annotation.Nullable;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.job.execution.ExecutableContext;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+/**
+ * Static helpers that read and write well-known entries of a job's
+ * {@code Map<String, String>} parameter map, plus {@link #findSegment} which resolves
+ * a cube segment through {@link CubeManager}.
+ */
+public class CubingExecutableUtil {
+
+    public static final String CUBE_NAME = "cubeName";
+    // The identifier is misspelled ("DISPALY"); it is kept unchanged because the constant is public.
+    public static final String DISPALY_NAME = "displayName";
+    public static final String SEGMENT_NAME = "segmentName";
+    public static final String SEGMENT_ID = "segmentId";
+    public static final String MERGING_SEGMENT_IDS = "mergingSegmentIds";
+    public static final String STATISTICS_PATH = "statisticsPath";
+    public static final String CUBING_JOB_ID = "cubingJobId";
+    public static final String MERGED_STATISTICS_PATH = "mergedStatisticsPath";
+    public static final String INDEX_PATH = "indexPath";
+    public static final String DICTIONARIES_PATH = "dictsPath";
+
+    /** Stores {@code path} under {@link #STATISTICS_PATH}. */
+    public static void setStatisticsPath(String path, Map<String, String> params) {
+        params.put(STATISTICS_PATH, path);
+    }
+
+    /** @return the value stored under {@link #STATISTICS_PATH}, or null when absent */
+    public static String getStatisticsPath(Map<String, String> params) {
+        return params.get(STATISTICS_PATH);
+    }
+
+    /** Stores {@code cubeName} under {@link #CUBE_NAME}. */
+    public static void setCubeName(String cubeName, Map<String, String> params) {
+        params.put(CUBE_NAME, cubeName);
+    }
+
+    /** @return the value stored under {@link #CUBE_NAME}, or null when absent */
+    public static String getCubeName(Map<String, String> params) {
+        return params.get(CUBE_NAME);
+    }
+
+    /** Stores {@code segmentName} under {@link #SEGMENT_NAME}. */
+    public static void setSegmentName(String segmentName, Map<String, String> params) {
+        params.put(SEGMENT_NAME, segmentName);
+    }
+
+    /** @return the value stored under {@link #SEGMENT_NAME}, or null when absent */
+    public static String getSegmentName(Map<String, String> params) {
+        return params.get(SEGMENT_NAME);
+    }
+
+    /** Stores {@code segmentId} under {@link #SEGMENT_ID}. */
+    public static void setSegmentId(String segmentId, Map<String, String> params) {
+        params.put(SEGMENT_ID, segmentId);
+    }
+
+    /** @return the value stored under {@link #SEGMENT_ID}, or null when absent */
+    public static String getSegmentId(Map<String, String> params) {
+        return params.get(SEGMENT_ID);
+    }
+
+    /** Stores {@code ids} under {@link #MERGING_SEGMENT_IDS} as one comma-joined string. */
+    public static void setMergingSegmentIds(List<String> ids, Map<String, String> params) {
+        params.put(MERGING_SEGMENT_IDS, StringUtils.join(ids, ","));
+    }
+
+    /**
+     * Looks up the cube named {@code cubeName} and, inside it, the segment with id
+     * {@code segmentId}.
+     *
+     * @return the matching segment, never null
+     * @throws IllegalStateException when the cube or the segment does not exist; the
+     *         message lists the names of all cubes, respectively the UUIDs of all
+     *         segments of the cube
+     */
+    public static CubeSegment findSegment(ExecutableContext context, String cubeName, String segmentId) {
+        final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
+
+        final CubeInstance targetCube = cubeManager.getCube(cubeName);
+        if (targetCube == null) {
+            throw new IllegalStateException(
+                    "target cube name: " + cubeName + " cube list: " + joinCubeNames(cubeManager.listAllCubes()));
+        }
+
+        final CubeSegment targetSegment = targetCube.getSegmentById(segmentId);
+        if (targetSegment == null) {
+            throw new IllegalStateException("target segment id: " + segmentId + " segment list: "
+                    + joinSegmentUuids(targetCube.getSegments()));
+        }
+        return targetSegment;
+    }
+
+    /** Joins the names of the given cubes with commas. */
+    private static String joinCubeNames(Iterable<? extends CubeInstance> cubes) {
+        final Function<CubeInstance, String> toName = new Function<CubeInstance, String>() {
+            @Nullable
+            @Override
+            public String apply(@Nullable CubeInstance input) {
+                return input.getName();
+            }
+        };
+        return StringUtils.join(Iterables.transform(cubes, toName).iterator(), ",");
+    }
+
+    /** Joins the UUIDs of the given segments with commas. */
+    private static String joinSegmentUuids(Iterable<? extends CubeSegment> segments) {
+        final Function<CubeSegment, String> toUuid = new Function<CubeSegment, String>() {
+            @Nullable
+            @Override
+            public String apply(@Nullable CubeSegment input) {
+                return input.getUuid();
+            }
+        };
+        return StringUtils.join(Iterables.transform(segments, toUuid).iterator(), ",");
+    }
+
+    /**
+     * Splits the comma-joined value stored under {@link #MERGING_SEGMENT_IDS}.
+     *
+     * @return the individual ids, or an empty list when the key is absent
+     */
+    public static List<String> getMergingSegmentIds(Map<String, String> params) {
+        final String joined = params.get(MERGING_SEGMENT_IDS);
+        if (joined == null) {
+            return Collections.emptyList();
+        }
+        final String[] parts = StringUtils.split(joined, ",");
+        final ArrayList<String> ids = Lists.newArrayListWithExpectedSize(parts.length);
+        Collections.addAll(ids, parts);
+        return ids;
+    }
+
+    /** Stores {@code id} under {@link #CUBING_JOB_ID}. */
+    public static void setCubingJobId(String id, Map<String, String> params) {
+        params.put(CUBING_JOB_ID, id);
+    }
+
+    /** @return the value stored under {@link #CUBING_JOB_ID}, or null when absent */
+    public static String getCubingJobId(Map<String, String> params) {
+        return params.get(CUBING_JOB_ID);
+    }
+
+    /** Stores {@code path} under {@link #MERGED_STATISTICS_PATH}. */
+    public static void setMergedStatisticsPath(String path, Map<String, String> params) {
+        params.put(MERGED_STATISTICS_PATH, path);
+    }
+
+    /** @return the value stored under {@link #MERGED_STATISTICS_PATH}, or null when absent */
+    public static String getMergedStatisticsPath(Map<String, String> params) {
+        return params.get(MERGED_STATISTICS_PATH);
+    }
+
+    /** Stores {@code path} under {@link #DICTIONARIES_PATH}. */
+    public static void setDictsPath(String path, Map<String, String> params) {
+        params.put(DICTIONARIES_PATH, path);
+    }
+
+    /** @return the value stored under {@link #DICTIONARIES_PATH}, or null when absent */
+    public static String getDictsPath(Map<String, String> params) {
+        return params.get(DICTIONARIES_PATH);
+    }
+
+}
diff --git a/build-engine/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterCheckpointStep.java b/build-engine/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterCheckpointStep.java
new file mode 100644
index 0000000..80811be
--- /dev/null
+++ b/build-engine/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterCheckpointStep.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.steps;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.engine.mr.common.CuboidStatsReaderUtil;
+import org.apache.kylin.job.exception.ExecuteException;
+import org.apache.kylin.job.execution.AbstractExecutable;
+import org.apache.kylin.job.execution.ExecutableContext;
+import org.apache.kylin.job.execution.ExecuteResult;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Job step that updates a cube once a checkpoint job has produced its segments: it
+ * reads the statistics of the cube's recommended cuboids from the
+ * {@code READY_PENDING} segments and hands both to
+ * {@code CubeManager.promoteCheckpointOptimizeSegments}.
+ */
+public class UpdateCubeInfoAfterCheckpointStep extends AbstractExecutable {
+
+    private static final Logger logger = LoggerFactory.getLogger(UpdateCubeInfoAfterCheckpointStep.class);
+
+    public UpdateCubeInfoAfterCheckpointStep() {
+        super();
+    }
+
+    /**
+     * Promotes the {@code READY_PENDING} segments of the cube named in this step's
+     * parameters.
+     *
+     * @param context execution context supplying the Kylin config
+     * @return an empty success result, or an error result wrapping the failure when
+     *         the cube is missing, the cuboid statistics cannot be read, or the
+     *         promotion itself fails
+     */
+    @Override
+    protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
+        final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
+        final String cubeName = CubingExecutableUtil.getCubeName(this.getParams());
+
+        try {
+            // The lookup returns null for an unknown cube; check inside the try so that
+            // the failure is logged and reported as an error result instead of
+            // escaping as a bare NullPointerException.
+            final CubeInstance cube = cubeManager.getCube(cubeName);
+            if (cube == null) {
+                throw new IllegalStateException("Cube not found: " + cubeName);
+            }
+
+            Set<Long> recommendCuboids = cube.getCuboidsRecommend();
+            List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
+            Map<Long, Long> recommendCuboidsWithStats = CuboidStatsReaderUtil
+                    .readCuboidStatsFromSegments(recommendCuboids, newSegments);
+            if (recommendCuboidsWithStats == null) {
+                throw new RuntimeException("Fail to get statistics info for recommended cuboids after optimization!!!");
+            }
+            cubeManager.promoteCheckpointOptimizeSegments(cube, recommendCuboidsWithStats,
+                    newSegments.toArray(new CubeSegment[newSegments.size()]));
+            return new ExecuteResult();
+        } catch (Exception e) {
+            logger.error("fail to update cube after checkpoint", e);
+            return ExecuteResult.createError(e);
+        }
+    }
+
+}
diff --git a/build-engine/src/test/resources/data/8d_cuboid/part-r-00000 b/build-engine/src/test/resources/data/8d_cuboid/part-r-00000
new file mode 100755
index 0000000..c412e3a
Binary files /dev/null and b/build-engine/src/test/resources/data/8d_cuboid/part-r-00000 differ
diff --git a/build-engine/src/test/resources/data/base_cuboid/part-r-00000 b/build-engine/src/test/resources/data/base_cuboid/part-r-00000
new file mode 100755
index 0000000..9ade717
Binary files /dev/null and b/build-engine/src/test/resources/data/base_cuboid/part-r-00000 differ
diff --git a/build-engine/src/test/resources/dict/DW_SITES b/build-engine/src/test/resources/dict/DW_SITES
new file mode 100644
index 0000000..6f6af46
Binary files /dev/null and b/build-engine/src/test/resources/dict/DW_SITES differ
diff --git a/build-engine/src/test/resources/multi_file_double/data_0 b/build-engine/src/test/resources/multi_file_double/data_0
new file mode 100644
index 0000000..e3da559
--- /dev/null
+++ b/build-engine/src/test/resources/multi_file_double/data_0
@@ -0,0 +1,2006 @@
+0.0
+2.6
+3.12
+5.199999999999999
+7.279999999999998
+7.799999999999997
+10.399999999999995
+11.439999999999994
+11.959999999999994
+16.63999999999999
+17.15999999999999
+20.279999999999987
+22.879999999999985
+24.439999999999984
+25.479999999999983
+25.999999999999982
+31.719999999999978
+32.23999999999998
+38.48000000000002
+39.00000000000002
+40.56000000000003
+41.080000000000034
+41.60000000000004
+42.64000000000004
+46.80000000000007
+48.88000000000008
+50.44000000000009
+50.96000000000009
+51.480000000000096
+53.040000000000106
+54.600000000000115
+56.160000000000124
+59.800000000000146
+61.88000000000016
+65.00000000000017
+68.12000000000015
+68.64000000000014
+73.8400000000001
+75.40000000000009
+76.44000000000008
+80.60000000000005
+81.12000000000005
+82.16000000000004
+86.32000000000001
+90.99999999999997
+94.63999999999994
+96.71999999999993
+97.23999999999992
+99.8399999999999
+102.95999999999988
+106.07999999999986
+115.43999999999978
+117.51999999999977
+119.59999999999975
+120.11999999999975
+123.23999999999972
+126.8799999999997
+128.9599999999997
+129.4799999999997
+129.99999999999972
+131.03999999999974
+134.1599999999998
+135.71999999999983
+144.56
+145.60000000000002
+150.28000000000011
+151.84000000000015
+152.88000000000017
+153.9200000000002
+157.56000000000026
+159.6400000000003
+161.20000000000033
+165.3600000000004
+166.92000000000044
+172.64000000000055
+173.68000000000058
+175.2400000000006
+178.36000000000067
+179.9200000000007
+180.4400000000007
+181.48000000000073
+183.56000000000077
+184.08000000000078
+185.1200000000008
+186.16000000000082
+190.8400000000009
+196.04000000000102
+197.60000000000105
+198.64000000000107
+201.24000000000112
+203.84000000000117
+213.20000000000135
+214.24000000000137
+215.2800000000014
+219.9600000000015
+222.04000000000153
+223.08000000000155
+224.64000000000158
+225.1600000000016
+228.80000000000166
+229.84000000000168
+230.8800000000017
+239.20000000000186
+241.2800000000019
+243.88000000000196
+244.40000000000197
+247.00000000000202
+250.12000000000208
+251.1600000000021
+254.80000000000217
+255.32000000000218
+256.3600000000022
+260.00000000000205
+266.2400000000018
+269.3600000000017
+269.8800000000017
+271.44000000000165
+275.6000000000015
+277.16000000000145
+277.6800000000014
+280.28000000000134
+281.3200000000013
+286.00000000000114
+286.5200000000011
+293.80000000000086
+294.8400000000008
+296.4000000000008
+300.04000000000065
+301.6000000000006
+303.6800000000005
+304.2000000000005
+304.7200000000005
+309.9200000000003
+310.96000000000026
+312.5200000000002
+315.6400000000001
+317.20000000000005
+323.9599999999998
+326.03999999999974
+326.5599999999997
+338.5199999999993
+339.0399999999993
+341.1199999999992
+341.6399999999992
+346.839999999999
+348.39999999999895
+349.9599999999989
+350.4799999999989
+355.1599999999987
+357.23999999999864
+358.2799999999986
+361.9199999999985
+366.5999999999983
+369.7199999999982
+376.99999999999795
+378.5599999999979
+385.31999999999766
+386.3599999999976
+393.1199999999974
+393.63999999999737
+394.15999999999735
+396.75999999999726
+397.7999999999972
+401.9599999999971
+402.99999999999704
+403.519999999997
+406.6399999999969
+409.2399999999968
+410.2799999999968
+412.3599999999967
+413.91999999999666
+415.9999999999966
+417.55999999999653
+420.6799999999964
+422.23999999999637
+422.75999999999635
+426.3999999999962
+427.4399999999962
+428.99999999999613
+429.5199999999961
+434.19999999999595
+434.71999999999593
+435.2399999999959
+437.31999999999584
+443.03999999999564
+443.5599999999956
+444.0799999999956
+446.15999999999553
+447.7199999999955
+451.35999999999535
+453.95999999999526
+454.47999999999524
+454.9999999999952
+461.239999999995
+462.79999999999495
+463.31999999999493
+463.8399999999949
+464.3599999999949
+471.11999999999466
+471.63999999999464
+476.3199999999945
+478.9199999999944
+480.47999999999433
+480.9999999999943
+483.07999999999424
+484.6399999999942
+485.15999999999417
+487.2399999999941
+488.27999999999406
+495.0399999999938
+497.11999999999375
+500.23999999999364
+500.7599999999936
+501.2799999999936
+506.4799999999934
+506.9999999999934
+508.03999999999337
+508.55999999999335
+509.07999999999333
+510.1199999999933
+511.15999999999326
+516.3599999999931
+516.8799999999931
+517.919999999993
+519.479999999993
+522.0799999999929
+522.5999999999929
+530.9199999999926
+532.4799999999925
+537.6799999999923
+541.3199999999922
+543.9199999999921
+545.4799999999921
+545.999999999992
+549.1199999999919
+553.2799999999918
+553.7999999999918
+554.8399999999917
+561.0799999999915
+561.5999999999915
+565.2399999999914
+568.8799999999912
+571.4799999999912
+579.2799999999909
+579.7999999999909
+580.3199999999908
+587.0799999999906
+592.2799999999904
+593.3199999999904
+594.3599999999904
+595.3999999999903
+603.71999999999
+606.3199999999899
+607.3599999999899
+608.3999999999899
+608.9199999999898
+609.9599999999898
+615.6799999999896
+617.7599999999895
+618.2799999999895
+621.9199999999894
+625.5599999999893
+626.0799999999892
+630.2399999999891
+635.4399999999889
+635.9599999999889
+636.4799999999889
+638.5599999999888
+640.1199999999887
+641.6799999999887
+643.2399999999886
+648.9599999999884
+651.0399999999884
+657.2799999999881
+659.3599999999881
+666.1199999999878
+674.4399999999875
+683.2799999999872
+684.3199999999872
+684.8399999999872
+686.3999999999871
+687.4399999999871
+688.999999999987
+691.079999999987
+696.7999999999868
+697.8399999999867
+699.3999999999867
+700.4399999999866
+703.5599999999865
+705.6399999999865
+707.1999999999864
+711.8799999999862
+715.5199999999861
+717.0799999999861
+719.159999999986
+721.2399999999859
+723.8399999999858
+728.5199999999857
+730.0799999999856
+732.6799999999855
+733.7199999999855
+734.7599999999854
+738.9199999999853
+741.5199999999852
+748.279999999985
+750.8799999999849
+751.3999999999849
+755.5599999999847
+759.7199999999846
+762.3199999999845
+762.8399999999845
+764.9199999999844
+769.5999999999842
+776.879999999984
+777.9199999999839
+779.9999999999839
+781.0399999999838
+783.6399999999837
+787.7999999999836
+790.3999999999835
+792.4799999999834
+793.5199999999834
+794.5599999999833
+795.0799999999833
+797.1599999999833
+803.399999999983
+809.6399999999828
+810.6799999999828
+816.3999999999826
+818.9999999999825
+819.5199999999825
+822.6399999999824
+823.6799999999823
+825.7599999999823
+827.8399999999822
+831.479999999982
+834.079999999982
+834.599999999982
+835.6399999999819
+838.2399999999818
+845.5199999999816
+846.0399999999815
+847.5999999999815
+848.1199999999815
+852.2799999999813
+853.8399999999813
+856.4399999999812
+857.9999999999811
+858.5199999999811
+862.159999999981
+864.7599999999809
+867.8799999999808
+868.3999999999808
+871.5199999999807
+872.0399999999806
+873.0799999999806
+876.7199999999805
+879.3199999999804
+887.6399999999801
+894.3999999999799
+895.4399999999798
+896.4799999999798
+896.9999999999798
+897.5199999999797
+903.7599999999795
+915.1999999999791
+916.7599999999791
+918.319999999979
+922.9999999999789
+925.5999999999788
+927.6799999999787
+940.1599999999783
+947.959999999978
+949.5199999999779
+951.5999999999779
+952.1199999999778
+952.6399999999778
+955.7599999999777
+960.9599999999775
+961.4799999999775
+970.8399999999772
+973.4399999999771
+979.1599999999769
+979.6799999999769
+980.1999999999769
+982.7999999999768
+983.8399999999767
+985.3999999999767
+986.9599999999766
+989.0399999999765
+990.0799999999765
+991.1199999999765
+991.6399999999765
+992.1599999999764
+992.6799999999764
+994.2399999999764
+997.8799999999762
+998.3999999999762
+1010.3599999999758
+1010.8799999999758
+1014.5199999999757
+1016.0799999999756
+1020.7599999999754
+1024.3999999999753
+1025.9599999999753
+1029.0799999999751
+1031.679999999975
+1033.759999999975
+1035.319999999975
+1037.9199999999748
+1039.4799999999748
+1039.9999999999748
+1044.6799999999746
+1048.3199999999745
+1050.9199999999744
+1055.5999999999742
+1058.719999999974
+1060.279999999974
+1065.9999999999739
+1068.0799999999738
+1069.6399999999737
+1072.7599999999736
+1073.2799999999736
+1078.9999999999734
+1083.1599999999733
+1084.1999999999732
+1085.7599999999732
+1093.039999999973
+1095.1199999999728
+1095.6399999999728
+1097.1999999999728
+1100.3199999999727
+1100.8399999999726
+1101.3599999999726
+1106.5599999999724
+1108.1199999999724
+1110.1999999999723
+1113.3199999999722
+1114.8799999999721
+1116.439999999972
+1120.599999999972
+1125.2799999999718
+1127.8799999999717
+1131.5199999999716
+1134.1199999999715
+1136.1999999999714
+1136.7199999999714
+1142.4399999999712
+1142.9599999999712
+1151.7999999999709
+1152.3199999999708
+1155.4399999999707
+1158.0399999999706
+1160.6399999999705
+1161.6799999999705
+1162.1999999999705
+1163.7599999999704
+1166.3599999999703
+1169.9999999999702
+1172.5999999999701
+1173.11999999997
+1177.27999999997
+1178.31999999997
+1183.5199999999697
+1185.0799999999697
+1195.4799999999693
+1196.5199999999693
+1199.6399999999692
+1201.1999999999691
+1203.279999999969
+1205.879999999969
+1206.399999999969
+1208.4799999999689
+1208.9999999999688
+1212.1199999999687
+1214.1999999999687
+1217.3199999999686
+1217.8399999999685
+1221.9999999999684
+1228.2399999999682
+1229.7999999999681
+1230.839999999968
+1231.879999999968
+1234.999999999968
+1238.1199999999678
+1240.1999999999678
+1246.4399999999675
+1247.9999999999675
+1249.0399999999674
+1251.6399999999674
+1253.7199999999673
+1257.8799999999671
+1259.439999999967
+1260.999999999967
+1264.119999999967
+1265.6799999999669
+1266.1999999999668
+1267.7599999999668
+1272.4399999999666
+1277.1199999999665
+1278.1599999999664
+1280.2399999999664
+1283.3599999999662
+1284.3999999999662
+1289.079999999966
+1290.119999999966
+1292.199999999966
+1293.7599999999659
+1294.2799999999659
+1296.3599999999658
+1298.9599999999657
+1300.5199999999656
+1308.3199999999654
+1315.0799999999651
+1315.5999999999651
+1319.759999999965
+1321.839999999965
+1325.9999999999648
+1329.1199999999646
+1330.1599999999646
+1330.6799999999646
+1332.7599999999645
+1333.2799999999645
+1333.7999999999645
+1334.3199999999645
+1334.8399999999644
+1337.9599999999643
+1339.5199999999643
+1341.0799999999642
+1341.5999999999642
+1344.719999999964
+1346.799999999964
+1356.1599999999637
+1358.2399999999636
+1358.7599999999636
+1372.7999999999631
+1374.879999999963
+1377.479999999963
+1378.519999999963
+1379.039999999963
+1380.5999999999628
+1381.1199999999628
+1384.2399999999627
+1385.2799999999627
+1388.3999999999626
+1389.9599999999625
+1394.6399999999624
+1402.959999999962
+1405.039999999962
+1406.599999999962
+1410.2399999999618
+1411.7999999999618
+1412.3199999999617
+1413.3599999999617
+1414.3999999999617
+1415.9599999999616
+1419.0799999999615
+1420.6399999999614
+1421.1599999999614
+1429.4799999999611
+1437.2799999999609
+1441.9599999999607
+1445.5999999999606
+1446.6399999999605
+1451.8399999999604
+1453.9199999999603
+1458.0799999999601
+1458.5999999999601
+1459.63999999996
+1461.71999999996
+1463.79999999996
+1468.9999999999598
+1469.5199999999597
+1471.5999999999597
+1472.1199999999596
+1474.7199999999596
+1478.8799999999594
+1479.9199999999594
+1480.4399999999594
+1483.5599999999592
+1484.0799999999592
+1489.279999999959
+1489.799999999959
+1492.919999999959
+1493.439999999959
+1494.9999999999588
+1496.0399999999588
+1496.5599999999588
+1497.5999999999588
+1500.7199999999586
+1504.8799999999585
+1505.3999999999585
+1506.4399999999584
+1512.6799999999582
+1513.7199999999582
+1514.2399999999582
+1516.319999999958
+1516.839999999958
+1517.359999999958
+1518.919999999958
+1520.479999999958
+1522.039999999958
+1524.6399999999578
+1525.6799999999578
+1526.1999999999578
+1529.3199999999576
+1536.0799999999574
+1537.1199999999574
+1541.2799999999572
+1544.3999999999571
+1549.079999999957
+1551.6799999999569
+1554.7999999999568
+1558.9599999999566
+1559.9999999999566
+1563.1199999999565
+1568.3199999999563
+1570.9199999999562
+1574.559999999956
+1577.159999999956
+1578.719999999956
+1586.5199999999556
+1590.1599999999555
+1592.7599999999554
+1594.8399999999554
+1596.3999999999553
+1597.4399999999553
+1597.9599999999552
+1598.4799999999552
+1598.9999999999552
+1602.119999999955
+1602.639999999955
+1603.159999999955
+1606.279999999955
+1610.4399999999548
+1613.0399999999547
+1614.0799999999547
+1615.1199999999546
+1615.6399999999546
+1618.2399999999545
+1619.2799999999545
+1621.8799999999544
+1624.9999999999543
+1625.5199999999543
+1626.0399999999543
+1629.6799999999541
+1633.319999999954
+1635.399999999954
+1637.4799999999539
+1639.5599999999538
+1645.7999999999536
+1646.3199999999536
+1647.8799999999535
+1650.4799999999534
+1651.5199999999534
+1652.5599999999533
+1656.1999999999532
+1657.7599999999532
+1660.359999999953
+1669.1999999999528
+1669.7199999999527
+1672.3199999999526
+1673.3599999999526
+1674.9199999999526
+1679.5999999999524
+1685.3199999999522
+1687.3999999999521
+1687.919999999952
+1689.999999999952
+1694.1599999999519
+1697.7999999999518
+1699.8799999999517
+1701.4399999999516
+1707.1599999999514
+1710.2799999999513
+1718.079999999951
+1719.119999999951
+1720.159999999951
+1721.719999999951
+1722.7599999999509
+1726.9199999999507
+1727.9599999999507
+1731.0799999999506
+1732.1199999999506
+1732.6399999999505
+1734.1999999999505
+1736.2799999999504
+1736.7999999999504
+1741.4799999999502
+1744.0799999999501
+1745.11999999995
+1745.63999999995
+1750.83999999995
+1753.4399999999498
+1754.4799999999498
+1758.1199999999496
+1764.3599999999494
+1770.5999999999492
+1773.1999999999491
+1775.279999999949
+1776.319999999949
+1781.5199999999488
+1782.0399999999488
+1788.7999999999486
+1790.3599999999485
+1793.4799999999484
+1794.5199999999484
+1795.0399999999483
+1796.5999999999483
+1798.1599999999482
+1798.6799999999482
+1800.2399999999482
+1800.7599999999481
+1801.799999999948
+1802.319999999948
+1808.039999999948
+1813.7599999999477
+1814.2799999999477
+1815.8399999999476
+1816.3599999999476
+1825.1999999999473
+1828.3199999999472
+1831.439999999947
+1835.079999999947
+1836.119999999947
+1836.639999999947
+1838.7199999999468
+1839.2399999999468
+1840.2799999999468
+1844.4399999999466
+1847.0399999999465
+1849.6399999999464
+1850.1599999999464
+1850.6799999999464
+1852.2399999999463
+1853.2799999999463
+1855.3599999999462
+1862.119999999946
+1866.7999999999458
+1878.7599999999454
+1880.8399999999453
+1883.4399999999453
+1884.4799999999452
+1884.9999999999452
+1887.599999999945
+1894.3599999999449
+1899.0399999999447
+1901.1199999999446
+1903.7199999999445
+1904.2399999999445
+1904.7599999999445
+1906.3199999999445
+1906.8399999999444
+1907.3599999999444
+1910.4799999999443
+1912.5599999999442
+1919.319999999944
+1920.879999999944
+1921.399999999944
+1923.4799999999439
+1925.0399999999438
+1927.1199999999437
+1934.3999999999435
+1935.4399999999434
+1936.4799999999434
+1936.9999999999434
+1940.1199999999433
+1942.7199999999432
+1944.2799999999431
+1948.959999999943
+1949.999999999943
+1950.519999999943
+1954.1599999999428
+1957.2799999999427
+1958.3199999999426
+1963.5199999999425
+1971.3199999999422
+1975.999999999942
+1976.519999999942
+1979.639999999942
+1985.8799999999417
+1986.3999999999417
+1988.4799999999416
+1989.5199999999415
+1993.6799999999414
+1995.7599999999413
+1997.8399999999413
+1998.8799999999412
+2005.119999999941
+2006.159999999941
+2014.4799999999407
+2014.9999999999407
+2017.0799999999406
+2017.5999999999406
+2018.6399999999405
+2023.8399999999403
+2027.4799999999402
+2030.0799999999401
+2032.67999999994
+2035.27999999994
+2036.31999999994
+2038.3999999999398
+2040.9999999999397
+2043.5999999999397
+2044.1199999999396
+2044.6399999999396
+2050.8799999999396
+2051.3999999999396
+2053.9999999999395
+2054.5199999999395
+2055.0399999999395
+2059.7199999999393
+2065.439999999939
+2067.519999999939
+2069.599999999939
+2070.119999999939
+2070.639999999939
+2072.199999999939
+2074.799999999939
+2078.4399999999387
+2082.0799999999385
+2085.7199999999384
+2087.2799999999384
+2090.3999999999382
+2091.959999999938
+2093.519999999938
+2096.119999999938
+2097.159999999938
+2097.679999999938
+2105.9999999999377
+2111.7199999999375
+2113.2799999999374
+2114.3199999999374
+2118.4799999999373
+2122.639999999937
+2123.679999999937
+2125.239999999937
+2131.479999999937
+2135.1199999999367
+2138.2399999999366
+2146.0399999999363
+2149.159999999936
+2150.199999999936
+2152.799999999936
+2154.359999999936
+2155.919999999936
+2159.039999999936
+2162.1599999999357
+2172.0399999999354
+2172.5599999999354
+2175.1599999999353
+2178.799999999935
+2179.319999999935
+2180.359999999935
+2189.199999999935
+2192.3199999999347
+2205.839999999934
+2209.479999999934
+2209.999999999934
+2212.079999999934
+2214.679999999934
+2217.799999999934
+2218.8399999999338
+2221.9599999999336
+2222.9999999999336
+2225.0799999999335
+2230.2799999999334
+2230.7999999999333
+2234.959999999933
+2237.559999999933
+2238.599999999933
+2243.799999999933
+2244.319999999933
+2247.4399999999328
+2248.9999999999327
+2250.0399999999327
+2251.0799999999326
+2256.7999999999324
+2258.3599999999324
+2259.3999999999323
+2259.9199999999323
+2263.559999999932
+2264.599999999932
+2267.199999999932
+2269.279999999932
+2271.879999999932
+2273.959999999932
+2276.5599999999317
+2277.0799999999317
+2278.1199999999317
+2283.8399999999315
+2284.3599999999315
+2288.5199999999313
+2289.0399999999313
+2290.5999999999312
+2295.279999999931
+2298.919999999931
+2299.439999999931
+2299.959999999931
+2304.6399999999308
+2310.3599999999306
+2313.9999999999304
+2317.1199999999303
+2319.71999999993
+2322.83999999993
+2323.87999999993
+2324.91999999993
+2328.03999999993
+2329.59999999993
+2330.63999999993
+2331.67999999993
+2332.19999999993
+2333.2399999999298
+2334.2799999999297
+2334.7999999999297
+2337.3999999999296
+2338.4399999999296
+2340.5199999999295
+2345.7199999999293
+2346.7599999999293
+2348.319999999929
+2349.879999999929
+2350.919999999929
+2355.079999999929
+2355.599999999929
+2357.159999999929
+2357.679999999929
+2360.799999999929
+2364.9599999999286
+2370.1599999999285
+2371.7199999999284
+2374.3199999999283
+2377.439999999928
+2377.959999999928
+2378.479999999928
+2379.519999999928
+2380.039999999928
+2381.079999999928
+2381.599999999928
+2383.159999999928
+2384.719999999928
+2385.759999999928
+2387.839999999928
+2394.5999999999276
+2395.6399999999276
+2396.6799999999275
+2397.1999999999275
+2405.519999999927
+2407.599999999927
+2409.159999999927
+2411.239999999927
+2416.439999999927
+2417.999999999927
+2422.6799999999266
+2427.3599999999265
+2428.9199999999264
+2432.0399999999263
+2434.119999999926
+2439.319999999926
+2445.559999999926
+2447.6399999999257
+2449.1999999999257
+2450.2399999999257
+2451.2799999999256
+2454.3999999999255
+2455.9599999999255
+2457.5199999999254
+2461.1599999999253
+2462.719999999925
+2464.799999999925
+2467.399999999925
+2473.639999999925
+2474.679999999925
+2477.2799999999247
+2477.7999999999247
+2485.0799999999244
+2489.7599999999243
+2491.319999999924
+2493.919999999924
+2495.479999999924
+2496.519999999924
+2498.079999999924
+2498.599999999924
+2502.239999999924
+2503.799999999924
+2507.4399999999237
+2510.0399999999236
+2513.6799999999234
+2515.7599999999234
+2516.7999999999233
+2520.439999999923
+2521.479999999923
+2529.279999999923
+2538.1199999999226
+2539.1599999999225
+2539.6799999999225
+2541.7599999999225
+2542.7999999999224
+2545.3999999999223
+2551.639999999922
+2552.679999999922
+2560.479999999922
+2562.0399999999217
+2562.5599999999217
+2565.1599999999216
+2569.8399999999215
+2570.3599999999215
+2571.9199999999214
+2573.9999999999213
+2574.5199999999213
+2577.639999999921
+2585.439999999921
+2590.6399999999207
+2594.2799999999206
+2595.3199999999206
+2597.9199999999205
+2598.4399999999205
+2605.71999999992
+2606.75999999992
+2609.35999999992
+2609.87999999992
+2610.39999999992
+2610.91999999992
+2611.95999999992
+2612.99999999992
+2617.67999999992
+2619.2399999999197
+2624.4399999999196
+2624.9599999999195
+2631.1999999999193
+2632.7599999999193
+2634.319999999919
+2638.479999999919
+2638.999999999919
+2643.679999999919
+2646.279999999919
+2648.8799999999187
+2650.9599999999186
+2655.6399999999185
+2687.8799999999173
+2689.9599999999173
+2698.799999999917
+2700.879999999917
+2705.0399999999167
+2707.1199999999167
+2709.7199999999166
+2710.7599999999165
+2712.8399999999165
+2719.5999999999162
+2720.119999999916
+2725.319999999916
+2726.359999999916
+2728.959999999916
+2731.039999999916
+2731.559999999916
+2738.3199999999156
+2741.9599999999155
+2742.4799999999154
+2745.0799999999153
+2759.119999999915
+2770.0399999999145
+2772.6399999999144
+2783.039999999914
+2783.559999999914
+2787.199999999914
+2790.8399999999137
+2791.8799999999137
+2796.5599999999135
+2800.1999999999134
+2801.2399999999134
+2802.2799999999133
+2805.3999999999132
+2806.959999999913
+2808.519999999913
+2810.599999999913
+2811.119999999913
+2813.199999999913
+2813.719999999913
+2815.799999999913
+2816.839999999913
+2819.4399999999127
+2820.4799999999127
+2821.5199999999127
+2824.1199999999126
+2826.7199999999125
+2827.7599999999125
+2829.3199999999124
+2831.9199999999123
+2834.519999999912
+2839.719999999912
+2841.279999999912
+2841.799999999912
+2844.399999999912
+2848.0399999999117
+2849.5999999999117
+2851.1599999999116
+2853.7599999999115
+2856.8799999999114
+2861.0399999999113
+2861.5599999999113
+2862.0799999999113
+2866.239999999911
+2866.759999999911
+2867.799999999911
+2874.039999999911
+2874.559999999911
+2875.599999999911
+2882.3599999999105
+2885.4799999999104
+2888.5999999999103
+2896.91999999991
+2900.03999999991
+2908.3599999999096
+2910.4399999999096
+2916.6799999999093
+2917.1999999999093
+2918.7599999999093
+2920.839999999909
+2927.079999999909
+2929.159999999909
+2931.239999999909
+2933.3199999999088
+2933.8399999999087
+2935.9199999999087
+2940.5999999999085
+2942.1599999999085
+2946.3199999999083
+2949.439999999908
+2949.959999999908
+2953.599999999908
+2954.639999999908
+2960.879999999908
+2961.9199999999078
+2966.0799999999076
+2966.5999999999076
+2967.1199999999076
+2967.6399999999076
+2969.1999999999075
+2981.159999999907
+2983.759999999907
+2984.799999999907
+2987.399999999907
+2988.959999999907
+2989.999999999907
+2992.0799999999067
+2994.1599999999066
+2997.2799999999065
+3000.3999999999064
+3000.9199999999064
+3001.9599999999064
+3002.9999999999063
+3005.5999999999062
+3009.759999999906
+3014.959999999906
+3016.519999999906
+3020.1599999999057
+3020.6799999999057
+3021.1999999999057
+3022.2399999999056
+3024.8399999999056
+3025.8799999999055
+3027.9599999999054
+3028.4799999999054
+3030.0399999999054
+3032.1199999999053
+3034.719999999905
+3035.759999999905
+3038.359999999905
+3041.999999999905
+3043.039999999905
+3048.2399999999047
+3050.3199999999047
+3050.8399999999046
+3052.9199999999046
+3053.9599999999045
+3054.9999999999045
+3056.0399999999045
+3057.5999999999044
+3059.1599999999044
+3063.839999999904
+3065.919999999904
+3070.599999999904
+3071.119999999904
+3082.5599999999035
+3083.5999999999035
+3085.6799999999034
+3088.2799999999033
+3090.8799999999032
+3091.919999999903
+3092.439999999903
+3092.959999999903
+3093.479999999903
+3097.639999999903
+3103.359999999903
+3103.879999999903
+3104.399999999903
+3111.6799999999025
+3115.3199999999024
+3115.8399999999024
+3116.3599999999024
+3116.8799999999023
+3121.039999999902
+3124.159999999902
+3125.199999999902
+3127.279999999902
+3129.359999999902
+3129.879999999902
+3130.919999999902
+3131.959999999902
+3135.5999999999017
+3138.7199999999016
+3139.2399999999016
+3139.7599999999015
+3140.2799999999015
+3140.7999999999015
+3141.3199999999015
+3142.3599999999014
+3146.5199999999013
+3148.0799999999012
+3149.119999999901
+3151.199999999901
+3151.719999999901
+3153.279999999901
+3154.319999999901
+3161.079999999901
+3162.1199999999008
+3163.1599999999007
+3174.5999999999003
+3175.1199999999003
+3175.6399999999003
+3180.8399999999
+3183.4399999999
+3184.4799999999
+3188.1199999999
+3190.7199999998998
+3192.7999999998997
+3197.4799999998995
+3199.0399999998995
+3204.7599999998993
+3205.7999999998992
+3206.839999999899
+3208.399999999899
+3211.519999999899
+3212.039999999899
+3217.759999999899
+3219.8399999998987
+3222.4399999998986
+3223.4799999998986
+3227.1199999998985
+3228.1599999998984
+3229.7199999998984
+3231.2799999998983
+3232.8399999998983
+3235.439999999898
+3235.959999999898
+3241.679999999898
+3242.199999999898
+3242.719999999898
+3245.839999999898
+3246.359999999898
+3247.399999999898
+3248.9599999998977
+3249.4799999998977
+3251.0399999998976
+3254.6799999998975
+3256.2399999998975
+3258.3199999998974
+3262.4799999998972
+3265.079999999897
+3265.599999999897
+3268.199999999897
+3269.759999999897
+3273.399999999897
+3283.2799999998965
+3287.9599999998964
+3291.599999999896
+3296.799999999896
+3298.879999999896
+3300.959999999896
+3302.519999999896
+3304.079999999896
+3304.5999999998958
+3305.1199999998958
+3305.6399999998957
+3306.1599999998957
+3306.6799999998957
+3309.2799999998956
+3311.3599999998955
+3313.4399999998955
+3320.199999999895
+3321.759999999895
+3326.439999999895
+3327.479999999895
+3328.519999999895
+3333.7199999998948
+3334.2399999998947
+3336.3199999998947
+3344.1199999998944
+3349.839999999894
+3350.359999999894
+3353.479999999894
+3356.079999999894
+3356.599999999894
+3361.7999999998938
+3364.9199999998937
+3371.1599999998934
+3371.6799999998934
+3373.2399999998934
+3377.399999999893
+3377.919999999893
+3379.479999999893
+3380.519999999893
+3381.039999999893
+3385.719999999893
+3391.9599999998927
+3398.1999999998925
+3399.7599999998924
+3401.8399999998924
+3402.3599999998924
+3405.999999999892
+3408.079999999892
+3409.119999999892
+3411.199999999892
+3412.759999999892
+3413.279999999892
+3414.319999999892
+3414.839999999892
+3416.399999999892
+3418.479999999892
+3418.9999999998918
+3423.1599999998916
+3433.0399999998913
+3433.5599999998913
+3437.719999999891
+3443.439999999891
+3443.959999999891
+3444.479999999891
+3446.559999999891
+3447.5999999998908
+3449.1599999998907
+3450.7199999998907
+3452.2799999998906
+3452.7999999998906
+3458.5199999998904
+3460.5999999998903
+3461.1199999998903
+3462.6799999998902
+3466.31999999989
+3467.87999999989
+3472.55999999989
+3476.7199999998898
+3477.7599999998897
+3485.5599999998894
+3486.0799999998894
+3486.5999999998894
+3487.6399999998894
+3488.1599999998894
+3494.919999999889
+3499.599999999889
+3500.119999999889
+3501.159999999889
+3502.719999999889
+3503.239999999889
+3503.759999999889
+3504.279999999889
+3505.8399999998887
+3508.4399999998886
+3508.9599999998886
+3511.0399999998886
+3516.2399999998884
+3518.8399999998883
+3520.919999999888
+3524.039999999888
+3527.159999999888
+3529.759999999888
+3534.9599999998877
+3535.4799999998877
+3535.9999999998877
+3540.1599999998875
+3542.2399999998875
+3548.999999999887
+3551.079999999887
+3556.799999999887
+3560.959999999887
+3563.5599999998867
+3564.0799999998867
+3566.1599999998866
+3568.2399999998865
+3568.7599999998865
+3571.8799999998864
+3576.0399999998863
+3579.159999999886
+3580.719999999886
+3581.239999999886
+3582.279999999886
+3593.7199999998857
+3597.8799999998855
+3599.9599999998854
+3603.5999999998853
+3604.6399999998853
+3609.319999999885
+3609.839999999885
+3610.879999999885
+3615.559999999885
+3616.079999999885
+3623.8799999998846
+3624.3999999998846
+3625.4399999998845
+3626.9999999998845
+3629.0799999998844
+3631.6799999998843
+3632.1999999998843
+3635.319999999884
+3635.839999999884
+3638.959999999884
+3643.119999999884
+3645.199999999884
+3649.8799999998837
+3650.3999999998837
+3651.4399999998836
+3651.9599999998836
+3656.6399999998835
+3657.1599999998834
+3664.959999999883
+3665.999999999883
+3667.559999999883
+3674.839999999883
+3675.359999999883
+3678.9999999998827
+3684.1999999998825
+3685.2399999998825
+3685.7599999998824
+3687.3199999998824
+3694.079999999882
+3702.919999999882
+3704.479999999882
+3707.5999999998817
+3714.3599999998814
+3720.0799999998812
+3720.599999999881
+3724.759999999881
+3725.799999999881
+3726.839999999881
+3729.959999999881
+3730.999999999881
+3731.519999999881
+3733.5999999998808
+3736.1999999998807
+3739.3199999998806
+3739.8399999998805
+3740.3599999998805
+3747.6399999998803
+3748.1599999998803
+3749.71999999988
+3750.23999999988
+3751.27999999988
+3752.83999999988
+3754.39999999988
+3757.51999999988
+3758.55999999988
+3759.07999999988
+3762.1999999998798
+3763.7599999998797
+3764.2799999998797
+3774.6799999998793
+3776.7599999998793
+3777.2799999998792
+3789.239999999879
+3791.8399999998787
+3795.9999999998786
+3796.5199999998786
+3797.5599999998785
+3801.1999999998784
+3802.2399999998784
+3805.8799999998782
+3809.519999999878
+3817.319999999878
+3817.839999999878
+3820.9599999998777
+3828.7599999998774
+3831.3599999998773
+3832.3999999998773
+3837.599999999877
+3842.799999999877
+3843.319999999877
+3848.5199999998767
+3849.0399999998767
+3850.5999999998767
+3851.1199999998767
+3852.1599999998766
+3854.2399999998765
+3854.7599999998765
+3856.8399999998765
+3860.9999999998763
+3862.5599999998763
+3865.679999999876
+3867.759999999876
+3868.799999999876
+3873.479999999876
+3875.039999999876
+3878.1599999998757
+3879.7199999998757
+3882.3199999998756
+3883.3599999998755
+3886.4799999998754
+3888.5599999998753
+3897.399999999875
+3899.999999999875
+3902.599999999875
+3907.2799999998747
+3910.3999999998746
+3912.4799999998745
+3914.5599999998744
+3916.6399999998744
+3920.2799999998742
+3921.839999999874
+3923.399999999874
+3924.439999999874
+3925.999999999874
+3930.679999999874
+3933.7999999998738
+3937.9599999998736
+3940.0399999998735
+3944.1999999998734
+3944.7199999998734
+3945.2399999998734
+3947.8399999998733
+3953.559999999873
+3955.639999999873
+3956.159999999873
+3957.199999999873
+3958.239999999873
+3960.839999999873
+3961.879999999873
+3963.4399999998727
+3975.9199999998723
+3979.039999999872
+3980.599999999872
+3984.759999999872
+3985.279999999872
+3990.479999999872
+3991.5199999998717
+3992.0399999998717
+4000.8799999998714
+4003.4799999998713
+4005.0399999998713
+4005.5599999998713
+4007.639999999871
+4009.199999999871
+4011.279999999871
+4015.439999999871
+4016.479999999871
+4021.6799999998707
+4022.1999999998707
+4023.7599999998706
+4028.4399999998705
+4029.9999999998704
+4030.5199999998704
+4034.1599999998703
+4037.79999999987
+4038.31999999987
+4040.91999999987
+4045.07999999987
+4047.15999999987
+4051.8399999998696
+4055.9999999998695
+4056.5199999998695
+4057.0399999998695
+4059.6399999998694
+4064.319999999869
+4075.239999999869
+4076.279999999869
+4076.7999999998688
+4080.9599999998686
+4083.0399999998685
+4086.1599999998684
+4087.1999999998684
+4087.7199999998684
+4090.3199999998683
+4094.479999999868
+4096.039999999868
+4099.159999999871
+4100.199999999872
+4101.239999999872
+4107.479999999878
+4110.59999999988
+4111.119999999881
+4114.759999999884
+4115.799999999885
+4126.199999999893
+4126.719999999894
+4132.439999999899
+4133.4799999998995
+4136.599999999902
+4139.199999999904
+4140.239999999905
+4141.279999999906
+4145.43999999991
+4145.95999999991
+4146.47999999991
+4151.159999999914
+4152.199999999915
+4153.7599999999165
+4155.319999999918
+4158.959999999921
+4159.999999999922
+4167.279999999928
+4168.319999999929
+4169.35999999993
+4172.999999999933
+4176.639999999936
+4177.159999999936
+4178.199999999937
+4179.239999999938
+4181.31999999994
+4189.119999999946
+4193.79999999995
+4194.839999999951
+4197.959999999954
+4200.039999999955
+4204.199999999959
+4205.23999999996
+4206.279999999961
+4209.399999999963
+4214.079999999967
+4215.6399999999685
+4216.159999999969
+4216.679999999969
+4217.71999999997
+4221.879999999974
+4225.519999999977
+4228.119999999979
+4229.67999999998
+4231.759999999982
+4232.2799999999825
+4236.439999999986
+4237.479999999987
+4237.999999999987
+4245.799999999994
+4250.479999999998
+4254.120000000001
+4255.160000000002
+4259.320000000005
+4263.480000000009
+4266.080000000011
+4267.120000000012
+4268.160000000013
+4269.2000000000135
+4270.760000000015
+4271.800000000016
+4275.960000000019
+4280.640000000023
+4281.1600000000235
+4281.680000000024
+4282.720000000025
+4288.44000000003
+4291.560000000032
+4296.240000000036
+4302.480000000041
+4304.040000000043
+4311.320000000049
+4316.520000000053
+4331.080000000065
+4333.160000000067
+4335.240000000069
+4343.040000000075
+4345.120000000077
+4345.640000000078
+4348.76000000008
+4354.480000000085
+4356.560000000087
+4361.240000000091
+4362.280000000092
+4362.800000000092
+4368.520000000097
+4375.2800000001025
+4376.840000000104
+4377.360000000104
+4378.400000000105
+4383.6000000001095
+4386.720000000112
+4391.9200000001165
+4399.200000000123
+4401.280000000124
+4402.320000000125
+4402.840000000126
+4404.920000000127
+4407.000000000129
+4408.04000000013
+4410.640000000132
+4412.720000000134
+4416.880000000137
+4422.600000000142
+4423.120000000143
+4425.200000000144
+4429.880000000148
+4433.520000000151
+4435.080000000153
+4435.600000000153
+4439.760000000157
+4443.92000000016
+4448.080000000164
+4448.600000000164
+4450.680000000166
+4454.840000000169
+4463.160000000176
+4465.240000000178
+4465.7600000001785
+4467.84000000018
+4470.960000000183
+4472.000000000184
+4472.520000000184
+4474.600000000186
+4475.120000000186
+4475.640000000187
+4476.680000000188
+4478.760000000189
+4480.840000000191
+4483.440000000193
+4487.080000000196
+4492.800000000201
+4493.840000000202
+4497.480000000205
+4500.080000000207
+4501.120000000208
+4502.160000000209
+4504.760000000211
+4507.880000000214
+4508.400000000214
+4513.600000000219
+4519.320000000223
+4526.080000000229
+4527.12000000023
+4528.680000000231
+4530.760000000233
+4531.800000000234
+4532.320000000234
+4544.800000000245
+4545.320000000245
+4546.880000000247
+4549.480000000249
+4552.080000000251
+4554.680000000253
+4555.720000000254
+4558.320000000256
+4559.360000000257
+4560.920000000258
+4564.560000000261
+4566.120000000263
+4566.640000000263
+4568.720000000265
+4571.320000000267
+4576.000000000271
+4577.040000000272
+4577.560000000272
+4579.640000000274
+4580.1600000002745
+4580.680000000275
+4581.720000000276
+4582.240000000276
+4583.280000000277
+4586.92000000028
+4587.960000000281
+4589.000000000282
+4593.680000000286
+4595.240000000287
+4595.760000000288
+4596.280000000288
+4600.440000000292
+4603.040000000294
+4606.160000000296
+4609.280000000299
+4611.360000000301
+4616.560000000305
+4620.720000000309
+4621.240000000309
+4623.840000000311
+4625.920000000313
+4626.960000000314
+4628.000000000315
+4632.680000000319
+4634.24000000032
+4638.400000000323
+4642.0400000003265
+4646.20000000033
+4646.72000000033
+4647.760000000331
+4649.840000000333
+4655.040000000337
+4656.080000000338
+4656.600000000339
+4657.64000000034
+4659.720000000341
+4662.840000000344
+4663.880000000345
+4665.960000000347
+4669.080000000349
+4674.2800000003535
+4675.840000000355
+4676.880000000356
+4677.920000000357
+4684.680000000362
+4690.9200000003675
+4692.480000000369
+4697.680000000373
+4698.720000000374
+4708.080000000382
+4711.720000000385
+4715.8800000003885
+4717.44000000039
+4722.640000000394
+4726.800000000398
+4727.320000000398
+4729.4000000004
+4730.440000000401
+4736.1600000004055
+4746.040000000414
+4746.560000000414
+4747.600000000415
+4749.680000000417
+4751.240000000418
+4752.280000000419
+4756.960000000423
+4759.560000000425
+4763.200000000428
+4773.0800000004365
+4775.160000000438
+4781.4000000004435
+4782.960000000445
+4787.120000000448
+4787.640000000449
+4788.160000000449
+4791.280000000452
+4793.880000000454
+4806.360000000464
+4810.520000000468
+4812.60000000047
+4815.720000000472
+4816.760000000473
+4817.800000000474
+4819.880000000476
+4820.920000000477
+4826.640000000481
+4831.840000000486
+4832.360000000486
+4833.400000000487
+4833.920000000488
+4834.960000000488
+4837.560000000491
+4839.640000000492
+4844.840000000497
+4846.400000000498
+4847.440000000499
+4849.520000000501
+4857.840000000508
+4861.480000000511
+4862.520000000512
+4864.080000000513
+4865.120000000514
+4866.160000000515
+4867.2000000005155
+4867.720000000516
+4868.240000000516
+4868.760000000517
+4870.320000000518
+4873.960000000521
+4874.480000000522
+4876.040000000523
+4880.200000000526
+4881.240000000527
+4881.760000000528
+4890.080000000535
+4894.760000000539
+4895.8000000005395
+4897.360000000541
+4900.480000000543
+4902.040000000545
+4908.80000000055
+4911.920000000553
+4915.560000000556
+4916.0800000005565
+4930.120000000568
+4930.640000000569
+4932.7200000005705
+4934.800000000572
+4941.0400000005775
+4942.080000000578
+4943.64000000058
+4948.840000000584
+4955.60000000059
+4957.6800000005915
+4959.240000000593
+4964.960000000598
+4965.480000000598
+4966.520000000599
+4970.680000000602
+4971.200000000603
+4971.720000000603
+4973.280000000605
+4976.920000000608
+4977.440000000608
+4979.000000000609
+4979.52000000061
+4983.680000000613
+4984.720000000614
+4987.320000000616
+4990.960000000619
+4996.680000000624
+4997.200000000625
+4998.760000000626
+4999.800000000627
+5001.360000000628
+5001.880000000629
+5009.680000000635
+5011.240000000636
+5011.760000000637
+5012.800000000638
+5015.40000000064
+5017.480000000642
+5018.000000000642
+5020.080000000644
+5021.640000000645
+5022.160000000646
+5023.2000000006465
+5026.320000000649
+5028.920000000651
+5034.640000000656
+5042.440000000663
+5043.4800000006635
+5052.840000000671
+5053.360000000672
+5054.920000000673
+5058.040000000676
+5061.680000000679
+5063.7600000006805
+5064.280000000681
+5066.360000000683
+5068.960000000685
+5073.640000000689
+5075.720000000691
+5076.240000000691
+5076.760000000691
+5077.280000000692
+5079.360000000694
+5079.880000000694
+5082.480000000696
+5085.080000000698
+5085.600000000699
+5086.120000000699
+5087.1600000007
+5087.680000000701
+5088.200000000701
+5090.280000000703
+5092.880000000705
+5093.920000000706
+5096.000000000708
+5107.440000000717
+5111.08000000072
+5111.600000000721
+5112.120000000721
+5114.200000000723
+5115.760000000724
+5116.280000000725
+5117.840000000726
+5120.440000000728
+5123.560000000731
+5125.120000000732
+5126.160000000733
+5127.720000000734
+5128.760000000735
+5129.2800000007355
+5132.400000000738
+5134.48000000074
+5136.560000000742
+5138.120000000743
+5143.320000000747
+5143.840000000748
+5145.400000000749
+5146.96000000075
+5147.480000000751
+5148.520000000752
+5149.040000000752
+5151.120000000754
+5151.640000000754
+5154.760000000757
+5156.840000000759
+5158.40000000076
+5163.600000000764
+5164.640000000765
+5165.160000000766
+5168.280000000768
+5169.320000000769
+5170.36000000077
+5171.400000000771
+5171.920000000771
+5175.560000000774
+5176.080000000775
+5177.640000000776
+5179.200000000777
+5180.240000000778
+5184.400000000782
+5187.000000000784
+5188.560000000785
+5192.720000000789
+5194.28000000079
+5197.920000000793
diff --git a/build-engine/src/test/resources/multi_file_double/data_1 b/build-engine/src/test/resources/multi_file_double/data_1
new file mode 100644
index 0000000..f3c6d42
--- /dev/null
+++ b/build-engine/src/test/resources/multi_file_double/data_1
@@ -0,0 +1,2093 @@
+1.56
+9.359999999999996
+13.519999999999992
+14.039999999999992
+17.67999999999999
+19.759999999999987
+20.799999999999986
+22.359999999999985
+34.31999999999999
+35.36
+39.520000000000024
+45.24000000000006
+52.0000000000001
+57.20000000000013
+61.360000000000156
+62.920000000000165
+65.52000000000017
+67.08000000000015
+67.60000000000015
+70.72000000000013
+71.24000000000012
+75.92000000000009
+83.72000000000003
+84.24000000000002
+90.47999999999998
+91.51999999999997
+97.75999999999992
+98.79999999999991
+105.03999999999986
+108.15999999999984
+109.19999999999983
+111.27999999999982
+112.31999999999981
+112.8399999999998
+114.39999999999979
+115.95999999999978
+116.47999999999978
+121.15999999999974
+122.71999999999973
+124.79999999999971
+125.31999999999971
+127.3999999999997
+130.51999999999973
+133.11999999999978
+133.6399999999998
+134.6799999999998
+137.79999999999987
+138.8399999999999
+140.91999999999993
+141.95999999999995
+144.04
+145.08
+146.64000000000004
+147.16000000000005
+148.20000000000007
+148.72000000000008
+151.32000000000014
+152.36000000000016
+154.4400000000002
+156.52000000000024
+157.04000000000025
+160.68000000000032
+162.76000000000036
+165.88000000000042
+167.44000000000045
+168.48000000000047
+169.5200000000005
+171.60000000000053
+172.12000000000054
+173.16000000000057
+174.7200000000006
+177.32000000000065
+180.96000000000072
+182.52000000000075
+183.04000000000076
+187.20000000000084
+187.72000000000085
+188.76000000000087
+189.28000000000088
+190.3200000000009
+192.92000000000095
+195.000000000001
+200.7200000000011
+203.32000000000116
+205.9200000000012
+208.00000000000125
+210.0800000000013
+210.6000000000013
+211.64000000000132
+212.68000000000134
+215.8000000000014
+216.32000000000141
+216.84000000000142
+217.36000000000143
+219.44000000000148
+221.52000000000152
+228.28000000000165
+230.3600000000017
+231.92000000000172
+233.48000000000175
+234.00000000000176
+235.04000000000178
+237.64000000000183
+238.68000000000185
+240.24000000000188
+241.80000000000192
+242.32000000000193
+243.36000000000195
+245.960000000002
+248.56000000000205
+249.08000000000206
+250.6400000000021
+251.6800000000021
+257.40000000000214
+257.9200000000021
+258.9600000000021
+265.72000000000185
+267.2800000000018
+275.0800000000015
+283.4000000000012
+288.08000000000106
+288.60000000000105
+290.680000000001
+292.2400000000009
+294.32000000000085
+299.0000000000007
+303.16000000000054
+310.4400000000003
+312.0000000000002
+313.0400000000002
+321.8799999999999
+323.4399999999998
+324.4799999999998
+332.7999999999995
+333.83999999999946
+334.8799999999994
+335.9199999999994
+336.4399999999994
+342.1599999999992
+343.7199999999991
+347.359999999999
+352.5599999999988
+353.0799999999988
+354.63999999999874
+355.6799999999987
+356.71999999999866
+359.83999999999855
+363.4799999999984
+363.9999999999984
+370.2399999999982
+373.3599999999981
+374.39999999999804
+375.959999999998
+381.1599999999978
+381.6799999999978
+382.1999999999978
+387.9199999999976
+388.43999999999755
+389.4799999999975
+390.5199999999975
+392.0799999999974
+394.67999999999734
+395.7199999999973
+397.27999999999724
+398.3199999999972
+398.8399999999972
+404.039999999997
+407.1599999999969
+409.7599999999968
+414.9599999999966
+417.03999999999655
+418.0799999999965
+418.5999999999965
+420.15999999999644
+426.9199999999962
+430.0399999999961
+430.5599999999961
+435.7599999999959
+436.79999999999586
+437.8399999999958
+438.8799999999958
+439.91999999999575
+441.4799999999957
+445.63999999999555
+455.5199999999952
+456.55999999999517
+458.1199999999951
+458.6399999999951
+460.19999999999504
+465.39999999999486
+469.5599999999947
+473.71999999999457
+483.5999999999942
+486.19999999999413
+487.7599999999941
+488.79999999999404
+491.39999999999395
+496.0799999999938
+497.63999999999373
+498.6799999999937
+505.43999999999346
+509.5999999999933
+511.67999999999324
+512.1999999999932
+518.959999999993
+521.0399999999929
+521.5599999999929
+525.1999999999928
+526.7599999999927
+528.3199999999927
+536.1199999999924
+539.7599999999923
+548.599999999992
+550.6799999999919
+559.5199999999916
+562.1199999999915
+563.6799999999914
+565.7599999999914
+567.3199999999913
+567.8399999999913
+570.4399999999912
+573.5599999999911
+574.599999999991
+575.119999999991
+576.679999999991
+583.9599999999907
+587.5999999999906
+588.6399999999906
+593.8399999999904
+597.4799999999902
+601.1199999999901
+601.6399999999901
+604.75999999999
+606.8399999999899
+607.8799999999899
+609.4399999999898
+616.7199999999896
+622.4399999999894
+622.9599999999893
+623.9999999999893
+628.1599999999892
+628.6799999999891
+632.839999999989
+633.359999999989
+639.0799999999888
+642.7199999999887
+643.7599999999886
+644.7999999999886
+647.3999999999885
+653.6399999999883
+656.2399999999882
+657.7999999999881
+658.3199999999881
+658.8399999999881
+659.8799999999881
+660.919999999988
+661.439999999988
+662.479999999988
+670.2799999999877
+673.9199999999876
+676.5199999999875
+680.6799999999873
+681.1999999999873
+682.7599999999873
+683.7999999999872
+688.4799999999871
+690.039999999987
+690.559999999987
+691.599999999987
+694.7199999999868
+698.3599999999867
+700.9599999999866
+704.5999999999865
+705.1199999999865
+707.7199999999864
+710.3199999999863
+713.4399999999862
+714.4799999999861
+717.599999999986
+726.9599999999857
+732.1599999999855
+735.7999999999854
+736.3199999999854
+737.3599999999853
+737.8799999999853
+738.3999999999853
+743.0799999999851
+744.6399999999851
+756.5999999999847
+757.1199999999847
+758.1599999999846
+765.4399999999844
+766.9999999999843
+768.5599999999843
+771.6799999999841
+778.4399999999839
+778.9599999999839
+779.4799999999839
+781.5599999999838
+782.0799999999838
+782.5999999999838
+785.7199999999837
+788.3199999999836
+788.8399999999835
+789.3599999999835
+791.4399999999835
+791.9599999999834
+794.0399999999834
+799.7599999999832
+800.2799999999831
+802.3599999999831
+804.959999999983
+805.479999999983
+807.0399999999829
+814.3199999999827
+815.8799999999826
+820.0399999999825
+820.5599999999824
+828.3599999999822
+828.8799999999821
+830.4399999999821
+835.1199999999819
+836.1599999999819
+837.7199999999818
+839.2799999999818
+840.8399999999817
+842.3999999999817
+843.4399999999816
+848.6399999999815
+849.6799999999814
+850.1999999999814
+851.2399999999814
+851.7599999999813
+856.9599999999812
+857.4799999999811
+859.5599999999811
+861.639999999981
+863.199999999981
+873.5999999999806
+874.1199999999806
+875.6799999999805
+876.1999999999805
+880.8799999999803
+882.9599999999803
+883.4799999999802
+883.9999999999802
+886.5999999999801
+891.27999999998
+892.8399999999799
+893.3599999999799
+902.1999999999796
+904.2799999999795
+908.9599999999793
+910.5199999999793
+917.799999999979
+919.359999999979
+920.9199999999789
+921.9599999999789
+922.4799999999789
+924.5599999999788
+930.7999999999786
+931.8399999999785
+934.4399999999785
+935.4799999999784
+937.5599999999783
+938.5999999999783
+939.1199999999783
+941.1999999999782
+943.7999999999781
+944.8399999999781
+955.2399999999777
+956.7999999999777
+957.3199999999777
+963.0399999999775
+963.5599999999774
+964.0799999999774
+966.6799999999773
+967.1999999999773
+968.2399999999773
+969.2799999999772
+970.3199999999772
+971.8799999999771
+972.9199999999771
+976.559999999977
+981.7599999999768
+982.2799999999768
+985.9199999999767
+986.4399999999766
+994.7599999999763
+995.2799999999763
+1000.9999999999761
+1002.0399999999761
+1005.159999999976
+1008.2799999999759
+1009.8399999999758
+1013.9999999999757
+1015.5599999999756
+1017.6399999999755
+1019.1999999999755
+1020.2399999999755
+1024.9199999999753
+1025.4399999999753
+1026.4799999999752
+1029.5999999999751
+1034.799999999975
+1040.5199999999747
+1042.0799999999747
+1047.2799999999745
+1049.8799999999744
+1052.9999999999743
+1056.6399999999742
+1059.759999999974
+1061.319999999974
+1064.439999999974
+1067.0399999999738
+1067.5599999999738
+1073.7999999999736
+1081.0799999999733
+1081.5999999999733
+1087.319999999973
+1090.959999999973
+1091.479999999973
+1098.2399999999727
+1098.7599999999727
+1099.2799999999727
+1101.8799999999726
+1103.9599999999725
+1106.0399999999725
+1107.0799999999724
+1114.3599999999722
+1118.519999999972
+1121.119999999972
+1122.6799999999719
+1123.1999999999719
+1123.7199999999718
+1124.7599999999718
+1129.9599999999716
+1145.559999999971
+1148.679999999971
+1150.759999999971
+1153.8799999999708
+1155.9599999999707
+1164.2799999999704
+1165.8399999999704
+1167.9199999999703
+1168.4399999999703
+1175.71999999997
+1176.23999999997
+1176.75999999997
+1181.4399999999698
+1182.9999999999698
+1185.5999999999697
+1187.1599999999696
+1190.2799999999695
+1191.3199999999695
+1194.4399999999694
+1197.0399999999693
+1207.439999999969
+1210.0399999999688
+1210.5599999999688
+1211.0799999999688
+1211.5999999999688
+1213.6799999999687
+1219.3999999999685
+1219.9199999999685
+1221.4799999999684
+1222.5199999999684
+1226.1599999999682
+1227.1999999999682
+1234.479999999968
+1235.519999999968
+1239.1599999999678
+1241.2399999999677
+1242.2799999999677
+1242.7999999999677
+1243.3199999999676
+1246.9599999999675
+1247.4799999999675
+1248.5199999999675
+1252.6799999999673
+1254.2399999999673
+1256.3199999999672
+1259.959999999967
+1261.519999999967
+1263.599999999967
+1267.2399999999668
+1268.7999999999668
+1270.3599999999667
+1271.3999999999667
+1273.4799999999666
+1274.5199999999666
+1276.0799999999665
+1278.6799999999664
+1279.1999999999664
+1279.7199999999664
+1281.7999999999663
+1282.3199999999663
+1283.8799999999662
+1290.639999999966
+1291.679999999966
+1292.719999999966
+1295.8399999999658
+1299.4799999999657
+1307.2799999999654
+1309.8799999999653
+1310.9199999999653
+1311.4399999999653
+1311.9599999999652
+1312.9999999999652
+1316.119999999965
+1317.159999999965
+1317.679999999965
+1320.799999999965
+1322.8799999999649
+1323.3999999999648
+1327.5599999999647
+1328.5999999999647
+1331.7199999999646
+1338.9999999999643
+1348.879999999964
+1349.399999999964
+1351.4799999999639
+1351.9999999999638
+1355.6399999999637
+1356.6799999999637
+1357.1999999999637
+1357.7199999999636
+1359.2799999999636
+1360.8399999999635
+1361.3599999999635
+1362.3999999999635
+1362.9199999999635
+1363.9599999999634
+1364.4799999999634
+1365.5199999999634
+1366.0399999999634
+1368.6399999999633
+1370.1999999999632
+1372.2799999999631
+1374.359999999963
+1377.999999999963
+1384.7599999999627
+1386.8399999999626
+1387.8799999999626
+1388.9199999999626
+1389.4399999999625
+1390.4799999999625
+1392.0399999999624
+1395.1599999999623
+1396.7199999999623
+1400.3599999999622
+1400.8799999999621
+1401.3999999999621
+1402.439999999962
+1408.1599999999619
+1418.5599999999615
+1419.5999999999615
+1420.1199999999615
+1422.7199999999614
+1428.9599999999612
+1430.519999999961
+1431.559999999961
+1436.239999999961
+1440.3999999999608
+1440.9199999999607
+1442.4799999999607
+1448.1999999999605
+1448.7199999999605
+1449.7599999999604
+1450.7999999999604
+1455.9999999999602
+1464.31999999996
+1466.3999999999598
+1468.4799999999598
+1475.7599999999595
+1478.3599999999594
+1480.9599999999593
+1483.0399999999593
+1485.1199999999592
+1486.1599999999592
+1486.6799999999591
+1488.239999999959
+1491.359999999959
+1495.5199999999588
+1499.1599999999587
+1500.1999999999587
+1502.2799999999586
+1508.5199999999584
+1512.1599999999582
+1518.399999999958
+1519.439999999958
+1522.5599999999579
+1530.3599999999576
+1530.8799999999576
+1533.4799999999575
+1534.5199999999575
+1535.0399999999574
+1535.5599999999574
+1538.1599999999573
+1540.7599999999572
+1541.7999999999572
+1544.919999999957
+1545.439999999957
+1548.559999999957
+1550.119999999957
+1551.1599999999569
+1552.1999999999568
+1556.8799999999567
+1561.0399999999565
+1561.5599999999565
+1565.7199999999564
+1566.7599999999563
+1570.3999999999562
+1572.4799999999561
+1580.7999999999558
+1581.3199999999558
+1582.3599999999558
+1583.3999999999558
+1589.1199999999556
+1592.2399999999554
+1593.7999999999554
+1594.3199999999554
+1595.8799999999553
+1599.5199999999552
+1600.0399999999552
+1601.5999999999551
+1604.199999999955
+1606.799999999955
+1607.839999999955
+1609.9199999999548
+1611.9999999999548
+1616.6799999999546
+1617.7199999999546
+1628.1199999999542
+1631.759999999954
+1632.279999999954
+1632.799999999954
+1634.359999999954
+1635.919999999954
+1637.9999999999538
+1642.1599999999537
+1644.2399999999536
+1648.3999999999535
+1648.9199999999535
+1649.4399999999534
+1658.2799999999531
+1659.839999999953
+1665.5599999999529
+1668.6799999999528
+1672.8399999999526
+1675.9599999999525
+1676.4799999999525
+1678.5599999999524
+1681.1599999999523
+1681.6799999999523
+1682.1999999999523
+1686.8799999999521
+1691.039999999952
+1691.559999999952
+1693.119999999952
+1695.7199999999518
+1696.2399999999518
+1700.3999999999517
+1701.9599999999516
+1702.4799999999516
+1704.5599999999515
+1705.5999999999515
+1706.1199999999515
+1708.1999999999514
+1708.7199999999514
+1711.8399999999513
+1714.4399999999512
+1714.9599999999512
+1720.679999999951
+1728.9999999999507
+1730.0399999999506
+1733.1599999999505
+1737.8399999999504
+1738.3599999999503
+1739.9199999999503
+1740.9599999999502
+1743.5599999999502
+1744.5999999999501
+1746.15999999995
+1747.71999999995
+1751.8799999999499
+1754.9999999999498
+1759.6799999999496
+1761.2399999999495
+1761.7599999999495
+1762.7999999999495
+1765.9199999999494
+1767.4799999999493
+1779.439999999949
+1779.9599999999489
+1780.4799999999489
+1783.5999999999487
+1784.1199999999487
+1787.7599999999486
+1788.2799999999486
+1790.8799999999485
+1795.5599999999483
+1799.7199999999482
+1801.2799999999481
+1802.839999999948
+1803.359999999948
+1809.0799999999479
+1809.5999999999478
+1810.1199999999478
+1811.1599999999478
+1812.7199999999477
+1816.8799999999476
+1819.4799999999475
+1821.0399999999474
+1821.5599999999474
... 39159 lines suppressed ...