You are viewing a plain-text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2018/09/11 03:18:26 UTC
[kylin] 03/05: KYLIN-3534 Don't compress fact distinct output file
This is an automated email from the ASF dual-hosted git repository.
shaofengshi pushed a commit to branch 2.5.x
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit d90bb046d191dd06f194e7380da13698772e001f
Author: chao long <wa...@qq.com>
AuthorDate: Wed Sep 5 09:55:44 2018 +0800
KYLIN-3534 Don't compress fact distinct output file
---
.../main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
index 77ebd69..213cdfd 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
@@ -198,19 +198,22 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class, NullWritable.class, LongWritable.class);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
+ FileOutputFormat.setCompressOutput(job, false);
// prevent to create zero-sized default output
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
+
MultipleOutputsRDD multipleOutputsRDD = MultipleOutputsRDD.rddToMultipleOutputsRDD(outputRDD);
multipleOutputsRDD.saveAsNewAPIHadoopDatasetWithMultipleOutputs(job.getConfiguration());
- logger.info("Map input records={}", recordRDD.count());
+ long recordCount = recordRDD.count();
+ logger.info("Map input records={}", recordCount);
logger.info("HDFS Read: {} HDFS Write", bytesWritten.value());
Map<String, String> counterMap = Maps.newHashMap();
- counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordRDD.count()));
+ counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordCount));
counterMap.put(ExecutableConstants.SOURCE_RECORDS_SIZE, String.valueOf(bytesWritten.value()));
// save counter to hdfs