Posted to commits@kylin.apache.org by sh...@apache.org on 2018/09/10 08:51:41 UTC

[kylin] 02/04: KYLIN-3534 Don't compress fact distinct output file

This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit d164225e91fa59aa1dd06d2ddf64e6acc1035a3f
Author: chao long <wa...@qq.com>
AuthorDate: Wed Sep 5 09:55:44 2018 +0800

    KYLIN-3534 Don't compress fact distinct output file
---
 .../main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
index d0ba5d5..632a169 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
@@ -209,19 +209,22 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
                 NullWritable.class, LongWritable.class);
 
         FileOutputFormat.setOutputPath(job, new Path(outputPath));
+        FileOutputFormat.setCompressOutput(job, false);
 
         // prevent to create zero-sized default output
         LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
 
+
         MultipleOutputsRDD multipleOutputsRDD = MultipleOutputsRDD.rddToMultipleOutputsRDD(outputRDD);
 
         multipleOutputsRDD.saveAsNewAPIHadoopDatasetWithMultipleOutputs(job.getConfiguration());
 
-        logger.info("Map input records={}", recordRDD.count());
+        long recordCount = recordRDD.count();
+        logger.info("Map input records={}", recordCount);
         logger.info("HDFS Read: {} HDFS Write", bytesWritten.value());
 
         Map<String, String> counterMap = Maps.newHashMap();
-        counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordRDD.count()));
+        counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordCount));
         counterMap.put(ExecutableConstants.SOURCE_RECORDS_SIZE, String.valueOf(bytesWritten.value()));
 
         // save counter to hdfs
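
For readers skimming the diff, the following standalone sketch (not part of the commit, and not Kylin source) shows the same two ideas in isolation: disabling output compression on the Hadoop job that writes the fact distinct SequenceFiles, and reading the RDD count once instead of calling count() twice. The class name, output path, and the local Spark setup are illustrative assumptions.

    // Minimal sketch of the two changes, under the assumptions noted above.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    import java.util.Arrays;

    public class UncompressedOutputSketch {
        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration(), "fact-distinct-sketch");
            // Hypothetical output path, stands in for the job's outputPath.
            FileOutputFormat.setOutputPath(job, new Path("/tmp/fact_distinct_out"));
            // The key line from the commit: write plain (uncompressed) output files.
            FileOutputFormat.setCompressOutput(job, false);
            // As in the original code: prevent creating zero-sized default output.
            LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

            SparkConf conf = new SparkConf().setAppName("fact-distinct-sketch").setMaster("local[*]");
            try (JavaSparkContext sc = new JavaSparkContext(conf)) {
                JavaRDD<String> recordRDD = sc.parallelize(Arrays.asList("a", "b", "c"));
                // Count once and reuse the value; calling count() a second time
                // would trigger another pass over the (potentially large) input RDD.
                long recordCount = recordRDD.count();
                System.out.println("Map input records=" + recordCount);
            }
        }
    }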