You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2023/01/06 11:10:12 UTC

[kylin] 07/12: KYLIN-5347 Use the Spark session's Hadoop config in AWS serverless environments when building snapshots for partition tables

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin5
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 7bd187ae0d6dc88c3518d61b8afd665f4ee13797
Author: xingjian.zheng <xi...@kyligence.io>
AuthorDate: Fri Nov 4 20:37:20 2022 +0800

    KYLIN-5347 Use the Spark session's Hadoop config in AWS serverless environments when building snapshots for partition tables
---
 .../scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala b/src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala
index ab2c13e3d0..ce5a080601 100644
--- a/src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala
+++ b/src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala
@@ -459,6 +459,10 @@ class SnapshotBuilder(var jobId: String) extends Logging with Serializable {
   }
 
   private[builder] def decideSparkJobArg(sourceData: Dataset[Row]): (Int, Double) = {
+    var hadoopConf = SparderEnv.getHadoopConfiguration()
+    if (kylinConfig.getClusterManagerClassName.contains("AWSServerless")) {
+      hadoopConf = sourceData.sparkSession.sparkContext.hadoopConfiguration
+    }
     try {
       val sizeInMB = ResourceDetectUtils.getPaths(sourceData.queryExecution.sparkPlan)
         .map(path => HadoopUtil.getContentSummary(path.getFileSystem(SparderEnv.getHadoopConfiguration()), path).getLength)