You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by fo...@apache.org on 2023/01/04 03:32:19 UTC

[hudi] 09/45: [MINOR] Adapt to tianqiong spark

This is an automated email from the ASF dual-hosted git repository.

forwardxu pushed a commit to branch release-0.12.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit f80900d91cd134cc2150d4e3ee6d85b8b00c2ad7
Author: XuQianJin-Stars <fo...@apache.com>
AuthorDate: Mon Oct 24 17:18:19 2022 +0800

    [MINOR] Adapt to tianqiong spark
---
 .../datasources/Spark31NestedSchemaPruning.scala   | 24 ++++++++++++++--------
 pom.xml                                            | 10 ++++-----
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala
index 1b29c428bb..76cdb443b4 100644
--- a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala
+++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala
@@ -17,15 +17,15 @@
 
 package org.apache.spark.sql.execution.datasources
 
-import org.apache.hudi.{HoodieBaseRelation, SparkAdapterSupport}
-import org.apache.spark.sql.HoodieSpark3CatalystPlanUtils
-import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, AttributeSet, Expression, NamedExpression, ProjectionOverSchema}
+import org.apache.hudi.HoodieBaseRelation
+import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, Expression, NamedExpression, ProjectionOverSchema}
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
-import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LeafNode, LogicalPlan, Project}
 import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}
-import org.apache.spark.sql.util.SchemaUtils.restoreOriginalOutputNames
 
 /**
  * Prunes unnecessary physical columns given a [[PhysicalOperation]] over a data source relation.
@@ -86,10 +86,8 @@ class Spark31NestedSchemaPruning extends Rule[LogicalPlan] {
       // each schemata, assuming the fields in prunedDataSchema are a subset of the fields
       // in dataSchema.
       if (countLeaves(dataSchema) > countLeaves(prunedDataSchema)) {
-        val planUtils = SparkAdapterSupport.sparkAdapter.getCatalystPlanUtils.asInstanceOf[HoodieSpark3CatalystPlanUtils]
-
         val prunedRelation = outputRelationBuilder(prunedDataSchema)
-        val projectionOverSchema = planUtils.projectOverSchema(prunedDataSchema, AttributeSet(output))
+        val projectionOverSchema = ProjectionOverSchema(prunedDataSchema)
 
         Some(buildNewProjection(projects, normalizedProjects, normalizedFilters,
           prunedRelation, projectionOverSchema))
@@ -195,4 +193,14 @@ class Spark31NestedSchemaPruning extends Rule[LogicalPlan] {
       case _ => 1
     }
   }
+
+  /** Renames bare attributes to their original names; other expressions are returned as-is. */
+  def restoreOriginalOutputNames(projectList: Seq[NamedExpression],
+                                 originalNames: Seq[String]): Seq[NamedExpression] = {
+    projectList.zip(originalNames).map { // positional pairing; zip stops at the shorter input
+      case (attr: Attribute, name) => attr.withName(name)
+      case (alias: Alias, _) => alias // kept explicit: an alias is passed through unchanged
+      case (other, _) => other
+    }
+  }
 }
diff --git a/pom.xml b/pom.xml
index 60c13c8f07..0adb64838b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -381,7 +381,7 @@
 <!--                    <exclude>org.sl4fj:slf4j-nop</exclude>-->
 <!--                    <exclude>org.sl4fj:slf4j-jcl</exclude>-->
 <!--                    <exclude>log4j:log4j</exclude>-->
-                    <exclude>ch.qos.logback:logback-classic</exclude>
+<!--                    <exclude>ch.qos.logback:logback-classic</exclude>-->
                     <!-- NOTE: We're banning any HBase deps versions other than the approved ${hbase.version},
                                which is aimed at preventing the classpath collisions w/ transitive deps usually) -->
                     <exclude>org.apache.hbase:hbase-common:*</exclude>
@@ -389,7 +389,7 @@
                     <exclude>org.apache.hbase:hbase-server:*</exclude>
                   </excludes>
                   <includes>
-                    <include>org.slf4j:slf4j-simple:*:*:test</include>
+<!--                    <include>org.slf4j:slf4j-simple:*:*:test</include>-->
                     <include>org.apache.hbase:hbase-common:${hbase.version}</include>
                     <include>org.apache.hbase:hbase-client:${hbase.version}</include>
                     <include>org.apache.hbase:hbase-server:${hbase.version}</include>
@@ -1864,9 +1864,9 @@
                 <configuration>
                   <rules>
                     <bannedDependencies>
-                      <excludes combine.children="append">
-                        <exclude>*:*_2.11</exclude>
-                      </excludes>
+<!--                      <excludes combine.children="append">-->
+<!--                        <exclude>*:*_2.11</exclude>-->
+<!--                      </excludes>-->
                     </bannedDependencies>
                   </rules>
                 </configuration>