You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by fo...@apache.org on 2023/01/04 03:32:19 UTC
[hudi] 09/45: [MINOR] Adapt to tianqiong spark
This is an automated email from the ASF dual-hosted git repository.
forwardxu pushed a commit to branch release-0.12.1
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit f80900d91cd134cc2150d4e3ee6d85b8b00c2ad7
Author: XuQianJin-Stars <fo...@apache.com>
AuthorDate: Mon Oct 24 17:18:19 2022 +0800
[MINOR] Adapt to tianqiong spark
---
.../datasources/Spark31NestedSchemaPruning.scala | 24 ++++++++++++++--------
pom.xml | 10 ++++-----
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala
index 1b29c428bb..76cdb443b4 100644
--- a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala
+++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala
@@ -17,15 +17,15 @@
package org.apache.spark.sql.execution.datasources
-import org.apache.hudi.{HoodieBaseRelation, SparkAdapterSupport}
-import org.apache.spark.sql.HoodieSpark3CatalystPlanUtils
-import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, AttributeSet, Expression, NamedExpression, ProjectionOverSchema}
+import org.apache.hudi.HoodieBaseRelation
+import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, Expression, NamedExpression, ProjectionOverSchema}
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
-import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LeafNode, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}
-import org.apache.spark.sql.util.SchemaUtils.restoreOriginalOutputNames
/**
* Prunes unnecessary physical columns given a [[PhysicalOperation]] over a data source relation.
@@ -86,10 +86,8 @@ class Spark31NestedSchemaPruning extends Rule[LogicalPlan] {
// each schemata, assuming the fields in prunedDataSchema are a subset of the fields
// in dataSchema.
if (countLeaves(dataSchema) > countLeaves(prunedDataSchema)) {
- val planUtils = SparkAdapterSupport.sparkAdapter.getCatalystPlanUtils.asInstanceOf[HoodieSpark3CatalystPlanUtils]
-
val prunedRelation = outputRelationBuilder(prunedDataSchema)
- val projectionOverSchema = planUtils.projectOverSchema(prunedDataSchema, AttributeSet(output))
+ val projectionOverSchema = ProjectionOverSchema(prunedDataSchema)
Some(buildNewProjection(projects, normalizedProjects, normalizedFilters,
prunedRelation, projectionOverSchema))
@@ -195,4 +193,14 @@ class Spark31NestedSchemaPruning extends Rule[LogicalPlan] {
case _ => 1
}
}
+
+ def restoreOriginalOutputNames( // Pairs each projection with a name by position and re-labels plain attributes with it.
+ projectList: Seq[NamedExpression], // expressions to be re-labelled
+ originalNames: Seq[String]): Seq[NamedExpression] = { // names matched to projectList by index
+ projectList.zip(originalNames).map { // zip stops at the shorter sequence, so unmatched trailing entries are dropped
+ case (attr: Attribute, name) => attr.withName(name) // attributes get the paired name via withName
+ case (alias: Alias, name) => alias // aliases are returned as-is; `name` is bound but unused here. NOTE(review): confirm leaving the alias name untouched is intended
+ case (other, _) => other // any other NamedExpression passes through unchanged
+ }
+ }
}
diff --git a/pom.xml b/pom.xml
index 60c13c8f07..0adb64838b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -381,7 +381,7 @@
<!-- <exclude>org.sl4fj:slf4j-nop</exclude>-->
<!-- <exclude>org.sl4fj:slf4j-jcl</exclude>-->
<!-- <exclude>log4j:log4j</exclude>-->
- <exclude>ch.qos.logback:logback-classic</exclude>
+<!-- <exclude>ch.qos.logback:logback-classic</exclude>-->
<!-- NOTE: We're banning any HBase deps versions other than the approved ${hbase.version},
which is aimed at preventing the classpath collisions w/ transitive deps usually) -->
<exclude>org.apache.hbase:hbase-common:*</exclude>
@@ -389,7 +389,7 @@
<exclude>org.apache.hbase:hbase-server:*</exclude>
</excludes>
<includes>
- <include>org.slf4j:slf4j-simple:*:*:test</include>
+<!-- <include>org.slf4j:slf4j-simple:*:*:test</include>-->
<include>org.apache.hbase:hbase-common:${hbase.version}</include>
<include>org.apache.hbase:hbase-client:${hbase.version}</include>
<include>org.apache.hbase:hbase-server:${hbase.version}</include>
@@ -1864,9 +1864,9 @@
<configuration>
<rules>
<bannedDependencies>
- <excludes combine.children="append">
- <exclude>*:*_2.11</exclude>
- </excludes>
+<!-- <excludes combine.children="append">-->
+<!-- <exclude>*:*_2.11</exclude>-->
+<!-- </excludes>-->
</bannedDependencies>
</rules>
</configuration>