You are viewing a plain text version of this content; the canonical (hyperlinked) version is available in the original mailing-list archive.
Posted to commits@hudi.apache.org by yi...@apache.org on 2023/02/13 06:24:06 UTC
[hudi] 02/05: [HUDI-5768] Fix Spark Datasource read of metadata table (#7924)
This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch release-0.13.0
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 7ccf6e678278ceca592b8d95160bb0b17906928f
Author: Y Ethan Guo <et...@gmail.com>
AuthorDate: Sun Feb 12 03:25:51 2023 -0800
[HUDI-5768] Fix Spark Datasource read of metadata table (#7924)
---
.../src/main/scala/org/apache/hudi/HoodieBaseRelation.scala | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala
index bf3d38b808d..8a730a8334b 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala
@@ -42,6 +42,7 @@ import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter
import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper}
import org.apache.hudi.internal.schema.{HoodieSchemaException, InternalSchema}
import org.apache.hudi.io.storage.HoodieAvroHFileReader
+import org.apache.hudi.metadata.HoodieTableMetadata
import org.apache.spark.execution.datasources.HoodieInMemoryFileIndex
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
@@ -59,7 +60,6 @@ import org.apache.spark.sql.{Row, SQLContext, SparkSession}
import org.apache.spark.unsafe.types.UTF8String
import java.net.URI
-import java.util.Locale
import scala.collection.JavaConverters._
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}
@@ -292,7 +292,8 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext,
* Determines whether relation's schema could be pruned by Spark's Optimizer
*/
def canPruneRelationSchema: Boolean =
- (fileFormat.isInstanceOf[ParquetFileFormat] || fileFormat.isInstanceOf[OrcFileFormat]) &&
+ !HoodieTableMetadata.isMetadataTable(basePath.toString) &&
+ (fileFormat.isInstanceOf[ParquetFileFormat] || fileFormat.isInstanceOf[OrcFileFormat]) &&
// NOTE: In case this relation has already been pruned there's no point in pruning it again
prunedDataSchema.isEmpty &&
// TODO(HUDI-5421) internal schema doesn't support nested schema pruning currently