You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by yi...@apache.org on 2022/06/11 20:49:06 UTC
[hudi] 07/08: [HUDI-4223] Fix NullPointerException from getLogRecordScanner when reading metadata table (#5840)
This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch release-0.11.1-rc2-prep
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit cd1514484fff1ae6e66b4e56ae11013bdf4ac6e9
Author: Y Ethan Guo <et...@gmail.com>
AuthorDate: Sat Jun 11 13:19:24 2022 -0700
[HUDI-4223] Fix NullPointerException from getLogRecordScanner when reading metadata table (#5840)
When explicitly specifying the metadata table path for reading in spark, the "hoodie.metadata.enable" is overwritten to true for proper read behavior.
---
.../src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala | 6 +++---
.../hudi/functional/TestMetadataTableWithSparkDataSource.scala | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
index a7ca60865f..2fdb9b882e 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
@@ -25,7 +25,6 @@ import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import org.apache.hudi.HoodieConversionUtils.{toJavaOption, toScalaOption}
import org.apache.hudi.HoodieMergeOnReadRDD.{AvroDeserializerSupport, collectFieldOrdinals, getPartitionPath, projectAvro, projectAvroUnsafe, projectRowUnsafe, resolveAvroSchemaNullability}
-import org.apache.hudi.MergeOnReadSnapshotRelation.getFilePath
import org.apache.hudi.common.config.HoodieMetadataConfig
import org.apache.hudi.common.engine.HoodieLocalEngineContext
import org.apache.hudi.common.fs.FSUtils
@@ -37,9 +36,9 @@ import org.apache.hudi.config.HoodiePayloadConfig
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes
+import org.apache.hudi.internal.schema.InternalSchema
import org.apache.hudi.metadata.HoodieTableMetadata.getDataTableBasePathFromMetadataTable
import org.apache.hudi.metadata.{HoodieBackedTableMetadata, HoodieTableMetadata}
-import org.apache.hudi.internal.schema.InternalSchema
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.avro.HoodieAvroDeserializer
import org.apache.spark.sql.catalyst.InternalRow
@@ -324,7 +323,8 @@ private object HoodieMergeOnReadRDD {
val fs = FSUtils.getFs(tablePath, hadoopConf)
if (HoodieTableMetadata.isMetadataTable(tablePath)) {
- val metadataConfig = tableState.metadataConfig
+ val metadataConfig = HoodieMetadataConfig.newBuilder()
+ .fromProperties(tableState.metadataConfig.getProps).enable(true).build()
val dataTableBasePath = getDataTableBasePathFromMetadataTable(tablePath)
val metadataTable = new HoodieBackedTableMetadata(
new HoodieLocalEngineContext(hadoopConf), metadataConfig,
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala
index 11705f9eb1..02e0ee6dfd 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala
@@ -78,7 +78,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn
.save(basePath)
// Files partition of MT
- val filesPartitionDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/files")
+ val filesPartitionDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata/files")
// Smoke test
filesPartitionDF.show()
@@ -96,7 +96,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn
assertEquals(expectedKeys, keys)
// Column Stats Index partition of MT
- val colStatsDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/column_stats")
+ val colStatsDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata/column_stats")
// Smoke test
colStatsDF.show()