You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "sivabalan narayanan (Jira)" <ji...@apache.org> on 2022/01/27 16:39:00 UTC
[jira] [Commented] (HUDI-3335) Loading Hudi table fails with NullPointerException
[ https://issues.apache.org/jira/browse/HUDI-3335?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17483261#comment-17483261 ]
sivabalan narayanan commented on HUDI-3335:
-------------------------------------------
Can you provide more info so we can triage this?
- the hoodie write configs used,
- the hive sync configs used,
- the contents of .hoodie,
- and the contents of .hoodie/metadata/.hoodie
> Loading Hudi table fails with NullPointerException
> --------------------------------------------------
>
> Key: HUDI-3335
> URL: https://issues.apache.org/jira/browse/HUDI-3335
> Project: Apache Hudi
> Issue Type: Bug
> Affects Versions: 0.10.1
> Reporter: Harsha Teja Kanna
> Priority: Critical
>
> Have a COW table with metadata enabled. Loading from Spark query fails with java.lang.NullPointerException
> *Environment*
> Spark 3.1.2
> Hudi 0.10.1
> *Query*
> import org.apache.hudi.DataSourceReadOptions
> import org.apache.hudi.common.config.HoodieMetadataConfig
> val basePath = "s3a://datalake-hudi/v1"
> val df = spark.
> read.
> format("org.apache.hudi").
> option(HoodieMetadataConfig.ENABLE.key(), "true").
> option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL).
> load(s"${basePath}/sessions/")
> df.createOrReplaceTempView(table)
> *Passing an individual partition works though*
> val df = spark.
> read.
> format("org.apache.hudi").
> option(HoodieMetadataConfig.ENABLE.key(), "true").
> option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL).
> load(s"${basePath}/sessions/date=2022/01/25")
> df.createOrReplaceTempView(table)
> *Stacktrace:*
> at org.sparkproject.guava.base.Preconditions.checkNotNull(Preconditions.java:191)
> at org.sparkproject.guava.cache.LocalCache.put(LocalCache.java:4210)
> at org.sparkproject.guava.cache.LocalCache$LocalManualCache.put(LocalCache.java:4804)
> at org.apache.spark.sql.execution.datasources.SharedInMemoryCache$$anon$3.putLeafFiles(FileStatusCache.scala:161)
> at org.apache.hudi.HoodieFileIndex.$anonfun$loadPartitionPathFiles$4(HoodieFileIndex.scala:631)
> at org.apache.hudi.HoodieFileIndex.$anonfun$loadPartitionPathFiles$4$adapted(HoodieFileIndex.scala:629)
> at scala.collection.immutable.HashMap$HashMap1.foreach(HashMap.scala:234)
> at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
> at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
> at org.apache.hudi.HoodieFileIndex.loadPartitionPathFiles(HoodieFileIndex.scala:629)
> at org.apache.hudi.HoodieFileIndex.refresh0(HoodieFileIndex.scala:387)
> at org.apache.hudi.HoodieFileIndex.<init>(HoodieFileIndex.scala:184)
> at org.apache.hudi.DefaultSource.getBaseFileOnlyView(DefaultSource.scala:199)
> at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:119)
> at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:69)
> at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:355)
> at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:325)
> at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:307)
> at scala.Option.getOrElse(Option.scala:189)
> at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:307)
> at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:239)
> at $anonfun$res3$1(<console>:46)
> at $anonfun$res3$1$adapted(<console>:40)
> at scala.collection.Iterator.foreach(Iterator.scala:941)
> at scala.collection.Iterator.foreach$(Iterator.scala:941)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
--
This message was sent by Atlassian Jira
(v8.20.1#820001)