Posted to commits@hudi.apache.org by "Raymond Xu (Jira)" <ji...@apache.org> on 2022/04/03 18:08:00 UTC

[jira] [Commented] (HUDI-3744) NoSuchMethodError of getReadStatistics with Spark 3.2/Hadoop 3.2 using HBase

    [ https://issues.apache.org/jira/browse/HUDI-3744?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17516563#comment-17516563 ] 

Raymond Xu commented on HUDI-3744:
----------------------------------

I can't reproduce this problem with the latest master using:
- hudi-spark3.2-bundle_2.12-0.11.0-SNAPSHOT.jar
- spark-3.2.1-bin-hadoop3.2.tgz


{code:scala}
val tableName = "hudi_cow_pt_tbl"

// create a COW table partitioned by dt
spark.sql(
  s"""
    |create table $tableName (
    |  id bigint,
    |  name string,
    |  ts bigint,
    |  dt string
    |) using hudi
    |tblproperties (
    |  type = 'cow',
    |  primaryKey = 'id',
    |  preCombineField = 'ts'
    |)
    |partitioned by (dt)
    |location '/tmp/hudi/tbl'
    |""".stripMargin)

// insert one record into partition dt='2021-12-09'
spark.sql(
  s"""
    |insert into $tableName partition (dt)
    |select 1 as id, 'a1' as name, 1000 as ts, '2021-12-09' as dt
    |""".stripMargin)

spark.sql("show tables").show(false)
spark.sql(s"show partitions $tableName").show(false)
{code}
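
For anyone still hitting the reported NoSuchMethodError: the method descriptor in the stack trace expects the Hadoop 2 return type (DFSInputStream$ReadStatistics), while Hadoop 3 moved that class to org.apache.hadoop.hdfs.ReadStatistics, so the HBase code shaded into the bundle disagrees with the Hadoop jars on the runtime classpath. A quick reflection check (a diagnostic sketch, not part of the verification above) shows which signature your classpath actually provides:

{code:scala}
// Diagnostic sketch: print the getReadStatistics signature(s) exposed by the
// Hadoop jars on the classpath. A Hadoop 2 build returns
// DFSInputStream$ReadStatistics; a Hadoop 3 build returns
// org.apache.hadoop.hdfs.ReadStatistics.
val cls = Class.forName("org.apache.hadoop.hdfs.client.HdfsDataInputStream")
cls.getMethods
  .filter(_.getName == "getReadStatistics")
  .foreach(m => println(s"${m.getReturnType.getName} ${m.getName}()"))
{code}

If the printed return type does not match the one the bundle was compiled against, that mismatch is the source of the error.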


> NoSuchMethodError of getReadStatistics with Spark 3.2/Hadoop 3.2 using HBase 
> -----------------------------------------------------------------------------
>
>                 Key: HUDI-3744
>                 URL: https://issues.apache.org/jira/browse/HUDI-3744
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: Ethan Guo
>            Assignee: Raymond Xu
>            Priority: Blocker
>             Fix For: 0.11.0
>
>
> Environment: Hadoop 3.2.1 & Spark 3.2.1
> Hudi compiled from commit f2a93ead3b5a6964a72b3543ada58aa334edef9c
> Using spark-sql with the default job configuration, executing "show partitions [hudi_table_name];" triggers the error:
> {code:java}
> // command
> spark-sql  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog
> // spark-sql
> spark-sql> show partitions hudi_partition_table;
> {code}
> {code:java}
> java.lang.NoSuchMethodError: org.apache.hadoop.hdfs.client.HdfsDataInputStream.getReadStatistics()Lorg/apache/hadoop/hdfs/DFSInputStream$ReadStatistics;
>     at org.apache.hudi.org.apache.hadoop.hbase.io.FSDataInputStreamWrapper.updateInputStreamStatistics(FSDataInputStreamWrapper.java:249)
>     at org.apache.hudi.org.apache.hadoop.hbase.io.FSDataInputStreamWrapper.close(FSDataInputStreamWrapper.java:296)
>     at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.closeStreams(HFileBlock.java:1825)
>     at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFilePreadReader.close(HFilePreadReader.java:107)
>     at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.close(HFileReaderImpl.java:1421)
>     at org.apache.hudi.io.storage.HoodieHFileReader.close(HoodieHFileReader.java:423)
>     at org.apache.hudi.metadata.HoodieBackedTableMetadata.close(HoodieBackedTableMetadata.java:435)
>     at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getRecordsByKeys$0(HoodieBackedTableMetadata.java:162)
>     at java.util.HashMap.forEach(HashMap.java:1290)
>     at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordsByKeys(HoodieBackedTableMetadata.java:138)
>     at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKey(HoodieBackedTableMetadata.java:128)
>     at org.apache.hudi.metadata.BaseTableMetadata.fetchAllPartitionPaths(BaseTableMetadata.java:281)
>     at org.apache.hudi.metadata.BaseTableMetadata.getAllPartitionPaths(BaseTableMetadata.java:111)
>     at org.apache.hudi.common.fs.FSUtils.getAllPartitionPaths(FSUtils.java:308)
>     at org.apache.spark.sql.hudi.HoodieSqlCommonUtils$.getAllPartitionPaths(HoodieSqlCommonUtils.scala:81)
>     at org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.getPartitionPaths(HoodieCatalogTable.scala:157)
>     at org.apache.spark.sql.hudi.command.ShowHoodieTablePartitionsCommand.run(ShowHoodieTablePartitionsCommand.scala:51)
>     at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
>     at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
>     at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
>     at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>     at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>     at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>     at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>     at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>     at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>     at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> {code}


