You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Raymond Xu (Jira)" <ji...@apache.org> on 2022/04/03 18:08:00 UTC
[jira] [Commented] (HUDI-3744) NoSuchMethodError of getReadStatistics with Spark 3.2/Hadoop 3.2 using HBase
[ https://issues.apache.org/jira/browse/HUDI-3744?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17516563#comment-17516563 ]
Raymond Xu commented on HUDI-3744:
----------------------------------
I can't reproduce this problem with the latest master
- hudi-spark3.2-bundle_2.12-0.11.0-SNAPSHOT.jar
- spark-3.2.1-bin-hadoop3.2.tgz
{code:scala}
// Reproduction script for HUDI-3744: create a partitioned COW Hudi table,
// insert one row, then list tables and partitions ("show partitions" is the
// statement that triggered the reported NoSuchMethodError).
val tableName = "hudi_cow_pt_tbl"
// NOTE: .stripMargin is required on the triple-quoted SQL below; without it the
// literal '|' margin characters are passed to spark.sql and the statement
// fails to parse.
spark.sql(
  s"""
     |create table $tableName (
     | id bigint,
     | name string,
     | ts bigint,
     | dt string
     |) using hudi
     |tblproperties (
     | type = 'cow',
     | primaryKey = 'id',
     | preCombineField = 'ts'
     | )
     |partitioned by (dt)
     |location '/tmp/hudi/tbl';
     |""".stripMargin)
spark.sql(
  s"""
     |insert into $tableName partition (dt)
     |select 1 as id, 'a1' as name, 1000 as ts, '2021-12-09' as dt;
     |""".stripMargin)
spark.sql("show tables;").show(false)
spark.sql(s"show partitions $tableName;").show(false)
{code}
> NoSuchMethodError of getReadStatistics with Spark 3.2/Hadoop 3.2 using HBase
> -----------------------------------------------------------------------------
>
> Key: HUDI-3744
> URL: https://issues.apache.org/jira/browse/HUDI-3744
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Ethan Guo
> Assignee: Raymond Xu
> Priority: Blocker
> Fix For: 0.11.0
>
>
> Environment: Hadoop 3.2.1 & Spark-3.2.1
> hudi compile from commit f2a93ead3b5a6964a72b3543ada58aa334edef9c
> just use spark-sql and default job configuration to execute "show partitions [hudi_table_name];"
> {code:java}
> // command
> spark-sql --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog
> // spark-sql
> spark-sql> show partitions hudi_partition_table;
> {code}
> // stack trace
> java.lang.NoSuchMethodError: org.apache.hadoop.hdfs.client.HdfsDataInputStream.getReadStatistics()Lorg/apache/hadoop/hdfs/DFSInputStream$ReadStatistics;
> at org.apache.hudi.org.apache.hadoop.hbase.io.FSDataInputStreamWrapper.updateInputStreamStatistics(FSDataInputStreamWrapper.java:249)
> at org.apache.hudi.org.apache.hadoop.hbase.io.FSDataInputStreamWrapper.close(FSDataInputStreamWrapper.java:296)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.closeStreams(HFileBlock.java:1825)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFilePreadReader.close(HFilePreadReader.java:107)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.close(HFileReaderImpl.java:1421)
> at org.apache.hudi.io.storage.HoodieHFileReader.close(HoodieHFileReader.java:423)
> at org.apache.hudi.metadata.HoodieBackedTableMetadata.close(HoodieBackedTableMetadata.java:435)
> at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getRecordsByKeys$0(HoodieBackedTableMetadata.java:162)
> at java.util.HashMap.forEach(HashMap.java:1290)
> at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordsByKeys(HoodieBackedTableMetadata.java:138)
> at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKey(HoodieBackedTableMetadata.java:128)
> at org.apache.hudi.metadata.BaseTableMetadata.fetchAllPartitionPaths(BaseTableMetadata.java:281)
> at org.apache.hudi.metadata.BaseTableMetadata.getAllPartitionPaths(BaseTableMetadata.java:111)
> at org.apache.hudi.common.fs.FSUtils.getAllPartitionPaths(FSUtils.java:308)
> at org.apache.spark.sql.hudi.HoodieSqlCommonUtils$.getAllPartitionPaths(HoodieSqlCommonUtils.scala:81)
> at org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.getPartitionPaths(HoodieCatalogTable.scala:157)
> at org.apache.spark.sql.hudi.command.ShowHoodieTablePartitionsCommand.run(ShowHoodieTablePartitionsCommand.scala:51)
> at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
> at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
> at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
> at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
--
This message was sent by Atlassian Jira
(v8.20.1#820001)