Posted to commits@hudi.apache.org by "Jian Feng (Jira)" <ji...@apache.org> on 2022/05/09 15:14:00 UTC
[jira] [Created] (HUDI-4066) HiveMetastoreBasedLockProvider can not release lock when writer fails
Jian Feng created HUDI-4066:
-------------------------------
Summary: HiveMetastoreBasedLockProvider can not release lock when writer fails
Key: HUDI-4066
URL: https://issues.apache.org/jira/browse/HUDI-4066
Project: Apache Hudi
Issue Type: Bug
Components: core
Affects Versions: 0.10.1
Reporter: Jian Feng
We use HiveMetastoreBasedLockProvider in our production environment: one writer ingests data with Flink, and another writer deletes old partitions with Spark. Sometimes the Spark job fails, but the lock it acquired is never released, and from then on every writer fails to acquire the lock.
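For reference, a minimal sketch of the Spark side of this setup (this is not the actual failing job; the database and table names are taken from the log below, while partitionsDf, basePath and the field names are placeholders, and the metastore URI is assumed to come from hive-site.xml on the classpath):
{code:scala}
// Hypothetical sketch only -- not the actual failing job.
// Database/table names come from the log in this ticket; basePath, partitionsDf
// and the record key / partition path fields are placeholders.
import org.apache.spark.sql.SaveMode

val basePath = "hdfs:///warehouse/dev_video/dwd_traffic_log"

// partitionsDf: records whose partition-path field identifies the partitions to drop
partitionsDf.write
  .format("hudi")
  .option("hoodie.table.name", "dwd_traffic_log")
  .option("hoodie.datasource.write.recordkey.field", "log_id")
  .option("hoodie.datasource.write.partitionpath.field", "dt")
  .option("hoodie.datasource.write.operation", "delete_partition")
  // multi-writer settings shared by the Flink and Spark writers
  .option("hoodie.write.concurrency.mode", "optimistic_concurrency_control")
  .option("hoodie.cleaner.policy.failed.writes", "LAZY")
  .option("hoodie.write.lock.provider",
    "org.apache.hudi.hive.HiveMetastoreBasedLockProvider")
  .option("hoodie.write.lock.hivemetastore.database", "dev_video")
  .option("hoodie.write.lock.hivemetastore.table", "dwd_traffic_log")
  .mode(SaveMode.Append)
  .save(basePath)
{code}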
{code:java}
// error log
22/04/01 08:12:18 INFO TransactionManager: Transaction starting without a transaction owner
22/04/01 08:12:18 INFO LockManager: LockProvider org.apache.hudi.hive.HiveMetastoreBasedLockProvider
22/04/01 08:12:19 INFO metastore: Trying to connect to metastore with URI thrift://10.128.152.245:9083
22/04/01 08:12:19 INFO metastore: Opened a connection to metastore, current connections: 1
22/04/01 08:12:19 INFO metastore: Connected to metastore.
22/04/01 08:12:20 INFO HiveMetastoreBasedLockProvider: ACQUIRING lock at database dev_video and table dwd_traffic_log
22/04/01 08:12:25 INFO TransactionManager: Transaction ending without a transaction owner
22/04/01 08:12:25 INFO HiveMetastoreBasedLockProvider: RELEASING lock at database dev_video and table dwd_traffic_log
22/04/01 08:12:25 INFO TransactionManager: Transaction ended without a transaction owner
Exception in thread "main" org.apache.hudi.exception.HoodieLockException: Unable to acquire lock, lock object
    at org.apache.hudi.client.transaction.lock.LockManager.lock(LockManager.java:71)
    at org.apache.hudi.client.transaction.TransactionManager.beginTransaction(TransactionManager.java:51)
    at org.apache.hudi.client.SparkRDDWriteClient.getTableAndInitCtx(SparkRDDWriteClient.java:430)
    at org.apache.hudi.client.SparkRDDWriteClient.deletePartitions(SparkRDDWriteClient.java:261)
    at org.apache.hudi.DataSourceUtils.doDeletePartitionsOperation(DataSourceUtils.java:234)
    at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:217)
    at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:164)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:132)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:131)
    at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:991)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:991)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:438)
    at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:415)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
    at com.shopee.ci.hudi.tasks.ExpiredPartitionDelete$.$anonfun$main$2(ExpiredPartitionDelete.scala:82)
    at com.shopee.ci.hudi.tasks.ExpiredPartitionDelete$.$anonfun$main$2$adapted(ExpiredPartitionDelete.scala:65)
    at scala.collection.Iterator.foreach(Iterator.scala:941)
    at scala.collection.Iterator.foreach$(Iterator.scala:941)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
    at scala.collection.IterableLike.foreach(IterableLike.scala:74)
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
    at com.shopee.ci.hudi.tasks.ExpiredPartitionDelete$.$anonfun$main$1(ExpiredPartitionDelete.scala:65)
    at com.shopee.ci.hudi.tasks.ExpiredPartitionDelete$.$anonfun$main$1$adapted(ExpiredPartitionDelete.scala:61)
    at scala.collection.Iterator.foreach(Iterator.scala:941)
    at scala.collection.Iterator.foreach$(Iterator.scala:941)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
    at scala.collection.IterableLike.foreach(IterableLike.scala:74)
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
    at com.shopee.ci.hudi.tasks.ExpiredPartitionDelete$.main(ExpiredPartitionDelete.scala:61)
    at com.shopee.ci.hudi.tasks.ExpiredPartitionDelete.main(ExpiredPartitionDelete.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.hudi.exception.HoodieLockException: FAILED_TO_ACQUIRE lock at database dev_video and table dwd_traffic_log
    at org.apache.hudi.hive.HiveMetastoreBasedLockProvider.tryLock(HiveMetastoreBasedLockProvider.java:114)
    at org.apache.hudi.client.transaction.lock.LockManager.lock(LockManager.java:62)
    ... 57 more
Caused by: java.util.concurrent.ExecutionException: org.apache.thrift.TApplicationException: Internal error processing lock
    at java.util.concurrent.FutureTask.report(FutureTask.java:122)
    at java.util.concurrent.FutureTask.get(FutureTask.java:206)
    at org.apache.hudi.hive.HiveMetastoreBasedLockProvider.acquireLockInternal(HiveMetastoreBasedLockProvider.java:185)
    at org.apache.hudi.hive.HiveMetastoreBasedLockProvider.acquireLock(HiveMetastoreBasedLockProvider.java:139)
    at org.apache.hudi.hive.HiveMetastoreBasedLockProvider.tryLock(HiveMetastoreBasedLockProvider.java:112)
    ... 58 more
Caused by: org.apache.thrift.TApplicationException: Internal error processing lock
    at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:79)
    at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_lock(ThriftHiveMetastore.java:4743)
    at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.lock(ThriftHiveMetastore.java:4730)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.lock(HiveMetaStoreClient.java:2174)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173)
    at com.sun.proxy.$Proxy45.lock(Unknown Source)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2348)
    at com.sun.proxy.$Proxy45.lock(Unknown Source)
    at org.apache.hudi.hive.HiveMetastoreBasedLockProvider.lambda$acquireLockInternal$0(HiveMetastoreBasedLockProvider.java:184)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) {code}
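Until the provider reliably releases locks when a writer dies, the stale lock has to be cleared by hand. Below is a rough workaround sketch using the plain Hive metastore client (this is an assumption about a manual cleanup path, not a Hudi API; the metastore URI is the one from the log, and the lock id must be verified before calling unlock):
{code:scala}
// Hypothetical cleanup sketch: list the locks held on the table and release the
// stale one through the plain Hive metastore client. Not a Hudi API; verify the
// lock id manually before calling unlock().
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient
import org.apache.hadoop.hive.metastore.api.ShowLocksRequest

val conf = new HiveConf()
conf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://10.128.152.245:9083")

val client = new HiveMetaStoreClient(conf)
try {
  val req = new ShowLocksRequest()
  req.setDbname("dev_video")
  req.setTablename("dwd_traffic_log")
  // print every lock currently held on the table
  client.showLocks(req).getLocks.forEach(l => println(s"${l.getLockid} ${l.getState}"))
  // client.unlock(<stale lock id>)   // uncomment once the stale id is confirmed
} finally {
  client.close()
}
{code}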
--
This message was sent by Atlassian Jira
(v8.20.7#820007)