Posted to commits@hudi.apache.org by "sivabalan narayanan (Jira)" <ji...@apache.org> on 2022/06/07 21:04:00 UTC

[jira] [Assigned] (HUDI-4204) OCC with row writer path fails

     [ https://issues.apache.org/jira/browse/HUDI-4204?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

sivabalan narayanan reassigned HUDI-4204:
-----------------------------------------

    Assignee: sivabalan narayanan

> OCC with row writer path fails
> ------------------------------
>
>                 Key: HUDI-4204
>                 URL: https://issues.apache.org/jira/browse/HUDI-4204
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: multi-writer
>            Reporter: sivabalan narayanan
>            Assignee: sivabalan narayanan
>            Priority: Major
>             Fix For: 0.11.0
>
>
> Ref: [https://github.com/apache/hudi/issues/5751]
>  
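> A minimal repro sketch, assuming a local Spark shell (the table name, path, key fields, and lock provider below are illustrative choices, not taken from the report): write with OCC enabled ({{hoodie.write.concurrency.mode=optimistic_concurrency_control}}) through the bulk-insert row writer path ({{hoodie.datasource.write.row.writer.enable=true}}).
>  
> {code:scala}
> import org.apache.spark.sql.{SaveMode, SparkSession}
>
> val spark = SparkSession.builder()
>   .appName("hudi-occ-row-writer-repro")
>   .master("local[2]")
>   .getOrCreate()
> import spark.implicits._
>
> // Toy input; any schema with a record key, precombine and partition field works.
> val df = Seq((1, "a", 100L), (2, "b", 100L)).toDF("id", "name", "ts")
>
> df.write.format("hudi")
>   .option("hoodie.table.name", "occ_row_writer_tbl")              // hypothetical table name
>   .option("hoodie.datasource.write.recordkey.field", "id")
>   .option("hoodie.datasource.write.partitionpath.field", "name")
>   .option("hoodie.datasource.write.precombine.field", "ts")
>   .option("hoodie.datasource.write.operation", "bulk_insert")
>   .option("hoodie.datasource.write.row.writer.enable", "true")    // row writer path
>   // OCC settings; the in-process lock provider is an assumption for a single-JVM run.
>   .option("hoodie.write.concurrency.mode", "optimistic_concurrency_control")
>   .option("hoodie.cleaner.policy.failed.writes", "LAZY")
>   .option("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider")
>   .mode(SaveMode.Append)
>   .save("/tmp/occ_row_writer_tbl")                                // hypothetical path
> {code}
> The write then fails at commit time with: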
> {code:java}
> Caused by: org.apache.spark.SparkException: Writing job aborted
>         at org.apache.spark.sql.errors.QueryExecutionErrors$.writingJobAbortedError(QueryExecutionErrors.scala:613)
>         at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:386)
>         at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:330)
>         at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:236)
>         at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:309)
>         at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:308)
>         at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:236)
>         at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
>         at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
>         at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
>         at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>         at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>         at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>         at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>         at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>         at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>         at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>         at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>         at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>         at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>         at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>         at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>         at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>         at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>         at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>         at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>         at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:311)
>         at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:247)
>         at org.apache.hudi.HoodieSparkSqlWriter$.bulkInsertAsRow(HoodieSparkSqlWriter.scala:559)
>         at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:176)
>         at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:163)
>         at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
>         at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
>         at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
>         at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
>         at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>         at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>         at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>         at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>         at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>         at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>         at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>         at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>         at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>         at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>         at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>         at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>         at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>         at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>         at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>         at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>         at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>         at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:303)
>         at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>         ... 11 more
> Caused by: org.apache.hudi.exception.HoodieException
>         at org.apache.hudi.internal.DataSourceInternalWriterHelper.commit(DataSourceInternalWriterHelper.java:86)
>         at org.apache.hudi.spark3.internal.HoodieDataSourceInternalBatchWrite.commit(HoodieDataSourceInternalBatchWrite.java:93)
>         at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:369)
>         ... 78 more
> Caused by: java.lang.NullPointerException
>         at org.apache.hudi.client.utils.TransactionUtils.lambda$getCompletedInstantsDuringCurrentWriteOperation$1(TransactionUtils.java:159)
>         at java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:174)
>         at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
>         at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
>         at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
>         at java.util.stream.StreamSpliterators$WrappingSpliterator.forEachRemaining(StreamSpliterators.java:313)
>         at java.util.stream.Streams$ConcatSpliterator.forEachRemaining(Streams.java:743)
>         at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:647)
>         at org.apache.hudi.client.utils.TransactionUtils.resolveWriteConflictIfAny(TransactionUtils.java:79)
>         at org.apache.hudi.client.SparkRDDWriteClient.preCommit(SparkRDDWriteClient.java:475)
>         at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:233)
>         at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:214)
>         at org.apache.hudi.internal.DataSourceInternalWriterHelper.commit(DataSourceInternalWriterHelper.java:83)
>         ... 80 more
>  {code}
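> The NPE itself is raised inside a lambda passed to a Java stream pipeline during conflict resolution ({{TransactionUtils.resolveWriteConflictIfAny}} -> {{getCompletedInstantsDuringCurrentWriteOperation}}), which is why the top frame is the lambda followed by {{ReferencePipeline}}/spliterator plumbing. A generic illustration of that frame shape (not Hudi code; {{Meta}} is a hypothetical stand-in for commit metadata carrying a null field):
>  
> {code:scala}
> import java.util.Arrays
>
> case class Meta(operationType: String) // hypothetical stand-in
>
> val metas = Arrays.asList(Meta("upsert"), Meta(null))
> metas.stream()
>   .map[String](m => m.operationType.toUpperCase) // NPE here: top frame is the lambda,
>   .forEach(s => println(s))                      // followed by ReferencePipeline frames
> {code}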



--
This message was sent by Atlassian Jira
(v8.20.7#820007)