You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Alexey Kudinkin (Jira)" <ji...@apache.org> on 2023/01/31 02:04:00 UTC

[jira] [Updated] (HUDI-5656) Metadata Bootstrap flow resulting in NPE

     [ https://issues.apache.org/jira/browse/HUDI-5656?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Alexey Kudinkin updated HUDI-5656:
----------------------------------
    Sprint: 0.13.0 Final Sprint 3

> Metadata Bootstrap flow resulting in NPE
> ----------------------------------------
>
>                 Key: HUDI-5656
>                 URL: https://issues.apache.org/jira/browse/HUDI-5656
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: bootstrap
>    Affects Versions: 0.13.0
>            Reporter: Alexey Kudinkin
>            Assignee: Alexey Kudinkin
>            Priority: Blocker
>
> After adding a simple statement forcing the test to read whole bootstrapped table:
> {code:java}
> sqlContext.sql("select * from bootstrapped").show(); {code}
>  
> Following NPE have been observed on master (testBulkInsertsAndUpsertsWithBootstrap):
> {code:java}
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 183.0 failed 1 times, most recent failure: Lost task 0.0 in stage 183.0 (TID 971, localhost, executor driver): java.lang.NullPointerException
>     at org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:109)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.writeFields_0_1$(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
>     at scala.collection.Iterator$$anon$10.next(Iterator.scala:448)
>     at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:256)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:836)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:836)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>     at org.apache.spark.scheduler.Task.run(Task.scala:123)
>     at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:411)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:748)Driver stacktrace:    at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:1889)
>     at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1877)
>     at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1876)
>     at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:59)
>     at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:52)
>     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
>     at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
>     at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:926)
>     at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:926)
>     at scala.Option.foreach(Option.scala:257)
>     at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
>     at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>     at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
>     at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:365)
>     at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
>     at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3389)
>     at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2550)
>     at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:3370)
>     at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:78)
>     at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
>     at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
>     at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3370)
>     at org.apache.spark.sql.Dataset.head(Dataset.scala:2550)
>     at org.apache.spark.sql.Dataset.take(Dataset.scala:2764)
>     at org.apache.spark.sql.Dataset.getRows(Dataset.scala:254)
>     at org.apache.spark.sql.Dataset.showString(Dataset.scala:291)
>     at org.apache.spark.sql.Dataset.show(Dataset.scala:751)
>     at org.apache.spark.sql.Dataset.show(Dataset.scala:710)
>     at org.apache.spark.sql.Dataset.show(Dataset.scala:719)
>     at org.apache.hudi.utilities.deltastreamer.TestHoodieDeltaStreamer.testBulkInsertsAndUpsertsWithBootstrap(TestHoodieDeltaStreamer.java:669)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:498)
>     at org.junit.platform.commons.util.ReflectionUtils.invokeMethod(ReflectionUtils.java:688)
>     at org.junit.jupiter.engine.execution.MethodInvocation.proceed(MethodInvocation.java:60)
>     at org.junit.jupiter.engine.execution.InvocationInterceptorChain$ValidatingInvocation.proceed(InvocationInterceptorChain.java:131)
>     at org.junit.jupiter.engine.extension.TimeoutExtension.intercept(TimeoutExtension.java:149)
>     at org.junit.jupiter.engine.extension.TimeoutExtension.interceptTestableMethod(TimeoutExtension.java:140)
>     at org.junit.jupiter.engine.extension.TimeoutExtension.interceptTestTemplateMethod(TimeoutExtension.java:92)
>     at org.junit.jupiter.engine.execution.ExecutableInvoker$ReflectiveInterceptorCall.lambda$ofVoidMethod$0(ExecutableInvoker.java:115)
>     at org.junit.jupiter.engine.execution.ExecutableInvoker.lambda$invoke$0(ExecutableInvoker.java:105)
>     at org.junit.jupiter.engine.execution.InvocationInterceptorChain$InterceptedInvocation.proceed(InvocationInterceptorChain.java:106)
>     at org.junit.jupiter.engine.execution.InvocationInterceptorChain.proceed(InvocationInterceptorChain.java:64)
>     at org.junit.jupiter.engine.execution.InvocationInterceptorChain.chainAndInvoke(InvocationInterceptorChain.java:45)
>     at org.junit.jupiter.engine.execution.InvocationInterceptorChain.invoke(InvocationInterceptorChain.java:37)
>     at org.junit.jupiter.engine.execution.ExecutableInvoker.invoke(ExecutableInvoker.java:104)
>     at org.junit.jupiter.engine.execution.ExecutableInvoker.invoke(ExecutableInvoker.java:98)
>     at org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.lambda$invokeTestMethod$6(TestMethodTestDescriptor.java:210)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.invokeTestMethod(TestMethodTestDescriptor.java:206)
>     at org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:131)
>     at org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:65)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$5(NodeTestTask.java:139)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$7(NodeTestTask.java:129)
>     at org.junit.platform.engine.support.hierarchical.Node.around(Node.java:137)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$8(NodeTestTask.java:127)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.executeRecursively(NodeTestTask.java:126)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.execute(NodeTestTask.java:84)
>     at org.junit.platform.engine.support.hierarchical.SameThreadHierarchicalTestExecutorService.submit(SameThreadHierarchicalTestExecutorService.java:32)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask$DefaultDynamicTestExecutor.execute(NodeTestTask.java:212)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask$DefaultDynamicTestExecutor.execute(NodeTestTask.java:192)
>     at org.junit.jupiter.engine.descriptor.TestTemplateTestDescriptor.execute(TestTemplateTestDescriptor.java:139)
>     at org.junit.jupiter.engine.descriptor.TestTemplateTestDescriptor.lambda$execute$2(TestTemplateTestDescriptor.java:107)
>     at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
>     at java.util.stream.ReferencePipeline$11$1.accept(ReferencePipeline.java:440)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.Iterator.forEachRemaining(Iterator.java:116)
>     at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
>     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
>     at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
>     at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
>     at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
>     at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>     at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
>     at java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>     at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
>     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
>     at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
>     at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
>     at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
>     at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>     at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
>     at java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)
>     at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
>     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
>     at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
>     at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
>     at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
>     at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>     at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
>     at org.junit.jupiter.engine.descriptor.TestTemplateTestDescriptor.execute(TestTemplateTestDescriptor.java:107)
>     at org.junit.jupiter.engine.descriptor.TestTemplateTestDescriptor.execute(TestTemplateTestDescriptor.java:42)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$5(NodeTestTask.java:139)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$7(NodeTestTask.java:129)
>     at org.junit.platform.engine.support.hierarchical.Node.around(Node.java:137)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$8(NodeTestTask.java:127)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.executeRecursively(NodeTestTask.java:126)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.execute(NodeTestTask.java:84)
>     at java.util.ArrayList.forEach(ArrayList.java:1259)
>     at org.junit.platform.engine.support.hierarchical.SameThreadHierarchicalTestExecutorService.invokeAll(SameThreadHierarchicalTestExecutorService.java:38)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$5(NodeTestTask.java:143)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$7(NodeTestTask.java:129)
>     at org.junit.platform.engine.support.hierarchical.Node.around(Node.java:137)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$8(NodeTestTask.java:127)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.executeRecursively(NodeTestTask.java:126)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.execute(NodeTestTask.java:84)
>     at java.util.ArrayList.forEach(ArrayList.java:1259)
>     at org.junit.platform.engine.support.hierarchical.SameThreadHierarchicalTestExecutorService.invokeAll(SameThreadHierarchicalTestExecutorService.java:38)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$5(NodeTestTask.java:143)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$7(NodeTestTask.java:129)
>     at org.junit.platform.engine.support.hierarchical.Node.around(Node.java:137)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$8(NodeTestTask.java:127)
>     at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.executeRecursively(NodeTestTask.java:126)
>     at org.junit.platform.engine.support.hierarchical.NodeTestTask.execute(NodeTestTask.java:84)
>     at org.junit.platform.engine.support.hierarchical.SameThreadHierarchicalTestExecutorService.submit(SameThreadHierarchicalTestExecutorService.java:32)
>     at org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:57)
>     at org.junit.platform.engine.support.hierarchical.HierarchicalTestEngine.execute(HierarchicalTestEngine.java:51)
>     at org.junit.platform.launcher.core.EngineExecutionOrchestrator.execute(EngineExecutionOrchestrator.java:108)
>     at org.junit.platform.launcher.core.EngineExecutionOrchestrator.execute(EngineExecutionOrchestrator.java:88)
>     at org.junit.platform.launcher.core.EngineExecutionOrchestrator.lambda$execute$0(EngineExecutionOrchestrator.java:54)
>     at org.junit.platform.launcher.core.EngineExecutionOrchestrator.withInterceptedStreams(EngineExecutionOrchestrator.java:67)
>     at org.junit.platform.launcher.core.EngineExecutionOrchestrator.execute(EngineExecutionOrchestrator.java:52)
>     at org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:96)
>     at org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:75)
>     at com.intellij.junit5.JUnit5IdeaTestRunner.startRunnerWithArgs(JUnit5IdeaTestRunner.java:57)
>     at com.intellij.rt.junit.IdeaTestRunner$Repeater$1.execute(IdeaTestRunner.java:38)
>     at com.intellij.rt.execution.junit.TestsRepeater.repeat(TestsRepeater.java:11)
>     at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:35)
>     at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:235)
>     at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:54)
> Caused by: java.lang.NullPointerException
>     at org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:109)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.writeFields_0_1$(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
>     at scala.collection.Iterator$$anon$10.next(Iterator.scala:448)
>     at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:256)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:836)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:836)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>     at org.apache.spark.scheduler.Task.run(Task.scala:123)
>     at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:411)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:748) {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)