Posted to commits@hudi.apache.org by "liujinhui (Jira)" <ji...@apache.org> on 2020/08/19 11:08:00 UTC

[jira] [Commented] (HUDI-1200) CustomKeyGenerator does not work, java.lang.NullPointerException

    [ https://issues.apache.org/jira/browse/HUDI-1200?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17180455#comment-17180455 ] 

liujinhui commented on HUDI-1200:
---------------------------------

[~shivnarayan] Thanks for the suggestion; I will modify it.
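For anyone else landing on this trace: the NPE is raised inside SimpleKeyGenerator's constructor each time CustomKeyGenerator.getRecordKey delegates to it, which means a fresh SimpleKeyGenerator is built per record on the executor, and a null or incomplete config only surfaces there, deep inside a running Spark task. Below is a minimal, self-contained sketch of that failure pattern; the classes are simplified stand-ins for illustration, not the actual Hudi implementation.

{code:java}
import java.util.Properties;

// Simplified stand-in for SimpleKeyGenerator: reading a required property
// from an empty config fails in the constructor, mirroring the NPE at
// SimpleKeyGenerator.java:35 in the trace.
class SimpleKeyGeneratorSketch {
  private final String recordKeyField;

  SimpleKeyGeneratorSketch(Properties props) {
    // getProperty returns null for a missing key, so trim() throws the NPE.
    this.recordKeyField = props.getProperty("hoodie.datasource.write.recordkey.field").trim();
  }

  String getRecordKey(String record) {
    return recordKeyField + ":" + record;
  }
}

// Simplified stand-in for CustomKeyGenerator: the anti-pattern visible in
// the trace is that a new delegate is constructed per record, so a bad
// config is only detected at runtime, record by record.
class CustomKeyGeneratorSketch {
  private final Properties props;

  CustomKeyGeneratorSketch(Properties props) {
    this.props = props;
  }

  String getRecordKey(String record) {
    return new SimpleKeyGeneratorSketch(props).getRecordKey(record);
  }
}

public class Hudi1200Repro {
  public static void main(String[] args) {
    CustomKeyGeneratorSketch gen = new CustomKeyGeneratorSketch(new Properties());
    // Throws java.lang.NullPointerException, analogous to the reported trace.
    System.out.println(gen.getRecordKey("row-1"));
  }
}
{code}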

> CustomKeyGenerator does not work, java.lang.NullPointerException
> ----------------------------------------------------------------
>
>                 Key: HUDI-1200
>                 URL: https://issues.apache.org/jira/browse/HUDI-1200
>             Project: Apache Hudi
>          Issue Type: Bug
>    Affects Versions: 0.6.0
>            Reporter: liujinhui
>            Assignee: liujinhui
>            Priority: Blocker
>              Labels: pull-request-available
>             Fix For: 0.6.0
>
>
>  
> {code:java}
> 2020-08-19 11:03:30 INFO Driver scheduler.DAGScheduler:57 - Job 7 failed: isEmpty at DeltaSync.java:377, took 16.357609 s
>  2020-08-19 11:03:30 ERROR Driver deltastreamer.HoodieDeltaStreamer:167 - Got error running delta sync once. Shutting down
>  org.apache.spark.SparkException: Job aborted due to stage failure: Task 22 in stage 7.0 failed 4 times, most recent failure: Lost task 22.3 in stage 7.0 (TID 102, prod-t3-data-lake-004, executor 7): java.lang.NullPointerException
>  at org.apache.hudi.keygen.SimpleKeyGenerator.<init>(SimpleKeyGenerator.java:35)
>  at org.apache.hudi.keygen.CustomKeyGenerator.getRecordKey(CustomKeyGenerator.java:128)
>  at org.apache.hudi.keygen.BuiltinKeyGenerator.getKey(BuiltinKeyGenerator.java:75)
>  at org.apache.hudi.utilities.deltastreamer.DeltaSync.lambda$readFromSource$9fce03f0$1(DeltaSync.java:353)
>  at org.apache.spark.api.java.JavaPairRDD$$anonfun$toScalaFunction$1.apply(JavaPairRDD.scala:1040)
>  at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
>  at scala.collection.Iterator$$anon$10.next(Iterator.scala:394)
>  at scala.collection.Iterator$class.foreach(Iterator.scala:891)
>  at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
>  at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
>  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
>  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
>  at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
>  at scala.collection.AbstractIterator.to(Iterator.scala:1334)
>  at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
>  at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1334)
>  at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
>  at scala.collection.AbstractIterator.toArray(Iterator.scala:1334)
>  at org.apache.spark.rdd.RDD$$anonfun$take$1$$anonfun$29.apply(RDD.scala:1364)
>  at org.apache.spark.rdd.RDD$$anonfun$take$1$$anonfun$29.apply(RDD.scala:1364)
>  at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
>  at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
>  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>  at org.apache.spark.scheduler.Task.run(Task.scala:121)
>  at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
>  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
>  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
>  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
> Driver stacktrace:
>  at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1890)
>  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
>  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
>  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
>  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1877)
>  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:929)
>  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:929)
>  at scala.Option.foreach(Option.scala:257)
>  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:929)
>  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2111)
>  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2060)
>  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2049)
>  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:740)
>  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2081)
>  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2102)
>  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2121)
>  at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1364)
>  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>  at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
>  at org.apache.spark.rdd.RDD.take(RDD.scala:1337)
>  at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply$mcZ$sp(RDD.scala:1472)
>  at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply(RDD.scala:1472)
>  at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply(RDD.scala:1472)
>  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>  at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
>  at org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1471)
>  at org.apache.spark.api.java.JavaRDDLike$class.isEmpty(JavaRDDLike.scala:544)
>  at org.apache.spark.api.java.AbstractJavaRDDLike.isEmpty(JavaRDDLike.scala:45)
>  at org.apache.hudi.utilities.deltastreamer.DeltaSync.writeToSink(DeltaSync.java:377)
>  at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:244)
>  at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$2(HoodieDeltaStreamer.java:161)
>  at org.apache.hudi.common.util.Option.ifPresent(Option.java:96)
>  at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:159)
>  at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:464)
>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  at java.lang.reflect.Method.invoke(Method.java:498)
>  at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:673)
>  Caused by: java.lang.NullPointerException
>  at org.apache.hudi.keygen.SimpleKeyGenerator.<init>(SimpleKeyGenerator.java:35)
>  at org.apache.hudi.keygen.CustomKeyGenerator.getRecordKey(CustomKeyGenerator.java:128)
>  at org.apache.hudi.keygen.BuiltinKeyGenerator.getKey(BuiltinKeyGenerator.java:75)
>  at org.apache.hudi.utilities.deltastreamer.DeltaSync.lambda$readFromSource$9fce03f0$1(DeltaSync.java:353)
>  at org.apache.spark.api.java.JavaPairRDD$$anonfun$toScalaFunction$1.apply(JavaPairRDD.scala:1040)
>  at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
>  at scala.collection.Iterator$$anon$10.next(Iterator.scala:394)
>  at scala.collection.Iterator$class.foreach(Iterator.scala:891)
>  at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
>  at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
>  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
>  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
>  at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
>  at scala.collection.AbstractIterator.to(Iterator.scala:1334)
>  at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
>  at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1334)
>  at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
>  at scala.collection.AbstractIterator.toArray(Iterator.scala:1334)
>  at org.apache.spark.rdd.RDD$$anonfun$take$1$$anonfun$29.apply(RDD.scala:1364)
>  at org.apache.spark.rdd.RDD$$anonfun$take$1$$anonfun$29.apply(RDD.scala:1364)
>  at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
>  at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
>  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>  at org.apache.spark.scheduler.Task.run(Task.scala:121)
>  at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
>  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
>  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
>  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
>  2020-08-19 11:03:30 INFO Driver deltastreamer.HoodieDeltaStreamer:171 - Shut down delta streamer
>  2020-08-19 11:03:30 INFO Driver server.AbstractConnector:318 - Stopped Spark@6366e60a{HTTP/1.1,[http/1.1]}{0.0.0.0:0}
>  2020-08-19 11:03:30 INFO Driver ui.SparkUI:57 - Stopped Spark web UI at http://prod-t3-data-lake-004:33696
>  2020-08-19 11:03:30 INFO dispatcher-event-loop-16 yarn.YarnAllocator:57 - Driver requested a total number of 0 executor(s).
>  2020-08-19 11:03:30 INFO Driver cluster.YarnClusterSchedulerBackend:57 - Shutting down all executors
>  2020-08-19 11:03:30 INFO dispatcher-event-loop-2 cluster.YarnSchedulerBackend$YarnDriverEndpoint:57 - Asking each executor to shut down
>  2020-08-19 11:03:30 INFO Driver cluster.SchedulerExtensionServices:57 - Stopping SchedulerExtensionServices
>  (serviceOption=None,
>  services=List(),
>  started=false)
> {code}
>  
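As a closing note, a hedged sketch of the defensive direction a fix could take: validate the config once and construct the delegate eagerly, so misconfiguration fails fast at construction time with a clear message instead of an NPE per record inside a Spark task. This reuses the SimpleKeyGeneratorSketch stand-in from the sketch above and is illustrative only; the actual change lives in the pull request attached to this issue.

{code:java}
import java.util.Properties;

// Illustrative only, not the Hudi API: fail-fast variant of the custom
// key generator stand-in shown earlier in this thread.
class EagerCustomKeyGeneratorSketch {
  private static final String RECORD_KEY_PROP = "hoodie.datasource.write.recordkey.field";

  private final SimpleKeyGeneratorSketch delegate;

  EagerCustomKeyGeneratorSketch(Properties props) {
    // Validate once, up front, so a missing property surfaces on the driver
    // at construction time rather than as an executor-side NPE.
    if (props == null || props.getProperty(RECORD_KEY_PROP) == null) {
      throw new IllegalArgumentException("Required property not set: " + RECORD_KEY_PROP);
    }
    this.delegate = new SimpleKeyGeneratorSketch(props);
  }

  String getRecordKey(String record) {
    return delegate.getRecordKey(record);
  }
}
{code}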



--
This message was sent by Atlassian Jira
(v8.3.4#803005)