Posted to user@spark.apache.org by Michel Hubert <mi...@phact.nl> on 2016/05/25 12:17:00 UTC

StackOverflowError in Spark

Hi,


I have a Spark application that starts throwing StackOverflowError exceptions after running for 30+ minutes.

Does anyone have any ideas?

It looks like a problem with the deserialization of checkpoint data?
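For context, checkpointing in the job is set up the usual way. A minimal sketch of that pattern (the class name, batch interval, host/port, and checkpoint path below are placeholders, not the real application code):

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class CheckpointSketch {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("streaming-checkpoint-sketch");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(10));

        // Metadata and RDD checkpoints are serialized here and deserialized
        // again on recovery -- the step that appears to blow the stack.
        ssc.checkpoint("hdfs:///tmp/spark-checkpoints");

        JavaDStream<String> lines = ssc.socketTextStream("localhost", 9999);

        // Periodic RDD checkpointing truncates the lineage; a too-long
        // interval lets the lineage (and the serialized object graph) grow.
        lines.checkpoint(Durations.seconds(60));
        lines.print();

        ssc.start();
        ssc.awaitTermination();
    }
}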

16/05/25 10:48:51 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 55449.0 (TID 5584, host81440-cld.opentsp.com): java.lang.StackOverflowError
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1382)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
        at scala.collection.immutable.$colon$colon.readObject(List.scala:362)
        at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1017)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1893)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
        at scala.collection.immutable.$colon$colon.readObject(List.scala:362)
        at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source)
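The executor trace keeps cycling through scala.collection.immutable.$colon$colon.readObject, so it looks like a very long Scala List is being deserialized; ObjectInputStream recurses once per cons cell, and a long enough list overflows the default thread stack. One workaround I am considering (just my assumption, not a verified fix) is giving the executor threads a bigger stack:

import org.apache.spark.SparkConf;

public class StackSizeWorkaround {
    public static SparkConf buildConf() {
        return new SparkConf()
                .setAppName("streaming-checkpoint-sketch")
                // -Xss8m is a guess; tune as needed. The driver-side twin
                // (spark.driver.extraJavaOptions) must go on spark-submit,
                // because the driver JVM is already running by this point.
                .set("spark.executor.extraJavaOptions", "-Xss8m");
    }
}

Even if that helps, the real fix is presumably to stop the list (or the RDD lineage behind it) from growing without bound.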

Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
        at scala.Option.foreach(Option.scala:236)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1843)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1856)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1933)
        at org.elasticsearch.spark.rdd.EsSpark$.saveToEs(EsSpark.scala:67)
        at org.elasticsearch.spark.rdd.EsSpark$.saveToEs(EsSpark.scala:54)
        at org.elasticsearch.spark.rdd.EsSpark$.saveJsonToEs(EsSpark.scala:90)
        at org.elasticsearch.spark.rdd.api.java.JavaEsSpark$.saveJsonToEs(JavaEsSpark.scala:62)
        at org.elasticsearch.spark.rdd.api.java.JavaEsSpark.saveJsonToEs(JavaEsSpark.scala)
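For completeness, the output operation at the bottom of the driver stacktrace is the elasticsearch-hadoop JSON write. A minimal sketch of that call (the index/type resource and documents are placeholders; this assumes es.nodes points at the cluster, which defaults to localhost:9200):

import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;

public class EsWriteSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "es-write-sketch");

        // saveJsonToEs expects one JSON document per string.
        JavaRDD<String> json = sc.parallelize(Arrays.asList(
                "{\"event\":\"a\"}", "{\"event\":\"b\"}"));

        // Same call that sits at the bottom of the driver stacktrace.
        JavaEsSpark.saveJsonToEs(json, "events/doc");

        sc.stop();
    }
}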