You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by "Shao, Saisai" <sa...@intel.com> on 2013/10/29 07:30:38 UTC
help with Spark serialize problem (StreamCorruptedException)
Hi all,
I met a deserialization problem when running a Spark application in a standalone cluster: Spark throws the exception below when deserializing tasks. The weird thing is that this exception appears randomly, not every time. I use the Java serializer and all of my code is serializable. I also googled this problem but found no clue — any suggestion would be helpful.
Thanks
Jerry
java.io.StreamCorruptedException: invalid type code: AC
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1355)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:350)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:39)
at org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:101)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
at scala.collection.Iterator$$anon$21.hasNext(Iterator.scala:440)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:26)
at scala.collection.Iterator$class.foreach(Iterator.scala:772)
at org.apache.spark.util.CompletionIterator.foreach(CompletionIterator.scala:23)
at org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:37)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$3.apply(PairRDDFunctions.scala:98)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$3.apply(PairRDDFunctions.scala:98)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:36)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:29)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
at org.apache.spark.rdd.FilteredRDD.compute(FilteredRDD.scala:32)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:32)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:36)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
at org.apache.spark.scheduler.ShuffleMapTask.run(ShuffleMapTask.scala:149)
at org.apache.spark.scheduler.ShuffleMapTask.run(ShuffleMapTask.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:158)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)