Posted to dev@spark.apache.org by npanj <ni...@gmail.com> on 2014/08/21 23:14:52 UTC

PARSING_ERROR from kryo

Hi All,

I am getting PARSING_ERROR while running my job on the code checked out up
to commit# db56f2df1b8027171da1b8d2571d1f2ef1e103b6. I am running this job
on EC2.

Any idea if there is something wrong with my config?

Here is my config:
--
    .set("spark.executor.extraJavaOptions",
         "-XX:+UseCompressedOops -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps")
    .set("spark.storage.memoryFraction", "0.2")
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .set("spark.kryo.registrator", "org.apache.spark.graphx.GraphKryoRegistrator")
    .set("spark.akka.frameSize", "20")
    .set("spark.akka.timeout", "300")
    .set("spark.shuffle.memoryFraction", "0.5")
    .set("spark.core.connection.ack.wait.timeout", "1800")
--
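
For orientation, here is a minimal sketch of how fragments like these
typically hang off a SparkConf; the app name and master URL below are
placeholders, not taken from this thread:

--
import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical wiring for the settings quoted above.
val conf = new SparkConf()
  .setAppName("graphx-job")                // placeholder
  .setMaster("spark://master-host:7077")   // placeholder
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .set("spark.kryo.registrator",
       "org.apache.spark.graphx.GraphKryoRegistrator")
val sc = new SparkContext(conf)
--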



--
Job aborted due to stage failure: Task 947 in stage 11.0 failed 4 times,
most recent failure: Lost task 947.3 in stage 11.0 (TID 12750,
ip-10-167-149-118.ec2.internal): com.esotericsoftware.kryo.KryoException:
java.io.IOException: failed to uncompress the chunk: PARSING_ERROR(2)
Serialization trace:
vids (org.apache.spark.graphx.impl.VertexAttributeBlock)
        com.esotericsoftware.kryo.io.Input.fill(Input.java:142)
        com.esotericsoftware.kryo.io.Input.require(Input.java:169)
        com.esotericsoftware.kryo.io.Input.readLong_slow(Input.java:719)
        com.esotericsoftware.kryo.io.Input.readLong(Input.java:665)
        com.esotericsoftware.kryo.serializers.DefaultArraySerializers$LongArraySerializer.read(DefaultArraySerializers.java:127)
        com.esotericsoftware.kryo.serializers.DefaultArraySerializers$LongArraySerializer.read(DefaultArraySerializers.java:107)
        com.esotericsoftware.kryo.Kryo.readObjectOrNull(Kryo.java:699)
        com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:611)
        com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
        com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:729)
        com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:43)
        com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:34)
        com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:729)
        org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:119)
        org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:129)
        org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
        org.apache.spark.storage.BlockManager$LazyProxyIterator$1.hasNext(BlockManager.scala:1038)
        scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
        org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30)
        org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
        scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
        scala.collection.Iterator$class.foreach(Iterator.scala:727)
        scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
        org.apache.spark.graphx.impl.VertexPartitionBaseOps.innerJoinKeepLeft(VertexPartitionBaseOps.scala:192)
        org.apache.spark.graphx.impl.EdgePartition.updateVertices(EdgePartition.scala:78)
        org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:75)
        org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:73)
        scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
        scala.collection.Iterator$class.foreach(Iterator.scala:727)
        scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
        org.apache.spark.shuffle.hash.HashShuffleWriter.write(HashShuffleWriter.scala:57)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:147)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:97)
        org.apache.spark.scheduler.Task.run(Task.scala:51)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:189)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:745)
--





Re: PARSING_ERROR from kryo

Posted by Andrew Ash <an...@andrewash.com>.
I should clarify: I'm not using GraphX; a different,
application-specific Kryo registrator causes the same stacktrace ending
in PARSING_ERROR:

com.esotericsoftware.kryo.KryoException: java.io.IOException: failed to
uncompress the chunk: PARSING_ERROR(2)
com.esotericsoftware.kryo.io.Input.fill(Input.java:142)
com.esotericsoftware.kryo.io.Input.require(Input.java:169)
com.esotericsoftware.kryo.io.Input.readInt(Input.java:325)
com.esotericsoftware.kryo.io.Input.readFloat(Input.java:624)
com.esotericsoftware.kryo.serializers.DefaultSerializers$FloatSerializer.read(DefaultSerializers.java:127)
com.esotericsoftware.kryo.serializers.DefaultSerializers$FloatSerializer.read(DefaultSerializers.java:117)
com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
com.esotericsoftware.kryo.serializers.CollectionSerializer.read(CollectionSerializer.java:109)
com.esotericsoftware.kryo.serializers.CollectionSerializer.read(CollectionSerializer.java:18)
com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
... my registrator
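
For concreteness, a registrator in that style might look like the sketch
below; the class and record names are hypothetical stand-ins, not the
actual application code:

--
import com.esotericsoftware.kryo.Kryo
import org.apache.spark.serializer.KryoRegistrator

// Hypothetical domain class standing in for whatever the real
// registrator registers.
case class MyRecord(id: Long, score: Float)

class MyKryoRegistrator extends KryoRegistrator {
  override def registerClasses(kryo: Kryo) {
    kryo.register(classOf[MyRecord])
    kryo.register(classOf[Array[Float]])
  }
}
--

It would be wired in through spark.kryo.registrator, the same way
GraphKryoRegistrator is in the original message.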


Ankur, from my read of the ticket there's no root cause identified yet
for those PARSING_ERROR exceptions in GraphX?


Andrew

On Mon, Sep 15, 2014 at 2:10 PM, Ankur Dave <an...@gmail.com> wrote:

> [... quoted reply elided; it appears in full as the next message below ...]

Re: PARSING_ERROR from kryo

Posted by Ankur Dave <an...@gmail.com>.
At 2014-09-15 08:59:48 -0700, Andrew Ash <an...@andrewash.com> wrote:
> I'm seeing the same exception now on the Spark 1.1.0 release.  Did you ever
> get this figured out?
>
> [...]
>
> On Thu, Aug 21, 2014 at 2:14 PM, npanj <ni...@gmail.com> wrote:
>> I am getting PARSING_ERROR while running my job on the code checked out up
>> to commit# db56f2df1b8027171da1b8d2571d1f2ef1e103b6.

The error is because I merged a GraphX PR that introduced a
nondeterministic bug [1]. I reverted the faulty PR, but it was too late
for the 1.1.0 release. The problem should go away if you use branch-1.1
or master. Sorry about that...

Ankur

[1] https://issues.apache.org/jira/browse/SPARK-3400
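
A hedged diagnostic for anyone stuck on the 1.1.0 release (an
assumption, not a fix suggested in this thread): since PARSING_ERROR(2)
is raised while uncompressing a Snappy chunk, swapping the I/O
compression codec shows whether the corruption is specific to the Snappy
path. Both keys below exist in Spark 1.1:

--
// Apply to the SparkConf before creating the SparkContext. This only
// isolates the symptom; the underlying bug is the one tracked in
// SPARK-3400 above.
conf.set("spark.io.compression.codec",
         "org.apache.spark.io.LZFCompressionCodec")
// or rule out shuffle compression entirely:
conf.set("spark.shuffle.compress", "false")
--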



Re: PARSING_ERROR from kryo

Posted by Arun Ahuja <aa...@gmail.com>.
I am seeing the same error as well since upgrading to Spark 1.1:

14/09/26 15:35:05 ERROR executor.Executor: Exception in task 1032.0 in
stage 5.1 (TID 22449)
com.esotericsoftware.kryo.KryoException: java.io.IOException: failed to
uncompress the chunk: PARSING_ERROR(2)
        at com.esotericsoftware.kryo.io.Input.fill(Input.java:142)
        at com.esotericsoftware.kryo.io.Input.require(Input.java:155)
        at com.esotericsoftware.kryo.io.Input.readInt(Input.java:337)
        at com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:109)
        at com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:610)
        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:721)
        at org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:133)
        at org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:133)
        at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
        at org.apache.spark.storage.BlockManager$LazyProxyIterator$1.hasNext(BlockManager.scala:1082)
        at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
        at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30)
        at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)

Out of 6000 tasks, 5000-something finish fine, so I don't believe there
is anything wrong with the serialization itself, and on some other
datasets everything works fine. The same code on the same dataset also
worked fine with Spark 1.0.2.
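
One hedged way to back that reasoning up (an assumption, not something
actually run in this thread) is to round-trip a buffer through
snappy-java on the cluster nodes, since PARSING_ERROR(2) comes out of
Snappy's decompressor and a broken or mismatched native library would
corrupt chunks independently of Kryo:

--
import org.xerial.snappy.Snappy

// Sanity-check sketch: compress and uncompress a buffer with the same
// snappy-java that Spark ships; a failure here would implicate the
// Snappy install rather than Kryo registration.
val data = Array.fill[Byte](1 << 20)(42)
val roundTrip = Snappy.uncompress(Snappy.compress(data))
assert(java.util.Arrays.equals(roundTrip, data))
--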

On Mon, Sep 15, 2014 at 9:57 PM, npanj <ni...@gmail.com> wrote:

> [... quoted reply elided; it appears in full as the next message below ...]

Re: PARSING_ERROR from kryo

Posted by npanj <ni...@gmail.com>.
Hi Andrew,

No, I could not figure out the root cause. This seems to be a
non-deterministic error... I didn't see the same error after rerunning
the same program, but I noticed the same error in a different program.

First I thought that this may be related to SPARK-2878, but @Graham
replied that it looks unrelated.








Re: PARSING_ERROR from kryo

Posted by Andrew Ash <an...@andrewash.com>.
Hi npanj,

I'm seeing the same exception now on the Spark 1.1.0 release.  Did you ever
get this figured out?

Andrew

On Thu, Aug 21, 2014 at 2:14 PM, npanj <ni...@gmail.com> wrote:

> [... original message quoted in full; elided here, see the first
> message at the top of the thread ...]