You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Jianshi Huang (JIRA)" <ji...@apache.org> on 2014/10/08 11:41:33 UTC

[jira] [Updated] (SPARK-3846) KryoException when doing joins in SparkSQL

     [ https://issues.apache.org/jira/browse/SPARK-3846?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jianshi Huang updated SPARK-3846:
---------------------------------
          Description: 
The error is reproducible when I join two tables manually. The error message is as follows.

org.apache.spark.SparkException: Job aborted due to stage failure: Task 645 in stage 3.0 failed 4 times, most recent failure: Lost task 645.3 in stage 3.0 (TID 3802, lvshdc5dn0215.lvs.paypal.com): com.esotericsoftware.kryo.KryoException:
Unable to find class: __wrapper$1$18e31777385a452ba0bc030e899bf5d1.__wrapper$1$18e31777385a452ba0bc030e899bf5d1$SpecificRow$1
        com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:138)
        com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:115)
        com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:610)
        com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:721)
        com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:42)
        com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:34)
        com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
        org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:133)
        org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:133)
        org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
        scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
        org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30)
        org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
        scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
        scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
        org.apache.spark.sql.execution.HashJoin$$anon$1.hasNext(joins.scala:101)
        scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
        org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$8.apply(GeneratedAggregate.scala:198)
        org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$8.apply(GeneratedAggregate.scala:165)
        org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:599)
        org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:599)
        org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        org.apache.spark.scheduler.Task.run(Task.scala:56)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:181)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:724)

  was:
I built the latest Spark (1.2.0-SNAPSHOT) from the master branch and found that jobs which previously succeeded on 1.1.0 now fail.

The error is reproducible when I join two tables manually. The error message is as follows.

org.apache.spark.SparkException: Job aborted due to stage failure: Task 645 in stage 3.0 failed 4 times, most recent failure: Lost task 645.3 in stage 3.0 (TID 3802, lvshdc5dn0215.lvs.paypal.com): com.esotericsoftware.kryo.KryoException:
Unable to find class: __wrapper$1$18e31777385a452ba0bc030e899bf5d1.__wrapper$1$18e31777385a452ba0bc030e899bf5d1$SpecificRow$1
        com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:138)
        com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:115)
        com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:610)
        com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:721)
        com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:42)
        com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:34)
        com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
        org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:133)
        org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:133)
        org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
        scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
        org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30)
        org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
        scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
        scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
        org.apache.spark.sql.execution.HashJoin$$anon$1.hasNext(joins.scala:101)
        scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
        org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$8.apply(GeneratedAggregate.scala:198)
        org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$8.apply(GeneratedAggregate.scala:165)
        org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:599)
        org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:599)
        org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        org.apache.spark.scheduler.Task.run(Task.scala:56)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:181)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:724)

    Affects Version/s: 1.1.0

> KryoException when doing joins in SparkSQL 
> -------------------------------------------
>
>                 Key: SPARK-3846
>                 URL: https://issues.apache.org/jira/browse/SPARK-3846
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.1.0, 1.2.0
>            Reporter: Jianshi Huang
>
> The error is reproducible when I join two tables manually. The error message is as follows.
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 645 in stage 3.0 failed 4 times, most recent failure: Lost task 645.3 in stage 3.0 (TID 3802, lvshdc5dn0215.lvs.paypal.com): com.esotericsoftware.kryo.KryoException:
> Unable to find class: __wrapper$1$18e31777385a452ba0bc030e899bf5d1.__wrapper$1$18e31777385a452ba0bc030e899bf5d1$SpecificRow$1
>         com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:138)
>         com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:115)
>         com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:610)
>         com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:721)
>         com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:42)
>         com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:34)
>         com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
>         org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:133)
>         org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:133)
>         org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
>         scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
>         org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30)
>         org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
>         scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>         scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>         org.apache.spark.sql.execution.HashJoin$$anon$1.hasNext(joins.scala:101)
>         scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>         org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$8.apply(GeneratedAggregate.scala:198)
>         org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$8.apply(GeneratedAggregate.scala:165)
>         org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:599)
>         org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:599)
>         org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>         org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>         org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
>         org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>         org.apache.spark.scheduler.Task.run(Task.scala:56)
>         org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:181)
>         java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>         java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>         java.lang.Thread.run(Thread.java:724)



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org