Posted to issues@spark.apache.org by "John Hogue (JIRA)" <ji...@apache.org> on 2015/08/17 16:40:45 UTC

[jira] [Commented] (SPARK-6299) ClassNotFoundException in standalone mode when running groupByKey with class defined in REPL.

    [ https://issues.apache.org/jira/browse/SPARK-6299?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14699600#comment-14699600 ] 

John Hogue commented on SPARK-6299:
-----------------------------------

In case anyone else is stuck on 1.3.0 for a while: I was able to work around this by setting SPARK_CLASSPATH before starting spark-shell. The jar is not actually used in the REPL, but including it seemed to resolve the issue of distributing classes defined in my script.

SPARK_CLASSPATH=/path/ojdbc6.jar spark-shell --master yarn --deploy-mode client --num-executors 100 --driver-memory 7500m --executor-memory 7500m
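
Note that SPARK_CLASSPATH is deprecated in newer builds, so an alternative that may work (I have not verified it against this particular issue; same placeholder jar path as above) is to let Spark ship the jar to the executors itself via --jars:

spark-shell --jars /path/ojdbc6.jar --master yarn --deploy-mode client --num-executors 100 --driver-memory 7500m --executor-memory 7500m

Whether --jars has the same side effect on how REPL-defined classes get distributed is something you would need to confirm on your own cluster.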


> ClassNotFoundException in standalone mode when running groupByKey with class defined in REPL.
> ---------------------------------------------------------------------------------------------
>
>                 Key: SPARK-6299
>                 URL: https://issues.apache.org/jira/browse/SPARK-6299
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Shell
>    Affects Versions: 1.2.1, 1.3.0
>            Reporter: Kevin (Sangwoo) Kim
>            Assignee: Kevin (Sangwoo) Kim
>             Fix For: 1.3.1, 1.4.0
>
>
> Anyone can reproduce this issue with the code below.
> (It runs fine in local mode but throws an exception on a cluster; it also ran fine in Spark 1.1.1.)
> {code}
> case class ClassA(value: String)
> val rdd = sc.parallelize(List(("k1", ClassA("v1")), ("k1", ClassA("v2")) ))
> rdd.groupByKey.collect
> {code}
> {code}
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 162 in stage 1.0 failed 4 times, most recent failure: Lost task 162.3 in stage 1.0 (TID 1027, ip-172-16-182-27.ap-northeast-1.compute.internal): java.lang.ClassNotFoundException: $iwC$$iwC$$iwC$$iwC$UserRelationshipRow
> at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:274)
> at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:59)
> at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1612)
> at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
> at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1771)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
> at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
> at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
> at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
> at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
> at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:62)
> at org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:133)
> at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
> at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
> at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
> at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
> at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
> at org.apache.spark.Aggregator.combineCombinersByKey(Aggregator.scala:91)
> at org.apache.spark.shuffle.hash.HashShuffleReader.read(HashShuffleReader.scala:44)
> at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:92)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:280)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:247)
> at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:280)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:247)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:280)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:247)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
> at org.apache.spark.scheduler.Task.run(Task.scala:56)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
> at java.lang.Thread.run(Thread.java:745)
> Driver stacktrace:
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
> at scala.Option.foreach(Option.scala:236)
> at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420)
> at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375)
> at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
> at akka.actor.ActorCell.invoke(ActorCell.scala:487)
> at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
> at akka.dispatch.Mailbox.run(Mailbox.scala:220)
> at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
> at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> {code}


