Posted to dev@spark.apache.org by Nicholas Chammas <ni...@gmail.com> on 2014/07/15 03:43:17 UTC

ec2 clusters launched at 9fe693b5b6 are broken (?)

Just launched an EC2 cluster from git hash
9fe693b5b6ed6af34ee1e800ab89c8a11991ea38. Calling take() on an RDD that
reads data from S3 yields the error output below.
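
For reference, this is roughly the snippet I'm running in spark-shell; the
bucket and prefix here are placeholders, not the real ones:

    // Read a handful of records from S3 over the s3n:// (native) filesystem.
    // take() is what kicks off the partition listing that fails below.
    val lines = sc.textFile("s3n://some-bucket/some-prefix/*")
    lines.take(5).foreach(println)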

I understand that NoClassDefFoundError errors may mean something in the
deployment was messed up. Is that correct? When I launch a cluster using
spark-ec2, I expect all critical deployment details to be taken care of by
the script.

So is something in the deployment executed by spark-ec2 borked?

Nick

java.lang.NoClassDefFoundError: org/jets3t/service/S3ServiceException
    at org.apache.hadoop.fs.s3native.NativeS3FileSystem.createDefaultStore(NativeS3FileSystem.java:224)
    at org.apache.hadoop.fs.s3native.NativeS3FileSystem.initialize(NativeS3FileSystem.java:214)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:1386)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:66)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:1404)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:254)
    at org.apache.hadoop.fs.Path.getFileSystem(Path.java:187)
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:176)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:208)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:176)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:203)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:201)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:201)
    at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:203)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:201)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:201)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:32)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:203)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:201)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:201)
    at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:71)
    at org.apache.spark.rdd.ShuffledRDD.getDependencies(ShuffledRDD.scala:79)
    at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:190)
    at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:188)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.dependencies(RDD.scala:188)
    at org.apache.spark.scheduler.DAGScheduler.getPreferredLocs(DAGScheduler.scala:1144)
    at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:903)
    at org.apache.spark.rdd.PartitionCoalescer.currPrefLocs(CoalescedRDD.scala:174)
    at org.apache.spark.rdd.PartitionCoalescer$LocationIterator$$anonfun$4$$anonfun$apply$2.apply(CoalescedRDD.scala:191)
    at org.apache.spark.rdd.PartitionCoalescer$LocationIterator$$anonfun$4$$anonfun$apply$2.apply(CoalescedRDD.scala:190)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:350)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:350)
    at org.apache.spark.rdd.PartitionCoalescer$LocationIterator.<init>(CoalescedRDD.scala:185)
    at org.apache.spark.rdd.PartitionCoalescer.setupGroups(CoalescedRDD.scala:236)
    at org.apache.spark.rdd.PartitionCoalescer.run(CoalescedRDD.scala:337)
    at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:83)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:203)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:201)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:201)
    at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:203)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:201)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:201)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1036)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:26)
    at $iwC$$iwC$$iwC.<init>(<console>:31)
    at $iwC$$iwC.<init>(<console>:33)
    at $iwC.<init>(<console>:35)
    at <init>(<console>:37)
    at .<init>(<console>:41)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:788)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1056)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:614)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:645)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:609)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:796)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:841)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:753)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:601)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:608)
    at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:611)
    at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:936)
    at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:884)
    at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:884)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:884)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:982)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:303)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:55)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException:
org.jets3t.service.S3ServiceException
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    ... 92 more


Re: ec2 clusters launched at 9fe693b5b6 are broken (?)

Posted by Nicholas Chammas <ni...@gmail.com>.
Okie doke -- I've added myself as a watcher on that issue.

On a related note, what are people's thoughts on automatically spinning up
and tearing down EC2 clusters and running tests against them? It would
probably be way too cumbersome to do that for every build, but on some
schedule it could help validate that we are still deploying EC2 clusters
correctly.
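
To make that concrete, I'm imagining something roughly like the sketch
below. Every name and path in it is a placeholder, and a real version would
need to handle details like the confirmation prompt on destroy:

    import scala.sys.process._

    // Launch a tiny throwaway cluster with spark-ec2, run a smoke job
    // against it, and always tear it down afterwards.
    val cluster = "spark-ec2-smoke"
    val ec2 = Seq("./ec2/spark-ec2", "-k", "smoke-key", "-i", "smoke-key.pem", "-s", "1")

    val exitCode =
      try {
        (ec2 ++ Seq("launch", cluster)).!        // bring the cluster up
        Seq("./run-smoke-tests.sh", cluster).!   // e.g. an S3 read + take()
      } finally {
        (ec2 ++ Seq("destroy", cluster)).!       // clean up no matter what
      }

    sys.exit(exitCode)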

Would something like that be valuable?

Nick


On Tue, Jul 15, 2014 at 1:19 AM, Patrick Wendell <pw...@gmail.com> wrote:

> Yeah - this is likely caused by SPARK-2471.
>
> On Mon, Jul 14, 2014 at 10:11 PM, Shivaram Venkataraman
> <sh...@eecs.berkeley.edu> wrote:
> > My guess is that this is related to
> > https://issues.apache.org/jira/browse/SPARK-2471 where the S3 library
> > gets excluded from the SBT assembly jar. I am not sure if the assembly
> > jar used in EC2 is generated using SBT though.
> >
> > Shivaram
> >
> >
> > On Mon, Jul 14, 2014 at 10:02 PM, Aaron Davidson <il...@gmail.com>
> > wrote:
> >
> >> This one is typically due to a mismatch between the Hadoop versions --
> >> i.e., Spark is compiled against 1.0.4 but is running with 2.3.0 in the
> >> classpath, or something like that. Not certain why you're seeing this
> >> with spark-ec2, but I'm assuming this is related to the issues you
> >> posted in a separate thread.
> >>
> >>
> >> On Mon, Jul 14, 2014 at 6:43 PM, Nicholas Chammas <
> >> nicholas.chammas@gmail.com> wrote:
> >>
> >> > Just launched an EC2 cluster from git hash
> >> > 9fe693b5b6ed6af34ee1e800ab89c8a11991ea38. Calling take() on an RDD
> >> > accessing data in S3 yields the following error output.
> >> >
> >> > I understand that NoClassDefFoundError errors may mean something in
> >> > the deployment was messed up. Is that correct? When I launch a
> >> > cluster using spark-ec2, I expect all critical deployment details to
> >> > be taken care of by the script.
> >> >
> >> > So is something in the deployment executed by spark-ec2 borked?
> >> >
> >> > Nick
> >> >

Re: ec2 clusters launched at 9fe693b5b6 are broken (?)

Posted by Patrick Wendell <pw...@gmail.com>.
Yeah - this is likely caused by SPARK-2471.

On Mon, Jul 14, 2014 at 10:11 PM, Shivaram Venkataraman
<sh...@eecs.berkeley.edu> wrote:
> My guess is that this is related to
> https://issues.apache.org/jira/browse/SPARK-2471 where the S3 library gets
> excluded from the SBT assembly jar. I am not sure if the assembly jar used
> in EC2 is generated using SBT though.
>
> Shivaram
>
>
> On Mon, Jul 14, 2014 at 10:02 PM, Aaron Davidson <il...@gmail.com> wrote:
>
>> This one is typically due to a mismatch between the Hadoop versions --
>> i.e., Spark is compiled against 1.0.4 but is running with 2.3.0 in the
>> classpath, or something like that. Not certain why you're seeing this with
>> spark-ec2, but I'm assuming this is related to the issues you posted in a
>> separate thread.
>>
>>
>> On Mon, Jul 14, 2014 at 6:43 PM, Nicholas Chammas <
>> nicholas.chammas@gmail.com> wrote:
>>
>> > Just launched an EC2 cluster from git hash
>> > 9fe693b5b6ed6af34ee1e800ab89c8a11991ea38. Calling take() on an RDD
>> > accessing data in S3 yields the following error output.
>> >
>> > I understand that NoClassDefFoundError errors may mean something in the
>> > deployment was messed up. Is that correct? When I launch a cluster using
>> > spark-ec2, I expect all critical deployment details to be taken care of
>> > by the script.
>> >
>> > So is something in the deployment executed by spark-ec2 borked?
>> >
>> > Nick
>> >

Re: ec2 clusters launched at 9fe693b5b6 are broken (?)

Posted by Shivaram Venkataraman <sh...@eecs.berkeley.edu>.
My guess is that this is related to
https://issues.apache.org/jira/browse/SPARK-2471 where the S3 library gets
excluded from the SBT assembly jar. I am not sure if the assembly jar used
in EC2 is generated using SBT though.
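
One way to check which case you're in is to open the assembly jar that
spark-ec2 actually deployed and look for the jets3t classes. Something like
the following from a Scala shell should do it (the jar path is a guess;
point it at whatever assembly is on your master):

    import java.util.jar.JarFile
    import scala.collection.JavaConverters._

    // Path is a placeholder -- use the assembly jar spark-ec2 put on the master.
    val jar = new JarFile("/path/to/spark-assembly.jar")
    val hasJets3t = jar.entries.asScala
      .exists(_.getName == "org/jets3t/service/S3ServiceException.class")
    println("jets3t present in assembly: " + hasJets3t)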

Shivaram


On Mon, Jul 14, 2014 at 10:02 PM, Aaron Davidson <il...@gmail.com> wrote:

> This one is typically due to a mismatch between the Hadoop versions --
> i.e., Spark is compiled against 1.0.4 but is running with 2.3.0 in the
> classpath, or something like that. Not certain why you're seeing this with
> spark-ec2, but I'm assuming this is related to the issues you posted in a
> separate thread.
>
>
> On Mon, Jul 14, 2014 at 6:43 PM, Nicholas Chammas <
> nicholas.chammas@gmail.com> wrote:
>
> > Just launched an EC2 cluster from git hash
> > 9fe693b5b6ed6af34ee1e800ab89c8a11991ea38. Calling take() on an RDD
> > accessing data in S3 yields the following error output.
> >
> > I understand that NoClassDefFoundError errors may mean something in the
> > deployment was messed up. Is that correct? When I launch a cluster using
> > spark-ec2, I expect all critical deployment details to be taken care of
> > by the script.
> >
> > So is something in the deployment executed by spark-ec2 borked?
> >
> > Nick
> >

Re: ec2 clusters launched at 9fe693b5b6 are broken (?)

Posted by Aaron Davidson <il...@gmail.com>.
This one is typically due to a mismatch between the Hadoop versions --
i.e., Spark is compiled against 1.0.4 but is running with 2.3.0 in the
classpath, or something like that. Not certain why you're seeing this with
spark-ec2, but I'm assuming this is related to the issues you posted in a
separate thread.
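
If it helps, two quick things to check from the spark-shell on the cluster
(just a suggestion, not something I've run against your setup):

    // Which Hadoop version is actually on the classpath at runtime?
    println(org.apache.hadoop.util.VersionInfo.getVersion)

    // Is jets3t reachable at all? A NoClassDefFoundError like yours usually
    // means it is not.
    val jets3tLoadable =
      try { Class.forName("org.jets3t.service.S3ServiceException"); true }
      catch { case _: ClassNotFoundException => false }
    println("jets3t on classpath: " + jets3tLoadable)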


On Mon, Jul 14, 2014 at 6:43 PM, Nicholas Chammas <
nicholas.chammas@gmail.com> wrote:

> Just launched an EC2 cluster from git hash
> 9fe693b5b6ed6af34ee1e800ab89c8a11991ea38. Calling take() on an RDD
> accessing data in S3 yields the following error output.
>
> I understand that NoClassDefFoundError errors may mean something in the
> deployment was messed up. Is that correct? When I launch a cluster using
> spark-ec2, I expect all critical deployment details to be taken care of by
> the script.
>
> So is something in the deployment executed by spark-ec2 borked?
>
> Nick
>