You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by "Dagan, Arnon" <ar...@ebay.com> on 2016/01/04 13:24:40 UTC

Trying to run GraphX ConnectedComponents on large data without success

I am trying to run a Spark job with Spark 1.5.1, using the following parameters:
--master "yarn"
--deploy-mode "cluster"
--num-executors 200
 --driver-memory 14G
--executor-memory 14G
--executor-cores 1

The job runs GraphX ConnectedComponents on large data (~4TB) using the following commands:

System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
val edges = ...
val graph = Graph.fromEdgeTuples(edges,0,edgeStorageLevel = StorageLevel.MEMORY_AND_DISK, vertexStorageLevel = StorageLevel.MEMORY_AND_DISK)
val components = graph.connectedComponents().vertices
Some of the tasks complete successfully, and some fail with the following errors:
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 2
                at org.apache.spark.MapOutputTracker$$anonfun$org$apache$spark$MapOutputTracker$$convertMapStatuses$2.apply(MapOutputTracker.scala:460)
                at org.apache.spark.MapOutputTracker$$anonfun$org$apache$spark$MapOutputTracker$$convertMapStatuses$2.apply(MapOutputTracker.scala:456)
                at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
                at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
                at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
                at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
                at org.apache.spark.MapOutputTracker$.org$apache$spark$MapOutputTracker$$convertMapStatuses(MapOutputTracker.scala:456)
                at org.apache.spark.MapOutputTracker.getMapSizesByExecutorId(MapOutputTracker.scala:183)
                at org.apache.spark.shuffle.hash.HashShuffleReader.read(HashShuffleReader.scala:47)
                at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:90)
                at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
                at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
                at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
                at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
                at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
                at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
                at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
                at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:69)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:262)
                at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:99)
                at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
                at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
                at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
                at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
                at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
                at org.apache.spark.scheduler.Task.run(Task.scala:88)
                at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
                at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
                at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
                at java.lang.Thread.run(Thread.java:745)

and another error:
org.apache.spark.shuffle.FetchFailedException: Connection from phxaishdc9dn1209.stratus.phx.ebay.com/10.115.60.32:40099 closed
                at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:321)
                at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:306)
                at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:51)
                at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
                at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
                at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
                at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
                at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
                at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
                at org.apache.spark.graphx.impl.EdgePartition.updateVertices(EdgePartition.scala:89)
                at org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$4$$anonfun$apply$5.apply(ReplicatedVertexView.scala:117)
                at org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$4$$anonfun$apply$5.apply(ReplicatedVertexView.scala:115)
                at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
                at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:278)
                at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
                at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:262)
                at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:99)
                at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
                at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
                at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
                at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
                at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
                at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
                at org.apache.spark.scheduler.Task.run(Task.scala:88)
                at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
                at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
                at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
                at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: Connection from phxaishdc9dn1209.stratus.phx.ebay.com/10.115.60.32:40099 closed
                at org.apache.spark.network.client.TransportResponseHandler.channelUnregistered(TransportResponseHandler.java:104)
                at org.apache.spark.network.server.TransportChannelHandler.channelUnregistered(TransportChannelHandler.java:91)
                at io.netty.channel.AbstractChannelHandlerContext.invokeChannelUnregistered(AbstractChannelHandlerContext.java:158)
                at io.netty.channel.AbstractChannelHandlerContext.fireChannelUnregistered(AbstractChannelHandlerContext.java:144)
                at io.netty.channel.ChannelInboundHandlerAdapter.channelUnregistered(ChannelInboundHandlerAdapter.java:53)
                at io.netty.channel.AbstractChannelHandlerContext.invokeChannelUnregistered(AbstractChannelHandlerContext.java:158)
                at io.netty.channel.AbstractChannelHandlerContext.fireChannelUnregistered(AbstractChannelHandlerContext.java:144)
                at io.netty.channel.ChannelInboundHandlerAdapter.channelUnregistered(ChannelInboundHandlerAdapter.java:53)
                at io.netty.channel.AbstractChannelHandlerContext.invokeChannelUnregistered(AbstractChannelHandlerContext.java:158)
                at io.netty.channel.AbstractChannelHandlerContext.fireChannelUnregistered(AbstractChannelHandlerContext.java:144)
                at io.netty.channel.ChannelInboundHandlerAdapter.channelUnregistered(ChannelInboundHandlerAdapter.java:53)
                at io.netty.channel.AbstractChannelHandlerContext.invokeChannelUnregistered(AbstractChannelHandlerContext.java:158)
                at io.netty.channel.AbstractChannelHandlerContext.fireChannelUnregistered(AbstractChannelHandlerContext.java:144)
                at io.netty.channel.DefaultChannelPipeline.fireChannelUnregistered(DefaultChannelPipeline.java:739)
                at io.netty.channel.AbstractChannel$AbstractUnsafe$8.run(AbstractChannel.java:659)
                at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:357)
                at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:357)
                at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
                ... 1 more

Please advise,
Thanks in advance,
Arnon