Posted to user@spark.apache.org by "A.K.M. Ashrafuzzaman" <as...@gmail.com> on 2014/12/18 08:02:55 UTC

Getting OutOfMemoryError and Worker.run caught exception

Hi guys,

I'm getting the following errors:
2014-12-17 09:05:02,391 [SocialInteractionDAL.scala:Executor task launch worker-110:20] - ----------- Inserting into mongo -------------
2014-12-17 09:05:06,768 [           Logging.scala:Executor task launch worker-110:96] - Exception in task 1.0 in stage 19541.0 (TID 33982)
java.lang.OutOfMemoryError: GC overhead limit exceeded
        at org.bson.io.PoolOutputBuffer.<init>(PoolOutputBuffer.java:253)
        at org.bson.BasicBSONDecoder.<init>(BasicBSONDecoder.java:599)
        at com.mongodb.DefaultDBDecoder.<init>(DefaultDBDecoder.java:44)
        at com.mongodb.DefaultDBDecoder$DefaultFactory.create(DefaultDBDecoder.java:33)
        at com.mongodb.DBPort.<init>(DBPort.java:88)
        at com.mongodb.DBPortFactory.create(DBPortFactory.java:28)
        at com.mongodb.PooledConnectionProvider$ConnectionItemFactory.create(PooledConnectionProvider.java:186)
        at com.mongodb.PooledConnectionProvider$ConnectionItemFactory.create(PooledConnectionProvider.java:183)
        at com.mongodb.ConcurrentPool.createNewAndReleasePermitIfFailure(ConcurrentPool.java:150)
        at com.mongodb.ConcurrentPool.get(ConcurrentPool.java:118)
        at com.mongodb.PooledConnectionProvider.get(PooledConnectionProvider.java:75)
        at com.mongodb.DefaultServer.getConnection(DefaultServer.java:73)
        at com.mongodb.BaseCluster$WrappedServer.getConnection(BaseCluster.java:219)
        at com.mongodb.DBTCPConnector$MyPort.getConnection(DBTCPConnector.java:511)
        at com.mongodb.DBTCPConnector$MyPort.get(DBTCPConnector.java:459)
        at com.mongodb.DBTCPConnector.getPrimaryPort(DBTCPConnector.java:417)
        at com.mongodb.DBCollectionImpl.insert(DBCollectionImpl.java:182)
        at com.mongodb.DBCollectionImpl.insert(DBCollectionImpl.java:165)
        at com.mongodb.DBCollection.insert(DBCollection.java:93)
        at com.mongodb.casbah.MongoCollectionBase$class.insert(MongoCollection.scala:621)
        at com.mongodb.casbah.MongoCollection.insert(MongoCollection.scala:1109)
        at com.mongodb.casbah.MongoCollectionBase$class.insert(MongoCollection.scala:606)
        at com.mongodb.casbah.MongoCollection.insert(MongoCollection.scala:1109)
        at com.newscred.analytics.db.mongo.SocialInteractionDAL.insert(SocialInteractionDAL.scala:25)
        at com.newscred.analytics.streaming.AnalyticsStreamProcessor$$anonfun$process$1.apply(AnalyticsStreamProcessor.scala:16)
        at com.newscred.analytics.streaming.AnalyticsStreamProcessor$$anonfun$process$1.apply(AnalyticsStreamProcessor.scala:11)
        at scala.collection.Iterator$class.foreach(Iterator.scala:727)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
        at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:759)
        at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:759)
        at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)


And,

2014-12-18 01:49:09,770 [AnalyticsStreamProcessor.scala:pool-12-thread-2:10] - Starting processing ...
2014-12-18 01:49:38,050 [       Slf4jLogger.scala:sparkDriver-akka.actor.default-dispatcher-1201:71] - unhandled event Failure(akka.pattern.AskTimeoutException: Timed out) in state WaitTransportShutdown
2014-12-18 01:51:00,576 [           Logging.scala:Spark Context ContextCleaner:96] - Error in cleaning thread
java.lang.InterruptedException
        at java.lang.Object.wait(Native Method)
        at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:142)
        at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:136)
        at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply(ContextCleaner.scala:134)
        at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply(ContextCleaner.scala:134)
        at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1311)
        at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:133)
        at org.apache.spark.ContextCleaner$$anon$3.run(ContextCleaner.scala:65)
2014-12-18 01:52:11,688 [           Logging.scala:SparkListenerBus:96] - Uncaught exception in thread SparkListenerBus
java.lang.InterruptedException
        at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:998)
        at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
        at java.util.concurrent.Semaphore.acquire(Semaphore.java:312)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:48)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply(LiveListenerBus.scala:47)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply(LiveListenerBus.scala:47)
        at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1311)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:46)
2014-12-18 01:52:19,171 [             Worker.java:Executor task launch worker-0:353] - Worker.run caught exception, sleeping for 1000 milli seconds!
java.lang.InterruptedException: sleep interrupted
        at java.lang.Thread.sleep(Native Method)
        at com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker.run(Worker.java:351)
        at org.apache.spark.streaming.kinesis.KinesisReceiver.onStart(KinesisReceiver.scala:131)
        at org.apache.spark.streaming.receiver.ReceiverSupervisor.startReceiver(ReceiverSupervisor.scala:121)
        at org.apache.spark.streaming.receiver.ReceiverSupervisor.start(ReceiverSupervisor.scala:106)
        at org.apache.spark.streaming.scheduler.ReceiverTracker$ReceiverLauncher$$anonfun$9.apply(ReceiverTracker.scala:264)
        at org.apache.spark.streaming.scheduler.ReceiverTracker$ReceiverLauncher$$anonfun$9.apply(ReceiverTracker.scala:257)
        at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        at org.apache.spark.scheduler.Task.run(Task.scala:54)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)


I have no clue how to resolve this issue. I will run a memory leak test, but this is a small, simple application and I don't see an obvious leak by eye.
Can anyone suggest how I should investigate?

A.K.M. Ashrafuzzaman
Lead Software Engineer
NewsCred

(M) 880-175-5592433
Twitter | Blog | Facebook

Check out The Academy, your #1 source
for free content marketing resources


Re: Getting OutOfMemoryError and Worker.run caught exception

Posted by Akhil Das <ak...@sigmoidanalytics.com>.
You can go through this doc for tuning
http://spark.apache.org/docs/latest/tuning.html
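
For "GC overhead limit exceeded" in particular, giving the executors more heap and a concurrent collector often helps. A rough sketch of the relevant spark-submit flags (the memory value and the jar name are only illustrative, adjust for your cluster):

    spark-submit \
      --executor-memory 4g \
      --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
      --conf "spark.executor.extraJavaOptions=-XX:+UseConcMarkSweepGC -XX:+PrintGCDetails" \
      your-assembly.jar

The -XX:+PrintGCDetails option will also print GC activity in the executor logs, so you can see whether the heap is genuinely full or just churning.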

More fundamentally, it looks like you are creating a lot of objects and the
JVM is spending most of its time collecting them instead of doing useful
work, which is exactly what "GC overhead limit exceeded" means. If you can
paste the code snippet, it will be easier to understand what's happening.
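
One thing worth checking: your trace shows a new DBPort (with its own BSON decoder buffers) being constructed inside the insert path, which can happen when a MongoClient or collection object is created per record or per RDD. A common fix is to open one client per partition with foreachPartition and reuse it. A minimal sketch with Casbah, which your trace suggests you are using (the host, database, collection names, and the toDBObject conversion are placeholders, substitute your own):

    import com.mongodb.casbah.Imports._

    dstream.foreachRDD { rdd =>
      rdd.foreachPartition { records =>
        // One client per partition, not per record: each MongoClient owns
        // its own connection pool and decoder buffers, so constructing
        // them in a tight loop is a fast way to exhaust the heap.
        val client = MongoClient("mongo-host", 27017)            // placeholder host
        val coll = client("analytics")("social_interactions")    // placeholder db/collection
        try {
          records.foreach { r =>
            coll.insert(toDBObject(r)) // toDBObject: your own record-to-DBObject conversion
          }
        } finally {
          client.close() // always release the pool, even if an insert fails
        }
      }
    }

Going further, a lazily initialized client held in a Scala object (one per executor JVM) would avoid reopening the pool on every batch as well.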

Thanks
Best Regards

On Thu, Dec 18, 2014 at 12:32 PM, A.K.M. Ashrafuzzaman <
ashrafuzzaman.g2@gmail.com> wrote:
> [quoted message trimmed; see the original post above for the full logs and stack traces]