Posted to user@spark.apache.org by "sebastian.piu" <se...@gmail.com> on 2016/01/21 15:59:15 UTC
java.lang.ArrayIndexOutOfBoundsException when attempting broadcastjoin
Hi all,
I'm trying to work out a problem when using Spark Streaming. Currently I
have the following piece of code inside a foreachRDD call:

// import static org.apache.spark.sql.functions.broadcast;
DataFrame results = ...; // some DataFrame created from the incoming RDD -
                         // moderately big, I don't want this to be shuffled
DataFrame t = sqlContext.table("a_temp_cached_table"); // a very small table
                                                       // that might mutate over time
results.join(broadcast(t), JOIN_COLUMNS_SEQ)
        .groupBy("column1").count()
        .write()
        .mode(SaveMode.Append)
        .save("/some-path/");
The intention of the code above is to distribute the "t" DataFrame if
required, but to avoid shuffling "results".
This works fine when run from the spark-shell / spark-submit, but when run
from within my executors I get the exception below...
Any thoughts? If I remove the *broadcast(t)* hint it works fine, but then my
big table gets shuffled around.
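For completeness, here is a hedged sketch of the pieces the snippet assumes
but does not show; the single join column and the static import are
assumptions on my part, not part of the original code:

import static org.apache.spark.sql.functions.broadcast;

import java.util.Arrays;
import scala.collection.JavaConversions;
import scala.collection.Seq;

// Hypothetical definition of JOIN_COLUMNS_SEQ: the Scala Seq of join
// column names that DataFrame.join(DataFrame, Seq<String>) expects,
// built from Java (assuming the join is on "column1" only).
Seq<String> JOIN_COLUMNS_SEQ =
        JavaConversions.asScalaBuffer(Arrays.asList("column1")).toSeq();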
2016-01-21 14:47:00 ERROR JobScheduler:95 - Error running job streaming job 1453387560000 ms.0
java.lang.ArrayIndexOutOfBoundsException: 8388607
    at com.esotericsoftware.kryo.util.IdentityObjectIntMap.clear(IdentityObjectIntMap.java:345)
    at com.esotericsoftware.kryo.util.MapReferenceResolver.reset(MapReferenceResolver.java:47)
    at com.esotericsoftware.kryo.Kryo.reset(Kryo.java:804)
    at com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:570)
    at org.apache.spark.serializer.KryoSerializationStream.writeObject(KryoSerializer.scala:194)
    at org.apache.spark.broadcast.TorrentBroadcast$.blockifyObject(TorrentBroadcast.scala:203)
    at org.apache.spark.broadcast.TorrentBroadcast.writeBlocks(TorrentBroadcast.scala:102)
    at org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:85)
    at org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
    at org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:63)
    at org.apache.spark.SparkContext.broadcast(SparkContext.scala:1326)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:91)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:79)
    at org.apache.spark.sql.execution.SQLExecution$.withExecutionId(SQLExecution.scala:100)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79)
    at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
    at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
2016-01-21 14:47:00 ERROR RslApp:60 - ERROR executing the application
java.lang.ArrayIndexOutOfBoundsException: 8388607
    ... (stack trace identical to the one above)
[Stage 10:> (0 + 24) / 24]
2016-01-21 14:47:02 ERROR InsertIntoHadoopFsRelation:95 - Aborting job.
java.lang.InterruptedException
    at java.lang.Object.wait(Native Method)
    at java.lang.Object.wait(Object.java:502)
    at org.apache.spark.scheduler.JobWaiter.awaitResult(JobWaiter.scala:73)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:612)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1922)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply$mcV$sp(InsertIntoHadoopFsRelation.scala:150)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply(InsertIntoHadoopFsRelation.scala:108)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply(InsertIntoHadoopFsRelation.scala:108)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation.run(InsertIntoHadoopFsRelation.scala:108)
    at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
    at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
    at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:125)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:125)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
    at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:242)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:139)
    at com.hsbc.rsl.spark.streaming.receiver.functions.PersistLevel3WithDataframes.call(PersistLevel3WithDataframes.java:84)
    at com.hsbc.rsl.spark.streaming.receiver.functions.PersistLevel3WithDataframes.call(PersistLevel3WithDataframes.java:27)
    at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$3.apply(JavaDStreamLike.scala:335)
    at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$3.apply(JavaDStreamLike.scala:335)
    at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:656)
    at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:656)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:424)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:49)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
    at scala.util.Try$.apply(Try.scala:161)
    at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:224)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:223)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
2016-01-21 14:47:02 ERROR DefaultWriterContainer:74 - Job job_201601211447_0000 aborted.
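A possible mitigation, offered as an assumption rather than a confirmed fix:
the failing index 8388607 is 2^23 - 1, which looks like Kryo's reference map
overflowing, and Kryo reference tracking can be disabled when the broadcast
objects contain no circular references:

import org.apache.spark.SparkConf;

SparkConf conf = new SparkConf()
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        // Assumption: the overflow happens in Kryo's identity/reference map;
        // turning tracking off is only safe without cyclic object graphs.
        .set("spark.kryo.referenceTracking", "false");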
--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/java-lang-ArrayIndexOutOfBoundsException-when-attempting-broadcastjoin-tp26034.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
Re: java.lang.ArrayIndexOutOfBoundsException when attempting broadcastjoin
Posted by Sebastian Piu <se...@gmail.com>.
I'm using Spark 1.6.0.
I tried removing Kryo and reverting to Java serialisation, and I get a
different error, which may point in the right direction...
java.lang.AssertionError: assertion failed: No plan for BroadcastHint
+- InMemoryRelation [tradeId#30,tradeVersion#31,agreement#49,counterParty#38], true, 10000, StorageLevel(true, true, false, true, 1), Union, Some(ingest_all_union_trades)
    at scala.Predef$.assert(Predef.scala:179)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
    at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:336)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
    at org.apache.spark.sql.execution.SparkStrategies$EquiJoinSelection$.apply(SparkStrategies.scala:105)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
    at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:336)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
    at org.apache.spark.sql.execution.SparkStrategies$Aggregation$.apply(SparkStrategies.scala:217)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:53)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation.run(InsertIntoHadoopFsRelation.scala:108)
    at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
    at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
    at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:125)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:125)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
    at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:242)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:139)
    at com.hsbc.rsl.spark.streaming.receiver.functions.PersistLevel3WithDataframes.preAggregateL4(PersistLevel3WithDataframes.java:133)
    at com.hsbc.rsl.spark.streaming.receiver.functions.PersistLevel3WithDataframes.call(PersistLevel3WithDataframes.java:93)
    at com.hsbc.rsl.spark.streaming.receiver.functions.PersistLevel3WithDataframes.call(PersistLevel3WithDataframes.java:27)
    at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$3.apply(JavaDStreamLike.scala:335)
    at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$3.apply(JavaDStreamLike.scala:335)
    at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:656)
    at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:656)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:424)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:49)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
    at scala.util.Try$.apply(Try.scala:161)
    at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:224)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:223)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
2016-01-21 15:28:32 ERROR RslApp:60 - ERROR executing the application
java.lang.AssertionError: assertion failed: No plan for BroadcastHint
+- InMemoryRelation [tradeId#30,tradeVersion#31,agreement#49,counterParty#38], true, 10000, StorageLevel(true, true, false, true, 1), Union, Some(ingest_all_union_trades)
    ... (stack trace identical to the one above)
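If the explicit hint cannot be planned over a cached table in 1.6, a hedged
workaround is to drop broadcast() and let the planner pick a broadcast join
via the size threshold; this is a sketch only, and the 50 MB value is
illustrative rather than taken from this thread:

// Raise the auto-broadcast threshold so the small cached table may be
// broadcast without an explicit broadcast() hint.
sqlContext.setConf("spark.sql.autoBroadcastJoinThreshold",
        String.valueOf(50L * 1024 * 1024));
// The planner may now choose BroadcastHashJoin for the small side.
DataFrame joined = results.join(t, JOIN_COLUMNS_SEQ);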
On Thu, Jan 21, 2016 at 3:17 PM, Ted Yu <yu...@gmail.com> wrote:
> You were using Kryo serialization?
>
> If you switch to Java serialization, your job should run fine.
>
> Which Spark release are you using?
>
> Thanks
>
> On Thu, Jan 21, 2016 at 6:59 AM, sebastian.piu <se...@gmail.com>
> wrote:
>
>> Hi all,
>>
>> I'm trying to work out a problem when using Spark Streaming, currently I
>> have the following piece of code inside a foreachRDD call:
>>
>> Dataframe results = ... //some dataframe created from the incoming rdd -
>> moderately big, I don't want this to be shuffled
>> DataFrame t = sqlContext.table("a_temp_cached_table"); //a very small
>> table
>> - that might mutate over time
>> DataFrame x = results.join(*broadcast(t)*, JOIN_COLUMNS_SEQ)
>> .groupBy("column1").count()
>> .write()
>> .mode(SaveMode.Append)
>> .save("/some-path/");
>>
>> The intention of the code above is to distribute the "t" dataframe if
>> required, but avoid shuffling the "results".
>>
>> This works fine when ran on scala-shell / spark-submit, but when ran from
>> within my executors I get the exception below...
>>
>> Any thoughts? If I remove the *broadcast(t)* then it works fine but where
>> my
>> big table is shuffled around.
>>
>> 2016-01-21 14:47:00 ERROR JobScheduler:95 - Error running job streaming
>> job
>> 1453387560000 ms.0
>> java.lang.ArrayIndexOutOfBoundsException: 8388607
>> at
>>
>> com.esotericsoftware.kryo.util.IdentityObjectIntMap.clear(IdentityObjectIntMap.java:345)
>> at
>>
>> com.esotericsoftware.kryo.util.MapReferenceResolver.reset(MapReferenceResolver.java:47)
>> at com.esotericsoftware.kryo.Kryo.reset(Kryo.java:804)
>> at
>> com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:570)
>> at
>>
>> org.apache.spark.serializer.KryoSerializationStream.writeObject(KryoSerializer.scala:194)
>> at
>>
>> org.apache.spark.broadcast.TorrentBroadcast$.blockifyObject(TorrentBroadcast.scala:203)
>> at
>>
>> org.apache.spark.broadcast.TorrentBroadcast.writeBlocks(TorrentBroadcast.scala:102)
>> at
>>
>> org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:85)
>> at
>>
>> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
>> at
>>
>> org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:63)
>> at
>> org.apache.spark.SparkContext.broadcast(SparkContext.scala:1326)
>> at
>>
>> org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:91)
>> at
>>
>> org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:79)
>> at
>>
>> org.apache.spark.sql.execution.SQLExecution$.withExecutionId(SQLExecution.scala:100)
>> at
>>
>> org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79)
>> at
>>
>> org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79)
>> at
>>
>> scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
>> at
>>
>> scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
>> at
>>
>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>> at
>>
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>> at java.lang.Thread.run(Thread.java:745)
>> 2016-01-21 14:47:00 ERROR RslApp:60 - ERROR executing the application
>> java.lang.ArrayIndexOutOfBoundsException: 8388607
>> at
>>
>> com.esotericsoftware.kryo.util.IdentityObjectIntMap.clear(IdentityObjectIntMap.java:345)
>> at
>>
>> com.esotericsoftware.kryo.util.MapReferenceResolver.reset(MapReferenceResolver.java:47)
>> at com.esotericsoftware.kryo.Kryo.reset(Kryo.java:804)
>> at
>> com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:570)
>> at
>>
>> org.apache.spark.serializer.KryoSerializationStream.writeObject(KryoSerializer.scala:194)
>> at
>>
>> org.apache.spark.broadcast.TorrentBroadcast$.blockifyObject(TorrentBroadcast.scala:203)
>> at
>>
>> org.apache.spark.broadcast.TorrentBroadcast.writeBlocks(TorrentBroadcast.scala:102)
>> at
>>
>> org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:85)
>> at
>>
>> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
>> at
>>
>> org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:63)
>> at
>> org.apache.spark.SparkContext.broadcast(SparkContext.scala:1326)
>> at
>>
>> org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:91)
>> at
>>
>> org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:79)
>> at
>>
>> org.apache.spark.sql.execution.SQLExecution$.withExecutionId(SQLExecution.scala:100)
>> at
>>
>> org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79)
>> at
>>
>> org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79)
>> at
>>
>> scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
>> at
>>
>> scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
>> at
>>
>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>> at
>>
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>> at java.lang.Thread.run(Thread.java:745)
>> [Stage 10:> (0 +
>> 24) /
>> 24]2016-01-21 14:47:02 ERROR InsertIntoHadoopFsRelation:95 - Aborting job.
>> java.lang.InterruptedException
>> at java.lang.Object.wait(Native Method)
>> at java.lang.Object.wait(Object.java:502)
>> at
>> org.apache.spark.scheduler.JobWaiter.awaitResult(JobWaiter.scala:73)
>> at
>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:612)
>> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
>> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
>> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1922)
>> at
>>
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply$mcV$sp(InsertIntoHadoopFsRelation.scala:150)
>> at
>>
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply(InsertIntoHadoopFsRelation.scala:108)
>> at
>>
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1.apply(InsertIntoHadoopFsRelation.scala:108)
>> at
>>
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56)
>> at
>>
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation.run(InsertIntoHadoopFsRelation.scala:108)
>> at
>>
>> org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
>> at
>>
>> org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
>> at
>>
>> org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
>> at
>>
>> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:127)
>> at
>>
>> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:125)
>> at
>>
>> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>> at
>> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:125)
>> at
>>
>> org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
>> at
>>
>> org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
>> at
>>
>> org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:242)
>> at
>> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148)
>> at
>> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:139)
>> at
>>
>> com.hsbc.rsl.spark.streaming.receiver.functions.PersistLevel3WithDataframes.call(PersistLevel3WithDataframes.java:84)
>> at
>>
>> com.hsbc.rsl.spark.streaming.receiver.functions.PersistLevel3WithDataframes.call(PersistLevel3WithDataframes.java:27)
>> at
>>
>> org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$3.apply(JavaDStreamLike.scala:335)
>> at
>>
>> org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$3.apply(JavaDStreamLike.scala:335)
>> at
>>
>> org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:656)
>> at
>>
>> org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:656)
>> at
>>
>> org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:50)
>> at
>>
>> org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
>> at
>>
>> org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
>> at
>>
>> org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:424)
>> at
>>
>> org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:49)
>> at
>>
>> org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
>> at
>>
>> org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
>> at scala.util.Try$.apply(Try.scala:161)
>> at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
>> at
>>
>> org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:224)
>> at
>>
>> org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
>> at
>>
>> org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
>> at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
>> at
>>
>> org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:223)
>> at
>>
>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>> at
>>
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>> at java.lang.Thread.run(Thread.java:745)
>> 2016-01-21 14:47:02 ERROR DefaultWriterContainer:74 - Job
>> job_201601211447_0000 aborted.
>>
>>
>>
>>
>>
>>
>>
>>
>> --
>> View this message in context:
>> http://apache-spark-user-list.1001560.n3.nabble.com/java-lang-ArrayIndexOutOfBoundsException-when-attempting-broadcastjoin-tp26034.html
>> Sent from the Apache Spark User List mailing list archive at Nabble.com.
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: user-unsubscribe@spark.apache.org
>> For additional commands, e-mail: user-help@spark.apache.org
>>
>>
>
Re: java.lang.ArrayIndexOutOfBoundsException when attempting broadcastjoin
Posted by Ted Yu <yu...@gmail.com>.
You were using Kryo serialization?
If you switch to Java serialization, your job should run fine.
Which Spark release are you using?
Thanks
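A minimal sketch of the suggested switch, assuming the serializer is set
programmatically (Spark 1.6 defaults to Java serialization unless
spark.serializer was overridden):

import org.apache.spark.SparkConf;

SparkConf conf = new SparkConf()
        // Explicitly select Java serialization instead of Kryo.
        .set("spark.serializer", "org.apache.spark.serializer.JavaSerializer");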
On Thu, Jan 21, 2016 at 6:59 AM, sebastian.piu <se...@gmail.com>
wrote:
> Hi all,
>
> I'm trying to work out a problem when using Spark Streaming. [...]