You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Hyukjin Kwon (Jira)" <ji...@apache.org> on 2022/10/19 02:02:00 UTC
[jira] [Resolved] (SPARK-40367) Total size of serialized results of 3730 tasks (64.0 GB) is bigger than spark.driver.maxResultSize (64.0 GB)
[ https://issues.apache.org/jira/browse/SPARK-40367?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon resolved SPARK-40367.
----------------------------------
Resolution: Not A Problem
> Total size of serialized results of 3730 tasks (64.0 GB) is bigger than spark.driver.maxResultSize (64.0 GB)
> -------------------------------------------------------------------------------------------------------------
>
> Key: SPARK-40367
> URL: https://issues.apache.org/jira/browse/SPARK-40367
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.3.2
> Reporter: jackyjfhu
> Priority: Major
>
> I use this code: spark.sql("xx").selectExpr(spark.table(target).columns:_*).write.mode("overwrite").insertInto(target), and I get an error:
>
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Total size of serialized results of 3730 tasks (64.0 GB) is bigger than spark.driver.maxResultSize (64.0 GB)
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1609)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1597)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1596)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1596)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
> at scala.Option.foreach(Option.scala:257)
> at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1830)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1779)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1768)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2099)
> at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:939)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
> at org.apache.spark.rdd.RDD.collect(RDD.scala:938)
> at org.apache.spark.sql.execution.SparkPlan.executeCollectIterator(SparkPlan.scala:304)
> at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec$$anonfun$relationFuture$1$$anonfun$apply$1.apply(BroadcastExchangeExec.scala:76)
> at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec$$anonfun$relationFuture$1$$anonfun$apply$1.apply(BroadcastExchangeExec.scala:73)
> at org.apache.spark.sql.execution.SQLExecution$.withExecutionId(SQLExecution.scala:97)
> at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec$$anonfun$relationFuture$1.apply(BroadcastExchangeExec.scala:72)
> at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec$$anonfun$relationFuture$1.apply(BroadcastExchangeExec.scala:72)
> at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
> at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>
> --conf spark.driver.maxResultSize=64g
> --conf spark.sql.broadcastTimeout=36000
> --conf spark.sql.autoBroadcastJoinThreshold=204857600
> --conf spark.memory.offHeap.enabled=true
> --conf spark.memory.offHeap.size=4g
> --num-executors 500
> --executor-memory 16g
> --executor-cores 2 --driver-memory 80G
> --conf spark.sql.shuffle.partitions=4000
> --conf spark.sql.adaptive.enabled=true
>
> When I increase spark.driver.maxResultSize, it still does not work.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org