Posted to issues@spark.apache.org by "Michael Chen (Jira)" <ji...@apache.org> on 2019/10/22 23:26:00 UTC
[jira] [Updated] (SPARK-29561) Large Case Statement Code Generation OOM
[ https://issues.apache.org/jira/browse/SPARK-29561?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael Chen updated SPARK-29561:
---------------------------------
Description:
Spark Configuration
spark.driver.memory = 1g
spark.master = "local"
spark.deploy.mode = "client"
Try to execute a CASE statement with 3000+ branches (a minimal reproduction sketch follows).
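A minimal sketch of such a query, written for spark-shell. The table/column names and the output path are hypothetical, not from the original report; the branch shape (string column cast to int, result -i for branch i) mirrors the generated code quoted later in this report:
{noformat}
import org.apache.spark.sql.SparkSession

// Hypothetical repro sketch -- names and paths are illustrative only.
// Note: spark.driver.memory only takes effect if set before the driver
// JVM starts (e.g. via spark-submit); shown here for completeness.
val spark = SparkSession.builder()
  .master("local")
  .config("spark.driver.memory", "1g")
  .getOrCreate()
import spark.implicits._

// A small string column; the CASE below casts it to int in every branch,
// matching the UTF8String.toInt calls in the generated code.
Seq("1", "2", "3").toDF("col").createOrReplaceTempView("t")

// 3000+ branches: WHEN CAST(col AS INT) = i THEN -i
val branches = (1 to 3000)
  .map(i => s"WHEN CAST(col AS INT) = $i THEN ${-i}")
  .mkString(" ")

// Writing to a file matches the FileFormatWriter frames in the trace below.
spark.sql(s"SELECT CASE $branches END AS result FROM t")
  .write.mode("overwrite").parquet("/tmp/spark29561_repro")
{noformat}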
Spark runs for a while before it OOMs:
{noformat}
java.lang.OutOfMemoryError: GC overhead limit exceeded
at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:182)
at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1320)
at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178)
at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73)
19/10/22 16:19:54 ERROR FileFormatWriter: Aborting job null.
java.lang.OutOfMemoryError: GC overhead limit exceeded
at java.util.HashMap.newNode(HashMap.java:1750)
at java.util.HashMap.putVal(HashMap.java:631)
at java.util.HashMap.putMapEntries(HashMap.java:515)
at java.util.HashMap.putAll(HashMap.java:785)
at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3345)
at org.codehaus.janino.UnitCompiler.access$5000(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$8.visitLocalVariableDeclarationStatement(UnitCompiler.java:3230)
at org.codehaus.janino.UnitCompiler$8.visitLocalVariableDeclarationStatement(UnitCompiler.java:3198)
at org.codehaus.janino.Java$LocalVariableDeclarationStatement.accept(Java.java:3351)
at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3254)
at org.codehaus.janino.UnitCompiler.access$3900(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$8.visitBlock(UnitCompiler.java:3216)
at org.codehaus.janino.UnitCompiler$8.visitBlock(UnitCompiler.java:3198)
at org.codehaus.janino.Java$Block.accept(Java.java:2756)
at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3260)
at org.codehaus.janino.UnitCompiler.access$4000(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$8.visitDoStatement(UnitCompiler.java:3217)
at org.codehaus.janino.UnitCompiler$8.visitDoStatement(UnitCompiler.java:3198)
at org.codehaus.janino.Java$DoStatement.accept(Java.java:3304)
at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3186)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3009)
at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1336)
at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1309)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:799)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:958)
at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:393)
at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:385)
at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1286)
19/10/22 16:19:54 ERROR Utils: throw uncaught fatal error in thread Spark Context Cleaner
java.lang.OutOfMemoryError: GC overhead limit exceeded
at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:182)
at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1320)
at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178)
at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73){noformat}
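The trace shows the OOM happening inside Janino's buildLocalVariableMap while compiling the generated method. As a diagnostic step (an editor's assumption, not part of the original report), one can turn off whole-stage codegen to keep Spark from fusing the whole plan into one giant method; individual expressions may still be compiled, so this narrows the failure down rather than guaranteeing a workaround:
{noformat}
// Assumption / diagnostic step, not from the original report:
// with whole-stage codegen disabled, Spark does not fuse the scan and
// projection into a single ~78k-line method for Janino to compile.
spark.conf.set("spark.sql.codegen.wholeStage", "false")
{noformat}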
The generated code looks like the following (note the generated line-number comments: the single project_doConsume method runs past line 78,000):
{noformat}
/* 029 */ private void project_doConsume(InternalRow scan_row, UTF8String project_expr_0, boolean project_exprIsNull_0) throws java.io.IOException {
/* 030 */ byte project_caseWhenResultState = -1;
/* 031 */ do {
/* 032 */ boolean project_isNull1 = true;
/* 033 */ boolean project_value1 = false;
/* 034 */
/* 035 */ boolean project_isNull2 = project_exprIsNull_0;
/* 036 */ int project_value2 = -1;
/* 037 */ if (!project_exprIsNull_0) {
/* 038 */ UTF8String.IntWrapper project_intWrapper = new UTF8String.IntWrapper();
/* 039 */ if (project_expr_0.toInt(project_intWrapper)) {
/* 040 */ project_value2 = project_intWrapper.value;
/* 041 */ } else {
/* 042 */ project_isNull2 = true;
/* 043 */ }
/* 044 */ project_intWrapper = null;
/* 045 */
/* 046 */ }{noformat}
... roughly 78,000 similar generated lines elided (lines 047 through 78047) ...
{noformat}
if (!project_isNull15002) {
/* 78048 */ project_isNull15001 = false; // resultCode could change nullability.
/* 78049 */ project_value15001 = project_value15002 == 3000;
/* 78050 */
/* 78051 */ }
/* 78052 */ if (!project_isNull15001 && project_value15001) {
/* 78053 */ project_caseWhenResultState = (byte)(false ? 1 : 0);
/* 78054 */ project_project_value = -3000;
/* 78055 */ continue;
/* 78056 */ }
/* 78057 */
/* 78058 */ } while (false);
/* 78059 */ // TRUE if any condition is met and the result is null, or no any condition is met.
/* 78060 */ final boolean project_isNull = (project_caseWhenResultState != 0);
/* 78061 */ project_mutableStateArray2[0].zeroOutNullBytes();
/* 78062 */
/* 78063 */ if (project_isNull) {
/* 78064 */ project_mutableStateArray2[0].setNullAt(0);
/* 78065 */ } else {
/* 78066 */ project_mutableStateArray2[0].write(0, project_project_value);
/* 78067 */ }
/* 78068 */ append(project_mutableStateArray[0]);
/* 78069 */
/* 78070 */ }
/* 78071 */
/* 78072 */ protected void processNext() throws java.io.IOException {
/* 78073 */ while (scan_mutableStateArray[0].hasNext()) {
/* 78074 */ InternalRow scan_row = (InternalRow) scan_mutableStateArray[0].next();
/* 78075 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1);
/* 78076 */ boolean scan_isNull = scan_row.isNullAt(0);
/* 78077 */ UTF8String scan_value = scan_isNull ? null : (scan_row.getUTF8String(0));
/* 78078 */
/* 78079 */ project_doConsume(scan_row, scan_value, scan_isNull);
/* 78080 */ if (shouldStop()) return;
/* 78081 */ }
/* 78082 */ }
/* 78083 */
/* 78084 */ }{noformat}
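For inspection, generated code like the above can be dumped without waiting for the job to OOM, using Spark's debug helpers (a usage sketch, reusing the hypothetical query from the repro above):
{noformat}
import org.apache.spark.sql.execution.debug._

// Prints each WholeStageCodegen subtree followed by its generated Java
// source, including the /* NNN */ line-number comments quoted above.
spark.sql(s"SELECT CASE $branches END AS result FROM t").debugCodegen()
{noformat}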
> Large Case Statement Code Generation OOM
> ----------------------------------------
>
> Key: SPARK-29561
> URL: https://issues.apache.org/jira/browse/SPARK-29561
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.3.0
> Reporter: Michael Chen
> Priority: Major
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)