You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Apache Spark (JIRA)" <ji...@apache.org> on 2018/07/24 11:18:00 UTC

[jira] [Commented] (SPARK-24901) Merge the codegen of RegularHashMap and fastHashMap to reduce compiler maxCodesize when VectorizedHashMap is false

    [ https://issues.apache.org/jira/browse/SPARK-24901?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16554106#comment-16554106 ] 

Apache Spark commented on SPARK-24901:
--------------------------------------

User 'heary-cao' has created a pull request for this issue:
https://github.com/apache/spark/pull/21860

> Merge the codegen of RegularHashMap and fastHashMap to reduce compiler maxCodesize when VectorizedHashMap is false
> ------------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-24901
>                 URL: https://issues.apache.org/jira/browse/SPARK-24901
>             Project: Spark
>          Issue Type: Improvement
>          Components: SQL
>    Affects Versions: 2.4.0
>            Reporter: caoxuewen
>            Priority: Major
>
> Currently, the code generated to update the UnsafeRow aggregation buffer in hash aggregation is emitted twice: once for the FastHashMap and once for the RegularHashMap.
> The two separate copies are only needed when VectorizedHashMap is true. In all other cases they can be merged into a single copy to reduce the maximum compiled code size (maxCodesize). Thanks.
> case class DistinctAgg(a: Int, b: Float, c: Double, d: Int, e: String)
> spark.sparkContext.parallelize(
>       DistinctAgg(8, 2, 3, 4, "a") ::
>       DistinctAgg(9, 3, 4, 5, "b") :: Nil).toDF().createOrReplaceTempView("distinctAgg")
> val df = sql("select a,b,e, min(d) as mind, min(case when a > 10 then a else null end) as mincasea, min(a) as mina from distinctAgg group by a, b, e")
> println(org.apache.spark.sql.execution.debug.codegenString(df.queryExecution.executedPlan))
> df.show()
> The generated code looks like this:
>  *Before the change:*
> Generated code:
> /* 001 */ public Object generate(Object[] references) {
> /* 002 */   return new GeneratedIteratorForCodegenStage1(references);
> /* 003 */ }
> /* 004 */
> ...............
> /* 354 */
> /* 355 */     if (agg_fastAggBuffer_0 != null) {
> /* 356 */       // common sub-expressions
> /* 357 */
> /* 358 */       // evaluate aggregate function
> /* 359 */       agg_agg_isNull_31_0 = true;
> /* 360 */       int agg_value_34 = -1;
> /* 361 */
> /* 362 */       boolean agg_isNull_32 = agg_fastAggBuffer_0.isNullAt(0);
> /* 363 */       int agg_value_35 = agg_isNull_32 ?
> /* 364 */       -1 : (agg_fastAggBuffer_0.getInt(0));
> /* 365 */
> /* 366 */       if (!agg_isNull_32 && (agg_agg_isNull_31_0 ||
> /* 367 */           agg_value_34 > agg_value_35)) {
> /* 368 */         agg_agg_isNull_31_0 = false;
> /* 369 */         agg_value_34 = agg_value_35;
> /* 370 */       }
> /* 371 */
> /* 372 */       if (!false && (agg_agg_isNull_31_0 ||
> /* 373 */           agg_value_34 > agg_expr_2_0)) {
> /* 374 */         agg_agg_isNull_31_0 = false;
> /* 375 */         agg_value_34 = agg_expr_2_0;
> /* 376 */       }
> /* 377 */       agg_agg_isNull_34_0 = true;
> /* 378 */       int agg_value_37 = -1;
> /* 379 */
> /* 380 */       boolean agg_isNull_35 = agg_fastAggBuffer_0.isNullAt(1);
> /* 381 */       int agg_value_38 = agg_isNull_35 ?
> /* 382 */       -1 : (agg_fastAggBuffer_0.getInt(1));
> /* 383 */
> /* 384 */       if (!agg_isNull_35 && (agg_agg_isNull_34_0 ||
> /* 385 */           agg_value_37 > agg_value_38)) {
> /* 386 */         agg_agg_isNull_34_0 = false;
> /* 387 */         agg_value_37 = agg_value_38;
> /* 388 */       }
> /* 389 */
> /* 390 */       byte agg_caseWhenResultState_1 = -1;
> /* 391 */       do {
> /* 392 */         boolean agg_value_40 = false;
> /* 393 */         agg_value_40 = agg_expr_0_0 > 10;
> /* 394 */         if (!false && agg_value_40) {
> /* 395 */           agg_caseWhenResultState_1 = (byte)(false ? 1 : 0);
> /* 396 */           agg_agg_value_39_0 = agg_expr_0_0;
> /* 397 */           continue;
> /* 398 */         }
> /* 399 */
> /* 400 */         agg_caseWhenResultState_1 = (byte)(true ? 1 : 0);
> /* 401 */         agg_agg_value_39_0 = -1;
> /* 402 */
> /* 403 */       } while (false);
> /* 404 */       // TRUE if any condition is met and the result is null, or no any condition is met.
> /* 405 */       final boolean agg_isNull_36 = (agg_caseWhenResultState_1 != 0);
> /* 406 */
> /* 407 */       if (!agg_isNull_36 && (agg_agg_isNull_34_0 ||
> /* 408 */           agg_value_37 > agg_agg_value_39_0)) {
> /* 409 */         agg_agg_isNull_34_0 = false;
> /* 410 */         agg_value_37 = agg_agg_value_39_0;
> /* 411 */       }
> /* 412 */       agg_agg_isNull_42_0 = true;
> /* 413 */       int agg_value_45 = -1;
> /* 414 */
> /* 415 */       boolean agg_isNull_43 = agg_fastAggBuffer_0.isNullAt(2);
> /* 416 */       int agg_value_46 = agg_isNull_43 ?
> /* 417 */       -1 : (agg_fastAggBuffer_0.getInt(2));
> /* 418 */
> /* 419 */       if (!agg_isNull_43 && (agg_agg_isNull_42_0 ||
> /* 420 */           agg_value_45 > agg_value_46)) {
> /* 421 */         agg_agg_isNull_42_0 = false;
> /* 422 */         agg_value_45 = agg_value_46;
> /* 423 */       }
> /* 424 */
> /* 425 */       if (!false && (agg_agg_isNull_42_0 ||
> /* 426 */           agg_value_45 > agg_expr_0_0)) {
> /* 427 */         agg_agg_isNull_42_0 = false;
> /* 428 */         agg_value_45 = agg_expr_0_0;
> /* 429 */       }
> /* 430 */       // update fast row
> /* 431 */       agg_fastAggBuffer_0.setInt(0, agg_value_34);
> /* 432 */
> /* 433 */       if (!agg_agg_isNull_34_0) {
> /* 434 */         agg_fastAggBuffer_0.setInt(1, agg_value_37);
> /* 435 */       } else {
> /* 436 */         agg_fastAggBuffer_0.setNullAt(1);
> /* 437 */       }
> /* 438 */
> /* 439 */       agg_fastAggBuffer_0.setInt(2, agg_value_45);
> /* 440 */     } else {
> /* 441 */       // common sub-expressions
> /* 442 */
> /* 443 */       // evaluate aggregate function
> /* 444 */       agg_agg_isNull_17_0 = true;
> /* 445 */       int agg_value_20 = -1;
> /* 446 */
> /* 447 */       boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
> /* 448 */       int agg_value_21 = agg_isNull_18 ?
> /* 449 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
> /* 450 */
> /* 451 */       if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
> /* 452 */           agg_value_20 > agg_value_21)) {
> /* 453 */         agg_agg_isNull_17_0 = false;
> /* 454 */         agg_value_20 = agg_value_21;
> /* 455 */       }
> /* 456 */
> /* 457 */       if (!false && (agg_agg_isNull_17_0 ||
> /* 458 */           agg_value_20 > agg_expr_2_0)) {
> /* 459 */         agg_agg_isNull_17_0 = false;
> /* 460 */         agg_value_20 = agg_expr_2_0;
> /* 461 */       }
> /* 462 */       agg_agg_isNull_20_0 = true;
> /* 463 */       int agg_value_23 = -1;
> /* 464 */
> /* 465 */       boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
> /* 466 */       int agg_value_24 = agg_isNull_21 ?
> /* 467 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
> /* 468 */
> /* 469 */       if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
> /* 470 */           agg_value_23 > agg_value_24)) {
> /* 471 */         agg_agg_isNull_20_0 = false;
> /* 472 */         agg_value_23 = agg_value_24;
> /* 473 */       }
> /* 474 */
> /* 475 */       byte agg_caseWhenResultState_0 = -1;
> /* 476 */       do {
> /* 477 */         boolean agg_value_26 = false;
> /* 478 */         agg_value_26 = agg_expr_0_0 > 10;
> /* 479 */         if (!false && agg_value_26) {
> /* 480 */           agg_caseWhenResultState_0 = (byte)(false ? 1 : 0);
> /* 481 */           agg_agg_value_25_0 = agg_expr_0_0;
> /* 482 */           continue;
> /* 483 */         }
> /* 484 */
> /* 485 */         agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
> /* 486 */         agg_agg_value_25_0 = -1;
> /* 487 */
> /* 488 */       } while (false);
> /* 489 */       // TRUE if any condition is met and the result is null, or no any condition is met.
> /* 490 */       final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0);
> /* 491 */
> /* 492 */       if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
> /* 493 */           agg_value_23 > agg_agg_value_25_0)) {
> /* 494 */         agg_agg_isNull_20_0 = false;
> /* 495 */         agg_value_23 = agg_agg_value_25_0;
> /* 496 */       }
> /* 497 */       agg_agg_isNull_28_0 = true;
> /* 498 */       int agg_value_31 = -1;
> /* 499 */
> /* 500 */       boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
> /* 501 */       int agg_value_32 = agg_isNull_29 ?
> /* 502 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
> /* 503 */
> /* 504 */       if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
> /* 505 */           agg_value_31 > agg_value_32)) {
> /* 506 */         agg_agg_isNull_28_0 = false;
> /* 507 */         agg_value_31 = agg_value_32;
> /* 508 */       }
> /* 509 */
> /* 510 */       if (!false && (agg_agg_isNull_28_0 ||
> /* 511 */           agg_value_31 > agg_expr_0_0)) {
> /* 512 */         agg_agg_isNull_28_0 = false;
> /* 513 */         agg_value_31 = agg_expr_0_0;
> /* 514 */       }
> /* 515 */       // update unsafe row buffer
> /* 516 */       agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
> /* 517 */
> /* 518 */       if (!agg_agg_isNull_20_0) {
> /* 519 */         agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23);
> /* 520 */       } else {
> /* 521 */         agg_unsafeRowAggBuffer_0.setNullAt(1);
> /* 522 */       }
> /* 523 */
> /* 524 */       agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
> /* 525 */
> /* 526 */     }
> /* 527 */
> /* 528 */   }
> ......................
> /* 554 */     // output the result
> /* 555 */
> /* 556 */     while (agg_fastHashMapIter_0.next()) {
> /* 557 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_fastHashMapIter_0.getKey();
> /* 558 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_fastHashMapIter_0.getValue();
> /* 559 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
> /* 560 */
> /* 561 */       if (shouldStop()) return;
> /* 562 */     }
> /* 563 */     agg_fastHashMap_0.close();
> /* 564 */
> /* 565 */     while (agg_mapIter_0.next()) {
> /* 566 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey();
> /* 567 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_mapIter_0.getValue();
> /* 568 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
> /* 569 */
> /* 570 */       if (shouldStop()) return;
> /* 571 */     }
> /* 572 */
> /* 573 */     agg_mapIter_0.close();
> /* 574 */     if (agg_sorter_0 == null) {
> /* 575 */       agg_hashMap_0.free();
> /* 576 */     }
> /* 577 */   }
> /* 578 */
> /* 579 */ }
> whole codegen max code size:954
>  *After the change:*
> Generated code:
> /* 001 */ public Object generate(Object[] references) {
> /* 002 */   return new GeneratedIteratorForCodegenStage1(references);
> /* 003 */ }
> /* 004 */
> .............
> /* 350 */
> /* 351 */     if (agg_fastAggBuffer_0 != null) {
> /* 352 */       agg_unsafeRowAggBuffer_0 = agg_fastAggBuffer_0;
> /* 353 */     }
> /* 354 */
> /* 355 */     // common sub-expressions
> /* 356 */
> /* 357 */     // evaluate aggregate function
> /* 358 */     agg_agg_isNull_17_0 = true;
> /* 359 */     int agg_value_20 = -1;
> /* 360 */
> /* 361 */     boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
> /* 362 */     int agg_value_21 = agg_isNull_18 ?
> /* 363 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
> /* 364 */
> /* 365 */     if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
> /* 366 */         agg_value_20 > agg_value_21)) {
> /* 367 */       agg_agg_isNull_17_0 = false;
> /* 368 */       agg_value_20 = agg_value_21;
> /* 369 */     }
> /* 370 */
> /* 371 */     if (!false && (agg_agg_isNull_17_0 ||
> /* 372 */         agg_value_20 > agg_expr_2_0)) {
> /* 373 */       agg_agg_isNull_17_0 = false;
> /* 374 */       agg_value_20 = agg_expr_2_0;
> /* 375 */     }
> /* 376 */     agg_agg_isNull_20_0 = true;
> /* 377 */     int agg_value_23 = -1;
> /* 378 */
> /* 379 */     boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
> /* 380 */     int agg_value_24 = agg_isNull_21 ?
> /* 381 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
> /* 382 */
> /* 383 */     if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
> /* 384 */         agg_value_23 > agg_value_24)) {
> /* 385 */       agg_agg_isNull_20_0 = false;
> /* 386 */       agg_value_23 = agg_value_24;
> /* 387 */     }
> /* 388 */
> /* 389 */     byte agg_caseWhenResultState_0 = -1;
> /* 390 */     do {
> /* 391 */       boolean agg_value_26 = false;
> /* 392 */       agg_value_26 = agg_expr_0_0 > 10;
> /* 393 */       if (!false && agg_value_26) {
> /* 394 */         agg_caseWhenResultState_0 = (byte)(false ? 1 : 0);
> /* 395 */         agg_agg_value_25_0 = agg_expr_0_0;
> /* 396 */         continue;
> /* 397 */       }
> /* 398 */
> /* 399 */       agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
> /* 400 */       agg_agg_value_25_0 = -1;
> /* 401 */
> /* 402 */     } while (false);
> /* 403 */     // TRUE if any condition is met and the result is null, or no any condition is met.
> /* 404 */     final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0);
> /* 405 */
> /* 406 */     if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
> /* 407 */         agg_value_23 > agg_agg_value_25_0)) {
> /* 408 */       agg_agg_isNull_20_0 = false;
> /* 409 */       agg_value_23 = agg_agg_value_25_0;
> /* 410 */     }
> /* 411 */     agg_agg_isNull_28_0 = true;
> /* 412 */     int agg_value_31 = -1;
> /* 413 */
> /* 414 */     boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
> /* 415 */     int agg_value_32 = agg_isNull_29 ?
> /* 416 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
> /* 417 */
> /* 418 */     if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
> /* 419 */         agg_value_31 > agg_value_32)) {
> /* 420 */       agg_agg_isNull_28_0 = false;
> /* 421 */       agg_value_31 = agg_value_32;
> /* 422 */     }
> /* 423 */
> /* 424 */     if (!false && (agg_agg_isNull_28_0 ||
> /* 425 */         agg_value_31 > agg_expr_0_0)) {
> /* 426 */       agg_agg_isNull_28_0 = false;
> /* 427 */       agg_value_31 = agg_expr_0_0;
> /* 428 */     }
> /* 429 */     // update unsafe row buffer
> /* 430 */     agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
> /* 431 */
> /* 432 */     if (!agg_agg_isNull_20_0) {
> /* 433 */       agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23);
> /* 434 */     } else {
> /* 435 */       agg_unsafeRowAggBuffer_0.setNullAt(1);
> /* 436 */     }
> /* 437 */
> /* 438 */     agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
> /* 439 */
> /* 440 */   }
> /* 441 */
> ...........
> /* 466 */     // output the result
> /* 467 */
> /* 468 */     while (agg_fastHashMapIter_0.next()) {
> /* 469 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_fastHashMapIter_0.getKey();
> /* 470 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_fastHashMapIter_0.getValue();
> /* 471 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
> /* 472 */
> /* 473 */       if (shouldStop()) return;
> /* 474 */     }
> /* 475 */     agg_fastHashMap_0.close();
> /* 476 */
> /* 477 */     while (agg_mapIter_0.next()) {
> /* 478 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey();
> /* 479 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_mapIter_0.getValue();
> /* 480 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
> /* 481 */
> /* 482 */       if (shouldStop()) return;
> /* 483 */     }
> /* 484 */
> /* 485 */     agg_mapIter_0.close();
> /* 486 */     if (agg_sorter_0 == null) {
> /* 487 */       agg_hashMap_0.free();
> /* 488 */     }
> /* 489 */   }
> /* 490 */
> /* 491 */ }
> whole codegen max code size:598



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org