You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Apache Spark (JIRA)" <ji...@apache.org> on 2018/07/24 11:18:00 UTC
[jira] [Commented] (SPARK-24901) Merge the codegen of
RegularHashMap and fastHashMap to reduce compiler maxCodesize when
VectorizedHashMap is false
[ https://issues.apache.org/jira/browse/SPARK-24901?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16554106#comment-16554106 ]
Apache Spark commented on SPARK-24901:
--------------------------------------
User 'heary-cao' has created a pull request for this issue:
https://github.com/apache/spark/pull/21860
> Merge the codegen of RegularHashMap and fastHashMap to reduce compiler maxCodesize when VectorizedHashMap is false
> ------------------------------------------------------------------------------------------------------------------
>
> Key: SPARK-24901
> URL: https://issues.apache.org/jira/browse/SPARK-24901
> Project: Spark
> Issue Type: Improvement
> Components: SQL
> Affects Versions: 2.4.0
> Reporter: caoxuewen
> Priority: Major
>
> Currently, hash aggregation generates code to update the UnsafeRow aggregation buffer.
> The FastHashMap and RegularHashMap paths each get their own separate copy of this code. The two separate copies are needed only when VectorizedHashMap is true; in all other cases they can be merged into one to reduce the compiler's maxCodesize. Thanks.
> case class DistinctAgg(a: Int, b: Float, c: Double, d: Int, e: String)
> spark.sparkContext.parallelize(
> DistinctAgg(8, 2, 3, 4, "a") ::
> DistinctAgg(9, 3, 4, 5, "b") ::Nil).toDF().createOrReplaceTempView("distinctAgg")
> val df = sql("select a,b,e, min(d) as mind, min(case when a > 10 then a else null end) as mincasea, min(a) as mina from distinctAgg group by a, b, e")
> println(org.apache.spark.sql.execution.debug.codegenString(df.queryExecution.executedPlan))
> df.show()
> The generated code looks like this:
> *Before the change:*
> Generated code:
> /* 001 */ public Object generate(Object[] references) {
> /* 002 */ return new GeneratedIteratorForCodegenStage1(references);
> /* 003 */ }
> /* 004 */
> ...............
> /* 354 */
> /* 355 */ if (agg_fastAggBuffer_0 != null) {
> /* 356 */ // common sub-expressions
> /* 357 */
> /* 358 */ // evaluate aggregate function
> /* 359 */ agg_agg_isNull_31_0 = true;
> /* 360 */ int agg_value_34 = -1;
> /* 361 */
> /* 362 */ boolean agg_isNull_32 = agg_fastAggBuffer_0.isNullAt(0);
> /* 363 */ int agg_value_35 = agg_isNull_32 ?
> /* 364 */ -1 : (agg_fastAggBuffer_0.getInt(0));
> /* 365 */
> /* 366 */ if (!agg_isNull_32 && (agg_agg_isNull_31_0 ||
> /* 367 */ agg_value_34 > agg_value_35)) {
> /* 368 */ agg_agg_isNull_31_0 = false;
> /* 369 */ agg_value_34 = agg_value_35;
> /* 370 */ }
> /* 371 */
> /* 372 */ if (!false && (agg_agg_isNull_31_0 ||
> /* 373 */ agg_value_34 > agg_expr_2_0)) {
> /* 374 */ agg_agg_isNull_31_0 = false;
> /* 375 */ agg_value_34 = agg_expr_2_0;
> /* 376 */ }
> /* 377 */ agg_agg_isNull_34_0 = true;
> /* 378 */ int agg_value_37 = -1;
> /* 379 */
> /* 380 */ boolean agg_isNull_35 = agg_fastAggBuffer_0.isNullAt(1);
> /* 381 */ int agg_value_38 = agg_isNull_35 ?
> /* 382 */ -1 : (agg_fastAggBuffer_0.getInt(1));
> /* 383 */
> /* 384 */ if (!agg_isNull_35 && (agg_agg_isNull_34_0 ||
> /* 385 */ agg_value_37 > agg_value_38)) {
> /* 386 */ agg_agg_isNull_34_0 = false;
> /* 387 */ agg_value_37 = agg_value_38;
> /* 388 */ }
> /* 389 */
> /* 390 */ byte agg_caseWhenResultState_1 = -1;
> /* 391 */ do {
> /* 392 */ boolean agg_value_40 = false;
> /* 393 */ agg_value_40 = agg_expr_0_0 > 10;
> /* 394 */ if (!false && agg_value_40) {
> /* 395 */ agg_caseWhenResultState_1 = (byte)(false ? 1 : 0);
> /* 396 */ agg_agg_value_39_0 = agg_expr_0_0;
> /* 397 */ continue;
> /* 398 */ }
> /* 399 */
> /* 400 */ agg_caseWhenResultState_1 = (byte)(true ? 1 : 0);
> /* 401 */ agg_agg_value_39_0 = -1;
> /* 402 */
> /* 403 */ } while (false);
> /* 404 */ // TRUE if any condition is met and the result is null, or no any condition is met.
> /* 405 */ final boolean agg_isNull_36 = (agg_caseWhenResultState_1 != 0);
> /* 406 */
> /* 407 */ if (!agg_isNull_36 && (agg_agg_isNull_34_0 ||
> /* 408 */ agg_value_37 > agg_agg_value_39_0)) {
> /* 409 */ agg_agg_isNull_34_0 = false;
> /* 410 */ agg_value_37 = agg_agg_value_39_0;
> /* 411 */ }
> /* 412 */ agg_agg_isNull_42_0 = true;
> /* 413 */ int agg_value_45 = -1;
> /* 414 */
> /* 415 */ boolean agg_isNull_43 = agg_fastAggBuffer_0.isNullAt(2);
> /* 416 */ int agg_value_46 = agg_isNull_43 ?
> /* 417 */ -1 : (agg_fastAggBuffer_0.getInt(2));
> /* 418 */
> /* 419 */ if (!agg_isNull_43 && (agg_agg_isNull_42_0 ||
> /* 420 */ agg_value_45 > agg_value_46)) {
> /* 421 */ agg_agg_isNull_42_0 = false;
> /* 422 */ agg_value_45 = agg_value_46;
> /* 423 */ }
> /* 424 */
> /* 425 */ if (!false && (agg_agg_isNull_42_0 ||
> /* 426 */ agg_value_45 > agg_expr_0_0)) {
> /* 427 */ agg_agg_isNull_42_0 = false;
> /* 428 */ agg_value_45 = agg_expr_0_0;
> /* 429 */ }
> /* 430 */ // update fast row
> /* 431 */ agg_fastAggBuffer_0.setInt(0, agg_value_34);
> /* 432 */
> /* 433 */ if (!agg_agg_isNull_34_0) {
> /* 434 */ agg_fastAggBuffer_0.setInt(1, agg_value_37);
> /* 435 */ } else {
> /* 436 */ agg_fastAggBuffer_0.setNullAt(1);
> /* 437 */ }
> /* 438 */
> /* 439 */ agg_fastAggBuffer_0.setInt(2, agg_value_45);
> /* 440 */ } else {
> /* 441 */ // common sub-expressions
> /* 442 */
> /* 443 */ // evaluate aggregate function
> /* 444 */ agg_agg_isNull_17_0 = true;
> /* 445 */ int agg_value_20 = -1;
> /* 446 */
> /* 447 */ boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
> /* 448 */ int agg_value_21 = agg_isNull_18 ?
> /* 449 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
> /* 450 */
> /* 451 */ if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
> /* 452 */ agg_value_20 > agg_value_21)) {
> /* 453 */ agg_agg_isNull_17_0 = false;
> /* 454 */ agg_value_20 = agg_value_21;
> /* 455 */ }
> /* 456 */
> /* 457 */ if (!false && (agg_agg_isNull_17_0 ||
> /* 458 */ agg_value_20 > agg_expr_2_0)) {
> /* 459 */ agg_agg_isNull_17_0 = false;
> /* 460 */ agg_value_20 = agg_expr_2_0;
> /* 461 */ }
> /* 462 */ agg_agg_isNull_20_0 = true;
> /* 463 */ int agg_value_23 = -1;
> /* 464 */
> /* 465 */ boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
> /* 466 */ int agg_value_24 = agg_isNull_21 ?
> /* 467 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
> /* 468 */
> /* 469 */ if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
> /* 470 */ agg_value_23 > agg_value_24)) {
> /* 471 */ agg_agg_isNull_20_0 = false;
> /* 472 */ agg_value_23 = agg_value_24;
> /* 473 */ }
> /* 474 */
> /* 475 */ byte agg_caseWhenResultState_0 = -1;
> /* 476 */ do {
> /* 477 */ boolean agg_value_26 = false;
> /* 478 */ agg_value_26 = agg_expr_0_0 > 10;
> /* 479 */ if (!false && agg_value_26) {
> /* 480 */ agg_caseWhenResultState_0 = (byte)(false ? 1 : 0);
> /* 481 */ agg_agg_value_25_0 = agg_expr_0_0;
> /* 482 */ continue;
> /* 483 */ }
> /* 484 */
> /* 485 */ agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
> /* 486 */ agg_agg_value_25_0 = -1;
> /* 487 */
> /* 488 */ } while (false);
> /* 489 */ // TRUE if any condition is met and the result is null, or no any condition is met.
> /* 490 */ final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0);
> /* 491 */
> /* 492 */ if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
> /* 493 */ agg_value_23 > agg_agg_value_25_0)) {
> /* 494 */ agg_agg_isNull_20_0 = false;
> /* 495 */ agg_value_23 = agg_agg_value_25_0;
> /* 496 */ }
> /* 497 */ agg_agg_isNull_28_0 = true;
> /* 498 */ int agg_value_31 = -1;
> /* 499 */
> /* 500 */ boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
> /* 501 */ int agg_value_32 = agg_isNull_29 ?
> /* 502 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
> /* 503 */
> /* 504 */ if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
> /* 505 */ agg_value_31 > agg_value_32)) {
> /* 506 */ agg_agg_isNull_28_0 = false;
> /* 507 */ agg_value_31 = agg_value_32;
> /* 508 */ }
> /* 509 */
> /* 510 */ if (!false && (agg_agg_isNull_28_0 ||
> /* 511 */ agg_value_31 > agg_expr_0_0)) {
> /* 512 */ agg_agg_isNull_28_0 = false;
> /* 513 */ agg_value_31 = agg_expr_0_0;
> /* 514 */ }
> /* 515 */ // update unsafe row buffer
> /* 516 */ agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
> /* 517 */
> /* 518 */ if (!agg_agg_isNull_20_0) {
> /* 519 */ agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23);
> /* 520 */ } else {
> /* 521 */ agg_unsafeRowAggBuffer_0.setNullAt(1);
> /* 522 */ }
> /* 523 */
> /* 524 */ agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
> /* 525 */
> /* 526 */ }
> /* 527 */
> /* 528 */ }
> ......................
> /* 554 */ // output the result
> /* 555 */
> /* 556 */ while (agg_fastHashMapIter_0.next()) {
> /* 557 */ UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_fastHashMapIter_0.getKey();
> /* 558 */ UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_fastHashMapIter_0.getValue();
> /* 559 */ agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
> /* 560 */
> /* 561 */ if (shouldStop()) return;
> /* 562 */ }
> /* 563 */ agg_fastHashMap_0.close();
> /* 564 */
> /* 565 */ while (agg_mapIter_0.next()) {
> /* 566 */ UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey();
> /* 567 */ UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_mapIter_0.getValue();
> /* 568 */ agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
> /* 569 */
> /* 570 */ if (shouldStop()) return;
> /* 571 */ }
> /* 572 */
> /* 573 */ agg_mapIter_0.close();
> /* 574 */ if (agg_sorter_0 == null) {
> /* 575 */ agg_hashMap_0.free();
> /* 576 */ }
> /* 577 */ }
> /* 578 */
> /* 579 */ }
> whole codegen max code size:954
> *After the change:*
> Generated code:
> /* 001 */ public Object generate(Object[] references) {
> /* 002 */ return new GeneratedIteratorForCodegenStage1(references);
> /* 003 */ }
> /* 004 */
> .............
> /* 350 */
> /* 351 */ if (agg_fastAggBuffer_0 != null) {
> /* 352 */ agg_unsafeRowAggBuffer_0 = agg_fastAggBuffer_0;
> /* 353 */ }
> /* 354 */
> /* 355 */ // common sub-expressions
> /* 356 */
> /* 357 */ // evaluate aggregate function
> /* 358 */ agg_agg_isNull_17_0 = true;
> /* 359 */ int agg_value_20 = -1;
> /* 360 */
> /* 361 */ boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
> /* 362 */ int agg_value_21 = agg_isNull_18 ?
> /* 363 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
> /* 364 */
> /* 365 */ if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
> /* 366 */ agg_value_20 > agg_value_21)) {
> /* 367 */ agg_agg_isNull_17_0 = false;
> /* 368 */ agg_value_20 = agg_value_21;
> /* 369 */ }
> /* 370 */
> /* 371 */ if (!false && (agg_agg_isNull_17_0 ||
> /* 372 */ agg_value_20 > agg_expr_2_0)) {
> /* 373 */ agg_agg_isNull_17_0 = false;
> /* 374 */ agg_value_20 = agg_expr_2_0;
> /* 375 */ }
> /* 376 */ agg_agg_isNull_20_0 = true;
> /* 377 */ int agg_value_23 = -1;
> /* 378 */
> /* 379 */ boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
> /* 380 */ int agg_value_24 = agg_isNull_21 ?
> /* 381 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
> /* 382 */
> /* 383 */ if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
> /* 384 */ agg_value_23 > agg_value_24)) {
> /* 385 */ agg_agg_isNull_20_0 = false;
> /* 386 */ agg_value_23 = agg_value_24;
> /* 387 */ }
> /* 388 */
> /* 389 */ byte agg_caseWhenResultState_0 = -1;
> /* 390 */ do {
> /* 391 */ boolean agg_value_26 = false;
> /* 392 */ agg_value_26 = agg_expr_0_0 > 10;
> /* 393 */ if (!false && agg_value_26) {
> /* 394 */ agg_caseWhenResultState_0 = (byte)(false ? 1 : 0);
> /* 395 */ agg_agg_value_25_0 = agg_expr_0_0;
> /* 396 */ continue;
> /* 397 */ }
> /* 398 */
> /* 399 */ agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
> /* 400 */ agg_agg_value_25_0 = -1;
> /* 401 */
> /* 402 */ } while (false);
> /* 403 */ // TRUE if any condition is met and the result is null, or no any condition is met.
> /* 404 */ final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0);
> /* 405 */
> /* 406 */ if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
> /* 407 */ agg_value_23 > agg_agg_value_25_0)) {
> /* 408 */ agg_agg_isNull_20_0 = false;
> /* 409 */ agg_value_23 = agg_agg_value_25_0;
> /* 410 */ }
> /* 411 */ agg_agg_isNull_28_0 = true;
> /* 412 */ int agg_value_31 = -1;
> /* 413 */
> /* 414 */ boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
> /* 415 */ int agg_value_32 = agg_isNull_29 ?
> /* 416 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
> /* 417 */
> /* 418 */ if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
> /* 419 */ agg_value_31 > agg_value_32)) {
> /* 420 */ agg_agg_isNull_28_0 = false;
> /* 421 */ agg_value_31 = agg_value_32;
> /* 422 */ }
> /* 423 */
> /* 424 */ if (!false && (agg_agg_isNull_28_0 ||
> /* 425 */ agg_value_31 > agg_expr_0_0)) {
> /* 426 */ agg_agg_isNull_28_0 = false;
> /* 427 */ agg_value_31 = agg_expr_0_0;
> /* 428 */ }
> /* 429 */ // update unsafe row buffer
> /* 430 */ agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
> /* 431 */
> /* 432 */ if (!agg_agg_isNull_20_0) {
> /* 433 */ agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23);
> /* 434 */ } else {
> /* 435 */ agg_unsafeRowAggBuffer_0.setNullAt(1);
> /* 436 */ }
> /* 437 */
> /* 438 */ agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
> /* 439 */
> /* 440 */ }
> /* 441 */
> ...........
> /* 466 */ // output the result
> /* 467 */
> /* 468 */ while (agg_fastHashMapIter_0.next()) {
> /* 469 */ UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_fastHashMapIter_0.getKey();
> /* 470 */ UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_fastHashMapIter_0.getValue();
> /* 471 */ agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
> /* 472 */
> /* 473 */ if (shouldStop()) return;
> /* 474 */ }
> /* 475 */ agg_fastHashMap_0.close();
> /* 476 */
> /* 477 */ while (agg_mapIter_0.next()) {
> /* 478 */ UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey();
> /* 479 */ UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_mapIter_0.getValue();
> /* 480 */ agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
> /* 481 */
> /* 482 */ if (shouldStop()) return;
> /* 483 */ }
> /* 484 */
> /* 485 */ agg_mapIter_0.close();
> /* 486 */ if (agg_sorter_0 == null) {
> /* 487 */ agg_hashMap_0.free();
> /* 488 */ }
> /* 489 */ }
> /* 490 */
> /* 491 */ }
> whole codegen max code size:598
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org