You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "caoxuewen (JIRA)" <ji...@apache.org> on 2018/07/24 10:55:00 UTC

[jira] [Created] (SPARK-24901) Merge the codegen of RegularHashMap and fastHashMap to reduce compiler maxCodesize when VectorizedHashMap is false

caoxuewen created SPARK-24901:
---------------------------------

             Summary: Merge the codegen of RegularHashMap and fastHashMap to reduce compiler maxCodesize when VectorizedHashMap is false
                 Key: SPARK-24901
                 URL: https://issues.apache.org/jira/browse/SPARK-24901
             Project: Spark
          Issue Type: Improvement
          Components: SQL
    Affects Versions: 2.4.0
            Reporter: caoxuewen


Currently, hash aggregation generates the code that updates the UnsafeRow aggregation buffer twice: once for the FastHashMap and once for the RegularHashMap.
These two separate copies are only needed when VectorizedHashMap is true. In all other cases they can be merged into a single copy to reduce the compiler's max code size. Thanks.
// Reproduction snippet. Assumes a spark-shell style session in which `spark`, `sql`
// and the implicits needed by `toDF()` are already in scope -- TODO confirm.
case class DistinctAgg(a: Int, b: Float, c: Double, d: Int, e: String)
// Register two sample rows as the temporary view queried below.
spark.sparkContext.parallelize(
      DistinctAgg(8, 2, 3, 4, "a") ::
      DistinctAgg(9, 3, 4, 5, "b") :: Nil).toDF().createOrReplaceTempView("distinctAgg")
val df = sql("select a,b,e, min(d) as mind, min(case when a > 10 then a else null end) as mincasea, min(a) as mina from distinctAgg group by a, b, e")
// Print the generated code for the executed physical plan, then run the query.
println(org.apache.spark.sql.execution.debug.codegenString(df.queryExecution.executedPlan))
df.show()

The generated code looks like this:
 *Before the change:*
Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
...............
/* 354 */
/* 355 */     if (agg_fastAggBuffer_0 != null) {
/* 356 */       // common sub-expressions
/* 357 */
/* 358 */       // evaluate aggregate function
/* 359 */       agg_agg_isNull_31_0 = true;
/* 360 */       int agg_value_34 = -1;
/* 361 */
/* 362 */       boolean agg_isNull_32 = agg_fastAggBuffer_0.isNullAt(0);
/* 363 */       int agg_value_35 = agg_isNull_32 ?
/* 364 */       -1 : (agg_fastAggBuffer_0.getInt(0));
/* 365 */
/* 366 */       if (!agg_isNull_32 && (agg_agg_isNull_31_0 ||
/* 367 */           agg_value_34 > agg_value_35)) {
/* 368 */         agg_agg_isNull_31_0 = false;
/* 369 */         agg_value_34 = agg_value_35;
/* 370 */       }
/* 371 */
/* 372 */       if (!false && (agg_agg_isNull_31_0 ||
/* 373 */           agg_value_34 > agg_expr_2_0)) {
/* 374 */         agg_agg_isNull_31_0 = false;
/* 375 */         agg_value_34 = agg_expr_2_0;
/* 376 */       }
/* 377 */       agg_agg_isNull_34_0 = true;
/* 378 */       int agg_value_37 = -1;
/* 379 */
/* 380 */       boolean agg_isNull_35 = agg_fastAggBuffer_0.isNullAt(1);
/* 381 */       int agg_value_38 = agg_isNull_35 ?
/* 382 */       -1 : (agg_fastAggBuffer_0.getInt(1));
/* 383 */
/* 384 */       if (!agg_isNull_35 && (agg_agg_isNull_34_0 ||
/* 385 */           agg_value_37 > agg_value_38)) {
/* 386 */         agg_agg_isNull_34_0 = false;
/* 387 */         agg_value_37 = agg_value_38;
/* 388 */       }
/* 389 */
/* 390 */       byte agg_caseWhenResultState_1 = -1;
/* 391 */       do {
/* 392 */         boolean agg_value_40 = false;
/* 393 */         agg_value_40 = agg_expr_0_0 > 10;
/* 394 */         if (!false && agg_value_40) {
/* 395 */           agg_caseWhenResultState_1 = (byte)(false ? 1 : 0);
/* 396 */           agg_agg_value_39_0 = agg_expr_0_0;
/* 397 */           continue;
/* 398 */         }
/* 399 */
/* 400 */         agg_caseWhenResultState_1 = (byte)(true ? 1 : 0);
/* 401 */         agg_agg_value_39_0 = -1;
/* 402 */
/* 403 */       } while (false);
/* 404 */       // TRUE if any condition is met and the result is null, or no any condition is met.
/* 405 */       final boolean agg_isNull_36 = (agg_caseWhenResultState_1 != 0);
/* 406 */
/* 407 */       if (!agg_isNull_36 && (agg_agg_isNull_34_0 ||
/* 408 */           agg_value_37 > agg_agg_value_39_0)) {
/* 409 */         agg_agg_isNull_34_0 = false;
/* 410 */         agg_value_37 = agg_agg_value_39_0;
/* 411 */       }
/* 412 */       agg_agg_isNull_42_0 = true;
/* 413 */       int agg_value_45 = -1;
/* 414 */
/* 415 */       boolean agg_isNull_43 = agg_fastAggBuffer_0.isNullAt(2);
/* 416 */       int agg_value_46 = agg_isNull_43 ?
/* 417 */       -1 : (agg_fastAggBuffer_0.getInt(2));
/* 418 */
/* 419 */       if (!agg_isNull_43 && (agg_agg_isNull_42_0 ||
/* 420 */           agg_value_45 > agg_value_46)) {
/* 421 */         agg_agg_isNull_42_0 = false;
/* 422 */         agg_value_45 = agg_value_46;
/* 423 */       }
/* 424 */
/* 425 */       if (!false && (agg_agg_isNull_42_0 ||
/* 426 */           agg_value_45 > agg_expr_0_0)) {
/* 427 */         agg_agg_isNull_42_0 = false;
/* 428 */         agg_value_45 = agg_expr_0_0;
/* 429 */       }
/* 430 */       // update fast row
/* 431 */       agg_fastAggBuffer_0.setInt(0, agg_value_34);
/* 432 */
/* 433 */       if (!agg_agg_isNull_34_0) {
/* 434 */         agg_fastAggBuffer_0.setInt(1, agg_value_37);
/* 435 */       } else {
/* 436 */         agg_fastAggBuffer_0.setNullAt(1);
/* 437 */       }
/* 438 */
/* 439 */       agg_fastAggBuffer_0.setInt(2, agg_value_45);
/* 440 */     } else {
/* 441 */       // common sub-expressions
/* 442 */
/* 443 */       // evaluate aggregate function
/* 444 */       agg_agg_isNull_17_0 = true;
/* 445 */       int agg_value_20 = -1;
/* 446 */
/* 447 */       boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
/* 448 */       int agg_value_21 = agg_isNull_18 ?
/* 449 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
/* 450 */
/* 451 */       if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
/* 452 */           agg_value_20 > agg_value_21)) {
/* 453 */         agg_agg_isNull_17_0 = false;
/* 454 */         agg_value_20 = agg_value_21;
/* 455 */       }
/* 456 */
/* 457 */       if (!false && (agg_agg_isNull_17_0 ||
/* 458 */           agg_value_20 > agg_expr_2_0)) {
/* 459 */         agg_agg_isNull_17_0 = false;
/* 460 */         agg_value_20 = agg_expr_2_0;
/* 461 */       }
/* 462 */       agg_agg_isNull_20_0 = true;
/* 463 */       int agg_value_23 = -1;
/* 464 */
/* 465 */       boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
/* 466 */       int agg_value_24 = agg_isNull_21 ?
/* 467 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
/* 468 */
/* 469 */       if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
/* 470 */           agg_value_23 > agg_value_24)) {
/* 471 */         agg_agg_isNull_20_0 = false;
/* 472 */         agg_value_23 = agg_value_24;
/* 473 */       }
/* 474 */
/* 475 */       byte agg_caseWhenResultState_0 = -1;
/* 476 */       do {
/* 477 */         boolean agg_value_26 = false;
/* 478 */         agg_value_26 = agg_expr_0_0 > 10;
/* 479 */         if (!false && agg_value_26) {
/* 480 */           agg_caseWhenResultState_0 = (byte)(false ? 1 : 0);
/* 481 */           agg_agg_value_25_0 = agg_expr_0_0;
/* 482 */           continue;
/* 483 */         }
/* 484 */
/* 485 */         agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
/* 486 */         agg_agg_value_25_0 = -1;
/* 487 */
/* 488 */       } while (false);
/* 489 */       // TRUE if any condition is met and the result is null, or no any condition is met.
/* 490 */       final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0);
/* 491 */
/* 492 */       if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
/* 493 */           agg_value_23 > agg_agg_value_25_0)) {
/* 494 */         agg_agg_isNull_20_0 = false;
/* 495 */         agg_value_23 = agg_agg_value_25_0;
/* 496 */       }
/* 497 */       agg_agg_isNull_28_0 = true;
/* 498 */       int agg_value_31 = -1;
/* 499 */
/* 500 */       boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
/* 501 */       int agg_value_32 = agg_isNull_29 ?
/* 502 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
/* 503 */
/* 504 */       if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
/* 505 */           agg_value_31 > agg_value_32)) {
/* 506 */         agg_agg_isNull_28_0 = false;
/* 507 */         agg_value_31 = agg_value_32;
/* 508 */       }
/* 509 */
/* 510 */       if (!false && (agg_agg_isNull_28_0 ||
/* 511 */           agg_value_31 > agg_expr_0_0)) {
/* 512 */         agg_agg_isNull_28_0 = false;
/* 513 */         agg_value_31 = agg_expr_0_0;
/* 514 */       }
/* 515 */       // update unsafe row buffer
/* 516 */       agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
/* 517 */
/* 518 */       if (!agg_agg_isNull_20_0) {
/* 519 */         agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23);
/* 520 */       } else {
/* 521 */         agg_unsafeRowAggBuffer_0.setNullAt(1);
/* 522 */       }
/* 523 */
/* 524 */       agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
/* 525 */
/* 526 */     }
/* 527 */
/* 528 */   }
......................
/* 554 */     // output the result
/* 555 */
/* 556 */     while (agg_fastHashMapIter_0.next()) {
/* 557 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_fastHashMapIter_0.getKey();
/* 558 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_fastHashMapIter_0.getValue();
/* 559 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
/* 560 */
/* 561 */       if (shouldStop()) return;
/* 562 */     }
/* 563 */     agg_fastHashMap_0.close();
/* 564 */
/* 565 */     while (agg_mapIter_0.next()) {
/* 566 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey();
/* 567 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_mapIter_0.getValue();
/* 568 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
/* 569 */
/* 570 */       if (shouldStop()) return;
/* 571 */     }
/* 572 */
/* 573 */     agg_mapIter_0.close();
/* 574 */     if (agg_sorter_0 == null) {
/* 575 */       agg_hashMap_0.free();
/* 576 */     }
/* 577 */   }
/* 578 */
/* 579 */ }

Whole-stage codegen max code size: 954

 *After the change:*
Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
.............
/* 350 */
/* 351 */     if (agg_fastAggBuffer_0 != null) {
/* 352 */       agg_unsafeRowAggBuffer_0 = agg_fastAggBuffer_0;
/* 353 */     }
/* 354 */
/* 355 */     // common sub-expressions
/* 356 */
/* 357 */     // evaluate aggregate function
/* 358 */     agg_agg_isNull_17_0 = true;
/* 359 */     int agg_value_20 = -1;
/* 360 */
/* 361 */     boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
/* 362 */     int agg_value_21 = agg_isNull_18 ?
/* 363 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
/* 364 */
/* 365 */     if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
/* 366 */         agg_value_20 > agg_value_21)) {
/* 367 */       agg_agg_isNull_17_0 = false;
/* 368 */       agg_value_20 = agg_value_21;
/* 369 */     }
/* 370 */
/* 371 */     if (!false && (agg_agg_isNull_17_0 ||
/* 372 */         agg_value_20 > agg_expr_2_0)) {
/* 373 */       agg_agg_isNull_17_0 = false;
/* 374 */       agg_value_20 = agg_expr_2_0;
/* 375 */     }
/* 376 */     agg_agg_isNull_20_0 = true;
/* 377 */     int agg_value_23 = -1;
/* 378 */
/* 379 */     boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
/* 380 */     int agg_value_24 = agg_isNull_21 ?
/* 381 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
/* 382 */
/* 383 */     if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
/* 384 */         agg_value_23 > agg_value_24)) {
/* 385 */       agg_agg_isNull_20_0 = false;
/* 386 */       agg_value_23 = agg_value_24;
/* 387 */     }
/* 388 */
/* 389 */     byte agg_caseWhenResultState_0 = -1;
/* 390 */     do {
/* 391 */       boolean agg_value_26 = false;
/* 392 */       agg_value_26 = agg_expr_0_0 > 10;
/* 393 */       if (!false && agg_value_26) {
/* 394 */         agg_caseWhenResultState_0 = (byte)(false ? 1 : 0);
/* 395 */         agg_agg_value_25_0 = agg_expr_0_0;
/* 396 */         continue;
/* 397 */       }
/* 398 */
/* 399 */       agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
/* 400 */       agg_agg_value_25_0 = -1;
/* 401 */
/* 402 */     } while (false);
/* 403 */     // TRUE if any condition is met and the result is null, or no any condition is met.
/* 404 */     final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0);
/* 405 */
/* 406 */     if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
/* 407 */         agg_value_23 > agg_agg_value_25_0)) {
/* 408 */       agg_agg_isNull_20_0 = false;
/* 409 */       agg_value_23 = agg_agg_value_25_0;
/* 410 */     }
/* 411 */     agg_agg_isNull_28_0 = true;
/* 412 */     int agg_value_31 = -1;
/* 413 */
/* 414 */     boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
/* 415 */     int agg_value_32 = agg_isNull_29 ?
/* 416 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
/* 417 */
/* 418 */     if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
/* 419 */         agg_value_31 > agg_value_32)) {
/* 420 */       agg_agg_isNull_28_0 = false;
/* 421 */       agg_value_31 = agg_value_32;
/* 422 */     }
/* 423 */
/* 424 */     if (!false && (agg_agg_isNull_28_0 ||
/* 425 */         agg_value_31 > agg_expr_0_0)) {
/* 426 */       agg_agg_isNull_28_0 = false;
/* 427 */       agg_value_31 = agg_expr_0_0;
/* 428 */     }
/* 429 */     // update unsafe row buffer
/* 430 */     agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
/* 431 */
/* 432 */     if (!agg_agg_isNull_20_0) {
/* 433 */       agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23);
/* 434 */     } else {
/* 435 */       agg_unsafeRowAggBuffer_0.setNullAt(1);
/* 436 */     }
/* 437 */
/* 438 */     agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
/* 439 */
/* 440 */   }
/* 441 */
...........
/* 466 */     // output the result
/* 467 */
/* 468 */     while (agg_fastHashMapIter_0.next()) {
/* 469 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_fastHashMapIter_0.getKey();
/* 470 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_fastHashMapIter_0.getValue();
/* 471 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
/* 472 */
/* 473 */       if (shouldStop()) return;
/* 474 */     }
/* 475 */     agg_fastHashMap_0.close();
/* 476 */
/* 477 */     while (agg_mapIter_0.next()) {
/* 478 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey();
/* 479 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_mapIter_0.getValue();
/* 480 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0);
/* 481 */
/* 482 */       if (shouldStop()) return;
/* 483 */     }
/* 484 */
/* 485 */     agg_mapIter_0.close();
/* 486 */     if (agg_sorter_0 == null) {
/* 487 */       agg_hashMap_0.free();
/* 488 */     }
/* 489 */   }
/* 490 */
/* 491 */ }

Whole-stage codegen max code size: 598



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org