You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/06/01 00:47:46 UTC

[GitHub] [spark] wangyum commented on a change in pull request #28575: [SPARK-31705][SQL] Push predicate through join by rewriting join condition to conjunctive normal form

wangyum commented on a change in pull request #28575:
URL: https://github.com/apache/spark/pull/28575#discussion_r432966520



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
##########
@@ -1372,6 +1376,108 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper {
   }
 }
 
+/**
+ * Rewriting join condition to conjunctive normal form expression so that we can push
+ * more predicate.
+ */
+object PushPredicateThroughJoinByCNF extends Rule[LogicalPlan] with PredicateHelper {
+  /**
+   * Rewrite pattern:
+   * 1. (a && b) || c --> (a || c) && (b || c)
+   * 2. a || (b && c) --> (a || b) && (a || c)
+   *
+   * To avoid generating too many predicates, we first group the filter columns from the same table.
+   */
+  private def toCNF(condition: Expression, depth: Int = 0): Expression = {
+    if (depth < SQLConf.get.maxRewritingCNFDepth) {
+      condition match {
+        case or @ Or(left: And, right: And) =>
+          val lhs = splitConjunctivePredicates(left).groupBy(_.references.map(_.qualifier))

Review comment:
       group by  qualifier to avoid generating too many predicates. For example:
   TPCDS q85:
   Without group by qualifier:
   ```
   == Physical Plan ==
   TakeOrderedAndProject(limit=100, orderBy=[substr(r_reason_desc, 1, 20)#137 ASC NULLS FIRST,aggOrder#142 ASC NULLS FIRST,avg(wr_refunded_cash)#139 ASC NULLS FIRST,avg(wr_fee)#140 ASC NULLS FIRST], output=[substr(r_reason_desc, 1, 20)#137,avg(ws_quantity)#138,avg(wr_refunded_cash)#139,avg(wr_fee)#140])
   +- *(9) HashAggregate(keys=[r_reason_desc#124], functions=[avg(cast(ws_quantity#18 as bigint)), avg(UnscaledValue(wr_refunded_cash#54)), avg(UnscaledValue(wr_fee#52))])
      +- Exchange hashpartitioning(r_reason_desc#124, 5), true, [id=#351]
         +- *(8) HashAggregate(keys=[r_reason_desc#124], functions=[partial_avg(cast(ws_quantity#18 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#54)), partial_avg(UnscaledValue(wr_fee#52))])
            +- *(8) Project [ws_quantity#18, wr_fee#52, wr_refunded_cash#54, r_reason_desc#124]
               +- *(8) BroadcastHashJoin [wr_reason_sk#46L], [cast(r_reason_sk#122 as bigint)], Inner, BuildRight
                  :- *(8) Project [ws_quantity#18, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :  +- *(8) BroadcastHashJoin [ws_sold_date_sk#0], [d_date_sk#94], Inner, BuildRight
                  :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :  +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#40L], [cast(ca_address_sk#81 as bigint)], Inner, BuildRight, ((((ca_state#89 IN (IN,OH,NJ) AND (ws_net_profit#33 >= 100.00)) AND (ws_net_profit#33 <= 200.00)) OR ((ca_state#89 IN (WI,CT,KY) AND (ws_net_profit#33 >= 150.00)) AND (ws_net_profit#33 <= 300.00))) OR ((ca_state#89 IN (LA,IA,AR) AND (ws_net_profit#33 >= 50.00)) AND (ws_net_profit#33 <= 250.00)))
                  :     :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :  +- *(8) BroadcastHashJoin [wr_returning_cdemo_sk#42L, cd_marital_status#74, cd_education_status#75], [cast(cd_demo_sk#125 as bigint), cd_marital_status#127, cd_education_status#128], Inner, BuildRight
                  :     :     :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54, cd_marital_status#74, cd_education_status#75]
                  :     :     :     :  +- *(8) BroadcastHashJoin [wr_refunded_cdemo_sk#38L], [cast(cd_demo_sk#72 as bigint)], Inner, BuildRight, ((((((cd_marital_status#74 = M) AND (cd_education_status#75 = Advanced Degree)) AND (ws_sales_price#21 >= 100.00)) AND (ws_sales_price#21 <= 150.00)) OR ((((cd_marital_status#74 = S) AND (cd_education_status#75 = College)) AND (ws_sales_price#21 >= 50.00)) AND (ws_sales_price#21 <= 100.00))) OR ((((cd_marital_status#74 = W) AND (cd_education_status#75 = 2 yr Degree)) AND (ws_sales_price#21 >= 150.00)) AND (ws_sales_price#21 <= 200.00)))
                  :     :     :     :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, ws_sales_price#21, ws_net_profit#33, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :  +- *(8) BroadcastHashJoin [ws_web_page_sk#12], [wp_web_page_sk#58], Inner, BuildRight
                  :     :     :     :     :     :- *(8) Project [ws_sold_date_sk#0, ws_web_page_sk#12, ws_quantity#18, ws_sales_price#21, ws_net_profit#33, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :     :  +- *(8) BroadcastHashJoin [cast(ws_item_sk#3 as bigint), cast(ws_order_number#17 as bigint)], [wr_item_sk#36L, wr_order_number#47L], Inner, BuildRight
                  :     :     :     :     :     :     :- *(8) Project [ws_sold_date_sk#0, ws_item_sk#3, ws_web_page_sk#12, ws_order_number#17, ws_quantity#18, ws_sales_price#21, ws_net_profit#33]
                  :     :     :     :     :     :     :  +- *(8) Filter (((((((((((((((((((isnotnull(ws_item_sk#3) AND isnotnull(ws_order_number#17)) AND isnotnull(ws_web_page_sk#12)) AND isnotnull(ws_sold_date_sk#0)) AND (((ws_sales_price#21 >= 100.00) OR (ws_sales_price#21 >= 50.00)) OR (ws_sales_price#21 >= 150.00))) AND (((ws_sales_price#21 >= 100.00) OR (ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 >= 150.00))) AND (((ws_sales_price#21 <= 150.00) OR (ws_sales_price#21 >= 50.00)) OR (ws_sales_price#21 >= 150.00))) AND (((ws_sales_price#21 <= 150.00) OR (ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 >= 150.00))) AND (((ws_sales_price#21 >= 100.00) OR (ws_sales_price#21 >= 50.00)) OR (ws_sales_price#21 <= 200.00))) AND (((ws_sales_price#21 >= 100.00) OR (ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 <= 200.00))) AND (((ws_sales_price#21 <= 150.00) OR (ws_sales_price#21 >= 50.00)) OR (ws_sales_price#21 <= 200.00))) AND (((ws_sales_price#21 <= 150.00) OR (ws_sales_price#21 <= 100.00)) OR (ws_sales_price#21 <= 200.00))) AND (((ws_net_profit#33 >= 100.00) OR (ws_net_profit#33 >= 150.00)) OR (ws_net_profit#33 >= 50.00))) AND (((ws_net_profit#33 >= 100.00) OR (ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 >= 50.00))) AND (((ws_net_profit#33 <= 200.00) OR (ws_net_profit#33 >= 150.00)) OR (ws_net_profit#33 >= 50.00))) AND (((ws_net_profit#33 <= 200.00) OR (ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 >= 50.00))) AND (((ws_net_profit#33 >= 100.00) OR (ws_net_profit#33 >= 150.00)) OR (ws_net_profit#33 <= 250.00))) AND (((ws_net_profit#33 >= 100.00) OR (ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 <= 250.00))) AND (((ws_net_profit#33 <= 200.00) OR (ws_net_profit#33 >= 150.00)) OR (ws_net_profit#33 <= 250.00))) AND (((ws_net_profit#33 <= 200.00) OR (ws_net_profit#33 <= 300.00)) OR (ws_net_profit#33 <= 250.00)))
                  :     :     :     :     :     :     :     +- *(8) ColumnarToRow
                  :     :     :     :     :     :     :        +- FileScan parquet default.web_sales[ws_sold_date_sk#0,ws_item_sk#3,ws_web_page_sk#12,ws_order_number#17,ws_quantity#18,ws_sales_price#21,ws_net_profit#33] Batched: true, DataFilters: [isnotnull(ws_item_sk#3), isnotnull(ws_order_number#17), isnotnull(ws_web_page_sk#12), isnotnull(..., Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., ReadSchema: struct<ws_sold_date_sk:int,ws_item_sk:int,ws_web_page_sk:int,ws_order_number:int,ws_quantity:int,...
                  :     :     :     :     :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true], input[5, bigint, true])), [id=#291]
                  :     :     :     :     :     :        +- *(1) Project [wr_item_sk#36L, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_order_number#47L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :     :           +- *(1) Filter (((((isnotnull(wr_item_sk#36L) AND isnotnull(wr_order_number#47L)) AND isnotnull(wr_refunded_cdemo_sk#38L)) AND isnotnull(wr_returning_cdemo_sk#42L)) AND isnotnull(wr_refunded_addr_sk#40L)) AND isnotnull(wr_reason_sk#46L))
                  :     :     :     :     :     :              +- *(1) ColumnarToRow
                  :     :     :     :     :     :                 +- FileScan parquet default.web_returns[wr_item_sk#36L,wr_refunded_cdemo_sk#38L,wr_refunded_addr_sk#40L,wr_returning_cdemo_sk#42L,wr_reason_sk#46L,wr_order_number#47L,wr_fee#52,wr_refunded_cash#54] Batched: true, DataFilters: [isnotnull(wr_item_sk#36L), isnotnull(wr_order_number#47L), isnotnull(wr_refunded_cdemo_sk#38L), ..., Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr..., ReadSchema: struct<wr_item_sk:bigint,wr_refunded_cdemo_sk:bigint,wr_refunded_addr_sk:bigint,wr_returning_cdem...
                  :     :     :     :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#300]
                  :     :     :     :     :        +- *(2) Project [wp_web_page_sk#58]
                  :     :     :     :     :           +- *(2) Filter isnotnull(wp_web_page_sk#58)
                  :     :     :     :     :              +- *(2) ColumnarToRow
                  :     :     :     :     :                 +- FileScan parquet default.web_page[wp_web_page_sk#58] Batched: true, DataFilters: [isnotnull(wp_web_page_sk#58)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: struct<wp_web_page_sk:int>
                  :     :     :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#309]
                  :     :     :     :        +- *(3) Project [cd_demo_sk#72, cd_marital_status#74, cd_education_status#75]
                  :     :     :     :           +- *(3) Filter ((((((((((isnotnull(cd_demo_sk#72) AND isnotnull(cd_education_status#75)) AND isnotnull(cd_marital_status#74)) AND (((cd_marital_status#74 = M) OR (cd_marital_status#74 = S)) OR (cd_marital_status#74 = W))) AND (((cd_marital_status#74 = M) OR (cd_marital_status#74 = S)) OR (cd_education_status#75 = 2 yr Degree))) AND (((cd_marital_status#74 = M) OR (cd_education_status#75 = College)) OR (cd_marital_status#74 = W))) AND (((cd_marital_status#74 = M) OR (cd_education_status#75 = College)) OR (cd_education_status#75 = 2 yr Degree))) AND (((cd_education_status#75 = Advanced Degree) OR (cd_marital_status#74 = S)) OR (cd_marital_status#74 = W))) AND (((cd_education_status#75 = Advanced Degree) OR (cd_marital_status#74 = S)) OR (cd_education_status#75 = 2 yr Degree))) AND (((cd_education_status#75 = Advanced Degree) OR (cd_education_status#75 = College)) OR (cd_marital_status#74 = W))) AND (((cd_education_status#75 = Advanced Degree) OR (cd_education_status#75 = College)) OR (cd_education_status#75 = 2 yr Degree)))
                  :     :     :     :              +- *(3) ColumnarToRow
                  :     :     :     :                 +- FileScan parquet default.customer_demographics[cd_demo_sk#72,cd_marital_status#74,cd_education_status#75] Batched: true, DataFilters: [isnotnull(cd_demo_sk#72), isnotnull(cd_education_status#75), isnotnull(cd_marital_status#74), ((..., Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status), Or(Or(Equal..., ReadSchema: struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
                  :     :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])), [id=#318]
                  :     :     :        +- *(4) Project [cd_demo_sk#125, cd_marital_status#127, cd_education_status#128]
                  :     :     :           +- *(4) Filter ((isnotnull(cd_demo_sk#125) AND isnotnull(cd_education_status#128)) AND isnotnull(cd_marital_status#127))
                  :     :     :              +- *(4) ColumnarToRow
                  :     :     :                 +- FileScan parquet default.customer_demographics[cd_demo_sk#125,cd_marital_status#127,cd_education_status#128] Batched: true, DataFilters: [isnotnull(cd_demo_sk#125), isnotnull(cd_education_status#128), isnotnull(cd_marital_status#127)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
                  :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#327]
                  :     :        +- *(5) Project [ca_address_sk#81, ca_state#89]
                  :     :           +- *(5) Filter (((isnotnull(ca_country#91) AND (ca_country#91 = United States)) AND isnotnull(ca_address_sk#81)) AND ((ca_state#89 IN (IN,OH,NJ) OR ca_state#89 IN (WI,CT,KY)) OR ca_state#89 IN (LA,IA,AR)))
                  :     :              +- *(5) ColumnarToRow
                  :     :                 +- FileScan parquet default.customer_address[ca_address_sk#81,ca_state#89,ca_country#91] Batched: true, DataFilters: [isnotnull(ca_country#91), (ca_country#91 = United States), isnotnull(ca_address_sk#81), ((ca_sta..., Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_..., ReadSchema: struct<ca_address_sk:int,ca_state:string,ca_country:string>
                  :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#336]
                  :        +- *(6) Project [d_date_sk#94]
                  :           +- *(6) Filter ((isnotnull(d_year#100) AND (d_year#100 = 2000)) AND isnotnull(d_date_sk#94))
                  :              +- *(6) ColumnarToRow
                  :                 +- FileScan parquet default.date_dim[d_date_sk#94,d_year#100] Batched: true, DataFilters: [isnotnull(d_year#100), (d_year#100 = 2000), isnotnull(d_date_sk#94)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct<d_date_sk:int,d_year:int>
                  +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#345]
                     +- *(7) Project [r_reason_sk#122, r_reason_desc#124]
                        +- *(7) Filter isnotnull(r_reason_sk#122)
                           +- *(7) ColumnarToRow
                              +- FileScan parquet default.reason[r_reason_sk#122,r_reason_desc#124] Batched: true, DataFilters: [isnotnull(r_reason_sk#122)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: struct<r_reason_sk:int,r_reason_desc:string>
   
   ```
   
   Group by qualifier:
   ```
   == Physical Plan ==
   TakeOrderedAndProject(limit=100, orderBy=[substr(r_reason_desc, 1, 20)#137 ASC NULLS FIRST,aggOrder#142 ASC NULLS FIRST,avg(wr_refunded_cash)#139 ASC NULLS FIRST,avg(wr_fee)#140 ASC NULLS FIRST], output=[substr(r_reason_desc, 1, 20)#137,avg(ws_quantity)#138,avg(wr_refunded_cash)#139,avg(wr_fee)#140])
   +- *(9) HashAggregate(keys=[r_reason_desc#124], functions=[avg(cast(ws_quantity#18 as bigint)), avg(UnscaledValue(wr_refunded_cash#54)), avg(UnscaledValue(wr_fee#52))])
      +- Exchange hashpartitioning(r_reason_desc#124, 5), true, [id=#351]
         +- *(8) HashAggregate(keys=[r_reason_desc#124], functions=[partial_avg(cast(ws_quantity#18 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#54)), partial_avg(UnscaledValue(wr_fee#52))])
            +- *(8) Project [ws_quantity#18, wr_fee#52, wr_refunded_cash#54, r_reason_desc#124]
               +- *(8) BroadcastHashJoin [wr_reason_sk#46L], [cast(r_reason_sk#122 as bigint)], Inner, BuildRight
                  :- *(8) Project [ws_quantity#18, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :  +- *(8) BroadcastHashJoin [ws_sold_date_sk#0], [d_date_sk#94], Inner, BuildRight
                  :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :  +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#40L], [cast(ca_address_sk#81 as bigint)], Inner, BuildRight, ((((ca_state#89 IN (IN,OH,NJ) AND (ws_net_profit#33 >= 100.00)) AND (ws_net_profit#33 <= 200.00)) OR ((ca_state#89 IN (WI,CT,KY) AND (ws_net_profit#33 >= 150.00)) AND (ws_net_profit#33 <= 300.00))) OR ((ca_state#89 IN (LA,IA,AR) AND (ws_net_profit#33 >= 50.00)) AND (ws_net_profit#33 <= 250.00)))
                  :     :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :  +- *(8) BroadcastHashJoin [wr_returning_cdemo_sk#42L, cd_marital_status#74, cd_education_status#75], [cast(cd_demo_sk#125 as bigint), cd_marital_status#127, cd_education_status#128], Inner, BuildRight
                  :     :     :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, ws_net_profit#33, wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54, cd_marital_status#74, cd_education_status#75]
                  :     :     :     :  +- *(8) BroadcastHashJoin [wr_refunded_cdemo_sk#38L], [cast(cd_demo_sk#72 as bigint)], Inner, BuildRight, ((((((cd_marital_status#74 = M) AND (cd_education_status#75 = Advanced Degree)) AND (ws_sales_price#21 >= 100.00)) AND (ws_sales_price#21 <= 150.00)) OR ((((cd_marital_status#74 = S) AND (cd_education_status#75 = College)) AND (ws_sales_price#21 >= 50.00)) AND (ws_sales_price#21 <= 100.00))) OR ((((cd_marital_status#74 = W) AND (cd_education_status#75 = 2 yr Degree)) AND (ws_sales_price#21 >= 150.00)) AND (ws_sales_price#21 <= 200.00)))
                  :     :     :     :     :- *(8) Project [ws_sold_date_sk#0, ws_quantity#18, ws_sales_price#21, ws_net_profit#33, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :  +- *(8) BroadcastHashJoin [ws_web_page_sk#12], [wp_web_page_sk#58], Inner, BuildRight
                  :     :     :     :     :     :- *(8) Project [ws_sold_date_sk#0, ws_web_page_sk#12, ws_quantity#18, ws_sales_price#21, ws_net_profit#33, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :     :  +- *(8) BroadcastHashJoin [cast(ws_item_sk#3 as bigint), cast(ws_order_number#17 as bigint)], [wr_item_sk#36L, wr_order_number#47L], Inner, BuildRight
                  :     :     :     :     :     :     :- *(8) Project [ws_sold_date_sk#0, ws_item_sk#3, ws_web_page_sk#12, ws_order_number#17, ws_quantity#18, ws_sales_price#21, ws_net_profit#33]
                  :     :     :     :     :     :     :  +- *(8) Filter (((((isnotnull(ws_item_sk#3) AND isnotnull(ws_order_number#17)) AND isnotnull(ws_web_page_sk#12)) AND isnotnull(ws_sold_date_sk#0)) AND ((((ws_sales_price#21 >= 100.00) AND (ws_sales_price#21 <= 150.00)) OR ((ws_sales_price#21 >= 50.00) AND (ws_sales_price#21 <= 100.00))) OR ((ws_sales_price#21 >= 150.00) AND (ws_sales_price#21 <= 200.00)))) AND ((((ws_net_profit#33 >= 100.00) AND (ws_net_profit#33 <= 200.00)) OR ((ws_net_profit#33 >= 150.00) AND (ws_net_profit#33 <= 300.00))) OR ((ws_net_profit#33 >= 50.00) AND (ws_net_profit#33 <= 250.00))))
                  :     :     :     :     :     :     :     +- *(8) ColumnarToRow
                  :     :     :     :     :     :     :        +- FileScan parquet default.web_sales[ws_sold_date_sk#0,ws_item_sk#3,ws_web_page_sk#12,ws_order_number#17,ws_quantity#18,ws_sales_price#21,ws_net_profit#33] Batched: true, DataFilters: [isnotnull(ws_item_sk#3), isnotnull(ws_order_number#17), isnotnull(ws_web_page_sk#12), isnotnull(..., Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., ReadSchema: struct<ws_sold_date_sk:int,ws_item_sk:int,ws_web_page_sk:int,ws_order_number:int,ws_quantity:int,...
                  :     :     :     :     :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true], input[5, bigint, true])), [id=#291]
                  :     :     :     :     :     :        +- *(1) Project [wr_item_sk#36L, wr_refunded_cdemo_sk#38L, wr_refunded_addr_sk#40L, wr_returning_cdemo_sk#42L, wr_reason_sk#46L, wr_order_number#47L, wr_fee#52, wr_refunded_cash#54]
                  :     :     :     :     :     :           +- *(1) Filter (((((isnotnull(wr_item_sk#36L) AND isnotnull(wr_order_number#47L)) AND isnotnull(wr_refunded_cdemo_sk#38L)) AND isnotnull(wr_returning_cdemo_sk#42L)) AND isnotnull(wr_refunded_addr_sk#40L)) AND isnotnull(wr_reason_sk#46L))
                  :     :     :     :     :     :              +- *(1) ColumnarToRow
                  :     :     :     :     :     :                 +- FileScan parquet default.web_returns[wr_item_sk#36L,wr_refunded_cdemo_sk#38L,wr_refunded_addr_sk#40L,wr_returning_cdemo_sk#42L,wr_reason_sk#46L,wr_order_number#47L,wr_fee#52,wr_refunded_cash#54] Batched: true, DataFilters: [isnotnull(wr_item_sk#36L), isnotnull(wr_order_number#47L), isnotnull(wr_refunded_cdemo_sk#38L), ..., Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr..., ReadSchema: struct<wr_item_sk:bigint,wr_refunded_cdemo_sk:bigint,wr_refunded_addr_sk:bigint,wr_returning_cdem...
                  :     :     :     :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#300]
                  :     :     :     :     :        +- *(2) Project [wp_web_page_sk#58]
                  :     :     :     :     :           +- *(2) Filter isnotnull(wp_web_page_sk#58)
                  :     :     :     :     :              +- *(2) ColumnarToRow
                  :     :     :     :     :                 +- FileScan parquet default.web_page[wp_web_page_sk#58] Batched: true, DataFilters: [isnotnull(wp_web_page_sk#58)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: struct<wp_web_page_sk:int>
                  :     :     :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#309]
                  :     :     :     :        +- *(3) Project [cd_demo_sk#72, cd_marital_status#74, cd_education_status#75]
                  :     :     :     :           +- *(3) Filter (((isnotnull(cd_demo_sk#72) AND isnotnull(cd_education_status#75)) AND isnotnull(cd_marital_status#74)) AND ((((cd_marital_status#74 = M) AND (cd_education_status#75 = Advanced Degree)) OR ((cd_marital_status#74 = S) AND (cd_education_status#75 = College))) OR ((cd_marital_status#74 = W) AND (cd_education_status#75 = 2 yr Degree))))
                  :     :     :     :              +- *(3) ColumnarToRow
                  :     :     :     :                 +- FileScan parquet default.customer_demographics[cd_demo_sk#72,cd_marital_status#74,cd_education_status#75] Batched: true, DataFilters: [isnotnull(cd_demo_sk#72), isnotnull(cd_education_status#75), isnotnull(cd_marital_status#74), ((..., Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status), Or(Or(And(E..., ReadSchema: struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
                  :     :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])), [id=#318]
                  :     :     :        +- *(4) Project [cd_demo_sk#125, cd_marital_status#127, cd_education_status#128]
                  :     :     :           +- *(4) Filter ((isnotnull(cd_demo_sk#125) AND isnotnull(cd_education_status#128)) AND isnotnull(cd_marital_status#127))
                  :     :     :              +- *(4) ColumnarToRow
                  :     :     :                 +- FileScan parquet default.customer_demographics[cd_demo_sk#125,cd_marital_status#127,cd_education_status#128] Batched: true, DataFilters: [isnotnull(cd_demo_sk#125), isnotnull(cd_education_status#128), isnotnull(cd_marital_status#127)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct<cd_demo_sk:int,cd_marital_status:string,cd_education_status:string>
                  :     :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#327]
                  :     :        +- *(5) Project [ca_address_sk#81, ca_state#89]
                  :     :           +- *(5) Filter (((isnotnull(ca_country#91) AND (ca_country#91 = United States)) AND isnotnull(ca_address_sk#81)) AND ((ca_state#89 IN (IN,OH,NJ) OR ca_state#89 IN (WI,CT,KY)) OR ca_state#89 IN (LA,IA,AR)))
                  :     :              +- *(5) ColumnarToRow
                  :     :                 +- FileScan parquet default.customer_address[ca_address_sk#81,ca_state#89,ca_country#91] Batched: true, DataFilters: [isnotnull(ca_country#91), (ca_country#91 = United States), isnotnull(ca_address_sk#81), ((ca_sta..., Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_..., ReadSchema: struct<ca_address_sk:int,ca_state:string,ca_country:string>
                  :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#336]
                  :        +- *(6) Project [d_date_sk#94]
                  :           +- *(6) Filter ((isnotnull(d_year#100) AND (d_year#100 = 2000)) AND isnotnull(d_date_sk#94))
                  :              +- *(6) ColumnarToRow
                  :                 +- FileScan parquet default.date_dim[d_date_sk#94,d_year#100] Batched: true, DataFilters: [isnotnull(d_year#100), (d_year#100 = 2000), isnotnull(d_date_sk#94)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct<d_date_sk:int,d_year:int>
                  +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#345]
                     +- *(7) Project [r_reason_sk#122, r_reason_desc#124]
                        +- *(7) Filter isnotnull(r_reason_sk#122)
                           +- *(7) ColumnarToRow
                              +- FileScan parquet default.reason[r_reason_sk#122,r_reason_desc#124] Batched: true, DataFilters: [isnotnull(r_reason_sk#122)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/spark/SPARK-28216/sql/core/spark-warehouse/org.apache.spark..., PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: struct<r_reason_sk:int,r_reason_desc:string>
   
   
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org