Posted to issues@spark.apache.org by "JESSE CHEN (JIRA)" <ji...@apache.org> on 2016/04/14 22:35:25 UTC

[jira] [Closed] (SPARK-14616) TreeNodeException running Q44 and 58 on Parquet tables

     [ https://issues.apache.org/jira/browse/SPARK-14616?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

JESSE CHEN closed SPARK-14616.
------------------------------
    Resolution: Not A Problem

> TreeNodeException running Q44 and 58 on Parquet tables
> ------------------------------------------------------
>
>                 Key: SPARK-14616
>                 URL: https://issues.apache.org/jira/browse/SPARK-14616
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.0.0
>            Reporter: JESSE CHEN
>
> {code:title=tpcds q44}
>  select  asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing
> from(select *
>      from (select item_sk,rank() over (order by rank_col asc) rnk
>            from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
>                  from store_sales ss1
>                  where ss_store_sk = 4
>                  group by ss_item_sk
>                  having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col
>                                                   from store_sales
>                                                   where ss_store_sk = 4
>                                                     and ss_addr_sk is null
>                                                   group by ss_store_sk))V1)V11
>      where rnk  < 11) asceding,
>     (select *
>      from (select item_sk,rank() over (order by rank_col desc) rnk
>            from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
>                  from store_sales ss1
>                  where ss_store_sk = 4
>                  group by ss_item_sk
>                  having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col
>                                                   from store_sales
>                                                   where ss_store_sk = 4
>                                                     and ss_addr_sk is null
>                                                   group by ss_store_sk))V2)V21
>      where rnk  < 11) descending,
> item i1,
> item i2
> where asceding.rnk = descending.rnk
>   and i1.i_item_sk=asceding.item_sk
>   and i2.i_item_sk=descending.item_sk
> order by asceding.rnk
>  limit 100;
> {code}
> {noformat}
> bin/spark-sql  --driver-memory 10g --verbose --master yarn-client  --packages com.databricks:spark-csv_2.10:1.3.0 --executor-memory 4g --num-executors 80 --executor-cores 2 --database hadoopds1g  -f q44.sql
> {noformat}
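> For reference, a roughly equivalent programmatic reproduction (a sketch only, assuming a 2.0.0 build with Hive support; the database name and query file come from the command above, the app name and everything else are illustrative):
> {code:scala}
> import scala.io.Source
> import org.apache.spark.sql.SparkSession
>
> // Hive support is needed so the hadoopds1g database and its tables resolve.
> val spark = SparkSession.builder()
>   .appName("SPARK-14616-q44")
>   .enableHiveSupport()
>   .getOrCreate()
>
> spark.sql("USE hadoopds1g")
>
> // Load q44.sql (dropping the trailing semicolon) and run it; per the report,
> // the TreeNodeException below is thrown at execution time.
> val q44 = Source.fromFile("q44.sql").mkString.trim.stripSuffix(";")
> spark.sql(q44).show(100, truncate = false)
> {code}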
> {noformat}
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Exchange SinglePartition, None
> +- WholeStageCodegen
>    :  +- Project [item_sk#0,rank_col#1]
>    :     +- Filter havingCondition#219: boolean
>    :        +- TungstenAggregate(key=[ss_item_sk#12], functions=[(avg(ss_net_profit#32),mode=Final,isDistinct=false)], output=[havingCondition#219,item_sk#0,rank_col#1])
>    :           +- INPUT
>    +- Exchange hashpartitioning(ss_item_sk#12,200), None
>       +- WholeStageCodegen
>          :  +- TungstenAggregate(key=[ss_item_sk#12], functions=[(avg(ss_net_profit#32),mode=Partial,isDistinct=false)], output=[ss_item_sk#12,sum#612,count#613L])
>          :     +- Project [ss_item_sk#12,ss_net_profit#32]
>          :        +- Filter (ss_store_sk#17 = 4)
>          :           +- INPUT
>          +- Scan ParquetRelation: hadoopds1g.store_sales[ss_item_sk#12,ss_net_profit#32,ss_store_sk#17] InputPaths: hdfs://bigaperf116.svl.ibm.com:8020/apps/hive/warehouse/hadoopds1g.db/store_sales, PushedFilters: [EqualTo(ss_store_sk,4)]
>         at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
>         at org.apache.spark.sql.execution.Exchange.doExecute(Exchange.scala:105)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at org.apache.spark.sql.execution.Sort.doExecute(Sort.scala:60)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at org.apache.spark.sql.execution.Window.doExecute(Window.scala:288)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at org.apache.spark.sql.execution.InputAdapter.upstream(WholeStageCodegen.scala:176)
>         at org.apache.spark.sql.execution.Filter.upstream(basicOperators.scala:73)
>         at org.apache.spark.sql.execution.Project.upstream(basicOperators.scala:35)
>         at org.apache.spark.sql.execution.WholeStageCodegen.doExecute(WholeStageCodegen.scala:279)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at org.apache.spark.sql.execution.Sort.doExecute(Sort.scala:60)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at org.apache.spark.sql.execution.joins.SortMergeJoin.doExecute(SortMergeJoin.scala:63)
>         at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
> {noformat}
> Note this happens only when using Parquet tables. Flat files (CSV) work fine and return correct query results.
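> One way to compare the two storage formats on the same data (a minimal sketch; the paths, the temp-view names, the CSV reader options, and the cut-down rankedProfit helper are illustrative, not the exact setup used above):
> {code:scala}
> import org.apache.spark.sql.SparkSession
>
> val spark = SparkSession.builder().getOrCreate()
>
> // The same store_sales data, registered once from Parquet and once from CSV
> // (both paths are hypothetical).
> spark.read.parquet("hdfs:///user/spark/hadoopds1gbparquet/store_sales")
>   .createOrReplaceTempView("store_sales_parquet")
> spark.read.option("header", "true").option("inferSchema", "true")
>   .csv("hdfs:///user/spark/hadoopds1gcsv/store_sales")
>   .createOrReplaceTempView("store_sales_csv")
>
> // A cut-down version of the failing pattern from q44: HAVING over an aggregate
> // plus a scalar subquery, followed by a window function over the result.
> def rankedProfit(table: String) = spark.sql(s"""
>   select item_sk, rank() over (order by rank_col) rnk
>   from (select ss_item_sk item_sk, avg(ss_net_profit) rank_col
>         from $table
>         where ss_store_sk = 4
>         group by ss_item_sk
>         having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit)
>                                            from $table
>                                            where ss_store_sk = 4 and ss_addr_sk is null
>                                            group by ss_store_sk)) v
> """)
>
> rankedProfit("store_sales_csv").show()      // per the description, returns results
> rankedProfit("store_sales_parquet").show()  // per the description, hits the TreeNodeException
> {code}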
> Additionally, query 58 encounters the same exception. The query is as follows:
> {noformat}
>  with ss_items as
>  (select i_item_id item_id
>         ,sum(ss_ext_sales_price) ss_item_rev 
>  from store_sales
>      ,item
>      ,date_dim
>  where ss_item_sk = i_item_sk
>    and d_date in (select d_date
>                   from date_dim
>                   where d_week_seq = (select d_week_seq 
>                                       from date_dim
>                                       where d_date = '2000-01-03'))
>    and ss_sold_date_sk   = d_date_sk
>  group by i_item_id),
>  cs_items as
>  (select i_item_id item_id
>         ,sum(cs_ext_sales_price) cs_item_rev
>   from catalog_sales
>       ,item
>       ,date_dim
>  where cs_item_sk = i_item_sk
>   and  d_date in (select d_date
>                   from date_dim
>                   where d_week_seq = (select d_week_seq 
>                                       from date_dim
>                                       where d_date = '2000-01-03'))
>   and  cs_sold_date_sk = d_date_sk
>  group by i_item_id),
>  ws_items as
>  (select i_item_id item_id
>         ,sum(ws_ext_sales_price) ws_item_rev
>   from web_sales
>       ,item
>       ,date_dim
>  where ws_item_sk = i_item_sk
>   and  d_date in (select d_date
>                   from date_dim
>                   where d_week_seq =(select d_week_seq 
>                                      from date_dim
>                                      where d_date = '2000-01-03'))
>   and ws_sold_date_sk   = d_date_sk
>  group by i_item_id)
>   select  ss_items.item_id
>        ,ss_item_rev
>        ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev
>        ,cs_item_rev
>        ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev
>        ,ws_item_rev
>        ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev
>        ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average
>  from ss_items,cs_items,ws_items
>  where ss_items.item_id=cs_items.item_id
>    and ss_items.item_id=ws_items.item_id 
>    and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
>    and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
>    and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
>    and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
>    and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
>    and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
>  order by item_id
>          ,ss_item_rev
>   limit 100;
> {noformat}
> Not certain if this is related, but it appears a bad plan was created.
> Here are the logical, analyzed, optimized, and physical plans created for query 44 (a sketch for regenerating them follows the plan output):
> {noformat}
> Logical Plan =='Limit 100
> +- 'Sort ['asceding.rnk ASC], true
>    +- 'Project [unresolvedalias('asceding.rnk,None),unresolvedalias('i1.i_product_name AS best_performing#433,None),unresolvedalias('i2.i_product_name AS worst_performing#434,None)]
>       +- 'Filter ((('asceding.rnk = 'descending.rnk) && ('i1.i_item_sk = 'asceding.item_sk)) && ('i2.i_item_sk = 'descending.item_sk))
>          +- 'Join Inner, None
>             :- 'Join Inner, None
>             :  :- 'Join Inner, None
>             :  :  :- 'Subquery asceding
>             :  :  :  +- 'Project [unresolvedalias(*,None)]
>             :  :  :     +- 'Filter ('rnk < 11)
>             :  :  :        +- 'Subquery V11
>             :  :  :           +- 'Project [unresolvedalias('item_sk,None),unresolvedalias('rank() windowspecdefinition('rank_col ASC,UnspecifiedFrame) AS rnk#428,None)]
>             :  :  :              +- 'Subquery V1
>             :  :  :                 +- 'Filter cast(('avg('ss_net_profit) > (0.9 * scalarsubquery('Aggregate ['ss_store_sk], [unresolvedalias('avg('ss_net_profit) AS rank_col#427,None)]
> +- 'Filter (('ss_store_sk = 4) && isnull('ss_addr_sk))
>    +- 'UnresolvedRelation `store_sales`, None
> ))) as boolean)
>             :  :  :                    +- 'Aggregate ['ss_item_sk], [unresolvedalias('ss_item_sk AS item_sk#425,None),unresolvedalias('avg('ss_net_profit) AS rank_col#426,None)]
>             :  :  :                       +- 'Filter ('ss_store_sk = 4)
>             :  :  :                          +- 'UnresolvedRelation `store_sales`, Some(ss1)
>             :  :  +- 'Subquery descending
>             :  :     +- 'Project [unresolvedalias(*,None)]
>             :  :        +- 'Filter ('rnk < 11)
>             :  :           +- 'Subquery V21
>             :  :              +- 'Project [unresolvedalias('item_sk,None),unresolvedalias('rank() windowspecdefinition('rank_col DESC,UnspecifiedFrame) AS rnk#432,None)]
>             :  :                 +- 'Subquery V2
>             :  :                    +- 'Filter cast(('avg('ss_net_profit) > (0.9 * scalarsubquery('Aggregate ['ss_store_sk], [unresolvedalias('avg('ss_net_profit) AS rank_col#431,None)]
> +- 'Filter (('ss_store_sk = 4) && isnull('ss_addr_sk))
>    +- 'UnresolvedRelation `store_sales`, None
> ))) as boolean)
>             :  :                       +- 'Aggregate ['ss_item_sk], [unresolvedalias('ss_item_sk AS item_sk#429,None),unresolvedalias('avg('ss_net_profit) AS rank_col#430,None)]
>             :  :                          +- 'Filter ('ss_store_sk = 4)
>             :  :                             +- 'UnresolvedRelation `store_sales`, Some(ss1)
>             :  +- 'UnresolvedRelation `item`, Some(i1)
>             +- 'UnresolvedRelation `item`, Some(i2)
> ==
> Analyzed Plan ==Limit 100
> +- Sort [rnk#428 ASC], true
>    +- Project [rnk#428,i_product_name#202 AS best_performing#433,i_product_name#478 AS worst_performing#434]
>       +- Filter (((rnk#428 = rnk#432) && (i_item_sk#181 = item_sk#425)) && (i_item_sk#457 = item_sk#429))
>          +- Join Inner, None
>             :- Join Inner, None
>             :  :- Join Inner, None
>             :  :  :- Subquery asceding
>             :  :  :  +- Project [item_sk#425,rnk#428]
>             :  :  :     +- Filter (rnk#428 < 11)
>             :  :  :        +- Subquery V11
>             :  :  :           +- Project [item_sk#425,rnk#428]
>             :  :  :              +- Project [item_sk#425,rank_col#426,rnk#428,rnk#428]
>             :  :  :                 +- Window [item_sk#425,rank_col#426], [rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
>             :  :  :                    +- Project [item_sk#425,rank_col#426]
>             :  :  :                       +- Subquery V1
>             :  :  :                          +- Project [item_sk#425,rank_col#426]
>             :  :  :                             +- Filter havingCondition#439: boolean
>             :  :  :                                +- Aggregate [ss_item_sk#282], [cast(((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > (cast(0.9 as double) * scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS rank_col#427]
> +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
>    +- Subquery store_sales
>       +- Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] ParquetRelation
> ))) as boolean) AS havingCondition#439,ss_item_sk#282 AS item_sk#425,(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS rank_col#426]
>             :  :  :                                   +- Filter (ss_store_sk#287 = 4)
>             :  :  :                                      +- Subquery ss1
>             :  :  :                                         +- Subquery store_sales
>             :  :  :                                            +- Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] ParquetRelation
>             :  :  +- Subquery descending
>             :  :     +- Project [item_sk#429,rnk#432]
>             :  :        +- Filter (rnk#432 < 11)
>             :  :           +- Subquery V21
>             :  :              +- Project [item_sk#429,rnk#432]
>             :  :                 +- Project [item_sk#429,rank_col#430,rnk#432,rnk#432]
>             :  :                    +- Window [item_sk#429,rank_col#430], [rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
>             :  :                       +- Project [item_sk#429,rank_col#430]
>             :  :                          +- Subquery V2
>             :  :                             +- Project [item_sk#429,rank_col#430]
>             :  :                                +- Filter havingCondition#440: boolean
>             :  :                                   +- Aggregate [ss_item_sk#282], [cast(((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > (cast(0.9 as double) * scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS rank_col#431]
> +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
>    +- Subquery store_sales
>       +- Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] ParquetRelation
> ))) as boolean) AS havingCondition#440,ss_item_sk#282 AS item_sk#429,(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS rank_col#430]
>             :  :                                      +- Filter (ss_store_sk#287 = 4)
>             :  :                                         +- Subquery ss1
>             :  :                                            +- Subquery store_sales
>             :  :                                               +- Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] ParquetRelation
>             :  +- Subquery i1
>             :     +- Subquery item
>             :        +- Relation[i_item_sk#181,i_item_id#182,i_rec_start_date#183,i_rec_end_date#184,i_item_desc#185,i_current_price#186,i_wholesale_cost#187,i_brand_id#188L,i_brand#189,i_class_id#190L,i_class#191,i_category_id#192L,i_category#193,i_manufact_id#194L,i_manufact#195,i_size#196,i_formulation#197,i_color#198,i_units#199,i_container#200,i_manager_id#201L,i_product_name#202] ParquetRelation
>             +- Subquery i2
>                +- Subquery item
>                   +- Relation[i_item_sk#457,i_item_id#458,i_rec_start_date#459,i_rec_end_date#460,i_item_desc#461,i_current_price#462,i_wholesale_cost#463,i_brand_id#464L,i_brand#465,i_class_id#466L,i_class#467,i_category_id#468L,i_category#469,i_manufact_id#470L,i_manufact#471,i_size#472,i_formulation#473,i_color#474,i_units#475,i_container#476,i_manager_id#477L,i_product_name#478] ParquetRelation
> ==
> Optimized Plan ==Limit 100
> +- Sort [rnk#428 ASC], true
>    +- Project [rnk#428,i_product_name#202 AS best_performing#433,i_product_name#478 AS worst_performing#434]
>       +- Join Inner, Some((i_item_sk#457 = item_sk#429))
>          :- Project [rnk#428,item_sk#429,i_product_name#202]
>          :  +- Join Inner, Some((i_item_sk#181 = item_sk#425))
>          :     :- Project [rnk#428,item_sk#429,item_sk#425]
>          :     :  +- Join Inner, Some((rnk#428 = rnk#432))
>          :     :     :- Project [item_sk#425,rnk#428]
>          :     :     :  +- Filter (rnk#428 < 11)
>          :     :     :     +- Window [item_sk#425,rank_col#426], [rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
>          :     :     :        +- Project [item_sk#425,rank_col#426]
>          :     :     :           +- Filter havingCondition#439: boolean
>          :     :     :              +- Aggregate [ss_item_sk#282], [((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > (0.9 * scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS rank_col#427]
> +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
>    +- Subquery store_sales
>       +- Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] ParquetRelation
> ))) AS havingCondition#439,ss_item_sk#282 AS item_sk#425,(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS rank_col#426]
>          :     :     :                 +- Project [ss_item_sk#282,ss_net_profit#302]
>          :     :     :                    +- Filter (ss_store_sk#287 = 4)
>          :     :     :                       +- Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] ParquetRelation
>          :     :     +- Project [item_sk#429,rnk#432]
>          :     :        +- Filter (rnk#432 < 11)
>          :     :           +- Window [item_sk#429,rank_col#430], [rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
>          :     :              +- Project [item_sk#429,rank_col#430]
>          :     :                 +- Filter havingCondition#440: boolean
>          :     :                    +- Aggregate [ss_item_sk#282], [((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > (0.9 * scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS rank_col#431]
> +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
>    +- Subquery store_sales
>       +- Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] ParquetRelation
> ))) AS havingCondition#440,ss_item_sk#282 AS item_sk#429,(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS rank_col#430]
>          :     :                       +- Project [ss_item_sk#282,ss_net_profit#302]
>          :     :                          +- Filter (ss_store_sk#287 = 4)
>          :     :                             +- Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] ParquetRelation
>          :     +- Project [i_item_sk#181,i_product_name#202]
>          :        +- Relation[i_item_sk#181,i_item_id#182,i_rec_start_date#183,i_rec_end_date#184,i_item_desc#185,i_current_price#186,i_wholesale_cost#187,i_brand_id#188L,i_brand#189,i_class_id#190L,i_class#191,i_category_id#192L,i_category#193,i_manufact_id#194L,i_manufact#195,i_size#196,i_formulation#197,i_color#198,i_units#199,i_container#200,i_manager_id#201L,i_product_name#202] ParquetRelation
>          +- Project [i_item_sk#457,i_product_name#478]
>             +- Relation[i_item_sk#457,i_item_id#458,i_rec_start_date#459,i_rec_end_date#460,i_item_desc#461,i_current_price#462,i_wholesale_cost#463,i_brand_id#464L,i_brand#465,i_class_id#466L,i_class#467,i_category_id#468L,i_category#469,i_manufact_id#470L,i_manufact#471,i_size#472,i_formulation#473,i_color#474,i_units#475,i_container#476,i_manager_id#477L,i_product_name#478] ParquetRelation
> ==
> Physical Plan ==TakeOrderedAndProject(limit=100, orderBy=[rnk#428 ASC], output=[rnk#428,best_performing#433,worst_performing#434])
> +- WholeStageCodegen
>    :  +- Project [rnk#428,i_product_name#202 AS best_performing#433,i_product_name#478 AS worst_performing#434]
>    :     +- BroadcastHashJoin [item_sk#429], [i_item_sk#457], BuildRight, None
>    :        :- Project [rnk#428,item_sk#429,i_product_name#202]
>    :        :  +- BroadcastHashJoin [item_sk#425], [i_item_sk#181], BuildRight, None
>    :        :     :- Project [rnk#428,item_sk#429,item_sk#425]
>    :        :     :  +- INPUT
>    :        :     +- INPUT
>    :        +- INPUT
>    :- SortMergeJoin [rnk#428], [rnk#432], None
>    :  :- Sort [rnk#428 ASC], false, 0
>    :  :  +- WholeStageCodegen
>    :  :     :  +- Project [item_sk#425,rnk#428]
>    :  :     :     +- Filter (rnk#428 < 11)
>    :  :     :        +- INPUT
>    :  :     +- Window [item_sk#425,rank_col#426], [rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
>    :  :        +- Sort [rank_col#426 ASC], false, 0
>    :  :           +- Exchange SinglePartition, None
>    :  :              +- WholeStageCodegen
>    :  :                 :  +- Project [item_sk#425,rank_col#426]
>    :  :                 :     +- Filter havingCondition#439: boolean
>    :  :                 :        +- TungstenAggregate(key=[ss_item_sk#282], functions=[(avg(cast(ss_net_profit#302 as double)),mode=Final,isDistinct=false)], output=[havingCondition#439,item_sk#425,rank_col#426])
>    :  :                 :           +- INPUT
>    :  :                 +- Exchange hashpartitioning(ss_item_sk#282,200), None
>    :  :                    +- WholeStageCodegen
>    :  :                       :  +- TungstenAggregate(key=[ss_item_sk#282], functions=[(avg(cast(ss_net_profit#302 as double)),mode=Partial,isDistinct=false)], output=[ss_item_sk#282,sum#506,count#507L])
>    :  :                       :     +- Project [ss_item_sk#282,ss_net_profit#302]
>    :  :                       :        +- Filter (ss_store_sk#287 = 4)
>    :  :                       :           +- INPUT
>    :  :                       +- Scan ParquetRelation[ss_item_sk#282,ss_net_profit#302,ss_store_sk#287] InputPaths: hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_SUCCESS, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_common_metadata, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_metadata, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00000-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00001-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00002-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, PushedFilters: [EqualTo(ss_store_sk,4)]
>    :  +- Sort [rnk#432 ASC], false, 0
>    :     +- WholeStageCodegen
>    :        :  +- Project [item_sk#429,rnk#432]
>    :        :     +- Filter (rnk#432 < 11)
>    :        :        +- INPUT
>    :        +- Window [item_sk#429,rank_col#430], [rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
>    :           +- Sort [rank_col#430 DESC], false, 0
>    :              +- Exchange SinglePartition, None
>    :                 +- WholeStageCodegen
>    :                    :  +- Project [item_sk#429,rank_col#430]
>    :                    :     +- Filter havingCondition#440: boolean
>    :                    :        +- TungstenAggregate(key=[ss_item_sk#282], functions=[(avg(cast(ss_net_profit#302 as double)),mode=Final,isDistinct=false)], output=[havingCondition#440,item_sk#429,rank_col#430])
>    :                    :           +- INPUT
>    :                    +- Exchange hashpartitioning(ss_item_sk#282,200), None
>    :                       +- WholeStageCodegen
>    :                          :  +- TungstenAggregate(key=[ss_item_sk#282], functions=[(avg(cast(ss_net_profit#302 as double)),mode=Partial,isDistinct=false)], output=[ss_item_sk#282,sum#511,count#512L])
>    :                          :     +- Project [ss_item_sk#282,ss_net_profit#302]
>    :                          :        +- Filter (ss_store_sk#287 = 4)
>    :                          :           +- INPUT
>    :                          +- Scan ParquetRelation[ss_item_sk#282,ss_net_profit#302,ss_store_sk#287] InputPaths: hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_SUCCESS, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_common_metadata, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_metadata, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00000-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00001-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00002-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, PushedFilters: [EqualTo(ss_store_sk,4)]
>    :- Scan ParquetRelation[i_item_sk#181,i_product_name#202] InputPaths: hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_SUCCESS, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_common_metadata, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_metadata, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00000-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00001-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet
>    +- Scan ParquetRelation[i_item_sk#457,i_product_name#478] InputPaths: hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_SUCCESS, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_common_metadata, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_metadata, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00000-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet, hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00001-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet
> {noformat}
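> The plan dump above can be regenerated without executing the query; a minimal sketch, assuming the same Hive-enabled session ({{spark}}) and q44.sql file as in the earlier reproduction sketch:
> {code:scala}
> import scala.io.Source
>
> // explain(true) prints the parsed, analyzed, optimized, and physical plans
> // without running the query; EXPLAIN EXTENDED from the SQL interface does the same.
> val q44 = Source.fromFile("q44.sql").mkString.trim.stripSuffix(";")
> spark.sql(q44).explain(true)
> {code}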


