You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "JESSE CHEN (JIRA)" <ji...@apache.org> on 2016/03/29 20:00:28 UTC
[jira] [Closed] (SPARK-13862) TPCDS query 49 returns wrong results
compared to TPC official result set
[ https://issues.apache.org/jira/browse/SPARK-13862?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
JESSE CHEN closed SPARK-13862.
------------------------------
PR fixed this issue. Thanks, [~smilegator]
> TPCDS query 49 returns wrong results compared to TPC official result set
> -------------------------------------------------------------------------
>
> Key: SPARK-13862
> URL: https://issues.apache.org/jira/browse/SPARK-13862
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 1.6.0
> Reporter: JESSE CHEN
> Labels: tpcds-result-mismatch
>
> Testing Spark SQL using TPC queries. Query 49 returns wrong results compared to official result set. This is at 1GB SF (validation run).
> SparkSQL has right answer but in wrong order (and there is an 'order by' in the query).
> Actual results:
> {noformat}
> store,9797,0.80000000000000000000,2,2]
> [store,12641,0.81609195402298850575,3,3]
> [store,6661,0.92207792207792207792,7,7]
> [store,13013,0.94202898550724637681,8,8]
> [store,9029,1.00000000000000000000,10,10]
> [web,15597,0.66197183098591549296,3,3]
> [store,14925,0.96470588235294117647,9,9]
> [store,4063,1.00000000000000000000,10,10]
> [catalog,8929,0.76250000000000000000,7,7]
> [store,11589,0.82653061224489795918,6,6]
> [store,1171,0.82417582417582417582,5,5]
> [store,9471,0.77500000000000000000,1,1]
> [catalog,12577,0.65591397849462365591,3,3]
> [web,97,0.90361445783132530120,9,8]
> [web,85,0.85714285714285714286,8,7]
> [catalog,361,0.74647887323943661972,5,5]
> [web,2915,0.69863013698630136986,4,4]
> [web,117,0.92500000000000000000,10,9]
> [catalog,9295,0.77894736842105263158,9,9]
> [web,3305,0.73750000000000000000,6,16]
> [catalog,16215,0.79069767441860465116,10,10]
> [web,7539,0.59000000000000000000,1,1]
> [catalog,17543,0.57142857142857142857,1,1]
> [catalog,3411,0.71641791044776119403,4,4]
> [web,11933,0.71717171717171717172,5,5]
> [catalog,14513,0.63541666666666666667,2,2]
> [store,15839,0.81632653061224489796,4,4]
> [web,3337,0.62650602409638554217,2,2]
> [web,5299,0.92708333333333333333,11,10]
> [catalog,8189,0.74698795180722891566,6,6]
> [catalog,14869,0.77173913043478260870,8,8]
> [web,483,0.80000000000000000000,7,6]
> {noformat}
> Expected results:
> {noformat}
> +---------+-------+--------------------+-------------+---------------+
> | CHANNEL | ITEM | RETURN_RATIO | RETURN_RANK | CURRENCY_RANK |
> +---------+-------+--------------------+-------------+---------------+
> | catalog | 17543 | .5714285714285714 | 1 | 1 |
> | catalog | 14513 | .6354166666666666 | 2 | 2 |
> | catalog | 12577 | .6559139784946236 | 3 | 3 |
> | catalog | 3411 | .7164179104477611 | 4 | 4 |
> | catalog | 361 | .7464788732394366 | 5 | 5 |
> | catalog | 8189 | .7469879518072289 | 6 | 6 |
> | catalog | 8929 | .7625000000000000 | 7 | 7 |
> | catalog | 14869 | .7717391304347826 | 8 | 8 |
> | catalog | 9295 | .7789473684210526 | 9 | 9 |
> | catalog | 16215 | .7906976744186046 | 10 | 10 |
> | store | 9471 | .7750000000000000 | 1 | 1 |
> | store | 9797 | .8000000000000000 | 2 | 2 |
> | store | 12641 | .8160919540229885 | 3 | 3 |
> | store | 15839 | .8163265306122448 | 4 | 4 |
> | store | 1171 | .8241758241758241 | 5 | 5 |
> | store | 11589 | .8265306122448979 | 6 | 6 |
> | store | 6661 | .9220779220779220 | 7 | 7 |
> | store | 13013 | .9420289855072463 | 8 | 8 |
> | store | 14925 | .9647058823529411 | 9 | 9 |
> | store | 4063 | 1.0000000000000000 | 10 | 10 |
> | store | 9029 | 1.0000000000000000 | 10 | 10 |
> | web | 7539 | .5900000000000000 | 1 | 1 |
> | web | 3337 | .6265060240963855 | 2 | 2 |
> | web | 15597 | .6619718309859154 | 3 | 3 |
> | web | 2915 | .6986301369863013 | 4 | 4 |
> | web | 11933 | .7171717171717171 | 5 | 5 |
> | web | 3305 | .7375000000000000 | 6 | 16 |
> | web | 483 | .8000000000000000 | 7 | 6 |
> | web | 85 | .8571428571428571 | 8 | 7 |
> | web | 97 | .9036144578313253 | 9 | 8 |
> | web | 117 | .9250000000000000 | 10 | 9 |
> | web | 5299 | .9270833333333333 | 11 | 10 |
> +---------+-------+--------------------+-------------+---------------+
> {noformat}
> Query used:
> {noformat}
> -- start query 49 in stream 0 using template query49.tpl and seed QUALIFICATION
> select
> 'web' as channel
> ,web.item
> ,web.return_ratio
> ,web.return_rank
> ,web.currency_rank
> from (
> select
> item
> ,return_ratio
> ,currency_ratio
> ,rank() over (order by return_ratio) as return_rank
> ,rank() over (order by currency_ratio) as currency_rank
> from
> ( select ws.ws_item_sk as item
> ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/
> cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio
> ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/
> cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio
> from
> web_sales ws left outer join web_returns wr
> on (ws.ws_order_number = wr.wr_order_number and
> ws.ws_item_sk = wr.wr_item_sk)
> ,date_dim
> where
> wr.wr_return_amt > 10000
> and ws.ws_net_profit > 1
> and ws.ws_net_paid > 0
> and ws.ws_quantity > 0
> and ws_sold_date_sk = d_date_sk
> and d_year = 2001
> and d_moy = 12
> group by ws.ws_item_sk
> ) in_web
> ) web
> where
> (
> web.return_rank <= 10
> or
> web.currency_rank <= 10
> )
> union
> select
> 'catalog' as channel
> ,catalog.item
> ,catalog.return_ratio
> ,catalog.return_rank
> ,catalog.currency_rank
> from (
> select
> item
> ,return_ratio
> ,currency_ratio
> ,rank() over (order by return_ratio) as return_rank
> ,rank() over (order by currency_ratio) as currency_rank
> from
> ( select
> cs.cs_item_sk as item
> ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/
> cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio
> ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/
> cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio
> from
> catalog_sales cs left outer join catalog_returns cr
> on (cs.cs_order_number = cr.cr_order_number and
> cs.cs_item_sk = cr.cr_item_sk)
> ,date_dim
> where
> cr.cr_return_amount > 10000
> and cs.cs_net_profit > 1
> and cs.cs_net_paid > 0
> and cs.cs_quantity > 0
> and cs_sold_date_sk = d_date_sk
> and d_year = 2001
> and d_moy = 12
> group by cs.cs_item_sk
> ) in_cat
> ) catalog
> where
> (
> catalog.return_rank <= 10
> or
> catalog.currency_rank <=10
> )
> union
> select
> 'store' as channel
> ,store.item
> ,store.return_ratio
> ,store.return_rank
> ,store.currency_rank
> from (
> select
> item
> ,return_ratio
> ,currency_ratio
> ,rank() over (order by return_ratio) as return_rank
> ,rank() over (order by currency_ratio) as currency_rank
> from
> ( select sts.ss_item_sk as item
> ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio
> ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio
> from
> store_sales sts left outer join store_returns sr
> on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk)
> ,date_dim
> where
> sr.sr_return_amt > 10000
> and sts.ss_net_profit > 1
> and sts.ss_net_paid > 0
> and sts.ss_quantity > 0
> and ss_sold_date_sk = d_date_sk
> and d_year = 2001
> and d_moy = 12
> group by sts.ss_item_sk
> ) in_store
> ) store
> where (
> store.return_rank <= 10
> or
> store.currency_rank <= 10
> )
> order by 1,4,5
> limit 100;
> -- end query 49 in stream 0 using template query49.tpl
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org