You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pig.apache.org by Daniel Dai <ji...@yahoo-inc.com> on 2011/06/14 19:20:47 UTC

Re: pb in translate requete sql en script pig

On 06/11/2011 06:32 AM, hiba houimli wrote:
> Hi,
>
> When I translate a query sql on a script pig like this exple:
>
> exple 1:
>
> query sql:
>
> select
> sum(l_extendedprice*l_discount) as revenue
> from
> lineitem
> where
> l_shipdate >= date '[DATE]'
> and l_shipdate < date '[DATE]' + interval '1' year
> and l_discount between [DISCOUNT] - 0.01 and [DISCOUNT] + 0.01
> and l_quantity < [QUANTITY];
>
>
> script pig :
>
> lineitem = LOAD '/home/hiba/tpch/lineitem.tbl' using 
> PigStorage('|') AS (l_orderkey:int, l_partkey:int, l_suppkey:int, 
> l_linenumber:int, l_quantity:int, l_extendedprice:double, 
> l_discount:double, l_tax:double, l_returnflag:chararray, 
> l_linestatus:chararray, l_shipdate:chararray, l_commitdate:chararray, 
> l_receiptdate:chararray, l_shipinstruct:chararray, 
> l_shipmode:chararray, l_comment:chararray);
> filtred_line = filter lineitem by l_shipdate >= '1994-01-01' and 
> l_shipdate < '1995-01-01' and l_discount > 0.05 and l_discount < 0.07 
> and l_quantity < 24;
> project_lineitem = foreach filtred_line generate SUM(l_extendedprice * 
> l_discount) as revenue:double;
> store project_lineitem into 'OUTPUT_PATH/tpch_query6';
>
>
> I have this problem: see the FJ: erreur_query_Q6.Png
lineitem = LOAD '/home/hiba/tpch/lineitem.tbl' using PigStorage('|') AS 
(l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, 
l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:double, 
l_returnflag:chararray, l_linestatus:chararray, l_shipdate:chararray, 
l_commitdate:chararray, l_receiptdate:chararray, 
l_shipinstruct:chararray, l_shipmode:chararray, l_comment:chararray);
filtred_line = filter lineitem by l_shipdate >= '1994-01-01' and 
l_shipdate < '1995-01-01' and l_discount > 0.05 and l_discount < 0.07 
and l_quantity < 24;
enhanced = foreach filtred_line generate 
(double)l_extendedprice*(double)l_discount as l_realprice;
grouped = group enhanced all;
project_lineitem = foreach grouped generate SUM(enhanced.l_realprice) as 
revenue:double;
store project_lineitem into 'OUTPUT_PATH/tpch_query6';

>
>
> and exple 2:
>
> query sql:
>
> select
> s_acctbal,
> s_name,
> n_name,
> p_partkey,
> p_mfgr,
> s_address,
> s_phone,
> s_comment
> from
> part,
> supplier,
> partsupp,
> nation,
> region
> where
> p_partkey = ps_partkey
> and s_suppkey = ps_suppkey
> and p_size = [SIZE]
> and p_type like '%[TYPE]'
> and s_nationkey = n_nationkey
> and n_regionkey = r_regionkey
> and r_name = '[REGION]'
> and ps_supplycost = (
> select
> min(ps_supplycost)
> from
> partsupp, supplier,
> nation, region
> where
> p_partkey = ps_partkey
> and s_suppkey = ps_suppkey
> and s_nationkey = n_nationkey
> and n_regionkey = r_regionkey
> and r_name = '[REGION]'
> )
> order by
> s_acctbal desc,
> n_name,
> s_name,
> p_partkey;
>
> script pig:
>
> partsupp = load '/home/hiba/tpch/partsupp.tbl' using PigStorage('|')
>             as (ps_partkey:int, ps_suppkey:int, ps_availqty:int, 
> ps_supplycost:double, ps_comment:chararray);
> supplier = load '/home/hiba/tpch/supplier.tbl' using PigStorage('|')
>             as (s_suppkey:int, s_name:chararray, s_address:chararray, 
> s_nationkey:int, s_phone:chararray, s_acctbal:double, 
> s_comment:chararray);
> nation = load '/home/hiba/tpch/nation.tbl' using PigStorage('|')
>             as (n_nationkey:int, n_name:chararray, n_regionkey:int, 
> n_comment:chararray);
> region = load '/home/hiba/tpch/region.tbl' using PigStorage('|')
>             as (r_regionkey:int, r_name:chararray, r_comment:chararray);
> part = load '/home/hiba/tpch/part.tbl' using PigStorage('|')
>             as (p_partkey:int, p_name:chararray, p_mfgr:chararray, 
> p_brand:chararray, p_type:chararray, p_size:int, 
> p_container:chararray, p_retailprice:double, p_comment:chararray);
> filter_region = filter region by r_name matches 'EUROPE.*';
> joined_part_partsuppr = join partsupp by ps_partkey, part by p_partkey;
> joined_supp_ps = join joined_part_partsuppr by ps_partkey, supplier by 
> s_suppkey;
> joined_nation_supp = join nation by n_nationkey, joined_supp_ps by 
> s_nationkey;
> join_nation_region = join filter_region by r_regionkey, 
> joined_nation_supp by n_regionkey;
> project_join = foreach join_nation_region generate MIN(ps_supplycost) 
> as min_ps:double;
> filter_part = filter part by p_size == 15 and p_type matches '.*BRASS';
> filter_r = filter region by r_name matches 'EUROPE.*';
> join_p_ps = join partsupp by ps_partkey, filter_part by p_partkey;
> join_all = join join_p_ps by ps_partkey, supplier by s_suppkey;
> join_s_n = join nation by n_nationkey, join_all by s_nationkey;
> join_r_n = join filter_r by r_regionkey,join_s_n by n_regionkey;
> grouped = cogroup join_r_n by ps_supplycost, project_join by min_ps;
> filtered = filter grouped by COUNT(project_join) == COUNT(join_r_n);
> project = foreach filtered generate flatten(join_r_n.s_acctbal), 
> flatten(join_r_n.s_name), flatten(join_r_n.n_name), 
> flatten(join_r_n.p_partkey), (join_r_n.p_mfgr), 
> flatten(join_r_n.s_address), flatten(join_r_n.s_phone), 
> flatten(join_r_n.s_comment);
> store project into 'OUTPUT_PATH/tpch_query2';
>
> I have this problem: see the FJ: Erreur_Query_Q2.Png
>
partsupp = load '/home/hiba/tpch/partsupp.tbl' using PigStorage('|')
             as (ps_partkey:int, ps_suppkey:int, ps_availqty:int, 
ps_supplycost:double, ps_comment:chararray);
supplier = load '/home/hiba/tpch/supplier.tbl' using PigStorage('|')
             as (s_suppkey:int, s_name:chararray, s_address:chararray, 
s_nationkey:int, s_phone:chararray, s_acctbal:double, s_comment:chararray);
nation = load '/home/hiba/tpch/nation.tbl' using PigStorage('|')
             as (n_nationkey:int, n_name:chararray, n_regionkey:int, 
n_comment:chararray);
region = load '/home/hiba/tpch/region.tbl' using PigStorage('|')
             as (r_regionkey:int, r_name:chararray, r_comment:chararray);
part = load '/home/hiba/tpch/part.tbl' using PigStorage('|')
             as (p_partkey:int, p_name:chararray, p_mfgr:chararray, 
p_brand:chararray, p_type:chararray, p_size:int, p_container:chararray, 
p_retailprice:double, p_comment:chararray);

filter_region = filter region by r_name matches 'EUROPE.*';
joined_part_partsuppr = join partsupp by ps_partkey, part by p_partkey;
joined_supp_ps = join joined_part_partsuppr by ps_partkey, supplier by 
s_suppkey;
joined_nation_supp = join nation by n_nationkey, joined_supp_ps by 
s_nationkey;
join_nation_region = join filter_region by r_regionkey, 
joined_nation_supp by n_regionkey;
grouped = group join_nation_region all;
stats = foreach grouped generate MIN(join_nation_region.ps_supplycost) 
as min_ps;

filter_part = filter part by p_size == 15 and p_type matches '.*BRASS';
filter_r = filter region by r_name matches 'EUROPE.*';
join_p_ps = join partsupp by ps_partkey, filter_part by p_partkey;
join_all = join join_p_ps by ps_partkey, supplier by s_suppkey;
join_s_n = join nation by n_nationkey, join_all by s_nationkey;
join_r_n = join filter_r by r_regionkey,join_s_n by n_regionkey;

filter_min = filter join_r_n by ps_supplycost==stats.min_ps;

ordered = order filter_min by s_acctbal desc, n_name, s_name, p_partkey;

store ordered into 'OUTPUT_PATH/tpch_query2';
>
> I don't Know what I do,
>
> thak you for your help ...
>
>
>
>
>