You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by Apache Wiki <wi...@apache.org> on 2013/03/26 08:47:52 UTC

[Tajo Wiki] Update of "TpchBenchmark" by HyunsikChoi

Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Tajo Wiki" for change notification.

The "TpchBenchmark" page has been changed by HyunsikChoi:
http://wiki.apache.org/tajo/TpchBenchmark

Comment:
Add TPC-H DDL and some queries.

New page:
= TPC-H Benchmark =
TPC-H is an ad-hoc and decision support benchmark. Some of queries are available in the current Tajo. You can download the TPC-H data generator from [[http://www.tpc.org/tpch/|here]].

== DDL for TPC-H datasets ==
TPC-H benchmark provides 8 table datasets. The below DDL statements is for them.

{{{#!SQL
create external table supplier (S_SUPPKEY long, S_NAME string, S_ADDRESS string, S_NATIONKEY long, S_PHONE string, S_ACCTBAL double, S_COMMENT string) using csv with ('csvfile.delimiter'='|') location 'hdfs://x/y'

create external table lineitem (L_ORDERKEY long, L_PARTKEY long, L_SUPPKEY long, L_LINENUMBER long, L_QUANTITY double, L_EXTENDEDPRICE double, L_DISCOUNT double, L_TAX double, L_RETURNFLAG string, L_LINESTATUS string, L_SHIPDATE string, L_COMMITDATE string, L_RECEIPTDATE string, L_SHIPINSTRUCT string, L_SHIPMODE string, L_COMMENT string) using csv with ('csvfile.delimiter'='|') location 'hdfs://x/y'

create external table part (P_PARTKEY long, P_NAME string, P_MFGR string, P_BRAND string, P_TYPE string, P_SIZE int, P_CONTAINER string, P_RETAILPRICE double, P_COMMENT string) using csv with ('csvfile.delimiter'='|') location 'hdfs://x/y'

create external table partsupp (PS_PARTKEY long, PS_SUPPKEY long, PS_AVAILQTY int, PS_SUPPLYCOST double, PS_COMMENT string) using csv with ('csvfile.delimiter'='|') location 'hdfs://x/y'

create external table customer (C_CUSTKEY long, C_NAME string, C_ADDRESS string, C_NATIONKEY long, C_PHONE string, C_ACCTBAL double, C_MKTSEGMENT string, C_COMMENT string) using csv with ('csvfile.delimiter'='|') location 'hdfs://x/y'

create external table orders (O_ORDERKEY long, O_CUSTKEY long, O_ORDERSTATUS string, O_TOTALPRICE double, O_ORDERDATE string, O_ORDERPRIORITY string, O_CLERK string, O_SHIPPRIORITY int, O_COMMENT string) using csv with ('csvfile.delimiter'='|') location 'hdfs://x/y'

create external table nation (N_NATIONKEY long, N_NAME string, N_REGIONKEY long, N_COMMENT string) using csv with ('csvfile.delimiter'='|') location 'hdfs://x/y' 

create external table region (R_REGIONKEY long, R_NAME string, R_COMMENT string) using csv with ('csvfile.delimiter'='|') location 'hdfs://x/y'
}}}

== TPC-H Queries ==
=== Q1 ===
{{{#!SQL
select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice*(1-l_discount)) as sum_disc_price, sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from lineitem where l_shipdate <= '1998-09-01' group by l_returnflag, l_linestatus order by l_returnflag, l_linestatus
}}}

=== Q2 ===
Tajo does not support sclar subquery yet. So, you should use multiple queries as follows:
{{{#!SQL
create table r2_1 as select s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, ps_supplycost from region join nation on n_regionkey = r_regionkey join supplier on s_nationkey = n_nationkey join partsupp on s_suppkey = ps_suppkey join part on p_partkey = ps_partkey  where p_size = 15 and p_type like '%BRASS' and r_name = 'EUROPE'

create table r2_2 as select p_partkey, min(ps_supplycost) as min_ps_supplycost from r2_1 group by p_partkey

select s_acctbal, s_name, n_name, r2_1.p_partkey, p_mfgr, s_address, s_phone, s_comment from r2_1 join r2_2 on r2_1.p_partkey = r2_2.p_partkey where ps_supplycost = min_ps_supplycost order by s_acctbal, n_name, s_name, r2_1.p_partkey
}}}

== Q3 ==
{{{#!SQL
select l_orderkey,  sum(l_extendedprice*(1-l_discount)) as revenue, o_orderdate, o_shippriority from customer, orders, lineitem where c_mktsegment = 'BUILDING' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < '1995-03-15' and l_shipdate > '1995-03-15' group by l_orderkey, o_orderdate, o_shippriority order by revenue desc, o_orderdate
}}}

== Q6 ==
{{{#!SQL
select sum(l_extendedprice*l_discount) as revenue from lineitem where l_shipdate >= '1994-01-01' and l_shipdate < '1995-01-01' and l_discount >= 0.05 and l_discount <= 0.07 and l_quantity < 24;
}}}

== Q10 ==
{{{#!SQL
select c_custkey, c_name, sum(l_extendedprice * (1 - l_discount)) as revenue, c_acctbal, n_name, c_address, c_phone, c_comment from customer, orders, lineitem, nation where c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate >= '1993-10-01' and o_orderdate < '1994-01-01' and l_returnflag = 'R' and c_nationkey = n_nationkey group by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment order by c_custkey
}}}

== Q12 ==
{{{#!SQL
select l_shipmode, sum(case when o_orderpriority ='1-URGENT' or o_orderpriority ='2-HIGH' then 1 else 0 end) as high_line_count, sum(case when o_orderpriority != '1-URGENT' and o_orderpriority != '2-HIGH' then 1 else 0 end) as low_line_count from orders, lineitem where o_orderkey = l_orderkey and (l_shipmode = 'MAIL' or l_shipmode = 'SHIP') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_receiptdate >= '1994-01-01' and l_receiptdate < '1995-01-01' group by l_shipmode order by l_shipmode
}}}

== Q14 ==
{{{#!SQL
select 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice*(1-l_discount) else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from lineitem, part where l_partkey = p_partkey and l_shipdate >= '1995-09-01' and l_shipdate < '1995-10-01'
}}}