You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/12/06 22:34:49 UTC
[arrow] branch master updated: ARROW-10820: [Rust] [DataFusion]
Complete TPC-H Benchmark Queries
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new bce15dc ARROW-10820: [Rust] [DataFusion] Complete TPC-H Benchmark Queries
bce15dc is described below
commit bce15dcc743ea40b2c627a7c3ab9454f8f4b9d76
Author: Mike Seddon <se...@gmail.com>
AuthorDate: Sun Dec 6 15:33:31 2020 -0700
ARROW-10820: [Rust] [DataFusion] Complete TPC-H Benchmark Queries
Changes:
- can now execute queries 3, 5 and 6 (in addition to 1, 12).
- add all tables.
- add all queries with query validation parameters provided.
- make basic modifications to queries 5 and 6 to allow supported syntax.
- queries 1 and 12 retain existing modifications.
this was an easy way to begin understanding some of the code base and get involved.
Closes #8845 from seddonm1/complete-tpch-queries
Authored-by: Mike Seddon <se...@gmail.com>
Signed-off-by: Andy Grove <an...@gmail.com>
---
rust/benchmarks/src/bin/tpch.rs | 918 +++++++++++++++++++++++++++++++++++++---
1 file changed, 856 insertions(+), 62 deletions(-)
diff --git a/rust/benchmarks/src/bin/tpch.rs b/rust/benchmarks/src/bin/tpch.rs
index 82edd98..fd71022 100644
--- a/rust/benchmarks/src/bin/tpch.rs
+++ b/rust/benchmarks/src/bin/tpch.rs
@@ -89,7 +89,9 @@ enum TpchOpt {
Convert(ConvertOpt),
}
-const TABLES: &[&str] = &["lineitem", "orders"];
+const TABLES: &[&str] = &[
+ "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region",
+];
#[tokio::main]
async fn main() -> Result<()> {
@@ -145,59 +147,797 @@ async fn benchmark(opt: BenchmarkOpt) -> Result<()> {
fn create_logical_plan(ctx: &mut ExecutionContext, query: usize) -> Result<LogicalPlan> {
match query {
+
+ // original
+ // 1 => ctx.create_logical_plan(
+ // "select
+ // l_returnflag,
+ // l_linestatus,
+ // sum(l_quantity) as sum_qty,
+ // sum(l_extendedprice) as sum_base_price,
+ // sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+ // sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+ // avg(l_quantity) as avg_qty,
+ // avg(l_extendedprice) as avg_price,
+ // avg(l_discount) as avg_disc,
+ // count(*) as count_order
+ // from
+ // lineitem
+ // where
+ // l_shipdate <= date '1998-12-01' - interval '90' day (3)
+ // group by
+ // l_returnflag,
+ // l_linestatus
+ // order by
+ // l_returnflag,
+ // l_linestatus;"
+ // ),
1 => ctx.create_logical_plan(
"select
- l_returnflag,
- l_linestatus,
- sum(l_quantity),
- sum(l_extendedprice),
- sum(l_extendedprice * (1 - l_discount)),
- sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)),
- avg(l_quantity),
- avg(l_extendedprice),
- avg(l_discount),
- count(*)
- from
- lineitem
- where
- l_shipdate <= '1998-12-01'
- group by
- l_returnflag,
- l_linestatus
- order by
- l_returnflag,
- l_linestatus",
+ l_returnflag,
+ l_linestatus,
+ sum(l_quantity) as sum_qty,
+ sum(l_extendedprice) as sum_base_price,
+ sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+ sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+ avg(l_quantity) as avg_qty,
+ avg(l_extendedprice) as avg_price,
+ avg(l_discount) as avg_disc,
+ count(*) as count_order
+ from
+ lineitem
+ where
+ l_shipdate <= '1998-09-02'
+ group by
+ l_returnflag,
+ l_linestatus
+ order by
+ l_returnflag,
+ l_linestatus;",
+ ),
+
+ 2 => ctx.create_logical_plan(
+ "select
+ s_acctbal,
+ s_name,
+ n_name,
+ p_partkey,
+ p_mfgr,
+ s_address,
+ s_phone,
+ s_comment
+ from
+ part,
+ supplier,
+ partsupp,
+ nation,
+ region
+ where
+ p_partkey = ps_partkey
+ and s_suppkey = ps_suppkey
+ and p_size = 15
+ and p_type like '%BRASS'
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'EUROPE'
+ and ps_supplycost = (
+ select
+ min(ps_supplycost)
+ from
+ partsupp,
+ supplier,
+ nation,
+ region
+ where
+ p_partkey = ps_partkey
+ and s_suppkey = ps_suppkey
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'EUROPE'
+ )
+ order by
+ s_acctbal desc,
+ n_name,
+ s_name,
+ p_partkey;"
+ ),
+
+ 3 => ctx.create_logical_plan(
+ "select
+ l_orderkey,
+ sum(l_extendedprice * (1 - l_discount)) as revenue,
+ o_orderdate,
+ o_shippriority
+ from
+ customer,
+ orders,
+ lineitem
+ where
+ c_mktsegment = 'BUILDING'
+ and c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and o_orderdate < '1995-03-15'
+ and l_shipdate > '1995-03-15'
+ group by
+ l_orderkey,
+ o_orderdate,
+ o_shippriority
+ order by
+ revenue desc,
+ o_orderdate;"
+ ),
+
+ 4 => ctx.create_logical_plan(
+ "select
+ o_orderpriority,
+ count(*) as order_count
+ from
+ orders
+ where
+ o_orderdate >= '1993-07-01'
+ and o_orderdate < date '1993-07-01' + interval '3' month
+ and exists (
+ select
+ *
+ from
+ lineitem
+ where
+ l_orderkey = o_orderkey
+ and l_commitdate < l_receiptdate
+ )
+ group by
+ o_orderpriority
+ order by
+ o_orderpriority;"
+ ),
+
+ // original
+ // 5 => ctx.create_logical_plan(
+ // "select
+ // n_name,
+ // sum(l_extendedprice * (1 - l_discount)) as revenue
+ // from
+ // customer,
+ // orders,
+ // lineitem,
+ // supplier,
+ // nation,
+ // region
+ // where
+ // c_custkey = o_custkey
+ // and l_orderkey = o_orderkey
+ // and l_suppkey = s_suppkey
+ // and c_nationkey = s_nationkey
+ // and s_nationkey = n_nationkey
+ // and n_regionkey = r_regionkey
+ // and r_name = 'ASIA'
+ // and o_orderdate >= date '1994-01-01'
+ // and o_orderdate < date '1994-01-01' + interval '1' year
+ // group by
+ // n_name
+ // order by
+ // revenue desc;"
+ // ),
+ 5 => ctx.create_logical_plan(
+ "select
+ n_name,
+ sum(l_extendedprice * (1 - l_discount)) as revenue
+ from
+ customer,
+ orders,
+ lineitem,
+ supplier,
+ nation,
+ region
+ where
+ c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and l_suppkey = s_suppkey
+ and c_nationkey = s_nationkey
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'ASIA'
+ and o_orderdate >= '1994-01-01'
+ and o_orderdate < '1995-01-01'
+ group by
+ n_name
+ order by
+ revenue desc;"
+ ),
+
+ // original
+ // 6 => ctx.create_logical_plan(
+ // "select
+ // sum(l_extendedprice * l_discount) as revenue
+ // from
+ // lineitem
+ // where
+ // l_shipdate >= date '1994-01-01'
+ // and l_shipdate < date '1994-01-01' + interval '1' year
+ // and l_discount between 0.06 - 0.01 and 0.06 + 0.01
+ // and l_quantity < 24;"
+ // ),
+ 6 => ctx.create_logical_plan(
+ "select
+ sum(l_extendedprice * l_discount) as revenue
+ from
+ lineitem
+ where
+ l_shipdate >= '1994-01-01'
+ and l_shipdate < '1995-01-01'
+ and l_discount > 0.06 - 0.01 and l_discount < 0.06 + 0.01
+ and l_quantity < 24;"
+ ),
+
+ 7 => ctx.create_logical_plan(
+ "select
+ supp_nation,
+ cust_nation,
+ l_year,
+ sum(volume) as revenue
+ from
+ (
+ select
+ n1.n_name as supp_nation,
+ n2.n_name as cust_nation,
+ extract(year from l_shipdate) as l_year,
+ l_extendedprice * (1 - l_discount) as volume
+ from
+ supplier,
+ lineitem,
+ orders,
+ customer,
+ nation n1,
+ nation n2
+ where
+ s_suppkey = l_suppkey
+ and o_orderkey = l_orderkey
+ and c_custkey = o_custkey
+ and s_nationkey = n1.n_nationkey
+ and c_nationkey = n2.n_nationkey
+ and (
+ (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
+ or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
+ )
+ and l_shipdate > '1995-01-01' and l_shipdate < '1996-12-31'
+ ) as shipping
+ group by
+ supp_nation,
+ cust_nation,
+ l_year
+ order by
+ supp_nation,
+ cust_nation,
+ l_year;"
+ ),
+
+ 8 => ctx.create_logical_plan(
+ "select
+ o_year,
+ sum(case
+ when nation = 'BRAZIL' then volume
+ else 0
+ end) / sum(volume) as mkt_share
+ from
+ (
+ select
+ extract(year from o_orderdate) as o_year,
+ l_extendedprice * (1 - l_discount) as volume,
+ n2.n_name as nation
+ from
+ part,
+ supplier,
+ lineitem,
+ orders,
+ customer,
+ nation n1,
+ nation n2,
+ region
+ where
+ p_partkey = l_partkey
+ and s_suppkey = l_suppkey
+ and l_orderkey = o_orderkey
+ and o_custkey = c_custkey
+ and c_nationkey = n1.n_nationkey
+ and n1.n_regionkey = r_regionkey
+ and r_name = 'AMERICA'
+ and s_nationkey = n2.n_nationkey
+ and o_orderdate between '1995-01-01' and '1996-12-31'
+ and p_type = 'ECONOMY ANODIZED STEEL'
+ ) as all_nations
+ group by
+ o_year
+ order by
+ o_year;"
+ ),
+
+ 9 => ctx.create_logical_plan(
+ "select
+ nation,
+ o_year,
+ sum(amount) as sum_profit
+ from
+ (
+ select
+ n_name as nation,
+ extract(year from o_orderdate) as o_year,
+ l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
+ from
+ part,
+ supplier,
+ lineitem,
+ partsupp,
+ orders,
+ nation
+ where
+ s_suppkey = l_suppkey
+ and ps_suppkey = l_suppkey
+ and ps_partkey = l_partkey
+ and p_partkey = l_partkey
+ and o_orderkey = l_orderkey
+ and s_nationkey = n_nationkey
+ and p_name like '%green%'
+ ) as profit
+ group by
+ nation,
+ o_year
+ order by
+ nation,
+ o_year desc;"
+ ),
+
+ 10 => ctx.create_logical_plan(
+ "select
+ c_custkey,
+ c_name,
+ sum(l_extendedprice * (1 - l_discount)) as revenue,
+ c_acctbal,
+ n_name,
+ c_address,
+ c_phone,
+ c_comment
+ from
+ customer,
+ orders,
+ lineitem,
+ nation
+ where
+ c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and o_orderdate >= '1993-10-01'
+ and o_orderdate < '1994-01-01'
+ and l_returnflag = 'R'
+ and c_nationkey = n_nationkey
+ group by
+ c_custkey,
+ c_name,
+ c_acctbal,
+ c_phone,
+ n_name,
+ c_address,
+ c_comment
+ order by
+ revenue desc;"
+ ),
+
+ 11 => ctx.create_logical_plan(
+ "select
+ ps_partkey,
+ sum(ps_supplycost * ps_availqty) as value
+ from
+ partsupp,
+ supplier,
+ nation
+ where
+ ps_suppkey = s_suppkey
+ and s_nationkey = n_nationkey
+ and n_name = 'GERMANY'
+ group by
+ ps_partkey having
+ sum(ps_supplycost * ps_availqty) > (
+ select
+ sum(ps_supplycost * ps_availqty) * 0.0001
+ from
+ partsupp,
+ supplier,
+ nation
+ where
+ ps_suppkey = s_suppkey
+ and s_nationkey = n_nationkey
+ and n_name = 'GERMANY'
+ )
+ order by
+ value desc;"
),
+ // original
+ // 12 => ctx.create_logical_plan(
+ // "select
+ // l_shipmode,
+ // sum(case
+ // when o_orderpriority = '1-URGENT'
+ // or o_orderpriority = '2-HIGH'
+ // then 1
+ // else 0
+ // end) as high_line_count,
+ // sum(case
+ // when o_orderpriority <> '1-URGENT'
+ // and o_orderpriority <> '2-HIGH'
+ // then 1
+ // else 0
+ // end) as low_line_count
+ // from
+ // orders,
+ // lineitem
+ // where
+ // o_orderkey = l_orderkey
+ // and l_shipmode in ('MAIL', 'SHIP')
+ // and l_commitdate < l_receiptdate
+ // and l_shipdate < l_commitdate
+ // and l_receiptdate >= date '1994-01-01'
+ // and l_receiptdate < date '1994-01-01' + interval '1' year
+ // group by
+ // l_shipmode
+ // order by
+ // l_shipmode;"
+ // ),
12 => ctx.create_logical_plan(
- "SELECT
+ "select
l_shipmode,
sum(case
when o_orderpriority = '1-URGENT'
- OR o_orderpriority = '2-HIGH'
+ or o_orderpriority = '2-HIGH'
then 1
else 0
end) as high_line_count,
sum(case
when o_orderpriority <> '1-URGENT'
- AND o_orderpriority <> '2-HIGH'
+ and o_orderpriority <> '2-HIGH'
then 1
else 0
- end) AS low_line_count
- FROM
- lineitem JOIN orders ON l_orderkey = o_orderkey
- WHERE (l_shipmode = 'MAIL' OR l_shipmode = 'SHIP')
- AND l_commitdate < l_receiptdate
- AND l_shipdate < l_commitdate
- AND l_receiptdate >= '1994-01-01'
- AND l_receiptdate < '1995-01-01'
- GROUP BY
+ end) as low_line_count
+ from
+ lineitem
+ join
+ orders
+ on
+ l_orderkey = o_orderkey
+ where
+ (l_shipmode = 'MAIL' or l_shipmode = 'SHIP')
+ and l_commitdate < l_receiptdate
+ and l_shipdate < l_commitdate
+ and l_receiptdate >= '1994-01-01'
+ and l_receiptdate < '1995-01-01'
+ group by
l_shipmode
- ORDER BY
- l_shipmode",
+ order by
+ l_shipmode;"
+ ),
+
+ 13 => ctx.create_logical_plan(
+ "select
+ c_count,
+ count(*) as custdist
+ from
+ (
+ select
+ c_custkey,
+ count(o_orderkey)
+ from
+ customer left outer join orders on
+ c_custkey = o_custkey
+ and o_comment not like '%special%requests%'
+ group by
+ c_custkey
+ ) as c_orders (c_custkey, c_count)
+ group by
+ c_count
+ order by
+ custdist desc,
+ c_count desc;"
+ ),
+
+ 14 => ctx.create_logical_plan(
+ "select
+ 100.00 * sum(case
+ when p_type like 'PROMO%'
+ then l_extendedprice * (1 - l_discount)
+ else 0
+ end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
+ from
+ lineitem,
+ part
+ where
+ l_partkey = p_partkey
+ and l_shipdate >= '1995-09-01'
+ and l_shipdate < '1995-10-01';"
+ ),
+
+ 15 => ctx.create_logical_plan(
+ "create view revenue0 (supplier_no, total_revenue) as
+ select
+ l_suppkey,
+ sum(l_extendedprice * (1 - l_discount))
+ from
+ lineitem
+ where
+ l_shipdate >= date '1996-01-01'
+ and l_shipdate < date '1996-01-01' + interval '3' month
+ group by
+ l_suppkey;
+
+ select
+ s_suppkey,
+ s_name,
+ s_address,
+ s_phone,
+ total_revenue
+ from
+ supplier,
+ revenue0
+ where
+ s_suppkey = supplier_no
+ and total_revenue = (
+ select
+ max(total_revenue)
+ from
+ revenue0
+ )
+ order by
+ s_suppkey;
+
+ drop view revenue0;"
+ ),
+
+ 16 => ctx.create_logical_plan(
+ "select
+ p_brand,
+ p_type,
+ p_size,
+ count(distinct ps_suppkey) as supplier_cnt
+ from
+ partsupp,
+ part
+ where
+ p_partkey = ps_partkey
+ and p_brand <> 'Brand#45'
+ and p_type not like 'MEDIUM POLISHED%'
+ and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
+ and ps_suppkey not in (
+ select
+ s_suppkey
+ from
+ supplier
+ where
+ s_comment like '%Customer%Complaints%'
+ )
+ group by
+ p_brand,
+ p_type,
+ p_size
+ order by
+ supplier_cnt desc,
+ p_brand,
+ p_type,
+ p_size;"
+ ),
+
+ 17 => ctx.create_logical_plan(
+ "select
+ sum(l_extendedprice) / 7.0 as avg_yearly
+ from
+ lineitem,
+ part
+ where
+ p_partkey = l_partkey
+ and p_brand = 'Brand#23'
+ and p_container = 'MED BOX'
+ and l_quantity < (
+ select
+ 0.2 * avg(l_quantity)
+ from
+ lineitem
+ where
+ l_partkey = p_partkey
+ );"
+ ),
+
+ 18 => ctx.create_logical_plan(
+ "select
+ c_name,
+ c_custkey,
+ o_orderkey,
+ o_orderdate,
+ o_totalprice,
+ sum(l_quantity)
+ from
+ customer,
+ orders,
+ lineitem
+ where
+ o_orderkey in (
+ select
+ l_orderkey
+ from
+ lineitem
+ group by
+ l_orderkey having
+ sum(l_quantity) > 300
+ )
+ and c_custkey = o_custkey
+ and o_orderkey = l_orderkey
+ group by
+ c_name,
+ c_custkey,
+ o_orderkey,
+ o_orderdate,
+ o_totalprice
+ order by
+ o_totalprice desc,
+ o_orderdate;"
+ ),
+
+ 19 => ctx.create_logical_plan(
+ "select
+ sum(l_extendedprice* (1 - l_discount)) as revenue
+ from
+ lineitem,
+ part
+ where
+ (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#12'
+ and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
+ and l_quantity >= 1 and l_quantity <= 1 + 10
+ and p_size between 1 and 5
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ )
+ or
+ (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#23'
+ and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
+ and l_quantity >= 10 and l_quantity <= 10 + 10
+ and p_size between 1 and 10
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ )
+ or
+ (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#34'
+ and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
+ and l_quantity >= 20 and l_quantity <= 20 + 10
+ and p_size between 1 and 15
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ );"
+ ),
+
+ 20 => ctx.create_logical_plan(
+ "select
+ s_name,
+ s_address
+ from
+ supplier,
+ nation
+ where
+ s_suppkey in (
+ select
+ ps_suppkey
+ from
+ partsupp
+ where
+ ps_partkey in (
+ select
+ p_partkey
+ from
+ part
+ where
+ p_name like 'forest%'
+ )
+ and ps_availqty > (
+ select
+ 0.5 * sum(l_quantity)
+ from
+ lineitem
+ where
+ l_partkey = ps_partkey
+ and l_suppkey = ps_suppkey
+ and l_shipdate >= date '1994-01-01'
+ and l_shipdate < 'date 1994-01-01' + interval '1' year
+ )
+ )
+ and s_nationkey = n_nationkey
+ and n_name = 'CANADA'
+ order by
+ s_name;"
+ ),
+
+ 21 => ctx.create_logical_plan(
+ "select
+ s_name,
+ count(*) as numwait
+ from
+ supplier,
+ lineitem l1,
+ orders,
+ nation
+ where
+ s_suppkey = l1.l_suppkey
+ and o_orderkey = l1.l_orderkey
+ and o_orderstatus = 'F'
+ and l1.l_receiptdate > l1.l_commitdate
+ and exists (
+ select
+ *
+ from
+ lineitem l2
+ where
+ l2.l_orderkey = l1.l_orderkey
+ and l2.l_suppkey <> l1.l_suppkey
+ )
+ and not exists (
+ select
+ *
+ from
+ lineitem l3
+ where
+ l3.l_orderkey = l1.l_orderkey
+ and l3.l_suppkey <> l1.l_suppkey
+ and l3.l_receiptdate > l3.l_commitdate
+ )
+ and s_nationkey = n_nationkey
+ and n_name = 'SAUDI ARABIA'
+ group by
+ s_name
+ order by
+ numwait desc,
+ s_name;"
),
- _ => unimplemented!("unsupported query"),
+ 22 => ctx.create_logical_plan(
+ "select
+ cntrycode,
+ count(*) as numcust,
+ sum(c_acctbal) as totacctbal
+ from
+ (
+ select
+ substring(c_phone from 1 for 2) as cntrycode,
+ c_acctbal
+ from
+ customer
+ where
+ substring(c_phone from 1 for 2) in
+ ('13', '31', '23', '29', '30', '18', '17')
+ and c_acctbal > (
+ select
+ avg(c_acctbal)
+ from
+ customer
+ where
+ c_acctbal > 0.00
+ and substring(c_phone from 1 for 2) in
+ ('13', '31', '23', '29', '30', '18', '17')
+ )
+ and not exists (
+ select
+ *
+ from
+ orders
+ where
+ o_custkey = c_custkey
+ )
+ ) as custsale
+ group by
+ cntrycode
+ order by
+ cntrycode;"
+ ),
+
+ _ => unimplemented!("invalid query. Expected value between 1 and 22"),
}
}
@@ -286,35 +1026,89 @@ fn get_table(
fn get_schema(table: &str) -> Schema {
match table {
- "lineitem" => Schema::new(vec![
- Field::new("l_orderkey", DataType::UInt32, true),
- Field::new("l_partkey", DataType::UInt32, true),
- Field::new("l_suppkey", DataType::UInt32, true),
- Field::new("l_linenumber", DataType::UInt32, true),
- Field::new("l_quantity", DataType::Float64, true),
- Field::new("l_extendedprice", DataType::Float64, true),
- Field::new("l_discount", DataType::Float64, true),
- Field::new("l_tax", DataType::Float64, true),
- Field::new("l_returnflag", DataType::Utf8, true),
- Field::new("l_linestatus", DataType::Utf8, true),
- Field::new("l_shipdate", DataType::Utf8, true),
- Field::new("l_commitdate", DataType::Utf8, true),
- Field::new("l_receiptdate", DataType::Utf8, true),
- Field::new("l_shipinstruct", DataType::Utf8, true),
- Field::new("l_shipmode", DataType::Utf8, true),
- Field::new("l_comment", DataType::Utf8, true),
+ "part" => Schema::new(vec![
+ Field::new("p_partkey", DataType::UInt32, false),
+ Field::new("p_name", DataType::Utf8, false),
+ Field::new("p_mfgr", DataType::Utf8, false),
+ Field::new("p_brand", DataType::Utf8, false),
+ Field::new("p_type", DataType::Utf8, false),
+ Field::new("p_size", DataType::UInt32, false),
+ Field::new("p_container", DataType::Utf8, false),
+ Field::new("p_retailprice", DataType::Float64, false), // decimal
+ Field::new("p_comment", DataType::Utf8, false),
+ ]),
+
+ "supplier" => Schema::new(vec![
+ Field::new("s_suppkey", DataType::UInt32, false),
+ Field::new("s_name", DataType::Utf8, false),
+ Field::new("s_address", DataType::Utf8, false),
+ Field::new("s_nationkey", DataType::UInt32, false),
+ Field::new("s_phone", DataType::Utf8, false),
+ Field::new("s_acctbal", DataType::Float64, false), // decimal
+ Field::new("s_comment", DataType::Utf8, false),
+ ]),
+
+ "partsupp" => Schema::new(vec![
+ Field::new("ps_partkey", DataType::UInt32, false),
+ Field::new("ps_suppkey", DataType::UInt32, false),
+ Field::new("ps_availqty", DataType::UInt32, false),
+ Field::new("ps_supplycost", DataType::Float64, false), // decimal
+ Field::new("ps_comment", DataType::Utf8, false),
+ ]),
+
+ "customer" => Schema::new(vec![
+ Field::new("c_custkey", DataType::UInt32, false),
+ Field::new("c_name", DataType::Utf8, false),
+ Field::new("c_address", DataType::Utf8, false),
+ Field::new("c_nationkey", DataType::UInt32, false),
+ Field::new("c_phone", DataType::Utf8, false),
+ Field::new("c_acctbal", DataType::Float64, false), // decimal
+ Field::new("c_mktsegment", DataType::Utf8, false),
+ Field::new("c_comment", DataType::Utf8, false),
]),
"orders" => Schema::new(vec![
- Field::new("o_orderkey", DataType::UInt32, true),
- Field::new("custkey", DataType::UInt32, true),
- Field::new("o_orderstatus", DataType::Utf8, true),
- Field::new("o_totalprice", DataType::Float64, true),
- Field::new("o_orderdate", DataType::Utf8, true),
- Field::new("o_orderpriority", DataType::Utf8, true),
- Field::new("o_clerk", DataType::Utf8, true),
- Field::new("o_shippriority", DataType::Utf8, true),
- Field::new("o_comment", DataType::Utf8, true),
+ Field::new("o_orderkey", DataType::UInt32, false),
+ Field::new("o_custkey", DataType::UInt32, false),
+ Field::new("o_orderstatus", DataType::Utf8, false),
+ Field::new("o_totalprice", DataType::Float64, false), // decimal
+ Field::new("o_orderdate", DataType::Utf8, false),
+ Field::new("o_orderpriority", DataType::Utf8, false),
+ Field::new("o_clerk", DataType::Utf8, false),
+ Field::new("o_shippriority", DataType::UInt32, false),
+ Field::new("o_comment", DataType::Utf8, false),
+ ]),
+
+ "lineitem" => Schema::new(vec![
+ Field::new("l_orderkey", DataType::UInt32, false),
+ Field::new("l_partkey", DataType::UInt32, false),
+ Field::new("l_suppkey", DataType::UInt32, false),
+ Field::new("l_linenumber", DataType::UInt32, false),
+ Field::new("l_quantity", DataType::Float64, false), // decimal
+ Field::new("l_extendedprice", DataType::Float64, false), // decimal
+ Field::new("l_discount", DataType::Float64, false), // decimal
+ Field::new("l_tax", DataType::Float64, false), // decimal
+ Field::new("l_returnflag", DataType::Utf8, false),
+ Field::new("l_linestatus", DataType::Utf8, false),
+ Field::new("l_shipdate", DataType::Utf8, false),
+ Field::new("l_commitdate", DataType::Utf8, false),
+ Field::new("l_receiptdate", DataType::Utf8, false),
+ Field::new("l_shipinstruct", DataType::Utf8, false),
+ Field::new("l_shipmode", DataType::Utf8, false),
+ Field::new("l_comment", DataType::Utf8, false),
+ ]),
+
+ "nation" => Schema::new(vec![
+ Field::new("n_nationkey", DataType::UInt32, false),
+ Field::new("n_name", DataType::Utf8, false),
+ Field::new("n_regionkey", DataType::UInt32, false),
+ Field::new("n_comment", DataType::Utf8, false),
+ ]),
+
+ "region" => Schema::new(vec![
+ Field::new("r_regionkey", DataType::UInt32, false),
+ Field::new("r_name", DataType::Utf8, false),
+ Field::new("r_comment", DataType::Utf8, false),
]),
_ => unimplemented!(),