You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/12/06 22:34:49 UTC

[arrow] branch master updated: ARROW-10820: [Rust] [DataFusion] Complete TPC-H Benchmark Queries

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new bce15dc  ARROW-10820: [Rust] [DataFusion] Complete TPC-H Benchmark Queries
bce15dc is described below

commit bce15dcc743ea40b2c627a7c3ab9454f8f4b9d76
Author: Mike Seddon <se...@gmail.com>
AuthorDate: Sun Dec 6 15:33:31 2020 -0700

    ARROW-10820: [Rust] [DataFusion] Complete TPC-H Benchmark Queries
    
    Changes:
    - can now execute queries 3, 5 and 6 (in addition to 1, 12).
    - add all tables.
    - add all queries with query validation parameters provided.
    - make basic modifications to queries 5 and 6 to allow supported syntax.
    - queries 1 and 12 retain existing modifications.
    
    this was an easy way to begin understanding some of the code base and get involved.
    
    Closes #8845 from seddonm1/complete-tpch-queries
    
    Authored-by: Mike Seddon <se...@gmail.com>
    Signed-off-by: Andy Grove <an...@gmail.com>
---
 rust/benchmarks/src/bin/tpch.rs | 918 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 856 insertions(+), 62 deletions(-)

diff --git a/rust/benchmarks/src/bin/tpch.rs b/rust/benchmarks/src/bin/tpch.rs
index 82edd98..fd71022 100644
--- a/rust/benchmarks/src/bin/tpch.rs
+++ b/rust/benchmarks/src/bin/tpch.rs
@@ -89,7 +89,9 @@ enum TpchOpt {
     Convert(ConvertOpt),
 }
 
-const TABLES: &[&str] = &["lineitem", "orders"];
+const TABLES: &[&str] = &[
+    "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region",
+];
 
 #[tokio::main]
 async fn main() -> Result<()> {
@@ -145,59 +147,797 @@ async fn benchmark(opt: BenchmarkOpt) -> Result<()> {
 
 fn create_logical_plan(ctx: &mut ExecutionContext, query: usize) -> Result<LogicalPlan> {
     match query {
+
+        // original
+        // 1 => ctx.create_logical_plan(
+        //     "select
+        //         l_returnflag,
+        //         l_linestatus,
+        //         sum(l_quantity) as sum_qty,
+        //         sum(l_extendedprice) as sum_base_price,
+        //         sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+        //         sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+        //         avg(l_quantity) as avg_qty,
+        //         avg(l_extendedprice) as avg_price,
+        //         avg(l_discount) as avg_disc,
+        //         count(*) as count_order
+        //     from
+        //         lineitem
+        //     where
+        //         l_shipdate <= date '1998-12-01' - interval '90' day (3)
+        //     group by
+        //         l_returnflag,
+        //         l_linestatus
+        //     order by
+        //         l_returnflag,
+        //         l_linestatus;"
+        // ),
         1 => ctx.create_logical_plan(
             "select
-                    l_returnflag,
-                    l_linestatus,
-                    sum(l_quantity),
-                    sum(l_extendedprice),
-                    sum(l_extendedprice * (1 - l_discount)),
-                    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)),
-                    avg(l_quantity),
-                    avg(l_extendedprice),
-                    avg(l_discount),
-                    count(*)
-                from
-                    lineitem
-                where
-                    l_shipdate <= '1998-12-01'
-                group by
-                    l_returnflag,
-                    l_linestatus
-                order by
-                    l_returnflag,
-                    l_linestatus",
+                l_returnflag,
+                l_linestatus,
+                sum(l_quantity) as sum_qty,
+                sum(l_extendedprice) as sum_base_price,
+                sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+                sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+                avg(l_quantity) as avg_qty,
+                avg(l_extendedprice) as avg_price,
+                avg(l_discount) as avg_disc,
+                count(*) as count_order
+            from
+                lineitem
+            where
+                l_shipdate <= '1998-09-02'
+            group by
+                l_returnflag,
+                l_linestatus
+            order by
+                l_returnflag,
+                l_linestatus;",
+        ),
+
+        2 => ctx.create_logical_plan(
+            "select
+                s_acctbal,
+                s_name,
+                n_name,
+                p_partkey,
+                p_mfgr,
+                s_address,
+                s_phone,
+                s_comment
+            from
+                part,
+                supplier,
+                partsupp,
+                nation,
+                region
+            where
+                p_partkey = ps_partkey
+                and s_suppkey = ps_suppkey
+                and p_size = 15
+                and p_type like '%BRASS'
+                and s_nationkey = n_nationkey
+                and n_regionkey = r_regionkey
+                and r_name = 'EUROPE'
+                and ps_supplycost = (
+                    select
+                        min(ps_supplycost)
+                    from
+                        partsupp,
+                        supplier,
+                        nation,
+                        region
+                    where
+                        p_partkey = ps_partkey
+                        and s_suppkey = ps_suppkey
+                        and s_nationkey = n_nationkey
+                        and n_regionkey = r_regionkey
+                        and r_name = 'EUROPE'
+                )
+            order by
+                s_acctbal desc,
+                n_name,
+                s_name,
+                p_partkey;"
+        ),
+
+        3 => ctx.create_logical_plan(
+            "select
+                l_orderkey,
+                sum(l_extendedprice * (1 - l_discount)) as revenue,
+                o_orderdate,
+                o_shippriority
+            from
+                customer,
+                orders,
+                lineitem
+            where
+                c_mktsegment = 'BUILDING'
+                and c_custkey = o_custkey
+                and l_orderkey = o_orderkey
+                and o_orderdate < '1995-03-15'
+                and l_shipdate > '1995-03-15'
+            group by
+                l_orderkey,
+                o_orderdate,
+                o_shippriority
+            order by
+                revenue desc,
+                o_orderdate;"
+        ),
+
+        4 => ctx.create_logical_plan(
+            "select
+                o_orderpriority,
+                count(*) as order_count
+            from
+                orders
+            where
+                o_orderdate >= '1993-07-01'
+                and o_orderdate < date '1993-07-01' + interval '3' month
+                and exists (
+                    select
+                        *
+                    from
+                        lineitem
+                    where
+                        l_orderkey = o_orderkey
+                        and l_commitdate < l_receiptdate
+                )
+            group by
+                o_orderpriority
+            order by
+                o_orderpriority;"
+        ),
+
+        // original
+        // 5 => ctx.create_logical_plan(
+        //     "select
+        //         n_name,
+        //         sum(l_extendedprice * (1 - l_discount)) as revenue
+        //     from
+        //         customer,
+        //         orders,
+        //         lineitem,
+        //         supplier,
+        //         nation,
+        //         region
+        //     where
+        //         c_custkey = o_custkey
+        //         and l_orderkey = o_orderkey
+        //         and l_suppkey = s_suppkey
+        //         and c_nationkey = s_nationkey
+        //         and s_nationkey = n_nationkey
+        //         and n_regionkey = r_regionkey
+        //         and r_name = 'ASIA'
+        //         and o_orderdate >= date '1994-01-01'
+        //         and o_orderdate < date '1994-01-01' + interval '1' year
+        //     group by
+        //         n_name
+        //     order by
+        //         revenue desc;"
+        // ),
+        5 => ctx.create_logical_plan(
+            "select
+                n_name,
+                sum(l_extendedprice * (1 - l_discount)) as revenue
+            from
+                customer,
+                orders,
+                lineitem,
+                supplier,
+                nation,
+                region
+            where
+                c_custkey = o_custkey
+                and l_orderkey = o_orderkey
+                and l_suppkey = s_suppkey
+                and c_nationkey = s_nationkey
+                and s_nationkey = n_nationkey
+                and n_regionkey = r_regionkey
+                and r_name = 'ASIA'
+                and o_orderdate >= '1994-01-01'
+                and o_orderdate < '1995-01-01'
+            group by
+                n_name
+            order by
+                revenue desc;"
+        ),
+
+        // original
+        // 6 => ctx.create_logical_plan(
+        //     "select
+        //         sum(l_extendedprice * l_discount) as revenue
+        //     from
+        //         lineitem
+        //     where
+        //         l_shipdate >= date '1994-01-01'
+        //         and l_shipdate < date '1994-01-01' + interval '1' year
+        //         and l_discount between 0.06 - 0.01 and 0.06 + 0.01
+        //         and l_quantity < 24;"
+        // ),
+        6 => ctx.create_logical_plan(
+            "select
+                sum(l_extendedprice * l_discount) as revenue
+            from
+                lineitem
+            where
+                l_shipdate >= '1994-01-01'
+                and l_shipdate < '1995-01-01'
+                and l_discount > 0.06 - 0.01 and l_discount < 0.06 + 0.01
+                and l_quantity < 24;"
+        ),
+
+        7 => ctx.create_logical_plan(
+            "select
+                supp_nation,
+                cust_nation,
+                l_year,
+                sum(volume) as revenue
+            from
+                (
+                    select
+                        n1.n_name as supp_nation,
+                        n2.n_name as cust_nation,
+                        extract(year from l_shipdate) as l_year,
+                        l_extendedprice * (1 - l_discount) as volume
+                    from
+                        supplier,
+                        lineitem,
+                        orders,
+                        customer,
+                        nation n1,
+                        nation n2
+                    where
+                        s_suppkey = l_suppkey
+                        and o_orderkey = l_orderkey
+                        and c_custkey = o_custkey
+                        and s_nationkey = n1.n_nationkey
+                        and c_nationkey = n2.n_nationkey
+                        and (
+                            (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
+                            or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
+                        )
+                        and l_shipdate > '1995-01-01' and l_shipdate < '1996-12-31'
+                ) as shipping
+            group by
+                supp_nation,
+                cust_nation,
+                l_year
+            order by
+                supp_nation,
+                cust_nation,
+                l_year;"
+        ),
+
+        8 => ctx.create_logical_plan(
+            "select
+                o_year,
+                sum(case
+                    when nation = 'BRAZIL' then volume
+                    else 0
+                end) / sum(volume) as mkt_share
+            from
+                (
+                    select
+                        extract(year from o_orderdate) as o_year,
+                        l_extendedprice * (1 - l_discount) as volume,
+                        n2.n_name as nation
+                    from
+                        part,
+                        supplier,
+                        lineitem,
+                        orders,
+                        customer,
+                        nation n1,
+                        nation n2,
+                        region
+                    where
+                        p_partkey = l_partkey
+                        and s_suppkey = l_suppkey
+                        and l_orderkey = o_orderkey
+                        and o_custkey = c_custkey
+                        and c_nationkey = n1.n_nationkey
+                        and n1.n_regionkey = r_regionkey
+                        and r_name = 'AMERICA'
+                        and s_nationkey = n2.n_nationkey
+                        and o_orderdate between '1995-01-01' and '1996-12-31'
+                        and p_type = 'ECONOMY ANODIZED STEEL'
+                ) as all_nations
+            group by
+                o_year
+            order by
+                o_year;"
+        ),
+
+        9 => ctx.create_logical_plan(
+            "select
+                nation,
+                o_year,
+                sum(amount) as sum_profit
+            from
+                (
+                    select
+                        n_name as nation,
+                        extract(year from o_orderdate) as o_year,
+                        l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
+                    from
+                        part,
+                        supplier,
+                        lineitem,
+                        partsupp,
+                        orders,
+                        nation
+                    where
+                        s_suppkey = l_suppkey
+                        and ps_suppkey = l_suppkey
+                        and ps_partkey = l_partkey
+                        and p_partkey = l_partkey
+                        and o_orderkey = l_orderkey
+                        and s_nationkey = n_nationkey
+                        and p_name like '%green%'
+                ) as profit
+            group by
+                nation,
+                o_year
+            order by
+                nation,
+                o_year desc;"
+        ),
+
+        10 => ctx.create_logical_plan(
+            "select
+                c_custkey,
+                c_name,
+                sum(l_extendedprice * (1 - l_discount)) as revenue,
+                c_acctbal,
+                n_name,
+                c_address,
+                c_phone,
+                c_comment
+            from
+                customer,
+                orders,
+                lineitem,
+                nation
+            where
+                c_custkey = o_custkey
+                and l_orderkey = o_orderkey
+                and o_orderdate >= '1993-10-01'
+                and o_orderdate < '1994-01-01'
+                and l_returnflag = 'R'
+                and c_nationkey = n_nationkey
+            group by
+                c_custkey,
+                c_name,
+                c_acctbal,
+                c_phone,
+                n_name,
+                c_address,
+                c_comment
+            order by
+                revenue desc;"
+        ),
+
+        11 => ctx.create_logical_plan(
+            "select
+                ps_partkey,
+                sum(ps_supplycost * ps_availqty) as value
+            from
+                partsupp,
+                supplier,
+                nation
+            where
+                ps_suppkey = s_suppkey
+                and s_nationkey = n_nationkey
+                and n_name = 'GERMANY'
+            group by
+                ps_partkey having
+                    sum(ps_supplycost * ps_availqty) > (
+                        select
+                            sum(ps_supplycost * ps_availqty) * 0.0001
+                        from
+                            partsupp,
+                            supplier,
+                            nation
+                        where
+                            ps_suppkey = s_suppkey
+                            and s_nationkey = n_nationkey
+                            and n_name = 'GERMANY'
+                    )
+            order by
+                value desc;"
         ),
 
+        // original
+        // 12 => ctx.create_logical_plan(
+        //     "select
+        //         l_shipmode,
+        //         sum(case
+        //             when o_orderpriority = '1-URGENT'
+        //                 or o_orderpriority = '2-HIGH'
+        //                 then 1
+        //             else 0
+        //         end) as high_line_count,
+        //         sum(case
+        //             when o_orderpriority <> '1-URGENT'
+        //                 and o_orderpriority <> '2-HIGH'
+        //                 then 1
+        //             else 0
+        //         end) as low_line_count
+        //     from
+        //         orders,
+        //         lineitem
+        //     where
+        //         o_orderkey = l_orderkey
+        //         and l_shipmode in ('MAIL', 'SHIP')
+        //         and l_commitdate < l_receiptdate
+        //         and l_shipdate < l_commitdate
+        //         and l_receiptdate >= date '1994-01-01'
+        //         and l_receiptdate < date '1994-01-01' + interval '1' year
+        //     group by
+        //         l_shipmode
+        //     order by
+        //         l_shipmode;"
+        // ),
         12 => ctx.create_logical_plan(
-            "SELECT
+            "select
                 l_shipmode,
                 sum(case
                     when o_orderpriority = '1-URGENT'
-                        OR o_orderpriority = '2-HIGH'
+                        or o_orderpriority = '2-HIGH'
                         then 1
                     else 0
                 end) as high_line_count,
                 sum(case
                     when o_orderpriority <> '1-URGENT'
-                        AND o_orderpriority <> '2-HIGH'
+                        and o_orderpriority <> '2-HIGH'
                         then 1
                     else 0
-                end) AS low_line_count
-            FROM
-                lineitem JOIN orders ON l_orderkey = o_orderkey
-            WHERE (l_shipmode = 'MAIL' OR l_shipmode = 'SHIP')
-                AND l_commitdate < l_receiptdate
-                AND l_shipdate < l_commitdate
-                AND l_receiptdate >= '1994-01-01'
-                AND l_receiptdate < '1995-01-01'
-            GROUP BY
+                end) as low_line_count
+            from
+                lineitem
+            join
+                orders
+            on
+                l_orderkey = o_orderkey
+            where
+                (l_shipmode = 'MAIL' or l_shipmode = 'SHIP')
+                and l_commitdate < l_receiptdate
+                and l_shipdate < l_commitdate
+                and l_receiptdate >= '1994-01-01'
+                and l_receiptdate < '1995-01-01'
+            group by
                 l_shipmode
-            ORDER BY
-                l_shipmode",
+            order by
+                l_shipmode;"
+        ),
+
+        13 => ctx.create_logical_plan(
+            "select
+                c_count,
+                count(*) as custdist
+            from
+                (
+                    select
+                        c_custkey,
+                        count(o_orderkey)
+                    from
+                        customer left outer join orders on
+                            c_custkey = o_custkey
+                            and o_comment not like '%special%requests%'
+                    group by
+                        c_custkey
+                ) as c_orders (c_custkey, c_count)
+            group by
+                c_count
+            order by
+                custdist desc,
+                c_count desc;"
+        ),
+
+        14 => ctx.create_logical_plan(
+            "select
+                100.00 * sum(case
+                    when p_type like 'PROMO%'
+                        then l_extendedprice * (1 - l_discount)
+                    else 0
+                end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
+            from
+                lineitem,
+                part
+            where
+                l_partkey = p_partkey
+                and l_shipdate >= '1995-09-01'
+                and l_shipdate < '1995-10-01';"
+        ),
+
+        15 => ctx.create_logical_plan(
+            "create view revenue0 (supplier_no, total_revenue) as
+                select
+                    l_suppkey,
+                    sum(l_extendedprice * (1 - l_discount))
+                from
+                    lineitem
+                where
+                    l_shipdate >= date '1996-01-01'
+                    and l_shipdate < date '1996-01-01' + interval '3' month
+                group by
+                    l_suppkey;
+
+            select
+                s_suppkey,
+                s_name,
+                s_address,
+                s_phone,
+                total_revenue
+            from
+                supplier,
+                revenue0
+            where
+                s_suppkey = supplier_no
+                and total_revenue = (
+                    select
+                        max(total_revenue)
+                    from
+                        revenue0
+                )
+            order by
+                s_suppkey;
+
+            drop view revenue0;"
+        ),
+
+        16 => ctx.create_logical_plan(
+            "select
+                p_brand,
+                p_type,
+                p_size,
+                count(distinct ps_suppkey) as supplier_cnt
+            from
+                partsupp,
+                part
+            where
+                p_partkey = ps_partkey
+                and p_brand <> 'Brand#45'
+                and p_type not like 'MEDIUM POLISHED%'
+                and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
+                and ps_suppkey not in (
+                    select
+                        s_suppkey
+                    from
+                        supplier
+                    where
+                        s_comment like '%Customer%Complaints%'
+                )
+            group by
+                p_brand,
+                p_type,
+                p_size
+            order by
+                supplier_cnt desc,
+                p_brand,
+                p_type,
+                p_size;"
+        ),
+
+        17 => ctx.create_logical_plan(
+            "select
+                sum(l_extendedprice) / 7.0 as avg_yearly
+            from
+                lineitem,
+                part
+            where
+                p_partkey = l_partkey
+                and p_brand = 'Brand#23'
+                and p_container = 'MED BOX'
+                and l_quantity < (
+                    select
+                        0.2 * avg(l_quantity)
+                    from
+                        lineitem
+                    where
+                        l_partkey = p_partkey
+                );"
+        ),
+
+        18 => ctx.create_logical_plan(
+            "select
+                c_name,
+                c_custkey,
+                o_orderkey,
+                o_orderdate,
+                o_totalprice,
+                sum(l_quantity)
+            from
+                customer,
+                orders,
+                lineitem
+            where
+                o_orderkey in (
+                    select
+                        l_orderkey
+                    from
+                        lineitem
+                    group by
+                        l_orderkey having
+                            sum(l_quantity) > 300
+                )
+                and c_custkey = o_custkey
+                and o_orderkey = l_orderkey
+            group by
+                c_name,
+                c_custkey,
+                o_orderkey,
+                o_orderdate,
+                o_totalprice
+            order by
+                o_totalprice desc,
+                o_orderdate;"
+        ),
+
+        19 => ctx.create_logical_plan(
+            "select
+                sum(l_extendedprice* (1 - l_discount)) as revenue
+            from
+                lineitem,
+                part
+            where
+                (
+                    p_partkey = l_partkey
+                    and p_brand = 'Brand#12'
+                    and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
+                    and l_quantity >= 1 and l_quantity <= 1 + 10
+                    and p_size between 1 and 5
+                    and l_shipmode in ('AIR', 'AIR REG')
+                    and l_shipinstruct = 'DELIVER IN PERSON'
+                )
+                or
+                (
+                    p_partkey = l_partkey
+                    and p_brand = 'Brand#23'
+                    and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
+                    and l_quantity >= 10 and l_quantity <= 10 + 10
+                    and p_size between 1 and 10
+                    and l_shipmode in ('AIR', 'AIR REG')
+                    and l_shipinstruct = 'DELIVER IN PERSON'
+                )
+                or
+                (
+                    p_partkey = l_partkey
+                    and p_brand = 'Brand#34'
+                    and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
+                    and l_quantity >= 20 and l_quantity <= 20 + 10
+                    and p_size between 1 and 15
+                    and l_shipmode in ('AIR', 'AIR REG')
+                    and l_shipinstruct = 'DELIVER IN PERSON'
+                );"
+        ),
+
+        20 => ctx.create_logical_plan(
+            "select
+                s_name,
+                s_address
+            from
+                supplier,
+                nation
+            where
+                s_suppkey in (
+                    select
+                        ps_suppkey
+                    from
+                        partsupp
+                    where
+                        ps_partkey in (
+                            select
+                                p_partkey
+                            from
+                                part
+                            where
+                                p_name like 'forest%'
+                        )
+                        and ps_availqty > (
+                            select
+                                0.5 * sum(l_quantity)
+                            from
+                                lineitem
+                            where
+                                l_partkey = ps_partkey
+                                and l_suppkey = ps_suppkey
+                                and l_shipdate >= date '1994-01-01'
+                                and l_shipdate < 'date 1994-01-01' + interval '1' year
+                        )
+                )
+                and s_nationkey = n_nationkey
+                and n_name = 'CANADA'
+            order by
+                s_name;"
+        ),
+
+        21 => ctx.create_logical_plan(
+            "select
+                s_name,
+                count(*) as numwait
+            from
+                supplier,
+                lineitem l1,
+                orders,
+                nation
+            where
+                s_suppkey = l1.l_suppkey
+                and o_orderkey = l1.l_orderkey
+                and o_orderstatus = 'F'
+                and l1.l_receiptdate > l1.l_commitdate
+                and exists (
+                    select
+                        *
+                    from
+                        lineitem l2
+                    where
+                        l2.l_orderkey = l1.l_orderkey
+                        and l2.l_suppkey <> l1.l_suppkey
+                )
+                and not exists (
+                    select
+                        *
+                    from
+                        lineitem l3
+                    where
+                        l3.l_orderkey = l1.l_orderkey
+                        and l3.l_suppkey <> l1.l_suppkey
+                        and l3.l_receiptdate > l3.l_commitdate
+                )
+                and s_nationkey = n_nationkey
+                and n_name = 'SAUDI ARABIA'
+            group by
+                s_name
+            order by
+                numwait desc,
+                s_name;"
         ),
 
-        _ => unimplemented!("unsupported query"),
+        22 => ctx.create_logical_plan(
+            "select
+                cntrycode,
+                count(*) as numcust,
+                sum(c_acctbal) as totacctbal
+            from
+                (
+                    select
+                        substring(c_phone from 1 for 2) as cntrycode,
+                        c_acctbal
+                    from
+                        customer
+                    where
+                        substring(c_phone from 1 for 2) in
+                            ('13', '31', '23', '29', '30', '18', '17')
+                        and c_acctbal > (
+                            select
+                                avg(c_acctbal)
+                            from
+                                customer
+                            where
+                                c_acctbal > 0.00
+                                and substring(c_phone from 1 for 2) in
+                                    ('13', '31', '23', '29', '30', '18', '17')
+                        )
+                        and not exists (
+                            select
+                                *
+                            from
+                                orders
+                            where
+                                o_custkey = c_custkey
+                        )
+                ) as custsale
+            group by
+                cntrycode
+            order by
+                cntrycode;"
+        ),
+
+        _ => unimplemented!("invalid query. Expected value between 1 and 22"),
     }
 }
 
@@ -286,35 +1026,89 @@ fn get_table(
 
 fn get_schema(table: &str) -> Schema {
     match table {
-        "lineitem" => Schema::new(vec![
-            Field::new("l_orderkey", DataType::UInt32, true),
-            Field::new("l_partkey", DataType::UInt32, true),
-            Field::new("l_suppkey", DataType::UInt32, true),
-            Field::new("l_linenumber", DataType::UInt32, true),
-            Field::new("l_quantity", DataType::Float64, true),
-            Field::new("l_extendedprice", DataType::Float64, true),
-            Field::new("l_discount", DataType::Float64, true),
-            Field::new("l_tax", DataType::Float64, true),
-            Field::new("l_returnflag", DataType::Utf8, true),
-            Field::new("l_linestatus", DataType::Utf8, true),
-            Field::new("l_shipdate", DataType::Utf8, true),
-            Field::new("l_commitdate", DataType::Utf8, true),
-            Field::new("l_receiptdate", DataType::Utf8, true),
-            Field::new("l_shipinstruct", DataType::Utf8, true),
-            Field::new("l_shipmode", DataType::Utf8, true),
-            Field::new("l_comment", DataType::Utf8, true),
+        "part" => Schema::new(vec![
+            Field::new("p_partkey", DataType::UInt32, false),
+            Field::new("p_name", DataType::Utf8, false),
+            Field::new("p_mfgr", DataType::Utf8, false),
+            Field::new("p_brand", DataType::Utf8, false),
+            Field::new("p_type", DataType::Utf8, false),
+            Field::new("p_size", DataType::UInt32, false),
+            Field::new("p_container", DataType::Utf8, false),
+            Field::new("p_retailprice", DataType::Float64, false), // decimal
+            Field::new("p_comment", DataType::Utf8, false),
+        ]),
+
+        "supplier" => Schema::new(vec![
+            Field::new("s_suppkey", DataType::UInt32, false),
+            Field::new("s_name", DataType::Utf8, false),
+            Field::new("s_address", DataType::Utf8, false),
+            Field::new("s_nationkey", DataType::UInt32, false),
+            Field::new("s_phone", DataType::Utf8, false),
+            Field::new("s_acctbal", DataType::Float64, false), // decimal
+            Field::new("s_comment", DataType::Utf8, false),
+        ]),
+
+        "partsupp" => Schema::new(vec![
+            Field::new("ps_partkey", DataType::UInt32, false),
+            Field::new("ps_suppkey", DataType::UInt32, false),
+            Field::new("ps_availqty", DataType::UInt32, false),
+            Field::new("ps_supplycost", DataType::Float64, false), // decimal
+            Field::new("ps_comment", DataType::Utf8, false),
+        ]),
+
+        "customer" => Schema::new(vec![
+            Field::new("c_custkey", DataType::UInt32, false),
+            Field::new("c_name", DataType::Utf8, false),
+            Field::new("c_address", DataType::Utf8, false),
+            Field::new("c_nationkey", DataType::UInt32, false),
+            Field::new("c_phone", DataType::Utf8, false),
+            Field::new("c_acctbal", DataType::Float64, false), // decimal
+            Field::new("c_mktsegment", DataType::Utf8, false),
+            Field::new("c_comment", DataType::Utf8, false),
         ]),
 
         "orders" => Schema::new(vec![
-            Field::new("o_orderkey", DataType::UInt32, true),
-            Field::new("custkey", DataType::UInt32, true),
-            Field::new("o_orderstatus", DataType::Utf8, true),
-            Field::new("o_totalprice", DataType::Float64, true),
-            Field::new("o_orderdate", DataType::Utf8, true),
-            Field::new("o_orderpriority", DataType::Utf8, true),
-            Field::new("o_clerk", DataType::Utf8, true),
-            Field::new("o_shippriority", DataType::Utf8, true),
-            Field::new("o_comment", DataType::Utf8, true),
+            Field::new("o_orderkey", DataType::UInt32, false),
+            Field::new("o_custkey", DataType::UInt32, false),
+            Field::new("o_orderstatus", DataType::Utf8, false),
+            Field::new("o_totalprice", DataType::Float64, false), // decimal
+            Field::new("o_orderdate", DataType::Utf8, false),
+            Field::new("o_orderpriority", DataType::Utf8, false),
+            Field::new("o_clerk", DataType::Utf8, false),
+            Field::new("o_shippriority", DataType::UInt32, false),
+            Field::new("o_comment", DataType::Utf8, false),
+        ]),
+
+        "lineitem" => Schema::new(vec![
+            Field::new("l_orderkey", DataType::UInt32, false),
+            Field::new("l_partkey", DataType::UInt32, false),
+            Field::new("l_suppkey", DataType::UInt32, false),
+            Field::new("l_linenumber", DataType::UInt32, false),
+            Field::new("l_quantity", DataType::Float64, false), // decimal
+            Field::new("l_extendedprice", DataType::Float64, false), // decimal
+            Field::new("l_discount", DataType::Float64, false), // decimal
+            Field::new("l_tax", DataType::Float64, false),      // decimal
+            Field::new("l_returnflag", DataType::Utf8, false),
+            Field::new("l_linestatus", DataType::Utf8, false),
+            Field::new("l_shipdate", DataType::Utf8, false),
+            Field::new("l_commitdate", DataType::Utf8, false),
+            Field::new("l_receiptdate", DataType::Utf8, false),
+            Field::new("l_shipinstruct", DataType::Utf8, false),
+            Field::new("l_shipmode", DataType::Utf8, false),
+            Field::new("l_comment", DataType::Utf8, false),
+        ]),
+
+        "nation" => Schema::new(vec![
+            Field::new("n_nationkey", DataType::UInt32, false),
+            Field::new("n_name", DataType::Utf8, false),
+            Field::new("n_regionkey", DataType::UInt32, false),
+            Field::new("n_comment", DataType::Utf8, false),
+        ]),
+
+        "region" => Schema::new(vec![
+            Field::new("r_regionkey", DataType::UInt32, false),
+            Field::new("r_name", DataType::Utf8, false),
+            Field::new("r_comment", DataType::Utf8, false),
         ]),
 
         _ => unimplemented!(),