You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/01/08 22:39:56 UTC

[arrow-datafusion] branch master updated: add tpch sqllogicaltest and remove some duplicated test (#4802)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new e393b2827 add tpch sqllogicaltest and remove some duplicated test (#4802)
e393b2827 is described below

commit e393b28271b0e05274ea525de33a96b12a8a2ab5
Author: jakevin <ja...@gmail.com>
AuthorDate: Mon Jan 9 06:39:51 2023 +0800

    add tpch sqllogicaltest and remove some duplicated test (#4802)
    
    * add tpch sqllogicaltest
    
    * remove
---
 datafusion/core/tests/sql/subqueries.rs            | 334 --------
 .../core/tests/sqllogictests/test_files/tpch.slt   | 867 +++++++++++++++++++++
 2 files changed, 867 insertions(+), 334 deletions(-)

diff --git a/datafusion/core/tests/sql/subqueries.rs b/datafusion/core/tests/sql/subqueries.rs
index 3fff5ba3e..2627a2db0 100644
--- a/datafusion/core/tests/sql/subqueries.rs
+++ b/datafusion/core/tests/sql/subqueries.rs
@@ -115,337 +115,3 @@ where o_orderstatus in (
 
     Ok(())
 }
-
-#[tokio::test]
-async fn tpch_q2_correlated() -> Result<()> {
-    let ctx = SessionContext::new();
-    register_tpch_csv(&ctx, "part").await?;
-    register_tpch_csv(&ctx, "supplier").await?;
-    register_tpch_csv(&ctx, "partsupp").await?;
-    register_tpch_csv(&ctx, "nation").await?;
-    register_tpch_csv(&ctx, "region").await?;
-
-    let sql = r#"select s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment
-from part, supplier, partsupp, nation, region
-where p_partkey = ps_partkey and s_suppkey = ps_suppkey and p_size = 15 and p_type like '%BRASS'
-    and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'EUROPE'
-    and ps_supplycost = (
-        select min(ps_supplycost) from partsupp, supplier, nation, region
-        where p_partkey = ps_partkey and s_suppkey = ps_suppkey and s_nationkey = n_nationkey
-        and n_regionkey = r_regionkey and r_name = 'EUROPE'
-    )
-order by s_acctbal desc, n_name, s_name, p_partkey;"#;
-
-    // assert plan
-    let dataframe = ctx.sql(sql).await.unwrap();
-    let plan = dataframe.into_optimized_plan().unwrap();
-    let actual = format!("{}", plan.display_indent());
-    let expected = r#"Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
-  Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment
-    Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name
-      Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.__value
-        Inner Join: nation.n_regionkey = region.r_regionkey
-          Inner Join: supplier.s_nationkey = nation.n_nationkey
-            Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
-              Inner Join: part.p_partkey = partsupp.ps_partkey
-                Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
-                  TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(15), part.p_type LIKE Utf8("%BRASS")]
-                TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
-              TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
-            TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
-          Filter: region.r_name = Utf8("EUROPE")
-            TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]
-        SubqueryAlias: __scalar_sq_1
-          Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value
-            Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
-              Inner Join: nation.n_regionkey = region.r_regionkey
-                Inner Join: supplier.s_nationkey = nation.n_nationkey
-                  Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
-                    TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
-                    TableScan: supplier projection=[s_suppkey, s_nationkey]
-                  TableScan: nation projection=[n_nationkey, n_regionkey]
-                Filter: region.r_name = Utf8("EUROPE")
-                  TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]"#;
-    assert_eq!(actual, expected);
-
-    // assert data
-    let results = execute_to_batches(&ctx, sql).await;
-    let expected = vec!["++", "++"];
-    assert_batches_eq!(expected, &results);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q4_correlated() -> Result<()> {
-    let orders = r#"4,13678,O,53829.87,1995-10-11,5-LOW,Clerk#000000124,0,
-35,12760,O,192885.43,1995-10-23,4-NOT SPECIFIED,Clerk#000000259,0,
-65,1627,P,99763.79,1995-03-18,1-URGENT,Clerk#000000632,0,
-"#;
-    let lineitems = r#"4,8804,579,1,30,51384,0.03,0.08,N,O,1996-01-10,1995-12-14,1996-01-18,DELIVER IN PERSON,REG AIR,
-35,45,296,1,24,22680.96,0.02,0,N,O,1996-02-21,1996-01-03,1996-03-18,TAKE BACK RETURN,FOB,
-65,5970,481,1,26,48775.22,0.03,0.03,A,F,1995-04-20,1995-04-25,1995-05-13,NONE,TRUCK,
-"#;
-
-    let ctx = SessionContext::new();
-    register_tpch_csv_data(&ctx, "orders", orders).await?;
-    register_tpch_csv_data(&ctx, "lineitem", lineitems).await?;
-
-    let sql = r#"
-        select o_orderpriority, count(*) as order_count
-        from orders
-        where exists (
-            select * from lineitem where l_orderkey = o_orderkey and l_commitdate < l_receiptdate)
-        group by o_orderpriority
-        order by o_orderpriority;
-        "#;
-
-    // assert plan
-    let dataframe = ctx.sql(sql).await.unwrap();
-    let plan = dataframe.into_optimized_plan().unwrap();
-    let actual = format!("{}", plan.display_indent());
-    let expected = r#"Sort: orders.o_orderpriority ASC NULLS LAST
-  Projection: orders.o_orderpriority, COUNT(UInt8(1)) AS order_count
-    Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[COUNT(UInt8(1))]]
-      LeftSemi Join: orders.o_orderkey = lineitem.l_orderkey
-        TableScan: orders projection=[o_orderkey, o_orderpriority]
-        Filter: lineitem.l_commitdate < lineitem.l_receiptdate
-          TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate]"#
-        .to_string();
-    assert_eq!(actual, expected);
-
-    // assert data
-    let results = execute_to_batches(&ctx, sql).await;
-    let expected = vec![
-        "+-----------------+-------------+",
-        "| o_orderpriority | order_count |",
-        "+-----------------+-------------+",
-        "| 1-URGENT        | 1           |",
-        "| 4-NOT SPECIFIED | 1           |",
-        "| 5-LOW           | 1           |",
-        "+-----------------+-------------+",
-    ];
-    assert_batches_eq!(expected, &results);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q17_correlated() -> Result<()> {
-    let parts = r#"63700,goldenrod lavender spring chocolate lace,Manufacturer#1,Brand#23,PROMO BURNISHED COPPER,7,MED BOX,901.00,ly. slyly ironi
-"#;
-    let lineitems = r#"1,63700,7311,2,36.0,45983.16,0.09,0.06,N,O,1996-04-12,1996-02-28,1996-04-20,TAKE BACK RETURN,MAIL,ly final dependencies: slyly bold
-1,63700,3701,3,1.0,13309.6,0.1,0.02,N,O,1996-01-29,1996-03-05,1996-01-31,TAKE BACK RETURN,REG AIR,"riously. regular, express dep"
-"#;
-
-    let ctx = SessionContext::new();
-    register_tpch_csv_data(&ctx, "part", parts).await?;
-    register_tpch_csv_data(&ctx, "lineitem", lineitems).await?;
-
-    let sql = r#"select sum(l_extendedprice) / 7.0 as avg_yearly
-        from lineitem, part
-        where p_partkey = l_partkey and p_brand = 'Brand#23' and p_container = 'MED BOX'
-        and l_quantity < (
-            select 0.2 * avg(l_quantity)
-            from lineitem where l_partkey = p_partkey
-        );"#;
-
-    // assert plan
-    let dataframe = ctx.sql(sql).await.unwrap();
-    let plan = dataframe.into_optimized_plan().unwrap();
-    let actual = format!("{}", plan.display_indent());
-    let expected = r#"Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly
-  Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]]
-    Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < CAST(__scalar_sq_1.__value AS Decimal128(30, 15))
-      Inner Join: part.p_partkey = __scalar_sq_1.l_partkey, lineitem.l_partkey = __scalar_sq_1.l_partkey
-        Inner Join: lineitem.l_partkey = part.p_partkey
-          TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice]
-          Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX")
-            TableScan: part projection=[p_partkey, p_brand, p_container]
-        SubqueryAlias: __scalar_sq_1
-          Projection: lineitem.l_partkey, Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS __value
-            Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]]
-              TableScan: lineitem projection=[l_partkey, l_quantity]"#
-        .to_string();
-    assert_eq!(actual, expected);
-
-    // assert data
-    let results = execute_to_batches(&ctx, sql).await;
-    let expected = vec![
-        "+--------------------+",
-        "| avg_yearly         |",
-        "+--------------------+",
-        "| 190.13714285714286 |",
-        "+--------------------+",
-    ];
-    assert_batches_eq!(expected, &results);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q20_correlated() -> Result<()> {
-    let ctx = SessionContext::new();
-    register_tpch_csv(&ctx, "supplier").await?;
-    register_tpch_csv(&ctx, "nation").await?;
-    register_tpch_csv(&ctx, "partsupp").await?;
-    register_tpch_csv(&ctx, "part").await?;
-    register_tpch_csv(&ctx, "lineitem").await?;
-
-    let sql = r#"select s_name, s_address
-from supplier, nation
-where s_suppkey in (
-    select ps_suppkey from partsupp
-    where ps_partkey in ( select p_partkey from part where p_name like 'forest%' )
-      and ps_availqty > ( select 0.5 * sum(l_quantity) from lineitem
-        where l_partkey = ps_partkey and l_suppkey = ps_suppkey and l_shipdate >= date '1994-01-01'
-    )
-)
-and s_nationkey = n_nationkey and n_name = 'CANADA'
-order by s_name;
-"#;
-
-    // assert plan
-    let dataframe = ctx.sql(sql).await.unwrap();
-    let plan = dataframe.into_optimized_plan().unwrap();
-    let actual = format!("{}", plan.display_indent());
-    let expected = r#"Sort: supplier.s_name ASC NULLS LAST
-  Projection: supplier.s_name, supplier.s_address
-    LeftSemi Join: supplier.s_suppkey = __correlated_sq_1.ps_suppkey
-      Inner Join: supplier.s_nationkey = nation.n_nationkey
-        TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey]
-        Filter: nation.n_name = Utf8("CANADA")
-          TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("CANADA")]
-      SubqueryAlias: __correlated_sq_1
-        Projection: partsupp.ps_suppkey AS ps_suppkey
-          Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_1.__value
-            Inner Join: partsupp.ps_partkey = __scalar_sq_1.l_partkey, partsupp.ps_suppkey = __scalar_sq_1.l_suppkey
-              LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey
-                TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty]
-                SubqueryAlias: __correlated_sq_2
-                  Projection: part.p_partkey AS p_partkey
-                    Filter: part.p_name LIKE Utf8("forest%")
-                      TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("forest%")]
-              SubqueryAlias: __scalar_sq_1
-                Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64) AS __value
-                  Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]]
-                    Filter: lineitem.l_shipdate >= Date32("8766")
-                      TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("8766")]"#;
-    assert_eq!(actual, expected);
-
-    // assert data
-    let results = execute_to_batches(&ctx, sql).await;
-    let expected = vec!["++", "++"];
-    assert_batches_eq!(expected, &results);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q22_correlated() -> Result<()> {
-    let ctx = SessionContext::new();
-    register_tpch_csv(&ctx, "customer").await?;
-    register_tpch_csv(&ctx, "orders").await?;
-
-    let sql = r#"select cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal
-from (
-        select substring(c_phone from 1 for 2) as cntrycode, c_acctbal from customer
-        where substring(c_phone from 1 for 2) in ('13', '31', '23', '29', '30', '18', '17')
-          and c_acctbal > (
-            select avg(c_acctbal) from customer where c_acctbal > 0.00
-              and substring(c_phone from 1 for 2) in ('13', '31', '23', '29', '30', '18', '17')
-        )
-          and not exists ( select * from orders where o_custkey = c_custkey )
-    ) as custsale
-group by cntrycode
-order by cntrycode;"#;
-
-    // assert plan
-    let dataframe = ctx.sql(sql).await.unwrap();
-    let plan = dataframe.into_optimized_plan().unwrap();
-    let actual = format!("{}", plan.display_indent());
-    let expected = r#"Sort: custsale.cntrycode ASC NULLS LAST
-  Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust, SUM(custsale.c_acctbal) AS totacctbal
-    Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]]
-      SubqueryAlias: custsale
-        Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal
-          Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_1.__value
-            CrossJoin:
-              LeftAnti Join: customer.c_custkey = orders.o_custkey
-                Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
-                  TableScan: customer projection=[c_custkey, c_phone, c_acctbal], partial_filters=[substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])]
-                TableScan: orders projection=[o_custkey]
-              SubqueryAlias: __scalar_sq_1
-                Projection: AVG(customer.c_acctbal) AS __value
-                  Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]]
-                    Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
-                      TableScan: customer projection=[c_phone, c_acctbal], partial_filters=[customer.c_acctbal > Decimal128(Some(0),15,2) AS customer.c_acctbal > Decimal128(Some(0),30,15), substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]), customer.c_acctbal > Decimal128(Some(0),15,2)]"#;
-    assert_eq!(expected, actual);
-
-    // assert data
-    let results = execute_to_batches(&ctx, sql).await;
-    let expected = vec![
-        "+-----------+---------+------------+",
-        "| cntrycode | numcust | totacctbal |",
-        "+-----------+---------+------------+",
-        "| 18        | 1       | 8324.07    |",
-        "| 30        | 1       | 7638.57    |",
-        "+-----------+---------+------------+",
-    ];
-    assert_batches_eq!(expected, &results);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q11_correlated() -> Result<()> {
-    let ctx = SessionContext::new();
-    register_tpch_csv(&ctx, "partsupp").await?;
-    register_tpch_csv(&ctx, "supplier").await?;
-    register_tpch_csv(&ctx, "nation").await?;
-
-    let sql = r#"select ps_partkey, sum(ps_supplycost * ps_availqty) as value
-from partsupp, supplier, nation
-where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY'
-group by ps_partkey having
-    sum(ps_supplycost * ps_availqty) > (
-        select sum(ps_supplycost * ps_availqty) * 0.0001
-        from partsupp, supplier, nation
-        where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY'
-    )
-order by value desc;
-"#;
-
-    // assert plan
-    let dataframe = ctx.sql(sql).await.unwrap();
-    let plan = dataframe.into_optimized_plan().unwrap();
-    let actual = format!("{}", plan.display_indent());
-    let expected = r#"Sort: value DESC NULLS FIRST
-  Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value
-    Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__scalar_sq_1.__value AS Decimal128(38, 15))
-      CrossJoin:
-        Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
-          Inner Join: supplier.s_nationkey = nation.n_nationkey
-            Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
-              TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost]
-              TableScan: supplier projection=[s_suppkey, s_nationkey]
-            Filter: nation.n_name = Utf8("GERMANY")
-              TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")]
-        SubqueryAlias: __scalar_sq_1
-          Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS __value
-            Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
-              Inner Join: supplier.s_nationkey = nation.n_nationkey
-                Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
-                  TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost]
-                  TableScan: supplier projection=[s_suppkey, s_nationkey]
-                Filter: nation.n_name = Utf8("GERMANY")
-                  TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")]"#;
-    assert_eq!(actual, expected);
-
-    // assert data
-    let results = execute_to_batches(&ctx, sql).await;
-    let expected = vec!["++", "++"];
-    assert_batches_eq!(expected, &results);
-
-    Ok(())
-}
diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch.slt b/datafusion/core/tests/sqllogictests/test_files/tpch.slt
new file mode 100644
index 000000000..2c11983d5
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/tpch.slt
@@ -0,0 +1,867 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+## TPC-H DDL
+##########
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS customer (
+    c_custkey INTEGER,
+    c_name VARCHAR,
+    c_address VARCHAR,
+    c_nationkey INTEGER,
+    c_phone VARCHAR,
+    c_acctbal NUMERIC,
+    c_mktsegment VARCHAR,
+    c_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/customer.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS lineitem (
+    l_orderkey BIGINT,
+    l_partkey INTEGER,
+    l_suppkey INTEGER,
+    l_linenumber INTEGER,
+    l_quantity NUMERIC,
+    l_extendedprice NUMERIC,
+    l_discount NUMERIC,
+    l_tax NUMERIC,
+    l_returnflag VARCHAR,
+    l_linestatus VARCHAR,
+    l_shipdate DATE,
+    l_commitdate DATE,
+    l_receiptdate DATE,
+    l_shipinstruct VARCHAR,
+    l_shipmode VARCHAR,
+    l_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/lineitem.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS nation (
+    n_nationkey INTEGER,
+    n_name VARCHAR,
+    n_regionkey INTEGER,
+    n_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/nation.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS orders (
+    o_orderkey BIGINT,
+    o_custkey INTEGER,
+    o_orderstatus VARCHAR,
+    o_totalprice NUMERIC,
+    o_orderdate DATE,
+    o_orderpriority VARCHAR,
+    o_clerk VARCHAR,
+    o_shippriority INTEGER,
+    o_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/orders.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS part (
+    p_partkey INTEGER,
+    p_name VARCHAR,
+    p_mfgr VARCHAR,
+    p_brand VARCHAR,
+    p_type VARCHAR,
+    p_size INTEGER,
+    p_container VARCHAR,
+    p_retailprice NUMERIC,
+    p_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/part.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS partsupp (
+    ps_partkey INTEGER,
+    ps_suppkey INTEGER,
+    ps_availqty INTEGER,
+    ps_supplycost NUMERIC,
+    ps_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/partsupp.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS region (
+    r_regionkey INTEGER,
+    r_name VARCHAR,
+    r_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/region.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS supplier (
+    s_suppkey  INTEGER,
+    s_name VARCHAR,
+    s_address VARCHAR,
+    s_nationkey INTEGER,
+    s_phone VARCHAR,
+    s_acctbal NUMERIC,
+    s_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/supplier.csv';
+
+##########
+## TPC-H DDL
+##########
+
+
+# q1
+query T
+select
+    l_returnflag,
+    l_linestatus,
+    sum(l_quantity) as sum_qty,
+    sum(l_extendedprice) as sum_base_price,
+    sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+    avg(l_quantity) as avg_qty,
+    avg(l_extendedprice) as avg_price,
+    avg(l_discount) as avg_disc,
+    count(*) as count_order
+from
+    lineitem
+where
+        l_shipdate <= date '1998-09-02'
+group by
+    l_returnflag,
+    l_linestatus
+order by
+    l_returnflag,
+    l_linestatus;
+----
+A F 27.0000000000 39890.8800000000 37497.4272000000 40122.2471040000 27.00000000000000 39890.88000000000000 0.06000000000000 1
+N O 166.0000000000 205387.5000000000 191556.1888000000 199866.7593360000 27.66666666666666 34231.25000000000000 0.07500000000000 6
+R F 94.0000000000 100854.5200000000 92931.3900000000 92931.3900000000 47.00000000000000 50427.26000000000000 0.08000000000000 2
+
+
+# q2
+query T
+select
+    s_acctbal,
+    s_name,
+    n_name,
+    p_partkey,
+    p_mfgr,
+    s_address,
+    s_phone,
+    s_comment
+from
+    part,
+    supplier,
+    partsupp,
+    nation,
+    region
+where
+        p_partkey = ps_partkey
+  and s_suppkey = ps_suppkey
+  and p_size = 15
+  and p_type like '%BRASS'
+  and s_nationkey = n_nationkey
+  and n_regionkey = r_regionkey
+  and r_name = 'EUROPE'
+  and ps_supplycost = (
+    select
+        min(ps_supplycost)
+    from
+        partsupp,
+        supplier,
+        nation,
+        region
+    where
+            p_partkey = ps_partkey
+      and s_suppkey = ps_suppkey
+      and s_nationkey = n_nationkey
+      and n_regionkey = r_regionkey
+      and r_name = 'EUROPE'
+)
+order by
+    s_acctbal desc,
+    n_name,
+    s_name,
+    p_partkey;
+----
+
+
+# q3
+query T
+select
+    l_orderkey,
+    sum(l_extendedprice * (1 - l_discount)) as revenue,
+    o_orderdate,
+    o_shippriority
+from
+    customer,
+    orders,
+    lineitem
+where
+        c_mktsegment = 'BUILDING'
+  and c_custkey = o_custkey
+  and l_orderkey = o_orderkey
+  and o_orderdate < date '1995-03-15'
+  and l_shipdate > date '1995-03-15'
+group by
+    l_orderkey,
+    o_orderdate,
+    o_shippriority
+order by
+    revenue desc,
+    o_orderdate;
+----
+
+
+# q4
+query T
+select
+    l_orderkey,
+    sum(l_extendedprice * (1 - l_discount)) as revenue,
+    o_orderdate,
+    o_shippriority
+from
+    customer,
+    orders,
+    lineitem
+where
+        c_mktsegment = 'BUILDING'
+  and c_custkey = o_custkey
+  and l_orderkey = o_orderkey
+  and o_orderdate < date '1995-03-15'
+  and l_shipdate > date '1995-03-15'
+group by
+    l_orderkey,
+    o_orderdate,
+    o_shippriority
+order by
+    revenue desc,
+    o_orderdate;
+----
+
+
+
+# q5
+query T
+select
+    n_name,
+    sum(l_extendedprice * (1 - l_discount)) as revenue
+from
+    customer,
+    orders,
+    lineitem,
+    supplier,
+    nation,
+    region
+where
+        c_custkey = o_custkey
+  and l_orderkey = o_orderkey
+  and l_suppkey = s_suppkey
+  and c_nationkey = s_nationkey
+  and s_nationkey = n_nationkey
+  and n_regionkey = r_regionkey
+  and r_name = 'ASIA'
+  and o_orderdate >= date '1994-01-01'
+  and o_orderdate < date '1995-01-01'
+group by
+    n_name
+order by
+    revenue desc;
+----
+
+
+# q6
+query T
+select
+    sum(l_extendedprice * l_discount) as revenue
+from
+    lineitem
+where
+        l_shipdate >= date '1994-01-01'
+  and l_shipdate < date '1995-01-01'
+  and l_discount between 0.06 - 0.01 and 0.06 + 0.01
+  and l_quantity < 24;
+----
+NULL
+
+
+# q7
+query T
+select
+    supp_nation,
+    cust_nation,
+    l_year,
+    sum(volume) as revenue
+from
+    (
+        select
+            n1.n_name as supp_nation,
+            n2.n_name as cust_nation,
+            extract(year from l_shipdate) as l_year,
+            l_extendedprice * (1 - l_discount) as volume
+        from
+            supplier,
+            lineitem,
+            orders,
+            customer,
+            nation n1,
+            nation n2
+        where
+                s_suppkey = l_suppkey
+          and o_orderkey = l_orderkey
+          and c_custkey = o_custkey
+          and s_nationkey = n1.n_nationkey
+          and c_nationkey = n2.n_nationkey
+          and (
+                (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
+                or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
+            )
+          and l_shipdate between date '1995-01-01' and date '1996-12-31'
+    ) as shipping
+group by
+    supp_nation,
+    cust_nation,
+    l_year
+order by
+    supp_nation,
+    cust_nation,
+    l_year;
+----
+
+
+# q8
+query T
+select
+    o_year,
+    sum(case
+            when nation = 'BRAZIL' then volume
+            else 0
+        end) / sum(volume) as mkt_share
+from
+    (
+        select
+            extract(year from o_orderdate) as o_year,
+            l_extendedprice * (1 - l_discount) as volume,
+            n2.n_name as nation
+        from
+            part,
+            supplier,
+            lineitem,
+            orders,
+            customer,
+            nation n1,
+            nation n2,
+            region
+        where
+                p_partkey = l_partkey
+          and s_suppkey = l_suppkey
+          and l_orderkey = o_orderkey
+          and o_custkey = c_custkey
+          and c_nationkey = n1.n_nationkey
+          and n1.n_regionkey = r_regionkey
+          and r_name = 'AMERICA'
+          and s_nationkey = n2.n_nationkey
+          and o_orderdate between date '1995-01-01' and date '1996-12-31'
+          and p_type = 'ECONOMY ANODIZED STEEL'
+    ) as all_nations
+group by
+    o_year
+order by
+    o_year;
+----
+
+
+# q9
+query T
+select
+    nation,
+    o_year,
+    sum(amount) as sum_profit
+from
+    (
+        select
+            n_name as nation,
+            extract(year from o_orderdate) as o_year,
+            l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
+        from
+            part,
+            supplier,
+            lineitem,
+            partsupp,
+            orders,
+            nation
+        where
+                s_suppkey = l_suppkey
+          and ps_suppkey = l_suppkey
+          and ps_partkey = l_partkey
+          and p_partkey = l_partkey
+          and o_orderkey = l_orderkey
+          and s_nationkey = n_nationkey
+          and p_name like '%green%'
+    ) as profit
+group by
+    nation,
+    o_year
+order by
+    nation,
+    o_year desc;
+----
+
+
+# q10
+query T
+select
+    c_custkey,
+    c_name,
+    sum(l_extendedprice * (1 - l_discount)) as revenue,
+    c_acctbal,
+    n_name,
+    c_address,
+    c_phone,
+    c_comment
+from
+    customer,
+    orders,
+    lineitem,
+    nation
+where
+        c_custkey = o_custkey
+  and l_orderkey = o_orderkey
+  and o_orderdate >= date '1993-10-01'
+  and o_orderdate < date '1994-01-01'
+  and l_returnflag = 'R'
+  and c_nationkey = n_nationkey
+group by
+    c_custkey,
+    c_name,
+    c_acctbal,
+    c_phone,
+    n_name,
+    c_address,
+    c_comment
+order by
+    revenue desc;
+----
+
+
+# q11
+query T
+select
+    ps_partkey,
+    sum(ps_supplycost * ps_availqty) as value
+from
+    partsupp,
+    supplier,
+    nation
+where
+    ps_suppkey = s_suppkey
+  and s_nationkey = n_nationkey
+  and n_name = 'GERMANY'
+group by
+    ps_partkey having
+    sum(ps_supplycost * ps_availqty) > (
+    select
+    sum(ps_supplycost * ps_availqty) * 0.0001
+    from
+    partsupp,
+    supplier,
+    nation
+    where
+    ps_suppkey = s_suppkey
+                  and s_nationkey = n_nationkey
+                  and n_name = 'GERMANY'
+    )
+order by
+    value desc;
+----
+
+
+# q12
+query T
+select
+    l_shipmode,
+    sum(case
+            when o_orderpriority = '1-URGENT'
+                or o_orderpriority = '2-HIGH'
+                then 1
+            else 0
+        end) as high_line_count,
+    sum(case
+            when o_orderpriority <> '1-URGENT'
+                and o_orderpriority <> '2-HIGH'
+                then 1
+            else 0
+        end) as low_line_count
+from
+    lineitem
+        join
+    orders
+    on
+            l_orderkey = o_orderkey
+where
+        l_shipmode in ('MAIL', 'SHIP')
+  and l_commitdate < l_receiptdate
+  and l_shipdate < l_commitdate
+  and l_receiptdate >= date '1994-01-01'
+  and l_receiptdate < date '1995-01-01'
+group by
+    l_shipmode
+order by
+    l_shipmode;
+----
+
+
+# q13
+query T
+select
+    c_count,
+    count(*) as custdist
+from
+    (
+        select
+            c_custkey,
+            count(o_orderkey)
+        from
+            customer left outer join orders on
+                        c_custkey = o_custkey
+                    and o_comment not like '%special%requests%'
+        group by
+            c_custkey
+    ) as c_orders (c_custkey, c_count)
+group by
+    c_count
+order by
+    custdist desc,
+    c_count desc;
+----
+0 9
+
+# q14
+query T
+select
+            100.00 * sum(case
+                             when p_type like 'PROMO%'
+                                 then l_extendedprice * (1 - l_discount)
+                             else 0
+            end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
+from
+    lineitem,
+    part
+where
+        l_partkey = p_partkey
+  and l_shipdate >= date '1995-09-01'
+  and l_shipdate < date '1995-10-01';
+----
+NULL
+
+statement ok
+create view revenue0 (supplier_no, total_revenue) as
+	select
+		l_suppkey,
+		sum(l_extendedprice * (1 - l_discount))
+	from
+		lineitem
+	where
+		l_shipdate >= date '1996-01-01'
+		and l_shipdate < date '1996-01-01' + interval '3' month
+	group by
+		l_suppkey;
+
+# q15
+query T
+select
+	s_suppkey,
+	s_name,
+	s_address,
+	s_phone,
+	total_revenue
+from
+	supplier,
+	revenue0
+where
+	s_suppkey = supplier_no
+	and total_revenue = (
+		select
+			max(total_revenue)
+		from
+			revenue0
+	)
+order by
+	s_suppkey;
+----
+
+statement ok
+drop view revenue0;
+
+
+# q16
+query T
+select
+    p_brand,
+    p_type,
+    p_size,
+    count(distinct ps_suppkey) as supplier_cnt
+from
+    partsupp,
+    part
+where
+        p_partkey = ps_partkey
+  and p_brand <> 'Brand#45'
+  and p_type not like 'MEDIUM POLISHED%'
+  and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
+  and ps_suppkey not in (
+    select
+        s_suppkey
+    from
+        supplier
+    where
+            s_comment like '%Customer%Complaints%'
+)
+group by
+    p_brand,
+    p_type,
+    p_size
+order by
+    supplier_cnt desc,
+    p_brand,
+    p_type,
+    p_size;
+----
+
+
+# q17
+query T
+select
+        sum(l_extendedprice) / 7.0 as avg_yearly
+from
+    lineitem,
+    part
+where
+        p_partkey = l_partkey
+  and p_brand = 'Brand#23'
+  and p_container = 'MED BOX'
+  and l_quantity < (
+    select
+            0.2 * avg(l_quantity)
+    from
+        lineitem
+    where
+            l_partkey = p_partkey
+);
+----
+NULL
+
+# q18
+query T
+select
+    c_name,
+    c_custkey,
+    o_orderkey,
+    o_orderdate,
+    o_totalprice,
+    sum(l_quantity)
+from
+    customer,
+    orders,
+    lineitem
+where
+        o_orderkey in (
+        select
+            l_orderkey
+        from
+            lineitem
+        group by
+            l_orderkey having
+                sum(l_quantity) > 300
+    )
+  and c_custkey = o_custkey
+  and o_orderkey = l_orderkey
+group by
+    c_name,
+    c_custkey,
+    o_orderkey,
+    o_orderdate,
+    o_totalprice
+order by
+    o_totalprice desc,
+    o_orderdate;
+----
+
+
+# q19
+query T
+select
+    sum(l_extendedprice* (1 - l_discount)) as revenue
+from
+    lineitem,
+    part
+where
+    (
+                p_partkey = l_partkey
+            and p_brand = 'Brand#12'
+            and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
+            and l_quantity >= 1 and l_quantity <= 1 + 10
+            and p_size between 1 and 5
+            and l_shipmode in ('AIR', 'AIR REG')
+            and l_shipinstruct = 'DELIVER IN PERSON'
+        )
+   or
+    (
+                p_partkey = l_partkey
+            and p_brand = 'Brand#23'
+            and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
+            and l_quantity >= 10 and l_quantity <= 10 + 10
+            and p_size between 1 and 10
+            and l_shipmode in ('AIR', 'AIR REG')
+            and l_shipinstruct = 'DELIVER IN PERSON'
+        )
+   or
+    (
+                p_partkey = l_partkey
+            and p_brand = 'Brand#34'
+            and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
+            and l_quantity >= 20 and l_quantity <= 20 + 10
+            and p_size between 1 and 15
+            and l_shipmode in ('AIR', 'AIR REG')
+            and l_shipinstruct = 'DELIVER IN PERSON'
+        );
+----
+NULL
+
+
+# q20
+query T
+select
+    s_name,
+    s_address
+from
+    supplier,
+    nation
+where
+        s_suppkey in (
+        select
+            ps_suppkey
+        from
+            partsupp
+        where
+                ps_partkey in (
+                select
+                    p_partkey
+                from
+                    part
+                where
+                        p_name like 'forest%'
+            )
+          and ps_availqty > (
+            select
+                    0.5 * sum(l_quantity)
+            from
+                lineitem
+            where
+                    l_partkey = ps_partkey
+              and l_suppkey = ps_suppkey
+              and l_shipdate >= date '1994-01-01'
+              and l_shipdate < date '1994-01-01' + interval '1' year
+        )
+    )
+  and s_nationkey = n_nationkey
+  and n_name = 'CANADA'
+order by
+    s_name;
+----
+
+
+# q21
+query T
+select
+    s_name,
+    count(*) as numwait
+from
+    supplier,
+    lineitem l1,
+    orders,
+    nation
+where
+        s_suppkey = l1.l_suppkey
+  and o_orderkey = l1.l_orderkey
+  and o_orderstatus = 'F'
+  and l1.l_receiptdate > l1.l_commitdate
+  and exists (
+        select
+            *
+        from
+            lineitem l2
+        where
+                l2.l_orderkey = l1.l_orderkey
+          and l2.l_suppkey <> l1.l_suppkey
+    )
+  and not exists (
+        select
+            *
+        from
+            lineitem l3
+        where
+                l3.l_orderkey = l1.l_orderkey
+          and l3.l_suppkey <> l1.l_suppkey
+          and l3.l_receiptdate > l3.l_commitdate
+    )
+  and s_nationkey = n_nationkey
+  and n_name = 'SAUDI ARABIA'
+group by
+    s_name
+order by
+    numwait desc,
+    s_name;
+----
+
+
+# q22
+query T
+select
+    cntrycode,
+    count(*) as numcust,
+    sum(c_acctbal) as totacctbal
+from
+    (
+        select
+            substring(c_phone from 1 for 2) as cntrycode,
+            c_acctbal
+        from
+            customer
+        where
+                substring(c_phone from 1 for 2) in
+                ('13', '31', '23', '29', '30', '18', '17')
+          and c_acctbal > (
+            select
+                avg(c_acctbal)
+            from
+                customer
+            where
+                    c_acctbal > 0.00
+              and substring(c_phone from 1 for 2) in
+                  ('13', '31', '23', '29', '30', '18', '17')
+        )
+          and not exists (
+                select
+                    *
+                from
+                    orders
+                where
+                        o_custkey = c_custkey
+            )
+    ) as custsale
+group by
+    cntrycode
+order by
+    cntrycode;
+----
+18 1 8324.0700000000
+30 1 7638.5700000000