You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/01/08 22:39:56 UTC
[arrow-datafusion] branch master updated: add tpch sqllogicaltest and remove some duplicated test (#4802)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new e393b2827 add tpch sqllogicaltest and remove some duplicated test (#4802)
e393b2827 is described below
commit e393b28271b0e05274ea525de33a96b12a8a2ab5
Author: jakevin <ja...@gmail.com>
AuthorDate: Mon Jan 9 06:39:51 2023 +0800
add tpch sqllogicaltest and remove some duplicated test (#4802)
* add tpch sqllogicaltest
* remove
---
datafusion/core/tests/sql/subqueries.rs | 334 --------
.../core/tests/sqllogictests/test_files/tpch.slt | 867 +++++++++++++++++++++
2 files changed, 867 insertions(+), 334 deletions(-)
diff --git a/datafusion/core/tests/sql/subqueries.rs b/datafusion/core/tests/sql/subqueries.rs
index 3fff5ba3e..2627a2db0 100644
--- a/datafusion/core/tests/sql/subqueries.rs
+++ b/datafusion/core/tests/sql/subqueries.rs
@@ -115,337 +115,3 @@ where o_orderstatus in (
Ok(())
}
-
-#[tokio::test]
-async fn tpch_q2_correlated() -> Result<()> {
- let ctx = SessionContext::new();
- register_tpch_csv(&ctx, "part").await?;
- register_tpch_csv(&ctx, "supplier").await?;
- register_tpch_csv(&ctx, "partsupp").await?;
- register_tpch_csv(&ctx, "nation").await?;
- register_tpch_csv(&ctx, "region").await?;
-
- let sql = r#"select s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment
-from part, supplier, partsupp, nation, region
-where p_partkey = ps_partkey and s_suppkey = ps_suppkey and p_size = 15 and p_type like '%BRASS'
- and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'EUROPE'
- and ps_supplycost = (
- select min(ps_supplycost) from partsupp, supplier, nation, region
- where p_partkey = ps_partkey and s_suppkey = ps_suppkey and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey and r_name = 'EUROPE'
- )
-order by s_acctbal desc, n_name, s_name, p_partkey;"#;
-
- // assert plan
- let dataframe = ctx.sql(sql).await.unwrap();
- let plan = dataframe.into_optimized_plan().unwrap();
- let actual = format!("{}", plan.display_indent());
- let expected = r#"Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
- Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment
- Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name
- Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.__value
- Inner Join: nation.n_regionkey = region.r_regionkey
- Inner Join: supplier.s_nationkey = nation.n_nationkey
- Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
- Inner Join: part.p_partkey = partsupp.ps_partkey
- Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
- TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(15), part.p_type LIKE Utf8("%BRASS")]
- TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
- TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
- TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
- Filter: region.r_name = Utf8("EUROPE")
- TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]
- SubqueryAlias: __scalar_sq_1
- Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value
- Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
- Inner Join: nation.n_regionkey = region.r_regionkey
- Inner Join: supplier.s_nationkey = nation.n_nationkey
- Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
- TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
- TableScan: supplier projection=[s_suppkey, s_nationkey]
- TableScan: nation projection=[n_nationkey, n_regionkey]
- Filter: region.r_name = Utf8("EUROPE")
- TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]"#;
- assert_eq!(actual, expected);
-
- // assert data
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec!["++", "++"];
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q4_correlated() -> Result<()> {
- let orders = r#"4,13678,O,53829.87,1995-10-11,5-LOW,Clerk#000000124,0,
-35,12760,O,192885.43,1995-10-23,4-NOT SPECIFIED,Clerk#000000259,0,
-65,1627,P,99763.79,1995-03-18,1-URGENT,Clerk#000000632,0,
-"#;
- let lineitems = r#"4,8804,579,1,30,51384,0.03,0.08,N,O,1996-01-10,1995-12-14,1996-01-18,DELIVER IN PERSON,REG AIR,
-35,45,296,1,24,22680.96,0.02,0,N,O,1996-02-21,1996-01-03,1996-03-18,TAKE BACK RETURN,FOB,
-65,5970,481,1,26,48775.22,0.03,0.03,A,F,1995-04-20,1995-04-25,1995-05-13,NONE,TRUCK,
-"#;
-
- let ctx = SessionContext::new();
- register_tpch_csv_data(&ctx, "orders", orders).await?;
- register_tpch_csv_data(&ctx, "lineitem", lineitems).await?;
-
- let sql = r#"
- select o_orderpriority, count(*) as order_count
- from orders
- where exists (
- select * from lineitem where l_orderkey = o_orderkey and l_commitdate < l_receiptdate)
- group by o_orderpriority
- order by o_orderpriority;
- "#;
-
- // assert plan
- let dataframe = ctx.sql(sql).await.unwrap();
- let plan = dataframe.into_optimized_plan().unwrap();
- let actual = format!("{}", plan.display_indent());
- let expected = r#"Sort: orders.o_orderpriority ASC NULLS LAST
- Projection: orders.o_orderpriority, COUNT(UInt8(1)) AS order_count
- Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[COUNT(UInt8(1))]]
- LeftSemi Join: orders.o_orderkey = lineitem.l_orderkey
- TableScan: orders projection=[o_orderkey, o_orderpriority]
- Filter: lineitem.l_commitdate < lineitem.l_receiptdate
- TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate]"#
- .to_string();
- assert_eq!(actual, expected);
-
- // assert data
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-----------------+-------------+",
- "| o_orderpriority | order_count |",
- "+-----------------+-------------+",
- "| 1-URGENT | 1 |",
- "| 4-NOT SPECIFIED | 1 |",
- "| 5-LOW | 1 |",
- "+-----------------+-------------+",
- ];
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q17_correlated() -> Result<()> {
- let parts = r#"63700,goldenrod lavender spring chocolate lace,Manufacturer#1,Brand#23,PROMO BURNISHED COPPER,7,MED BOX,901.00,ly. slyly ironi
-"#;
- let lineitems = r#"1,63700,7311,2,36.0,45983.16,0.09,0.06,N,O,1996-04-12,1996-02-28,1996-04-20,TAKE BACK RETURN,MAIL,ly final dependencies: slyly bold
-1,63700,3701,3,1.0,13309.6,0.1,0.02,N,O,1996-01-29,1996-03-05,1996-01-31,TAKE BACK RETURN,REG AIR,"riously. regular, express dep"
-"#;
-
- let ctx = SessionContext::new();
- register_tpch_csv_data(&ctx, "part", parts).await?;
- register_tpch_csv_data(&ctx, "lineitem", lineitems).await?;
-
- let sql = r#"select sum(l_extendedprice) / 7.0 as avg_yearly
- from lineitem, part
- where p_partkey = l_partkey and p_brand = 'Brand#23' and p_container = 'MED BOX'
- and l_quantity < (
- select 0.2 * avg(l_quantity)
- from lineitem where l_partkey = p_partkey
- );"#;
-
- // assert plan
- let dataframe = ctx.sql(sql).await.unwrap();
- let plan = dataframe.into_optimized_plan().unwrap();
- let actual = format!("{}", plan.display_indent());
- let expected = r#"Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly
- Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]]
- Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < CAST(__scalar_sq_1.__value AS Decimal128(30, 15))
- Inner Join: part.p_partkey = __scalar_sq_1.l_partkey, lineitem.l_partkey = __scalar_sq_1.l_partkey
- Inner Join: lineitem.l_partkey = part.p_partkey
- TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice]
- Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX")
- TableScan: part projection=[p_partkey, p_brand, p_container]
- SubqueryAlias: __scalar_sq_1
- Projection: lineitem.l_partkey, Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS __value
- Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]]
- TableScan: lineitem projection=[l_partkey, l_quantity]"#
- .to_string();
- assert_eq!(actual, expected);
-
- // assert data
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+--------------------+",
- "| avg_yearly |",
- "+--------------------+",
- "| 190.13714285714286 |",
- "+--------------------+",
- ];
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q20_correlated() -> Result<()> {
- let ctx = SessionContext::new();
- register_tpch_csv(&ctx, "supplier").await?;
- register_tpch_csv(&ctx, "nation").await?;
- register_tpch_csv(&ctx, "partsupp").await?;
- register_tpch_csv(&ctx, "part").await?;
- register_tpch_csv(&ctx, "lineitem").await?;
-
- let sql = r#"select s_name, s_address
-from supplier, nation
-where s_suppkey in (
- select ps_suppkey from partsupp
- where ps_partkey in ( select p_partkey from part where p_name like 'forest%' )
- and ps_availqty > ( select 0.5 * sum(l_quantity) from lineitem
- where l_partkey = ps_partkey and l_suppkey = ps_suppkey and l_shipdate >= date '1994-01-01'
- )
-)
-and s_nationkey = n_nationkey and n_name = 'CANADA'
-order by s_name;
-"#;
-
- // assert plan
- let dataframe = ctx.sql(sql).await.unwrap();
- let plan = dataframe.into_optimized_plan().unwrap();
- let actual = format!("{}", plan.display_indent());
- let expected = r#"Sort: supplier.s_name ASC NULLS LAST
- Projection: supplier.s_name, supplier.s_address
- LeftSemi Join: supplier.s_suppkey = __correlated_sq_1.ps_suppkey
- Inner Join: supplier.s_nationkey = nation.n_nationkey
- TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey]
- Filter: nation.n_name = Utf8("CANADA")
- TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("CANADA")]
- SubqueryAlias: __correlated_sq_1
- Projection: partsupp.ps_suppkey AS ps_suppkey
- Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_1.__value
- Inner Join: partsupp.ps_partkey = __scalar_sq_1.l_partkey, partsupp.ps_suppkey = __scalar_sq_1.l_suppkey
- LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey
- TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty]
- SubqueryAlias: __correlated_sq_2
- Projection: part.p_partkey AS p_partkey
- Filter: part.p_name LIKE Utf8("forest%")
- TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("forest%")]
- SubqueryAlias: __scalar_sq_1
- Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64) AS __value
- Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]]
- Filter: lineitem.l_shipdate >= Date32("8766")
- TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("8766")]"#;
- assert_eq!(actual, expected);
-
- // assert data
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec!["++", "++"];
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q22_correlated() -> Result<()> {
- let ctx = SessionContext::new();
- register_tpch_csv(&ctx, "customer").await?;
- register_tpch_csv(&ctx, "orders").await?;
-
- let sql = r#"select cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal
-from (
- select substring(c_phone from 1 for 2) as cntrycode, c_acctbal from customer
- where substring(c_phone from 1 for 2) in ('13', '31', '23', '29', '30', '18', '17')
- and c_acctbal > (
- select avg(c_acctbal) from customer where c_acctbal > 0.00
- and substring(c_phone from 1 for 2) in ('13', '31', '23', '29', '30', '18', '17')
- )
- and not exists ( select * from orders where o_custkey = c_custkey )
- ) as custsale
-group by cntrycode
-order by cntrycode;"#;
-
- // assert plan
- let dataframe = ctx.sql(sql).await.unwrap();
- let plan = dataframe.into_optimized_plan().unwrap();
- let actual = format!("{}", plan.display_indent());
- let expected = r#"Sort: custsale.cntrycode ASC NULLS LAST
- Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust, SUM(custsale.c_acctbal) AS totacctbal
- Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]]
- SubqueryAlias: custsale
- Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal
- Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_1.__value
- CrossJoin:
- LeftAnti Join: customer.c_custkey = orders.o_custkey
- Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
- TableScan: customer projection=[c_custkey, c_phone, c_acctbal], partial_filters=[substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])]
- TableScan: orders projection=[o_custkey]
- SubqueryAlias: __scalar_sq_1
- Projection: AVG(customer.c_acctbal) AS __value
- Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]]
- Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
- TableScan: customer projection=[c_phone, c_acctbal], partial_filters=[customer.c_acctbal > Decimal128(Some(0),15,2) AS customer.c_acctbal > Decimal128(Some(0),30,15), substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]), customer.c_acctbal > Decimal128(Some(0),15,2)]"#;
- assert_eq!(expected, actual);
-
- // assert data
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-----------+---------+------------+",
- "| cntrycode | numcust | totacctbal |",
- "+-----------+---------+------------+",
- "| 18 | 1 | 8324.07 |",
- "| 30 | 1 | 7638.57 |",
- "+-----------+---------+------------+",
- ];
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn tpch_q11_correlated() -> Result<()> {
- let ctx = SessionContext::new();
- register_tpch_csv(&ctx, "partsupp").await?;
- register_tpch_csv(&ctx, "supplier").await?;
- register_tpch_csv(&ctx, "nation").await?;
-
- let sql = r#"select ps_partkey, sum(ps_supplycost * ps_availqty) as value
-from partsupp, supplier, nation
-where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY'
-group by ps_partkey having
- sum(ps_supplycost * ps_availqty) > (
- select sum(ps_supplycost * ps_availqty) * 0.0001
- from partsupp, supplier, nation
- where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY'
- )
-order by value desc;
-"#;
-
- // assert plan
- let dataframe = ctx.sql(sql).await.unwrap();
- let plan = dataframe.into_optimized_plan().unwrap();
- let actual = format!("{}", plan.display_indent());
- let expected = r#"Sort: value DESC NULLS FIRST
- Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value
- Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__scalar_sq_1.__value AS Decimal128(38, 15))
- CrossJoin:
- Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
- Inner Join: supplier.s_nationkey = nation.n_nationkey
- Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
- TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost]
- TableScan: supplier projection=[s_suppkey, s_nationkey]
- Filter: nation.n_name = Utf8("GERMANY")
- TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")]
- SubqueryAlias: __scalar_sq_1
- Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS __value
- Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
- Inner Join: supplier.s_nationkey = nation.n_nationkey
- Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
- TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost]
- TableScan: supplier projection=[s_suppkey, s_nationkey]
- Filter: nation.n_name = Utf8("GERMANY")
- TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")]"#;
- assert_eq!(actual, expected);
-
- // assert data
- let results = execute_to_batches(&ctx, sql).await;
- let expected = vec!["++", "++"];
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch.slt b/datafusion/core/tests/sqllogictests/test_files/tpch.slt
new file mode 100644
index 000000000..2c11983d5
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/tpch.slt
@@ -0,0 +1,867 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+## TPC-H DDL
+##########
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS customer (
+ c_custkey INTEGER,
+ c_name VARCHAR,
+ c_address VARCHAR,
+ c_nationkey INTEGER,
+ c_phone VARCHAR,
+ c_acctbal NUMERIC,
+ c_mktsegment VARCHAR,
+ c_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/customer.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS lineitem (
+ l_orderkey BIGINT,
+ l_partkey INTEGER,
+ l_suppkey INTEGER,
+ l_linenumber INTEGER,
+ l_quantity NUMERIC,
+ l_extendedprice NUMERIC,
+ l_discount NUMERIC,
+ l_tax NUMERIC,
+ l_returnflag VARCHAR,
+ l_linestatus VARCHAR,
+ l_shipdate DATE,
+ l_commitdate DATE,
+ l_receiptdate DATE,
+ l_shipinstruct VARCHAR,
+ l_shipmode VARCHAR,
+ l_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/lineitem.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS nation (
+ n_nationkey INTEGER,
+ n_name VARCHAR,
+ n_regionkey INTEGER,
+ n_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/nation.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS orders (
+ o_orderkey BIGINT,
+ o_custkey INTEGER,
+ o_orderstatus VARCHAR,
+ o_totalprice NUMERIC,
+ o_orderdate DATE,
+ o_orderpriority VARCHAR,
+ o_clerk VARCHAR,
+ o_shippriority INTEGER,
+ o_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/orders.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS part (
+ p_partkey INTEGER,
+ p_name VARCHAR,
+ p_mfgr VARCHAR,
+ p_brand VARCHAR,
+ p_type VARCHAR,
+ p_size INTEGER,
+ p_container VARCHAR,
+ p_retailprice NUMERIC,
+ p_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/part.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS partsupp (
+ ps_partkey INTEGER,
+ ps_suppkey INTEGER,
+ ps_availqty INTEGER,
+ ps_supplycost NUMERIC,
+ ps_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/partsupp.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS region (
+ r_regionkey INTEGER,
+ r_name VARCHAR,
+ r_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/region.csv';
+
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS supplier (
+ s_suppkey INTEGER,
+ s_name VARCHAR,
+ s_address VARCHAR,
+ s_nationkey INTEGER,
+ s_phone VARCHAR,
+ s_acctbal NUMERIC,
+ s_comment VARCHAR
+) STORED AS CSV WITH HEADER ROW LOCATION 'tests/tpch-csv/supplier.csv';
+
+##########
+## TPC-H DDL
+##########
+
+
+# q1
+query T
+select
+ l_returnflag,
+ l_linestatus,
+ sum(l_quantity) as sum_qty,
+ sum(l_extendedprice) as sum_base_price,
+ sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+ sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+ avg(l_quantity) as avg_qty,
+ avg(l_extendedprice) as avg_price,
+ avg(l_discount) as avg_disc,
+ count(*) as count_order
+from
+ lineitem
+where
+ l_shipdate <= date '1998-09-02'
+group by
+ l_returnflag,
+ l_linestatus
+order by
+ l_returnflag,
+ l_linestatus;
+----
+A F 27.0000000000 39890.8800000000 37497.4272000000 40122.2471040000 27.00000000000000 39890.88000000000000 0.06000000000000 1
+N O 166.0000000000 205387.5000000000 191556.1888000000 199866.7593360000 27.66666666666666 34231.25000000000000 0.07500000000000 6
+R F 94.0000000000 100854.5200000000 92931.3900000000 92931.3900000000 47.00000000000000 50427.26000000000000 0.08000000000000 2
+
+
+# q2
+query T
+select
+ s_acctbal,
+ s_name,
+ n_name,
+ p_partkey,
+ p_mfgr,
+ s_address,
+ s_phone,
+ s_comment
+from
+ part,
+ supplier,
+ partsupp,
+ nation,
+ region
+where
+ p_partkey = ps_partkey
+ and s_suppkey = ps_suppkey
+ and p_size = 15
+ and p_type like '%BRASS'
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'EUROPE'
+ and ps_supplycost = (
+ select
+ min(ps_supplycost)
+ from
+ partsupp,
+ supplier,
+ nation,
+ region
+ where
+ p_partkey = ps_partkey
+ and s_suppkey = ps_suppkey
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'EUROPE'
+)
+order by
+ s_acctbal desc,
+ n_name,
+ s_name,
+ p_partkey;
+----
+
+
+# q3
+query T
+select
+ l_orderkey,
+ sum(l_extendedprice * (1 - l_discount)) as revenue,
+ o_orderdate,
+ o_shippriority
+from
+ customer,
+ orders,
+ lineitem
+where
+ c_mktsegment = 'BUILDING'
+ and c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and o_orderdate < date '1995-03-15'
+ and l_shipdate > date '1995-03-15'
+group by
+ l_orderkey,
+ o_orderdate,
+ o_shippriority
+order by
+ revenue desc,
+ o_orderdate;
+----
+
+
+# q4
+query T
+select
+ l_orderkey,
+ sum(l_extendedprice * (1 - l_discount)) as revenue,
+ o_orderdate,
+ o_shippriority
+from
+ customer,
+ orders,
+ lineitem
+where
+ c_mktsegment = 'BUILDING'
+ and c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and o_orderdate < date '1995-03-15'
+ and l_shipdate > date '1995-03-15'
+group by
+ l_orderkey,
+ o_orderdate,
+ o_shippriority
+order by
+ revenue desc,
+ o_orderdate;
+----
+
+
+
+# q5
+query T
+select
+ n_name,
+ sum(l_extendedprice * (1 - l_discount)) as revenue
+from
+ customer,
+ orders,
+ lineitem,
+ supplier,
+ nation,
+ region
+where
+ c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and l_suppkey = s_suppkey
+ and c_nationkey = s_nationkey
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'ASIA'
+ and o_orderdate >= date '1994-01-01'
+ and o_orderdate < date '1995-01-01'
+group by
+ n_name
+order by
+ revenue desc;
+----
+
+
+# q6
+query T
+select
+ sum(l_extendedprice * l_discount) as revenue
+from
+ lineitem
+where
+ l_shipdate >= date '1994-01-01'
+ and l_shipdate < date '1995-01-01'
+ and l_discount between 0.06 - 0.01 and 0.06 + 0.01
+ and l_quantity < 24;
+----
+NULL
+
+
+# q7
+query T
+select
+ supp_nation,
+ cust_nation,
+ l_year,
+ sum(volume) as revenue
+from
+ (
+ select
+ n1.n_name as supp_nation,
+ n2.n_name as cust_nation,
+ extract(year from l_shipdate) as l_year,
+ l_extendedprice * (1 - l_discount) as volume
+ from
+ supplier,
+ lineitem,
+ orders,
+ customer,
+ nation n1,
+ nation n2
+ where
+ s_suppkey = l_suppkey
+ and o_orderkey = l_orderkey
+ and c_custkey = o_custkey
+ and s_nationkey = n1.n_nationkey
+ and c_nationkey = n2.n_nationkey
+ and (
+ (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
+ or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
+ )
+ and l_shipdate between date '1995-01-01' and date '1996-12-31'
+ ) as shipping
+group by
+ supp_nation,
+ cust_nation,
+ l_year
+order by
+ supp_nation,
+ cust_nation,
+ l_year;
+----
+
+
+# q8
+query T
+select
+ o_year,
+ sum(case
+ when nation = 'BRAZIL' then volume
+ else 0
+ end) / sum(volume) as mkt_share
+from
+ (
+ select
+ extract(year from o_orderdate) as o_year,
+ l_extendedprice * (1 - l_discount) as volume,
+ n2.n_name as nation
+ from
+ part,
+ supplier,
+ lineitem,
+ orders,
+ customer,
+ nation n1,
+ nation n2,
+ region
+ where
+ p_partkey = l_partkey
+ and s_suppkey = l_suppkey
+ and l_orderkey = o_orderkey
+ and o_custkey = c_custkey
+ and c_nationkey = n1.n_nationkey
+ and n1.n_regionkey = r_regionkey
+ and r_name = 'AMERICA'
+ and s_nationkey = n2.n_nationkey
+ and o_orderdate between date '1995-01-01' and date '1996-12-31'
+ and p_type = 'ECONOMY ANODIZED STEEL'
+ ) as all_nations
+group by
+ o_year
+order by
+ o_year;
+----
+
+
+# q9
+query T
+select
+ nation,
+ o_year,
+ sum(amount) as sum_profit
+from
+ (
+ select
+ n_name as nation,
+ extract(year from o_orderdate) as o_year,
+ l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
+ from
+ part,
+ supplier,
+ lineitem,
+ partsupp,
+ orders,
+ nation
+ where
+ s_suppkey = l_suppkey
+ and ps_suppkey = l_suppkey
+ and ps_partkey = l_partkey
+ and p_partkey = l_partkey
+ and o_orderkey = l_orderkey
+ and s_nationkey = n_nationkey
+ and p_name like '%green%'
+ ) as profit
+group by
+ nation,
+ o_year
+order by
+ nation,
+ o_year desc;
+----
+
+
+# q10
+query T
+select
+ c_custkey,
+ c_name,
+ sum(l_extendedprice * (1 - l_discount)) as revenue,
+ c_acctbal,
+ n_name,
+ c_address,
+ c_phone,
+ c_comment
+from
+ customer,
+ orders,
+ lineitem,
+ nation
+where
+ c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and o_orderdate >= date '1993-10-01'
+ and o_orderdate < date '1994-01-01'
+ and l_returnflag = 'R'
+ and c_nationkey = n_nationkey
+group by
+ c_custkey,
+ c_name,
+ c_acctbal,
+ c_phone,
+ n_name,
+ c_address,
+ c_comment
+order by
+ revenue desc;
+----
+
+
+# q11
+query T
+select
+ ps_partkey,
+ sum(ps_supplycost * ps_availqty) as value
+from
+ partsupp,
+ supplier,
+ nation
+where
+ ps_suppkey = s_suppkey
+ and s_nationkey = n_nationkey
+ and n_name = 'GERMANY'
+group by
+ ps_partkey having
+ sum(ps_supplycost * ps_availqty) > (
+ select
+ sum(ps_supplycost * ps_availqty) * 0.0001
+ from
+ partsupp,
+ supplier,
+ nation
+ where
+ ps_suppkey = s_suppkey
+ and s_nationkey = n_nationkey
+ and n_name = 'GERMANY'
+ )
+order by
+ value desc;
+----
+
+
+# q12
+query T
+select
+ l_shipmode,
+ sum(case
+ when o_orderpriority = '1-URGENT'
+ or o_orderpriority = '2-HIGH'
+ then 1
+ else 0
+ end) as high_line_count,
+ sum(case
+ when o_orderpriority <> '1-URGENT'
+ and o_orderpriority <> '2-HIGH'
+ then 1
+ else 0
+ end) as low_line_count
+from
+ lineitem
+ join
+ orders
+ on
+ l_orderkey = o_orderkey
+where
+ l_shipmode in ('MAIL', 'SHIP')
+ and l_commitdate < l_receiptdate
+ and l_shipdate < l_commitdate
+ and l_receiptdate >= date '1994-01-01'
+ and l_receiptdate < date '1995-01-01'
+group by
+ l_shipmode
+order by
+ l_shipmode;
+----
+
+
+# q13
+query T
+select
+ c_count,
+ count(*) as custdist
+from
+ (
+ select
+ c_custkey,
+ count(o_orderkey)
+ from
+ customer left outer join orders on
+ c_custkey = o_custkey
+ and o_comment not like '%special%requests%'
+ group by
+ c_custkey
+ ) as c_orders (c_custkey, c_count)
+group by
+ c_count
+order by
+ custdist desc,
+ c_count desc;
+----
+0 9
+
+# q14
+query T
+select
+ 100.00 * sum(case
+ when p_type like 'PROMO%'
+ then l_extendedprice * (1 - l_discount)
+ else 0
+ end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
+from
+ lineitem,
+ part
+where
+ l_partkey = p_partkey
+ and l_shipdate >= date '1995-09-01'
+ and l_shipdate < date '1995-10-01';
+----
+NULL
+
+statement ok
+create view revenue0 (supplier_no, total_revenue) as
+ select
+ l_suppkey,
+ sum(l_extendedprice * (1 - l_discount))
+ from
+ lineitem
+ where
+ l_shipdate >= date '1996-01-01'
+ and l_shipdate < date '1996-01-01' + interval '3' month
+ group by
+ l_suppkey;
+
+# q15
+query T
+select
+ s_suppkey,
+ s_name,
+ s_address,
+ s_phone,
+ total_revenue
+from
+ supplier,
+ revenue0
+where
+ s_suppkey = supplier_no
+ and total_revenue = (
+ select
+ max(total_revenue)
+ from
+ revenue0
+ )
+order by
+ s_suppkey;
+----
+
+statement ok
+drop view revenue0;
+
+
+# q16
+query T
+select
+ p_brand,
+ p_type,
+ p_size,
+ count(distinct ps_suppkey) as supplier_cnt
+from
+ partsupp,
+ part
+where
+ p_partkey = ps_partkey
+ and p_brand <> 'Brand#45'
+ and p_type not like 'MEDIUM POLISHED%'
+ and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
+ and ps_suppkey not in (
+ select
+ s_suppkey
+ from
+ supplier
+ where
+ s_comment like '%Customer%Complaints%'
+)
+group by
+ p_brand,
+ p_type,
+ p_size
+order by
+ supplier_cnt desc,
+ p_brand,
+ p_type,
+ p_size;
+----
+
+
+# q17
+query T
+select
+ sum(l_extendedprice) / 7.0 as avg_yearly
+from
+ lineitem,
+ part
+where
+ p_partkey = l_partkey
+ and p_brand = 'Brand#23'
+ and p_container = 'MED BOX'
+ and l_quantity < (
+ select
+ 0.2 * avg(l_quantity)
+ from
+ lineitem
+ where
+ l_partkey = p_partkey
+);
+----
+NULL
+
+# q18
+query T
+select
+ c_name,
+ c_custkey,
+ o_orderkey,
+ o_orderdate,
+ o_totalprice,
+ sum(l_quantity)
+from
+ customer,
+ orders,
+ lineitem
+where
+ o_orderkey in (
+ select
+ l_orderkey
+ from
+ lineitem
+ group by
+ l_orderkey having
+ sum(l_quantity) > 300
+ )
+ and c_custkey = o_custkey
+ and o_orderkey = l_orderkey
+group by
+ c_name,
+ c_custkey,
+ o_orderkey,
+ o_orderdate,
+ o_totalprice
+order by
+ o_totalprice desc,
+ o_orderdate;
+----
+
+
+# q19
+query T
+select
+ sum(l_extendedprice* (1 - l_discount)) as revenue
+from
+ lineitem,
+ part
+where
+ (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#12'
+ and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
+ and l_quantity >= 1 and l_quantity <= 1 + 10
+ and p_size between 1 and 5
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ )
+ or
+ (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#23'
+ and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
+ and l_quantity >= 10 and l_quantity <= 10 + 10
+ and p_size between 1 and 10
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ )
+ or
+ (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#34'
+ and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
+ and l_quantity >= 20 and l_quantity <= 20 + 10
+ and p_size between 1 and 15
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ );
+----
+NULL
+
+
+# q20
+query T
+select
+ s_name,
+ s_address
+from
+ supplier,
+ nation
+where
+ s_suppkey in (
+ select
+ ps_suppkey
+ from
+ partsupp
+ where
+ ps_partkey in (
+ select
+ p_partkey
+ from
+ part
+ where
+ p_name like 'forest%'
+ )
+ and ps_availqty > (
+ select
+ 0.5 * sum(l_quantity)
+ from
+ lineitem
+ where
+ l_partkey = ps_partkey
+ and l_suppkey = ps_suppkey
+ and l_shipdate >= date '1994-01-01'
+ and l_shipdate < date '1994-01-01' + interval '1' year
+ )
+ )
+ and s_nationkey = n_nationkey
+ and n_name = 'CANADA'
+order by
+ s_name;
+----
+
+
+# q21
+query T
+select
+ s_name,
+ count(*) as numwait
+from
+ supplier,
+ lineitem l1,
+ orders,
+ nation
+where
+ s_suppkey = l1.l_suppkey
+ and o_orderkey = l1.l_orderkey
+ and o_orderstatus = 'F'
+ and l1.l_receiptdate > l1.l_commitdate
+ and exists (
+ select
+ *
+ from
+ lineitem l2
+ where
+ l2.l_orderkey = l1.l_orderkey
+ and l2.l_suppkey <> l1.l_suppkey
+ )
+ and not exists (
+ select
+ *
+ from
+ lineitem l3
+ where
+ l3.l_orderkey = l1.l_orderkey
+ and l3.l_suppkey <> l1.l_suppkey
+ and l3.l_receiptdate > l3.l_commitdate
+ )
+ and s_nationkey = n_nationkey
+ and n_name = 'SAUDI ARABIA'
+group by
+ s_name
+order by
+ numwait desc,
+ s_name;
+----
+
+
+# q22
+query T
+select
+ cntrycode,
+ count(*) as numcust,
+ sum(c_acctbal) as totacctbal
+from
+ (
+ select
+ substring(c_phone from 1 for 2) as cntrycode,
+ c_acctbal
+ from
+ customer
+ where
+ substring(c_phone from 1 for 2) in
+ ('13', '31', '23', '29', '30', '18', '17')
+ and c_acctbal > (
+ select
+ avg(c_acctbal)
+ from
+ customer
+ where
+ c_acctbal > 0.00
+ and substring(c_phone from 1 for 2) in
+ ('13', '31', '23', '29', '30', '18', '17')
+ )
+ and not exists (
+ select
+ *
+ from
+ orders
+ where
+ o_custkey = c_custkey
+ )
+ ) as custsale
+group by
+ cntrycode
+order by
+ cntrycode;
+----
+18 1 8324.0700000000
+30 1 7638.5700000000