You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/06/08 03:22:02 UTC

[incubator-doris] branch master updated: [tools](tpc-h) Modify tpch tools (#9982)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b15b1a9f07 [tools](tpc-h) Modify tpch tools (#9982)
b15b1a9f07 is described below

commit b15b1a9f074087a640f06dca73d53f0f651a73c2
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Wed Jun 8 11:21:57 2022 +0800

    [tools](tpc-h) Modify tpch tools (#9982)
    
    1. Modify the create table stmt.
    2. Modify part of queries.
---
 tools/tpch-tools/README.md              |  13 ++
 tools/tpch-tools/create-tpch-tables.sql | 228 +++++++++++++++++++++-----------
 tools/tpch-tools/queries/q1.sql         |   2 +-
 tools/tpch-tools/queries/q10.sql        |  21 +--
 tools/tpch-tools/queries/q11.sql        |   2 +-
 tools/tpch-tools/queries/q12.sql        |   2 +-
 tools/tpch-tools/queries/q13.sql        |   6 +-
 tools/tpch-tools/queries/q14.sql        |   8 +-
 tools/tpch-tools/queries/q15.sql        |  17 +--
 tools/tpch-tools/queries/q16.sql        |   2 +-
 tools/tpch-tools/queries/q17.sql        |  10 +-
 tools/tpch-tools/queries/q18.sql        |  57 ++++----
 tools/tpch-tools/queries/q19.sql        |   2 +-
 tools/tpch-tools/queries/q2.sql         |  43 +++---
 tools/tpch-tools/queries/q20.sql        |  62 ++++-----
 tools/tpch-tools/queries/q21.sql        |  65 ++++-----
 tools/tpch-tools/queries/q22.sql        |   2 +-
 tools/tpch-tools/queries/q3.sql         |  23 ++--
 tools/tpch-tools/queries/q4.sql         |  21 +--
 tools/tpch-tools/queries/q5.sql         |   2 +-
 tools/tpch-tools/queries/q6.sql         |   2 +-
 tools/tpch-tools/queries/q7.sql         |   2 +-
 tools/tpch-tools/queries/q8.sql         |   8 +-
 tools/tpch-tools/queries/q9.sql         |   4 +-
 tools/tpch-tools/run-tpch-queries.sh    |  22 ++-
 25 files changed, 351 insertions(+), 275 deletions(-)

diff --git a/tools/tpch-tools/README.md b/tools/tpch-tools/README.md
index f06e96289b..dbef056fa7 100644
--- a/tools/tpch-tools/README.md
+++ b/tools/tpch-tools/README.md
@@ -23,12 +23,25 @@ These scripts are used to make tpc-h test.
 follow the steps below:
 
 ### 1. build tpc-h dbgen tool.
+
     ./build-tpch-dbgen.sh
+
 ### 2. generate tpc-h data. use -h for more infomations.
+
     ./gen-tpch-data.sh -s 1
+
 ### 3. create tpc-h tables. modify `doris-cluster.conf` to specify doris info, then run script below.
+
     ./create-tpch-tables.sh
+
 ### 4. load tpc-h data. use -h for help.
+
     ./load-tpch-data.sh
+
 ### 5. run tpc-h queries.
+
     ./run-tpch-queries.sh
+
+    NOTICE: At present, Doris's query optimizer and statistical information functions are not complete, so we rewrite some queries in TPC-H to adapt to Doris' execution framework, but it does not affect the correctness of the results. The rewritten SQL is marked with "Modified" in the corresponding .sql file.
+
+    A new query optimizer will be released in subsequent releases.
diff --git a/tools/tpch-tools/create-tpch-tables.sql b/tools/tpch-tools/create-tpch-tables.sql
index de3d936857..c86d7b6c51 100644
--- a/tools/tpch-tools/create-tpch-tables.sql
+++ b/tools/tpch-tools/create-tpch-tables.sql
@@ -14,87 +14,161 @@
 -- KIND, either express or implied.  See the License for the
 -- specific language governing permissions and limitations
 -- under the License.
-CREATE TABLE `region` (
-  `r_regionkey` integer NOT NULL,
-  `r_name` char(25) NOT NULL,
-  `r_comment` varchar(152)
-) DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 PROPERTIES ("replication_num" = "1");
 
-CREATE TABLE `nation` (
-  `n_nationkey` integer NOT NULL,
-  `n_name` char(25) NOT NULL,
-  `n_regionkey` integer NOT NULL,
-  `n_comment` varchar(152)
-) DISTRIBUTED BY HASH(`n_nationkey`) BUCKETS 1 PROPERTIES ("replication_num" = "1");
+drop table if exists lineitem;
+CREATE TABLE lineitem (
+    l_shipdate    DATE NOT NULL,
+    l_orderkey    bigint NOT NULL,
+    l_linenumber  int not null,
+    l_partkey     int NOT NULL,
+    l_suppkey     int not null,
+    l_quantity    decimal(15, 2) NOT NULL,
+    l_extendedprice  decimal(15, 2) NOT NULL,
+    l_discount    decimal(15, 2) NOT NULL,
+    l_tax         decimal(15, 2) NOT NULL,
+    l_returnflag  VARCHAR(1) NOT NULL,
+    l_linestatus  VARCHAR(1) NOT NULL,
+    l_commitdate  DATE NOT NULL,
+    l_receiptdate DATE NOT NULL,
+    l_shipinstruct VARCHAR(25) NOT NULL,
+    l_shipmode     VARCHAR(10) NOT NULL,
+    l_comment      VARCHAR(44) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`l_shipdate`, `l_orderkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96
+PROPERTIES (
+    "replication_num" = "1",
+    "colocate_with" = "lineitem_orders"
+);
+
+drop table if exists orders;
+CREATE TABLE orders  (
+    o_orderkey       bigint NOT NULL,
+    o_orderdate      DATE NOT NULL,
+    o_custkey        int NOT NULL,
+    o_orderstatus    VARCHAR(1) NOT NULL,
+    o_totalprice     decimal(15, 2) NOT NULL,
+    o_orderpriority  VARCHAR(15) NOT NULL,
+    o_clerk          VARCHAR(15) NOT NULL,
+    o_shippriority   int NOT NULL,
+    o_comment        VARCHAR(79) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`o_orderkey`, `o_orderdate`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96
+PROPERTIES (
+    "replication_num" = "1",
+    "colocate_with" = "lineitem_orders"
+);
 
-CREATE TABLE `part` (
-  `p_partkey` integer NOT NULL,
-  `p_name` varchar(55) NOT NULL,
-  `p_mfgr` char(25) NOT NULL,
-  `p_brand` char(10) NOT NULL,
-  `p_type` varchar(25) NOT NULL,
-  `p_size` integer NOT NULL,
-  `p_container` char(10) NOT NULL,
-  `p_retailprice` decimal(12, 2) NOT NULL,
-  `p_comment` varchar(23) NOT NULL
-) DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1");
+drop table if exists partsupp;
+CREATE TABLE partsupp (
+    ps_partkey          int NOT NULL,
+    ps_suppkey     int NOT NULL,
+    ps_availqty    int NOT NULL,
+    ps_supplycost  decimal(15, 2)  NOT NULL,
+    ps_comment     VARCHAR(199) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`ps_partkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 24
+PROPERTIES (
+    "replication_num" = "1",
+    "colocate_with" = "part_partsupp"
+);
 
-CREATE TABLE `supplier` (
-  `s_suppkey` integer NOT NULL,
-  `s_name` char(25) NOT NULL,
-  `s_address` varchar(40) NOT NULL,
-  `s_nationkey` integer NOT NULL,
-  `s_phone` char(15) NOT NULL,
-  `s_acctbal` decimal(12, 2) NOT NULL,
-  `s_comment` varchar(101) NOT NULL
-) DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1");
+drop table if exists part;
+CREATE TABLE part (
+    p_partkey          int NOT NULL,
+    p_name        VARCHAR(55) NOT NULL,
+    p_mfgr        VARCHAR(25) NOT NULL,
+    p_brand       VARCHAR(10) NOT NULL,
+    p_type        VARCHAR(25) NOT NULL,
+    p_size        int NOT NULL,
+    p_container   VARCHAR(10) NOT NULL,
+    p_retailprice decimal(15, 2) NOT NULL,
+    p_comment     VARCHAR(23) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`p_partkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 24
+PROPERTIES (
+    "replication_num" = "1",
+    "colocate_with" = "part_partsupp"
+);
 
-CREATE TABLE `customer` (
-  `c_custkey` integer NOT NULL,
-  `c_name` varchar(25) NOT NULL,
-  `c_address` varchar(40) NOT NULL,
-  `c_nationkey` integer NOT NULL,
-  `c_phone` char(15) NOT NULL,
-  `c_acctbal` decimal(12, 2) NOT NULL,
-  `c_mktsegment` char(10) NOT NULL,
-  `c_comment` varchar(117) NOT NULL
-) DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1");
+drop table if exists customer;
+CREATE TABLE customer (
+    c_custkey     int NOT NULL,
+    c_name        VARCHAR(25) NOT NULL,
+    c_address     VARCHAR(40) NOT NULL,
+    c_nationkey   int NOT NULL,
+    c_phone       VARCHAR(15) NOT NULL,
+    c_acctbal     decimal(15, 2)   NOT NULL,
+    c_mktsegment  VARCHAR(10) NOT NULL,
+    c_comment     VARCHAR(117) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`c_custkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 24
+PROPERTIES (
+    "replication_num" = "1"
+);
 
-CREATE TABLE `partsupp` (
-  `ps_partkey` integer NOT NULL,
-  `ps_suppkey` integer NOT NULL,
-  `ps_availqty` integer NOT NULL,
-  `ps_supplycost` decimal(12, 2) NOT NULL,
-  `ps_comment` varchar(199) NOT NULL
-) DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1");
+drop table if exists supplier;
+CREATE TABLE supplier (
+    s_suppkey       int NOT NULL,
+    s_name        VARCHAR(25) NOT NULL,
+    s_address     VARCHAR(40) NOT NULL,
+    s_nationkey   int NOT NULL,
+    s_phone       VARCHAR(15) NOT NULL,
+    s_acctbal     decimal(15, 2) NOT NULL,
+    s_comment     VARCHAR(101) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`s_suppkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12
+PROPERTIES (
+    "replication_num" = "1"
+);
+
+drop table if exists nation;
+CREATE TABLE `nation` (
+  `n_nationkey` int(11) NOT NULL,
+  `n_name`      varchar(25) NOT NULL,
+  `n_regionkey` int(11) NOT NULL,
+  `n_comment`   varchar(152) NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`N_NATIONKEY`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1
+PROPERTIES (
+    "replication_num" = "3"
+);
 
-CREATE TABLE `orders` (
-  `o_orderkey` integer NOT NULL,
-  `o_custkey` integer NOT NULL,
-  `o_orderstatus` char(1) NOT NULL,
-  `o_totalprice` decimal(12, 2) NOT NULL,
-  `o_orderdate` date NOT NULL,
-  `o_orderpriority` char(15) NOT NULL,
-  `o_clerk` char(15) NOT NULL,
-  `o_shippriority` integer NOT NULL,
-  `o_comment` varchar(79) NOT NULL
-) DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1");
+drop table if exists region;
+CREATE TABLE region  (
+    r_regionkey      int NOT NULL,
+    r_name       VARCHAR(25) NOT NULL,
+    r_comment    VARCHAR(152)
+)ENGINE=OLAP
+DUPLICATE KEY(`r_regionkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1
+PROPERTIES (
+    "replication_num" = "3"
+);
 
-CREATE TABLE `lineitem` (
-  `l_orderkey` integer NOT NULL,
-  `l_linenumber` integer NOT NULL,
-  `l_partkey` integer NOT NULL,
-  `l_suppkey` integer NOT NULL,
-  `l_quantity` decimal(12, 2) NOT NULL,
-  `l_extendedprice` decimal(12, 2) NOT NULL,
-  `l_discount` decimal(12, 2) NOT NULL,
-  `l_tax` decimal(12, 2) NOT NULL,
-  `l_returnflag` char(1) NOT NULL,
-  `l_linestatus` char(1) NOT NULL,
-  `l_shipdate` date NOT NULL,
-  `l_commitdate` date NOT NULL,
-  `l_receiptdate` date NOT NULL,
-  `l_shipinstruct` char(25) NOT NULL,
-  `l_shipmode` char(10) NOT NULL,
-  `l_comment` varchar(44) NOT NULL
-) DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 32 PROPERTIES ("replication_num" = "1");
+drop view if exists revenue0;
+create view revenue0 (supplier_no, total_revenue) as
+select
+    l_suppkey,
+    sum(l_extendedprice * (1 - l_discount))
+from
+    lineitem
+where
+    l_shipdate >= date '1996-01-01'
+    and l_shipdate < date '1996-01-01' + interval '3' month
+group by
+    l_suppkey;
diff --git a/tools/tpch-tools/queries/q1.sql b/tools/tpch-tools/queries/q1.sql
index a888c2bdb1..dfa27c7446 100644
--- a/tools/tpch-tools/queries/q1.sql
+++ b/tools/tpch-tools/queries/q1.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=false) */
     l_returnflag,
     l_linestatus,
     sum(l_quantity) as sum_qty,
diff --git a/tools/tpch-tools/queries/q10.sql b/tools/tpch-tools/queries/q10.sql
index 6a12c1f5b4..af68dd7ece 100644
--- a/tools/tpch-tools/queries/q10.sql
+++ b/tools/tpch-tools/queries/q10.sql
@@ -15,10 +15,12 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=10, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */
     c_custkey,
     c_name,
-    sum(l_extendedprice * (1 - l_discount)) as revenue,
+    sum(t1.l_extendedprice * (1 - t1.l_discount)) as revenue,
     c_acctbal,
     n_name,
     c_address,
@@ -26,15 +28,16 @@ select
     c_comment
 from
     customer,
-    orders,
-    lineitem,
+    (
+        select o_custkey,l_extendedprice,l_discount from lineitem, orders
+        where l_orderkey = o_orderkey
+        and o_orderdate >= date '1993-10-01'
+        and o_orderdate < date '1993-10-01' + interval '3' month
+        and l_returnflag = 'R'
+    ) t1,
     nation
 where
-    c_custkey = o_custkey
-    and l_orderkey = o_orderkey
-    and o_orderdate >= date '1993-10-01'
-    and o_orderdate < date '1993-10-01' + interval '3' month
-    and l_returnflag = 'R'
+    c_custkey = t1.o_custkey
     and c_nationkey = n_nationkey
 group by
     c_custkey,
diff --git a/tools/tpch-tools/queries/q11.sql b/tools/tpch-tools/queries/q11.sql
index f70fab2be9..609798ab36 100644
--- a/tools/tpch-tools/queries/q11.sql
+++ b/tools/tpch-tools/queries/q11.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */
     ps_partkey,
     sum(ps_supplycost * ps_availqty) as value
 from
diff --git a/tools/tpch-tools/queries/q12.sql b/tools/tpch-tools/queries/q12.sql
index d53dd9c809..4401f61163 100644
--- a/tools/tpch-tools/queries/q12.sql
+++ b/tools/tpch-tools/queries/q12.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */
     l_shipmode,
     sum(case
         when o_orderpriority = '1-URGENT'
diff --git a/tools/tpch-tools/queries/q13.sql b/tools/tpch-tools/queries/q13.sql
index a35497cd57..60449da5c8 100644
--- a/tools/tpch-tools/queries/q13.sql
+++ b/tools/tpch-tools/queries/q13.sql
@@ -15,7 +15,9 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */
     c_count,
     count(*) as custdist
 from
@@ -24,7 +26,7 @@ from
             c_custkey,
             count(o_orderkey) as c_count
         from
-            customer left outer join orders on
+            orders right outer join customer on
                 c_custkey = o_custkey
                 and o_comment not like '%special%requests%'
         group by
diff --git a/tools/tpch-tools/queries/q14.sql b/tools/tpch-tools/queries/q14.sql
index 960cd15fb3..39829045a2 100644
--- a/tools/tpch-tools/queries/q14.sql
+++ b/tools/tpch-tools/queries/q14.sql
@@ -15,15 +15,17 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */
     100.00 * sum(case
         when p_type like 'PROMO%'
             then l_extendedprice * (1 - l_discount)
         else 0
     end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
 from
-    lineitem,
-    part
+    part,
+    lineitem
 where
     l_partkey = p_partkey
     and l_shipdate >= date '1995-09-01'
diff --git a/tools/tpch-tools/queries/q15.sql b/tools/tpch-tools/queries/q15.sql
index 1f7f19b34b..74ce5f0c3c 100644
--- a/tools/tpch-tools/queries/q15.sql
+++ b/tools/tpch-tools/queries/q15.sql
@@ -15,20 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-create view revenue0 (supplier_no, total_revenue) as
-    select
-        l_suppkey,
-        sum(l_extendedprice * (1 - l_discount))
-    from
-        lineitem
-    where
-        l_shipdate >= date '1996-01-01'
-        and l_shipdate < date '1996-01-01' + interval '3' month
-    group by
-        l_suppkey;
-
-
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */
     s_suppkey,
     s_name,
     s_address,
@@ -47,5 +34,3 @@ where
     )
 order by
     s_suppkey;
-
-drop view revenue0;
diff --git a/tools/tpch-tools/queries/q16.sql b/tools/tpch-tools/queries/q16.sql
index 6027b5c2d0..0b1b93875b 100644
--- a/tools/tpch-tools/queries/q16.sql
+++ b/tools/tpch-tools/queries/q16.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */
     p_brand,
     p_type,
     p_size,
diff --git a/tools/tpch-tools/queries/q17.sql b/tools/tpch-tools/queries/q17.sql
index 7724caac8f..3d3fc2c671 100644
--- a/tools/tpch-tools/queries/q17.sql
+++ b/tools/tpch-tools/queries/q17.sql
@@ -15,7 +15,9 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */
     sum(l_extendedprice) / 7.0 as avg_yearly
 from
     lineitem,
@@ -28,7 +30,9 @@ where
         select
             0.2 * avg(l_quantity)
         from
-            lineitem
+            lineitem, part
         where
             l_partkey = p_partkey
-    );
+            and p_brand = 'Brand#23'
+            and p_container = 'MED BOX'
+    )
diff --git a/tools/tpch-tools/queries/q18.sql b/tools/tpch-tools/queries/q18.sql
index 352d8be84a..1b88388bb2 100644
--- a/tools/tpch-tools/queries/q18.sql
+++ b/tools/tpch-tools/queries/q18.sql
@@ -15,36 +15,43 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */
     c_name,
     c_custkey,
-    o_orderkey,
-    o_orderdate,
-    o_totalprice,
-    sum(l_quantity)
+    t3.o_orderkey,
+    t3.o_orderdate,
+    t3.o_totalprice,
+    sum(t3.l_quantity)
 from
-    customer,
-    orders,
-    lineitem
-where
-    o_orderkey in (
-        select
-            l_orderkey
-        from
-            lineitem
-        group by
-            l_orderkey having
-                sum(l_quantity) > 300
-    )
-    and c_custkey = o_custkey
-    and o_orderkey = l_orderkey
+customer join
+(
+  select * from
+  lineitem join
+  (
+    select * from
+    orders left semi join
+    (
+      select
+          l_orderkey
+      from
+          lineitem
+      group by
+          l_orderkey having sum(l_quantity) > 300
+    ) t1
+    on o_orderkey = t1.l_orderkey
+  ) t2
+  on t2.o_orderkey = l_orderkey
+) t3
+on c_custkey = t3.o_custkey
 group by
     c_name,
     c_custkey,
-    o_orderkey,
-    o_orderdate,
-    o_totalprice
+    t3.o_orderkey,
+    t3.o_orderdate,
+    t3.o_totalprice
 order by
-    o_totalprice desc,
-    o_orderdate
+    t3.o_totalprice desc,
+    t3.o_orderdate
 limit 100;
diff --git a/tools/tpch-tools/queries/q19.sql b/tools/tpch-tools/queries/q19.sql
index a20a62533a..9cf162eacf 100644
--- a/tools/tpch-tools/queries/q19.sql
+++ b/tools/tpch-tools/queries/q19.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=2, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */
     sum(l_extendedprice* (1 - l_discount)) as revenue
 from
     lineitem,
diff --git a/tools/tpch-tools/queries/q2.sql b/tools/tpch-tools/queries/q2.sql
index d1ccca0933..1f69856266 100644
--- a/tools/tpch-tools/queries/q2.sql
+++ b/tools/tpch-tools/queries/q2.sql
@@ -15,7 +15,9 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */
     s_acctbal,
     s_name,
     n_name,
@@ -25,34 +27,29 @@ select
     s_phone,
     s_comment
 from
-    part,
-    supplier,
-    partsupp,
-    nation,
-    region
-where
-    p_partkey = ps_partkey
+partsupp,
+(
+  select ps_partkey, min(ps_supplycost) as ps_s from
+  partsupp, supplier, nation, region
+  where s_suppkey = ps_suppkey
+    and s_nationkey = n_nationkey
+    and n_regionkey = r_regionkey
+    and r_name = 'EUROPE'
+  group by ps_partkey
+) t1,
+supplier,
+part,
+nation,
+region
+where p_partkey = t1.ps_partkey
+    and p_partkey = partsupp.ps_partkey
     and s_suppkey = ps_suppkey
     and p_size = 15
     and p_type like '%BRASS'
     and s_nationkey = n_nationkey
     and n_regionkey = r_regionkey
     and r_name = 'EUROPE'
-    and ps_supplycost = (
-        select
-            min(ps_supplycost)
-        from
-            partsupp,
-            supplier,
-            nation,
-            region
-        where
-            p_partkey = ps_partkey
-            and s_suppkey = ps_suppkey
-            and s_nationkey = n_nationkey
-            and n_regionkey = r_regionkey
-            and r_name = 'EUROPE'
-    )
+    and ps_supplycost = t1.ps_s
 order by
     s_acctbal desc,
     n_name,
diff --git a/tools/tpch-tools/queries/q20.sql b/tools/tpch-tools/queries/q20.sql
index 6803b4e3c3..7c621e2244 100644
--- a/tools/tpch-tools/queries/q20.sql
+++ b/tools/tpch-tools/queries/q20.sql
@@ -15,40 +15,30 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
-    s_name,
-    s_address
-from
-    supplier,
-    nation
-where
-    s_suppkey in (
-        select
-            ps_suppkey
-        from
-            partsupp
-        where
-            ps_partkey in (
-                select
-                    p_partkey
-                from
-                    part
-                where
-                    p_name like 'forest%'
-            )
-            and ps_availqty > (
-                select
-                    0.5 * sum(l_quantity)
-                from
-                    lineitem
-                where
-                    l_partkey = ps_partkey
-                    and l_suppkey = ps_suppkey
-                    and l_shipdate >= date '1994-01-01'
-                    and l_shipdate < date '1994-01-01' + interval '1' year
-            )
-    )
-    and s_nationkey = n_nationkey
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */
+s_name, s_address from
+supplier left semi join
+(
+    select * from
+    (
+        select l_partkey,l_suppkey, 0.5 * sum(l_quantity) as l_q
+        from lineitem
+        where l_shipdate >= date '1994-01-01'
+            and l_shipdate < date '1994-01-01' + interval '1' year
+        group by l_partkey,l_suppkey
+    ) t2 join
+    (
+        select ps_partkey, ps_suppkey, ps_availqty
+        from partsupp left semi join part
+        on ps_partkey = p_partkey and p_name like 'forest%'
+    ) t1
+    on t2.l_partkey = t1.ps_partkey and t2.l_suppkey = t1.ps_suppkey
+    and t1.ps_availqty > t2.l_q
+) t3
+on s_suppkey = t3.ps_suppkey
+join nation
+where s_nationkey = n_nationkey
     and n_name = 'CANADA'
-order by
-    s_name;
+order by s_name;
diff --git a/tools/tpch-tools/queries/q21.sql b/tools/tpch-tools/queries/q21.sql
index f3cdf2228d..4353a3a2cb 100644
--- a/tools/tpch-tools/queries/q21.sql
+++ b/tools/tpch-tools/queries/q21.sql
@@ -15,43 +15,36 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
-    s_name,
-    count(*) as numwait
-from
-    supplier,
-    lineitem l1,
-    orders,
-    nation
-where
-    s_suppkey = l1.l_suppkey
-    and o_orderkey = l1.l_orderkey
-    and o_orderstatus = 'F'
-    and l1.l_receiptdate > l1.l_commitdate
-    and exists (
-        select
-            *
-        from
-            lineitem l2
-        where
-            l2.l_orderkey = l1.l_orderkey
-            and l2.l_suppkey <> l1.l_suppkey
-    )
-    and not exists (
-        select
-            *
-        from
-            lineitem l3
-        where
-            l3.l_orderkey = l1.l_orderkey
-            and l3.l_suppkey <> l1.l_suppkey
-            and l3.l_receiptdate > l3.l_commitdate
-    )
-    and s_nationkey = n_nationkey
-    and n_name = 'SAUDI ARABIA'
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=true, enable_projection=true) */
+s_name, count(*) as numwait
+from orders join
+(
+  select * from
+  lineitem l2 right semi join
+  (
+    select * from
+    lineitem l3 right anti join
+    (
+      select * from
+      lineitem l1 join
+      (
+        select * from
+        supplier join nation
+        where s_nationkey = n_nationkey
+          and n_name = 'SAUDI ARABIA'
+      ) t1
+      where t1.s_suppkey = l1.l_suppkey and l1.l_receiptdate > l1.l_commitdate
+    ) t2
+    on l3.l_orderkey = t2.l_orderkey and l3.l_suppkey <> t2.l_suppkey and l3.l_receiptdate > l3.l_commitdate
+  ) t3
+  on l2.l_orderkey = t3.l_orderkey and l2.l_suppkey <> t3.l_suppkey
+) t4
+on o_orderkey = t4.l_orderkey and o_orderstatus = 'F'
 group by
-    s_name
+    t4.s_name
 order by
     numwait desc,
-    s_name
+    t4.s_name
 limit 100;
diff --git a/tools/tpch-tools/queries/q22.sql b/tools/tpch-tools/queries/q22.sql
index 48d7384bb5..3077b7e2c8 100644
--- a/tools/tpch-tools/queries/q22.sql
+++ b/tools/tpch-tools/queries/q22.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=true, enable_projection=true) */
     cntrycode,
     count(*) as numcust,
     sum(c_acctbal) as totacctbal
diff --git a/tools/tpch-tools/queries/q3.sql b/tools/tpch-tools/queries/q3.sql
index 73fbb51ede..316a728ee3 100644
--- a/tools/tpch-tools/queries/q3.sql
+++ b/tools/tpch-tools/queries/q3.sql
@@ -15,21 +15,24 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */
     l_orderkey,
     sum(l_extendedprice * (1 - l_discount)) as revenue,
     o_orderdate,
     o_shippriority
 from
-    customer,
-    orders,
-    lineitem
-where
-    c_mktsegment = 'BUILDING'
-    and c_custkey = o_custkey
-    and l_orderkey = o_orderkey
-    and o_orderdate < date '1995-03-15'
-    and l_shipdate > date '1995-03-15'
+    customer c join
+    (
+        select l_orderkey, l_extendedprice, l_discount, o_orderdate, o_shippriority, o_custkey from
+        lineitem join orders
+        where l_orderkey = o_orderkey
+        and o_orderdate < date '1995-03-15'
+        and l_shipdate > date '1995-03-15'
+    ) t1
+    where c_mktsegment = 'BUILDING'
+    and c.c_custkey = t1.o_custkey
 group by
     l_orderkey,
     o_orderdate,
diff --git a/tools/tpch-tools/queries/q4.sql b/tools/tpch-tools/queries/q4.sql
index b6bc24d511..1292883ec5 100644
--- a/tools/tpch-tools/queries/q4.sql
+++ b/tools/tpch-tools/queries/q4.sql
@@ -15,23 +15,24 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */
     o_orderpriority,
     count(*) as order_count
 from
-    orders
-where
-    o_orderdate >= date '1993-07-01'
-    and o_orderdate < date '1993-07-01' + interval '3' month
-    and exists (
+    (
         select
             *
         from
             lineitem
-        where
-            l_orderkey = o_orderkey
-            and l_commitdate < l_receiptdate
-    )
+        where l_commitdate < l_receiptdate
+    ) t1
+    right semi join orders
+    on t1.l_orderkey = o_orderkey
+where
+    o_orderdate >= date '1993-07-01'
+    and o_orderdate < date '1993-07-01' + interval '3' month
 group by
     o_orderpriority
 order by
diff --git a/tools/tpch-tools/queries/q5.sql b/tools/tpch-tools/queries/q5.sql
index 5e3bc4116f..19372edf2b 100644
--- a/tools/tpch-tools/queries/q5.sql
+++ b/tools/tpch-tools/queries/q5.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=8, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */
     n_name,
     sum(l_extendedprice * (1 - l_discount)) as revenue
 from
diff --git a/tools/tpch-tools/queries/q6.sql b/tools/tpch-tools/queries/q6.sql
index 2d62a6a4f1..417023b436 100644
--- a/tools/tpch-tools/queries/q6.sql
+++ b/tools/tpch-tools/queries/q6.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */
     sum(l_extendedprice * l_discount) as revenue
 from
     lineitem
diff --git a/tools/tpch-tools/queries/q7.sql b/tools/tpch-tools/queries/q7.sql
index 9a6e4a9b9f..97ee9af11b 100644
--- a/tools/tpch-tools/queries/q7.sql
+++ b/tools/tpch-tools/queries/q7.sql
@@ -15,7 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+select /*+SET_VAR(parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */
     supp_nation,
     cust_nation,
     l_year,
diff --git a/tools/tpch-tools/queries/q8.sql b/tools/tpch-tools/queries/q8.sql
index e61c8632d0..f27609ac2d 100644
--- a/tools/tpch-tools/queries/q8.sql
+++ b/tools/tpch-tools/queries/q8.sql
@@ -15,7 +15,9 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=1, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=true, enable_cost_based_join_reorder=false, enable_projection=true) */
     o_year,
     sum(case
         when nation = 'BRAZIL' then volume
@@ -28,11 +30,11 @@ from
             l_extendedprice * (1 - l_discount) as volume,
             n2.n_name as nation
         from
-            part,
-            supplier,
             lineitem,
             orders,
             customer,
+            supplier,
+            part,
             nation n1,
             nation n2,
             region
diff --git a/tools/tpch-tools/queries/q9.sql b/tools/tpch-tools/queries/q9.sql
index 8e486d8fa3..715d845bb7 100644
--- a/tools/tpch-tools/queries/q9.sql
+++ b/tools/tpch-tools/queries/q9.sql
@@ -15,7 +15,9 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
-select
+-- Modified
+
+select /*+SET_VAR(parallel_fragment_exec_instance_num=4, enable_vectorized_engine=true, batch_size=4096, disable_join_reorder=false, enable_cost_based_join_reorder=false, enable_projection=true) */
     nation,
     o_year,
     sum(amount) as sum_profit
diff --git a/tools/tpch-tools/run-tpch-queries.sh b/tools/tpch-tools/run-tpch-queries.sh
index 457be4447f..c384a2132a 100755
--- a/tools/tpch-tools/run-tpch-queries.sh
+++ b/tools/tpch-tools/run-tpch-queries.sh
@@ -17,7 +17,7 @@
 # under the License.
 
 ##############################################################
-# This script is used to run TPC-H queries
+# This script is used to create TPC-H tables
 ##############################################################
 
 set -eo pipefail
@@ -43,7 +43,6 @@ Usage: $0
 OPTS=$(getopt \
     -n $0 \
     -o '' \
-    -o 'h' \
     -- "$@")
 
 eval set -- "$OPTS"
@@ -87,33 +86,32 @@ check_prerequest() {
 check_prerequest "mysql --version" "mysql"
 
 source $CURDIR/doris-cluster.conf
-export MYSQL_PWD=$PASSWORD
 
 echo "FE_HOST: $FE_HOST"
 echo "FE_QUERY_PORT: $FE_QUERY_PORT"
 echo "USER: $USER"
 echo "PASSWORD: $PASSWORD"
 echo "DB: $DB"
+echo "Time Unit: ms"
 
 pre_set() {
     echo $@
     mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@"
 }
 
-pre_set "set global enable_vectorized_engine=1;"
-pre_set "set global parallel_fragment_exec_instance_num=8;"
-pre_set "set global exec_mem_limit=48G;"
-pre_set "set global batch_size=4096;"
-# pre_set "show variables like 'batch_size';"
-
+sum=0
 for i in $(seq 1 22); do
     total=0
-    # Each query is executed three times and takes the average time
-    for j in $(seq 1 3); do
+    run=3
+    # Each query is executed ${run} times and takes the average time
+    for j in $(seq 1 ${run}); do
         start=$(date +%s%3N)
         mysql -h$FE_HOST -u $USER -P$FE_QUERY_PORT -D$DB <$QUERIES_DIR/q$i.sql >/dev/null
         end=$(date +%s%3N)
         total=$((total + end - start))
     done
-    echo "q$i: $((total / 3))ms"
+    cost=$((total / ${run}))
+    echo "q$i: ${cost}"
+    sum=$((sum + $cost))
 done
+echo "Total cost: $sum"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org