You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ch...@apache.org on 2022/09/20 13:22:05 UTC

[incubator-kyuubi] branch master updated: [KYUUBI #3500] Supply TPC data initialization SQL script for Kyuubi playground

This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 66f28efbf [KYUUBI #3500] Supply TPC data initialization SQL script for Kyuubi playground
66f28efbf is described below

commit 66f28efbf73b855c09b59ed08f078863b6958131
Author: yikf <yi...@gmail.com>
AuthorDate: Tue Sep 20 21:21:54 2022 +0800

    [KYUUBI #3500] Supply TPC data initialization SQL script for Kyuubi playground
    
    ### _Why are the changes needed?_
    
    Fix https://github.com/apache/incubator-kyuubi/issues/3500
    
    Supply a SQL script to create TPC tables in spark_catalog and load tiny scale data.
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [x] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before make a pull request
    
    Closes #3526 from Yikf/playgroud-tiny-dataset.
    
    Closes #3500
    
    8d06322d [yikf] supply tpcds tiny dataset
    
    Authored-by: yikf <yi...@gmail.com>
    Signed-off-by: Cheng Pan <ch...@apache.org>
---
 docker/playground/README.md                        |   7 +
 docker/playground/compose.yml                      |   2 +
 .../playground/image/load-dataset-tpcds-tiny.sql   | 148 +++++++++++++++++++++
 docker/playground/image/load-dataset-tpch-tiny.sql |  61 +++++++++
 4 files changed, 218 insertions(+)

diff --git a/docker/playground/README.md b/docker/playground/README.md
index 818e8c864..b5518bce5 100644
--- a/docker/playground/README.md
+++ b/docker/playground/README.md
@@ -22,6 +22,13 @@ Add a Kyuubi datasource with
 - username: `anonymous`
 - password: `<empty>`
 
+3. Using built-in dataset
+
+Kyuubi supply some built-in dataset, After the Kyuubi starts, you can run the following command to load the different datasets:
+
+- For loading TPC-DS tiny dataset to spark_catalog.tpcds_tiny, run `docker exec -it kyuubi /opt/kyuubi/bin/beeline -u 'jdbc:hive2://0.0.0.0:10009/' -f /opt/load_data/load-dataset-tpcds-tiny.sql`
+- For loading TPC-H tiny dataset to spark_catalog.tpch_tiny, run `docker exec -it kyuubi /opt/kyuubi/bin/beeline -u 'jdbc:hive2://0.0.0.0:10009/' -f /opt/load_data/load-dataset-tpch-tiny.sql`
+
 ### Access Service
 
 - MinIO: http://localhost:9001
diff --git a/docker/playground/compose.yml b/docker/playground/compose.yml
index 611cb6e62..dfa68782f 100644
--- a/docker/playground/compose.yml
+++ b/docker/playground/compose.yml
@@ -74,6 +74,8 @@ services:
       - ./image/spark-defaults.conf:/etc/spark/conf/spark-defaults.conf
       - ./image/kyuubi-defaults.conf:/etc/kyuubi/conf/kyuubi-defaults.conf
       - ./image/kyuubi-log4j2.xml:/etc/kyuubi/conf/log4j2.xml
+      - ./image/load-dataset-tpcds-tiny.sql:/opt/load_data/load-dataset-tpcds-tiny.sql
+      - ./image/load-dataset-tpch-tiny.sql:/opt/load_data/load-dataset-tpch-tiny.sql
     depends_on:
       - metastore
       - minio
diff --git a/docker/playground/image/load-dataset-tpcds-tiny.sql b/docker/playground/image/load-dataset-tpcds-tiny.sql
new file mode 100644
index 000000000..e76cd0cd9
--- /dev/null
+++ b/docker/playground/image/load-dataset-tpcds-tiny.sql
@@ -0,0 +1,148 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+SET tiny_schema=tpcds.tiny;
+
+CREATE DATABASE IF NOT EXISTS tpcds_tiny;
+
+USE tpcds_tiny;
+
+--
+-- Name: catalog_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_sales USING parquet PARTITIONED BY (cs_sold_date_sk)
+AS SELECT * FROM ${tiny_schema}.catalog_sales;
+
+--
+-- Name: catalog_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_returns USING parquet PARTITIONED BY (cr_returned_date_sk)
+AS SELECT * FROM ${tiny_schema}.catalog_returns;
+
+--
+-- Name: inventory; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS inventory USING parquet PARTITIONED BY (inv_date_sk)
+AS SELECT * FROM ${tiny_schema}.inventory;
+
+--
+-- Name: store_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store_sales USING parquet PARTITIONED BY (ss_sold_date_sk)
+AS SELECT * FROM ${tiny_schema}.store_sales;
+
+--
+-- Name: store_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store_returns USING parquet PARTITIONED BY (sr_returned_date_sk)
+AS SELECT * FROM ${tiny_schema}.store_returns;
+
+--
+-- Name: web_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_sales USING parquet PARTITIONED BY (ws_sold_date_sk)
+AS SELECT * FROM ${tiny_schema}.web_sales;
+
+--
+-- Name: web_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_returns USING parquet PARTITIONED BY (wr_returned_date_sk)
+AS SELECT * FROM ${tiny_schema}.web_returns;
+
+--
+-- Name: call_center; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS call_center USING parquet AS SELECT * FROM ${tiny_schema}.call_center;
+
+--
+-- Name: catalog_page; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_page USING parquet AS SELECT * FROM ${tiny_schema}.catalog_page;
+
+--
+-- Name: customer; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM ${tiny_schema}.customer;
+
+--
+-- Name: customer_address; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer_address USING parquet AS SELECT * FROM ${tiny_schema}.customer_address;
+
+--
+-- Name: customer_demographics; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer_demographics USING parquet AS SELECT * FROM ${tiny_schema}.customer_demographics;
+
+--
+-- Name: date_dim; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS date_dim USING parquet AS SELECT * FROM ${tiny_schema}.date_dim;
+
+--
+-- Name: household_demographics; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS household_demographics USING parquet AS SELECT * FROM ${tiny_schema}.household_demographics;
+
+--
+-- Name: income_band; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS income_band USING parquet AS SELECT * FROM ${tiny_schema}.income_band;
+
+--
+-- Name: item; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS item USING parquet AS SELECT * FROM ${tiny_schema}.item;
+
+--
+-- Name: promotion; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS promotion USING parquet AS SELECT * FROM ${tiny_schema}.promotion;
+
+--
+-- Name: reason; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS reason USING parquet AS SELECT * FROM ${tiny_schema}.reason;
+
+--
+-- Name: ship_mode; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS ship_mode USING parquet AS SELECT * FROM ${tiny_schema}.ship_mode;
+
+--
+-- Name: store; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store USING parquet AS SELECT * FROM ${tiny_schema}.store;
+
+--
+-- Name: time_dim; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS time_dim USING parquet AS SELECT * FROM ${tiny_schema}.time_dim;
+
+--
+-- Name: warehouse; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS warehouse USING parquet AS SELECT * FROM ${tiny_schema}.warehouse;
+
+--
+-- Name: web_page; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_page USING parquet AS SELECT * FROM ${tiny_schema}.web_page;
+
+--
+-- Name: web_site; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_site USING parquet AS SELECT * FROM ${tiny_schema}.web_site;
diff --git a/docker/playground/image/load-dataset-tpch-tiny.sql b/docker/playground/image/load-dataset-tpch-tiny.sql
new file mode 100644
index 000000000..9f18466ae
--- /dev/null
+++ b/docker/playground/image/load-dataset-tpch-tiny.sql
@@ -0,0 +1,61 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+SET tiny_schema=tpch.tiny;
+
+CREATE DATABASE IF NOT EXISTS tpch_tiny;
+
+USE tpch_tiny;
+
+--
+-- Name: customer; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM ${tiny_schema}.customer;
+
+--
+-- Name: orders; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS orders USING parquet AS SELECT * FROM ${tiny_schema}.orders;
+
+--
+-- Name: lineitem; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS lineitem USING parquet AS SELECT * FROM ${tiny_schema}.lineitem;
+
+--
+-- Name: part; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS part USING parquet AS SELECT * FROM ${tiny_schema}.part;
+
+--
+-- Name: partsupp; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS partsupp USING parquet AS SELECT * FROM ${tiny_schema}.partsupp;
+
+--
+-- Name: supplier; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS supplier USING parquet AS SELECT * FROM ${tiny_schema}.supplier;
+
+--
+-- Name: nation; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS nation USING parquet AS SELECT * FROM ${tiny_schema}.nation;
+
+--
+-- Name: region; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS region USING parquet AS SELECT * FROM ${tiny_schema}.region;