You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2023/07/07 22:46:27 UTC
[pinot] branch master updated: [multistage] Add TPC-H Test for Multistage Engine (#10918)
This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 96407ecc5f [multistage] Add TPC-H Test for Multistage Engine (#10918)
96407ecc5f is described below
commit 96407ecc5fe0773abbe66c7011110468e8b0b286
Author: Ankit Sultana <an...@uber.com>
AuthorDate: Fri Jul 7 15:46:22 2023 -0700
[multistage] Add TPC-H Test for Multistage Engine (#10918)
* [multistage] Add TPC-H Test for Multistage Engine
* Remove unnecessary parquet code
* Fix test stability
* Add queries with default parameters
* Add separate query files
* Fix date/time syntax
* fix rat + fix schema
* Fix extract syntax
* Fix tests and remove h2 comparison
* Fix checkstyle
* Add symlink + revert base class changes
---
.../tests/TPCHQueryIntegrationTest.java | 194 +++++++++++++++++++++
.../src/test/resources/examples/batch/tpch | 1 +
.../src/test/resources/tpch/1.sql | 25 +++
.../src/test/resources/tpch/10.sql | 33 ++++
.../src/test/resources/tpch/11.sql | 28 +++
.../src/test/resources/tpch/12.sql | 24 +++
.../src/test/resources/tpch/13.sql | 20 +++
.../src/test/resources/tpch/14.sql | 13 ++
.../src/test/resources/tpch/15.sql | 13 ++
.../src/test/resources/tpch/16.sql | 19 ++
.../src/test/resources/tpch/17.sql | 1 +
.../src/test/resources/tpch/18.sql | 30 ++++
.../src/test/resources/tpch/19.sql | 17 ++
.../src/test/resources/tpch/2.sql | 43 +++++
.../src/test/resources/tpch/20.sql | 33 ++++
.../src/test/resources/tpch/21.sql | 47 +++++
.../src/test/resources/tpch/22.sql | 37 ++++
.../src/test/resources/tpch/23.sql | 39 +++++
.../src/test/resources/tpch/24.sql | 51 ++++++
.../src/test/resources/tpch/3.sql | 24 +++
.../src/test/resources/tpch/4.sql | 21 +++
.../src/test/resources/tpch/5.sql | 26 +++
.../src/test/resources/tpch/6.sql | 10 ++
.../src/test/resources/tpch/7.sql | 46 +++++
.../src/test/resources/tpch/8.sql | 37 ++++
.../src/test/resources/tpch/9.sql | 32 ++++
.../org/apache/pinot/tools/TPCHQuickStart.java | 63 +++++++
.../customer/customer_offline_table_config.json | 27 +++
.../batch/tpch/customer/customer_schema.json | 39 +++++
.../batch/tpch/customer/ingestionJobSpec.yaml | 119 +++++++++++++
.../batch/tpch/customer/rawdata/customer.avro | Bin 0 -> 1741 bytes
.../batch/tpch/lineitem/ingestionJobSpec.yaml | 119 +++++++++++++
.../lineitem/lineitem_offline_table_config.json | 26 +++
.../batch/tpch/lineitem/lineitem_schema.json | 71 ++++++++
.../batch/tpch/lineitem/rawdata/lineitem.avro | Bin 0 -> 1721 bytes
.../batch/tpch/nation/ingestionJobSpec.yaml | 119 +++++++++++++
.../tpch/nation/nation_offline_table_config.json | 27 +++
.../examples/batch/tpch/nation/nation_schema.json | 23 +++
.../examples/batch/tpch/nation/rawdata/nation.avro | Bin 0 -> 1668 bytes
.../batch/tpch/orders/ingestionJobSpec.yaml | 119 +++++++++++++
.../tpch/orders/orders_offline_table_config.json | 34 ++++
.../examples/batch/tpch/orders/orders_schema.json | 43 +++++
.../examples/batch/tpch/orders/rawdata/orders.avro | Bin 0 -> 1301 bytes
.../examples/batch/tpch/part/ingestionJobSpec.yaml | 119 +++++++++++++
.../batch/tpch/part/part_offline_table_config.json | 27 +++
.../examples/batch/tpch/part/part_schema.json | 43 +++++
.../examples/batch/tpch/part/rawdata/part.avro | Bin 0 -> 1502 bytes
.../batch/tpch/partsupp/ingestionJobSpec.yaml | 119 +++++++++++++
.../partsupp/partsupp_offline_table_config.json | 27 +++
.../batch/tpch/partsupp/partsupp_schema.json | 28 +++
.../batch/tpch/partsupp/rawdata/partsupp.avro | Bin 0 -> 1322 bytes
.../batch/tpch/region/ingestionJobSpec.yaml | 119 +++++++++++++
.../examples/batch/tpch/region/rawdata/region.avro | Bin 0 -> 580 bytes
.../tpch/region/region_offline_table_config.json | 26 +++
.../examples/batch/tpch/region/region_schema.json | 19 ++
.../batch/tpch/supplier/ingestionJobSpec.yaml | 119 +++++++++++++
.../batch/tpch/supplier/rawdata/supplier.avro | Bin 0 -> 1466 bytes
.../supplier/supplier_offline_table_config.json | 26 +++
.../batch/tpch/supplier/supplier_schema.json | 35 ++++
pom.xml | 2 +
60 files changed, 2402 insertions(+)
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TPCHQueryIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TPCHQueryIntegrationTest.java
new file mode 100644
index 0000000000..cca6a0f740
--- /dev/null
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TPCHQueryIntegrationTest.java
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests;
+
+import com.google.common.collect.ImmutableList;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Properties;
+import java.util.Set;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.pinot.client.Connection;
+import org.apache.pinot.client.ConnectionFactory;
+import org.apache.pinot.client.ResultSetGroup;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.tools.utils.JarUtils;
+import org.apache.pinot.util.TestUtils;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+
+public class TPCHQueryIntegrationTest extends BaseClusterIntegrationTest {
+ private static final Map<String, String> TPCH_QUICKSTART_TABLE_RESOURCES;
+ private static final int NUM_TPCH_QUERIES = 24;
+ private static final Set<Integer> EXEMPT_QUERIES;
+
+ static {
+ TPCH_QUICKSTART_TABLE_RESOURCES = new HashMap<>();
+ TPCH_QUICKSTART_TABLE_RESOURCES.put("orders", "examples/batch/tpch/orders");
+ TPCH_QUICKSTART_TABLE_RESOURCES.put("lineitem", "examples/batch/tpch/lineitem");
+ TPCH_QUICKSTART_TABLE_RESOURCES.put("region", "examples/batch/tpch/region");
+ TPCH_QUICKSTART_TABLE_RESOURCES.put("partsupp", "examples/batch/tpch/partsupp");
+ TPCH_QUICKSTART_TABLE_RESOURCES.put("customer", "examples/batch/tpch/customer");
+ TPCH_QUICKSTART_TABLE_RESOURCES.put("nation", "examples/batch/tpch/nation");
+ TPCH_QUICKSTART_TABLE_RESOURCES.put("part", "examples/batch/tpch/part");
+ TPCH_QUICKSTART_TABLE_RESOURCES.put("supplier", "examples/batch/tpch/supplier");
+ EXEMPT_QUERIES = new HashSet<>();
+ // The following query fails due to a bug in resolving table names from query
+ EXEMPT_QUERIES.add(13);
+ // The following queries fail due to lack of support for views.
+ EXEMPT_QUERIES.addAll(ImmutableList.of(15, 16, 17));
+ // The following query fails due to inability of Aggregation operator to handle conversion from Boolean to Number.
+ EXEMPT_QUERIES.add(23);
+ }
+
+ @BeforeClass
+ public void setUp()
+ throws Exception {
+ TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir);
+
+ // Start the Pinot cluster
+ startZk();
+ startController();
+ startBroker();
+ startServer();
+
+ for (Map.Entry<String, String> tableResource : TPCH_QUICKSTART_TABLE_RESOURCES.entrySet()) {
+ File tableSegmentDir = new File(_segmentDir, tableResource.getKey());
+ File tarDir = new File(_tarDir, tableResource.getKey());
+ String tableName = tableResource.getKey();
+ URL resourceUrl = getClass().getClassLoader().getResource(tableResource.getValue());
+ Assert.assertNotNull(resourceUrl, "Unable to find resource from: " + tableResource.getValue());
+ File resourceFile;
+ if ("jar".equals(resourceUrl.getProtocol())) {
+ String[] splits = resourceUrl.getFile().split("!");
+ File tempUnpackDir = new File(_tempDir.getAbsolutePath() + File.separator + splits[1]);
+ TestUtils.ensureDirectoriesExistAndEmpty(tempUnpackDir);
+ JarUtils.copyResourcesToDirectory(splits[0], splits[1].substring(1), tempUnpackDir.getAbsolutePath());
+ resourceFile = tempUnpackDir;
+ } else {
+ resourceFile = new File(resourceUrl.getFile());
+ }
+ File dataFile = new File(resourceFile.getAbsolutePath(), "rawdata" + File.separator + tableName + ".avro");
+ Assert.assertTrue(dataFile.exists(), "Unable to load resource file from URL: " + dataFile);
+ File schemaFile = new File(resourceFile.getPath(), tableName + "_schema.json");
+ File tableFile = new File(resourceFile.getPath(), tableName + "_offline_table_config.json");
+ // Pinot
+ TestUtils.ensureDirectoriesExistAndEmpty(tableSegmentDir, tarDir);
+ Schema schema = createSchema(schemaFile);
+ addSchema(schema);
+ TableConfig tableConfig = createTableConfig(tableFile);
+ addTableConfig(tableConfig);
+ ClusterIntegrationTestUtils.buildSegmentsFromAvro(Collections.singletonList(dataFile), tableConfig, schema, 0,
+ tableSegmentDir, tarDir);
+ uploadSegments(tableName, tarDir);
+ }
+ }
+
+ @Test(dataProvider = "QueryDataProvider")
+ public void testTPCHQueries(String query) {
+ testQueriesSucceed(query);
+ }
+
+ protected void testQueriesSucceed(String query) {
+ ResultSetGroup pinotResultSetGroup = getPinotConnection().execute(query);
+ org.apache.pinot.client.ResultSet resultTableResultSet = pinotResultSetGroup.getResultSet(0);
+
+ if (CollectionUtils.isNotEmpty(pinotResultSetGroup.getExceptions())) {
+ Assert.fail(String.format(
+ "TPC-H query raised exception: %s. query: %s", pinotResultSetGroup.getExceptions().get(0), query));
+ }
+ // TODO: Enable the following 2 assertions after fixing the data so each query returns non-zero rows
+ /*
+ Assert.assertTrue(resultTableResultSet.getRowCount() > 0,
+ String.format("Expected non-zero rows for tpc-h query: %s", query));
+ Assert.assertTrue(resultTableResultSet.getColumnCount() > 0,
+ String.format("Expected non-zero columns for tpc-h query: %s", query)); */
+ }
+
+ @Override
+ protected long getCurrentCountStarResult() {
+ return getPinotConnection().execute("SELECT COUNT(*) FROM orders").getResultSet(0).getLong(0);
+ }
+
+ @Override
+ protected long getCountStarResult() {
+ return 9999L;
+ }
+
+ @Override
+ protected Connection getPinotConnection() {
+ Properties properties = new Properties();
+ properties.put("queryOptions", "useMultistageEngine=true");
+ if (_pinotConnection == null) {
+ _pinotConnection = ConnectionFactory.fromZookeeper(properties, getZkUrl() + "/" + getHelixClusterName());
+ }
+ return _pinotConnection;
+ }
+
+ @AfterClass
+ public void tearDown()
+ throws Exception {
+ // unload all TPCH tables.
+ for (String table : TPCH_QUICKSTART_TABLE_RESOURCES.keySet()) {
+ dropOfflineTable(table);
+ }
+
+ // stop components and clean up
+ stopServer();
+ stopBroker();
+ stopController();
+ stopZk();
+
+ FileUtils.deleteDirectory(_tempDir);
+ }
+
+ @DataProvider(name = "QueryDataProvider")
+ public static Object[][] queryDataProvider()
+ throws IOException {
+ Object[][] queries = new Object[NUM_TPCH_QUERIES - EXEMPT_QUERIES.size()][];
+ for (int query = 1, iter = 0; query <= NUM_TPCH_QUERIES; query++) {
+ if (EXEMPT_QUERIES.contains(query)) {
+ continue;
+ }
+ String path = String.format("tpch/%s.sql", query);
+ try (InputStream inputStream = TPCHQueryIntegrationTest.class.getClassLoader()
+ .getResourceAsStream(path)) {
+ queries[iter] = new Object[1];
+ queries[iter][0] = IOUtils.toString(Objects.requireNonNull(inputStream), Charset.defaultCharset());
+ iter++;
+ }
+ }
+ return queries;
+ }
+}
diff --git a/pinot-integration-tests/src/test/resources/examples/batch/tpch b/pinot-integration-tests/src/test/resources/examples/batch/tpch
new file mode 120000
index 0000000000..4b077392b7
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/examples/batch/tpch
@@ -0,0 +1 @@
+../../../../../../pinot-tools/src/main/resources/examples/batch/tpch
\ No newline at end of file
diff --git a/pinot-integration-tests/src/test/resources/tpch/1.sql b/pinot-integration-tests/src/test/resources/tpch/1.sql
new file mode 100644
index 0000000000..c43e3122c8
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/1.sql
@@ -0,0 +1,25 @@
+select
+ l_returnflag,
+ l_linestatus,
+ sum(l_quantity) as sum_qty,
+ sum(l_extendedprice) as sum_base_price,
+ sum(
+ l_extendedprice * (1 - l_discount)
+ ) as sum_disc_price,
+ sum(
+ l_extendedprice * (1 - l_discount) * (1 + l_tax)
+ ) as sum_charge,
+ avg(l_quantity) as avg_qty,
+ avg(l_extendedprice) as avg_price,
+ avg(l_discount) as avg_disc,
+ count(*) as count_order
+from
+ lineitem
+where
+ l_shipdate <= 904694400
+group by
+ l_returnflag,
+ l_linestatus
+order by
+ l_returnflag,
+ l_linestatus;
diff --git a/pinot-integration-tests/src/test/resources/tpch/10.sql b/pinot-integration-tests/src/test/resources/tpch/10.sql
new file mode 100644
index 0000000000..847d7a67df
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/10.sql
@@ -0,0 +1,33 @@
+select
+ c_custkey,
+ c_name,
+ sum(
+ l_extendedprice * (1 - l_discount)
+ ) as revenue,
+ c_acctbal,
+ n_name,
+ c_address,
+ c_phone,
+ c_comment
+from
+ customer,
+ orders,
+ lineitem,
+ nation
+where
+ c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and o_orderdate >= 749413800
+ and o_orderdate < 757362600
+ and l_returnflag = 'R'
+ and c_nationkey = n_nationkey
+group by
+ c_custkey,
+ c_name,
+ c_acctbal,
+ c_phone,
+ n_name,
+ c_address,
+ c_comment
+order by
+ revenue desc;
diff --git a/pinot-integration-tests/src/test/resources/tpch/11.sql b/pinot-integration-tests/src/test/resources/tpch/11.sql
new file mode 100644
index 0000000000..ed0d037b7b
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/11.sql
@@ -0,0 +1,28 @@
+select
+ ps_partkey,
+ sum(ps_supplycost * ps_availqty) as value
+from
+ partsupp,
+ supplier,
+ nation
+where
+ ps_suppkey = s_suppkey
+ and s_nationkey = n_nationkey
+ and n_name = 'GERMANY'
+group by
+ ps_partkey
+having
+ sum(ps_supplycost * ps_availqty) > (
+ select
+ sum(ps_supplycost * ps_availqty) * 0.0001000000
+ from
+ partsupp,
+ supplier,
+ nation
+ where
+ ps_suppkey = s_suppkey
+ and s_nationkey = n_nationkey
+ and n_name = 'GERMANY'
+ )
+order by
+ value desc;
diff --git a/pinot-integration-tests/src/test/resources/tpch/12.sql b/pinot-integration-tests/src/test/resources/tpch/12.sql
new file mode 100644
index 0000000000..4f18a9fb51
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/12.sql
@@ -0,0 +1,24 @@
+select
+ l_shipmode,
+ sum(
+ case when o_orderpriority = '1-URGENT'
+ or o_orderpriority = '2-HIGH' then 1 else 0 end
+ ) as high_line_count,
+ sum(
+ case when o_orderpriority <> '1-URGENT'
+ and o_orderpriority <> '2-HIGH' then 1 else 0 end
+ ) as low_line_count
+from
+ orders,
+ lineitem
+where
+ o_orderkey = l_orderkey
+ and l_shipmode in ('MAIL', 'SHIP')
+ and l_commitdate < l_receiptdate
+ and l_shipdate < l_commitdate
+ and l_receiptdate >= 757362600
+ and l_receiptdate < 788898600
+group by
+ l_shipmode
+order by
+ l_shipmode;
diff --git a/pinot-integration-tests/src/test/resources/tpch/13.sql b/pinot-integration-tests/src/test/resources/tpch/13.sql
new file mode 100644
index 0000000000..9f44835201
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/13.sql
@@ -0,0 +1,20 @@
+select
+ c_count,
+ count(*) as custdist
+from
+ (
+ select
+ c_custkey,
+ count(o_orderkey)
+ from
+ customer
+ left outer join orders on c_custkey = o_custkey
+ and o_comment not like '%special%requests%'
+ group by
+ c_custkey
+ ) as c_orders (c_custkey, c_count)
+group by
+ c_count
+order by
+ custdist desc,
+ c_count desc;
diff --git a/pinot-integration-tests/src/test/resources/tpch/14.sql b/pinot-integration-tests/src/test/resources/tpch/14.sql
new file mode 100644
index 0000000000..96cc20eed7
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/14.sql
@@ -0,0 +1,13 @@
+select
+ 100.00 * sum(
+ case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 end
+ ) / sum(
+ l_extendedprice * (1 - l_discount)
+ ) as promo_revenue
+from
+ lineitem,
+ part
+where
+ l_partkey = p_partkey
+ and l_shipdate >= 809893800
+ and l_shipdate < 812485800;
diff --git a/pinot-integration-tests/src/test/resources/tpch/15.sql b/pinot-integration-tests/src/test/resources/tpch/15.sql
new file mode 100644
index 0000000000..33b7ab34eb
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/15.sql
@@ -0,0 +1,13 @@
+create view revenue0 (supplier_no, total_revenue) as
+select
+ l_suppkey,
+ sum(
+ l_extendedprice * (1 - l_discount)
+ )
+from
+ lineitem
+where
+ l_shipdate >= 820434600
+ and l_shipdate < 828297000
+group by
+ l_suppkey;
diff --git a/pinot-integration-tests/src/test/resources/tpch/16.sql b/pinot-integration-tests/src/test/resources/tpch/16.sql
new file mode 100644
index 0000000000..96163b6a06
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/16.sql
@@ -0,0 +1,19 @@
+select
+ s_suppkey,
+ s_name,
+ s_address,
+ s_phone,
+ total_revenue
+from
+ supplier,
+ revenue0
+where
+ s_suppkey = supplier_no
+ and total_revenue = (
+ select
+ max(total_revenue)
+ from
+ revenue0
+ )
+order by
+ s_suppkey;
diff --git a/pinot-integration-tests/src/test/resources/tpch/17.sql b/pinot-integration-tests/src/test/resources/tpch/17.sql
new file mode 100644
index 0000000000..317e885f9c
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/17.sql
@@ -0,0 +1 @@
+drop view revenue0;
\ No newline at end of file
diff --git a/pinot-integration-tests/src/test/resources/tpch/18.sql b/pinot-integration-tests/src/test/resources/tpch/18.sql
new file mode 100644
index 0000000000..acc737035c
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/18.sql
@@ -0,0 +1,30 @@
+select
+ p_brand,
+ p_type,
+ p_size,
+ count(distinct ps_suppkey) as supplier_cnt
+from
+ partsupp,
+ part
+where
+ p_partkey = ps_partkey
+ and p_brand <> 'Brand#45'
+ and p_type not like 'MEDIUM POLISHED%'
+ and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
+ and ps_suppkey not in (
+ select
+ s_suppkey
+ from
+ supplier
+ where
+ s_comment like '%Customer%Complaints%'
+ )
+group by
+ p_brand,
+ p_type,
+ p_size
+order by
+ supplier_cnt desc,
+ p_brand,
+ p_type,
+ p_size;
diff --git a/pinot-integration-tests/src/test/resources/tpch/19.sql b/pinot-integration-tests/src/test/resources/tpch/19.sql
new file mode 100644
index 0000000000..9d6322d387
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/19.sql
@@ -0,0 +1,17 @@
+select
+ sum(l_extendedprice) / 7.0 as avg_yearly
+from
+ lineitem,
+ part
+where
+ p_partkey = l_partkey
+ and p_brand = 'Brand#23'
+ and p_container = 'MED BOX'
+ and l_quantity < (
+ select
+ 0.2 * avg(l_quantity)
+ from
+ lineitem
+ where
+ l_partkey = p_partkey
+ );
diff --git a/pinot-integration-tests/src/test/resources/tpch/2.sql b/pinot-integration-tests/src/test/resources/tpch/2.sql
new file mode 100644
index 0000000000..9695cbe0ad
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/2.sql
@@ -0,0 +1,43 @@
+select
+ s_acctbal,
+ s_name,
+ n_name,
+ p_partkey,
+ p_mfgr,
+ s_address,
+ s_phone,
+ s_comment
+from
+ part,
+ supplier,
+ partsupp,
+ nation,
+ region
+where
+ p_partkey = ps_partkey
+ and s_suppkey = ps_suppkey
+ and p_size = 15
+ and p_type like '%BRASS'
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'EUROPE'
+ and ps_supplycost = (
+ select
+ min(ps_supplycost)
+ from
+ partsupp,
+ supplier,
+ nation,
+ region
+ where
+ p_partkey = ps_partkey
+ and s_suppkey = ps_suppkey
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'EUROPE'
+ )
+order by
+ s_acctbal desc,
+ n_name,
+ s_name,
+ p_partkey;
diff --git a/pinot-integration-tests/src/test/resources/tpch/20.sql b/pinot-integration-tests/src/test/resources/tpch/20.sql
new file mode 100644
index 0000000000..da59efd52e
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/20.sql
@@ -0,0 +1,33 @@
+select
+ c_name,
+ c_custkey,
+ o_orderkey,
+ o_orderdate,
+ o_totalprice,
+ sum(l_quantity)
+from
+ customer,
+ orders,
+ lineitem
+where
+ o_orderkey in (
+ select
+ l_orderkey
+ from
+ lineitem
+ group by
+ l_orderkey
+ having
+ sum(l_quantity) > 300
+ )
+ and c_custkey = o_custkey
+ and o_orderkey = l_orderkey
+group by
+ c_name,
+ c_custkey,
+ o_orderkey,
+ o_orderdate,
+ o_totalprice
+order by
+ o_totalprice desc,
+ o_orderdate;
diff --git a/pinot-integration-tests/src/test/resources/tpch/21.sql b/pinot-integration-tests/src/test/resources/tpch/21.sql
new file mode 100644
index 0000000000..c697f43b34
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/21.sql
@@ -0,0 +1,47 @@
+select
+ sum(
+ l_extendedprice * (1 - l_discount)
+ ) as revenue
+from
+ lineitem,
+ part
+where
+ (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#12'
+ and p_container in (
+ 'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG'
+ )
+ and l_quantity >= 1
+ and l_quantity <= 1 + 10
+ and p_size between 1
+ and 5
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ )
+ or (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#23'
+ and p_container in (
+ 'MED BAG', 'MED BOX', 'MED PKG', 'MED PACK'
+ )
+ and l_quantity >= 10
+ and l_quantity <= 10 + 10
+ and p_size between 1
+ and 10
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ )
+ or (
+ p_partkey = l_partkey
+ and p_brand = 'Brand#34'
+ and p_container in (
+ 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'
+ )
+ and l_quantity >= 20
+ and l_quantity <= 20 + 10
+ and p_size between 1
+ and 15
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ );
diff --git a/pinot-integration-tests/src/test/resources/tpch/22.sql b/pinot-integration-tests/src/test/resources/tpch/22.sql
new file mode 100644
index 0000000000..31663daad4
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/22.sql
@@ -0,0 +1,37 @@
+select
+ s_name,
+ s_address
+from
+ supplier,
+ nation
+where
+ s_suppkey in (
+ select
+ ps_suppkey
+ from
+ partsupp
+ where
+ ps_partkey in (
+ select
+ p_partkey
+ from
+ part
+ where
+ p_name like 'forest%'
+ )
+ and ps_availqty > (
+ select
+ 0.5 * sum(l_quantity)
+ from
+ lineitem
+ where
+ l_partkey = ps_partkey
+ and l_suppkey = ps_suppkey
+ and l_shipdate >= 757362600
+ and l_shipdate < 788898600
+ )
+ )
+ and s_nationkey = n_nationkey
+ and n_name = 'CANADA'
+order by
+ s_name;
diff --git a/pinot-integration-tests/src/test/resources/tpch/23.sql b/pinot-integration-tests/src/test/resources/tpch/23.sql
new file mode 100644
index 0000000000..bfaec5f04a
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/23.sql
@@ -0,0 +1,39 @@
+select
+ s_name,
+ count(*) as numwait
+from
+ supplier,
+ lineitem l1,
+ orders,
+ nation
+where
+ s_suppkey = l1.l_suppkey
+ and o_orderkey = l1.l_orderkey
+ and o_orderstatus = 'F'
+ and l1.l_receiptdate > l1.l_commitdate
+ and exists (
+ select
+ *
+ from
+ lineitem l2
+ where
+ l2.l_orderkey = l1.l_orderkey
+ and l2.l_suppkey <> l1.l_suppkey
+ )
+ and not exists (
+ select
+ *
+ from
+ lineitem l3
+ where
+ l3.l_orderkey = l1.l_orderkey
+ and l3.l_suppkey <> l1.l_suppkey
+ and l3.l_receiptdate > l3.l_commitdate
+ )
+ and s_nationkey = n_nationkey
+ and n_name = 'SAUDI ARABIA'
+group by
+ s_name
+order by
+ numwait desc,
+ s_name;
diff --git a/pinot-integration-tests/src/test/resources/tpch/24.sql b/pinot-integration-tests/src/test/resources/tpch/24.sql
new file mode 100644
index 0000000000..dc95d6dd57
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/24.sql
@@ -0,0 +1,51 @@
+select
+ cntrycode,
+ count(*) as numcust,
+ sum(c_acctbal) as totacctbal
+from
+ (
+ select
+ substring(
+ c_phone
+ from
+ 1 for 2
+ ) as cntrycode,
+ c_acctbal
+ from
+ customer
+ where
+ substring(
+ c_phone
+ from
+ 1 for 2
+ ) in (
+ '13', '31', '23', '29', '30', '18', '17'
+ )
+ and c_acctbal > (
+ select
+ avg(c_acctbal)
+ from
+ customer
+ where
+ c_acctbal > 0.00
+ and substring(
+ c_phone
+ from
+ 1 for 2
+ ) in (
+ '13', '31', '23', '29', '30', '18', '17'
+ )
+ )
+ and not exists (
+ select
+ *
+ from
+ orders
+ where
+ o_custkey = c_custkey
+ )
+ ) as custsale
+group by
+ cntrycode
+order by
+ cntrycode;
diff --git a/pinot-integration-tests/src/test/resources/tpch/3.sql b/pinot-integration-tests/src/test/resources/tpch/3.sql
new file mode 100644
index 0000000000..f702c55dc5
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/3.sql
@@ -0,0 +1,24 @@
+select
+ l_orderkey,
+ sum(
+ l_extendedprice * (1 - l_discount)
+ ) as revenue,
+ o_orderdate,
+ o_shippriority
+from
+ customer,
+ orders,
+ lineitem
+where
+ c_mktsegment = 'BUILDING'
+ and c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and o_orderdate < 795225600
+ and l_shipdate > 795225600
+group by
+ l_orderkey,
+ o_orderdate,
+ o_shippriority
+order by
+ revenue desc,
+ o_orderdate;
diff --git a/pinot-integration-tests/src/test/resources/tpch/4.sql b/pinot-integration-tests/src/test/resources/tpch/4.sql
new file mode 100644
index 0000000000..8c5572cc29
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/4.sql
@@ -0,0 +1,21 @@
+select
+ o_orderpriority,
+ count(*) as order_count
+from
+ orders
+where
+ o_orderdate >= 741484800
+ and o_orderdate < 749433600
+ and exists (
+ select
+ *
+ from
+ lineitem
+ where
+ l_orderkey = o_orderkey
+ and l_commitdate < l_receiptdate
+ )
+group by
+ o_orderpriority
+order by
+ o_orderpriority;
diff --git a/pinot-integration-tests/src/test/resources/tpch/5.sql b/pinot-integration-tests/src/test/resources/tpch/5.sql
new file mode 100644
index 0000000000..3dbc214431
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/5.sql
@@ -0,0 +1,26 @@
+select
+ n_name,
+ sum(
+ l_extendedprice * (1 - l_discount)
+ ) as revenue
+from
+ customer,
+ orders,
+ lineitem,
+ supplier,
+ nation,
+ region
+where
+ c_custkey = o_custkey
+ and l_orderkey = o_orderkey
+ and l_suppkey = s_suppkey
+ and c_nationkey = s_nationkey
+ and s_nationkey = n_nationkey
+ and n_regionkey = r_regionkey
+ and r_name = 'ASIA'
+ and o_orderdate >= 757382400
+ and o_orderdate < 788918400
+group by
+ n_name
+order by
+ revenue desc;
diff --git a/pinot-integration-tests/src/test/resources/tpch/6.sql b/pinot-integration-tests/src/test/resources/tpch/6.sql
new file mode 100644
index 0000000000..e29fa408c8
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/6.sql
@@ -0,0 +1,10 @@
+select
+ sum(l_extendedprice * l_discount) as revenue
+from
+ lineitem
+where
+ l_shipdate >= 757382400
+ and l_shipdate < 788918400
+ and l_discount between.06 - 0.01
+ and.06 + 0.01
+ and l_quantity < 24;
diff --git a/pinot-integration-tests/src/test/resources/tpch/7.sql b/pinot-integration-tests/src/test/resources/tpch/7.sql
new file mode 100644
index 0000000000..852d61f838
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/7.sql
@@ -0,0 +1,46 @@
+select
+ supp_nation,
+ cust_nation,
+ l_year,
+ sum(volume) as revenue
+from
+ (
+ select
+ n1.n_name as supp_nation,
+ n2.n_name as cust_nation,
+ ToDateTime(l_shipdate * 1000, 'yyyy') as l_year,
+ l_extendedprice * (1 - l_discount) as volume
+ from
+ supplier,
+ lineitem,
+ orders,
+ customer,
+ nation n1,
+ nation n2
+ where
+ s_suppkey = l_suppkey
+ and o_orderkey = l_orderkey
+ and c_custkey = o_custkey
+ and s_nationkey = n1.n_nationkey
+ and c_nationkey = n2.n_nationkey
+ and (
+ (
+ n1.n_name = 'FRANCE'
+ and n2.n_name = 'GERMANY'
+ )
+ or (
+ n1.n_name = 'GERMANY'
+ and n2.n_name = 'FRANCE'
+ )
+ )
+ and l_shipdate between 788898600
+ and 851970600
+ ) as shipping
+group by
+ supp_nation,
+ cust_nation,
+ l_year
+order by
+ supp_nation,
+ cust_nation,
+ l_year;
diff --git a/pinot-integration-tests/src/test/resources/tpch/8.sql b/pinot-integration-tests/src/test/resources/tpch/8.sql
new file mode 100644
index 0000000000..8416732df6
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/8.sql
@@ -0,0 +1,37 @@
+select
+ o_year,
+ sum(
+ case when nation = 'BRAZIL' then volume else 0 end
+ ) / sum(volume) as mkt_share
+from
+ (
+ select
+ ToDateTime(o_orderdate * 1000, 'yyyy') as o_year,
+ l_extendedprice * (1 - l_discount) as volume,
+ n2.n_name as nation
+ from
+ part,
+ supplier,
+ lineitem,
+ orders,
+ customer,
+ nation n1,
+ nation n2,
+ region
+ where
+ p_partkey = l_partkey
+ and s_suppkey = l_suppkey
+ and l_orderkey = o_orderkey
+ and o_custkey = c_custkey
+ and c_nationkey = n1.n_nationkey
+ and n1.n_regionkey = r_regionkey
+ and r_name = 'AMERICA'
+ and s_nationkey = n2.n_nationkey
+ and o_orderdate between 788898600
+ and 851970600
+ and p_type = 'ECONOMY ANODIZED STEEL'
+ ) as all_nations
+group by
+ o_year
+order by
+ o_year;
diff --git a/pinot-integration-tests/src/test/resources/tpch/9.sql b/pinot-integration-tests/src/test/resources/tpch/9.sql
new file mode 100644
index 0000000000..84ca85dcdc
--- /dev/null
+++ b/pinot-integration-tests/src/test/resources/tpch/9.sql
@@ -0,0 +1,32 @@
+select
+ nation,
+ o_year,
+ sum(amount) as sum_profit
+from
+ (
+ select
+ n_name as nation,
+ ToDateTime(o_orderdate * 1000, 'yyyy') as o_year,
+ l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
+ from
+ part,
+ supplier,
+ lineitem,
+ partsupp,
+ orders,
+ nation
+ where
+ s_suppkey = l_suppkey
+ and ps_suppkey = l_suppkey
+ and ps_partkey = l_partkey
+ and p_partkey = l_partkey
+ and o_orderkey = l_orderkey
+ and s_nationkey = n_nationkey
+ and p_name like '%green%'
+ ) as profit
+group by
+ nation,
+ o_year
+order by
+ nation,
+ o_year desc;
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/TPCHQuickStart.java b/pinot-tools/src/main/java/org/apache/pinot/tools/TPCHQuickStart.java
new file mode 100644
index 0000000000..b0dc58dd28
--- /dev/null
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/TPCHQuickStart.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.tools;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import org.apache.pinot.tools.admin.PinotAdministrator;
+
+
+public class TPCHQuickStart extends MultistageEngineQuickStart {
+ private static final String QUICKSTART_IDENTIFIER = "TPCH";
+ private static final String[] TPCH_DIRECTORIES = new String[]{
+ "examples/batch/tpch/customer",
+ "examples/batch/tpch/lineitem",
+ "examples/batch/tpch/nation",
+ "examples/batch/tpch/orders",
+ "examples/batch/tpch/part",
+ "examples/batch/tpch/partsupp",
+ "examples/batch/tpch/region",
+ "examples/batch/tpch/supplier"
+ };
+
+ @Override
+ public List<String> types() {
+ return Collections.singletonList(QUICKSTART_IDENTIFIER);
+ }
+
+ @Override
+ public String[] getDefaultBatchTableDirectories() {
+ return TPCH_DIRECTORIES;
+ }
+
+ @Override
+ protected int getNumQuickstartRunnerServers() {
+ return 1;
+ }
+
+ public static void main(String[] args)
+ throws Exception {
+ List<String> arguments = new ArrayList<>();
+ arguments.addAll(Arrays.asList("QuickStart", "-type", QUICKSTART_IDENTIFIER));
+ arguments.addAll(Arrays.asList(args));
+ PinotAdministrator.main(arguments.toArray(new String[arguments.size()]));
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/customer/customer_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/tpch/customer/customer_offline_table_config.json
new file mode 100644
index 0000000000..a8f954ca5d
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/customer/customer_offline_table_config.json
@@ -0,0 +1,27 @@
+{
+ "tableName": "customer",
+ "tableType": "OFFLINE",
+ "tenants": {
+ },
+ "segmentsConfig": {
+ "segmentPushType": "REFRESH",
+ "replication": "1",
+ "schemaName": "customer"
+ },
+ "tableIndexConfig": {
+ "loadMode": "MMAP",
+ "invertedIndexColumns": [
+ ],
+ "noDictionaryColumns": [
+ "c_comment"
+ ]
+ },
+ "ingestionConfig": {
+ "transformConfigs": [
+ ]
+ },
+ "metadata": {
+ "customConfigs": {
+ }
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/customer/customer_schema.json b/pinot-tools/src/main/resources/examples/batch/tpch/customer/customer_schema.json
new file mode 100644
index 0000000000..da08354f1e
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/customer/customer_schema.json
@@ -0,0 +1,39 @@
+{
+ "schemaName": "customer",
+ "metricFieldSpecs": [
+ {
+ "dataType": "DOUBLE",
+ "name": "c_acctbal"
+ }
+ ],
+ "dimensionFieldSpecs": [
+ {
+ "dataType": "INT",
+ "name": "c_custkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "c_name"
+ },
+ {
+ "dataType": "STRING",
+ "name": "c_address"
+ },
+ {
+ "dataType": "INT",
+ "name": "c_nationkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "c_phone"
+ },
+ {
+ "dataType": "STRING",
+ "name": "c_mktsegment"
+ },
+ {
+ "dataType": "STRING",
+ "name": "c_comment"
+ }
+ ]
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/customer/ingestionJobSpec.yaml b/pinot-tools/src/main/resources/examples/batch/tpch/customer/ingestionJobSpec.yaml
new file mode 100644
index 0000000000..2297e8ed6a
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/customer/ingestionJobSpec.yaml
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# executionFrameworkSpec: Defines ingestion jobs to be running.
+executionFrameworkSpec:
+
+ # name: execution framework name
+ name: 'standalone'
+
+ # Class to use for segment generation and different push types.
+ segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+ segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+ segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
+ segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
+
+
+# jobType: Pinot ingestion job type.
+# Supported job types are defined in PinotIngestionJobType class.
+# 'SegmentCreation'
+# 'SegmentTarPush'
+# 'SegmentUriPush'
+# 'SegmentMetadataPush'
+# 'SegmentCreationAndTarPush'
+# 'SegmentCreationAndUriPush'
+# 'SegmentCreationAndMetadataPush'
+jobType: SegmentCreationAndTarPush
+
+# inputDirURI: Root directory of input data, expected to have scheme configured in PinotFS.
+inputDirURI: 'examples/batch/tpch/customer/rawdata'
+
+# includeFileNamePattern: include file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will include all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will include all the avro files under inputDirURI recursively.
+includeFileNamePattern: 'glob:**/*.avro'
+
+# excludeFileNamePattern: exclude file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will exclude all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will exclude all the avro files under inputDirURI recursively.
+# _excludeFileNamePattern: ''
+
+# outputDirURI: Root directory of output segments, expected to have scheme configured in PinotFS.
+outputDirURI: 'examples/batch/tpch/customer/segments'
+
+# overwriteOutput: Overwrite output segments if existed.
+overwriteOutput: true
+
+# pinotFSSpecs: defines all related Pinot file systems.
+pinotFSSpecs:
+
+ - # scheme: used to identify a PinotFS.
+ # E.g. local, hdfs, dbfs, etc
+ scheme: file
+
+ # className: Class name used to create the PinotFS instance.
+ # E.g.
+ # org.apache.pinot.spi.filesystem.LocalPinotFS is used for local filesystem
+ # org.apache.pinot.plugin.filesystem.AzurePinotFS is used for Azure Data Lake
+ # org.apache.pinot.plugin.filesystem.HadoopPinotFS is used for HDFS
+ className: org.apache.pinot.spi.filesystem.LocalPinotFS
+
+# recordReaderSpec: defines all record reader
+recordReaderSpec:
+ dataFormat: 'avro'
+ className: 'org.apache.pinot.plugin.inputformat.avro.AvroRecordReader'
+
+
+# tableSpec: defines table name and where to fetch corresponding table config and table schema.
+tableSpec:
+
+ # tableName: Table name
+ tableName: 'customer'
+
+ # schemaURI: defines where to read the table schema, supports PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_schema.json
+ # http://localhost:9000/tables/myTable/schema
+ schemaURI: 'http://localhost:9000/tables/customer/schema'
+
+ # tableConfigURI: defines where to reade the table config.
+ # Supports using PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_config.json
+ # http://localhost:9000/tables/myTable
+ # Note that the API to read Pinot table config directly from pinot controller contains a JSON wrapper.
+ # The real table config is the object under the field 'OFFLINE'.
+ tableConfigURI: 'http://localhost:9000/tables/customer'
+
+# pinotClusterSpecs: defines the Pinot Cluster Access Point.
+pinotClusterSpecs:
+ - # controllerURI: used to fetch table/schema information and data push.
+ # E.g. http://localhost:9000
+ controllerURI: 'http://localhost:9000'
+
+# pushJobSpec: defines segment push job related configuration.
+pushJobSpec:
+
+ # pushAttempts: number of attempts for push job, default is 1, which means no retry.
+ pushAttempts: 2
+
+ # pushRetryIntervalMillis: retry wait Ms, default to 1 second.
+ pushRetryIntervalMillis: 1000
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/customer/rawdata/customer.avro b/pinot-tools/src/main/resources/examples/batch/tpch/customer/rawdata/customer.avro
new file mode 100644
index 0000000000..f446eef075
Binary files /dev/null and b/pinot-tools/src/main/resources/examples/batch/tpch/customer/rawdata/customer.avro differ
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/ingestionJobSpec.yaml b/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/ingestionJobSpec.yaml
new file mode 100644
index 0000000000..13bf410ece
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/ingestionJobSpec.yaml
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# executionFrameworkSpec: Defines ingestion jobs to be running.
+executionFrameworkSpec:
+
+ # name: execution framework name
+ name: 'standalone'
+
+ # Class to use for segment generation and different push types.
+ segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+ segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+ segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
+ segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
+
+
+# jobType: Pinot ingestion job type.
+# Supported job types are defined in PinotIngestionJobType class.
+# 'SegmentCreation'
+# 'SegmentTarPush'
+# 'SegmentUriPush'
+# 'SegmentMetadataPush'
+# 'SegmentCreationAndTarPush'
+# 'SegmentCreationAndUriPush'
+# 'SegmentCreationAndMetadataPush'
+jobType: SegmentCreationAndTarPush
+
+# inputDirURI: Root directory of input data, expected to have scheme configured in PinotFS.
+inputDirURI: 'examples/batch/tpch/lineitem/rawdata'
+
+# includeFileNamePattern: include file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will include all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will include all the avro files under inputDirURI recursively.
+includeFileNamePattern: 'glob:**/*.avro'
+
+# excludeFileNamePattern: exclude file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will exclude all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will exclude all the avro files under inputDirURI recursively.
+# _excludeFileNamePattern: ''
+
+# outputDirURI: Root directory of output segments, expected to have scheme configured in PinotFS.
+outputDirURI: 'examples/batch/tpch/lineitem/segments'
+
+# overwriteOutput: Overwrite output segments if existed.
+overwriteOutput: true
+
+# pinotFSSpecs: defines all related Pinot file systems.
+pinotFSSpecs:
+
+ - # scheme: used to identify a PinotFS.
+ # E.g. local, hdfs, dbfs, etc
+ scheme: file
+
+ # className: Class name used to create the PinotFS instance.
+ # E.g.
+ # org.apache.pinot.spi.filesystem.LocalPinotFS is used for local filesystem
+ # org.apache.pinot.plugin.filesystem.AzurePinotFS is used for Azure Data Lake
+ # org.apache.pinot.plugin.filesystem.HadoopPinotFS is used for HDFS
+ className: org.apache.pinot.spi.filesystem.LocalPinotFS
+
+# recordReaderSpec: defines all record reader
+recordReaderSpec:
+ dataFormat: 'avro'
+ className: 'org.apache.pinot.plugin.inputformat.avro.AvroRecordReader'
+
+
+# tableSpec: defines table name and where to fetch corresponding table config and table schema.
+tableSpec:
+
+ # tableName: Table name
+ tableName: 'lineitem'
+
+ # schemaURI: defines where to read the table schema, supports PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_schema.json
+ # http://localhost:9000/tables/myTable/schema
+ schemaURI: 'http://localhost:9000/tables/lineitem/schema'
+
+ # tableConfigURI: defines where to reade the table config.
+ # Supports using PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_config.json
+ # http://localhost:9000/tables/myTable
+ # Note that the API to read Pinot table config directly from pinot controller contains a JSON wrapper.
+ # The real table config is the object under the field 'OFFLINE'.
+ tableConfigURI: 'http://localhost:9000/tables/lineitem'
+
+# pinotClusterSpecs: defines the Pinot Cluster Access Point.
+pinotClusterSpecs:
+ - # controllerURI: used to fetch table/schema information and data push.
+ # E.g. http://localhost:9000
+ controllerURI: 'http://localhost:9000'
+
+# pushJobSpec: defines segment push job related configuration.
+pushJobSpec:
+
+ # pushAttempts: number of attempts for push job, default is 1, which means no retry.
+ pushAttempts: 2
+
+ # pushRetryIntervalMillis: retry wait Ms, default to 1 second.
+ pushRetryIntervalMillis: 1000
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/lineitem_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/lineitem_offline_table_config.json
new file mode 100644
index 0000000000..84ecc8ab00
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/lineitem_offline_table_config.json
@@ -0,0 +1,26 @@
+{
+ "tableName": "lineitem",
+ "tableType": "OFFLINE",
+ "tenants": {
+ },
+ "segmentsConfig": {
+ "segmentPushType": "REFRESH",
+ "replication": "1",
+ "schemaName": "lineitem"
+ },
+ "tableIndexConfig": {
+ "loadMode": "MMAP",
+ "invertedIndexColumns": [
+ ],
+ "noDictionaryColumns": [
+ ]
+ },
+ "ingestionConfig": {
+ "transformConfigs": [
+ ]
+ },
+ "metadata": {
+ "customConfigs": {
+ }
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/lineitem_schema.json b/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/lineitem_schema.json
new file mode 100644
index 0000000000..4a2ae474cf
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/lineitem_schema.json
@@ -0,0 +1,71 @@
+{
+ "schemaName": "lineitem",
+ "metricFieldSpecs": [
+ {
+ "dataType": "DOUBLE",
+ "name": "l_extendedprice"
+ },
+ {
+ "dataType": "DOUBLE",
+ "name": "l_tax"
+ },
+ {
+ "dataType": "DOUBLE",
+ "name": "l_discount"
+ }
+ ],
+ "dimensionFieldSpecs": [
+ {
+ "dataType": "INT",
+ "name": "l_orderkey"
+ },
+ {
+ "dataType": "INT",
+ "name": "l_partkey"
+ },
+ {
+ "dataType": "INT",
+ "name": "l_suppkey"
+ },
+ {
+ "dataType": "INT",
+ "name": "l_linenumber"
+ },
+ {
+ "dataType": "INT",
+ "name": "l_quantity"
+ },
+ {
+ "dataType": "STRING",
+ "name": "l_returnflag"
+ },
+ {
+ "dataType": "STRING",
+ "name": "l_linestatus"
+ },
+ {
+ "dataType": "LONG",
+ "name": "l_shipdate"
+ },
+ {
+ "dataType": "LONG",
+ "name": "l_commitdate"
+ },
+ {
+ "dataType": "LONG",
+ "name": "l_receiptdate"
+ },
+ {
+ "dataType": "STRING",
+ "name": "l_shipinstruct"
+ },
+ {
+ "dataType": "STRING",
+ "name": "l_shipmode"
+ },
+ {
+ "dataType": "STRING",
+ "name": "l_comment"
+ }
+ ]
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/rawdata/lineitem.avro b/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/rawdata/lineitem.avro
new file mode 100644
index 0000000000..fa68458255
Binary files /dev/null and b/pinot-tools/src/main/resources/examples/batch/tpch/lineitem/rawdata/lineitem.avro differ
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/nation/ingestionJobSpec.yaml b/pinot-tools/src/main/resources/examples/batch/tpch/nation/ingestionJobSpec.yaml
new file mode 100644
index 0000000000..fd5d608610
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/nation/ingestionJobSpec.yaml
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# executionFrameworkSpec: Defines ingestion jobs to be running.
+executionFrameworkSpec:
+
+ # name: execution framework name
+ name: 'standalone'
+
+ # Class to use for segment generation and different push types.
+ segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+ segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+ segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
+ segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
+
+
+# jobType: Pinot ingestion job type.
+# Supported job types are defined in PinotIngestionJobType class.
+# 'SegmentCreation'
+# 'SegmentTarPush'
+# 'SegmentUriPush'
+# 'SegmentMetadataPush'
+# 'SegmentCreationAndTarPush'
+# 'SegmentCreationAndUriPush'
+# 'SegmentCreationAndMetadataPush'
+jobType: SegmentCreationAndTarPush
+
+# inputDirURI: Root directory of input data, expected to have scheme configured in PinotFS.
+inputDirURI: 'examples/batch/tpch/nation/rawdata'
+
+# includeFileNamePattern: include file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will include all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will include all the avro files under inputDirURI recursively.
+includeFileNamePattern: 'glob:**/*.avro'
+
+# excludeFileNamePattern: exclude file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will exclude all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will exclude all the avro files under inputDirURI recursively.
+# _excludeFileNamePattern: ''
+
+# outputDirURI: Root directory of output segments, expected to have scheme configured in PinotFS.
+outputDirURI: 'examples/batch/tpch/nation/segments'
+
+# overwriteOutput: Overwrite output segments if existed.
+overwriteOutput: true
+
+# pinotFSSpecs: defines all related Pinot file systems.
+pinotFSSpecs:
+
+ - # scheme: used to identify a PinotFS.
+ # E.g. local, hdfs, dbfs, etc
+ scheme: file
+
+ # className: Class name used to create the PinotFS instance.
+ # E.g.
+ # org.apache.pinot.spi.filesystem.LocalPinotFS is used for local filesystem
+ # org.apache.pinot.plugin.filesystem.AzurePinotFS is used for Azure Data Lake
+ # org.apache.pinot.plugin.filesystem.HadoopPinotFS is used for HDFS
+ className: org.apache.pinot.spi.filesystem.LocalPinotFS
+
+# recordReaderSpec: defines all record reader
+recordReaderSpec:
+ dataFormat: 'avro'
+ className: 'org.apache.pinot.plugin.inputformat.avro.AvroRecordReader'
+
+
+# tableSpec: defines table name and where to fetch corresponding table config and table schema.
+tableSpec:
+
+ # tableName: Table name
+ tableName: 'nation'
+
+ # schemaURI: defines where to read the table schema, supports PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_schema.json
+ # http://localhost:9000/tables/myTable/schema
+ schemaURI: 'http://localhost:9000/tables/nation/schema'
+
+ # tableConfigURI: defines where to reade the table config.
+ # Supports using PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_config.json
+ # http://localhost:9000/tables/myTable
+ # Note that the API to read Pinot table config directly from pinot controller contains a JSON wrapper.
+ # The real table config is the object under the field 'OFFLINE'.
+ tableConfigURI: 'http://localhost:9000/tables/nation'
+
+# pinotClusterSpecs: defines the Pinot Cluster Access Point.
+pinotClusterSpecs:
+ - # controllerURI: used to fetch table/schema information and data push.
+ # E.g. http://localhost:9000
+ controllerURI: 'http://localhost:9000'
+
+# pushJobSpec: defines segment push job related configuration.
+pushJobSpec:
+
+ # pushAttempts: number of attempts for push job, default is 1, which means no retry.
+ pushAttempts: 2
+
+ # pushRetryIntervalMillis: retry wait Ms, default to 1 second.
+ pushRetryIntervalMillis: 1000
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/nation/nation_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/tpch/nation/nation_offline_table_config.json
new file mode 100644
index 0000000000..f8bab64048
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/nation/nation_offline_table_config.json
@@ -0,0 +1,27 @@
+{
+ "tableName": "nation",
+ "tableType": "OFFLINE",
+ "tenants": {
+ },
+ "segmentsConfig": {
+ "segmentPushType": "REFRESH",
+ "replication": "1",
+ "schemaName": "nation"
+ },
+ "tableIndexConfig": {
+ "loadMode": "MMAP",
+ "invertedIndexColumns": [
+ ],
+ "noDictionaryColumns": [
+ "n_comment"
+ ]
+ },
+ "ingestionConfig": {
+ "transformConfigs": [
+ ]
+ },
+ "metadata": {
+ "customConfigs": {
+ }
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/nation/nation_schema.json b/pinot-tools/src/main/resources/examples/batch/tpch/nation/nation_schema.json
new file mode 100644
index 0000000000..a69f08d10b
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/nation/nation_schema.json
@@ -0,0 +1,23 @@
+{
+ "schemaName": "nation",
+ "metricFieldSpecs": [
+ ],
+ "dimensionFieldSpecs": [
+ {
+ "dataType": "INT",
+ "name": "n_nationkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "n_name"
+ },
+ {
+ "dataType": "INT",
+ "name": "n_regionkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "n_comment"
+ }
+ ]
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/nation/rawdata/nation.avro b/pinot-tools/src/main/resources/examples/batch/tpch/nation/rawdata/nation.avro
new file mode 100644
index 0000000000..12cb1fc805
Binary files /dev/null and b/pinot-tools/src/main/resources/examples/batch/tpch/nation/rawdata/nation.avro differ
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/orders/ingestionJobSpec.yaml b/pinot-tools/src/main/resources/examples/batch/tpch/orders/ingestionJobSpec.yaml
new file mode 100644
index 0000000000..ac0412a54d
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/orders/ingestionJobSpec.yaml
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# executionFrameworkSpec: Defines ingestion jobs to be running.
+executionFrameworkSpec:
+
+ # name: execution framework name
+ name: 'standalone'
+
+ # Class to use for segment generation and different push types.
+ segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+ segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+ segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
+ segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
+
+
+# jobType: Pinot ingestion job type.
+# Supported job types are defined in PinotIngestionJobType class.
+# 'SegmentCreation'
+# 'SegmentTarPush'
+# 'SegmentUriPush'
+# 'SegmentMetadataPush'
+# 'SegmentCreationAndTarPush'
+# 'SegmentCreationAndUriPush'
+# 'SegmentCreationAndMetadataPush'
+jobType: SegmentCreationAndTarPush
+
+# inputDirURI: Root directory of input data, expected to have scheme configured in PinotFS.
+inputDirURI: 'examples/batch/tpch/orders/rawdata'
+
+# includeFileNamePattern: include file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will include all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will include all the avro files under inputDirURI recursively.
+includeFileNamePattern: 'glob:**/*.avro'
+
+# excludeFileNamePattern: exclude file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will exclude all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will exclude all the avro files under inputDirURI recursively.
+# _excludeFileNamePattern: ''
+
+# outputDirURI: Root directory of output segments, expected to have scheme configured in PinotFS.
+outputDirURI: 'examples/batch/tpch/orders/segments'
+
+# overwriteOutput: Overwrite output segments if existed.
+overwriteOutput: true
+
+# pinotFSSpecs: defines all related Pinot file systems.
+pinotFSSpecs:
+
+ - # scheme: used to identify a PinotFS.
+ # E.g. local, hdfs, dbfs, etc
+ scheme: file
+
+ # className: Class name used to create the PinotFS instance.
+ # E.g.
+ # org.apache.pinot.spi.filesystem.LocalPinotFS is used for local filesystem
+ # org.apache.pinot.plugin.filesystem.AzurePinotFS is used for Azure Data Lake
+ # org.apache.pinot.plugin.filesystem.HadoopPinotFS is used for HDFS
+ className: org.apache.pinot.spi.filesystem.LocalPinotFS
+
+# recordReaderSpec: defines all record reader
+recordReaderSpec:
+ dataFormat: 'avro'
+ className: 'org.apache.pinot.plugin.inputformat.avro.AvroRecordReader'
+
+
+# tableSpec: defines table name and where to fetch corresponding table config and table schema.
+tableSpec:
+
+ # tableName: Table name
+ tableName: 'orders'
+
+ # schemaURI: defines where to read the table schema, supports PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_schema.json
+ # http://localhost:9000/tables/myTable/schema
+ schemaURI: 'http://localhost:9000/tables/orders/schema'
+
+ # tableConfigURI: defines where to reade the table config.
+ # Supports using PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_config.json
+ # http://localhost:9000/tables/myTable
+ # Note that the API to read Pinot table config directly from pinot controller contains a JSON wrapper.
+ # The real table config is the object under the field 'OFFLINE'.
+ tableConfigURI: 'http://localhost:9000/tables/orders'
+
+# pinotClusterSpecs: defines the Pinot Cluster Access Point.
+pinotClusterSpecs:
+ - # controllerURI: used to fetch table/schema information and data push.
+ # E.g. http://localhost:9000
+ controllerURI: 'http://localhost:9000'
+
+# pushJobSpec: defines segment push job related configuration.
+pushJobSpec:
+
+ # pushAttempts: number of attempts for push job, default is 1, which means no retry.
+ pushAttempts: 2
+
+ # pushRetryIntervalMillis: retry wait Ms, default to 1 second.
+ pushRetryIntervalMillis: 1000
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/orders/orders_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/tpch/orders/orders_offline_table_config.json
new file mode 100644
index 0000000000..80c4e5c2d9
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/orders/orders_offline_table_config.json
@@ -0,0 +1,34 @@
+{
+ "tableName": "orders",
+ "tableType": "OFFLINE",
+ "tenants": {
+ },
+ "segmentsConfig": {
+ "segmentPushType": "REFRESH",
+ "replication": "1",
+ "schemaName": "orders"
+ },
+ "tableIndexConfig": {
+ "loadMode": "MMAP",
+ "invertedIndexColumns": [
+ "o_orderkey",
+ "o_custkey",
+ "o_orderstatus",
+ "o_orderdate",
+ "o_orderpriority",
+ "o_clerk",
+ "o_shippriority"
+ ],
+ "noDictionaryColumns": [
+ "o_comment"
+ ]
+ },
+ "ingestionConfig": {
+ "transformConfigs": [
+ ]
+ },
+ "metadata": {
+ "customConfigs": {
+ }
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/orders/orders_schema.json b/pinot-tools/src/main/resources/examples/batch/tpch/orders/orders_schema.json
new file mode 100644
index 0000000000..6efde76bed
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/orders/orders_schema.json
@@ -0,0 +1,43 @@
+{
+ "schemaName": "orders",
+ "metricFieldSpecs": [
+ {
+ "dataType": "DOUBLE",
+ "name": "o_totalprice"
+ }
+ ],
+ "dimensionFieldSpecs": [
+ {
+ "dataType": "INT",
+ "name": "o_orderkey"
+ },
+ {
+ "dataType": "INT",
+ "name": "o_custkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "o_orderstatus"
+ },
+ {
+ "dataType": "LONG",
+ "name": "o_orderdate"
+ },
+ {
+ "dataType": "STRING",
+ "name": "o_orderpriority"
+ },
+ {
+ "dataType": "STRING",
+ "name": "o_clerk"
+ },
+ {
+ "dataType": "STRING",
+ "name": "o_shippriority"
+ },
+ {
+ "dataType": "STRING",
+ "name": "o_comment"
+ }
+ ]
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/orders/rawdata/orders.avro b/pinot-tools/src/main/resources/examples/batch/tpch/orders/rawdata/orders.avro
new file mode 100644
index 0000000000..003f91eb20
Binary files /dev/null and b/pinot-tools/src/main/resources/examples/batch/tpch/orders/rawdata/orders.avro differ
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/part/ingestionJobSpec.yaml b/pinot-tools/src/main/resources/examples/batch/tpch/part/ingestionJobSpec.yaml
new file mode 100644
index 0000000000..02e62bef06
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/part/ingestionJobSpec.yaml
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# executionFrameworkSpec: Defines ingestion jobs to be running.
+executionFrameworkSpec:
+
+ # name: execution framework name
+ name: 'standalone'
+
+ # Class to use for segment generation and different push types.
+ segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+ segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+ segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
+ segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
+
+
+# jobType: Pinot ingestion job type.
+# Supported job types are defined in PinotIngestionJobType class.
+# 'SegmentCreation'
+# 'SegmentTarPush'
+# 'SegmentUriPush'
+# 'SegmentMetadataPush'
+# 'SegmentCreationAndTarPush'
+# 'SegmentCreationAndUriPush'
+# 'SegmentCreationAndMetadataPush'
+jobType: SegmentCreationAndTarPush
+
+# inputDirURI: Root directory of input data, expected to have scheme configured in PinotFS.
+inputDirURI: 'examples/batch/tpch/part/rawdata'
+
+# includeFileNamePattern: include file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will include all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will include all the avro files under inputDirURI recursively.
+includeFileNamePattern: 'glob:**/*.avro'
+
+# excludeFileNamePattern: exclude file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will exclude all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will exclude all the avro files under inputDirURI recursively.
+# _excludeFileNamePattern: ''
+
+# outputDirURI: Root directory of output segments, expected to have scheme configured in PinotFS.
+outputDirURI: 'examples/batch/tpch/part/segments'
+
+# overwriteOutput: Overwrite output segments if existed.
+overwriteOutput: true
+
+# pinotFSSpecs: defines all related Pinot file systems.
+pinotFSSpecs:
+
+ - # scheme: used to identify a PinotFS.
+ # E.g. local, hdfs, dbfs, etc
+ scheme: file
+
+ # className: Class name used to create the PinotFS instance.
+ # E.g.
+ # org.apache.pinot.spi.filesystem.LocalPinotFS is used for local filesystem
+ # org.apache.pinot.plugin.filesystem.AzurePinotFS is used for Azure Data Lake
+ # org.apache.pinot.plugin.filesystem.HadoopPinotFS is used for HDFS
+ className: org.apache.pinot.spi.filesystem.LocalPinotFS
+
+# recordReaderSpec: defines all record reader
+recordReaderSpec:
+ dataFormat: 'avro'
+ className: 'org.apache.pinot.plugin.inputformat.avro.AvroRecordReader'
+
+
+# tableSpec: defines table name and where to fetch corresponding table config and table schema.
+tableSpec:
+
+ # tableName: Table name
+ tableName: 'part'
+
+ # schemaURI: defines where to read the table schema, supports PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_schema.json
+ # http://localhost:9000/tables/myTable/schema
+ schemaURI: 'http://localhost:9000/tables/part/schema'
+
+ # tableConfigURI: defines where to reade the table config.
+ # Supports using PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_config.json
+ # http://localhost:9000/tables/myTable
+ # Note that the API to read Pinot table config directly from pinot controller contains a JSON wrapper.
+ # The real table config is the object under the field 'OFFLINE'.
+ tableConfigURI: 'http://localhost:9000/tables/part'
+
+# pinotClusterSpecs: defines the Pinot Cluster Access Point.
+pinotClusterSpecs:
+ - # controllerURI: used to fetch table/schema information and data push.
+ # E.g. http://localhost:9000
+ controllerURI: 'http://localhost:9000'
+
+# pushJobSpec: defines segment push job related configuration.
+pushJobSpec:
+
+ # pushAttempts: number of attempts for push job, default is 1, which means no retry.
+ pushAttempts: 2
+
+ # pushRetryIntervalMillis: retry wait Ms, default to 1 second.
+ pushRetryIntervalMillis: 1000
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/part/part_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/tpch/part/part_offline_table_config.json
new file mode 100644
index 0000000000..19b76f90c3
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/part/part_offline_table_config.json
@@ -0,0 +1,27 @@
+{
+ "tableName": "part",
+ "tableType": "OFFLINE",
+ "tenants": {
+ },
+ "segmentsConfig": {
+ "segmentPushType": "REFRESH",
+ "replication": "1",
+ "schemaName": "part"
+ },
+ "tableIndexConfig": {
+ "loadMode": "MMAP",
+ "invertedIndexColumns": [
+ ],
+ "noDictionaryColumns": [
+ "p_comment"
+ ]
+ },
+ "ingestionConfig": {
+ "transformConfigs": [
+ ]
+ },
+ "metadata": {
+ "customConfigs": {
+ }
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/part/part_schema.json b/pinot-tools/src/main/resources/examples/batch/tpch/part/part_schema.json
new file mode 100644
index 0000000000..cebdf93a79
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/part/part_schema.json
@@ -0,0 +1,43 @@
+{
+ "schemaName": "part",
+ "metricFieldSpecs": [
+ {
+ "dataType": "DOUBLE",
+ "name": "p_retailprice"
+ }
+ ],
+ "dimensionFieldSpecs": [
+ {
+ "dataType": "INT",
+ "name": "p_partkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "p_name"
+ },
+ {
+ "dataType": "STRING",
+ "name": "p_mfgr"
+ },
+ {
+ "dataType": "STRING",
+ "name": "p_brand"
+ },
+ {
+ "dataType": "STRING",
+ "name": "p_type"
+ },
+ {
+ "dataType": "INT",
+ "name": "p_size"
+ },
+ {
+ "dataType": "STRING",
+ "name": "p_container"
+ },
+ {
+ "dataType": "STRING",
+ "name": "p_comment"
+ }
+ ]
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/part/rawdata/part.avro b/pinot-tools/src/main/resources/examples/batch/tpch/part/rawdata/part.avro
new file mode 100644
index 0000000000..ae74dfa289
Binary files /dev/null and b/pinot-tools/src/main/resources/examples/batch/tpch/part/rawdata/part.avro differ
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/ingestionJobSpec.yaml b/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/ingestionJobSpec.yaml
new file mode 100644
index 0000000000..4189f91967
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/ingestionJobSpec.yaml
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# executionFrameworkSpec: Defines ingestion jobs to be running.
+executionFrameworkSpec:
+
+ # name: execution framework name
+ name: 'standalone'
+
+ # Class to use for segment generation and different push types.
+ segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+ segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+ segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
+ segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
+
+
+# jobType: Pinot ingestion job type.
+# Supported job types are defined in PinotIngestionJobType class.
+# 'SegmentCreation'
+# 'SegmentTarPush'
+# 'SegmentUriPush'
+# 'SegmentMetadataPush'
+# 'SegmentCreationAndTarPush'
+# 'SegmentCreationAndUriPush'
+# 'SegmentCreationAndMetadataPush'
+jobType: SegmentCreationAndTarPush
+
+# inputDirURI: Root directory of input data, expected to have scheme configured in PinotFS.
+inputDirURI: 'examples/batch/tpch/partsupp/rawdata'
+
+# includeFileNamePattern: include file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will include all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will include all the avro files under inputDirURI recursively.
+includeFileNamePattern: 'glob:**/*.avro'
+
+# excludeFileNamePattern: exclude file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will exclude all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will exclude all the avro files under inputDirURI recursively.
+# _excludeFileNamePattern: ''
+
+# outputDirURI: Root directory of output segments, expected to have scheme configured in PinotFS.
+outputDirURI: 'examples/batch/tpch/partsupp/segments'
+
+# overwriteOutput: Overwrite output segments if existed.
+overwriteOutput: true
+
+# pinotFSSpecs: defines all related Pinot file systems.
+pinotFSSpecs:
+
+ - # scheme: used to identify a PinotFS.
+ # E.g. local, hdfs, dbfs, etc
+ scheme: file
+
+ # className: Class name used to create the PinotFS instance.
+ # E.g.
+ # org.apache.pinot.spi.filesystem.LocalPinotFS is used for local filesystem
+ # org.apache.pinot.plugin.filesystem.AzurePinotFS is used for Azure Data Lake
+ # org.apache.pinot.plugin.filesystem.HadoopPinotFS is used for HDFS
+ className: org.apache.pinot.spi.filesystem.LocalPinotFS
+
+# recordReaderSpec: defines all record reader
+recordReaderSpec:
+ dataFormat: 'avro'
+ className: 'org.apache.pinot.plugin.inputformat.avro.AvroRecordReader'
+
+
+# tableSpec: defines table name and where to fetch corresponding table config and table schema.
+tableSpec:
+
+ # tableName: Table name
+ tableName: 'partsupp'
+
+ # schemaURI: defines where to read the table schema, supports PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_schema.json
+ # http://localhost:9000/tables/myTable/schema
+ schemaURI: 'http://localhost:9000/tables/partsupp/schema'
+
+ # tableConfigURI: defines where to reade the table config.
+ # Supports using PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_config.json
+ # http://localhost:9000/tables/myTable
+ # Note that the API to read Pinot table config directly from pinot controller contains a JSON wrapper.
+ # The real table config is the object under the field 'OFFLINE'.
+ tableConfigURI: 'http://localhost:9000/tables/partsupp'
+
+# pinotClusterSpecs: defines the Pinot Cluster Access Point.
+pinotClusterSpecs:
+ - # controllerURI: used to fetch table/schema information and data push.
+ # E.g. http://localhost:9000
+ controllerURI: 'http://localhost:9000'
+
+# pushJobSpec: defines segment push job related configuration.
+pushJobSpec:
+
+ # pushAttempts: number of attempts for push job, default is 1, which means no retry.
+ pushAttempts: 2
+
+ # pushRetryIntervalMillis: retry wait Ms, default to 1 second.
+ pushRetryIntervalMillis: 1000
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/partsupp_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/partsupp_offline_table_config.json
new file mode 100644
index 0000000000..f7d32c5206
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/partsupp_offline_table_config.json
@@ -0,0 +1,27 @@
+{
+ "tableName": "partsupp",
+ "tableType": "OFFLINE",
+ "tenants": {
+ },
+ "segmentsConfig": {
+ "segmentPushType": "REFRESH",
+ "replication": "1",
+ "schemaName": "partsupp"
+ },
+ "tableIndexConfig": {
+ "loadMode": "MMAP",
+ "invertedIndexColumns": [
+ ],
+ "noDictionaryColumns": [
+ "ps_comment"
+ ]
+ },
+ "ingestionConfig": {
+ "transformConfigs": [
+ ]
+ },
+ "metadata": {
+ "customConfigs": {
+ }
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/partsupp_schema.json b/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/partsupp_schema.json
new file mode 100644
index 0000000000..c07c56baec
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/partsupp_schema.json
@@ -0,0 +1,28 @@
+{
+ "schemaName": "partsupp",
+ "metricFieldSpecs": [
+ {
+ "dataType": "DOUBLE",
+ "name": "ps_supplycost"
+ }
+ ],
+ "dimensionFieldSpecs": [
+ {
+ "dataType": "INT",
+ "name": "ps_partkey"
+ },
+ {
+ "dataType": "INT",
+ "name": "ps_suppkey"
+ },
+ {
+ "dataType": "INT",
+ "name": "ps_availqty"
+ },
+ {
+ "dataType": "STRING",
+ "name": "ps_comment",
+ "maxLength": 4096
+ }
+ ]
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/rawdata/partsupp.avro b/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/rawdata/partsupp.avro
new file mode 100644
index 0000000000..83e846fba2
Binary files /dev/null and b/pinot-tools/src/main/resources/examples/batch/tpch/partsupp/rawdata/partsupp.avro differ
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/region/ingestionJobSpec.yaml b/pinot-tools/src/main/resources/examples/batch/tpch/region/ingestionJobSpec.yaml
new file mode 100644
index 0000000000..a94d6a778c
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/region/ingestionJobSpec.yaml
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# executionFrameworkSpec: Defines ingestion jobs to be running.
+executionFrameworkSpec:
+
+ # name: execution framework name
+ name: 'standalone'
+
+ # Class to use for segment generation and different push types.
+ segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+ segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+ segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
+ segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
+
+
+# jobType: Pinot ingestion job type.
+# Supported job types are defined in PinotIngestionJobType class.
+# 'SegmentCreation'
+# 'SegmentTarPush'
+# 'SegmentUriPush'
+# 'SegmentMetadataPush'
+# 'SegmentCreationAndTarPush'
+# 'SegmentCreationAndUriPush'
+# 'SegmentCreationAndMetadataPush'
+jobType: SegmentCreationAndTarPush
+
+# inputDirURI: Root directory of input data, expected to have scheme configured in PinotFS.
+inputDirURI: 'examples/batch/tpch/region/rawdata'
+
+# includeFileNamePattern: include file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will include all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will include all the avro files under inputDirURI recursively.
+includeFileNamePattern: 'glob:**/*.avro'
+
+# excludeFileNamePattern: exclude file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will exclude all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will exclude all the avro files under inputDirURI recursively.
+# _excludeFileNamePattern: ''
+
+# outputDirURI: Root directory of output segments, expected to have scheme configured in PinotFS.
+outputDirURI: 'examples/batch/tpch/region/segments'
+
+# overwriteOutput: Overwrite output segments if existed.
+overwriteOutput: true
+
+# pinotFSSpecs: defines all related Pinot file systems.
+pinotFSSpecs:
+
+ - # scheme: used to identify a PinotFS.
+ # E.g. local, hdfs, dbfs, etc
+ scheme: file
+
+ # className: Class name used to create the PinotFS instance.
+ # E.g.
+ # org.apache.pinot.spi.filesystem.LocalPinotFS is used for local filesystem
+ # org.apache.pinot.plugin.filesystem.AzurePinotFS is used for Azure Data Lake
+ # org.apache.pinot.plugin.filesystem.HadoopPinotFS is used for HDFS
+ className: org.apache.pinot.spi.filesystem.LocalPinotFS
+
+# recordReaderSpec: defines all record reader
+recordReaderSpec:
+ dataFormat: 'avro'
+ className: 'org.apache.pinot.plugin.inputformat.avro.AvroRecordReader'
+
+
+# tableSpec: defines table name and where to fetch corresponding table config and table schema.
+tableSpec:
+
+ # tableName: Table name
+ tableName: 'region'
+
+ # schemaURI: defines where to read the table schema, supports PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_schema.json
+ # http://localhost:9000/tables/myTable/schema
+ schemaURI: 'http://localhost:9000/tables/region/schema'
+
+ # tableConfigURI: defines where to reade the table config.
+ # Supports using PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_config.json
+ # http://localhost:9000/tables/myTable
+ # Note that the API to read Pinot table config directly from pinot controller contains a JSON wrapper.
+ # The real table config is the object under the field 'OFFLINE'.
+ tableConfigURI: 'http://localhost:9000/tables/region'
+
+# pinotClusterSpecs: defines the Pinot Cluster Access Point.
+pinotClusterSpecs:
+ - # controllerURI: used to fetch table/schema information and data push.
+ # E.g. http://localhost:9000
+ controllerURI: 'http://localhost:9000'
+
+# pushJobSpec: defines segment push job related configuration.
+pushJobSpec:
+
+ # pushAttempts: number of attempts for push job, default is 1, which means no retry.
+ pushAttempts: 2
+
+ # pushRetryIntervalMillis: retry wait Ms, default to 1 second.
+ pushRetryIntervalMillis: 1000
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/region/rawdata/region.avro b/pinot-tools/src/main/resources/examples/batch/tpch/region/rawdata/region.avro
new file mode 100644
index 0000000000..f8ac435feb
Binary files /dev/null and b/pinot-tools/src/main/resources/examples/batch/tpch/region/rawdata/region.avro differ
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/region/region_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/tpch/region/region_offline_table_config.json
new file mode 100644
index 0000000000..88d62a8315
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/region/region_offline_table_config.json
@@ -0,0 +1,26 @@
+{
+ "tableName": "region",
+ "tableType": "OFFLINE",
+ "tenants": {
+ },
+ "segmentsConfig": {
+ "segmentPushType": "REFRESH",
+ "replication": "1",
+ "schemaName": "region"
+ },
+ "tableIndexConfig": {
+ "loadMode": "MMAP",
+ "invertedIndexColumns": [
+ ],
+ "noDictionaryColumns": [
+ ]
+ },
+ "ingestionConfig": {
+ "transformConfigs": [
+ ]
+ },
+ "metadata": {
+ "customConfigs": {
+ }
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/region/region_schema.json b/pinot-tools/src/main/resources/examples/batch/tpch/region/region_schema.json
new file mode 100644
index 0000000000..e1cc5728fd
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/region/region_schema.json
@@ -0,0 +1,19 @@
+{
+ "schemaName": "region",
+ "metricFieldSpecs": [
+ ],
+ "dimensionFieldSpecs": [
+ {
+ "dataType": "INT",
+ "name": "r_regionkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "r_name"
+ },
+ {
+ "dataType": "STRING",
+ "name": "r_comment"
+ }
+ ]
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/supplier/ingestionJobSpec.yaml b/pinot-tools/src/main/resources/examples/batch/tpch/supplier/ingestionJobSpec.yaml
new file mode 100644
index 0000000000..e25baeb6e6
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/supplier/ingestionJobSpec.yaml
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# executionFrameworkSpec: Defines ingestion jobs to be running.
+executionFrameworkSpec:
+
+ # name: execution framework name
+ name: 'standalone'
+
+ # Class to use for segment generation and different push types.
+ segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
+ segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
+ segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
+ segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
+
+
+# jobType: Pinot ingestion job type.
+# Supported job types are defined in PinotIngestionJobType class.
+# 'SegmentCreation'
+# 'SegmentTarPush'
+# 'SegmentUriPush'
+# 'SegmentMetadataPush'
+# 'SegmentCreationAndTarPush'
+# 'SegmentCreationAndUriPush'
+# 'SegmentCreationAndMetadataPush'
+jobType: SegmentCreationAndTarPush
+
+# inputDirURI: Root directory of input data, expected to have scheme configured in PinotFS.
+inputDirURI: 'examples/batch/tpch/supplier/rawdata'
+
+# includeFileNamePattern: include file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will include all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will include all the avro files under inputDirURI recursively.
+includeFileNamePattern: 'glob:**/*.avro'
+
+# excludeFileNamePattern: exclude file name pattern, supported glob pattern.
+# Sample usage:
+# 'glob:*.avro' will exclude all avro files just under the inputDirURI, not sub directories;
+# 'glob:**/*.avro' will exclude all the avro files under inputDirURI recursively.
+# _excludeFileNamePattern: ''
+
+# outputDirURI: Root directory of output segments, expected to have scheme configured in PinotFS.
+outputDirURI: 'examples/batch/tpch/supplier/segments'
+
+# overwriteOutput: Overwrite output segments if existed.
+overwriteOutput: true
+
+# pinotFSSpecs: defines all related Pinot file systems.
+pinotFSSpecs:
+
+ - # scheme: used to identify a PinotFS.
+ # E.g. local, hdfs, dbfs, etc
+ scheme: file
+
+ # className: Class name used to create the PinotFS instance.
+ # E.g.
+ # org.apache.pinot.spi.filesystem.LocalPinotFS is used for local filesystem
+ # org.apache.pinot.plugin.filesystem.AzurePinotFS is used for Azure Data Lake
+ # org.apache.pinot.plugin.filesystem.HadoopPinotFS is used for HDFS
+ className: org.apache.pinot.spi.filesystem.LocalPinotFS
+
+# recordReaderSpec: defines all record reader
+recordReaderSpec:
+ dataFormat: 'avro'
+ className: 'org.apache.pinot.plugin.inputformat.avro.AvroRecordReader'
+
+
+# tableSpec: defines table name and where to fetch corresponding table config and table schema.
+tableSpec:
+
+ # tableName: Table name
+ tableName: 'supplier'
+
+ # schemaURI: defines where to read the table schema, supports PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_schema.json
+ # http://localhost:9000/tables/myTable/schema
+ schemaURI: 'http://localhost:9000/tables/supplier/schema'
+
+ # tableConfigURI: defines where to reade the table config.
+ # Supports using PinotFS or HTTP.
+ # E.g.
+ # hdfs://path/to/table_config.json
+ # http://localhost:9000/tables/myTable
+ # Note that the API to read Pinot table config directly from pinot controller contains a JSON wrapper.
+ # The real table config is the object under the field 'OFFLINE'.
+ tableConfigURI: 'http://localhost:9000/tables/supplier'
+
+# pinotClusterSpecs: defines the Pinot Cluster Access Point.
+pinotClusterSpecs:
+ - # controllerURI: used to fetch table/schema information and data push.
+ # E.g. http://localhost:9000
+ controllerURI: 'http://localhost:9000'
+
+# pushJobSpec: defines segment push job related configuration.
+pushJobSpec:
+
+ # pushAttempts: number of attempts for push job, default is 1, which means no retry.
+ pushAttempts: 2
+
+ # pushRetryIntervalMillis: retry wait Ms, default to 1 second.
+ pushRetryIntervalMillis: 1000
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/supplier/rawdata/supplier.avro b/pinot-tools/src/main/resources/examples/batch/tpch/supplier/rawdata/supplier.avro
new file mode 100644
index 0000000000..0961696e69
Binary files /dev/null and b/pinot-tools/src/main/resources/examples/batch/tpch/supplier/rawdata/supplier.avro differ
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/supplier/supplier_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/tpch/supplier/supplier_offline_table_config.json
new file mode 100644
index 0000000000..adeaaa5d06
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/supplier/supplier_offline_table_config.json
@@ -0,0 +1,26 @@
+{
+ "tableName": "supplier",
+ "tableType": "OFFLINE",
+ "tenants": {
+ },
+ "segmentsConfig": {
+ "segmentPushType": "REFRESH",
+ "replication": "1",
+ "schemaName": "supplier"
+ },
+ "tableIndexConfig": {
+ "loadMode": "MMAP",
+ "invertedIndexColumns": [
+ ],
+ "noDictionaryColumns": [
+ ]
+ },
+ "ingestionConfig": {
+ "transformConfigs": [
+ ]
+ },
+ "metadata": {
+ "customConfigs": {
+ }
+ }
+}
diff --git a/pinot-tools/src/main/resources/examples/batch/tpch/supplier/supplier_schema.json b/pinot-tools/src/main/resources/examples/batch/tpch/supplier/supplier_schema.json
new file mode 100644
index 0000000000..69227960e3
--- /dev/null
+++ b/pinot-tools/src/main/resources/examples/batch/tpch/supplier/supplier_schema.json
@@ -0,0 +1,35 @@
+{
+ "schemaName": "supplier",
+ "metricFieldSpecs": [
+ {
+ "dataType": "DOUBLE",
+ "name": "s_acctbal"
+ }
+ ],
+ "dimensionFieldSpecs": [
+ {
+ "dataType": "INT",
+ "name": "s_suppkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "s_name"
+ },
+ {
+ "dataType": "STRING",
+ "name": "s_address"
+ },
+ {
+ "dataType": "INT",
+ "name": "s_nationkey"
+ },
+ {
+ "dataType": "STRING",
+ "name": "s_phone"
+ },
+ {
+ "dataType": "STRING",
+ "name": "s_comment"
+ }
+ ]
+}
diff --git a/pom.xml b/pom.xml
index 3fc23a8c6d..eae64d7748 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1642,6 +1642,7 @@
<exclude>**/*.generated</exclude>
<exclude>**/*.json</exclude>
<exclude>**/*.schema</exclude>
+ <exclude>**/*.sql</exclude>
<!-- JDBC Drivers -->
<exclude>**/java.sql.*</exclude>
@@ -1755,6 +1756,7 @@
<exclude>**/*.generated</exclude>
<exclude>**/*.json</exclude>
<exclude>**/*.schema</exclude>
+ <exclude>**/*.sql</exclude>
<!-- JDBC Drivers -->
<exclude>**/java.sql.*</exclude>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org