You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2021/03/05 06:55:11 UTC
[kylin] 02/02: add test case for integer type partition pruner
This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit de95ab977e700178efd70705ead4edaca272d41f
Author: zhengshengjun <sh...@sina.com>
AuthorDate: Mon Mar 1 11:43:52 2021 +0800
add test case for integer type partition pruner
---
examples/test_case_data/parquet_test/cube/ssb.json | 17 +++
.../test_case_data/parquet_test/cube_desc/ssb.json | 115 +++++++++++++++++++++
.../parquet_test/data/SSB.P_LINEORDER.csv | 18 ++++
.../parquet_test/model_desc/ssb.json | 25 +++++
.../parquet_test/project/default.json | 2 +-
.../parquet_test/table/SSB.P_LINEORDER.json | 82 +++++++++++++++
.../resources/query/sql_prune_segment/query00.sql | 20 ++++
.../resources/query/sql_prune_segment/query01.sql | 20 ++++
.../kylin/engine/spark2/NBuildAndQueryTest.java | 37 +++++--
.../apache/kylin/engine/spark2/NExecAndComp.java | 5 +-
10 files changed, 328 insertions(+), 13 deletions(-)
diff --git a/examples/test_case_data/parquet_test/cube/ssb.json b/examples/test_case_data/parquet_test/cube/ssb.json
new file mode 100644
index 0000000..8e4f825
--- /dev/null
+++ b/examples/test_case_data/parquet_test/cube/ssb.json
@@ -0,0 +1,17 @@
+{
+ "uuid" : "70a9f288-3c01-4745-a04b-5641e82d6c69",
+ "last_modified" : 1594722761733,
+ "version" : "1.5.3",
+ "name" : "ssb",
+ "owner" : "ADMIN",
+ "descriptor" : "ssb",
+ "display_name" : "ssb",
+ "cost" : 50,
+ "status" : "DISABLED",
+ "segments" : [ ],
+ "create_time_utc" : 1457444500888,
+ "cuboid_bytes" : null,
+ "cuboid_bytes_recommend" : null,
+ "cuboid_last_optimized" : 0,
+ "snapshots" : { }
+}
\ No newline at end of file
diff --git a/examples/test_case_data/parquet_test/cube_desc/ssb.json b/examples/test_case_data/parquet_test/cube_desc/ssb.json
new file mode 100644
index 0000000..62ca74a
--- /dev/null
+++ b/examples/test_case_data/parquet_test/cube_desc/ssb.json
@@ -0,0 +1,115 @@
+{
+ "uuid" : "5c44df30-daec-486e-af90-927bf7851057",
+ "last_modified" : 1491925122527,
+ "version" : "1.5.3",
+ "name" : "ssb",
+ "model_name" : "ssb",
+ "description" : "",
+ "null_string" : null,
+ "dimensions" : [ {
+ "name" : "LO_QUANTITY",
+ "table" : "P_LINEORDER",
+ "column" : "LO_QUANTITY",
+ "derived" : null
+ }, {
+ "name" : "LO_DISCOUNT",
+ "table" : "P_LINEORDER",
+ "column" : "LO_DISCOUNT",
+ "derived" : null
+ }, {
+ "name" : "LO_ORDERDATE",
+ "table" : "P_LINEORDER",
+ "column" : "LO_ORDERDATE",
+ "derived" : null
+ } ],
+ "measures" : [ {
+ "name" : "_COUNT_",
+ "function" : {
+ "expression" : "COUNT",
+ "parameter" : {
+ "type" : "constant",
+ "value" : "1"
+ },
+ "returntype" : "bigint"
+ }
+ }, {
+ "name" : "P_LINEORDER.V_REVENUE_SUM",
+ "function" : {
+ "expression" : "SUM",
+ "parameter" : {
+ "type" : "column",
+ "value" : "P_LINEORDER.V_REVENUE"
+ },
+ "returntype" : "bigint"
+ }
+ }, {
+ "name" : "P_LINEORDER.LO_SUPPLYCOST_SUM",
+ "function" : {
+ "expression" : "SUM",
+ "parameter" : {
+ "type" : "column",
+ "value" : "P_LINEORDER.LO_SUPPLYCOST"
+ },
+ "returntype" : "bigint"
+ }
+ }, {
+ "name" : "P_LINEORDER.LO_REVENUE_SUM",
+ "function" : {
+ "expression" : "SUM",
+ "parameter" : {
+ "type" : "column",
+ "value" : "P_LINEORDER.LO_REVENUE"
+ },
+ "returntype" : "bigint"
+ }
+ } ],
+ "rowkey" : {
+ "rowkey_columns" : [ {
+ "column" : "P_LINEORDER.LO_ORDERDATE",
+ "encoding" : "integer:8",
+ "isShardBy" : false,
+ "index" : "eq"
+ }, {
+ "column" : "P_LINEORDER.LO_QUANTITY",
+ "encoding" : "integer:8",
+ "isShardBy" : false,
+ "index" : "eq"
+ }, {
+ "column" : "P_LINEORDER.LO_DISCOUNT",
+ "encoding" : "integer:8",
+ "isShardBy" : false,
+ "index" : "eq"
+ } ]
+ },
+ "hbase_mapping" : {
+ "column_family" : [ {
+ "name" : "F1",
+ "columns" : [ {
+ "qualifier" : "M",
+ "measure_refs" : [ "_COUNT_", "P_LINEORDER.V_REVENUE_SUM", "P_LINEORDER.LO_SUPPLYCOST_SUM", "P_LINEORDER.LO_REVENUE_SUM" ]
+ } ]
+ } ]
+ },
+ "aggregation_groups" : [ {
+ "includes" : [ "P_LINEORDER.LO_QUANTITY", "P_LINEORDER.LO_DISCOUNT" ],
+ "select_rule" : {
+ "hierarchy_dims" : [ ],
+ "mandatory_dims" : [ ],
+ "joint_dims" : [
+ [ "P_LINEORDER.LO_QUANTITY", "P_LINEORDER.LO_DISCOUNT" ]
+ ]
+ }
+ } ],
+ "notify_list" : [ ],
+ "status_need_notify" : [ ],
+ "partition_date_start" : 694224000000,
+ "partition_date_end" : 3153600000000,
+ "auto_merge_time_ranges" : [ ],
+ "retention_range" : 0,
+ "engine_type" : 6,
+ "storage_type" : 4,
+ "override_kylin_properties" : {
+ "kylin.cube.aggrgroup.is-mandatory-only-valid" : "true",
+ "kylin.storage.hbase.min-region-count" : "4"
+ }
+}
diff --git a/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv b/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv
new file mode 100644
index 0000000..24ada80
--- /dev/null
+++ b/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv
@@ -0,0 +1,18 @@
+4581,1,16,165,1,19920904,4-NOT SPECI,0,37,3941092,8959211,1,3901681,63909,4,19921105,MAIL,3941092
+4581,2,16,50,2,19920904,4-NOT SPECI,0,7,665035,8959211,1,658384,57003,2,19921020,MAIL,665035
+4581,3,16,21,2,19920904,4-NOT SPECI,0,46,4236692,8959211,4,4067224,55261,4,19921127,REG AIR,16946768
+2560,1,28,169,1,19920905,1-URGENT,0,41,4383556,15342679,7,4076707,64149,1,19921111,SHIP,30684892
+2560,2,28,4,2,19920905,1-URGENT,0,27,2440800,15342679,0,2440800,54240,1,19921116,MAIL,0
+2560,3,28,46,2,19920905,1-URGENT,0,31,2932724,15342679,1,2903396,56762,5,19921014,AIR,2932724
+2560,4,28,72,2,19920905,1-URGENT,0,36,3499452,15342679,1,3464457,58324,2,19921030,MAIL,3499452
+2560,5,28,42,2,19920905,1-URGENT,0,9,847836,15342679,4,813922,56522,2,19921029,REG AIR,3391344
+2560,6,28,108,1,19920905,1-URGENT,0,13,1310530,15342679,3,1271214,60486,6,19921021,FOB,3931590
+2147,1,20,29,2,19920906,4-NOT SPECI,0,50,4645100,9151379,4,4459296,55741,6,19921130,RAIL,18580400
+2147,2,20,101,1,19920906,4-NOT SPECI,0,4,400440,9151379,1,396435,60066,4,19921115,AIR,400440
+2147,3,20,44,2,19920906,4-NOT SPECI,0,34,3209736,9151379,10,2888762,56642,4,19921108,REG AIR,32097360
+2147,4,20,11,2,19920906,4-NOT SPECI,0,11,1002111,9151379,6,941984,54660,7,19921116,AIR,6012666
+1991,1,4,110,1,19920907,4-NOT SPECI,0,39,3939429,13985441,6,3703063,60606,2,19921129,TRUCK,23636574
+1991,2,4,53,1,19920907,4-NOT SPECI,0,49,4669945,13985441,8,4296349,57183,6,19921129,SHIP,37359560
+1991,3,4,174,1,19920907,4-NOT SPECI,0,6,644502,13985441,2,631611,64450,1,19921008,REG AIR,1289004
+1991,4,4,138,2,19920907,4-NOT SPECI,0,6,622878,13985441,10,560590,62287,6,19921103,RAIL,6228780
+1991,5,4,60,1,19920907,4-NOT SPECI,0,49,4704294,13985441,6,4422036,57603,0,19921130,AIR,28225764
\ No newline at end of file
diff --git a/examples/test_case_data/parquet_test/model_desc/ssb.json b/examples/test_case_data/parquet_test/model_desc/ssb.json
new file mode 100644
index 0000000..f896765
--- /dev/null
+++ b/examples/test_case_data/parquet_test/model_desc/ssb.json
@@ -0,0 +1,25 @@
+{
+ "uuid" : "cd92588f-b987-4a12-b90f-e32c44345c64",
+ "version" : "1.5.3",
+ "name" : "ssb",
+ "description" : "",
+ "lookups" : [ ],
+ "dimensions" : [ {
+ "table" : "SSB.P_LINEORDER",
+ "columns" : [ "LO_ORDERDATE" ]
+ } ],
+ "metrics" : [ "LO_REVENUE", "LO_SUPPLYCOST", "V_REVENUE" ],
+ "capacity" : "MEDIUM",
+ "last_modified" : 1464441928669,
+ "fact_table" : "SSB.P_LINEORDER",
+ "filter_condition" : "",
+ "partition_desc" : {
+ "partition_date_column" : "SSB.P_LINEORDER.LO_ORDERDATE",
+ "partition_time_column" : null,
+ "partition_date_start" : 0,
+ "partition_date_format" : "yyyyMMdd",
+ "partition_time_format" : "HH:mm:ss",
+ "partition_type" : "APPEND",
+ "partition_condition_builder" : "org.apache.kylin.metadata.model.PartitionDesc$DefaultPartitionConditionBuilder"
+ }
+}
\ No newline at end of file
diff --git a/examples/test_case_data/parquet_test/project/default.json b/examples/test_case_data/parquet_test/project/default.json
index 7db3136..3af3911 100644
--- a/examples/test_case_data/parquet_test/project/default.json
+++ b/examples/test_case_data/parquet_test/project/default.json
@@ -3,7 +3,7 @@
"last_modified" : 1585736623334,
"version" : "3.0.0.20500",
"name" : "default",
- "tables" : [ "DEFAULT.TEST_COUNTRY", "EDW.TEST_SITES", "SSB.SUPPLIER", "SSB.CUSTOMER", "SSB.PART", "DEFAULT.TEST_KYLIN_FACT", "DEFAULT.TEST_CATEGORY_GROUPINGS", "DEFAULT.TEST_ORDER", "SSB.DATES", "EDW.TEST_SELLER_TYPE_DIM", "SSB.V_LINEORDER", "EDW.TEST_CAL_DT", "DEFAULT.TEST_ACCOUNT" ],
+ "tables" : [ "DEFAULT.TEST_COUNTRY", "EDW.TEST_SITES", "SSB.SUPPLIER", "SSB.CUSTOMER", "SSB.PART", "DEFAULT.TEST_KYLIN_FACT", "DEFAULT.TEST_CATEGORY_GROUPINGS", "DEFAULT.TEST_ORDER", "SSB.DATES", "EDW.TEST_SELLER_TYPE_DIM", "SSB.V_LINEORDER", "EDW.TEST_CAL_DT", "DEFAULT.TEST_ACCOUNT", "SSB.P_LINEORDER" ],
"owner" : null,
"status" : null,
"create_time_utc" : 0,
diff --git a/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json b/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json
new file mode 100644
index 0000000..03d91fa
--- /dev/null
+++ b/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json
@@ -0,0 +1,82 @@
+{
+ "uuid" : "b017d54b-e7b7-465b-a4db-b47f68baf1ad",
+ "version" : "2.1",
+ "name" : "P_LINEORDER",
+ "columns" : [ {
+ "id" : "1",
+ "name" : "LO_ORDERKEY",
+ "datatype" : "bigint"
+ }, {
+ "id" : "2",
+ "name" : "LO_LINENUMBER",
+ "datatype" : "bigint"
+ }, {
+ "id" : "3",
+ "name" : "LO_CUSTKEY",
+ "datatype" : "integer"
+ }, {
+ "id" : "4",
+ "name" : "LO_PARTKEY",
+ "datatype" : "integer"
+ }, {
+ "id" : "5",
+ "name" : "LO_SUPPKEY",
+ "datatype" : "integer"
+ }, {
+ "id" : "6",
+ "name" : "LO_ORDERDATE",
+ "datatype" : "integer"
+ }, {
+ "id" : "7",
+ "name" : "LO_ORDERPRIOTITY",
+ "datatype" : "varchar(15)"
+ }, {
+ "id" : "8",
+ "name" : "LO_SHIPPRIOTITY",
+ "datatype" : "integer"
+ }, {
+ "id" : "9",
+ "name" : "LO_QUANTITY",
+ "datatype" : "bigint"
+ }, {
+ "id" : "10",
+ "name" : "LO_EXTENDEDPRICE",
+ "datatype" : "bigint"
+ }, {
+ "id" : "11",
+ "name" : "LO_ORDTOTALPRICE",
+ "datatype" : "bigint"
+ }, {
+ "id" : "12",
+ "name" : "LO_DISCOUNT",
+ "datatype" : "bigint"
+ }, {
+ "id" : "13",
+ "name" : "LO_REVENUE",
+ "datatype" : "bigint"
+ }, {
+ "id" : "14",
+ "name" : "LO_SUPPLYCOST",
+ "datatype" : "bigint"
+ }, {
+ "id" : "15",
+ "name" : "LO_TAX",
+ "datatype" : "bigint"
+ }, {
+ "id" : "16",
+ "name" : "LO_COMMITDATE",
+ "datatype" : "integer"
+ }, {
+ "id" : "17",
+ "name" : "LO_SHIPMODE",
+ "datatype" : "varchar(10)"
+ }, {
+ "id" : "18",
+ "name" : "V_REVENUE",
+ "datatype" : "bigint"
+ } ],
+ "database" : "SSB",
+ "last_modified" : 1457444145578,
+ "source_type" : 9,
+ "table_type" : "VIRTUAL_VIEW"
+}
diff --git a/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql b/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql
new file mode 100644
index 0000000..aaf8bdb
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select sum(LO_REVENUE) from SSB.P_LINEORDER where LO_ORDERDATE = 19920906
+;{"scanRowCount":4,"scanBytes":0,"scanFiles":1,"cuboidId":[7]}
\ No newline at end of file
diff --git a/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql b/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql
new file mode 100644
index 0000000..c93432a
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select sum(LO_REVENUE) from SSB.P_LINEORDER where LO_ORDERDATE = '19920906'
+;{"scanRowCount":4,"scanBytes":0,"scanFiles":1,"cuboidId":[7]}
\ No newline at end of file
diff --git a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
index dc84886..58570c9 100644
--- a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
+++ b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
@@ -200,7 +200,9 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_unionall"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_values"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_window"));
+
tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_limit"));
+ tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_prune_segment"));
}
logger.info("Total {} tasks.", tasks.size());
return tasks;
@@ -213,6 +215,10 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
} else if (Boolean.parseBoolean(System.getProperty("isDeveloperMode", "false"))) {
//fullBuildCube("ci_inner_join_cube");
fullBuildCube("ci_left_join_cube");
+ buildSegments("ssb", new SegmentRange.TSRange(dateToLong("1992-09-04"), dateToLong("1992-09-05")),
+ new SegmentRange.TSRange(dateToLong("1992-09-05"), dateToLong("1992-09-06")),
+ new SegmentRange.TSRange(dateToLong("1992-09-06"), dateToLong("1992-09-07")),
+ new SegmentRange.TSRange(dateToLong("1992-09-07"), dateToLong("1992-09-08")));
} else {
//buildAndMergeCube("ci_inner_join_cube");
buildAndMergeCube("ci_left_join_cube");
@@ -223,6 +229,9 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
if (cubeName.equals("ci_inner_join_cube")) {
buildFourSegmentAndMerge(cubeName);
}
+ if (cubeName.equals("ssb")) {
+ buildSegments(cubeName, new SegmentRange.TSRange(dateToLong("1992-0-01"), dateToLong("2015-01-01")));
+ }
if (cubeName.equals("ci_left_join_cube")) {
buildTwoSegmentAndMerge(cubeName);
}
@@ -287,17 +296,10 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
// Round 1: Build 4 segment
ExecutableState state;
- state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2012-06-01")));
- Assert.assertEquals(ExecutableState.SUCCEED, state);
-
- state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2012-06-01"), dateToLong("2013-01-01")));
- Assert.assertEquals(ExecutableState.SUCCEED, state);
-
- state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2013-01-01"), dateToLong("2013-06-01")));
- Assert.assertEquals(ExecutableState.SUCCEED, state);
-
- state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2013-06-01"), dateToLong("2015-01-01")));
- Assert.assertEquals(ExecutableState.SUCCEED, state);
+ buildSegments(cubeName, new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2012-06-01")),
+ new SegmentRange.TSRange(dateToLong("2012-06-01"), dateToLong("2013-01-01")),
+ new SegmentRange.TSRange(dateToLong("2013-01-01"), dateToLong("2013-06-01")),
+ new SegmentRange.TSRange(dateToLong("2013-06-01"), dateToLong("2015-01-01")));
// Round 2: Merge two segments
state = mergeSegments(cubeName, dateToLong("2010-01-01"), dateToLong("2013-01-01"), false);
@@ -316,6 +318,19 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
secondSegment.getSegRange());
}
+ public void buildSegments(String cubeName, SegmentRange.TSRange ... toBuildRanges) throws Exception{ // test helper: cleans up the cube's segments, then builds one segment per range in toBuildRanges, in the order given
+ Assert.assertTrue(config.getHdfsWorkingDirectory().startsWith("file:")); // precondition: the configured working directory must be a "file:" location, otherwise the test fails before touching any segment
+
+ // cleanup all segments first (presumably removes segments left by earlier builds of this cube; confirm in cleanupSegments)
+ cleanupSegments(cubeName);
+
+ ExecutableState state;
+ for (SegmentRange.TSRange toBuildRange : toBuildRanges) {
+ state = buildCuboid(cubeName, toBuildRange);
+ Assert.assertEquals(ExecutableState.SUCCEED, state); // stops at the first range whose build does not report SUCCEED; later ranges are not attempted
+ }
+ }
+
class QueryCallable implements Callable<Pair<String, Throwable>> {
private NExecAndComp.CompareLevel compareLevel;
diff --git a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
index b570965..5a742a1 100644
--- a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
+++ b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
@@ -395,7 +395,10 @@ public class NExecAndComp {
.replaceAll("`TDVT`\\.", "") //
.replaceAll("\"POPHEALTH_ANALYTICS\"\\.", "") //
.replaceAll("`POPHEALTH_ANALYTICS`\\.", "") //
- .replaceAll("(?i)ISSUES\\.", "");
+ .replaceAll("(?i)ISSUES\\.", "")
+ .replaceAll("SSB\\.", "")
+ .replaceAll("\"SSB\"\\.", "")
+ .replaceAll("`SSB`\\.", "");
}
public static List<Pair<String, String>> fetchQueries(String folder) throws IOException {