You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2021/03/05 06:55:11 UTC

[kylin] 02/02: add test case for integer type partition pruner

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit de95ab977e700178efd70705ead4edaca272d41f
Author: zhengshengjun <sh...@sina.com>
AuthorDate: Mon Mar 1 11:43:52 2021 +0800

    add test case for integer type partition pruner
---
 examples/test_case_data/parquet_test/cube/ssb.json |  17 +++
 .../test_case_data/parquet_test/cube_desc/ssb.json | 115 +++++++++++++++++++++
 .../parquet_test/data/SSB.P_LINEORDER.csv          |  18 ++++
 .../parquet_test/model_desc/ssb.json               |  25 +++++
 .../parquet_test/project/default.json              |   2 +-
 .../parquet_test/table/SSB.P_LINEORDER.json        |  82 +++++++++++++++
 .../resources/query/sql_prune_segment/query00.sql  |  20 ++++
 .../resources/query/sql_prune_segment/query01.sql  |  20 ++++
 .../kylin/engine/spark2/NBuildAndQueryTest.java    |  37 +++++--
 .../apache/kylin/engine/spark2/NExecAndComp.java   |   5 +-
 10 files changed, 328 insertions(+), 13 deletions(-)

diff --git a/examples/test_case_data/parquet_test/cube/ssb.json b/examples/test_case_data/parquet_test/cube/ssb.json
new file mode 100644
index 0000000..8e4f825
--- /dev/null
+++ b/examples/test_case_data/parquet_test/cube/ssb.json
@@ -0,0 +1,17 @@
+{
+  "uuid" : "70a9f288-3c01-4745-a04b-5641e82d6c69",
+  "last_modified" : 1594722761733,
+  "version" : "1.5.3",
+  "name" : "ssb",
+  "owner" : "ADMIN",
+  "descriptor" : "ssb",
+  "display_name" : "ssb",
+  "cost" : 50,
+  "status" : "DISABLED",
+  "segments" : [ ],
+  "create_time_utc" : 1457444500888,
+  "cuboid_bytes" : null,
+  "cuboid_bytes_recommend" : null,
+  "cuboid_last_optimized" : 0,
+  "snapshots" : { }
+}
\ No newline at end of file
diff --git a/examples/test_case_data/parquet_test/cube_desc/ssb.json b/examples/test_case_data/parquet_test/cube_desc/ssb.json
new file mode 100644
index 0000000..62ca74a
--- /dev/null
+++ b/examples/test_case_data/parquet_test/cube_desc/ssb.json
@@ -0,0 +1,115 @@
+{
+  "uuid" : "5c44df30-daec-486e-af90-927bf7851057",
+  "last_modified" : 1491925122527,
+  "version" : "1.5.3",
+  "name" : "ssb",
+  "model_name" : "ssb",
+  "description" : "",
+  "null_string" : null,
+  "dimensions" : [ {
+    "name" : "LO_QUANTITY",
+    "table" : "P_LINEORDER",
+    "column" : "LO_QUANTITY",
+    "derived" : null
+  }, {
+    "name" : "LO_DISCOUNT",
+    "table" : "P_LINEORDER",
+    "column" : "LO_DISCOUNT",
+    "derived" : null
+  }, {
+    "name" : "LO_ORDERDATE",
+    "table" : "P_LINEORDER",
+    "column" : "LO_ORDERDATE",
+    "derived" : null
+  } ],
+  "measures" : [ {
+    "name" : "_COUNT_",
+    "function" : {
+      "expression" : "COUNT",
+      "parameter" : {
+        "type" : "constant",
+        "value" : "1"
+      },
+      "returntype" : "bigint"
+    }
+  }, {
+    "name" : "P_LINEORDER.V_REVENUE_SUM",
+    "function" : {
+      "expression" : "SUM",
+      "parameter" : {
+        "type" : "column",
+        "value" : "P_LINEORDER.V_REVENUE"
+      },
+      "returntype" : "bigint"
+    }
+  }, {
+    "name" : "P_LINEORDER.LO_SUPPLYCOST_SUM",
+    "function" : {
+      "expression" : "SUM",
+      "parameter" : {
+        "type" : "column",
+        "value" : "P_LINEORDER.LO_SUPPLYCOST"
+      },
+      "returntype" : "bigint"
+    }
+  }, {
+    "name" : "P_LINEORDER.LO_REVENUE_SUM",
+    "function" : {
+      "expression" : "SUM",
+      "parameter" : {
+        "type" : "column",
+        "value" : "P_LINEORDER.LO_REVENUE"
+      },
+      "returntype" : "bigint"
+    }
+  } ],
+  "rowkey" : {
+    "rowkey_columns" : [ {
+      "column" : "P_LINEORDER.LO_ORDERDATE",
+      "encoding" : "integer:8",
+      "isShardBy" : false,
+      "index" : "eq"
+    }, {
+      "column" : "P_LINEORDER.LO_QUANTITY",
+      "encoding" : "integer:8",
+      "isShardBy" : false,
+      "index" : "eq"
+    }, {
+      "column" : "P_LINEORDER.LO_DISCOUNT",
+      "encoding" : "integer:8",
+      "isShardBy" : false,
+      "index" : "eq"
+    } ]
+  },
+  "hbase_mapping" : {
+    "column_family" : [ {
+      "name" : "F1",
+      "columns" : [ {
+        "qualifier" : "M",
+        "measure_refs" : [ "_COUNT_", "P_LINEORDER.V_REVENUE_SUM", "P_LINEORDER.LO_SUPPLYCOST_SUM", "P_LINEORDER.LO_REVENUE_SUM" ]
+      } ]
+    } ]
+  },
+  "aggregation_groups" : [ {
+    "includes" : [ "P_LINEORDER.LO_QUANTITY", "P_LINEORDER.LO_DISCOUNT" ],
+    "select_rule" : {
+      "hierarchy_dims" : [ ],
+      "mandatory_dims" : [ ],
+      "joint_dims" : [
+        [ "P_LINEORDER.LO_QUANTITY", "P_LINEORDER.LO_DISCOUNT" ]
+      ]
+    }
+  } ],
+  "notify_list" : [ ],
+  "status_need_notify" : [ ],
+  "partition_date_start" : 694224000000,
+  "partition_date_end" : 3153600000000,
+  "auto_merge_time_ranges" : [ ],
+  "retention_range" : 0,
+  "engine_type" : 6,
+  "storage_type" : 4,
+  "override_kylin_properties" : {
+    "kylin.cube.aggrgroup.is-mandatory-only-valid" : "true",
+    "kylin.storage.hbase.min-region-count" : "4"
+  }
+}
diff --git a/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv b/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv
new file mode 100644
index 0000000..24ada80
--- /dev/null
+++ b/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv
@@ -0,0 +1,18 @@
+4581,1,16,165,1,19920904,4-NOT SPECI,0,37,3941092,8959211,1,3901681,63909,4,19921105,MAIL,3941092
+4581,2,16,50,2,19920904,4-NOT SPECI,0,7,665035,8959211,1,658384,57003,2,19921020,MAIL,665035
+4581,3,16,21,2,19920904,4-NOT SPECI,0,46,4236692,8959211,4,4067224,55261,4,19921127,REG AIR,16946768
+2560,1,28,169,1,19920905,1-URGENT,0,41,4383556,15342679,7,4076707,64149,1,19921111,SHIP,30684892
+2560,2,28,4,2,19920905,1-URGENT,0,27,2440800,15342679,0,2440800,54240,1,19921116,MAIL,0
+2560,3,28,46,2,19920905,1-URGENT,0,31,2932724,15342679,1,2903396,56762,5,19921014,AIR,2932724
+2560,4,28,72,2,19920905,1-URGENT,0,36,3499452,15342679,1,3464457,58324,2,19921030,MAIL,3499452
+2560,5,28,42,2,19920905,1-URGENT,0,9,847836,15342679,4,813922,56522,2,19921029,REG AIR,3391344
+2560,6,28,108,1,19920905,1-URGENT,0,13,1310530,15342679,3,1271214,60486,6,19921021,FOB,3931590
+2147,1,20,29,2,19920906,4-NOT SPECI,0,50,4645100,9151379,4,4459296,55741,6,19921130,RAIL,18580400
+2147,2,20,101,1,19920906,4-NOT SPECI,0,4,400440,9151379,1,396435,60066,4,19921115,AIR,400440
+2147,3,20,44,2,19920906,4-NOT SPECI,0,34,3209736,9151379,10,2888762,56642,4,19921108,REG AIR,32097360
+2147,4,20,11,2,19920906,4-NOT SPECI,0,11,1002111,9151379,6,941984,54660,7,19921116,AIR,6012666
+1991,1,4,110,1,19920907,4-NOT SPECI,0,39,3939429,13985441,6,3703063,60606,2,19921129,TRUCK,23636574
+1991,2,4,53,1,19920907,4-NOT SPECI,0,49,4669945,13985441,8,4296349,57183,6,19921129,SHIP,37359560
+1991,3,4,174,1,19920907,4-NOT SPECI,0,6,644502,13985441,2,631611,64450,1,19921008,REG AIR,1289004
+1991,4,4,138,2,19920907,4-NOT SPECI,0,6,622878,13985441,10,560590,62287,6,19921103,RAIL,6228780
+1991,5,4,60,1,19920907,4-NOT SPECI,0,49,4704294,13985441,6,4422036,57603,0,19921130,AIR,28225764
\ No newline at end of file
diff --git a/examples/test_case_data/parquet_test/model_desc/ssb.json b/examples/test_case_data/parquet_test/model_desc/ssb.json
new file mode 100644
index 0000000..f896765
--- /dev/null
+++ b/examples/test_case_data/parquet_test/model_desc/ssb.json
@@ -0,0 +1,25 @@
+{
+  "uuid" : "cd92588f-b987-4a12-b90f-e32c44345c64",
+  "version" : "1.5.3",
+  "name" : "ssb",
+  "description" : "",
+  "lookups" : [ ],
+  "dimensions" : [ {
+    "table" : "SSB.P_LINEORDER",
+    "columns" : [ "LO_ORDERDATE" ]
+  } ],
+  "metrics" : [ "LO_REVENUE", "LO_SUPPLYCOST", "V_REVENUE" ],
+  "capacity" : "MEDIUM",
+  "last_modified" : 1464441928669,
+  "fact_table" : "SSB.P_LINEORDER",
+  "filter_condition" : "",
+  "partition_desc" : {
+    "partition_date_column" : "SSB.P_LINEORDER.LO_ORDERDATE",
+    "partition_time_column" : null,
+    "partition_date_start" : 0,
+    "partition_date_format" : "yyyyMMdd",
+    "partition_time_format" : "HH:mm:ss",
+    "partition_type" : "APPEND",
+    "partition_condition_builder" : "org.apache.kylin.metadata.model.PartitionDesc$DefaultPartitionConditionBuilder"
+  }
+}
\ No newline at end of file
diff --git a/examples/test_case_data/parquet_test/project/default.json b/examples/test_case_data/parquet_test/project/default.json
index 7db3136..3af3911 100644
--- a/examples/test_case_data/parquet_test/project/default.json
+++ b/examples/test_case_data/parquet_test/project/default.json
@@ -3,7 +3,7 @@
   "last_modified" : 1585736623334,
   "version" : "3.0.0.20500",
   "name" : "default",
-  "tables" : [ "DEFAULT.TEST_COUNTRY", "EDW.TEST_SITES", "SSB.SUPPLIER", "SSB.CUSTOMER", "SSB.PART", "DEFAULT.TEST_KYLIN_FACT", "DEFAULT.TEST_CATEGORY_GROUPINGS", "DEFAULT.TEST_ORDER", "SSB.DATES", "EDW.TEST_SELLER_TYPE_DIM", "SSB.V_LINEORDER", "EDW.TEST_CAL_DT", "DEFAULT.TEST_ACCOUNT" ],
+  "tables" : [ "DEFAULT.TEST_COUNTRY", "EDW.TEST_SITES", "SSB.SUPPLIER", "SSB.CUSTOMER", "SSB.PART", "DEFAULT.TEST_KYLIN_FACT", "DEFAULT.TEST_CATEGORY_GROUPINGS", "DEFAULT.TEST_ORDER", "SSB.DATES", "EDW.TEST_SELLER_TYPE_DIM", "SSB.V_LINEORDER", "EDW.TEST_CAL_DT", "DEFAULT.TEST_ACCOUNT", "SSB.P_LINEORDER" ],
   "owner" : null,
   "status" : null,
   "create_time_utc" : 0,
diff --git a/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json b/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json
new file mode 100644
index 0000000..03d91fa
--- /dev/null
+++ b/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json
@@ -0,0 +1,82 @@
+{
+  "uuid" : "b017d54b-e7b7-465b-a4db-b47f68baf1ad",
+  "version" : "2.1",
+  "name" : "P_LINEORDER",
+  "columns" : [ {
+    "id" : "1",
+    "name" : "LO_ORDERKEY",
+    "datatype" : "bigint"
+  }, {
+    "id" : "2",
+    "name" : "LO_LINENUMBER",
+    "datatype" : "bigint"
+  }, {
+    "id" : "3",
+    "name" : "LO_CUSTKEY",
+    "datatype" : "integer"
+  }, {
+    "id" : "4",
+    "name" : "LO_PARTKEY",
+    "datatype" : "integer"
+  }, {
+    "id" : "5",
+    "name" : "LO_SUPPKEY",
+    "datatype" : "integer"
+  }, {
+    "id" : "6",
+    "name" : "LO_ORDERDATE",
+    "datatype" : "integer"
+  }, {
+    "id" : "7",
+    "name" : "LO_ORDERPRIOTITY",
+    "datatype" : "varchar(15)"
+  }, {
+    "id" : "8",
+    "name" : "LO_SHIPPRIOTITY",
+    "datatype" : "integer"
+  }, {
+    "id" : "9",
+    "name" : "LO_QUANTITY",
+    "datatype" : "bigint"
+  }, {
+    "id" : "10",
+    "name" : "LO_EXTENDEDPRICE",
+    "datatype" : "bigint"
+  }, {
+    "id" : "11",
+    "name" : "LO_ORDTOTALPRICE",
+    "datatype" : "bigint"
+  }, {
+    "id" : "12",
+    "name" : "LO_DISCOUNT",
+    "datatype" : "bigint"
+  }, {
+    "id" : "13",
+    "name" : "LO_REVENUE",
+    "datatype" : "bigint"
+  }, {
+    "id" : "14",
+    "name" : "LO_SUPPLYCOST",
+    "datatype" : "bigint"
+  }, {
+    "id" : "15",
+    "name" : "LO_TAX",
+    "datatype" : "bigint"
+  }, {
+    "id" : "16",
+    "name" : "LO_COMMITDATE",
+    "datatype" : "integer"
+  }, {
+    "id" : "17",
+    "name" : "LO_SHIPMODE",
+    "datatype" : "varchar(10)"
+  }, {
+    "id" : "18",
+    "name" : "V_REVENUE",
+    "datatype" : "bigint"
+  } ],
+  "database" : "SSB",
+  "last_modified" : 1457444145578,
+  "source_type" : 9,
+  "table_type" : "VIRTUAL_VIEW"
+}
diff --git a/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql b/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql
new file mode 100644
index 0000000..aaf8bdb
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select sum(LO_REVENUE) from SSB.P_LINEORDER where LO_ORDERDATE = 19920906
+;{"scanRowCount":4,"scanBytes":0,"scanFiles":1,"cuboidId":[7]}
\ No newline at end of file
diff --git a/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql b/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql
new file mode 100644
index 0000000..c93432a
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select sum(LO_REVENUE) from SSB.P_LINEORDER where LO_ORDERDATE = '19920906'
+;{"scanRowCount":4,"scanBytes":0,"scanFiles":1,"cuboidId":[7]}
\ No newline at end of file
diff --git a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
index dc84886..58570c9 100644
--- a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
+++ b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
@@ -200,7 +200,9 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
             tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_unionall"));
             tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_values"));
             tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_window"));
+
             tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_limit"));
+            tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_prune_segment"));
         }
         logger.info("Total {} tasks.", tasks.size());
         return tasks;
@@ -213,6 +215,10 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
         } else if (Boolean.parseBoolean(System.getProperty("isDeveloperMode", "false"))) {
             //fullBuildCube("ci_inner_join_cube");
             fullBuildCube("ci_left_join_cube");
+            buildSegments("ssb", new SegmentRange.TSRange(dateToLong("1992-09-04"), dateToLong("1992-09-05")),
+                    new SegmentRange.TSRange(dateToLong("1992-09-05"), dateToLong("1992-09-06")),
+                    new SegmentRange.TSRange(dateToLong("1992-09-06"), dateToLong("1992-09-07")),
+                    new SegmentRange.TSRange(dateToLong("1992-09-07"), dateToLong("1992-09-08")));
         } else {
             //buildAndMergeCube("ci_inner_join_cube");
             buildAndMergeCube("ci_left_join_cube");
@@ -223,6 +229,9 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
         if (cubeName.equals("ci_inner_join_cube")) {
             buildFourSegmentAndMerge(cubeName);
         }
+        if (cubeName.equals("ssb")) { // single segment spanning 1992-01-01 .. 2015-01-01
+            buildSegments(cubeName, new SegmentRange.TSRange(dateToLong("1992-01-01"), dateToLong("2015-01-01")));
+        }
         if (cubeName.equals("ci_left_join_cube")) {
             buildTwoSegmentAndMerge(cubeName);
         }
@@ -287,17 +296,10 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
 
         // Round 1: Build 4 segment
         ExecutableState state;
-        state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2012-06-01")));
-        Assert.assertEquals(ExecutableState.SUCCEED, state);
-
-        state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2012-06-01"), dateToLong("2013-01-01")));
-        Assert.assertEquals(ExecutableState.SUCCEED, state);
-
-        state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2013-01-01"), dateToLong("2013-06-01")));
-        Assert.assertEquals(ExecutableState.SUCCEED, state);
-
-        state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2013-06-01"), dateToLong("2015-01-01")));
-        Assert.assertEquals(ExecutableState.SUCCEED, state);
+        buildSegments(cubeName, new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2012-06-01")),
+                new SegmentRange.TSRange(dateToLong("2012-06-01"), dateToLong("2013-01-01")),
+                new SegmentRange.TSRange(dateToLong("2013-01-01"), dateToLong("2013-06-01")),
+                new SegmentRange.TSRange(dateToLong("2013-06-01"), dateToLong("2015-01-01")));
 
         // Round 2: Merge two segments
         state = mergeSegments(cubeName, dateToLong("2010-01-01"), dateToLong("2013-01-01"), false);
@@ -316,6 +318,19 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest {
                 secondSegment.getSegRange());
     }
 
+    public void buildSegments(String cubeName, SegmentRange.TSRange ... toBuildRanges) throws Exception{
+        Assert.assertTrue(config.getHdfsWorkingDirectory().startsWith("file:"));
+
+        // cleanup all segments first
+        cleanupSegments(cubeName);
+
+        ExecutableState state;
+        for (SegmentRange.TSRange toBuildRange : toBuildRanges) {
+            state = buildCuboid(cubeName, toBuildRange);
+            Assert.assertEquals(ExecutableState.SUCCEED, state);
+        }
+    }
+
     class QueryCallable implements Callable<Pair<String, Throwable>> {
 
         private NExecAndComp.CompareLevel compareLevel;
diff --git a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
index b570965..5a742a1 100644
--- a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
+++ b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
@@ -395,7 +395,10 @@ public class NExecAndComp {
                 .replaceAll("`TDVT`\\.", "") //
                 .replaceAll("\"POPHEALTH_ANALYTICS\"\\.", "") //
                 .replaceAll("`POPHEALTH_ANALYTICS`\\.", "") //
-                .replaceAll("(?i)ISSUES\\.", "");
+                .replaceAll("(?i)ISSUES\\.", "")
+                .replaceAll("SSB\\.", "")
+                .replaceAll("\"SSB\"\\.", "")
+                .replaceAll("`SSB`\\.", "");
     }
 
     public static List<Pair<String, String>> fetchQueries(String folder) throws IOException {