You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ya...@apache.org on 2021/11/08 07:18:04 UTC

[kylin] branch main updated: KYLIN-5117 Support percentile function after aggregate sub query (#1760)

This is an automated email from the ASF dual-hosted git repository.

yaqian pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/main by this push:
     new 12aeea9  KYLIN-5117 Support percentile function after aggregate sub query (#1760)
12aeea9 is described below

commit 12aeea918df9a0065cd1f507a67dbf271de45336
Author: MattHu <hu...@youzan.com>
AuthorDate: Mon Nov 8 15:17:57 2021 +0800

    KYLIN-5117 Support percentile function after aggregate sub query (#1760)
    
    * Support percentile function after aggregate sub query
    
    * add test
---
 .../test/resources/query/sql_percentile/query03.sql  | 20 ++++++++++++++++++++
 .../org/apache/kylin/query/SchemaProcessor.scala     |  2 +-
 .../kylin/query/runtime/plans/AggregatePlan.scala    | 16 +++++++++++-----
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/kylin-it/src/test/resources/query/sql_percentile/query03.sql b/kylin-it/src/test/resources/query/sql_percentile/query03.sql
new file mode 100644
index 0000000..2b20008
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_percentile/query03.sql
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select percentile_approx(t.gmv, 0.5) from ( select seller_id,sum(price) as gmv from test_kylin_fact group by seller_id ) t
+;{"scanRowCount":9928,"scanBytes":0,"scanFiles":1,"cuboidId":[1310735]}
\ No newline at end of file
diff --git a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/SchemaProcessor.scala b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/SchemaProcessor.scala
index 3aa800d..225c3e0 100644
--- a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/SchemaProcessor.scala
+++ b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/SchemaProcessor.scala
@@ -194,7 +194,7 @@ case class AggColumnInfo(
   hash: String,
   args: String*) {
   override def toString: String =
-    s"$funcName(${args.mkString("_")})_${index}_$hash"
+    s"${funcName}_${args.mkString("_")}__${index}_$hash"
 }
 
 case class TopNColumnInfo(tableName: String, columnId: Int, columnName: String)
diff --git a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala
index ebc1fad..4354b5d 100644
--- a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala
+++ b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala
@@ -18,15 +18,14 @@
 package org.apache.kylin.query.runtime.plans
 
 import org.apache.calcite.DataContext
-import org.apache.calcite.rel.core.Aggregate
-import org.apache.calcite.rel.core.AggregateCall
+import org.apache.calcite.rel.core.{Aggregate, AggregateCall}
 import org.apache.calcite.sql.SqlKind
 import org.apache.kylin.common.KylinConfig
 import org.apache.kylin.cube.CubeInstance
-import org.apache.kylin.metadata.model.{FunctionDesc, PartitionDesc, SegmentStatusEnum, TblColRef}
-import org.apache.kylin.query.relnode.{KylinAggregateCall, OLAPAggregateRel}
-import org.apache.kylin.query.runtime.RuntimeHelper
+import org.apache.kylin.metadata.model.{FunctionDesc, PartitionDesc, SegmentStatusEnum}
 import org.apache.kylin.query.SchemaProcessor
+import org.apache.kylin.query.relnode.{KylinAggregateCall, OLAPAggregateRel, OLAPRel}
+import org.apache.kylin.query.runtime.RuntimeHelper
 import org.apache.spark.sql.KylinFunctions._
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.expressions.{CreateArray, In}
@@ -217,6 +216,13 @@ object AggregatePlan extends LogEx {
             first(argNames.head).alias(aggName)
           case FunctionDesc.FUNC_GROUPING =>
             grouping(argNames.head).alias(aggName)
+          case FunctionDesc.FUNC_PERCENTILE => {
+            val col = argNames(0)
+            val inputColumnRowType = rel.getInput.asInstanceOf[OLAPRel].getColumnRowType
+            val percentage = inputColumnRowType.getColumnByIndex(call.getArgList.get(1)).getName
+            expr(s"approx_percentile($col, $percentage)").alias(aggName)
+          }
+
           case _ =>
             throw new IllegalArgumentException(
               s"""Unsupported function name $funcName""")