You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@kylin.apache.org by GitBox <gi...@apache.org> on 2021/04/24 11:24:09 UTC

[GitHub] [kylin] zhengshengjun commented on a change in pull request #1642: KYLIN-4980 Support prunning segments from complex filter co…

zhengshengjun commented on a change in pull request #1642:
URL: https://github.com/apache/kylin/pull/1642#discussion_r619646558



##########
File path: kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala
##########
@@ -295,8 +295,48 @@ class FilePruner(cubeInstance: CubeInstance,
     }
   }
 
-  private def getSpecFilter(dataFilters: Seq[Expression], col: Attribute): Seq[Expression] = {
-    dataFilters.filter(_.references.subsetOf(AttributeSet(col)))
+  private def getSegmentFilter(dataFilters: Seq[Expression], col: Attribute): Seq[Expression] = {
+    dataFilters.map(extractSegmentFilter(_, col)).filter(!_.equals(None)).map(_.get)
+  }
+
+  private def extractSegmentFilter(filter: Expression, col: Attribute): Option[Expression] = {
+    filter match {
+      case expressions.Or(left, right) =>
+        val leftChild = extractSegmentFilter(left, col)
+        val rightChild = extractSegmentFilter(right, col)
+
+        //if there exists a leaf node that doesn't contain the partition column, the parent
+        //filter is unnecessary for segment pruning.
+        //e.g. "where a = xxx or partition = xxx", we can't filter any segment
+        if (leftChild.eq(None) || rightChild.eq(None)) {
+          None
+        } else {
+          Some(expressions.Or(leftChild.get, rightChild.get))
+        }
+      case expressions.And(left, right) =>
+        val leftChild = extractSegmentFilter(left, col)
+        val rightChild = extractSegmentFilter(right, col)
+
+        //if there is only one leaf-node that contains partition column
+        //e.g. "where a = xxx and partition = xxx",
+        //then we can filter segment using "where partition = xxx"
+        if (!leftChild.eq(None) && !rightChild.eq(None)) {
+          Some(expressions.And(leftChild.get, rightChild.get))
+        } else if (!rightChild.eq(None)) {
+          rightChild
+        } else if (!leftChild.eq(None)) {
+          leftChild
+        } else {
+          None
+        }
+      case _ =>
+        //other unary filter like EqualTo, GreaterThan, GreaterThanOrEqual, etc.
+        if (filter.references.contains(col)) {

Review comment:
       good suggestion ~




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org