You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2021/03/04 14:31:40 UTC

[incubator-doris] branch master updated: [Rewrite]Rewrite from_unixtime to reduce calling this function (#5444)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c9ddd88  [Rewrite]Rewrite from_unixtime to reduce calling this function (#5444)
c9ddd88 is described below

commit c9ddd88e14f814e745ec0a3fc8f82450dd29256d
Author: xinghuayu007 <14...@qq.com>
AuthorDate: Thu Mar 4 22:31:28 2021 +0800

    [Rewrite]Rewrite from_unixtime to reduce calling this function (#5444)
    
    from_unixtime is a CPU-intensive function.
    SQL: select field from table where from_unixtime(field) > '2021-03-02'.
    If the table has one million rows, from_unixtime will be called one million times,
    which makes the query very slow.
    
    For issue #5443, we rewrite a predicate on from_unixtime into a timestamp comparison on the column
    itself, so that the function no longer has to be called for every row.
    This rewrite can bring a 2x improvement in query performance.
---
 .../java/org/apache/doris/analysis/Analyzer.java   |   2 +
 .../doris/rewrite/RewriteFromUnixTimeRule.java     | 108 +++++++++++++++++++++
 .../org/apache/doris/planner/QueryPlanTest.java    |  40 ++++++++
 3 files changed, 150 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
index 0956a6e..dabf2f5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
@@ -39,6 +39,7 @@ import org.apache.doris.rewrite.BetweenToCompoundRule;
 import org.apache.doris.rewrite.ExprRewriteRule;
 import org.apache.doris.rewrite.ExprRewriter;
 import org.apache.doris.rewrite.FoldConstantsRule;
+import org.apache.doris.rewrite.RewriteFromUnixTimeRule;
 import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule;
 import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap;
 import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule;
@@ -255,6 +256,7 @@ public class Analyzer {
             // pushdown and Parquet row group pruning based on min/max statistics.
             rules.add(NormalizeBinaryPredicatesRule.INSTANCE);
             rules.add(FoldConstantsRule.INSTANCE);
+            rules.add(RewriteFromUnixTimeRule.INSTANCE);
             exprRewriter_ = new ExprRewriter(rules);
             // init mv rewriter
             List<ExprRewriteRule> mvRewriteRules = Lists.newArrayList();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteFromUnixTimeRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteFromUnixTimeRule.java
new file mode 100644
index 0000000..a6d6d70
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteFromUnixTimeRule.java
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.rewrite;
+
+import org.apache.doris.analysis.Analyzer;
+import org.apache.doris.analysis.BinaryPredicate;
+import org.apache.doris.analysis.CompoundPredicate;
+import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.FunctionCallExpr;
+import org.apache.doris.analysis.FunctionParams;
+import org.apache.doris.analysis.LiteralExpr;
+import org.apache.doris.analysis.SlotRef;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.common.AnalysisException;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+/*
+ * Rewrites a comparison between from_unixtime() and a literal into a comparison on the
+ * underlying integer column, so that from_unixtime() is not evaluated once per row.
+ *
+ * Example:
+ *     select * from table where from_unixtime(query_time) > '2021-03-02 12:12:23'
+ * becomes
+ *     select * from table where query_time <= 253402271999 and query_time > 1614658343
+ * where query_time is an integer column and 1614658343 is 2021-03-02 12:12:23 at UTC+8.
+ *
+ * The expression is returned unchanged whenever it does not have this shape, or when the
+ * format argument or the literal cannot be handled.
+ *
+ * NOTE(review): the literal is parsed with SimpleDateFormat in the default time zone of the
+ * FE JVM; from_unixtime may be evaluated in a different (session) time zone - confirm.
+ * NOTE(review): with a format coarser than seconds (e.g. 'yyyy-MM-dd') the function returns
+ * a truncated string, so comparing the column with one timestamp is not obviously
+ * equivalent to the original predicate (in particular for = and !=) - confirm.
+ */
+public class RewriteFromUnixTimeRule implements ExprRewriteRule {
+    public static final RewriteFromUnixTimeRule INSTANCE = new RewriteFromUnixTimeRule();
+
+    // format applied when from_unixtime is called without a format argument
+    private static final String DEFAULT_FORMAT = "yyyy-MM-dd HH:mm:ss";
+    // from_unixtime supports the time range [1970-01-01 00:00:00 ~ 9999-12-31 23:59:59];
+    // 253402271999 is 9999-12-31 23:59:59 at UTC+8
+    private static final String MIN_TIMESTAMP = "0";
+    private static final String MAX_TIMESTAMP = "253402271999";
+
+    /*
+     * Returns the rewritten predicate, or expr itself when the rule does not apply.
+     * The analyzer argument is not used.
+     */
+    @Override
+    public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+        if (!(expr instanceof BinaryPredicate)) {
+            return expr;
+        }
+        BinaryPredicate bp = (BinaryPredicate) expr;
+        Expr left = bp.getChild(0);
+        if (!(left instanceof FunctionCallExpr)) {
+            return expr;
+        }
+        FunctionCallExpr fce = (FunctionCallExpr) left;
+        if (!fce.getFnName().getFunction().equalsIgnoreCase("from_unixtime")) {
+            return expr;
+        }
+        FunctionParams params = fce.getParams();
+        if (params == null) {
+            return expr;
+        }
+        // definition: from_unixtime(int, format)
+        int paramCount = params.exprs().size();
+        if (paramCount != 1 && paramCount != 2) {
+            return expr;
+        }
+        Expr paramSlot = params.exprs().get(0);
+        if (!(paramSlot instanceof SlotRef)) {
+            return expr;
+        }
+        SlotRef sr = (SlotRef) paramSlot;
+        // Leave the predicate untouched for anything but a slot bound to an integer column:
+        // replacing it with a constant FALSE would silently drop rows if the function can
+        // still be evaluated.
+        if (sr.getColumn() == null || !sr.getColumn().getType().isIntegerType()) {
+            return expr;
+        }
+        Expr right = bp.getChild(1);
+        if (!(right instanceof LiteralExpr)) {
+            return expr;
+        }
+        LiteralExpr le = (LiteralExpr) right;
+        String pattern = DEFAULT_FORMAT;
+        if (paramCount == 2) {
+            // the format must be a constant to be usable at rewrite time
+            Expr formatExpr = params.exprs().get(1);
+            if (!(formatExpr instanceof LiteralExpr)) {
+                return expr;
+            }
+            pattern = ((LiteralExpr) formatExpr).getStringValue();
+        }
+        Date date;
+        try {
+            date = new SimpleDateFormat(pattern).parse(le.getStringValue());
+        } catch (ParseException | IllegalArgumentException e) {
+            // ParseException: the literal does not match the format.
+            // IllegalArgumentException: the format is not a valid SimpleDateFormat pattern.
+            return expr;
+        }
+        BinaryPredicate.Operator op = bp.getOp();
+        BinaryPredicate comparison = new BinaryPredicate(op, sr,
+                LiteralExpr.create(String.valueOf(date.getTime() / 1000), Type.BIGINT));
+        if (op == BinaryPredicate.Operator.LT || op == BinaryPredicate.Operator.LE) {
+            // it must add the lower bound 0, otherwise negative values like -100 would be selected
+            BinaryPredicate lowerBound = new BinaryPredicate(BinaryPredicate.Operator.GE, sr,
+                    LiteralExpr.create(MIN_TIMESTAMP, Type.BIGINT));
+            return new CompoundPredicate(CompoundPredicate.Operator.AND, comparison, lowerBound);
+        }
+        if (op == BinaryPredicate.Operator.GT || op == BinaryPredicate.Operator.GE) {
+            // it must also add the upper bound, the largest timestamp from_unixtime supports
+            BinaryPredicate upperBound = new BinaryPredicate(BinaryPredicate.Operator.LE, sr,
+                    LiteralExpr.create(MAX_TIMESTAMP, Type.BIGINT));
+            return new CompoundPredicate(CompoundPredicate.Operator.AND, upperBound, comparison);
+        }
+        return comparison;
+    }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
index e1bc9d3..bf4288c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
@@ -1420,6 +1420,46 @@ public class QueryPlanTest {
         explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
         Assert.assertTrue(explainString.contains("OUTPUT EXPRS:3 | 4"));
     }
+
+    // The expected timestamps assume that the literals are parsed at UTC+8
+    // (1614650488 is 2021-03-02 10:01:28 at UTC+8), i.e. that the JVM default time zone is UTC+8.
+    @Test
+    public void testFromUnixTimeRewrite() throws Exception {
+        connectContext.setDatabase("default_cluster:test");
+        //default format
+        String sql = "select * from test1 where from_unixtime(query_time) > '2021-03-02 10:01:28'";
+        String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614650488"));
+        //format yyyy-MM-dd HH:mm:ss
+        sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd HH:mm:ss') > '2021-03-02 10:01:28'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614650488"));
+        //format yyyy-MM-dd HH:mm
+        sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd HH:mm') > '2021-03-02 10:01:28'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614650460"));
+        //format yyyy-MM-dd HH
+        sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd HH') > '2021-03-02 10:01:28'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614650400"));
+        //format yyyy-MM-dd
+        sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd') > '2021-03-02 10:01:28'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614614400"));
+        //format yyyy-MM
+        sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM') > '2021-03-02 10:01:28'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614528000"));
+        //format yyyy
+        sql = "select * from test1 where from_unixtime(query_time, 'yyyy') > '2021-03-02 10:01:28'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1609430400"));
+
+        //format less than
+        sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd') < '2021-03-02 10:01:28'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `query_time` < 1614614400, `query_time` >= 0"));
+    }
 }
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org