You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2023/04/23 06:00:59 UTC

[kylin] 18/22: KYLIN-5499 fix partition col format mismatch with type

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin5
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 08dc9f9b35c314e5a97f4b23b3e383d12417e2bb
Author: Jiale He <35...@users.noreply.github.com>
AuthorDate: Fri Feb 3 16:00:30 2023 +0800

    KYLIN-5499 fix partition col format mismatch with type
---
 .../kylin/query/routing/RealizationPrunerTest.java | 86 ++++++++++++++++++++++
 .../kylin/query/routing/RealizationPruner.java     | 64 ++++++++++++++--
 2 files changed, 142 insertions(+), 8 deletions(-)

diff --git a/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java b/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java
new file mode 100644
index 0000000000..b22b90a244
--- /dev/null
+++ b/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.query.routing;
+
+import org.apache.kylin.metadata.datatype.DataType;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.springframework.test.util.ReflectionTestUtils;
+
+class RealizationPrunerTest {
+
+    @Test
+    void testCheckAndReformatDateType() {
+        long segmentTs = 1675396800000L; // 2023-02-03T04:00:00Z; NOTE(review): expectations look UTC+8-bound, confirm
+        {
+            String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+                    "checkAndReformatDateType", "2023-02-03", segmentTs, new DataType("date", 0, 0));
+            Assertions.assertEquals("2023-02-03", formattedValue);
+        }
+
+        {
+            String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+                    "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("date", 0, 0));
+            Assertions.assertEquals("2023-02-03", formattedValue);
+        }
+
+        {
+            String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+                    "checkAndReformatDateType", "2023-02-03", segmentTs, new DataType("timestamp", 0, 0));
+            Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+        }
+
+        {
+            String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+                    "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("timestamp", 0, 0));
+            Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+        }
+
+        {
+            String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+                    "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("varchar", 0, 0));
+            Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+        }
+
+        {
+            String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+                    "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("string", 0, 0));
+            Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+        }
+
+        {
+            String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+                    "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("integer", 0, 0));
+            Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+        }
+
+        {
+            String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+                    "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("bigint", 0, 0));
+            Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+        }
+
+        {
+            DataType errorType = new DataType("error_type", 0, 0);
+            Assertions.assertThrows(IllegalArgumentException.class,
+                    () -> ReflectionTestUtils.invokeMethod(RealizationPruner.class, "checkAndReformatDateType",
+                            "2023-02-03 12:00:00", segmentTs, errorType));
+        }
+    }
+}
diff --git a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
index dc54aae001..5c56de2fb3 100644
--- a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
+++ b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
@@ -18,6 +18,9 @@
 
 package org.apache.kylin.query.routing;
 
+import static org.apache.kylin.common.util.DateFormat.DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS;
+import static org.apache.kylin.common.util.DateFormat.DEFAULT_DATE_PATTERN;
+
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Collection;
@@ -27,6 +30,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TimeZone;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import org.apache.calcite.plan.RelOptPredicateList;
@@ -86,6 +90,9 @@ public class RealizationPruner {
     private static final String INTEGER = "integer";
     private static final String BIGINT = "bigint";
     private static final TimeZone UTC_ZONE = TimeZone.getTimeZone("UTC");
+    private static final Pattern DATE_PATTERN = Pattern.compile("[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]");
+    private static final Pattern TIMESTAMP_PATTERN = Pattern.compile(
+            "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" + " " + "[0-9][0-9]:[0-9][0-9]:[0-9][0-9](\\.[0-9]*[1-9])?");
     private static final Set<SqlKind> COMPARISON_OP_KIND_SET = ImmutableSet.of(SqlKind.GREATER_THAN,
             SqlKind.GREATER_THAN_OR_EQUAL, //
             SqlKind.LESS_THAN, SqlKind.LESS_THAN_OR_EQUAL, //
@@ -134,12 +141,18 @@ public class RealizationPruner {
 
         val partitionColInputRef = transformColumn2RexInputRef(partitionColumn, olapContext.allTableScans);
         if (allReadySegments.size() > 0 && dateFormat != null) {
-            val firstSegmentRanges = transformSegment2RexCall(allReadySegments.get(0), dateFormat, rexBuilder,
-                    partitionColInputRef, partitionColumn.getType(), dataflow.isStreaming());
-            RelDataTypeFamily segmentLiteralTypeFamily = getSegmentLiteralTypeFamily(firstSegmentRanges.getFirst());
-            filterConditions = filterConditions.stream().map(filterCondition -> rewriteRexCall(filterCondition,
-                    rexBuilder, segmentLiteralTypeFamily, partitionColInputRef, dateFormat))
-                    .collect(Collectors.toList());
+            try {
+                val firstSegmentRanges = transformSegment2RexCall(allReadySegments.get(0), dateFormat, rexBuilder,
+                        partitionColInputRef, partitionColumn.getType(), dataflow.isStreaming());
+                RelDataTypeFamily segmentLiteralTypeFamily = getSegmentLiteralTypeFamily(firstSegmentRanges.getFirst());
+                filterConditions = filterConditions.stream()//
+                        .map(filterCondition -> rewriteRexCall(filterCondition, rexBuilder, segmentLiteralTypeFamily,
+                                partitionColInputRef, dateFormat))
+                        .collect(Collectors.toList());
+            } catch (Exception ex) {
+                log.warn("Segment pruning error: ", ex);
+                return allReadySegments;
+            }
         }
         var simplifiedSqlFilter = rexSimplify.simplifyAnds(filterConditions);
 
@@ -307,8 +320,9 @@ public class RealizationPruner {
             start = DateFormat.formatToDateStr(dataSegment.getKSRange().getStart(), dateFormat);
             end = DateFormat.formatToDateStr(dataSegment.getKSRange().getEnd(), dateFormat);
         } else {
-            start = DateFormat.formatToDateStr(dataSegment.getTSRange().getStart(), dateFormat);
-            end = DateFormat.formatToDateStr(dataSegment.getTSRange().getEnd(), dateFormat);
+            Pair<String, String> pair = transformDateType(dataSegment, partitionColType, dateFormat);
+            start = pair.getFirst();
+            end = pair.getSecond();
         }
 
         val startRexLiteral = transformValue2RexLiteral(rexBuilder, start, partitionColType);
@@ -322,6 +336,40 @@ public class RealizationPruner {
         return Pair.newPair(greaterThanOrEqualCall, lessCall);
     }
 
+    /** Formats both ends of the segment's TS range with dateFormat, then aligns each with colType. */
+    private static Pair<String, String> transformDateType(NDataSegment dataSegment, DataType colType,
+            String dateFormat) {
+        final long startTs = dataSegment.getTSRange().getStart();
+        final long endTs = dataSegment.getTSRange().getEnd();
+        final String rawStart = DateFormat.formatToDateStr(startTs, dateFormat);
+        final String rawEnd = DateFormat.formatToDateStr(endTs, dateFormat);
+        return Pair.newPair(checkAndReformatDateType(rawStart, startTs, colType),
+                checkAndReformatDateType(rawEnd, endTs, colType));
+    }
+
+    /**
+     * Returns formattedValue as-is when it already fits colType; DATE/TIMESTAMP values in any other
+     * shape are re-rendered from segmentTs with the default pattern. Unknown types are rejected.
+     */
+    private static String checkAndReformatDateType(String formattedValue, long segmentTs, DataType colType) {
+        switch (colType.getName()) {
+        case DATE:
+            return DATE_PATTERN.matcher(formattedValue).matches() ? formattedValue
+                    : DateFormat.formatToDateStr(segmentTs, DEFAULT_DATE_PATTERN);
+        case TIMESTAMP:
+            return TIMESTAMP_PATTERN.matcher(formattedValue).matches() ? formattedValue
+                    : DateFormat.formatToDateStr(segmentTs, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS);
+        case VARCHAR:
+        case STRING:
+        case INTEGER:
+        case BIGINT:
+            return formattedValue;
+        default:
+            throw new IllegalArgumentException(
+                    String.format(Locale.ROOT, "%s data type is not supported for partition column", colType));
+        }
+    }
+
     private static RexNode transformValue2RexLiteral(RexBuilder rexBuilder, String value, DataType colType) {
         switch (colType.getName()) {
         case DATE: