You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2023/04/23 06:00:59 UTC
[kylin] 18/22: KYLIN-5499 fix partition col format mismatch with type
This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin5
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 08dc9f9b35c314e5a97f4b23b3e383d12417e2bb
Author: Jiale He <35...@users.noreply.github.com>
AuthorDate: Fri Feb 3 16:00:30 2023 +0800
KYLIN-5499 fix partition col format mismatch with type
---
.../kylin/query/routing/RealizationPrunerTest.java | 86 ++++++++++++++++++++++
.../kylin/query/routing/RealizationPruner.java | 64 ++++++++++++++--
2 files changed, 142 insertions(+), 8 deletions(-)
diff --git a/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java b/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java
new file mode 100644
index 0000000000..b22b90a244
--- /dev/null
+++ b/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.query.routing;
+
+import org.apache.kylin.metadata.datatype.DataType;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.springframework.test.util.ReflectionTestUtils;
+
+class RealizationPrunerTest {
+
+ @Test
+ void testCheckAndReformatDateType() { // target method is private static, so every call goes through reflection
+ long segmentTs = 1675396800000L; // epoch millis for 2023-02-03T04:00:00Z
+ { // date column, value already date-shaped: returned unchanged
+ String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+ "checkAndReformatDateType", "2023-02-03", segmentTs, new DataType("date", 0, 0));
+ Assertions.assertEquals("2023-02-03", formattedValue);
+ }
+
+ { // date column, value carries a time part: expected to be re-rendered as a plain date
+ String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+ "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("date", 0, 0));
+ Assertions.assertEquals("2023-02-03", formattedValue);
+ }
+
+ { // timestamp column, date-only value: expected to be re-rendered from segmentTs with a time part
+ String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+ "checkAndReformatDateType", "2023-02-03", segmentTs, new DataType("timestamp", 0, 0));
+ Assertions.assertEquals("2023-02-03 12:00:00", formattedValue); // NOTE(review): 04:00Z shown as 12:00:00 implies a UTC+8 formatting zone - confirm where it is configured
+ }
+
+ { // timestamp column, value already timestamp-shaped: returned unchanged
+ String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+ "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("timestamp", 0, 0));
+ Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+ }
+
+ { // varchar column: value is passed through untouched
+ String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+ "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("varchar", 0, 0));
+ Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+ }
+
+ { // string column: value is passed through untouched
+ String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+ "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("string", 0, 0));
+ Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+ }
+
+ { // integer column: value is passed through untouched
+ String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+ "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("integer", 0, 0));
+ Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+ }
+
+ { // bigint column: value is passed through untouched
+ String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class,
+ "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("bigint", 0, 0));
+ Assertions.assertEquals("2023-02-03 12:00:00", formattedValue);
+ }
+
+ { // a type name outside the handled set must raise IllegalArgumentException
+ DataType errorType = new DataType("error_type", 0, 0);
+ Assertions.assertThrows(IllegalArgumentException.class,
+ () -> ReflectionTestUtils.invokeMethod(RealizationPruner.class, "checkAndReformatDateType",
+ "2023-02-03 12:00:00", segmentTs, errorType));
+ }
+ }
+}
diff --git a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
index dc54aae001..5c56de2fb3 100644
--- a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
+++ b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
@@ -18,6 +18,9 @@
package org.apache.kylin.query.routing;
+import static org.apache.kylin.common.util.DateFormat.DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS;
+import static org.apache.kylin.common.util.DateFormat.DEFAULT_DATE_PATTERN;
+
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
@@ -27,6 +30,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
+import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.calcite.plan.RelOptPredicateList;
@@ -86,6 +90,9 @@ public class RealizationPruner {
private static final String INTEGER = "integer";
private static final String BIGINT = "bigint";
private static final TimeZone UTC_ZONE = TimeZone.getTimeZone("UTC");
+ private static final Pattern DATE_PATTERN = Pattern.compile("[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]");
+ private static final Pattern TIMESTAMP_PATTERN = Pattern.compile(
+ "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" + " " + "[0-9][0-9]:[0-9][0-9]:[0-9][0-9](\\.[0-9]*[1-9])?");
private static final Set<SqlKind> COMPARISON_OP_KIND_SET = ImmutableSet.of(SqlKind.GREATER_THAN,
SqlKind.GREATER_THAN_OR_EQUAL, //
SqlKind.LESS_THAN, SqlKind.LESS_THAN_OR_EQUAL, //
@@ -134,12 +141,18 @@ public class RealizationPruner {
val partitionColInputRef = transformColumn2RexInputRef(partitionColumn, olapContext.allTableScans);
if (allReadySegments.size() > 0 && dateFormat != null) {
- val firstSegmentRanges = transformSegment2RexCall(allReadySegments.get(0), dateFormat, rexBuilder,
- partitionColInputRef, partitionColumn.getType(), dataflow.isStreaming());
- RelDataTypeFamily segmentLiteralTypeFamily = getSegmentLiteralTypeFamily(firstSegmentRanges.getFirst());
- filterConditions = filterConditions.stream().map(filterCondition -> rewriteRexCall(filterCondition,
- rexBuilder, segmentLiteralTypeFamily, partitionColInputRef, dateFormat))
- .collect(Collectors.toList());
+ try {
+ val firstSegmentRanges = transformSegment2RexCall(allReadySegments.get(0), dateFormat, rexBuilder,
+ partitionColInputRef, partitionColumn.getType(), dataflow.isStreaming());
+ RelDataTypeFamily segmentLiteralTypeFamily = getSegmentLiteralTypeFamily(firstSegmentRanges.getFirst());
+ filterConditions = filterConditions.stream()//
+ .map(filterCondition -> rewriteRexCall(filterCondition, rexBuilder, segmentLiteralTypeFamily,
+ partitionColInputRef, dateFormat))
+ .collect(Collectors.toList());
+ } catch (Exception ex) {
+ log.warn("Segment pruning error: ", ex);
+ return allReadySegments;
+ }
}
var simplifiedSqlFilter = rexSimplify.simplifyAnds(filterConditions);
@@ -307,8 +320,9 @@ public class RealizationPruner {
start = DateFormat.formatToDateStr(dataSegment.getKSRange().getStart(), dateFormat);
end = DateFormat.formatToDateStr(dataSegment.getKSRange().getEnd(), dateFormat);
} else {
- start = DateFormat.formatToDateStr(dataSegment.getTSRange().getStart(), dateFormat);
- end = DateFormat.formatToDateStr(dataSegment.getTSRange().getEnd(), dateFormat);
+ Pair<String, String> pair = transformDateType(dataSegment, partitionColType, dateFormat);
+ start = pair.getFirst();
+ end = pair.getSecond();
}
val startRexLiteral = transformValue2RexLiteral(rexBuilder, start, partitionColType);
@@ -322,6 +336,40 @@ public class RealizationPruner {
return Pair.newPair(greaterThanOrEqualCall, lessCall);
}
+ private static Pair<String, String> transformDateType(NDataSegment dataSegment, DataType colType,
+ String dateFormat) { // returns the segment's TS-range (start, end) as strings checked against colType
+ long segmentStartTs = dataSegment.getTSRange().getStart();
+ long segmentEndTs = dataSegment.getTSRange().getEnd();
+ String formattedStart = DateFormat.formatToDateStr(segmentStartTs, dateFormat); // first render with the caller-supplied dateFormat
+ String formattedEnd = DateFormat.formatToDateStr(segmentEndTs, dateFormat);
+ String start = checkAndReformatDateType(formattedStart, segmentStartTs, colType); // kept, or re-rendered from the raw timestamp if it does not fit colType
+ String end = checkAndReformatDateType(formattedEnd, segmentEndTs, colType);
+ return Pair.newPair(start, end);
+ }
+
+ private static String checkAndReformatDateType(String formattedValue, long segmentTs, DataType colType) { // keeps formattedValue when it suits colType, otherwise re-formats segmentTs
+ switch (colType.getName()) {
+ case DATE:
+ if (DATE_PATTERN.matcher(formattedValue).matches()) { // whole string is dddd-dd-dd (digits only)
+ return formattedValue;
+ }
+ return DateFormat.formatToDateStr(segmentTs, DEFAULT_DATE_PATTERN); // mismatch: rebuild from the timestamp with the default date pattern
+ case TIMESTAMP:
+ if (TIMESTAMP_PATTERN.matcher(formattedValue).matches()) { // dddd-dd-dd dd:dd:dd, optional fraction that must end in 1-9
+ return formattedValue;
+ }
+ return DateFormat.formatToDateStr(segmentTs, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS); // mismatch: rebuild with the default datetime pattern
+ case VARCHAR:
+ case STRING:
+ case INTEGER:
+ case BIGINT:
+ return formattedValue; // these column types get no shape check; value is returned as given
+ default:
+ throw new IllegalArgumentException( // every other type name is rejected
+ String.format(Locale.ROOT, "%s data type is not supported for partition column", colType));
+ }
+ }
+
private static RexNode transformValue2RexLiteral(RexBuilder rexBuilder, String value, DataType colType) {
switch (colType.getName()) {
case DATE: