You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by qi...@apache.org on 2022/10/15 09:34:58 UTC
[iotdb] branch master updated: Optimizing regex matching in Regexp (#7618)
This is an automated email from the ASF dual-hosted git repository.
qiaojialin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new cbf51cc26f Optimizing regex matching in Regexp (#7618)
cbf51cc26f is described below
commit cbf51cc26f49b4fb8848034b7dd293a20747545c
Author: Liu Xuxin <37...@users.noreply.github.com>
AuthorDate: Sat Oct 15 17:34:51 2022 +0800
Optimizing regex matching in Regexp (#7618)
---
.../util/TSFileConfigUtilCompletenessTest.java | 3 +-
.../db/integration/IoTDBRepeatPatternNameIT.java | 67 ++++++++++++++++++++++
.../resources/conf/iotdb-datanode.properties | 4 ++
.../java/org/apache/iotdb/db/conf/IoTDBConfig.java | 10 ++++
.../org/apache/iotdb/db/conf/IoTDBDescriptor.java | 7 +++
.../iotdb/tsfile/common/conf/TSFileConfig.java | 10 ++++
.../iotdb/tsfile/read/filter/operator/Regexp.java | 48 +++++++++++++++-
7 files changed, 147 insertions(+), 2 deletions(-)
diff --git a/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java b/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
index 5ed14b4051..a3c40fbfc9 100644
--- a/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
+++ b/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
@@ -74,7 +74,8 @@ public class TSFileConfigUtilCompletenessTest {
"setFreqEncodingSNR",
"setFreqEncodingBlockSize",
"setMaxTsBlockLineNumber",
- "setMaxTsBlockSizeInBytes"
+ "setMaxTsBlockSizeInBytes",
+ "setPatternMatchingThreshold"
};
Set<String> newSetters =
Arrays.stream(TSFileConfig.class.getMethods())
diff --git a/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java b/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java
new file mode 100644
index 0000000000..301d8d5a14
--- /dev/null
+++ b/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.integration;
+
+import org.apache.iotdb.integration.env.EnvFactory;
+import org.apache.iotdb.jdbc.IoTDBSQLException;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.sql.Connection;
+import java.sql.Statement;
+
+/**
+ * Integration test for REGEXP filters whose pattern contains nested quantifiers: the query must
+ * either complete or be rejected with the pattern access threshold error, and it must do so
+ * quickly.
+ */
+public class IoTDBRepeatPatternNameIT {
+
+  @Before
+  public void startUp() throws Exception {
+    EnvFactory.getEnv().initBeforeClass();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    EnvFactory.getEnv().cleanAfterClass();
+  }
+
+  /**
+   * Inserts one text value made of twenty 'a' characters followed by 'b', then filters it with the
+   * pattern (a+)+s. If the query is rejected with an IoTDBSQLException, its message must mention
+   * the exceeded threshold. The time elapsed since just before the connection was opened must stay
+   * below five seconds.
+   */
+  @Test
+  public void testLargePattern() throws Exception {
+    // The run of 'a' characters has to be long enough for the matcher to exceed the access
+    // threshold; otherwise the exception this test looks for never occurs.
+    StringBuilder insertSql =
+        new StringBuilder(
+            "insert into root.ln.wf01.wt01(timestamp,status,s) values(1509465780000,false,'");
+    int remaining = 20;
+    while (remaining-- > 0) {
+      insertSql.append('a');
+    }
+    insertSql.append("b');");
+
+    final long begin = System.currentTimeMillis();
+    try (Connection connection = EnvFactory.getEnv().getConnection();
+        Statement statement = connection.createStatement()) {
+      statement.execute(insertSql.toString());
+      try {
+        statement.execute("select s from root.ln.wf01.wt01 where s REGEXP'(a+)+s'");
+      } catch (IoTDBSQLException e) {
+        String message = e.getMessage();
+        Assert.assertTrue(message.contains("Pattern access threshold exceeded"));
+      }
+      long elapsed = System.currentTimeMillis() - begin;
+      Assert.assertTrue(elapsed < 5_000L);
+    }
+  }
+}
diff --git a/server/src/assembly/resources/conf/iotdb-datanode.properties b/server/src/assembly/resources/conf/iotdb-datanode.properties
index 73150bd4be..3bc9c0e3df 100644
--- a/server/src/assembly/resources/conf/iotdb-datanode.properties
+++ b/server/src/assembly/resources/conf/iotdb-datanode.properties
@@ -818,6 +818,10 @@ timestamp_precision=ms
# Datatype: int
# max_number_of_points_in_page=1048576
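+# Limit on the number of character accesses performed while matching one value against a REGEXP pattern.
+# A match that goes past this limit is aborted with a "Pattern access threshold exceeded" error.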
+# Datatype: int
+# pattern_matching_threshold=1000000
+
# Max size limitation of input string
# Datatype: int
# max_string_length=128
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
index 4e89c78d0f..451efde8a6 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
@@ -828,6 +828,8 @@ public class IoTDBConfig {
/** time cost(ms) threshold for slow query. Unit: millisecond */
private long slowQueryThreshold = 5000;
+ /**
+ * Pattern matching threshold. IoTDBDescriptor uses this value as the fallback when the
+ * pattern_matching_threshold property is not set.
+ */
+ private int patternMatchingThreshold = 1000000;
+
/**
* whether enable the rpc service. This parameter has no a corresponding field in the
* iotdb-datanode.properties
@@ -3425,4 +3427,12 @@ public class IoTDBConfig {
public double getUsableCompactionMemoryProportion() {
return 1.0d - chunkMetadataSizeProportion;
}
+
+ /**
+ * Returns the pattern matching threshold held by this config. IoTDBDescriptor passes it as the
+ * default when reading the pattern_matching_threshold property.
+ */
+ public int getPatternMatchingThreshold() {
+ return patternMatchingThreshold;
+ }
+
+ /** Replaces the pattern matching threshold held by this config. */
+ public void setPatternMatchingThreshold(int patternMatchingThreshold) {
+ this.patternMatchingThreshold = patternMatchingThreshold;
+ }
}
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
index a2092c9cd5..5c76305aa3 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
@@ -947,6 +947,13 @@ public class IoTDBDescriptor {
.setDfsClientFailoverProxyProvider(
properties.getProperty(
"dfs_client_failover_proxy_provider", conf.getDfsClientFailoverProxyProvider()));
+ TSFileDescriptor.getInstance()
+ .getConfig()
+ .setPatternMatchingThreshold(
+ Integer.parseInt(
+ properties.getProperty(
+ "pattern_matching_threshold",
+ String.valueOf(conf.getPatternMatchingThreshold()))));
TSFileDescriptor.getInstance()
.getConfig()
.setUseKerberos(
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
index a157d57058..c79c55d229 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
@@ -156,6 +156,8 @@ public class TSFileConfig implements Serializable {
/** Maximum number of lines in a single TsBlock */
private int maxTsBlockLineNumber = 1000;
+ /**
+ * Limit on character accesses during one regex match; the Regexp filter reads it through
+ * getPatternMatchingThreshold() and aborts a match that goes past it.
+ */
+ private int patternMatchingThreshold = 1000000;
+
public TSFileConfig() {}
public int getGroupSizeInByte() {
@@ -453,4 +455,12 @@ public class TSFileConfig implements Serializable {
public void setMaxTsBlockLineNumber(int maxTsBlockLineNumber) {
this.maxTsBlockLineNumber = maxTsBlockLineNumber;
}
+
+ /**
+ * Returns the pattern matching threshold. The Regexp filter reads this value as its limit on
+ * character accesses for a single match.
+ */
+ public int getPatternMatchingThreshold() {
+ return patternMatchingThreshold;
+ }
+
+ /**
+ * Sets the pattern matching threshold. IoTDBDescriptor calls this with the parsed value of the
+ * pattern_matching_threshold property.
+ */
+ public void setPatternMatchingThreshold(int patternMatchingThreshold) {
+ this.patternMatchingThreshold = patternMatchingThreshold;
+ }
}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
index 646b51ba57..31e7260469 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
@@ -18,6 +18,7 @@
*/
package org.apache.iotdb.tsfile.read.filter.operator;
+import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics;
import org.apache.iotdb.tsfile.read.filter.basic.Filter;
import org.apache.iotdb.tsfile.read.filter.factory.FilterSerializeId;
@@ -66,7 +67,7 @@ public class Regexp<T extends Comparable<T>> implements Filter {
if (filterType != FilterType.VALUE_FILTER) {
return false;
}
- return pattern.matcher(value.toString()).find();
+ return pattern.matcher(new MatcherInput(value.toString(), new AccessCount())).find();
}
@Override
@@ -124,4 +125,49 @@ public class Regexp<T extends Comparable<T>> implements Filter {
public FilterSerializeId getSerializeId() {
return FilterSerializeId.REGEXP;
}
+
+  /**
+   * Counts the character reads reported to it and aborts once the configured pattern matching
+   * threshold has been exceeded. A fresh instance is created for every match, so the budget is
+   * per matched value.
+   */
+  private static class AccessCount {
+    /** Number of character reads recorded so far. */
+    private int count;
+
+    /** Maximum number of character reads allowed, taken from the TsFile configuration. */
+    private final int accessThreshold =
+        TSFileDescriptor.getInstance().getConfig().getPatternMatchingThreshold();
+
+    /**
+     * Records one character read.
+     *
+     * @throws IllegalStateException if this read takes the total above the threshold
+     */
+    public void check() throws IllegalStateException {
+      // Count the current read before comparing. The post-increment form compared the number of
+      // earlier reads, which let one read beyond the threshold pass unnoticed.
+      if (++this.count > accessThreshold) {
+        throw new IllegalStateException("Pattern access threshold exceeded");
+      }
+    }
+  }
+
+ private static class MatcherInput implements CharSequence {
+
+ private final CharSequence value;
+
+ private final AccessCount access;
+
+ public MatcherInput(CharSequence value, AccessCount access) {
+ this.value = value;
+ this.access = access;
+ }
+
+ @Override
+ public char charAt(int index) {
+ this.access.check();
+ return this.value.charAt(index);
+ }
+
+ @Override
+ public CharSequence subSequence(int start, int end) {
+ return new MatcherInput(this.value.subSequence(start, end), this.access);
+ }
+
+ @Override
+ public int length() {
+ return this.value.length();
+ }
+
+ @Override
+ public String toString() {
+ return this.value.toString();
+ }
+ }
}