You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by qi...@apache.org on 2022/10/15 09:34:58 UTC

[iotdb] branch master updated: Optimizing regex matching in Regexp (#7618)

This is an automated email from the ASF dual-hosted git repository.

qiaojialin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new cbf51cc26f Optimizing regex matching in Regexp (#7618)
cbf51cc26f is described below

commit cbf51cc26f49b4fb8848034b7dd293a20747545c
Author: Liu Xuxin <37...@users.noreply.github.com>
AuthorDate: Sat Oct 15 17:34:51 2022 +0800

    Optimizing regex matching in Regexp (#7618)
---
 .../util/TSFileConfigUtilCompletenessTest.java     |  3 +-
 .../db/integration/IoTDBRepeatPatternNameIT.java   | 67 ++++++++++++++++++++++
 .../resources/conf/iotdb-datanode.properties       |  4 ++
 .../java/org/apache/iotdb/db/conf/IoTDBConfig.java | 10 ++++
 .../org/apache/iotdb/db/conf/IoTDBDescriptor.java  |  7 +++
 .../iotdb/tsfile/common/conf/TSFileConfig.java     | 10 ++++
 .../iotdb/tsfile/read/filter/operator/Regexp.java  | 48 +++++++++++++++-
 7 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java b/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
index 5ed14b4051..a3c40fbfc9 100644
--- a/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
+++ b/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
@@ -74,7 +74,8 @@ public class TSFileConfigUtilCompletenessTest {
       "setFreqEncodingSNR",
       "setFreqEncodingBlockSize",
       "setMaxTsBlockLineNumber",
-      "setMaxTsBlockSizeInBytes"
+      "setMaxTsBlockSizeInBytes",
+      "setPatternMatchingThreshold"
     };
     Set<String> newSetters =
         Arrays.stream(TSFileConfig.class.getMethods())
diff --git a/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java b/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java
new file mode 100644
index 0000000000..301d8d5a14
--- /dev/null
+++ b/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.integration;
+
+import org.apache.iotdb.integration.env.EnvFactory;
+import org.apache.iotdb.jdbc.IoTDBSQLException;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.sql.Connection;
+import java.sql.Statement;
+
+public class IoTDBRepeatPatternNameIT {
+  @Before
+  public void startUp() throws Exception {
+    EnvFactory.getEnv().initBeforeClass();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    EnvFactory.getEnv().cleanAfterClass();
+  }
+
+  @Test
+  public void testLargePattern() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    sb.append("insert into root.ln.wf01.wt01(timestamp,status,s) values(1509465780000,false,'");
+    // the 'a' run must be long enough for the regex below to exceed the pattern matching
+    // threshold, so that the query is aborted with an exception and the test finishes in time
+    for (int i = 0; i < 20; ++i) {
+      sb.append('a');
+    }
+    sb.append("b');");
+    long startTime = System.currentTimeMillis();
+    try (Connection connection = EnvFactory.getEnv().getConnection();
+        Statement statement = connection.createStatement()) {
+      statement.execute(sb.toString());
+      try {
+        statement.execute("select s from root.ln.wf01.wt01 where s REGEXP'(a+)+s'");
+      } catch (IoTDBSQLException e) {
+        Assert.assertTrue(e.getMessage().contains("Pattern access threshold exceeded"));
+      }
+      long timeCost = System.currentTimeMillis() - startTime;
+      Assert.assertTrue(timeCost < 5_000L);
+    }
+  }
+}
diff --git a/server/src/assembly/resources/conf/iotdb-datanode.properties b/server/src/assembly/resources/conf/iotdb-datanode.properties
index 73150bd4be..3bc9c0e3df 100644
--- a/server/src/assembly/resources/conf/iotdb-datanode.properties
+++ b/server/src/assembly/resources/conf/iotdb-datanode.properties
@@ -818,6 +818,10 @@ timestamp_precision=ms
 # Datatype: int
 # max_number_of_points_in_page=1048576
 
+# Max number of character accesses allowed while matching one value against a regex; the match is aborted with an error once exceeded
+# Datatype: int
+# pattern_matching_threshold=1000000
+
 # Max size limitation of input string
 # Datatype: int
 # max_string_length=128
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
index 4e89c78d0f..451efde8a6 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
@@ -828,6 +828,8 @@ public class IoTDBConfig {
   /** time cost(ms) threshold for slow query. Unit: millisecond */
   private long slowQueryThreshold = 5000;
 
+  private int patternMatchingThreshold = 1000000;
+
   /**
    * whether enable the rpc service. This parameter has no a corresponding field in the
    * iotdb-datanode.properties
@@ -3425,4 +3427,12 @@ public class IoTDBConfig {
   public double getUsableCompactionMemoryProportion() {
     return 1.0d - chunkMetadataSizeProportion;
   }
+
+  public int getPatternMatchingThreshold() {
+    return patternMatchingThreshold;
+  }
+
+  public void setPatternMatchingThreshold(int patternMatchingThreshold) {
+    this.patternMatchingThreshold = patternMatchingThreshold;
+  }
 }
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
index a2092c9cd5..5c76305aa3 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
@@ -947,6 +947,13 @@ public class IoTDBDescriptor {
         .setDfsClientFailoverProxyProvider(
             properties.getProperty(
                 "dfs_client_failover_proxy_provider", conf.getDfsClientFailoverProxyProvider()));
+    TSFileDescriptor.getInstance()
+        .getConfig()
+        .setPatternMatchingThreshold(
+            Integer.parseInt(
+                properties.getProperty(
+                    "pattern_matching_threshold",
+                    String.valueOf(conf.getPatternMatchingThreshold()))));
     TSFileDescriptor.getInstance()
         .getConfig()
         .setUseKerberos(
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
index a157d57058..c79c55d229 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
@@ -156,6 +156,8 @@ public class TSFileConfig implements Serializable {
   /** Maximum number of lines in a single TsBlock */
   private int maxTsBlockLineNumber = 1000;
 
+  private int patternMatchingThreshold = 1000000;
+
   public TSFileConfig() {}
 
   public int getGroupSizeInByte() {
@@ -453,4 +455,12 @@ public class TSFileConfig implements Serializable {
   public void setMaxTsBlockLineNumber(int maxTsBlockLineNumber) {
     this.maxTsBlockLineNumber = maxTsBlockLineNumber;
   }
+
+  public int getPatternMatchingThreshold() {
+    return patternMatchingThreshold;
+  }
+
+  public void setPatternMatchingThreshold(int patternMatchingThreshold) {
+    this.patternMatchingThreshold = patternMatchingThreshold;
+  }
 }
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
index 646b51ba57..31e7260469 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
@@ -18,6 +18,7 @@
  */
 package org.apache.iotdb.tsfile.read.filter.operator;
 
+import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
 import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics;
 import org.apache.iotdb.tsfile.read.filter.basic.Filter;
 import org.apache.iotdb.tsfile.read.filter.factory.FilterSerializeId;
@@ -66,7 +67,7 @@ public class Regexp<T extends Comparable<T>> implements Filter {
     if (filterType != FilterType.VALUE_FILTER) {
       return false;
     }
-    return pattern.matcher(value.toString()).find();
+    return pattern.matcher(new MatcherInput(value.toString(), new AccessCount())).find();
   }
 
   @Override
@@ -124,4 +125,49 @@ public class Regexp<T extends Comparable<T>> implements Filter {
   public FilterSerializeId getSerializeId() {
     return FilterSerializeId.REGEXP;
   }
+
+  private static class AccessCount {
+    private int count;
+    private final int accessThreshold =
+        TSFileDescriptor.getInstance().getConfig().getPatternMatchingThreshold();
+
+    public void check() throws IllegalStateException {
+      if (this.count++ > accessThreshold) {
+        throw new IllegalStateException("Pattern access threshold exceeded");
+      }
+    }
+  }
+
+  private static class MatcherInput implements CharSequence {
+
+    private final CharSequence value;
+
+    private final AccessCount access;
+
+    public MatcherInput(CharSequence value, AccessCount access) {
+      this.value = value;
+      this.access = access;
+    }
+
+    @Override
+    public char charAt(int index) {
+      this.access.check();
+      return this.value.charAt(index);
+    }
+
+    @Override
+    public CharSequence subSequence(int start, int end) {
+      return new MatcherInput(this.value.subSequence(start, end), this.access);
+    }
+
+    @Override
+    public int length() {
+      return this.value.length();
+    }
+
+    @Override
+    public String toString() {
+      return this.value.toString();
+    }
+  }
 }