You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ha...@apache.org on 2022/10/12 02:41:36 UTC

[iotdb] branch rel/0.13 updated: [To rel/0.13] Optimize pattern matching in Regexp (#7564)

This is an automated email from the ASF dual-hosted git repository.

haonan pushed a commit to branch rel/0.13
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/rel/0.13 by this push:
     new 89c0ec3b96 [To rel/0.13] Optimize pattern matching in Regexp (#7564)
89c0ec3b96 is described below

commit 89c0ec3b963cb44cf94a1efac8f28a5fc475d214
Author: Liu Xuxin <37...@users.noreply.github.com>
AuthorDate: Wed Oct 12 10:41:30 2022 +0800

    [To rel/0.13] Optimize pattern matching in Regexp (#7564)
---
 .../util/TSFileConfigUtilCompletenessTest.java     |  3 +-
 .../db/integration/IoTDBRepeatPatternNameIT.java   | 67 ++++++++++++++++++++++
 .../resources/conf/iotdb-engine.properties         |  4 ++
 .../java/org/apache/iotdb/db/conf/IoTDBConfig.java | 10 ++++
 .../org/apache/iotdb/db/conf/IoTDBDescriptor.java  |  7 +++
 .../iotdb/tsfile/common/conf/TSFileConfig.java     | 10 ++++
 .../iotdb/tsfile/read/filter/operator/Regexp.java  | 48 +++++++++++++++-
 7 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java b/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
index 16373c3057..9220db0a82 100644
--- a/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
+++ b/flink-tsfile-connector/src/test/java/org/apache/iotdb/flink/util/TSFileConfigUtilCompletenessTest.java
@@ -74,7 +74,8 @@ public class TSFileConfigUtilCompletenessTest {
       "setTSFileStorageFs",
       "setUseKerberos",
       "setValueEncoder",
-      "setCustomizedProperties"
+      "setCustomizedProperties",
+      "setPatternMatchingThreshold"
     };
     Set<String> addedSetters = new HashSet<>();
     Collections.addAll(addedSetters, setters);
diff --git a/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java b/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java
new file mode 100644
index 0000000000..301d8d5a14
--- /dev/null
+++ b/integration/src/test/java/org/apache/iotdb/db/integration/IoTDBRepeatPatternNameIT.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.integration;
+
+import org.apache.iotdb.integration.env.EnvFactory;
+import org.apache.iotdb.jdbc.IoTDBSQLException;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.sql.Connection;
+import java.sql.Statement;
+
+public class IoTDBRepeatPatternNameIT {
+  @Before
+  public void startUp() throws Exception {
+    EnvFactory.getEnv().initBeforeClass();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    EnvFactory.getEnv().cleanAfterClass();
+  }
+
+  @Test
+  public void testLargePattern() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    sb.append("insert into root.ln.wf01.wt01(timestamp,status,s) values(1509465780000,false,'");
+    // we should make sure that the pattern is repeated enough times to make the exception occur
+    // so that the system can pass the test
+    for (int i = 0; i < 20; ++i) {
+      sb.append('a');
+    }
+    sb.append("b');");
+    long startTime = System.currentTimeMillis();
+    try (Connection connection = EnvFactory.getEnv().getConnection();
+        Statement statement = connection.createStatement()) {
+      statement.execute(sb.toString());
+      try {
+        statement.execute("select s from root.ln.wf01.wt01 where s REGEXP'(a+)+s'");
+      } catch (IoTDBSQLException e) {
+        Assert.assertTrue(e.getMessage().contains("Pattern access threshold exceeded"));
+      }
+      long timeCost = System.currentTimeMillis() - startTime;
+      Assert.assertTrue(timeCost < 5_000L);
+    }
+  }
+}
diff --git a/server/src/assembly/resources/conf/iotdb-engine.properties b/server/src/assembly/resources/conf/iotdb-engine.properties
index 8991b9933a..f8267b9c38 100644
--- a/server/src/assembly/resources/conf/iotdb-engine.properties
+++ b/server/src/assembly/resources/conf/iotdb-engine.properties
@@ -741,6 +741,10 @@ timestamp_precision=ms
 # Datatype: long
 # slow_query_threshold=5000
 
+# max number of character accesses allowed while matching a pattern in the regex filter
+# Datatype: int
+# pattern_matching_threshold=1000000
+
 ####################
 ### MQTT Broker Configuration
 ####################
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
index d92bd997e9..f4517811da 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
@@ -792,6 +792,8 @@ public class IoTDBConfig {
   /** time cost(ms) threshold for slow query. Unit: millisecond */
   private long slowQueryThreshold = 5000;
 
+  private int patternMatchingThreshold = 1000000;
+
   /**
    * whether enable the rpc service. This parameter has no a corresponding field in the
    * iotdb-engine.properties
@@ -2796,4 +2798,12 @@ public class IoTDBConfig {
   public void setChunkMetadataMemorySizeProportion(double chunkMetadataMemorySizeProportion) {
     this.chunkMetadataMemorySizeProportion = chunkMetadataMemorySizeProportion;
   }
+
+  public int getPatternMatchingThreshold() {
+    return patternMatchingThreshold;
+  }
+
+  public void setPatternMatchingThreshold(int patternMatchingThreshold) {
+    this.patternMatchingThreshold = patternMatchingThreshold;
+  }
 }
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
index 7516081328..fadf77ccf0 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
@@ -928,6 +928,13 @@ public class IoTDBDescriptor {
         .setDfsClientFailoverProxyProvider(
             properties.getProperty(
                 "dfs_client_failover_proxy_provider", conf.getDfsClientFailoverProxyProvider()));
+    TSFileDescriptor.getInstance()
+        .getConfig()
+        .setPatternMatchingThreshold(
+            Integer.parseInt(
+                properties.getProperty(
+                    "pattern_matching_threshold",
+                    String.valueOf(conf.getPatternMatchingThreshold()))));
     TSFileDescriptor.getInstance()
         .getConfig()
         .setUseKerberos(
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
index df5df28446..0fd1bdcaa6 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/common/conf/TSFileConfig.java
@@ -146,6 +146,8 @@ public class TSFileConfig implements Serializable {
   private double bloomFilterErrorRate = 0.05;
   /** The amount of data iterate each time */
   private int batchSize = 1000;
+
+  private int patternMatchingThreshold = 1000000;
   /** customizedProperties, this should be empty by default. */
   private Properties customizedProperties = new Properties();
 
@@ -422,4 +424,12 @@ public class TSFileConfig implements Serializable {
   public void setCustomizedProperties(Properties customizedProperties) {
     this.customizedProperties = customizedProperties;
   }
+
+  public int getPatternMatchingThreshold() {
+    return patternMatchingThreshold;
+  }
+
+  public void setPatternMatchingThreshold(int patternMatchingThreshold) {
+    this.patternMatchingThreshold = patternMatchingThreshold;
+  }
 }
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
index 646b51ba57..31e7260469 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/Regexp.java
@@ -18,6 +18,7 @@
  */
 package org.apache.iotdb.tsfile.read.filter.operator;
 
+import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
 import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics;
 import org.apache.iotdb.tsfile.read.filter.basic.Filter;
 import org.apache.iotdb.tsfile.read.filter.factory.FilterSerializeId;
@@ -66,7 +67,7 @@ public class Regexp<T extends Comparable<T>> implements Filter {
     if (filterType != FilterType.VALUE_FILTER) {
       return false;
     }
-    return pattern.matcher(value.toString()).find();
+    return pattern.matcher(new MatcherInput(value.toString(), new AccessCount())).find();
   }
 
   @Override
@@ -124,4 +125,49 @@ public class Regexp<T extends Comparable<T>> implements Filter {
   public FilterSerializeId getSerializeId() {
     return FilterSerializeId.REGEXP;
   }
+
+  private static class AccessCount {
+    private int count;
+    private final int accessThreshold =
+        TSFileDescriptor.getInstance().getConfig().getPatternMatchingThreshold();
+
+    public void check() throws IllegalStateException {
+      if (this.count++ > accessThreshold) {
+        throw new IllegalStateException("Pattern access threshold exceeded");
+      }
+    }
+  }
+
+  private static class MatcherInput implements CharSequence {
+
+    private final CharSequence value;
+
+    private final AccessCount access;
+
+    public MatcherInput(CharSequence value, AccessCount access) {
+      this.value = value;
+      this.access = access;
+    }
+
+    @Override
+    public char charAt(int index) {
+      this.access.check();
+      return this.value.charAt(index);
+    }
+
+    @Override
+    public CharSequence subSequence(int start, int end) {
+      return new MatcherInput(this.value.subSequence(start, end), this.access);
+    }
+
+    @Override
+    public int length() {
+      return this.value.length();
+    }
+
+    @Override
+    public String toString() {
+      return this.value.toString();
+    }
+  }
 }