You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@iotdb.apache.org by "JackieTien97 (via GitHub)" <gi...@apache.org> on 2023/08/17 12:21:27 UTC

[GitHub] [iotdb] JackieTien97 commented on a diff in pull request #10874: Implement intersect with prefix pattern for PartialPath and PathPatternTree

JackieTien97 commented on code in PR #10874:
URL: https://github.com/apache/iotdb/pull/10874#discussion_r1296881065


##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/path/PartialPath.java:
##########
@@ -492,6 +497,54 @@ public boolean overlapWith(PartialPath rPath) {
     return this.nodes.length == rNodes.length;
   }
 
+  /**
+   * Try to check overlap between nodes1 and nodes2 with MULTI_LEVEL_PATH_WILDCARD. Time complexity
+   * O(n^2).
+   *
+   * @return true if overlapping, otherwise return false
+   */
+  private boolean checkOverlapWithMultiLevelWildcard(String[] nodes1, String[] nodes2) {
+    // dp[i][j] means if nodes1[0:i) and nodes[0:j) overlapping
+    boolean[][] dp = new boolean[nodes1.length + 1][nodes2.length + 1];
+    dp[0][0] = true;
+    for (int i = 1; i <= nodes1.length; i++) {
+      for (int j = 1; j <= nodes2.length; j++) {
+        if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)
+            || nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
+          // if encounter MULTI_LEVEL_PATH_WILDCARD
+          if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
+            // if nodes1[i-1] is MULTI_LEVEL_PATH_WILDCARD, dp[i][k(k>=j)]=dp[i-1][j-1]
+            if (dp[i - 1][j - 1]) {
+              for (int k = j; k <= nodes2.length; k++) {
+                dp[i][k] = true;
+              }
+            }
+          }
+          if (nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
+            // if nodes2[j-1] is MULTI_LEVEL_PATH_WILDCARD, dp[k(k>=i)][j]=dp[i-1][j-1]
+            if (dp[i - 1][j - 1]) {
+              for (int k = i; k <= nodes1.length; k++) {
+                dp[k][j] = true;
+              }
+            }
+          }
+        } else {
+          // if without MULTI_LEVEL_PATH_WILDCARD, scan and check
+          if ((PathPatternUtil.hasWildcard(nodes1[i - 1])
+                  && PathPatternUtil.isNodeMatch(nodes1[i - 1], nodes2[j - 1]))
+              || (PathPatternUtil.hasWildcard(nodes2[j - 1])
+                  && PathPatternUtil.isNodeMatch(nodes2[j - 1], nodes1[i - 1]))
+              || nodes1[i - 1].equals(nodes2[j - 1])) {
+            // if nodes1[i-1] and nodes[2] is matched, dp[i][j] = dp[i-1][j-1]

Review Comment:
   ```suggestion
               // if nodes1[i-1] and nodes[j-1] is matched, dp[i][j] = dp[i-1][j-1]
   ```



##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/path/PartialPath.java:
##########
@@ -533,51 +586,66 @@ public boolean overlapWithFullPathPrefix(PartialPath prefixFullPath) {
   }
 
   /**
-   * Try to check overlap between nodes1 and nodes2 with MULTI_LEVEL_PATH_WILDCARD. Time complexity
-   * O(n^2).
+   * Generate a list of partial paths which are the intersection of this path pattern and input
+   * prefix pattern.
    *
-   * @return true if overlapping, otherwise return false
+   * @param prefixPattern must be a prefix full path ending with one "**", like "root.sg.**"
+   * @return a list of partial paths which are the intersection of this path pattern and input
+   *     prefix pattern.
    */
-  private boolean checkOverlapWithMultiLevelWildcard(String[] nodes1, String[] nodes2) {
-    // dp[i][j] means if nodes1[0:i) and nodes[0:j) overlapping
-    boolean[][] dp = new boolean[nodes1.length + 1][nodes2.length + 1];
-    dp[0][0] = true;
-    for (int i = 1; i <= nodes1.length; i++) {
-      for (int j = 1; j <= nodes2.length; j++) {
-        if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)
-            || nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-          // if encounter MULTI_LEVEL_PATH_WILDCARD
-          if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-            // if nodes1[i-1] is MULTI_LEVEL_PATH_WILDCARD, dp[i][k(k>=j)]=dp[i-1][j-1]
-            if (dp[i - 1][j - 1]) {
-              for (int k = j; k <= nodes2.length; k++) {
-                dp[i][k] = true;
-              }
-            }
-          }
-          if (nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-            // if nodes2[j-1] is MULTI_LEVEL_PATH_WILDCARD, dp[k(k>=i)][j]=dp[i-1][j-1]
-            if (dp[i - 1][j - 1]) {
-              for (int k = i; k <= nodes1.length; k++) {
-                dp[k][j] = true;
-              }
-            }
-          }
-        } else {
-          // if without MULTI_LEVEL_PATH_WILDCARD, scan and check
-          if ((PathPatternUtil.hasWildcard(nodes1[i - 1])
-                  && PathPatternUtil.isNodeMatch(nodes1[i - 1], nodes2[j - 1]))
-              || (PathPatternUtil.hasWildcard(nodes2[j - 1])
-                  && PathPatternUtil.isNodeMatch(nodes2[j - 1], nodes1[i - 1]))
-              || nodes1[i - 1].equals(nodes2[j - 1])) {
-            // if nodes1[i-1] and nodes[2] is matched, dp[i][j] = dp[i-1][j-1]
-            dp[i][j] |= dp[i - 1][j - 1];
+  public List<PartialPath> intersectWithPrefixPattern(PartialPath prefixPattern) {
+    if (prefixPattern.equals(ALL_MATCH_PATTERN)) {
+      return Collections.singletonList(this);
+    }
+    String[] prefixFullPath =
+        Arrays.copyOf(prefixPattern.getNodes(), prefixPattern.getNodeLength() - 1);
+    // init dp array
+    int thisLength = this.getNodeLength();
+    boolean[] matchIndex = new boolean[thisLength];
+    matchIndex[0] = true; // "root" must match "root"
+
+    // dp[i][j] means if nodes[0:i) matches prefixFullPath[0:j)
+    // for example: "root.**.d1.**" intersect "root.sg1.d1(.**)"
+    // dp[i][j] = (nodes[i]=="**"&&dp[i][j-1]) || (nodes[i]==prefixFullPath[j]&&dp[i-1][j-1])
+    // 1 0 0 0 |→| 1 0 0 0 |→| 1 0 0 0
+    // 0 0 0 0 |↓| 0 1 0 0 |→| 0 1 0 0
+    // 0 0 0 0 |↓| 0 0 0 0 |↓| 0 1 1 0
+    // Since the derivation of the next row depends only on the previous row, the calculation can
+    // be performed using a one-dimensional array named "matchIndex"
+    for (int i = 1; i < prefixFullPath.length; i++) {
+      boolean[] newMatchIndex = new boolean[thisLength];
+      for (int j = 1; j < thisLength; j++) {
+        if (nodes[j].equals(MULTI_LEVEL_PATH_WILDCARD)) {
+          newMatchIndex[j] = matchIndex[j] || matchIndex[j - 1];
+        } else if (PathPatternUtil.isNodeMatch(nodes[j], prefixFullPath[i])) {
+          newMatchIndex[j] = matchIndex[j - 1];
+        }
+      }
+      matchIndex = newMatchIndex;
+    }
+    // Scan in reverse order to construct the result set.
+    // The structure of the result set is prefixFullPath+remaining nodes. 【E.g.root.sg1.d1 + **】
+    // It can be pruned if the remaining nodes start with **.
+    List<PartialPath> res = new ArrayList<>();
+    for (int j = matchIndex.length - 1; j >= 0; j--) {
+      if (matchIndex[j]) {
+        res.add(
+            new PartialPath(
+                ArrayUtils.addAll(prefixFullPath, Arrays.copyOfRange(nodes, j + 1, thisLength))));
+        if (j + 1 < thisLength && nodes[j + 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
+          break;
+        }
+        if (nodes[j].equals(MULTI_LEVEL_PATH_WILDCARD)) {
+          res.add(
+              new PartialPath(
+                  ArrayUtils.addAll(prefixFullPath, Arrays.copyOfRange(nodes, j, thisLength))));
+          if (nodes[j].equals(MULTI_LEVEL_PATH_WILDCARD)) {

Review Comment:
   must be `true`?



##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/path/PartialPath.java:
##########
@@ -492,6 +497,54 @@ public boolean overlapWith(PartialPath rPath) {
     return this.nodes.length == rNodes.length;
   }
 
+  /**
+   * Try to check overlap between nodes1 and nodes2 with MULTI_LEVEL_PATH_WILDCARD. Time complexity
+   * O(n^2).
+   *
+   * @return true if overlapping, otherwise return false
+   */
+  private boolean checkOverlapWithMultiLevelWildcard(String[] nodes1, String[] nodes2) {

Review Comment:
   do we really need to scan n^2 each time? Can we return false if we've already found that these two path are never overlapped in the beginning steps, lile `root.db1.**.s1`  and `root.db2.**.s1`



##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/path/PartialPath.java:
##########
@@ -533,51 +586,66 @@ public boolean overlapWithFullPathPrefix(PartialPath prefixFullPath) {
   }
 
   /**
-   * Try to check overlap between nodes1 and nodes2 with MULTI_LEVEL_PATH_WILDCARD. Time complexity
-   * O(n^2).
+   * Generate a list of partial paths which are the intersection of this path pattern and input
+   * prefix pattern.
    *
-   * @return true if overlapping, otherwise return false
+   * @param prefixPattern must be a prefix full path ending with one "**", like "root.sg.**"
+   * @return a list of partial paths which are the intersection of this path pattern and input
+   *     prefix pattern.
    */
-  private boolean checkOverlapWithMultiLevelWildcard(String[] nodes1, String[] nodes2) {
-    // dp[i][j] means if nodes1[0:i) and nodes[0:j) overlapping
-    boolean[][] dp = new boolean[nodes1.length + 1][nodes2.length + 1];
-    dp[0][0] = true;
-    for (int i = 1; i <= nodes1.length; i++) {
-      for (int j = 1; j <= nodes2.length; j++) {
-        if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)
-            || nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-          // if encounter MULTI_LEVEL_PATH_WILDCARD
-          if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-            // if nodes1[i-1] is MULTI_LEVEL_PATH_WILDCARD, dp[i][k(k>=j)]=dp[i-1][j-1]
-            if (dp[i - 1][j - 1]) {
-              for (int k = j; k <= nodes2.length; k++) {
-                dp[i][k] = true;
-              }
-            }
-          }
-          if (nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-            // if nodes2[j-1] is MULTI_LEVEL_PATH_WILDCARD, dp[k(k>=i)][j]=dp[i-1][j-1]
-            if (dp[i - 1][j - 1]) {
-              for (int k = i; k <= nodes1.length; k++) {
-                dp[k][j] = true;
-              }
-            }
-          }
-        } else {
-          // if without MULTI_LEVEL_PATH_WILDCARD, scan and check
-          if ((PathPatternUtil.hasWildcard(nodes1[i - 1])
-                  && PathPatternUtil.isNodeMatch(nodes1[i - 1], nodes2[j - 1]))
-              || (PathPatternUtil.hasWildcard(nodes2[j - 1])
-                  && PathPatternUtil.isNodeMatch(nodes2[j - 1], nodes1[i - 1]))
-              || nodes1[i - 1].equals(nodes2[j - 1])) {
-            // if nodes1[i-1] and nodes[2] is matched, dp[i][j] = dp[i-1][j-1]
-            dp[i][j] |= dp[i - 1][j - 1];
+  public List<PartialPath> intersectWithPrefixPattern(PartialPath prefixPattern) {
+    if (prefixPattern.equals(ALL_MATCH_PATTERN)) {
+      return Collections.singletonList(this);
+    }
+    String[] prefixFullPath =
+        Arrays.copyOf(prefixPattern.getNodes(), prefixPattern.getNodeLength() - 1);
+    // init dp array
+    int thisLength = this.getNodeLength();
+    boolean[] matchIndex = new boolean[thisLength];
+    matchIndex[0] = true; // "root" must match "root"
+
+    // dp[i][j] means if nodes[0:i) matches prefixFullPath[0:j)
+    // for example: "root.**.d1.**" intersect "root.sg1.d1(.**)"
+    // dp[i][j] = (nodes[i]=="**"&&dp[i][j-1]) || (nodes[i]==prefixFullPath[j]&&dp[i-1][j-1])
+    // 1 0 0 0 |→| 1 0 0 0 |→| 1 0 0 0
+    // 0 0 0 0 |↓| 0 1 0 0 |→| 0 1 0 0
+    // 0 0 0 0 |↓| 0 0 0 0 |↓| 0 1 1 0
+    // Since the derivation of the next row depends only on the previous row, the calculation can
+    // be performed using a one-dimensional array named "matchIndex"
+    for (int i = 1; i < prefixFullPath.length; i++) {
+      boolean[] newMatchIndex = new boolean[thisLength];
+      for (int j = 1; j < thisLength; j++) {
+        if (nodes[j].equals(MULTI_LEVEL_PATH_WILDCARD)) {
+          newMatchIndex[j] = matchIndex[j] || matchIndex[j - 1];
+        } else if (PathPatternUtil.isNodeMatch(nodes[j], prefixFullPath[i])) {
+          newMatchIndex[j] = matchIndex[j - 1];
+        }
+      }
+      matchIndex = newMatchIndex;

Review Comment:
   no need to new an array, just update the origin one.



##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/path/PartialPath.java:
##########
@@ -533,51 +586,66 @@ public boolean overlapWithFullPathPrefix(PartialPath prefixFullPath) {
   }
 
   /**
-   * Try to check overlap between nodes1 and nodes2 with MULTI_LEVEL_PATH_WILDCARD. Time complexity
-   * O(n^2).
+   * Generate a list of partial paths which are the intersection of this path pattern and input
+   * prefix pattern.
    *
-   * @return true if overlapping, otherwise return false
+   * @param prefixPattern must be a prefix full path ending with one "**", like "root.sg.**"
+   * @return a list of partial paths which are the intersection of this path pattern and input
+   *     prefix pattern.
    */
-  private boolean checkOverlapWithMultiLevelWildcard(String[] nodes1, String[] nodes2) {
-    // dp[i][j] means if nodes1[0:i) and nodes[0:j) overlapping
-    boolean[][] dp = new boolean[nodes1.length + 1][nodes2.length + 1];
-    dp[0][0] = true;
-    for (int i = 1; i <= nodes1.length; i++) {
-      for (int j = 1; j <= nodes2.length; j++) {
-        if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)
-            || nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-          // if encounter MULTI_LEVEL_PATH_WILDCARD
-          if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-            // if nodes1[i-1] is MULTI_LEVEL_PATH_WILDCARD, dp[i][k(k>=j)]=dp[i-1][j-1]
-            if (dp[i - 1][j - 1]) {
-              for (int k = j; k <= nodes2.length; k++) {
-                dp[i][k] = true;
-              }
-            }
-          }
-          if (nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-            // if nodes2[j-1] is MULTI_LEVEL_PATH_WILDCARD, dp[k(k>=i)][j]=dp[i-1][j-1]
-            if (dp[i - 1][j - 1]) {
-              for (int k = i; k <= nodes1.length; k++) {
-                dp[k][j] = true;
-              }
-            }
-          }
-        } else {
-          // if without MULTI_LEVEL_PATH_WILDCARD, scan and check
-          if ((PathPatternUtil.hasWildcard(nodes1[i - 1])
-                  && PathPatternUtil.isNodeMatch(nodes1[i - 1], nodes2[j - 1]))
-              || (PathPatternUtil.hasWildcard(nodes2[j - 1])
-                  && PathPatternUtil.isNodeMatch(nodes2[j - 1], nodes1[i - 1]))
-              || nodes1[i - 1].equals(nodes2[j - 1])) {
-            // if nodes1[i-1] and nodes[2] is matched, dp[i][j] = dp[i-1][j-1]
-            dp[i][j] |= dp[i - 1][j - 1];
+  public List<PartialPath> intersectWithPrefixPattern(PartialPath prefixPattern) {
+    if (prefixPattern.equals(ALL_MATCH_PATTERN)) {
+      return Collections.singletonList(this);
+    }
+    String[] prefixFullPath =
+        Arrays.copyOf(prefixPattern.getNodes(), prefixPattern.getNodeLength() - 1);
+    // init dp array
+    int thisLength = this.getNodeLength();
+    boolean[] matchIndex = new boolean[thisLength];
+    matchIndex[0] = true; // "root" must match "root"
+
+    // dp[i][j] means if nodes[0:i) matches prefixFullPath[0:j)
+    // for example: "root.**.d1.**" intersect "root.sg1.d1(.**)"
+    // dp[i][j] = (nodes[i]=="**"&&dp[i][j-1]) || (nodes[i]==prefixFullPath[j]&&dp[i-1][j-1])

Review Comment:
   ```suggestion
       // dp[i][j] = (nodes[i]=="**"&&dp[i][j-1]) || (nodes[i] matchs prefixFullPath[j]&&dp[i-1][j-1])
   ```



##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/path/PartialPath.java:
##########
@@ -533,51 +586,66 @@ public boolean overlapWithFullPathPrefix(PartialPath prefixFullPath) {
   }
 
   /**
-   * Try to check overlap between nodes1 and nodes2 with MULTI_LEVEL_PATH_WILDCARD. Time complexity
-   * O(n^2).
+   * Generate a list of partial paths which are the intersection of this path pattern and input
+   * prefix pattern.
    *
-   * @return true if overlapping, otherwise return false
+   * @param prefixPattern must be a prefix full path ending with one "**", like "root.sg.**"
+   * @return a list of partial paths which are the intersection of this path pattern and input
+   *     prefix pattern.
    */
-  private boolean checkOverlapWithMultiLevelWildcard(String[] nodes1, String[] nodes2) {
-    // dp[i][j] means if nodes1[0:i) and nodes[0:j) overlapping
-    boolean[][] dp = new boolean[nodes1.length + 1][nodes2.length + 1];
-    dp[0][0] = true;
-    for (int i = 1; i <= nodes1.length; i++) {
-      for (int j = 1; j <= nodes2.length; j++) {
-        if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)
-            || nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-          // if encounter MULTI_LEVEL_PATH_WILDCARD
-          if (nodes1[i - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-            // if nodes1[i-1] is MULTI_LEVEL_PATH_WILDCARD, dp[i][k(k>=j)]=dp[i-1][j-1]
-            if (dp[i - 1][j - 1]) {
-              for (int k = j; k <= nodes2.length; k++) {
-                dp[i][k] = true;
-              }
-            }
-          }
-          if (nodes2[j - 1].equals(MULTI_LEVEL_PATH_WILDCARD)) {
-            // if nodes2[j-1] is MULTI_LEVEL_PATH_WILDCARD, dp[k(k>=i)][j]=dp[i-1][j-1]
-            if (dp[i - 1][j - 1]) {
-              for (int k = i; k <= nodes1.length; k++) {
-                dp[k][j] = true;
-              }
-            }
-          }
-        } else {
-          // if without MULTI_LEVEL_PATH_WILDCARD, scan and check
-          if ((PathPatternUtil.hasWildcard(nodes1[i - 1])
-                  && PathPatternUtil.isNodeMatch(nodes1[i - 1], nodes2[j - 1]))
-              || (PathPatternUtil.hasWildcard(nodes2[j - 1])
-                  && PathPatternUtil.isNodeMatch(nodes2[j - 1], nodes1[i - 1]))
-              || nodes1[i - 1].equals(nodes2[j - 1])) {
-            // if nodes1[i-1] and nodes[2] is matched, dp[i][j] = dp[i-1][j-1]
-            dp[i][j] |= dp[i - 1][j - 1];
+  public List<PartialPath> intersectWithPrefixPattern(PartialPath prefixPattern) {
+    if (prefixPattern.equals(ALL_MATCH_PATTERN)) {
+      return Collections.singletonList(this);
+    }
+    String[] prefixFullPath =
+        Arrays.copyOf(prefixPattern.getNodes(), prefixPattern.getNodeLength() - 1);
+    // init dp array
+    int thisLength = this.getNodeLength();
+    boolean[] matchIndex = new boolean[thisLength];
+    matchIndex[0] = true; // "root" must match "root"
+
+    // dp[i][j] means if nodes[0:i) matches prefixFullPath[0:j)
+    // for example: "root.**.d1.**" intersect "root.sg1.d1(.**)"
+    // dp[i][j] = (nodes[i]=="**"&&dp[i][j-1]) || (nodes[i]==prefixFullPath[j]&&dp[i-1][j-1])
+    // 1 0 0 0 |→| 1 0 0 0 |→| 1 0 0 0
+    // 0 0 0 0 |↓| 0 1 0 0 |→| 0 1 0 0
+    // 0 0 0 0 |↓| 0 0 0 0 |↓| 0 1 1 0
+    // Since the derivation of the next row depends only on the previous row, the calculation can
+    // be performed using a one-dimensional array named "matchIndex"
+    for (int i = 1; i < prefixFullPath.length; i++) {
+      boolean[] newMatchIndex = new boolean[thisLength];
+      for (int j = 1; j < thisLength; j++) {
+        if (nodes[j].equals(MULTI_LEVEL_PATH_WILDCARD)) {
+          newMatchIndex[j] = matchIndex[j] || matchIndex[j - 1];
+        } else if (PathPatternUtil.isNodeMatch(nodes[j], prefixFullPath[i])) {
+          newMatchIndex[j] = matchIndex[j - 1];
+        }
+      }
+      matchIndex = newMatchIndex;
+    }
+    // Scan in reverse order to construct the result set.
+    // The structure of the result set is prefixFullPath+remaining nodes. 【E.g.root.sg1.d1 + **】
+    // It can be pruned if the remaining nodes start with **.
+    List<PartialPath> res = new ArrayList<>();
+    for (int j = matchIndex.length - 1; j >= 0; j--) {
+      if (matchIndex[j]) {
+        res.add(
+            new PartialPath(
+                ArrayUtils.addAll(prefixFullPath, Arrays.copyOfRange(nodes, j + 1, thisLength))));

Review Comment:
   what if j + 1 >= matchIndex.length? Actually, I think j can be initialized to matchIndex.length - 2



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@iotdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org