You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ka...@apache.org on 2021/05/15 06:51:38 UTC

[iotdb] branch f_index_dev updated: optimize etree disk, add index log

This is an automated email from the ASF dual-hosted git repository.

kangrong pushed a commit to branch f_index_dev
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/f_index_dev by this push:
     new 7739e83  optimize etree disk, add index log
7739e83 is described below

commit 7739e8374f92cdb6cffecf78d2479fe2d39a3959
Author: kr11 <3095717866.com>
AuthorDate: Sat May 15 14:50:53 2021 +0800

    optimize etree disk, add index log
---
 .../iotdb/db/index/algorithm/RTreeIndex.java       | 11 +++--
 .../iotdb/db/index/algorithm/elb/ELBIndex.java     | 10 +++-
 .../iotdb/db/index/algorithm/mmhh/MMHHIndex.java   |  8 ++++
 .../iotdb/db/index/algorithm/rtree/RTree.java      | 56 +++++++++++++++++-----
 .../iotdb/db/index/algorithm/rtree/RTreeTest.java  |  5 +-
 .../apache/iotdb/db/index/it/DemoELBWindIT.java    |  2 +-
 6 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/server/src/main/java/org/apache/iotdb/db/index/algorithm/RTreeIndex.java b/server/src/main/java/org/apache/iotdb/db/index/algorithm/RTreeIndex.java
index 0f9d933..b9b7e1f 100644
--- a/server/src/main/java/org/apache/iotdb/db/index/algorithm/RTreeIndex.java
+++ b/server/src/main/java/org/apache/iotdb/db/index/algorithm/RTreeIndex.java
@@ -51,6 +51,7 @@ import org.apache.iotdb.tsfile.read.reader.IBatchReader;
 import org.apache.iotdb.tsfile.utils.Pair;
 import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
 
+import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -162,7 +163,8 @@ public abstract class RTreeIndex extends IoTDBIndex {
                   return null;
                 }
               });
-      logger.info("Deserialize RTreeIndex rTree: {}", rTree.toString().substring(0, 10));
+      //      logger.info("Deserialize RTreeIndex rTree: {}", rTree.toString().substring(0, 10));
+      logger.info("Deserialize RTreeIndex rTree: {}", rTree);
       logger.info("Deserialize InvolvedSet: {}, {}", involvedPathSet.size(), involvedPathSet);
     } catch (IOException e) {
       logger.error("Error when deserialize ELB features. Given up.", e);
@@ -172,8 +174,10 @@ public abstract class RTreeIndex extends IoTDBIndex {
   @Override
   public void serializeIndex() {
     logger.info("RTreeIndex {} starts serialization", indexSeries);
-    logger.info("RTreeIndex RTree to serialized: {}", rTree.toString().substring(0, 10));
-    logger.info("Serialize InvolvedSet: {}, {}", involvedPathSet.size(), involvedPathSet);
+    //    logger.info("RTreeIndex RTree to serialized: {}", rTree.toString().substring(0, 10));
+    logger.info("RTreeIndex RTree to serialized: {}", rTree);
+    logger.info("RTreeIndex RTree to serialized: {}", rTree.toDetailedString());
+    logger.info("Serialize InvolvedSet: {}", involvedPathSet.size());
     try (OutputStream outputStream = new FileOutputStream(featureFile)) {
       // out is outputStream exactly. It seems redundant, but it would be really weird if the second
       // parameter "serializeItem" doesn't input an outputStream.
@@ -187,6 +191,7 @@ public abstract class RTreeIndex extends IoTDBIndex {
               e.printStackTrace();
             }
           });
+      System.out.println("rtree file size: " + FileUtils.sizeOf(featureFile));
     } catch (IOException e) {
       logger.error("Error when serialize router. Given up.", e);
     }
diff --git a/server/src/main/java/org/apache/iotdb/db/index/algorithm/elb/ELBIndex.java b/server/src/main/java/org/apache/iotdb/db/index/algorithm/elb/ELBIndex.java
index 8335e66..15c395d 100644
--- a/server/src/main/java/org/apache/iotdb/db/index/algorithm/elb/ELBIndex.java
+++ b/server/src/main/java/org/apache/iotdb/db/index/algorithm/elb/ELBIndex.java
@@ -50,6 +50,7 @@ import org.apache.iotdb.tsfile.read.reader.IBatchReader;
 import org.apache.iotdb.tsfile.utils.Pair;
 import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
 
+import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -191,6 +192,12 @@ public class ELBIndex extends IoTDBIndex {
         ReadWriteIOUtils.write(features.feature, outputStream);
       }
       //      usableBlocks.clearAndRelease();
+      long expectSize = 4L + windowBlockFeatures.size() * (8 + 8 + 8);
+      System.out.println(
+          String.format(
+              "calc size: %d=4L + block_size(%d) * (8 + 8 + 8)",
+              expectSize, windowBlockFeatures.size()));
+      System.out.println("hashtable file size: " + FileUtils.sizeOf(featureFile));
     } catch (IOException e) {
       logger.error("Error when serialize router. Given up.", e);
     }
@@ -394,8 +401,9 @@ public class ELBIndex extends IoTDBIndex {
       // thres_sql ** p = (thres_avg ** p) * len(subpattern)
       // thres_avg = [(thres_sql ** p) / len(subpattern)] ** 1/p
 
-      if (struct.distance instanceof LInfinityNormdouble)
+      if (struct.distance instanceof LInfinityNormdouble) {
         throw new IllegalIndexParamException("L-inf not supported yet");
+      }
       double p = struct.distance.getP();
       struct.thresholds[i] = Math.pow(Math.pow(thresholdList.get(i), p) / pattern.length, 1 / p);
       //      struct.thresholds[i] = thresholdList.get(i);
diff --git a/server/src/main/java/org/apache/iotdb/db/index/algorithm/mmhh/MMHHIndex.java b/server/src/main/java/org/apache/iotdb/db/index/algorithm/mmhh/MMHHIndex.java
index 2d20a73..474cfa7 100644
--- a/server/src/main/java/org/apache/iotdb/db/index/algorithm/mmhh/MMHHIndex.java
+++ b/server/src/main/java/org/apache/iotdb/db/index/algorithm/mmhh/MMHHIndex.java
@@ -45,6 +45,7 @@ import org.apache.iotdb.tsfile.utils.Pair;
 import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
 
 import ai.djl.MalformedModelException;
+import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -221,6 +222,13 @@ public class MMHHIndex extends IoTDBIndex {
           ReadWriteIOUtils.write(v, outputStream);
         }
       }
+      System.out.println("hashtable bucket size: " + hashLookupTable.size());
+      long expectSize = 4L + hashLookupTable.size() * (8 + 4) + itemSize * 8;
+      System.out.println(
+          String.format(
+              "calc size: %d=4L + bucket_size(%d) * (8 + 4) + itemSize(%d) * 8",
+              expectSize, hashLookupTable.size(), itemSize));
+      System.out.println("hashtable file size: " + FileUtils.sizeOf(featureFile));
     } catch (IOException e) {
       logger.error("Error when serialize router. Given up.", e);
     }
diff --git a/server/src/main/java/org/apache/iotdb/db/index/algorithm/rtree/RTree.java b/server/src/main/java/org/apache/iotdb/db/index/algorithm/rtree/RTree.java
index 04eeed2..ae62f45 100644
--- a/server/src/main/java/org/apache/iotdb/db/index/algorithm/rtree/RTree.java
+++ b/server/src/main/java/org/apache/iotdb/db/index/algorithm/rtree/RTree.java
@@ -54,9 +54,9 @@ public class RTree<T> {
 
   private static final Logger logger = LoggerFactory.getLogger(RTree.class);
 
-  private static final int INNER_NODE = 0;
-  private static final int LEAF_NODE = 1;
-  private static final int ITEM = 2;
+  private static final short INNER_NODE = 0;
+  private static final short LEAF_NODE = 1;
+  private static final short ITEM = 2;
   private static final float VAGUE_ERROR = 0.0001f;
 
   final int dim;
@@ -418,8 +418,10 @@ public class RTree<T> {
     for (float lb : node.lbs) {
       ReadWriteIOUtils.write(lb, outputStream);
     }
-    for (float ub : node.ubs) {
-      ReadWriteIOUtils.write(ub, outputStream);
+    if (!(node instanceof Item)) {
+      for (float ub : node.ubs) {
+        ReadWriteIOUtils.write(ub, outputStream);
+      }
     }
     if (node instanceof Item) {
       T value = ((Item<T>) node).v;
@@ -458,14 +460,18 @@ public class RTree<T> {
       InputStream inputStream,
       Function<InputStream, T> deserializeItemFunc)
       throws IOException {
-    int nodeType = ReadWriteIOUtils.readInt(inputStream);
+    short nodeType = ReadWriteIOUtils.readShort(inputStream);
     float[] lbs = new float[rTree.dim];
     float[] ubs = new float[rTree.dim];
     for (int i = 0; i < rTree.dim; i++) {
       lbs[i] = ReadWriteIOUtils.readFloat(inputStream);
     }
-    for (int i = 0; i < rTree.dim; i++) {
-      ubs[i] = ReadWriteIOUtils.readFloat(inputStream);
+    if (nodeType == ITEM) {
+      System.arraycopy(lbs, 0, ubs, 0, rTree.dim);
+    } else {
+      for (int i = 0; i < rTree.dim; i++) {
+        ubs[i] = ReadWriteIOUtils.readFloat(inputStream);
+      }
     }
     RNode node;
     if (nodeType == ITEM) {
@@ -492,14 +498,38 @@ public class RTree<T> {
     sb.append(String.format("nMax:%d,", nMaxPerNode));
     sb.append(String.format("nMin:%d,", nMinPerNode));
     sb.append(String.format("dim:%d,", dim));
-    sb.append(String.format("seedsPicker:%s%n", seedsPicker));
-    if (root == null) {
-      return sb.toString();
-    }
-    toString(root, 0, sb);
+    sb.append(String.format("seedsPicker:%s", seedsPicker));
+    int[] countNodes = {0, 0, 0}; // inner, leaf, item
+    statRTree(countNodes, root);
+    sb.append(
+        String.format("inner:%d,leaf:%d,item:%d;%n ", countNodes[0], countNodes[1], countNodes[2]));
+    long expectSize =
+        4 * 3
+            + 2
+            + (countNodes[0] + countNodes[1]) * (2L * dim * 4 + 2 + 4)
+            + countNodes[2] * (dim * 4 + 2 + 8);
+    sb.append(
+        String.format(
+            "calc size: %d=4*3+2+(#inner(%d)+#leaf(%d)) * (2*dim(%d)*4+2+4) + #item(%d) * (dim(%d)*4 + 2 + 8)",
+            expectSize, countNodes[0], countNodes[1], dim, countNodes[2], dim));
     return sb.toString();
   }
 
+  private void statRTree(int[] countNodes, RNode node) {
+    if (node instanceof Item) {
+      countNodes[2]++;
+    } else {
+      if (node.isLeaf) {
+        countNodes[1]++;
+      } else {
+        countNodes[0]++;
+      }
+      for (RNode child : node.children) {
+        statRTree(countNodes, child);
+      }
+    }
+  }
+
   public String toDetailedString() {
     StringBuilder sb = new StringBuilder();
     sb.append(String.format("nMax:%d,", nMaxPerNode));
diff --git a/server/src/test/java/org/apache/iotdb/db/index/algorithm/rtree/RTreeTest.java b/server/src/test/java/org/apache/iotdb/db/index/algorithm/rtree/RTreeTest.java
index d3d41ef..5fb6560 100644
--- a/server/src/test/java/org/apache/iotdb/db/index/algorithm/rtree/RTreeTest.java
+++ b/server/src/test/java/org/apache/iotdb/db/index/algorithm/rtree/RTreeTest.java
@@ -130,9 +130,10 @@ public class RTreeTest {
       if (!checkRTree(rTree)) {
         fail();
       }
+      System.out.println(rTree.toDetailedString());
     }
-    System.out.println(rTree);
-    Assert.assertEquals(gt, rTree.toString());
+    System.out.println(rTree.toDetailedString());
+    Assert.assertEquals(gt, rTree.toDetailedString());
   }
 
   /**
diff --git a/server/src/test/java/org/apache/iotdb/db/index/it/DemoELBWindIT.java b/server/src/test/java/org/apache/iotdb/db/index/it/DemoELBWindIT.java
index e6c9df8..b3d7371 100644
--- a/server/src/test/java/org/apache/iotdb/db/index/it/DemoELBWindIT.java
+++ b/server/src/test/java/org/apache/iotdb/db/index/it/DemoELBWindIT.java
@@ -153,7 +153,7 @@ public class DemoELBWindIT {
                     ZoneId.systemDefault()),
                 subInput.getFloat(i));
         statement.execute(insertSQL);
-        System.out.println(insertSQL);
+        //        System.out.println(insertSQL);
       }
       statement.execute("flush");
       //      System.out.println("==========================");