You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ka...@apache.org on 2021/05/15 06:51:38 UTC
[iotdb] branch f_index_dev updated: optimize etree disk, add index log
This is an automated email from the ASF dual-hosted git repository.
kangrong pushed a commit to branch f_index_dev
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/f_index_dev by this push:
new 7739e83 optimize etree disk, add index log
7739e83 is described below
commit 7739e8374f92cdb6cffecf78d2479fe2d39a3959
Author: kr11 <3095717866.com>
AuthorDate: Sat May 15 14:50:53 2021 +0800
optimize etree disk, add index log
---
.../iotdb/db/index/algorithm/RTreeIndex.java | 11 +++--
.../iotdb/db/index/algorithm/elb/ELBIndex.java | 10 +++-
.../iotdb/db/index/algorithm/mmhh/MMHHIndex.java | 8 ++++
.../iotdb/db/index/algorithm/rtree/RTree.java | 56 +++++++++++++++++-----
.../iotdb/db/index/algorithm/rtree/RTreeTest.java | 5 +-
.../apache/iotdb/db/index/it/DemoELBWindIT.java | 2 +-
6 files changed, 72 insertions(+), 20 deletions(-)
diff --git a/server/src/main/java/org/apache/iotdb/db/index/algorithm/RTreeIndex.java b/server/src/main/java/org/apache/iotdb/db/index/algorithm/RTreeIndex.java
index 0f9d933..b9b7e1f 100644
--- a/server/src/main/java/org/apache/iotdb/db/index/algorithm/RTreeIndex.java
+++ b/server/src/main/java/org/apache/iotdb/db/index/algorithm/RTreeIndex.java
@@ -51,6 +51,7 @@ import org.apache.iotdb.tsfile.read.reader.IBatchReader;
import org.apache.iotdb.tsfile.utils.Pair;
import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
+import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -162,7 +163,8 @@ public abstract class RTreeIndex extends IoTDBIndex {
return null;
}
});
- logger.info("Deserialize RTreeIndex rTree: {}", rTree.toString().substring(0, 10));
+ // logger.info("Deserialize RTreeIndex rTree: {}", rTree.toString().substring(0, 10));
+ logger.info("Deserialize RTreeIndex rTree: {}", rTree);
logger.info("Deserialize InvolvedSet: {}, {}", involvedPathSet.size(), involvedPathSet);
} catch (IOException e) {
logger.error("Error when deserialize ELB features. Given up.", e);
@@ -172,8 +174,10 @@ public abstract class RTreeIndex extends IoTDBIndex {
@Override
public void serializeIndex() {
logger.info("RTreeIndex {} starts serialization", indexSeries);
- logger.info("RTreeIndex RTree to serialized: {}", rTree.toString().substring(0, 10));
- logger.info("Serialize InvolvedSet: {}, {}", involvedPathSet.size(), involvedPathSet);
+ // logger.info("RTreeIndex RTree to serialized: {}", rTree.toString().substring(0, 10));
+ logger.info("RTreeIndex RTree to serialized: {}", rTree);
+ logger.info("RTreeIndex RTree to serialized: {}", rTree.toDetailedString());
+ logger.info("Serialize InvolvedSet: {}", involvedPathSet.size());
try (OutputStream outputStream = new FileOutputStream(featureFile)) {
// out is outputStream exactly. It seems redundant, but it would be really weird if the second
// parameter "serializeItem" doesn't input an outputStream.
@@ -187,6 +191,7 @@ public abstract class RTreeIndex extends IoTDBIndex {
e.printStackTrace();
}
});
+ System.out.println("rtree file size: " + FileUtils.sizeOf(featureFile));
} catch (IOException e) {
logger.error("Error when serialize router. Given up.", e);
}
diff --git a/server/src/main/java/org/apache/iotdb/db/index/algorithm/elb/ELBIndex.java b/server/src/main/java/org/apache/iotdb/db/index/algorithm/elb/ELBIndex.java
index 8335e66..15c395d 100644
--- a/server/src/main/java/org/apache/iotdb/db/index/algorithm/elb/ELBIndex.java
+++ b/server/src/main/java/org/apache/iotdb/db/index/algorithm/elb/ELBIndex.java
@@ -50,6 +50,7 @@ import org.apache.iotdb.tsfile.read.reader.IBatchReader;
import org.apache.iotdb.tsfile.utils.Pair;
import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
+import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -191,6 +192,12 @@ public class ELBIndex extends IoTDBIndex {
ReadWriteIOUtils.write(features.feature, outputStream);
}
// usableBlocks.clearAndRelease();
+ long expectSize = 4L + windowBlockFeatures.size() * (8 + 8 + 8);
+ System.out.println(
+ String.format(
+ "calc size: %d=4L + block_size(%d) * (8 + 8 + 8)",
+ expectSize, windowBlockFeatures.size()));
+ System.out.println("hashtable file size: " + FileUtils.sizeOf(featureFile));
} catch (IOException e) {
logger.error("Error when serialize router. Given up.", e);
}
@@ -394,8 +401,9 @@ public class ELBIndex extends IoTDBIndex {
// thres_sql ** p = (thres_avg ** p) * len(subpattern)
// thres_avg = [(thres_sql ** p) / len(subpattern)] ** 1/p
- if (struct.distance instanceof LInfinityNormdouble)
+ if (struct.distance instanceof LInfinityNormdouble) {
throw new IllegalIndexParamException("L-inf not supported yet");
+ }
double p = struct.distance.getP();
struct.thresholds[i] = Math.pow(Math.pow(thresholdList.get(i), p) / pattern.length, 1 / p);
// struct.thresholds[i] = thresholdList.get(i);
diff --git a/server/src/main/java/org/apache/iotdb/db/index/algorithm/mmhh/MMHHIndex.java b/server/src/main/java/org/apache/iotdb/db/index/algorithm/mmhh/MMHHIndex.java
index 2d20a73..474cfa7 100644
--- a/server/src/main/java/org/apache/iotdb/db/index/algorithm/mmhh/MMHHIndex.java
+++ b/server/src/main/java/org/apache/iotdb/db/index/algorithm/mmhh/MMHHIndex.java
@@ -45,6 +45,7 @@ import org.apache.iotdb.tsfile.utils.Pair;
import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
import ai.djl.MalformedModelException;
+import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -221,6 +222,13 @@ public class MMHHIndex extends IoTDBIndex {
ReadWriteIOUtils.write(v, outputStream);
}
}
+ System.out.println("hashtable bucket size: " + hashLookupTable.size());
+ long expectSize = 4L + hashLookupTable.size() * (8 + 4) + itemSize * 8;
+ System.out.println(
+ String.format(
+ "calc size: %d=4L + bucket_size(%d) * (8 + 4) + itemSize(%d) * 8",
+ expectSize, hashLookupTable.size(), itemSize));
+ System.out.println("hashtable file size: " + FileUtils.sizeOf(featureFile));
} catch (IOException e) {
logger.error("Error when serialize router. Given up.", e);
}
diff --git a/server/src/main/java/org/apache/iotdb/db/index/algorithm/rtree/RTree.java b/server/src/main/java/org/apache/iotdb/db/index/algorithm/rtree/RTree.java
index 04eeed2..ae62f45 100644
--- a/server/src/main/java/org/apache/iotdb/db/index/algorithm/rtree/RTree.java
+++ b/server/src/main/java/org/apache/iotdb/db/index/algorithm/rtree/RTree.java
@@ -54,9 +54,9 @@ public class RTree<T> {
private static final Logger logger = LoggerFactory.getLogger(RTree.class);
- private static final int INNER_NODE = 0;
- private static final int LEAF_NODE = 1;
- private static final int ITEM = 2;
+ private static final short INNER_NODE = 0;
+ private static final short LEAF_NODE = 1;
+ private static final short ITEM = 2;
private static final float VAGUE_ERROR = 0.0001f;
final int dim;
@@ -418,8 +418,10 @@ public class RTree<T> {
for (float lb : node.lbs) {
ReadWriteIOUtils.write(lb, outputStream);
}
- for (float ub : node.ubs) {
- ReadWriteIOUtils.write(ub, outputStream);
+ if (!(node instanceof Item)) {
+ for (float ub : node.ubs) {
+ ReadWriteIOUtils.write(ub, outputStream);
+ }
}
if (node instanceof Item) {
T value = ((Item<T>) node).v;
@@ -458,14 +460,18 @@ public class RTree<T> {
InputStream inputStream,
Function<InputStream, T> deserializeItemFunc)
throws IOException {
- int nodeType = ReadWriteIOUtils.readInt(inputStream);
+ short nodeType = ReadWriteIOUtils.readShort(inputStream);
float[] lbs = new float[rTree.dim];
float[] ubs = new float[rTree.dim];
for (int i = 0; i < rTree.dim; i++) {
lbs[i] = ReadWriteIOUtils.readFloat(inputStream);
}
- for (int i = 0; i < rTree.dim; i++) {
- ubs[i] = ReadWriteIOUtils.readFloat(inputStream);
+ if (nodeType == ITEM) {
+ System.arraycopy(lbs, 0, ubs, 0, rTree.dim);
+ } else {
+ for (int i = 0; i < rTree.dim; i++) {
+ ubs[i] = ReadWriteIOUtils.readFloat(inputStream);
+ }
}
RNode node;
if (nodeType == ITEM) {
@@ -492,14 +498,38 @@ public class RTree<T> {
sb.append(String.format("nMax:%d,", nMaxPerNode));
sb.append(String.format("nMin:%d,", nMinPerNode));
sb.append(String.format("dim:%d,", dim));
- sb.append(String.format("seedsPicker:%s%n", seedsPicker));
- if (root == null) {
- return sb.toString();
- }
- toString(root, 0, sb);
+ sb.append(String.format("seedsPicker:%s", seedsPicker));
+ int[] countNodes = {0, 0, 0}; // inner, leaf, item
+ statRTree(countNodes, root);
+ sb.append(
+ String.format("inner:%d,leaf:%d,item:%d;%n ", countNodes[0], countNodes[1], countNodes[2]));
+ long expectSize =
+ 4 * 3
+ + 2
+ + (countNodes[0] + countNodes[1]) * (2L * dim * 4 + 2 + 4)
+ + countNodes[2] * (dim * 4 + 2 + 8);
+ sb.append(
+ String.format(
+ "calc size: %d=4*3+2+(#inner(%d)+#leaf(%d)) * (2*dim(%d)*4+2+4) + #item(%d) * (dim(%d)*4 + 2 + 8)",
+ expectSize, countNodes[0], countNodes[1], dim, countNodes[2], dim));
return sb.toString();
}
+ private void statRTree(int[] countNodes, RNode node) {
+ if (node instanceof Item) {
+ countNodes[2]++;
+ } else {
+ if (node.isLeaf) {
+ countNodes[1]++;
+ } else {
+ countNodes[0]++;
+ }
+ for (RNode child : node.children) {
+ statRTree(countNodes, child);
+ }
+ }
+ }
+
public String toDetailedString() {
StringBuilder sb = new StringBuilder();
sb.append(String.format("nMax:%d,", nMaxPerNode));
diff --git a/server/src/test/java/org/apache/iotdb/db/index/algorithm/rtree/RTreeTest.java b/server/src/test/java/org/apache/iotdb/db/index/algorithm/rtree/RTreeTest.java
index d3d41ef..5fb6560 100644
--- a/server/src/test/java/org/apache/iotdb/db/index/algorithm/rtree/RTreeTest.java
+++ b/server/src/test/java/org/apache/iotdb/db/index/algorithm/rtree/RTreeTest.java
@@ -130,9 +130,10 @@ public class RTreeTest {
if (!checkRTree(rTree)) {
fail();
}
+ System.out.println(rTree.toDetailedString());
}
- System.out.println(rTree);
- Assert.assertEquals(gt, rTree.toString());
+ System.out.println(rTree.toDetailedString());
+ Assert.assertEquals(gt, rTree.toDetailedString());
}
/**
diff --git a/server/src/test/java/org/apache/iotdb/db/index/it/DemoELBWindIT.java b/server/src/test/java/org/apache/iotdb/db/index/it/DemoELBWindIT.java
index e6c9df8..b3d7371 100644
--- a/server/src/test/java/org/apache/iotdb/db/index/it/DemoELBWindIT.java
+++ b/server/src/test/java/org/apache/iotdb/db/index/it/DemoELBWindIT.java
@@ -153,7 +153,7 @@ public class DemoELBWindIT {
ZoneId.systemDefault()),
subInput.getFloat(i));
statement.execute(insertSQL);
- System.out.println(insertSQL);
+ // System.out.println(insertSQL);
}
statement.execute("flush");
// System.out.println("==========================");