You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@iotdb.apache.org by GitBox <gi...@apache.org> on 2021/07/26 09:43:40 UTC

[GitHub] [iotdb] wangchao316 commented on a change in pull request #3627: [IOTDB-1517] Refactor TsFile Index for Vector(multi-variable timeseries)

wangchao316 commented on a change in pull request #3627:
URL: https://github.com/apache/iotdb/pull/3627#discussion_r676448942



##########
File path: tsfile/src/test/java/org/apache/iotdb/tsfile/write/MetadataIndexConstructorTest.java
##########
@@ -0,0 +1,469 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iotdb.tsfile.write;
+
+import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
+import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
+import org.apache.iotdb.tsfile.common.constant.TsFileConstant;
+import org.apache.iotdb.tsfile.constant.TestConstant;
+import org.apache.iotdb.tsfile.file.metadata.MetadataIndexEntry;
+import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode;
+import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata;
+import org.apache.iotdb.tsfile.file.metadata.TsFileMetadata;
+import org.apache.iotdb.tsfile.file.metadata.enums.MetadataIndexNodeType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer;
+import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
+import org.apache.iotdb.tsfile.read.common.Path;
+import org.apache.iotdb.tsfile.write.record.TSRecord;
+import org.apache.iotdb.tsfile.write.record.Tablet;
+import org.apache.iotdb.tsfile.write.record.datapoint.DataPoint;
+import org.apache.iotdb.tsfile.write.record.datapoint.LongDataPoint;
+import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
+import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
+import org.apache.iotdb.tsfile.write.schema.Schema;
+import org.apache.iotdb.tsfile.write.schema.VectorMeasurementSchema;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.shaded.org.apache.commons.lang.text.StrBuilder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/** test for MetadataIndexConstructor */
+public class MetadataIndexConstructorTest {
+  private static final Logger logger = LoggerFactory.getLogger(MetadataIndexConstructorTest.class);
+  private final TSFileConfig conf = TSFileDescriptor.getInstance().getConfig();
+  private static final String FILE_PATH =
+      TestConstant.BASE_OUTPUT_PATH.concat("MetadataIndexConstructorTest.tsfile");
+
+  private static final String measurementPrefix = "sensor_";
+  private static final String vectorPrefix = "vector_";
+  private int maxDegreeOfIndexNode;
+
+  @Before
+  public void before() {
+    maxDegreeOfIndexNode = conf.getMaxDegreeOfIndexNode();
+    conf.setMaxDegreeOfIndexNode(10);
+  }
+
+  @After
+  public void after() {
+    conf.setMaxDegreeOfIndexNode(maxDegreeOfIndexNode);
+  }
+
+  /** 5个实体,每个实体有5个物理量(docs.例1) */
+  @Test
+  public void singleIndexTest1() {
+    int deviceNum = 5;
+    int measurementNum = 5;
+    String[] devices = new String[deviceNum];
+    int[][] vectorMeasurement = new int[deviceNum][];
+    String[][] measurements = new String[deviceNum][];
+    for (int i = 0; i < deviceNum; i++) {
+      devices[i] = "d" + i;
+      vectorMeasurement[i] = new int[0];
+      measurements[i] = new String[measurementNum];
+      for (int j = 0; j < measurementNum; j++) {
+        measurements[i][j] = measurementPrefix + generateIndexString(j, measurementNum);
+      }
+    }
+    test(devices, vectorMeasurement, measurements);
+  }
+
+  /** 1个实体,每个实体有150个物理量(docs.例2) */
+  @Test
+  public void singleIndexTest2() {
+    int deviceNum = 1;
+    int measurementNum = 150;
+    String[] devices = new String[deviceNum];
+    int[][] vectorMeasurement = new int[deviceNum][];
+    String[][] measurements = new String[deviceNum][];
+    for (int i = 0; i < deviceNum; i++) {
+      devices[i] = "d" + i;
+      vectorMeasurement[i] = new int[0];
+      measurements[i] = new String[measurementNum];
+      for (int j = 0; j < measurementNum; j++) {
+        measurements[i][j] = measurementPrefix + generateIndexString(j, measurementNum);
+      }
+    }
+    test(devices, vectorMeasurement, measurements);
+  }
+
+  /** 150个实体,每个实体有1个物理量(docs.例3) */

Review comment:
       The same remark applies here: this Javadoc comment is written in Chinese.

##########
File path: tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java
##########
@@ -62,49 +68,45 @@ public static MetadataIndexNode constructMetadataIndex(
       MetadataIndexNode currentIndexNode =
           new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT);
       int serializedTimeseriesMetadataNum = 0;
+      String vectorName = ""; // record previous vector name
+      int numOfValueColumns = 0;
       for (int i = 0; i < entry.getValue().size(); i++) {
         timeseriesMetadata = entry.getValue().get(i);
-        if (timeseriesMetadata.isTimeColumn()) {
-          // calculate the number of value columns in this vector
-          int numOfValueColumns = 0;
+        if (numOfValueColumns > 0) {
+          // must be value column,不清楚这里是否需要加一层检验?外层可否保证?

Review comment:
       Good job. Thanks for your contribution.
   I have a small question.
   In general, Chinese is used in the comments. Should they be written in English instead?

##########
File path: tsfile/src/test/java/org/apache/iotdb/tsfile/write/MetadataIndexConstructorTest.java
##########
@@ -0,0 +1,469 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iotdb.tsfile.write;
+
+import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
+import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
+import org.apache.iotdb.tsfile.common.constant.TsFileConstant;
+import org.apache.iotdb.tsfile.constant.TestConstant;
+import org.apache.iotdb.tsfile.file.metadata.MetadataIndexEntry;
+import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode;
+import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata;
+import org.apache.iotdb.tsfile.file.metadata.TsFileMetadata;
+import org.apache.iotdb.tsfile.file.metadata.enums.MetadataIndexNodeType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer;
+import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
+import org.apache.iotdb.tsfile.read.common.Path;
+import org.apache.iotdb.tsfile.write.record.TSRecord;
+import org.apache.iotdb.tsfile.write.record.Tablet;
+import org.apache.iotdb.tsfile.write.record.datapoint.DataPoint;
+import org.apache.iotdb.tsfile.write.record.datapoint.LongDataPoint;
+import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
+import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
+import org.apache.iotdb.tsfile.write.schema.Schema;
+import org.apache.iotdb.tsfile.write.schema.VectorMeasurementSchema;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.shaded.org.apache.commons.lang.text.StrBuilder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/** test for MetadataIndexConstructor */
+public class MetadataIndexConstructorTest {
+  private static final Logger logger = LoggerFactory.getLogger(MetadataIndexConstructorTest.class);
+  private final TSFileConfig conf = TSFileDescriptor.getInstance().getConfig();
+  private static final String FILE_PATH =
+      TestConstant.BASE_OUTPUT_PATH.concat("MetadataIndexConstructorTest.tsfile");
+
+  private static final String measurementPrefix = "sensor_";
+  private static final String vectorPrefix = "vector_";
+  private int maxDegreeOfIndexNode;
+
+  @Before
+  public void before() {
+    maxDegreeOfIndexNode = conf.getMaxDegreeOfIndexNode();
+    conf.setMaxDegreeOfIndexNode(10);
+  }
+
+  @After
+  public void after() {
+    conf.setMaxDegreeOfIndexNode(maxDegreeOfIndexNode);
+  }
+
+  /** 5个实体,每个实体有5个物理量(docs.例1) */
+  @Test
+  public void singleIndexTest1() {
+    int deviceNum = 5;
+    int measurementNum = 5;
+    String[] devices = new String[deviceNum];
+    int[][] vectorMeasurement = new int[deviceNum][];
+    String[][] measurements = new String[deviceNum][];
+    for (int i = 0; i < deviceNum; i++) {
+      devices[i] = "d" + i;
+      vectorMeasurement[i] = new int[0];
+      measurements[i] = new String[measurementNum];
+      for (int j = 0; j < measurementNum; j++) {
+        measurements[i][j] = measurementPrefix + generateIndexString(j, measurementNum);
+      }
+    }
+    test(devices, vectorMeasurement, measurements);
+  }
+
+  /** 1个实体,每个实体有150个物理量(docs.例2) */
+  @Test
+  public void singleIndexTest2() {
+    int deviceNum = 1;
+    int measurementNum = 150;
+    String[] devices = new String[deviceNum];
+    int[][] vectorMeasurement = new int[deviceNum][];
+    String[][] measurements = new String[deviceNum][];
+    for (int i = 0; i < deviceNum; i++) {
+      devices[i] = "d" + i;
+      vectorMeasurement[i] = new int[0];
+      measurements[i] = new String[measurementNum];
+      for (int j = 0; j < measurementNum; j++) {
+        measurements[i][j] = measurementPrefix + generateIndexString(j, measurementNum);
+      }
+    }
+    test(devices, vectorMeasurement, measurements);
+  }
+
+  /** 150个实体,每个实体有1个物理量(docs.例3) */
+  @Test
+  public void singleIndexTest3() {
+    int deviceNum = 150;
+    int measurementNum = 1;
+    String[] devices = new String[deviceNum];
+    int[][] vectorMeasurement = new int[deviceNum][];
+    String[][] measurements = new String[deviceNum][];
+    for (int i = 0; i < deviceNum; i++) {
+      devices[i] = "d" + generateIndexString(i, deviceNum);
+      vectorMeasurement[i] = new int[0];
+      measurements[i] = new String[measurementNum];
+      for (int j = 0; j < measurementNum; j++) {
+        measurements[i][j] = measurementPrefix + generateIndexString(j, measurementNum);
+      }
+    }
+    test(devices, vectorMeasurement, measurements);
+  }
+
+  /** 150个实体,每个实体有150个物理量(docs.例4) */
+  @Test
+  public void singleIndexTest4() {
+    int deviceNum = 150;
+    int measurementNum = 1;
+    String[] devices = new String[deviceNum];
+    int[][] vectorMeasurement = new int[deviceNum][];
+    String[][] measurements = new String[deviceNum][];
+    for (int i = 0; i < deviceNum; i++) {
+      devices[i] = "d" + i;
+      vectorMeasurement[i] = new int[0];
+      measurements[i] = new String[measurementNum];
+      for (int j = 0; j < measurementNum; j++) {
+        measurements[i][j] = measurementPrefix + generateIndexString(j, measurementNum);
+      }
+    }
+    test(devices, vectorMeasurement, measurements);
+  }
+
+  /** 1个实体,20个物理量,2个多元时间序列组,每个多元时间序列组分别有9个物理量(docs.例5) */
+  @Test
+  public void vectorIndexTest1() {
+    String[] devices = {"d0"};
+    int[][] vectorMeasurement = {{9, 9}};
+    test(devices, vectorMeasurement, null);
+  }
+
+  /** 1个实体,30个物理量,2个多元时间序列组,每个多元时间序列组分别有15个物理量(docs.例6) */
+  @Test
+  public void vectorIndexTest2() {
+    String[] devices = {"d0"};
+    int[][] vectorMeasurement = {{15, 15}};
+    test(devices, vectorMeasurement, null);
+  }
+
+  /**
+   * 综合测试,包括多元时间序列与简单时间序列(docs.例7)
+   *
+   * <p>d0.s0~s4 | d0.v0.(s0~s8) | d0.z0~z3 d1.s0~s14 | d1.v0.(s0~s3)
+   */
+  @Test
+  public void compositeIndexTest() {
+    String[] devices = {"d0", "d1"};
+    int[][] vectorMeasurement = {{9}, {4}};
+    String[][] measurements = {
+      {"s0", "s1", "s2", "s3", "s4", "z0", "z1", "z2", "z3"},
+      {
+        "s00", "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12",
+        "s13", "s14"
+      }
+    };
+    test(devices, vectorMeasurement, measurements);
+  }
+
+  /**
+   * start test
+   *
+   * @param devices name and number of device
+   * @param vectorMeasurement the number of device and the number of values to include in the tablet
+   * @param singleMeasurement non-vector measurement name, set null if no need
+   */
+  private void test(String[] devices, int[][] vectorMeasurement, String[][] singleMeasurement) {
+    // 1. generate file
+    generateFile(devices, vectorMeasurement, singleMeasurement);
+    // 2. read metadata from file
+    List<String> actualDevices = new ArrayList<>(); // contains all device by sequence
+    List<List<String>> actualMeasurements =
+        new ArrayList<>(); // contains all measurements group by device
+    readMetaDataDFS(actualDevices, actualMeasurements);
+    // 3. generate correct result
+    List<String> correctDevices = new ArrayList<>(); // contains all device by sequence
+    List<List<String>> correctFirstMeasurements =
+        new ArrayList<>(); // contains first measurements of every leaf, group by device
+    generateCorrectResult(
+        correctDevices, correctFirstMeasurements, devices, vectorMeasurement, singleMeasurement);
+    // 4. compare correct result with TsFile's metadata
+    Arrays.sort(devices);
+    // 4.1 make sure device in order
+    assertEquals(correctDevices.size(), devices.length);
+    assertEquals(actualDevices.size(), correctDevices.size());
+    for (int i = 0; i < actualDevices.size(); i++) {
+      assertEquals(actualDevices.get(i), correctDevices.get(i));
+    }
+    // 4.2 make sure timeseries in order
+    try (TsFileSequenceReader reader = new TsFileSequenceReader(FILE_PATH)) {
+      Map<String, List<TimeseriesMetadata>> allTimeseriesMetadata =
+          reader.getAllTimeseriesMetadata();
+      for (int j = 0; j < actualDevices.size(); j++) {
+        for (int i = 0; i < actualMeasurements.get(j).size(); i++) {
+          assertEquals(
+              allTimeseriesMetadata.get(actualDevices.get(j)).get(i).getMeasurementId(),
+              correctFirstMeasurements.get(j).get(i));
+        }
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    // 4.3 make sure split leaf correctly
+    for (int j = 0; j < actualDevices.size(); j++) {
+      for (int i = 0; i < actualMeasurements.get(j).size(); i++) {
+        assertEquals(
+            actualMeasurements.get(j).get(i),
+            correctFirstMeasurements.get(j).get(i * conf.getMaxDegreeOfIndexNode()));
+      }
+    }
+  }
+
+  /**
+   * read TsFile metadata, load actual message in devices and measurements
+   *
+   * @param devices load actual devices
+   * @param measurements load actual measurement(first of every leaf)
+   */
+  private void readMetaDataDFS(List<String> devices, List<List<String>> measurements) {
+    try (TsFileSequenceReader reader = new TsFileSequenceReader(FILE_PATH)) {
+      TsFileMetadata tsFileMetaData = reader.readFileMetadata();
+      MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex();
+      deviceDFS(devices, measurements, reader, metadataIndexNode);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /** DFS in device level load actual devices */
+  private void deviceDFS(
+      List<String> devices,
+      List<List<String>> measurements,
+      TsFileSequenceReader reader,
+      MetadataIndexNode node) {
+    assertTrue(
+        node.getNodeType().equals(MetadataIndexNodeType.LEAF_DEVICE)
+            || node.getNodeType().equals(MetadataIndexNodeType.INTERNAL_DEVICE));
+    for (int i = 0; i < node.getChildren().size(); i++) {
+      MetadataIndexEntry metadataIndexEntry = node.getChildren().get(i);
+      long endOffset = node.getEndOffset();
+      if (i != node.getChildren().size() - 1) {
+        endOffset = node.getChildren().get(i + 1).getOffset();
+      }
+      MetadataIndexNode subNode =
+          reader.getMetadataIndexNode(metadataIndexEntry.getOffset(), endOffset);
+      if (node.getNodeType().equals(MetadataIndexNodeType.LEAF_DEVICE)) {
+        devices.add(metadataIndexEntry.getName());
+        measurements.add(new ArrayList<>());
+        measurementDFS(devices.size() - 1, measurements, reader, subNode);
+      } else if (node.getNodeType().equals(MetadataIndexNodeType.INTERNAL_DEVICE)) {
+        deviceDFS(devices, measurements, reader, subNode);
+      }
+    }
+  }
+  /** DFS in measurement level load actual measurements */
+  private void measurementDFS(
+      int deviceIndex,
+      List<List<String>> measurements,
+      TsFileSequenceReader reader,
+      MetadataIndexNode node) {
+    assertTrue(
+        node.getNodeType().equals(MetadataIndexNodeType.LEAF_MEASUREMENT)
+            || node.getNodeType().equals(MetadataIndexNodeType.INTERNAL_MEASUREMENT));
+    for (int i = 0; i < node.getChildren().size(); i++) {
+      MetadataIndexEntry metadataIndexEntry = node.getChildren().get(i);
+      long endOffset = node.getEndOffset();
+      if (i != node.getChildren().size() - 1) {
+        endOffset = node.getChildren().get(i + 1).getOffset();
+      }
+      if (node.getNodeType().equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) {
+        // 把每个叶子节点的第一个加进来

Review comment:
       The same remark applies here: this inline comment is written in Chinese.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@iotdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org