You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@iotdb.apache.org by GitBox <gi...@apache.org> on 2020/05/02 09:29:04 UTC

[GitHub] [incubator-iotdb] qiaojialin commented on a change in pull request #1134: [IOTDB-631] Using new TsFile MetadataIndex to optimize query and cache

qiaojialin commented on a change in pull request #1134:
URL: https://github.com/apache/incubator-iotdb/pull/1134#discussion_r418936196



##########
File path: tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java
##########
@@ -314,28 +311,76 @@ public TsFileMetadata readFileMetadata() throws IOException {
   public TimeseriesMetadata readTimeseriesMetadata(Path path) throws IOException {
     readFileMetadata();
     MetadataIndexNode deviceMetadataIndexNode = tsFileMetaData.getMetadataIndex();
-    Pair<MetadataIndexEntry, Long> metadataIndexPair = getMetaDataAndEndOffset(
+    Pair<MetadataIndexEntry, Long> metadataIndexPair = getMetadataAndEndOffset(
         deviceMetadataIndexNode, path.getDevice(), MetadataIndexNodeType.INTERNAL_DEVICE);
     ByteBuffer buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right);
     while (!metadataIndexPair.left.getChildNodeType()
         .equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) {
       MetadataIndexNode metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer);
-      metadataIndexPair = getMetaDataAndEndOffset(metadataIndexNode,
+      metadataIndexPair = getMetadataAndEndOffset(metadataIndexNode,
           path.getMeasurement(), MetadataIndexNodeType.INTERNAL_MEASUREMENT);
     }
     List<TimeseriesMetadata> timeseriesMetadataList = new ArrayList<>();
     buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right);
     while (buffer.hasRemaining()) {
       timeseriesMetadataList.add(TimeseriesMetadata.deserializeFrom(buffer));
     }
-    String[] measurementNameList = timeseriesMetadataList.stream()
-        .map(TimeseriesMetadata::getMeasurementId).collect(Collectors.toList())
-        .toArray(new String[timeseriesMetadataList.size()]);
-
     // return null if path does not exist in the TsFile
-    int searchResult;
-    return (searchResult = Arrays.binarySearch(measurementNameList, path.getMeasurement())) >= 0
-        ? timeseriesMetadataList.get(searchResult) : null;
+    int searchResult = binarySearchInTimeseriesMetadataList(timeseriesMetadataList,
+        path.getMeasurement());
+    return searchResult >= 0 ? timeseriesMetadataList.get(searchResult) : null;
+  }
+
+  public List<TimeseriesMetadata> readTimeseriesMetadata(String device, Set<String> measurements)
+      throws IOException {
+    readFileMetadata();
+    MetadataIndexNode deviceMetadataIndexNode = tsFileMetaData.getMetadataIndex();
+    Pair<MetadataIndexEntry, Long> metadataIndexPair = getMetadataAndEndOffset(
+        deviceMetadataIndexNode, device, MetadataIndexNodeType.INTERNAL_DEVICE);

Review comment:
       I'm not clear about the MetadataIndexNodeType.INTERNAL_DEVICE argument here. How should I read this parameter: is it the type of the node being queried? If so, shouldn't it be LEAF_DEVICE?

##########
File path: server/src/main/java/org/apache/iotdb/db/engine/cache/TimeSeriesMetadataCache.java
##########
@@ -125,9 +130,20 @@ public TimeseriesMetadata get(TimeSeriesMetadataCacheKey key, Set<String> allSen
         return null;
       }
       TsFileSequenceReader reader = FileReaderManager.getInstance().get(key.filePath, true);
-      TimeseriesMetadata timeseriesMetadata = reader.readTimeseriesMetadata(new Path(key.device, key.measurement));
-      lruCache.put(key, timeseriesMetadata);
-      return timeseriesMetadata;
+      TimeseriesMetadata resultTimeseriesMetadata = reader
+          .readTimeseriesMetadata(new Path(key.device, key.measurement));
+      lruCache.put(key, resultTimeseriesMetadata);
+
+      List<TimeseriesMetadata> timeSeriesMetadataList = reader
+          .readTimeseriesMetadata(key.device, allSensors);
+      if (!allSensors.isEmpty()) {
+        // put TimeSeriesMetadata of all sensors used in this query into cache
+        timeSeriesMetadataList.forEach(timeseriesMetadata -> {
+          lruCache.put(new TimeSeriesMetadataCacheKey(key.filePath, key.device,
+              timeseriesMetadata.getMeasurementId()), timeseriesMetadata);
+        });
+      }

Review comment:
       Hi, key.measurement should also be in the allSensors set. Therefore, we only need to query allSensors, put the results into the cache, and then return the queried sensor's TimeseriesMetadata.

##########
File path: tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java
##########
@@ -314,28 +311,76 @@ public TsFileMetadata readFileMetadata() throws IOException {
   public TimeseriesMetadata readTimeseriesMetadata(Path path) throws IOException {
     readFileMetadata();
     MetadataIndexNode deviceMetadataIndexNode = tsFileMetaData.getMetadataIndex();
-    Pair<MetadataIndexEntry, Long> metadataIndexPair = getMetaDataAndEndOffset(
+    Pair<MetadataIndexEntry, Long> metadataIndexPair = getMetadataAndEndOffset(
         deviceMetadataIndexNode, path.getDevice(), MetadataIndexNodeType.INTERNAL_DEVICE);
     ByteBuffer buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right);
     while (!metadataIndexPair.left.getChildNodeType()
         .equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) {
       MetadataIndexNode metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer);
-      metadataIndexPair = getMetaDataAndEndOffset(metadataIndexNode,
+      metadataIndexPair = getMetadataAndEndOffset(metadataIndexNode,
           path.getMeasurement(), MetadataIndexNodeType.INTERNAL_MEASUREMENT);
     }
     List<TimeseriesMetadata> timeseriesMetadataList = new ArrayList<>();
     buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right);
     while (buffer.hasRemaining()) {
       timeseriesMetadataList.add(TimeseriesMetadata.deserializeFrom(buffer));
     }
-    String[] measurementNameList = timeseriesMetadataList.stream()
-        .map(TimeseriesMetadata::getMeasurementId).collect(Collectors.toList())
-        .toArray(new String[timeseriesMetadataList.size()]);
-
     // return null if path does not exist in the TsFile
-    int searchResult;
-    return (searchResult = Arrays.binarySearch(measurementNameList, path.getMeasurement())) >= 0
-        ? timeseriesMetadataList.get(searchResult) : null;
+    int searchResult = binarySearchInTimeseriesMetadataList(timeseriesMetadataList,
+        path.getMeasurement());
+    return searchResult >= 0 ? timeseriesMetadataList.get(searchResult) : null;
+  }
+
+  public List<TimeseriesMetadata> readTimeseriesMetadata(String device, Set<String> measurements)
+      throws IOException {
+    readFileMetadata();
+    MetadataIndexNode deviceMetadataIndexNode = tsFileMetaData.getMetadataIndex();
+    Pair<MetadataIndexEntry, Long> metadataIndexPair = getMetadataAndEndOffset(
+        deviceMetadataIndexNode, device, MetadataIndexNodeType.INTERNAL_DEVICE);
+    List<TimeseriesMetadata> resultTimeseriesMetadataList = new ArrayList<>();
+    for (String measurement : measurements) {
+      ByteBuffer buffer = readData(metadataIndexPair.left.getOffset(), metadataIndexPair.right);
+      Pair<MetadataIndexEntry, Long> measurementMetadataIndexPair = metadataIndexPair;
+      List<TimeseriesMetadata> timeseriesMetadataList = new ArrayList<>();
+      while (!measurementMetadataIndexPair.left.getChildNodeType()
+          .equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) {
+        MetadataIndexNode metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer);
+        measurementMetadataIndexPair = getMetadataAndEndOffset(metadataIndexNode,
+            measurement, MetadataIndexNodeType.INTERNAL_MEASUREMENT);
+      }
+      buffer = readData(measurementMetadataIndexPair.left.getOffset(),
+          measurementMetadataIndexPair.right);
+      while (buffer.hasRemaining()) {
+        timeseriesMetadataList.add(TimeseriesMetadata.deserializeFrom(buffer));
+      }
+      int searchResult = binarySearchInTimeseriesMetadataList(timeseriesMetadataList,

Review comment:
       Check whether the size of measurements reaches a threshold; if it does, simply traversing from start to end is quicker, and there is no need to binary search for each measurement.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org