You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2022/11/04 18:41:03 UTC

[pinot] branch master updated: another approach (#9712)

This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 9898b913e0 another approach (#9712)
9898b913e0 is described below

commit 9898b913e03592200e5743c61b1c18972901346a
Author: Rong Rong <ro...@apache.org>
AuthorDate: Fri Nov 4 11:40:54 2022 -0700

    another approach (#9712)
    
    Co-authored-by: Rong Rong <ro...@startree.ai>
---
 .../segment/store/SingleFileIndexDirectory.java    | 27 ++++--------
 .../apache/pinot/segment/spi/ColumnMetadata.java   |  4 ++
 .../spi/index/metadata/ColumnMetadataImpl.java     | 19 ++++++++-
 .../spi/index/metadata/SegmentMetadataImpl.java    | 23 ++++++++++
 .../pinot/segment/spi/store/ColumnIndexUtils.java  | 49 ++++++++++++++++++++++
 5 files changed, 100 insertions(+), 22 deletions(-)

diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
index 744dd59e2f..8b0faacb9c 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
@@ -44,6 +44,7 @@ import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
 import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
 import org.apache.pinot.segment.spi.store.ColumnIndexDirectory;
 import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.ColumnIndexUtils;
 import org.apache.pinot.spi.env.CommonsConfigurationUtils;
 import org.apache.pinot.spi.utils.ReadMode;
 import org.slf4j.Logger;
@@ -70,9 +71,6 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory {
 
   private static final long MAGIC_MARKER = 0xdeadbeefdeafbeadL;
   private static final int MAGIC_MARKER_SIZE_BYTES = 8;
-  private static final String MAP_KEY_SEPARATOR = ".";
-  private static final String MAP_KEY_NAME_START_OFFSET = "startOffset";
-  private static final String MAP_KEY_NAME_SIZE = "size";
 
   // Max size of buffer we want to allocate
   // ByteBuffer limits the size to 2GB - (some platform dependent size)
@@ -218,29 +216,17 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory {
     PropertiesConfiguration mapConfig = CommonsConfigurationUtils.fromFile(mapFile);
 
     for (String key : CommonsConfigurationUtils.getKeys(mapConfig)) {
-      // column names can have '.' in it hence scan from backwards
-      // parsing names like "column.name.dictionary.startOffset"
-      // or, "column.name.dictionary.endOffset" where column.name is the key
-      int lastSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR);
-      Preconditions
-          .checkState(lastSeparatorPos != -1, "Key separator not found: " + key + ", segment: " + _segmentDirectory);
-      String propertyName = key.substring(lastSeparatorPos + 1);
-
-      int indexSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR, lastSeparatorPos - 1);
-      Preconditions.checkState(indexSeparatorPos != -1,
-          "Index separator not found: " + key + " , segment: " + _segmentDirectory);
-      String indexName = key.substring(indexSeparatorPos + 1, lastSeparatorPos);
-      String columnName = key.substring(0, indexSeparatorPos);
-      IndexKey indexKey = new IndexKey(columnName, ColumnIndexType.getValue(indexName));
+      String[] parsedKeys = ColumnIndexUtils.parseIndexMapKeys(key, _segmentDirectory.getPath());
+      IndexKey indexKey = new IndexKey(parsedKeys[0], ColumnIndexType.getValue(parsedKeys[1]));
       IndexEntry entry = _columnEntries.get(indexKey);
       if (entry == null) {
         entry = new IndexEntry(indexKey);
         _columnEntries.put(indexKey, entry);
       }
 
-      if (propertyName.equals(MAP_KEY_NAME_START_OFFSET)) {
+      if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_START_OFFSET)) {
         entry._startOffset = mapConfig.getLong(key);
-      } else if (propertyName.equals(MAP_KEY_NAME_SIZE)) {
+      } else if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_SIZE)) {
         entry._size = mapConfig.getLong(key);
       } else {
         throw new ConfigurationException(
@@ -439,7 +425,8 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory {
   }
 
   private static String getKey(String column, String indexName, boolean isStartOffset) {
-    return column + MAP_KEY_SEPARATOR + indexName + MAP_KEY_SEPARATOR + (isStartOffset ? "startOffset" : "size");
+    return column + ColumnIndexUtils.MAP_KEY_SEPARATOR + indexName + ColumnIndexUtils.MAP_KEY_SEPARATOR
+        + (isStartOffset ? "startOffset" : "size");
   }
 
   @VisibleForTesting
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java
index 00975a3c43..4346d1f744 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java
@@ -19,9 +19,11 @@
 package org.apache.pinot.segment.spi;
 
 import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.Map;
 import java.util.Set;
 import javax.annotation.Nullable;
 import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
 import org.apache.pinot.spi.annotations.InterfaceAudience;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
@@ -90,5 +92,7 @@ public interface ColumnMetadata {
   @Nullable
   Set<Integer> getPartitions();
 
+  Map<ColumnIndexType, Long> getIndexSizeMap();
+
   boolean isAutoGenerated();
 }
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
index 55b076bfbb..6b0b3d72a9 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
@@ -37,6 +37,7 @@ import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Segment;
 import org.apache.pinot.segment.spi.partition.PartitionFunction;
 import org.apache.pinot.segment.spi.partition.PartitionFunctionFactory;
 import org.apache.pinot.segment.spi.partition.metadata.ColumnPartitionMetadata;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
 import org.apache.pinot.spi.data.DateTimeFieldSpec;
 import org.apache.pinot.spi.data.DimensionFieldSpec;
 import org.apache.pinot.spi.data.FieldSpec;
@@ -65,13 +66,14 @@ public class ColumnMetadataImpl implements ColumnMetadata {
   private final int _totalNumberOfEntries;
   private final PartitionFunction _partitionFunction;
   private final Set<Integer> _partitions;
+  private final Map<ColumnIndexType, Long> _indexSizeMap;
   private final boolean _autoGenerated;
 
   private ColumnMetadataImpl(FieldSpec fieldSpec, int totalDocs, int cardinality, boolean sorted,
       Comparable<?> minValue, Comparable<?> maxValue, boolean minMaxValueInvalid, boolean hasDictionary,
       int columnMaxLength, char paddingCharacter, int bitsPerElement, int maxNumberOfMultiValues,
       int totalNumberOfEntries, @Nullable PartitionFunction partitionFunction, @Nullable Set<Integer> partitions,
-      boolean autoGenerated) {
+      Map<ColumnIndexType, Long> indexSizeMap, boolean autoGenerated) {
     _fieldSpec = fieldSpec;
     _totalDocs = totalDocs;
     _cardinality = cardinality;
@@ -87,6 +89,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
     _totalNumberOfEntries = totalNumberOfEntries;
     _partitionFunction = partitionFunction;
     _partitions = partitions;
+    _indexSizeMap = indexSizeMap;
     _autoGenerated = autoGenerated;
   }
 
@@ -166,6 +169,12 @@ public class ColumnMetadataImpl implements ColumnMetadata {
     return _partitions;
   }
 
+  @Nullable
+  @Override
+  public Map<ColumnIndexType, Long> getIndexSizeMap() {
+    return _indexSizeMap;
+  }
+
   @Override
   public boolean isAutoGenerated() {
     return _autoGenerated;
@@ -348,6 +357,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
     private PartitionFunction _partitionFunction;
     private Set<Integer> _partitions;
     private boolean _autoGenerated;
+    private Map<ColumnIndexType, Long> _indexSizeMap = new HashMap<>();
 
     public Builder setFieldSpec(FieldSpec fieldSpec) {
       _fieldSpec = fieldSpec;
@@ -424,6 +434,10 @@ public class ColumnMetadataImpl implements ColumnMetadata {
       return this;
     }
 
+    public void setIndexSizeMap(Map<ColumnIndexType, Long> indexSizeMap) {
+      _indexSizeMap = indexSizeMap;
+    }
+
     public Builder setAutoGenerated(boolean autoGenerated) {
       _autoGenerated = autoGenerated;
       return this;
@@ -432,7 +446,8 @@ public class ColumnMetadataImpl implements ColumnMetadata {
     public ColumnMetadataImpl build() {
       return new ColumnMetadataImpl(_fieldSpec, _totalDocs, _cardinality, _sorted, _minValue, _maxValue,
           _minMaxValueInvalid, _hasDictionary, _columnMaxLength, _paddingCharacter, _bitsPerElement,
-          _maxNumberOfMultiValues, _totalNumberOfEntries, _partitionFunction, _partitions, _autoGenerated);
+          _maxNumberOfMultiValues, _totalNumberOfEntries, _partitionFunction, _partitions, _indexSizeMap,
+          _autoGenerated);
     }
   }
 }
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
index b17f5d41ea..196ef4ef30 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
@@ -45,10 +45,13 @@ import org.apache.commons.configuration.Configuration;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.pinot.segment.spi.ColumnMetadata;
 import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
 import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Segment;
 import org.apache.pinot.segment.spi.creator.SegmentVersion;
 import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants;
 import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.ColumnIndexUtils;
 import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
 import org.apache.pinot.spi.config.table.TimestampIndexGranularity;
 import org.apache.pinot.spi.data.Schema;
@@ -245,6 +248,26 @@ public class SegmentMetadataImpl implements SegmentMetadata {
       _schema.addField(columnMetadata.getFieldSpec());
     }
 
+    // Load index metadata
+    // Support V3 (e.g. SingleFileIndexDirectory only)
+    if (_segmentVersion == SegmentVersion.v3) {
+      File indexMapFile = new File(_indexDir, "v3" + File.separator + V1Constants.INDEX_MAP_FILE_NAME);
+      if (indexMapFile.exists()) {
+        PropertiesConfiguration mapConfig = CommonsConfigurationUtils.fromFile(indexMapFile);
+        for (String key : CommonsConfigurationUtils.getKeys(mapConfig)) {
+          try {
+            String[] parsedKeys = ColumnIndexUtils.parseIndexMapKeys(key, _indexDir.getPath());
+            if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_SIZE)) {
+              ColumnIndexType columnIndexType = ColumnIndexType.getValue(parsedKeys[1]);
+              _columnMetadataMap.get(parsedKeys[0]).getIndexSizeMap().put(columnIndexType, mapConfig.getLong(key));
+            }
+          } catch (Exception e) {
+            LOGGER.debug("Unable to load index metadata in {} for {}!", indexMapFile, key, e);
+          }
+        }
+      }
+    }
+
     // Build star-tree v2 metadata
     int starTreeV2Count =
         segmentMetadataPropertiesConfiguration.getInt(StarTreeV2Constants.MetadataKey.STAR_TREE_COUNT, 0);
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java
new file mode 100644
index 0000000000..4571b5f85b
--- /dev/null
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.store;
+
+import com.google.common.base.Preconditions;
+
+
+public class ColumnIndexUtils {
+  public static final String MAP_KEY_SEPARATOR = ".";
+  public static final String MAP_KEY_NAME_START_OFFSET = "startOffset";
+  public static final String MAP_KEY_NAME_SIZE = "size";
+
+  private ColumnIndexUtils() {
+    // do not instantiate.
+  }
+
+  public static String[] parseIndexMapKeys(String key, String segmentDir) {
+    // column names can have '.' in it hence scan from backwards
+    // parsing names like "column.name.dictionary.startOffset"
+    // or, "column.name.dictionary.endOffset" where column.name is the key
+    int lastSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR);
+    Preconditions
+        .checkState(lastSeparatorPos != -1, "Key separator not found: " + key + ", segment: " + segmentDir);
+    String propertyName = key.substring(lastSeparatorPos + 1);
+
+    int indexSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR, lastSeparatorPos - 1);
+    Preconditions.checkState(indexSeparatorPos != -1,
+        "Index separator not found: " + key + " , segment: " + segmentDir);
+    String indexName = key.substring(indexSeparatorPos + 1, lastSeparatorPos);
+    String columnName = key.substring(0, indexSeparatorPos);
+    return new String[]{columnName, indexName, propertyName};
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org