You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2022/11/04 18:41:03 UTC
[pinot] branch master updated: another appraoch (#9712)
This is an automated email from the ASF dual-hosted git repository.
rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 9898b913e0 another appraoch (#9712)
9898b913e0 is described below
commit 9898b913e03592200e5743c61b1c18972901346a
Author: Rong Rong <ro...@apache.org>
AuthorDate: Fri Nov 4 11:40:54 2022 -0700
another appraoch (#9712)
Co-authored-by: Rong Rong <ro...@startree.ai>
---
.../segment/store/SingleFileIndexDirectory.java | 27 ++++--------
.../apache/pinot/segment/spi/ColumnMetadata.java | 4 ++
.../spi/index/metadata/ColumnMetadataImpl.java | 19 ++++++++-
.../spi/index/metadata/SegmentMetadataImpl.java | 23 ++++++++++
.../pinot/segment/spi/store/ColumnIndexUtils.java | 49 ++++++++++++++++++++++
5 files changed, 100 insertions(+), 22 deletions(-)
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
index 744dd59e2f..8b0faacb9c 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
@@ -44,6 +44,7 @@ import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.apache.pinot.segment.spi.store.ColumnIndexDirectory;
import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.ColumnIndexUtils;
import org.apache.pinot.spi.env.CommonsConfigurationUtils;
import org.apache.pinot.spi.utils.ReadMode;
import org.slf4j.Logger;
@@ -70,9 +71,6 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory {
private static final long MAGIC_MARKER = 0xdeadbeefdeafbeadL;
private static final int MAGIC_MARKER_SIZE_BYTES = 8;
- private static final String MAP_KEY_SEPARATOR = ".";
- private static final String MAP_KEY_NAME_START_OFFSET = "startOffset";
- private static final String MAP_KEY_NAME_SIZE = "size";
// Max size of buffer we want to allocate
// ByteBuffer limits the size to 2GB - (some platform dependent size)
@@ -218,29 +216,17 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory {
PropertiesConfiguration mapConfig = CommonsConfigurationUtils.fromFile(mapFile);
for (String key : CommonsConfigurationUtils.getKeys(mapConfig)) {
- // column names can have '.' in it hence scan from backwards
- // parsing names like "column.name.dictionary.startOffset"
- // or, "column.name.dictionary.endOffset" where column.name is the key
- int lastSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR);
- Preconditions
- .checkState(lastSeparatorPos != -1, "Key separator not found: " + key + ", segment: " + _segmentDirectory);
- String propertyName = key.substring(lastSeparatorPos + 1);
-
- int indexSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR, lastSeparatorPos - 1);
- Preconditions.checkState(indexSeparatorPos != -1,
- "Index separator not found: " + key + " , segment: " + _segmentDirectory);
- String indexName = key.substring(indexSeparatorPos + 1, lastSeparatorPos);
- String columnName = key.substring(0, indexSeparatorPos);
- IndexKey indexKey = new IndexKey(columnName, ColumnIndexType.getValue(indexName));
+ String[] parsedKeys = ColumnIndexUtils.parseIndexMapKeys(key, _segmentDirectory.getPath());
+ IndexKey indexKey = new IndexKey(parsedKeys[0], ColumnIndexType.getValue(parsedKeys[1]));
IndexEntry entry = _columnEntries.get(indexKey);
if (entry == null) {
entry = new IndexEntry(indexKey);
_columnEntries.put(indexKey, entry);
}
- if (propertyName.equals(MAP_KEY_NAME_START_OFFSET)) {
+ if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_START_OFFSET)) {
entry._startOffset = mapConfig.getLong(key);
- } else if (propertyName.equals(MAP_KEY_NAME_SIZE)) {
+ } else if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_SIZE)) {
entry._size = mapConfig.getLong(key);
} else {
throw new ConfigurationException(
@@ -439,7 +425,8 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory {
}
private static String getKey(String column, String indexName, boolean isStartOffset) {
- return column + MAP_KEY_SEPARATOR + indexName + MAP_KEY_SEPARATOR + (isStartOffset ? "startOffset" : "size");
+ return column + ColumnIndexUtils.MAP_KEY_SEPARATOR + indexName + ColumnIndexUtils.MAP_KEY_SEPARATOR
+ + (isStartOffset ? "startOffset" : "size");
}
@VisibleForTesting
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java
index 00975a3c43..4346d1f744 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java
@@ -19,9 +19,11 @@
package org.apache.pinot.segment.spi;
import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
import org.apache.pinot.spi.annotations.InterfaceAudience;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
@@ -90,5 +92,7 @@ public interface ColumnMetadata {
@Nullable
Set<Integer> getPartitions();
+ Map<ColumnIndexType, Long> getIndexSizeMap();
+
boolean isAutoGenerated();
}
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
index 55b076bfbb..6b0b3d72a9 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
@@ -37,6 +37,7 @@ import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Segment;
import org.apache.pinot.segment.spi.partition.PartitionFunction;
import org.apache.pinot.segment.spi.partition.PartitionFunctionFactory;
import org.apache.pinot.segment.spi.partition.metadata.ColumnPartitionMetadata;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
@@ -65,13 +66,14 @@ public class ColumnMetadataImpl implements ColumnMetadata {
private final int _totalNumberOfEntries;
private final PartitionFunction _partitionFunction;
private final Set<Integer> _partitions;
+ private final Map<ColumnIndexType, Long> _indexSizeMap;
private final boolean _autoGenerated;
private ColumnMetadataImpl(FieldSpec fieldSpec, int totalDocs, int cardinality, boolean sorted,
Comparable<?> minValue, Comparable<?> maxValue, boolean minMaxValueInvalid, boolean hasDictionary,
int columnMaxLength, char paddingCharacter, int bitsPerElement, int maxNumberOfMultiValues,
int totalNumberOfEntries, @Nullable PartitionFunction partitionFunction, @Nullable Set<Integer> partitions,
- boolean autoGenerated) {
+ Map<ColumnIndexType, Long> indexSizeMap, boolean autoGenerated) {
_fieldSpec = fieldSpec;
_totalDocs = totalDocs;
_cardinality = cardinality;
@@ -87,6 +89,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
_totalNumberOfEntries = totalNumberOfEntries;
_partitionFunction = partitionFunction;
_partitions = partitions;
+ _indexSizeMap = indexSizeMap;
_autoGenerated = autoGenerated;
}
@@ -166,6 +169,12 @@ public class ColumnMetadataImpl implements ColumnMetadata {
return _partitions;
}
+ @Nullable
+ @Override
+ public Map<ColumnIndexType, Long> getIndexSizeMap() {
+ return _indexSizeMap;
+ }
+
@Override
public boolean isAutoGenerated() {
return _autoGenerated;
@@ -348,6 +357,7 @@ public class ColumnMetadataImpl implements ColumnMetadata {
private PartitionFunction _partitionFunction;
private Set<Integer> _partitions;
private boolean _autoGenerated;
+ private Map<ColumnIndexType, Long> _indexSizeMap = new HashMap<>();
public Builder setFieldSpec(FieldSpec fieldSpec) {
_fieldSpec = fieldSpec;
@@ -424,6 +434,10 @@ public class ColumnMetadataImpl implements ColumnMetadata {
return this;
}
+ public void setIndexSizeMap(Map<ColumnIndexType, Long> indexSizeMap) {
+ _indexSizeMap = indexSizeMap;
+ }
+
public Builder setAutoGenerated(boolean autoGenerated) {
_autoGenerated = autoGenerated;
return this;
@@ -432,7 +446,8 @@ public class ColumnMetadataImpl implements ColumnMetadata {
public ColumnMetadataImpl build() {
return new ColumnMetadataImpl(_fieldSpec, _totalDocs, _cardinality, _sorted, _minValue, _maxValue,
_minMaxValueInvalid, _hasDictionary, _columnMaxLength, _paddingCharacter, _bitsPerElement,
- _maxNumberOfMultiValues, _totalNumberOfEntries, _partitionFunction, _partitions, _autoGenerated);
+ _maxNumberOfMultiValues, _totalNumberOfEntries, _partitionFunction, _partitions, _indexSizeMap,
+ _autoGenerated);
}
}
}
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
index b17f5d41ea..196ef4ef30 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/SegmentMetadataImpl.java
@@ -45,10 +45,13 @@ import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.pinot.segment.spi.ColumnMetadata;
import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Segment;
import org.apache.pinot.segment.spi.creator.SegmentVersion;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.ColumnIndexUtils;
import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
import org.apache.pinot.spi.config.table.TimestampIndexGranularity;
import org.apache.pinot.spi.data.Schema;
@@ -245,6 +248,26 @@ public class SegmentMetadataImpl implements SegmentMetadata {
_schema.addField(columnMetadata.getFieldSpec());
}
+ // Load index metadata
+ // Support V3 (e.g. SingleFileIndexDirectory only)
+ if (_segmentVersion == SegmentVersion.v3) {
+ File indexMapFile = new File(_indexDir, "v3" + File.separator + V1Constants.INDEX_MAP_FILE_NAME);
+ if (indexMapFile.exists()) {
+ PropertiesConfiguration mapConfig = CommonsConfigurationUtils.fromFile(indexMapFile);
+ for (String key : CommonsConfigurationUtils.getKeys(mapConfig)) {
+ try {
+ String[] parsedKeys = ColumnIndexUtils.parseIndexMapKeys(key, _indexDir.getPath());
+ if (parsedKeys[2].equals(ColumnIndexUtils.MAP_KEY_NAME_SIZE)) {
+ ColumnIndexType columnIndexType = ColumnIndexType.getValue(parsedKeys[1]);
+ _columnMetadataMap.get(parsedKeys[0]).getIndexSizeMap().put(columnIndexType, mapConfig.getLong(key));
+ }
+ } catch (Exception e) {
+ LOGGER.debug("Unable to load index metadata in {} for {}!", indexMapFile, key, e);
+ }
+ }
+ }
+ }
+
// Build star-tree v2 metadata
int starTreeV2Count =
segmentMetadataPropertiesConfiguration.getInt(StarTreeV2Constants.MetadataKey.STAR_TREE_COUNT, 0);
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java
new file mode 100644
index 0000000000..4571b5f85b
--- /dev/null
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexUtils.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.store;
+
+import com.google.common.base.Preconditions;
+
+
+public class ColumnIndexUtils {
+ public static final String MAP_KEY_SEPARATOR = ".";
+ public static final String MAP_KEY_NAME_START_OFFSET = "startOffset";
+ public static final String MAP_KEY_NAME_SIZE = "size";
+
+ private ColumnIndexUtils() {
+ // do not instantiate.
+ }
+
+ public static String[] parseIndexMapKeys(String key, String segmentDir) {
+ // Splits a key of the form "<column>.<indexName>.<property>" into {column, indexName, property}.
+ // Column names can contain '.', so the key is scanned from the end: "column.name.dictionary.startOffset"
+ // and "column.name.dictionary.size" both yield the column "column.name" and the index "dictionary".
+ int lastSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR);
+ Preconditions
+ .checkState(lastSeparatorPos != -1, "Key separator not found: " + key + ", segment: " + segmentDir);
+ String propertyName = key.substring(lastSeparatorPos + 1);
+
+ int indexSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR, lastSeparatorPos - 1);
+ Preconditions.checkState(indexSeparatorPos != -1,
+ "Index separator not found: " + key + " , segment: " + segmentDir);
+ String indexName = key.substring(indexSeparatorPos + 1, lastSeparatorPos);
+ String columnName = key.substring(0, indexSeparatorPos);
+ return new String[]{columnName, indexName, propertyName};
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org