You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2020/05/15 00:10:12 UTC
[incubator-pinot] branch master updated: Fix the compilation error
and bug introduced in Range Index (#5389)
This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 6f840db Fix the compilation error and bug introduced in Range Index (#5389)
6f840db is described below
commit 6f840db6c84c69d2ebf137f1407332b517ce9322
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Thu May 14 17:09:59 2020 -0700
Fix the compilation error and bug introduced in Range Index (#5389)
1. Fix the compilation error introduced because of the merge of #4597 and #5240
2. Fix the bug of not loading the range index if both inverted index and range index exist
TODO: The range index triggeres another severe issue of accessing closed DataBuffer which can cause JVM crash. Will address in a separate PR
---
.../index/column/PhysicalColumnIndexContainer.java | 14 ++++----
.../loader/invertedindex/InvertedIndexHandler.java | 9 +++--
.../loader/invertedindex/RangeIndexHandler.java | 41 +++++++---------------
.../pinot/integration/tests/ClusterTest.java | 2 ++
.../tests/JsonPathClusterIntegrationTest.java | 5 +--
.../tests/OfflineClusterIntegrationTest.java | 4 +--
6 files changed, 30 insertions(+), 45 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
index eb33185..7725dea 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
@@ -33,7 +33,6 @@ import org.apache.pinot.core.segment.index.loader.IndexLoadingConfig;
import org.apache.pinot.core.segment.index.metadata.ColumnMetadata;
import org.apache.pinot.core.segment.index.readers.BaseImmutableDictionary;
import org.apache.pinot.core.segment.index.readers.BitmapInvertedIndexReader;
-import org.apache.pinot.core.segment.index.readers.RangeIndexReader;
import org.apache.pinot.core.segment.index.readers.BloomFilterReader;
import org.apache.pinot.core.segment.index.readers.BytesDictionary;
import org.apache.pinot.core.segment.index.readers.DoubleDictionary;
@@ -47,6 +46,7 @@ import org.apache.pinot.core.segment.index.readers.OnHeapFloatDictionary;
import org.apache.pinot.core.segment.index.readers.OnHeapIntDictionary;
import org.apache.pinot.core.segment.index.readers.OnHeapLongDictionary;
import org.apache.pinot.core.segment.index.readers.OnHeapStringDictionary;
+import org.apache.pinot.core.segment.index.readers.RangeIndexReader;
import org.apache.pinot.core.segment.index.readers.StringDictionary;
import org.apache.pinot.core.segment.index.readers.text.LuceneTextIndexReader;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
@@ -108,8 +108,7 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
// Single-value
if (metadata.isSorted()) {
// Sorted
- SortedIndexReader sortedIndexReader =
- new SortedIndexReaderImpl(fwdIndexBuffer, metadata.getCardinality());
+ SortedIndexReader sortedIndexReader = new SortedIndexReaderImpl(fwdIndexBuffer, metadata.getCardinality());
_forwardIndex = sortedIndexReader;
_invertedIndex = sortedIndexReader;
_rangeIndex = null;
@@ -129,13 +128,12 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
_invertedIndex =
new BitmapInvertedIndexReader(segmentReader.getIndexFor(columnName, ColumnIndexType.INVERTED_INDEX),
metadata.getCardinality());
- _rangeIndex = null;
- } else if (loadRangeIndex) {
- _invertedIndex = null;
- _rangeIndex =
- new RangeIndexReader(segmentReader.getIndexFor(columnName, ColumnIndexType.RANGE_INDEX));
} else {
_invertedIndex = null;
+ }
+ if (loadRangeIndex) {
+ _rangeIndex = new RangeIndexReader(segmentReader.getIndexFor(columnName, ColumnIndexType.RANGE_INDEX));
+ } else {
_rangeIndex = null;
}
} else {
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/InvertedIndexHandler.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/InvertedIndexHandler.java
index dbbdc5d..36e3806 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/InvertedIndexHandler.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/InvertedIndexHandler.java
@@ -22,7 +22,6 @@ import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
-import javax.annotation.Nonnull;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.core.indexsegment.generator.SegmentVersion;
import org.apache.pinot.core.io.reader.DataFileReader;
@@ -51,17 +50,17 @@ public class InvertedIndexHandler {
private final SegmentVersion _segmentVersion;
private final Set<ColumnMetadata> _invertedIndexColumns = new HashSet<>();
- public InvertedIndexHandler(@Nonnull File indexDir, @Nonnull SegmentMetadataImpl segmentMetadata,
- @Nonnull IndexLoadingConfig indexLoadingConfig, @Nonnull SegmentDirectory.Writer segmentWriter) {
+ public InvertedIndexHandler(File indexDir, SegmentMetadataImpl segmentMetadata, IndexLoadingConfig indexLoadingConfig,
+ SegmentDirectory.Writer segmentWriter) {
_indexDir = indexDir;
_segmentWriter = segmentWriter;
_segmentName = segmentMetadata.getName();
_segmentVersion = SegmentVersion.valueOf(segmentMetadata.getVersion());
- // Do not create inverted index for sorted column
+ // Only create inverted index on dictionary-encoded unsorted columns
for (String column : indexLoadingConfig.getInvertedIndexColumns()) {
ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
- if (columnMetadata != null && !columnMetadata.isSorted()) {
+ if (columnMetadata != null && !columnMetadata.isSorted() && columnMetadata.hasDictionary()) {
_invertedIndexColumns.add(columnMetadata);
}
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/RangeIndexHandler.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/RangeIndexHandler.java
index e7685a7..e792986 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/RangeIndexHandler.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/RangeIndexHandler.java
@@ -22,7 +22,6 @@ import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
-import javax.annotation.Nonnull;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.core.indexsegment.generator.SegmentVersion;
import org.apache.pinot.core.io.reader.DataFileReader;
@@ -31,12 +30,10 @@ import org.apache.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader;
import org.apache.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader;
import org.apache.pinot.core.segment.creator.impl.V1Constants;
import org.apache.pinot.core.segment.creator.impl.inv.RangeIndexCreator;
-import org.apache.pinot.core.segment.index.column.PhysicalColumnIndexContainer;
import org.apache.pinot.core.segment.index.loader.IndexLoadingConfig;
import org.apache.pinot.core.segment.index.loader.LoaderUtils;
import org.apache.pinot.core.segment.index.metadata.ColumnMetadata;
import org.apache.pinot.core.segment.index.metadata.SegmentMetadataImpl;
-import org.apache.pinot.core.segment.index.readers.BaseImmutableDictionary;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.apache.pinot.core.segment.store.ColumnIndexType;
import org.apache.pinot.core.segment.store.SegmentDirectory;
@@ -54,17 +51,17 @@ public class RangeIndexHandler {
private final SegmentVersion _segmentVersion;
private final Set<ColumnMetadata> _rangeIndexColumns = new HashSet<>();
- public RangeIndexHandler(@Nonnull File indexDir, @Nonnull SegmentMetadataImpl segmentMetadata,
- @Nonnull IndexLoadingConfig indexLoadingConfig, @Nonnull SegmentDirectory.Writer segmentWriter) {
+ public RangeIndexHandler(File indexDir, SegmentMetadataImpl segmentMetadata, IndexLoadingConfig indexLoadingConfig,
+ SegmentDirectory.Writer segmentWriter) {
_indexDir = indexDir;
_segmentWriter = segmentWriter;
_segmentName = segmentMetadata.getName();
_segmentVersion = SegmentVersion.valueOf(segmentMetadata.getVersion());
- // Do not create inverted index for sorted column
+ // Only create range index on dictionary-encoded unsorted columns
for (String column : indexLoadingConfig.getRangeIndexColumns()) {
ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
- if (columnMetadata != null && !columnMetadata.isSorted()) {
+ if (columnMetadata != null && !columnMetadata.isSorted() && columnMetadata.hasDictionary()) {
_rangeIndexColumns.add(columnMetadata);
}
}
@@ -79,15 +76,7 @@ public class RangeIndexHandler {
private void createRangeIndexForColumn(ColumnMetadata columnMetadata)
throws IOException {
- //Range index supported only for dictionary encoded columns for now
- if (!columnMetadata.hasDictionary()) {
- LOGGER.warn("Skipping creation of Range index for column:{}. It's only supported for dictionary encoded columns",
- columnMetadata.getColumnName());
-
- return;
- }
String column = columnMetadata.getColumnName();
-
File inProgress = new File(_indexDir, column + ".range.inprogress");
File rangeIndexFile = new File(_indexDir, column + V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION);
@@ -97,7 +86,7 @@ public class RangeIndexHandler {
if (_segmentWriter.hasIndexFor(column, ColumnIndexType.RANGE_INDEX)) {
// Skip creating range index if already exists.
- LOGGER.info("Found Range index for segment: {}, column: {}", _segmentName, column);
+ LOGGER.info("Found range index for segment: {}, column: {}", _segmentName, column);
return;
}
@@ -105,20 +94,16 @@ public class RangeIndexHandler {
FileUtils.touch(inProgress);
} else {
// Marker file exists, which means last run gets interrupted.
- // Remove inverted index if exists.
- // For v1 and v2, it's the actual inverted index. For v3, it's the temporary inverted index.
+ // Remove range index if exists.
+ // For v1 and v2, it's the actual range index. For v3, it's the temporary range index.
FileUtils.deleteQuietly(rangeIndexFile);
}
- // Create new inverted index for the column.
+ // Create new range index for the column.
LOGGER.info("Creating new range index for segment: {}, column: {}", _segmentName, column);
- int numDocs = columnMetadata.getTotalDocs();
-
- PinotDataBuffer dictBuffer = _segmentWriter.getIndexFor(columnMetadata.getColumnName(), ColumnIndexType.DICTIONARY);
- BaseImmutableDictionary dictionary = PhysicalColumnIndexContainer.loadDictionary(dictBuffer, columnMetadata, false);
- handleDictionaryBasedColumn(dictionary, columnMetadata, numDocs);
+ handleDictionaryBasedColumn(columnMetadata);
- // For v3, write the generated inverted index file into the single file and remove it.
+ // For v3, write the generated range index file into the single file and remove it.
if (_segmentVersion == SegmentVersion.v3) {
LoaderUtils.writeIndexToV3Format(_segmentWriter, column, rangeIndexFile, ColumnIndexType.RANGE_INDEX);
}
@@ -126,12 +111,12 @@ public class RangeIndexHandler {
// Delete the marker file.
FileUtils.deleteQuietly(inProgress);
- LOGGER.info("Created inverted index for segment: {}, column: {}", _segmentName, column);
+ LOGGER.info("Created range index for segment: {}, column: {}", _segmentName, column);
}
- private void handleDictionaryBasedColumn(BaseImmutableDictionary dictionary, ColumnMetadata columnMetadata,
- int numDocs)
+ private void handleDictionaryBasedColumn(ColumnMetadata columnMetadata)
throws IOException {
+ int numDocs = columnMetadata.getTotalDocs();
try (RangeIndexCreator creator = new RangeIndexCreator(_indexDir, columnMetadata.getFieldSpec(),
FieldSpec.DataType.INT, -1, -1, numDocs, columnMetadata.getTotalNumberOfEntries())) {
try (DataFileReader fwdIndex = getForwardIndexReader(columnMetadata, _segmentWriter)) {
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java
index 19fe79c..1f221ea 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java
@@ -158,6 +158,7 @@ public abstract class ClusterTest extends ControllerTest {
protected void startServers(int numServers, Configuration configuration, int baseAdminApiPort, int baseNettyPort,
String zkStr) {
+ FileUtils.deleteQuietly(new File(Server.DEFAULT_INSTANCE_BASE_DIR));
_serverStarters = new ArrayList<>(numServers);
overrideServerConf(configuration);
try {
@@ -178,6 +179,7 @@ public abstract class ClusterTest extends ControllerTest {
// to manage the instance level configs
protected void startMinion(@Nullable Map<String, PinotTaskExecutorFactory> taskExecutorFactoryRegistry,
@Nullable Map<String, MinionEventObserverFactory> eventObserverFactoryRegistry) {
+ FileUtils.deleteQuietly(new File(Minion.DEFAULT_INSTANCE_BASE_DIR));
try {
_minionStarter =
new MinionStarter(ZkStarter.DEFAULT_ZK_STR, getHelixClusterName(), new PropertiesConfiguration());
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/JsonPathClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/JsonPathClusterIntegrationTest.java
index 7546b2b..db22049 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/JsonPathClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/JsonPathClusterIntegrationTest.java
@@ -110,7 +110,7 @@ public class JsonPathClusterIntegrationTest extends BaseClusterIntegrationTest {
// Create the tables
ArrayList<String> invertedIndexColumns = Lists.newArrayList();
addOfflineTable(DEFAULT_TABLE_NAME, null, null, null, null, null, SegmentVersion.v1, invertedIndexColumns, null,
- null, null, null);
+ null, null, null, null);
setUpSegmentsAndQueryGenerator();
@@ -269,7 +269,8 @@ public class JsonPathClusterIntegrationTest extends BaseClusterIntegrationTest {
Assert.assertTrue(selectionResults.size() == 1);
for (int i = 0; i < selectionResults.size(); i++) {
String value = selectionResults.get(i).get(0).textValue();
- Assert.assertEquals(value, "{\"k4-k1\":\"value-k4-k1-0\",\"k4-k2\":\"value-k4-k2-0\",\"k4-k3\":\"value-k4-k3-0\",\"met\":0}");
+ Assert.assertEquals(value,
+ "{\"k4-k1\":\"value-k4-k1-0\",\"k4-k2\":\"value-k4-k2-0\",\"k4-k3\":\"value-k4-k3-0\",\"met\":0}");
}
//selection order by
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index db3bd93..14b5bc4 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -73,7 +73,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
"SELECT COUNT(*) FROM mytable WHERE DivActualElapsedTime = 305";
// For inverted index triggering test
- private static final List<String> UPDATED_RANGE_INDEX_COLUMNS = Arrays.asList("DivActualElapsedTime");
+ private static final List<String> UPDATED_RANGE_INDEX_COLUMNS = Collections.singletonList("DivActualElapsedTime");
private static final String TEST_UPDATED_RANGE_INDEX_QUERY =
"SELECT COUNT(*) FROM mytable WHERE DivActualElapsedTime > 305";
@@ -351,7 +351,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
long afterCount = queryResponse.get("aggregationResults").get(0).get("value").asLong();
//we should be scanning less than numTotalDocs with index enabled.
//In the current implementation its 8785, but it
- return beforeCount == afterCount && queryResponse1.get("numEntriesScannedInFilter").asLong() < numTotalDocs;
+ return beforeCount == afterCount && queryResponse1.get("numEntriesScannedInFilter").asLong() < numTotalDocs;
} catch (Exception e) {
throw new RuntimeException(e);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org