You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2020/05/15 00:10:12 UTC

[incubator-pinot] branch master updated: Fix the compilation error and bug introduced in Range Index (#5389)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 6f840db  Fix the compilation error and bug introduced in Range Index (#5389)
6f840db is described below

commit 6f840db6c84c69d2ebf137f1407332b517ce9322
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Thu May 14 17:09:59 2020 -0700

    Fix the compilation error and bug introduced in Range Index (#5389)
    
    1. Fix the compilation error introduced because of the merge of #4597 and #5240
    2. Fix the bug of not loading the range index if both inverted index and range index exist
    
    TODO: The range index triggeres another severe issue of accessing closed DataBuffer which can cause JVM crash. Will address in a separate PR
---
 .../index/column/PhysicalColumnIndexContainer.java | 14 ++++----
 .../loader/invertedindex/InvertedIndexHandler.java |  9 +++--
 .../loader/invertedindex/RangeIndexHandler.java    | 41 +++++++---------------
 .../pinot/integration/tests/ClusterTest.java       |  2 ++
 .../tests/JsonPathClusterIntegrationTest.java      |  5 +--
 .../tests/OfflineClusterIntegrationTest.java       |  4 +--
 6 files changed, 30 insertions(+), 45 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
index eb33185..7725dea 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
@@ -33,7 +33,6 @@ import org.apache.pinot.core.segment.index.loader.IndexLoadingConfig;
 import org.apache.pinot.core.segment.index.metadata.ColumnMetadata;
 import org.apache.pinot.core.segment.index.readers.BaseImmutableDictionary;
 import org.apache.pinot.core.segment.index.readers.BitmapInvertedIndexReader;
-import org.apache.pinot.core.segment.index.readers.RangeIndexReader;
 import org.apache.pinot.core.segment.index.readers.BloomFilterReader;
 import org.apache.pinot.core.segment.index.readers.BytesDictionary;
 import org.apache.pinot.core.segment.index.readers.DoubleDictionary;
@@ -47,6 +46,7 @@ import org.apache.pinot.core.segment.index.readers.OnHeapFloatDictionary;
 import org.apache.pinot.core.segment.index.readers.OnHeapIntDictionary;
 import org.apache.pinot.core.segment.index.readers.OnHeapLongDictionary;
 import org.apache.pinot.core.segment.index.readers.OnHeapStringDictionary;
+import org.apache.pinot.core.segment.index.readers.RangeIndexReader;
 import org.apache.pinot.core.segment.index.readers.StringDictionary;
 import org.apache.pinot.core.segment.index.readers.text.LuceneTextIndexReader;
 import org.apache.pinot.core.segment.memory.PinotDataBuffer;
@@ -108,8 +108,7 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
         // Single-value
         if (metadata.isSorted()) {
           // Sorted
-          SortedIndexReader sortedIndexReader =
-              new SortedIndexReaderImpl(fwdIndexBuffer, metadata.getCardinality());
+          SortedIndexReader sortedIndexReader = new SortedIndexReaderImpl(fwdIndexBuffer, metadata.getCardinality());
           _forwardIndex = sortedIndexReader;
           _invertedIndex = sortedIndexReader;
           _rangeIndex = null;
@@ -129,13 +128,12 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
         _invertedIndex =
             new BitmapInvertedIndexReader(segmentReader.getIndexFor(columnName, ColumnIndexType.INVERTED_INDEX),
                 metadata.getCardinality());
-        _rangeIndex = null;
-      } else if (loadRangeIndex) {
-        _invertedIndex = null;
-        _rangeIndex =
-            new RangeIndexReader(segmentReader.getIndexFor(columnName, ColumnIndexType.RANGE_INDEX));
       } else {
         _invertedIndex = null;
+      }
+      if (loadRangeIndex) {
+        _rangeIndex = new RangeIndexReader(segmentReader.getIndexFor(columnName, ColumnIndexType.RANGE_INDEX));
+      } else {
         _rangeIndex = null;
       }
     } else {
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/InvertedIndexHandler.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/InvertedIndexHandler.java
index dbbdc5d..36e3806 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/InvertedIndexHandler.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/InvertedIndexHandler.java
@@ -22,7 +22,6 @@ import java.io.File;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
-import javax.annotation.Nonnull;
 import org.apache.commons.io.FileUtils;
 import org.apache.pinot.core.indexsegment.generator.SegmentVersion;
 import org.apache.pinot.core.io.reader.DataFileReader;
@@ -51,17 +50,17 @@ public class InvertedIndexHandler {
   private final SegmentVersion _segmentVersion;
   private final Set<ColumnMetadata> _invertedIndexColumns = new HashSet<>();
 
-  public InvertedIndexHandler(@Nonnull File indexDir, @Nonnull SegmentMetadataImpl segmentMetadata,
-      @Nonnull IndexLoadingConfig indexLoadingConfig, @Nonnull SegmentDirectory.Writer segmentWriter) {
+  public InvertedIndexHandler(File indexDir, SegmentMetadataImpl segmentMetadata, IndexLoadingConfig indexLoadingConfig,
+      SegmentDirectory.Writer segmentWriter) {
     _indexDir = indexDir;
     _segmentWriter = segmentWriter;
     _segmentName = segmentMetadata.getName();
     _segmentVersion = SegmentVersion.valueOf(segmentMetadata.getVersion());
 
-    // Do not create inverted index for sorted column
+    // Only create inverted index on dictionary-encoded unsorted columns
     for (String column : indexLoadingConfig.getInvertedIndexColumns()) {
       ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
-      if (columnMetadata != null && !columnMetadata.isSorted()) {
+      if (columnMetadata != null && !columnMetadata.isSorted() && columnMetadata.hasDictionary()) {
         _invertedIndexColumns.add(columnMetadata);
       }
     }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/RangeIndexHandler.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/RangeIndexHandler.java
index e7685a7..e792986 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/RangeIndexHandler.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/RangeIndexHandler.java
@@ -22,7 +22,6 @@ import java.io.File;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
-import javax.annotation.Nonnull;
 import org.apache.commons.io.FileUtils;
 import org.apache.pinot.core.indexsegment.generator.SegmentVersion;
 import org.apache.pinot.core.io.reader.DataFileReader;
@@ -31,12 +30,10 @@ import org.apache.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader;
 import org.apache.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader;
 import org.apache.pinot.core.segment.creator.impl.V1Constants;
 import org.apache.pinot.core.segment.creator.impl.inv.RangeIndexCreator;
-import org.apache.pinot.core.segment.index.column.PhysicalColumnIndexContainer;
 import org.apache.pinot.core.segment.index.loader.IndexLoadingConfig;
 import org.apache.pinot.core.segment.index.loader.LoaderUtils;
 import org.apache.pinot.core.segment.index.metadata.ColumnMetadata;
 import org.apache.pinot.core.segment.index.metadata.SegmentMetadataImpl;
-import org.apache.pinot.core.segment.index.readers.BaseImmutableDictionary;
 import org.apache.pinot.core.segment.memory.PinotDataBuffer;
 import org.apache.pinot.core.segment.store.ColumnIndexType;
 import org.apache.pinot.core.segment.store.SegmentDirectory;
@@ -54,17 +51,17 @@ public class RangeIndexHandler {
   private final SegmentVersion _segmentVersion;
   private final Set<ColumnMetadata> _rangeIndexColumns = new HashSet<>();
 
-  public RangeIndexHandler(@Nonnull File indexDir, @Nonnull SegmentMetadataImpl segmentMetadata,
-      @Nonnull IndexLoadingConfig indexLoadingConfig, @Nonnull SegmentDirectory.Writer segmentWriter) {
+  public RangeIndexHandler(File indexDir, SegmentMetadataImpl segmentMetadata, IndexLoadingConfig indexLoadingConfig,
+      SegmentDirectory.Writer segmentWriter) {
     _indexDir = indexDir;
     _segmentWriter = segmentWriter;
     _segmentName = segmentMetadata.getName();
     _segmentVersion = SegmentVersion.valueOf(segmentMetadata.getVersion());
 
-    // Do not create inverted index for sorted column
+    // Only create range index on dictionary-encoded unsorted columns
     for (String column : indexLoadingConfig.getRangeIndexColumns()) {
       ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
-      if (columnMetadata != null && !columnMetadata.isSorted()) {
+      if (columnMetadata != null && !columnMetadata.isSorted() && columnMetadata.hasDictionary()) {
         _rangeIndexColumns.add(columnMetadata);
       }
     }
@@ -79,15 +76,7 @@ public class RangeIndexHandler {
 
   private void createRangeIndexForColumn(ColumnMetadata columnMetadata)
       throws IOException {
-    //Range index supported only for dictionary encoded columns for now
-    if (!columnMetadata.hasDictionary()) {
-      LOGGER.warn("Skipping creation of Range index for column:{}. It's only supported for dictionary encoded columns",
-          columnMetadata.getColumnName());
-
-      return;
-    }
     String column = columnMetadata.getColumnName();
-
     File inProgress = new File(_indexDir, column + ".range.inprogress");
     File rangeIndexFile = new File(_indexDir, column + V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION);
 
@@ -97,7 +86,7 @@ public class RangeIndexHandler {
       if (_segmentWriter.hasIndexFor(column, ColumnIndexType.RANGE_INDEX)) {
         // Skip creating range index if already exists.
 
-        LOGGER.info("Found Range index for segment: {}, column: {}", _segmentName, column);
+        LOGGER.info("Found range index for segment: {}, column: {}", _segmentName, column);
         return;
       }
 
@@ -105,20 +94,16 @@ public class RangeIndexHandler {
       FileUtils.touch(inProgress);
     } else {
       // Marker file exists, which means last run gets interrupted.
-      // Remove inverted index if exists.
-      // For v1 and v2, it's the actual inverted index. For v3, it's the temporary inverted index.
+      // Remove range index if exists.
+      // For v1 and v2, it's the actual range index. For v3, it's the temporary range index.
       FileUtils.deleteQuietly(rangeIndexFile);
     }
 
-    // Create new inverted index for the column.
+    // Create new range index for the column.
     LOGGER.info("Creating new range index for segment: {}, column: {}", _segmentName, column);
-    int numDocs = columnMetadata.getTotalDocs();
-
-    PinotDataBuffer dictBuffer = _segmentWriter.getIndexFor(columnMetadata.getColumnName(), ColumnIndexType.DICTIONARY);
-    BaseImmutableDictionary dictionary = PhysicalColumnIndexContainer.loadDictionary(dictBuffer, columnMetadata, false);
-    handleDictionaryBasedColumn(dictionary, columnMetadata, numDocs);
+    handleDictionaryBasedColumn(columnMetadata);
 
-    // For v3, write the generated inverted index file into the single file and remove it.
+    // For v3, write the generated range index file into the single file and remove it.
     if (_segmentVersion == SegmentVersion.v3) {
       LoaderUtils.writeIndexToV3Format(_segmentWriter, column, rangeIndexFile, ColumnIndexType.RANGE_INDEX);
     }
@@ -126,12 +111,12 @@ public class RangeIndexHandler {
     // Delete the marker file.
     FileUtils.deleteQuietly(inProgress);
 
-    LOGGER.info("Created inverted index for segment: {}, column: {}", _segmentName, column);
+    LOGGER.info("Created range index for segment: {}, column: {}", _segmentName, column);
   }
 
-  private void handleDictionaryBasedColumn(BaseImmutableDictionary dictionary, ColumnMetadata columnMetadata,
-      int numDocs)
+  private void handleDictionaryBasedColumn(ColumnMetadata columnMetadata)
       throws IOException {
+    int numDocs = columnMetadata.getTotalDocs();
     try (RangeIndexCreator creator = new RangeIndexCreator(_indexDir, columnMetadata.getFieldSpec(),
         FieldSpec.DataType.INT, -1, -1, numDocs, columnMetadata.getTotalNumberOfEntries())) {
       try (DataFileReader fwdIndex = getForwardIndexReader(columnMetadata, _segmentWriter)) {
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java
index 19fe79c..1f221ea 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java
@@ -158,6 +158,7 @@ public abstract class ClusterTest extends ControllerTest {
 
   protected void startServers(int numServers, Configuration configuration, int baseAdminApiPort, int baseNettyPort,
       String zkStr) {
+    FileUtils.deleteQuietly(new File(Server.DEFAULT_INSTANCE_BASE_DIR));
     _serverStarters = new ArrayList<>(numServers);
     overrideServerConf(configuration);
     try {
@@ -178,6 +179,7 @@ public abstract class ClusterTest extends ControllerTest {
   //       to manage the instance level configs
   protected void startMinion(@Nullable Map<String, PinotTaskExecutorFactory> taskExecutorFactoryRegistry,
       @Nullable Map<String, MinionEventObserverFactory> eventObserverFactoryRegistry) {
+    FileUtils.deleteQuietly(new File(Minion.DEFAULT_INSTANCE_BASE_DIR));
     try {
       _minionStarter =
           new MinionStarter(ZkStarter.DEFAULT_ZK_STR, getHelixClusterName(), new PropertiesConfiguration());
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/JsonPathClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/JsonPathClusterIntegrationTest.java
index 7546b2b..db22049 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/JsonPathClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/JsonPathClusterIntegrationTest.java
@@ -110,7 +110,7 @@ public class JsonPathClusterIntegrationTest extends BaseClusterIntegrationTest {
     // Create the tables
     ArrayList<String> invertedIndexColumns = Lists.newArrayList();
     addOfflineTable(DEFAULT_TABLE_NAME, null, null, null, null, null, SegmentVersion.v1, invertedIndexColumns, null,
-        null, null, null);
+        null, null, null, null);
 
     setUpSegmentsAndQueryGenerator();
 
@@ -269,7 +269,8 @@ public class JsonPathClusterIntegrationTest extends BaseClusterIntegrationTest {
     Assert.assertTrue(selectionResults.size() == 1);
     for (int i = 0; i < selectionResults.size(); i++) {
       String value = selectionResults.get(i).get(0).textValue();
-      Assert.assertEquals(value, "{\"k4-k1\":\"value-k4-k1-0\",\"k4-k2\":\"value-k4-k2-0\",\"k4-k3\":\"value-k4-k3-0\",\"met\":0}");
+      Assert.assertEquals(value,
+          "{\"k4-k1\":\"value-k4-k1-0\",\"k4-k2\":\"value-k4-k2-0\",\"k4-k3\":\"value-k4-k3-0\",\"met\":0}");
     }
 
     //selection order by
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index db3bd93..14b5bc4 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -73,7 +73,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
       "SELECT COUNT(*) FROM mytable WHERE DivActualElapsedTime = 305";
 
   // For inverted index triggering test
-  private static final List<String> UPDATED_RANGE_INDEX_COLUMNS = Arrays.asList("DivActualElapsedTime");
+  private static final List<String> UPDATED_RANGE_INDEX_COLUMNS = Collections.singletonList("DivActualElapsedTime");
   private static final String TEST_UPDATED_RANGE_INDEX_QUERY =
       "SELECT COUNT(*) FROM mytable WHERE DivActualElapsedTime > 305";
 
@@ -351,7 +351,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
         long afterCount = queryResponse.get("aggregationResults").get(0).get("value").asLong();
         //we should be scanning less than numTotalDocs with index enabled.
         //In the current implementation its 8785, but it
-        return beforeCount == afterCount  && queryResponse1.get("numEntriesScannedInFilter").asLong() < numTotalDocs;
+        return beforeCount == afterCount && queryResponse1.get("numEntriesScannedInFilter").asLong() < numTotalDocs;
       } catch (Exception e) {
         throw new RuntimeException(e);
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org