You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2020/03/31 04:47:32 UTC
[incubator-pinot] branch master updated: Lucene DocId to PinotDocId
cache to improve performance (#5177)
This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 8dfa51a Lucene DocId to PinotDocId cache to improve performance (#5177)
8dfa51a is described below
commit 8dfa51af244f911d52824f94e95f983ffc50b5fb
Author: Sidd <si...@gmail.com>
AuthorDate: Mon Mar 30 21:47:22 2020 -0700
Lucene DocId to PinotDocId cache to improve performance (#5177)
Co-authored-by: Siddharth Teotia <st...@steotia-mn1.linkedin.biz>
---
.../index/column/PhysicalColumnIndexContainer.java | 2 +-
.../converter/SegmentV1V2ToV3FormatConverter.java | 18 +++++
.../loader/invertedindex/TextIndexHandler.java | 59 ++++++++--------
.../index/readers/text/LuceneTextIndexReader.java | 79 ++++++++++++++++++----
.../core/segment/store/SegmentDirectoryPaths.java | 9 +++
.../core/segment/index/loader/LoaderTest.java | 57 ++++++++++++++--
...archQueries.java => TextSearchQueriesTest.java} | 3 +-
7 files changed, 177 insertions(+), 50 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
index 33ba360..76ff19e 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java
@@ -132,7 +132,7 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
_dictionary = null;
_bloomFilterReader = null;
if (loadTextIndex) {
- _invertedIndex = new LuceneTextIndexReader(columnName, segmentIndexDir);
+ _invertedIndex = new LuceneTextIndexReader(columnName, segmentIndexDir, metadata.getTotalDocs());
} else {
_invertedIndex = null;
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/converter/SegmentV1V2ToV3FormatConverter.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/converter/SegmentV1V2ToV3FormatConverter.java
index 71c56ae..f534fe9 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/converter/SegmentV1V2ToV3FormatConverter.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/converter/SegmentV1V2ToV3FormatConverter.java
@@ -35,6 +35,7 @@ import org.apache.pinot.core.indexsegment.generator.SegmentVersion;
import org.apache.pinot.core.segment.creator.impl.V1Constants;
import org.apache.pinot.core.segment.creator.impl.inv.text.LuceneTextIndexCreator;
import org.apache.pinot.core.segment.index.metadata.SegmentMetadataImpl;
+import org.apache.pinot.core.segment.index.readers.text.LuceneTextIndexReader;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.apache.pinot.core.segment.store.ColumnIndexType;
import org.apache.pinot.core.segment.store.SegmentDirectory;
@@ -225,6 +226,7 @@ public class SegmentV1V2ToV3FormatConverter implements SegmentFormatConverter {
private void copyLuceneTextIndexIfExists(File segmentDirectory, File v3Dir)
throws IOException {
+ // TODO: see if this can be done by reusing some existing methods
String suffix = LuceneTextIndexCreator.LUCENE_TEXT_INDEX_FILE_EXTENSION;
File[] textIndexFiles = segmentDirectory.listFiles(new FilenameFilter() {
@Override
@@ -241,6 +243,22 @@ public class SegmentV1V2ToV3FormatConverter implements SegmentFormatConverter {
Files.copy(indexFile.toPath(), v3LuceneIndexFile.toPath());
}
}
+ // if segment reload is issued asking for up-conversion of
+ // on-disk segment format from v1/v2 to v3, then in addition
+ // to moving the lucene text index files, we need to move the
+ // docID mapping/cache file created by us in v1/v2 during an earlier
+ // load of the segment.
+ String docIDFileSuffix = LuceneTextIndexReader.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION;
+ File[] textIndexDocIdMappingFiles = segmentDirectory.listFiles(new FilenameFilter() {
+ @Override
+ public boolean accept(File dir, String name) {
+ return name.endsWith(docIDFileSuffix);
+ }
+ });
+ for (File docIdMappingFile : textIndexDocIdMappingFiles) {
+ File v3DocIdMappingFile = new File(v3Dir, docIdMappingFile.getName());
+ Files.copy(docIdMappingFile.toPath(), v3DocIdMappingFile.toPath());
+ }
}
private void deleteStaleConversionDirectories(File segmentDirectory) {
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/TextIndexHandler.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/TextIndexHandler.java
index a501596..1c1786f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/TextIndexHandler.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/TextIndexHandler.java
@@ -61,8 +61,26 @@ import static org.apache.pinot.core.segment.creator.impl.V1Constants.MetadataKey
import static org.apache.pinot.core.segment.creator.impl.V1Constants.MetadataKeys.Column.getKeyFor;
+/**
+ * Helper class for text indexes used by {@link org.apache.pinot.core.segment.index.loader.SegmentPreProcessor}
+ * to create text index for column during segment load time. Currently text index is always
+ * created (if enabled on a column) during segment generation.
+ *
+ * (1) A new segment with text index is created/refreshed. Server loads the segment. The handler
+ * detects the existence of text index and returns.
+ *
+ * (2) A reload is issued on an existing segment with existing text index. The handler
+ * detects the existence of text index and returns.
+ *
+ * (3) A reload is issued on an existing segment after text index is enabled on an existing
+ * column. Read the forward index to create text index.
+ *
+ * (4) A reload is issued on an existing segment after text index is enabled on a newly
+ * added column. In this case, the default column handler would have taken care of adding
+ * forward index for the new column. Read the forward index to create text index.
+ */
public class TextIndexHandler {
- private static final Logger LOGGER = LoggerFactory.getLogger(InvertedIndexHandler.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(TextIndexHandler.class);
private final File _indexDir;
private final SegmentDirectory.Writer _segmentWriter;
@@ -85,26 +103,6 @@ public class TextIndexHandler {
}
}
- /**
- * Create text index for column during segment load time. Currently text index is always
- * created (if enabled on a column) during segment generation (true for both offline
- * and realtime segments). So this function is a NO-OP for case when a new segment is loaded
- * after creation. However, when segment reload is issued in the following scenarios, we generate
- * text index.
- *
- * SCENARIO 1: user enables text index on an existing column (table config change)
- * SCENARIO 2: user adds a new column and enables text index (both schema and table config change)
- *
- * This function is a NO-OP for the above two cases. Later we can also add a segment generator
- * config option to not necessarily generate text index during segment generation. When we do
- * so, this function should be able to take care of that scenario too.
- *
- * For scenario 2, {@link org.apache.pinot.core.segment.index.loader.defaultcolumn.V3DefaultColumnHandler}
- * would have already added the forward index for the column with default value. We use the forward
- * index here to get the raw data and build text index.
- *
- * @throws IOException
- */
public void createTextIndexesOnSegmentLoad()
throws Exception {
for (ColumnMetadata columnMetadata : _textIndexColumns) {
@@ -125,21 +123,23 @@ public class TextIndexHandler {
private void checkUnsupportedOperationsForTextIndex(ColumnMetadata columnMetadata) {
String column = columnMetadata.getColumnName();
if (columnMetadata.hasDictionary()) {
- throw new UnsupportedOperationException("Text index is currently not supported on dictionary encoded column: "+column);
+ throw new UnsupportedOperationException(
+ "Text index is currently not supported on dictionary encoded column: " + column);
}
if (columnMetadata.isSorted()) {
// since Pinot's current implementation doesn't support raw sorted columns,
// we need to check for this too
- throw new UnsupportedOperationException("Text index is currently not supported on sorted columns: "+column);
+ throw new UnsupportedOperationException("Text index is currently not supported on sorted columns: " + column);
}
if (!columnMetadata.isSingleValue()) {
- throw new UnsupportedOperationException("Text index is currently not supported on multi-value columns: "+column);
+ throw new UnsupportedOperationException(
+ "Text index is currently not supported on multi-value columns: " + column);
}
if (columnMetadata.getDataType() != FieldSpec.DataType.STRING) {
- throw new UnsupportedOperationException("Text index is currently only supported on STRING columns: "+column);
+ throw new UnsupportedOperationException("Text index is currently only supported on STRING columns: " + column);
}
}
@@ -153,8 +153,13 @@ public class TextIndexHandler {
}
int numDocs = columnMetadata.getTotalDocs();
LOGGER.info("Creating new text index for column: {} in segment: {}", column, _segmentName);
- File segmentIndexDir = SegmentDirectoryPaths.segmentDirectoryFor(_indexDir, _segmentVersion);
- try (LuceneTextIndexCreator textIndexCreator = new LuceneTextIndexCreator(column, segmentIndexDir, true)) {
+ File segmentDirectory = SegmentDirectoryPaths.segmentDirectoryFor(_indexDir, _segmentVersion);
+ // The handlers are always invoked by the preprocessor. Before this ImmutableSegmentLoader would have already
+ // up-converted the segment from v1/v2 -> v3 (if needed). So based on the segmentVersion, whichever
+ // segmentDirectory is indicated to us by SegmentDirectoryPaths, we create the lucene index there. There is no
+ // further need to move around the lucene index directory since it is created with correct directory structure
+ // based on segmentVersion.
+ try (LuceneTextIndexCreator textIndexCreator = new LuceneTextIndexCreator(column, segmentDirectory, true)) {
try (DataFileReader forwardIndexReader = getForwardIndexReader(columnMetadata)) {
VarByteChunkSingleValueReader forwardIndex = (VarByteChunkSingleValueReader) forwardIndexReader;
for (int docID = 0; docID < numDocs; docID++) {
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/text/LuceneTextIndexReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/text/LuceneTextIndexReader.java
index abefe15..d55c453 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/text/LuceneTextIndexReader.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/text/LuceneTextIndexReader.java
@@ -18,8 +18,10 @@
*/
package org.apache.pinot.core.segment.index.readers.text;
+import java.io.Closeable;
import java.io.File;
import java.io.IOException;
+import java.nio.ByteOrder;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
@@ -32,6 +34,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.pinot.core.segment.creator.impl.inv.text.LuceneTextIndexCreator;
import org.apache.pinot.core.segment.index.readers.InvertedIndexReader;
+import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.apache.pinot.core.segment.store.SegmentDirectoryPaths;
import org.roaringbitmap.IntIterator;
import org.roaringbitmap.buffer.MutableRoaringBitmap;
@@ -51,6 +54,9 @@ public class LuceneTextIndexReader implements InvertedIndexReader<MutableRoaring
private final IndexSearcher _indexSearcher;
private final QueryParser _queryParser;
private final String _column;
+ private final DocIdTranslator _docIdTranslator;
+
+ public static final String LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".lucene.mapping";
/**
* As part of loading the segment in ImmutableSegmentLoader,
@@ -58,18 +64,22 @@ public class LuceneTextIndexReader implements InvertedIndexReader<MutableRoaring
* the reference in {@link org.apache.pinot.core.segment.index.column.PhysicalColumnIndexContainer}
* similar to how it is done for other types of indexes.
* @param column column name
- * @param segmentIndexDir segment index directory
+ * @param indexDir segment index directory
+ * @param numDocs number of documents in the segment
*/
- public LuceneTextIndexReader(String column, File segmentIndexDir) {
+ public LuceneTextIndexReader(String column, File indexDir, int numDocs) {
_column = column;
try {
- File indexFile = getTextIndexFile(segmentIndexDir);
+ File indexFile = getTextIndexFile(indexDir);
_indexDirectory = FSDirectory.open(indexFile.toPath());
_indexReader = DirectoryReader.open(_indexDirectory);
_indexSearcher = new IndexSearcher(_indexReader);
// Disable Lucene query result cache. While it helps a lot with performance for
// repeated queries, on the downside it cause heap issues.
_indexSearcher.setQueryCache(null);
+ // TODO: consider using a threshold of num docs per segment to decide between building
+ // mapping file upfront on segment load v/s on-the-fly during query processing
+ _docIdTranslator = new DocIdTranslator(indexDir, _column, numDocs, _indexSearcher);
} catch (Exception e) {
LOGGER
.error("Failed to instantiate Lucene text index reader for column {}, exception {}", column, e.getMessage());
@@ -123,9 +133,8 @@ public class LuceneTextIndexReader implements InvertedIndexReader<MutableRoaring
_indexSearcher.search(query, docIDCollector);
return getPinotDocIds(docIDs);
} catch (Exception e) {
- LOGGER.error("Failed while searching the text index for column {}, search query {}, exception {}", _column,
- searchQuery, e.getMessage());
- throw new RuntimeException(e);
+ String msg = "Caught excepttion while searching the text index for column:" + _column + " search query:" + searchQuery;
+ throw new RuntimeException(msg, e);
}
}
@@ -145,15 +154,10 @@ public class LuceneTextIndexReader implements InvertedIndexReader<MutableRoaring
private MutableRoaringBitmap getPinotDocIds(MutableRoaringBitmap luceneDocIds) {
IntIterator luceneDocIDIterator = luceneDocIds.getIntIterator();
MutableRoaringBitmap actualDocIDs = new MutableRoaringBitmap();
- try {
- while (luceneDocIDIterator.hasNext()) {
- int luceneDocId = luceneDocIDIterator.next();
- Document document = _indexSearcher.doc(luceneDocId);
- int pinotDocId = Integer.valueOf(document.get(LuceneTextIndexCreator.LUCENE_INDEX_DOC_ID_COLUMN_NAME));
- actualDocIDs.add(pinotDocId);
- }
- } catch (Exception e) {
- throw new RuntimeException("Error: failed while retrieving document from index: " + e);
+ while (luceneDocIDIterator.hasNext()) {
+ int luceneDocId = luceneDocIDIterator.next();
+ int pinotDocId = _docIdTranslator.getPinotDocId(luceneDocId);
+ actualDocIDs.add(pinotDocId);
}
return actualDocIDs;
}
@@ -169,5 +173,50 @@ public class LuceneTextIndexReader implements InvertedIndexReader<MutableRoaring
throws IOException {
_indexReader.close();
_indexDirectory.close();
+ _docIdTranslator.close();
+ }
+
+ private static class DocIdTranslator implements Closeable {
+ final PinotDataBuffer _buffer;
+
+ DocIdTranslator(File segmentIndexDir, String column, int numDocs, IndexSearcher indexSearcher)
+ throws Exception {
+ int length = Integer.BYTES * numDocs;
+ File docIdMappingFile = new File(SegmentDirectoryPaths.findSegmentDirectory(segmentIndexDir),
+ column + LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
+ // The mapping is local to a segment. It is created on the server during segment load.
+ // Unless we are running Pinot on Solaris/SPARC, the underlying architecture is
+ // LITTLE_ENDIAN (Linux/x86). So use that as byte order.
+ String desc = "Text index docId mapping buffer: " + column;
+ if (docIdMappingFile.exists()) {
+ // we will be here for segment reload and server restart
+ // for refresh, we will not be here since segment is deleted/replaced
+ // TODO: see if we can prefetch the pages
+ _buffer =
+ PinotDataBuffer.mapFile(docIdMappingFile, /* readOnly */ true, 0, length, ByteOrder.LITTLE_ENDIAN, desc);
+ } else {
+ _buffer =
+ PinotDataBuffer.mapFile(docIdMappingFile, /* readOnly */ false, 0, length, ByteOrder.LITTLE_ENDIAN, desc);
+ for (int i = 0; i < numDocs; i++) {
+ try {
+ Document document = indexSearcher.doc(i);
+ int pinotDocId = Integer.parseInt(document.get(LuceneTextIndexCreator.LUCENE_INDEX_DOC_ID_COLUMN_NAME));
+ _buffer.putInt(i * Integer.BYTES, pinotDocId);
+ } catch (Exception e) {
+ throw new RuntimeException("Caught exception while building doc id mapping for text index column: " + column, e);
+ }
+ }
+ }
+ }
+
+ int getPinotDocId(int luceneDocId) {
+ return _buffer.getInt(luceneDocId * Integer.BYTES);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ _buffer.close();
+ }
}
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/store/SegmentDirectoryPaths.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/store/SegmentDirectoryPaths.java
index 322710c..9e1588a 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/store/SegmentDirectoryPaths.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/store/SegmentDirectoryPaths.java
@@ -18,12 +18,14 @@
*/
package org.apache.pinot.core.segment.store;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import java.io.File;
import javax.annotation.Nullable;
import org.apache.pinot.core.indexsegment.generator.SegmentVersion;
import org.apache.pinot.core.segment.creator.impl.V1Constants;
import org.apache.pinot.core.segment.creator.impl.inv.text.LuceneTextIndexCreator;
+import org.apache.pinot.core.segment.index.readers.text.LuceneTextIndexReader;
public class SegmentDirectoryPaths {
@@ -83,6 +85,13 @@ public class SegmentDirectoryPaths {
return findFormatFile(indexDir, luceneIndexDirectory);
}
+ @Nullable
+ @VisibleForTesting
+ public static File findTextIndexDocIdMappingFile(File indexDir, String column) {
+ String file = column + LuceneTextIndexReader.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION;
+ return findFormatFile(indexDir, file);
+ }
+
/**
* Find a file in any segment version.
* <p>Index directory passed in should be top level segment directory.
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/loader/LoaderTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/loader/LoaderTest.java
index 7d81b7b..c6c1e7c 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/loader/LoaderTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/loader/LoaderTest.java
@@ -20,7 +20,10 @@ package org.apache.pinot.core.segment.index.loader;
import java.io.File;
import java.net.URL;
+import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.common.segment.ReadMode;
import org.apache.pinot.common.utils.TarGzCompressionUtils;
@@ -36,6 +39,7 @@ import org.apache.pinot.core.segment.index.converter.SegmentV1V2ToV3FormatConver
import org.apache.pinot.core.segment.index.metadata.ColumnMetadata;
import org.apache.pinot.core.segment.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.core.segment.index.readers.StringDictionary;
+import org.apache.pinot.core.segment.index.readers.text.LuceneTextIndexReader;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.apache.pinot.core.segment.store.ColumnIndexType;
import org.apache.pinot.core.segment.store.SegmentDirectory;
@@ -268,6 +272,9 @@ public class LoaderTest {
@Test
public void testTextIndexLoad() throws Exception {
+ // Tests for scenarios by creating on-disk segment in V3 and then loading
+ // the segment with and without specifying segmentVersion in IndexLoadingConfig
+
// create on-disk segment in V3
// this generates the segment in V1 but converts to V3 as part of post-creation processing
constructSegmentWithTextIndex(SegmentVersion.v3);
@@ -288,7 +295,10 @@ public class LoaderTest {
// CASE 1: don't set the segment version to load in IndexLoadingConfig
// there should be no conversion done by ImmutableSegmentLoader and it should
// be able to create text index reader with on-disk version V3
- IndexSegment indexSegment = ImmutableSegmentLoader.load(_indexDir, ReadMode.mmap);
+ IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
+ indexLoadingConfig.setTextIndexColumns(new HashSet<>(Arrays.asList(TEXT_INDEX_COL_NAME)));
+ indexLoadingConfig.setReadMode(ReadMode.mmap);
+ IndexSegment indexSegment = ImmutableSegmentLoader.load(_indexDir, indexLoadingConfig);
// check that loaded segment version is v3
Assert.assertEquals(indexSegment.getSegmentMetadata().getVersion(), SegmentVersion.v3.toString());
// no change/conversion should have happened in indexDir
@@ -297,16 +307,23 @@ public class LoaderTest {
verifyIndexDirIsV3(_indexDir);
// no change/conversion should have happened for textIndex dir
textIndexFile = SegmentDirectoryPaths.findTextIndexIndexFile(_indexDir, TEXT_INDEX_COL_NAME);
+ // segment load should have created the docID mapping file in V3 structure
+ File textIndexDocIdMappingFile = SegmentDirectoryPaths.findTextIndexDocIdMappingFile(_indexDir, TEXT_INDEX_COL_NAME);
Assert.assertNotNull(textIndexFile);
+ Assert.assertNotNull(textIndexDocIdMappingFile);
Assert.assertTrue(textIndexFile.isDirectory());
+ Assert.assertFalse(textIndexDocIdMappingFile.isDirectory());
Assert.assertEquals(textIndexFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexCreator.LUCENE_TEXT_INDEX_FILE_EXTENSION);
Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
+ Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexReader.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
+ Assert.assertEquals(textIndexDocIdMappingFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
indexSegment.destroy();
// CASE 2: set the segment version to load in IndexLoadingConfig as V3
// there should be no conversion done by ImmutableSegmentLoader since the segmentVersionToLoad
// is same as the version of segment on disk (V3)
- indexSegment = ImmutableSegmentLoader.load(_indexDir, _v3IndexLoadingConfig);
+ indexLoadingConfig.setSegmentVersion(SegmentVersion.v3);
+ indexSegment = ImmutableSegmentLoader.load(_indexDir, indexLoadingConfig);
// check that loaded segment version is v3
Assert.assertEquals(indexSegment.getSegmentMetadata().getVersion(), SegmentVersion.v3.toString());
// no change/conversion should have happened in indexDir
@@ -315,12 +332,21 @@ public class LoaderTest {
verifyIndexDirIsV3(_indexDir);
// no change/conversion should have happened for textIndex dir
textIndexFile = SegmentDirectoryPaths.findTextIndexIndexFile(_indexDir, TEXT_INDEX_COL_NAME);
+ // segment load should have created the docID mapping file in V3 structure
+ textIndexDocIdMappingFile = SegmentDirectoryPaths.findTextIndexDocIdMappingFile(_indexDir, TEXT_INDEX_COL_NAME);
Assert.assertNotNull(textIndexFile);
+ Assert.assertNotNull(textIndexDocIdMappingFile);
Assert.assertTrue(textIndexFile.isDirectory());
+ Assert.assertFalse(textIndexDocIdMappingFile.isDirectory());
Assert.assertEquals(textIndexFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexCreator.LUCENE_TEXT_INDEX_FILE_EXTENSION);
Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
+ Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexReader.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
+ Assert.assertEquals(textIndexDocIdMappingFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
indexSegment.destroy();
+ // Test for scenarios by creating on-disk segment in V1 and then loading
+ // the segment with and without specifying segmentVersion in IndexLoadingConfig
+
// create on-disk segment in V1
// this generates the segment in V1 and does not convert to V3 as part of post-creation processing
constructSegmentWithTextIndex(SegmentVersion.v1);
@@ -335,13 +361,17 @@ public class LoaderTest {
textIndexFile = SegmentDirectoryPaths.findTextIndexIndexFile(_indexDir, TEXT_INDEX_COL_NAME);
Assert.assertNotNull(textIndexFile);
Assert.assertTrue(textIndexFile.isDirectory());
+ Assert.assertFalse(textIndexDocIdMappingFile.isDirectory());
Assert.assertEquals(textIndexFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexCreator.LUCENE_TEXT_INDEX_FILE_EXTENSION);
Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
// CASE 1: don't set the segment version to load in IndexLoadingConfig
// there should be no conversion done by ImmutableSegmentLoader and it should
// be able to create text index reader with on-disk version V1
- indexSegment = ImmutableSegmentLoader.load(_indexDir, ReadMode.mmap);
+ indexLoadingConfig = new IndexLoadingConfig();
+ indexLoadingConfig.setTextIndexColumns(new HashSet<>(Arrays.asList(TEXT_INDEX_COL_NAME)));
+ indexLoadingConfig.setReadMode(ReadMode.mmap);
+ indexSegment = ImmutableSegmentLoader.load(_indexDir, indexLoadingConfig);
// check that loaded segment version is v1
Assert.assertEquals(indexSegment.getSegmentMetadata().getVersion(), SegmentVersion.v1.toString());
// no change/conversion should have happened in indexDir
@@ -349,16 +379,22 @@ public class LoaderTest {
Assert.assertFalse(SegmentDirectoryPaths.segmentDirectoryFor(_indexDir, SegmentVersion.v3).exists());
// no change/conversion should have happened in text index Dir
textIndexFile = SegmentDirectoryPaths.findTextIndexIndexFile(_indexDir, TEXT_INDEX_COL_NAME);
+ // segment load should have created the docID mapping file in V1 structure
+ textIndexDocIdMappingFile = SegmentDirectoryPaths.findTextIndexDocIdMappingFile(_indexDir, TEXT_INDEX_COL_NAME);
Assert.assertNotNull(textIndexFile);
+ Assert.assertNotNull(textIndexDocIdMappingFile);
Assert.assertTrue(textIndexFile.isDirectory());
Assert.assertEquals(textIndexFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexCreator.LUCENE_TEXT_INDEX_FILE_EXTENSION);
Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
+ Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexReader.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
+ Assert.assertEquals(textIndexDocIdMappingFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
indexSegment.destroy();
// CASE 2: set the segment version to load in IndexLoadingConfig to V1
// there should be no conversion done by ImmutableSegmentLoader since the segmentVersionToLoad
// is same as the version of segment on fisk
- indexSegment = ImmutableSegmentLoader.load(_indexDir, _v1IndexLoadingConfig);
+ indexLoadingConfig.setSegmentVersion(SegmentVersion.v1);
+ indexSegment = ImmutableSegmentLoader.load(_indexDir, indexLoadingConfig);
// check that loaded segment version is v1
Assert.assertEquals(indexSegment.getSegmentMetadata().getVersion(), SegmentVersion.v1.toString());
// no change/conversion should have happened in indexDir
@@ -366,16 +402,22 @@ public class LoaderTest {
Assert.assertFalse(SegmentDirectoryPaths.segmentDirectoryFor(_indexDir, SegmentVersion.v3).exists());
// no change/conversion should have happened in text index Dir
textIndexFile = SegmentDirectoryPaths.findTextIndexIndexFile(_indexDir, TEXT_INDEX_COL_NAME);
+ // segment load should have created the docID mapping file in V1 structure
+ textIndexDocIdMappingFile = SegmentDirectoryPaths.findTextIndexDocIdMappingFile(_indexDir, TEXT_INDEX_COL_NAME);
Assert.assertNotNull(textIndexFile);
+ Assert.assertNotNull(textIndexDocIdMappingFile);
Assert.assertTrue(textIndexFile.isDirectory());
Assert.assertEquals(textIndexFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexCreator.LUCENE_TEXT_INDEX_FILE_EXTENSION);
Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
+ Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexReader.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
+ Assert.assertEquals(textIndexDocIdMappingFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
indexSegment.destroy();
// CASE 3: set the segment version to load in IndexLoadingConfig to V3
// there should be conversion done by ImmutableSegmentLoader since the segmentVersionToLoad
// is different than the version of segment on disk
- indexSegment = ImmutableSegmentLoader.load(_indexDir, _v3IndexLoadingConfig);
+ indexLoadingConfig.setSegmentVersion(SegmentVersion.v3);
+ indexSegment = ImmutableSegmentLoader.load(_indexDir, indexLoadingConfig);
// check that loaded segment version is v3
Assert.assertEquals(indexSegment.getSegmentMetadata().getVersion(), SegmentVersion.v3.toString());
// the index dir should exist in v3 format due to conversion
@@ -383,10 +425,15 @@ public class LoaderTest {
verifyIndexDirIsV3(_indexDir);
// check that text index exists under V3 subdir. It should exist and should be a subdir
textIndexFile = SegmentDirectoryPaths.findTextIndexIndexFile(_indexDir, TEXT_INDEX_COL_NAME);
+ // segment load should have created the docID mapping file in V3 structure
+ textIndexDocIdMappingFile = SegmentDirectoryPaths.findTextIndexDocIdMappingFile(_indexDir, TEXT_INDEX_COL_NAME);
Assert.assertNotNull(textIndexFile);
+ Assert.assertNotNull(textIndexDocIdMappingFile);
Assert.assertTrue(textIndexFile.isDirectory());
Assert.assertEquals(textIndexFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexCreator.LUCENE_TEXT_INDEX_FILE_EXTENSION);
Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
+ Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + LuceneTextIndexReader.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
+ Assert.assertEquals(textIndexDocIdMappingFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
indexSegment.destroy();
}
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/TestTextSearchQueries.java b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
similarity index 99%
rename from pinot-core/src/test/java/org/apache/pinot/queries/TestTextSearchQueries.java
rename to pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
index 7cba996..7ed16b7 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/TestTextSearchQueries.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
@@ -56,7 +56,6 @@ import org.apache.pinot.core.operator.query.AggregationOperator;
import org.apache.pinot.core.operator.query.SelectionOnlyOperator;
import org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import org.apache.pinot.core.segment.index.loader.IndexLoadingConfig;
-import org.apache.pinot.pql.parsers.Pql2Compiler;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
@@ -75,7 +74,7 @@ import org.testng.annotations.Test;
* The test table has a SKILLS column and QUERY_LOG column. Text index is created
* on each of these columns.
*/
-public class TestTextSearchQueries extends BaseQueriesTest {
+public class TextSearchQueriesTest extends BaseQueriesTest {
private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "TextSearchQueries");
private static final String TABLE_NAME = "MyTable";
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org