You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2021/11/12 23:32:03 UTC

[pinot] branch master updated: Support Native FST As An Index Subtype for FST Indices (#7729)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 781f5cd  Support Native FST As An Index Subtype for FST Indices (#7729)
781f5cd is described below

commit 781f5cdeee37d42b0eb0eb6069107f5b4588b13a
Author: Atri Sharma <at...@gmail.com>
AuthorDate: Sat Nov 13 05:01:47 2021 +0530

    Support Native FST As An Index Subtype for FST Indices (#7729)
    
    This PR introduces the notion of subtypes to FST index -- allowing users to set a segment level flag indicating whether the index should be built using native FST or Lucene FST.
---
 .../queries/FSTBasedRegexpLikeQueriesTest.java     | 39 ++++++-----
 .../creator/impl/SegmentColumnarIndexCreator.java  | 13 +++-
 .../index/column/PhysicalColumnIndexContainer.java | 10 ++-
 .../segment/index/loader/IndexHandlerFactory.java  |  5 +-
 .../segment/index/loader/IndexLoadingConfig.java   | 16 ++++-
 ...neFSTIndexHandler.java => FSTIndexHandler.java} | 26 ++++++--
 .../segment/local/utils/nativefst/FSTHeader.java   |  2 +-
 .../utils/nativefst/NativeFSTIndexCreator.java     |  2 +-
 .../index/creator/NativeFSTIndexCreatorTest.java   | 75 ++++++++++++++++++++++
 .../org/apache/pinot/segment/spi/V1Constants.java  |  1 -
 .../spi/creator/SegmentGeneratorConfig.java        | 12 ++++
 .../org/apache/pinot/spi/config/table/FSTType.java | 26 ++++++++
 .../pinot/spi/config/table/IndexingConfig.java     |  9 +++
 13 files changed, 204 insertions(+), 32 deletions(-)

diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java
index 84c8007..0a0e8ba 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java
@@ -26,6 +26,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
+import java.util.concurrent.ThreadLocalRandom;
 import org.apache.commons.io.FileUtils;
 import org.apache.pinot.common.response.broker.AggregationResult;
 import org.apache.pinot.common.response.broker.BrokerResponseNative;
@@ -44,6 +45,7 @@ import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
 import org.apache.pinot.segment.spi.ImmutableSegment;
 import org.apache.pinot.segment.spi.IndexSegment;
 import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.FSTType;
 import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableType;
@@ -69,8 +71,6 @@ public class FSTBasedRegexpLikeQueriesTest extends BaseQueriesTest {
   private static final Integer INT_BASE_VALUE = 1000;
   private static final Integer NUM_ROWS = 1024;
 
-  private final List<GenericRow> _rows = new ArrayList<>();
-
   private IndexSegment _indexSegment;
   private List<IndexSegment> _indexSegments;
 
@@ -94,19 +94,25 @@ public class FSTBasedRegexpLikeQueriesTest extends BaseQueriesTest {
       throws Exception {
     FileUtils.deleteQuietly(INDEX_DIR);
 
-    buildSegment();
-    IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
-    Set<String> fstIndexCols = new HashSet<>();
-    fstIndexCols.add(DOMAIN_NAMES_COL);
-    indexLoadingConfig.setFSTIndexColumns(fstIndexCols);
-
-    Set<String> invertedIndexCols = new HashSet<>();
-    invertedIndexCols.add(DOMAIN_NAMES_COL);
-    indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
-    ImmutableSegment immutableSegment =
-        ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig);
-    _indexSegment = immutableSegment;
-    _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
+    List<IndexSegment> segments = new ArrayList<>();
+    for (FSTType fstType : Arrays.asList(FSTType.LUCENE, FSTType.NATIVE)) {
+      buildSegment(fstType);
+
+      IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
+      Set<String> fstIndexCols = new HashSet<>();
+      fstIndexCols.add(DOMAIN_NAMES_COL);
+      indexLoadingConfig.setFSTIndexColumns(fstIndexCols);
+      indexLoadingConfig.setFSTIndexType(fstType);
+      Set<String> invertedIndexCols = new HashSet<>();
+      invertedIndexCols.add(DOMAIN_NAMES_COL);
+      indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
+      ImmutableSegment segment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig);
+
+      segments.add(segment);
+    }
+
+    _indexSegment = segments.get(ThreadLocalRandom.current().nextInt(2));
+    _indexSegments = segments;
   }
 
   @AfterClass
@@ -151,7 +157,7 @@ public class FSTBasedRegexpLikeQueriesTest extends BaseQueriesTest {
     return rows;
   }
 
-  private void buildSegment()
+  private void buildSegment(FSTType fstType)
       throws Exception {
     List<GenericRow> rows = createTestData(NUM_ROWS);
     List<FieldConfig> fieldConfigs = new ArrayList<>();
@@ -171,6 +177,7 @@ public class FSTBasedRegexpLikeQueriesTest extends BaseQueriesTest {
     config.setOutDir(INDEX_DIR.getPath());
     config.setTableName(TABLE_NAME);
     config.setSegmentName(SEGMENT_NAME);
+    config.setFSTIndexType(fstType);
 
     SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
     try (RecordReader recordReader = new GenericRowRecordReader(rows)) {
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index 8315d9a..476c4c1 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -51,6 +51,7 @@ import org.apache.pinot.segment.local.segment.creator.impl.inv.text.LuceneFSTInd
 import org.apache.pinot.segment.local.segment.creator.impl.nullvalue.NullValueVectorCreator;
 import org.apache.pinot.segment.local.segment.creator.impl.text.LuceneTextIndexCreator;
 import org.apache.pinot.segment.local.utils.GeometrySerializer;
+import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexCreator;
 import org.apache.pinot.segment.spi.V1Constants;
 import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
 import org.apache.pinot.segment.spi.creator.ColumnIndexCreationInfo;
@@ -66,6 +67,7 @@ import org.apache.pinot.segment.spi.index.creator.TextIndexCreator;
 import org.apache.pinot.segment.spi.index.creator.TextIndexType;
 import org.apache.pinot.segment.spi.index.reader.H3IndexResolution;
 import org.apache.pinot.segment.spi.partition.PartitionFunction;
+import org.apache.pinot.spi.config.table.FSTType;
 import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.data.DateTimeFieldSpec;
 import org.apache.pinot.spi.data.DateTimeFormatSpec;
@@ -266,8 +268,15 @@ public class SegmentColumnarIndexCreator implements SegmentCreator {
             "FST index is currently only supported on STRING type columns");
         Preconditions.checkState(dictEnabledColumn,
             "FST index is currently only supported on dictionary-encoded columns");
-        _fstIndexCreatorMap.put(columnName, new LuceneFSTIndexCreator(_indexDir, columnName,
-            (String[]) indexCreationInfo.getSortedUniqueElementsArray()));
+        String[] sortedValues = (String[]) indexCreationInfo.getSortedUniqueElementsArray();
+        TextIndexCreator textIndexCreator;
+        if (_config.getFSTIndexType() == FSTType.NATIVE) {
+          textIndexCreator = new NativeFSTIndexCreator(_indexDir, columnName, sortedValues);
+        } else {
+          textIndexCreator = new LuceneFSTIndexCreator(_indexDir, columnName, sortedValues);
+        }
+
+        _fstIndexCreatorMap.put(columnName, textIndexCreator);
       }
 
       if (jsonIndexColumns.contains(columnName)) {
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
index 8dfd2c7..d7de202 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
@@ -55,6 +55,8 @@ import org.apache.pinot.segment.local.segment.index.readers.geospatial.Immutable
 import org.apache.pinot.segment.local.segment.index.readers.json.ImmutableJsonIndexReader;
 import org.apache.pinot.segment.local.segment.index.readers.sorted.SortedIndexReaderImpl;
 import org.apache.pinot.segment.local.segment.index.readers.text.LuceneTextIndexReader;
+import org.apache.pinot.segment.local.utils.nativefst.FSTHeader;
+import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexReader;
 import org.apache.pinot.segment.spi.ColumnMetadata;
 import org.apache.pinot.segment.spi.index.column.ColumnIndexContainer;
 import org.apache.pinot.segment.spi.index.reader.BloomFilterReader;
@@ -176,7 +178,13 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer
       }
 
       if (loadFSTIndex) {
-        _fstIndex = new LuceneFSTIndexReader(segmentReader.getIndexFor(columnName, ColumnIndexType.FST_INDEX));
+        PinotDataBuffer buffer = segmentReader.getIndexFor(columnName, ColumnIndexType.FST_INDEX);
+        int magicHeader = buffer.getInt(0);
+        if (magicHeader == FSTHeader.FST_MAGIC) {
+          _fstIndex = new NativeFSTIndexReader(buffer);
+        } else {
+          _fstIndex = new LuceneFSTIndexReader(buffer);
+        }
       } else {
         _fstIndex = null;
       }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java
index 7da4024..9dd9ecb 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java
@@ -20,10 +20,10 @@ package org.apache.pinot.segment.local.segment.index.loader;
 
 import java.io.File;
 import org.apache.pinot.segment.local.segment.index.loader.bloomfilter.BloomFilterHandler;
+import org.apache.pinot.segment.local.segment.index.loader.invertedindex.FSTIndexHandler;
 import org.apache.pinot.segment.local.segment.index.loader.invertedindex.H3IndexHandler;
 import org.apache.pinot.segment.local.segment.index.loader.invertedindex.InvertedIndexHandler;
 import org.apache.pinot.segment.local.segment.index.loader.invertedindex.JsonIndexHandler;
-import org.apache.pinot.segment.local.segment.index.loader.invertedindex.LuceneFSTIndexHandler;
 import org.apache.pinot.segment.local.segment.index.loader.invertedindex.RangeIndexHandler;
 import org.apache.pinot.segment.local.segment.index.loader.invertedindex.TextIndexHandler;
 import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
@@ -48,7 +48,8 @@ public class IndexHandlerFactory {
       case TEXT_INDEX:
         return new TextIndexHandler(indexDir, segmentMetadata, indexLoadingConfig, segmentWriter);
       case FST_INDEX:
-        return new LuceneFSTIndexHandler(indexDir, segmentMetadata, indexLoadingConfig, segmentWriter);
+        return new FSTIndexHandler(indexDir, segmentMetadata, indexLoadingConfig, segmentWriter,
+            indexLoadingConfig.getFSTIndexType());
       case JSON_INDEX:
         return new JsonIndexHandler(indexDir, segmentMetadata, indexLoadingConfig, segmentWriter);
       case H3_INDEX:
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java
index ef6d95d..0b04661 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java
@@ -34,6 +34,7 @@ import org.apache.pinot.segment.spi.index.creator.H3IndexConfig;
 import org.apache.pinot.segment.spi.loader.SegmentDirectoryLoaderRegistry;
 import org.apache.pinot.spi.config.instance.InstanceDataManagerConfig;
 import org.apache.pinot.spi.config.table.BloomFilterConfig;
+import org.apache.pinot.spi.config.table.FSTType;
 import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.config.table.IndexingConfig;
 import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
@@ -57,6 +58,7 @@ public class IndexLoadingConfig {
   private int _rangeIndexVersion = IndexingConfig.DEFAULT_RANGE_INDEX_VERSION;
   private Set<String> _textIndexColumns = new HashSet<>();
   private Set<String> _fstIndexColumns = new HashSet<>();
+  private FSTType _fstIndexType = FSTType.LUCENE;
   private Set<String> _jsonIndexColumns = new HashSet<>();
   private Map<String, H3IndexConfig> _h3IndexConfigs = new HashMap<>();
   private Set<String> _noDictionaryColumns = new HashSet<>(); // TODO: replace this by _noDictionaryConfig.
@@ -107,7 +109,6 @@ public class IndexLoadingConfig {
     if (invertedIndexColumns != null) {
       _invertedIndexColumns.addAll(invertedIndexColumns);
     }
-    _rangeIndexVersion = indexingConfig.getRangeIndexVersion();
 
     List<String> jsonIndexColumns = indexingConfig.getJsonIndexColumns();
     if (jsonIndexColumns != null) {
@@ -119,6 +120,10 @@ public class IndexLoadingConfig {
       _rangeIndexColumns.addAll(rangeIndexColumns);
     }
 
+    _rangeIndexVersion = indexingConfig.getRangeIndexVersion();
+
+    _fstIndexType = indexingConfig.getFSTIndexType();
+
     List<String> bloomFilterColumns = indexingConfig.getBloomFilterColumns();
     if (bloomFilterColumns != null) {
       for (String bloomFilterColumn : bloomFilterColumns) {
@@ -285,6 +290,10 @@ public class IndexLoadingConfig {
     return _rangeIndexVersion;
   }
 
+  public FSTType getFSTIndexType() {
+    return _fstIndexType;
+  }
+
   /**
    * Used in two places:
    * (1) In {@link PhysicalColumnIndexContainer} to create the index loading info for immutable segments
@@ -350,6 +359,11 @@ public class IndexLoadingConfig {
   }
 
   @VisibleForTesting
+  public void setFSTIndexType(FSTType fstType) {
+    _fstIndexType = fstType;
+  }
+
+  @VisibleForTesting
   public void setJsonIndexColumns(Set<String> jsonIndexColumns) {
     _jsonIndexColumns = jsonIndexColumns;
   }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/LuceneFSTIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java
similarity index 87%
rename from pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/LuceneFSTIndexHandler.java
rename to pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java
index 24e6538..50f2b67 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/LuceneFSTIndexHandler.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java
@@ -29,12 +29,15 @@ import org.apache.pinot.segment.local.segment.index.loader.IndexHandler;
 import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
 import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils;
 import org.apache.pinot.segment.local.segment.index.loader.SegmentPreProcessor;
+import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexCreator;
 import org.apache.pinot.segment.spi.ColumnMetadata;
 import org.apache.pinot.segment.spi.SegmentMetadata;
 import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.TextIndexCreator;
 import org.apache.pinot.segment.spi.index.reader.Dictionary;
 import org.apache.pinot.segment.spi.store.ColumnIndexType;
 import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.apache.pinot.spi.config.table.FSTType;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -60,20 +63,22 @@ import static org.apache.pinot.segment.spi.V1Constants.Indexes.FST_INDEX_FILE_EX
  * added column. In this case, the default column handler would have taken care of adding
  * dictionary for the new column. Read the dictionary to create FST index.
  */
-public class LuceneFSTIndexHandler implements IndexHandler {
-  private static final Logger LOGGER = LoggerFactory.getLogger(LuceneFSTIndexHandler.class);
+public class FSTIndexHandler implements IndexHandler {
+  private static final Logger LOGGER = LoggerFactory.getLogger(FSTIndexHandler.class);
 
   private final File _indexDir;
   private final SegmentMetadata _segmentMetadata;
   private final SegmentDirectory.Writer _segmentWriter;
   private final Set<String> _columnsToAddIdx;
+  private final FSTType _fstType;
 
-  public LuceneFSTIndexHandler(File indexDir, SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig,
-      SegmentDirectory.Writer segmentWriter) {
+  public FSTIndexHandler(File indexDir, SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig,
+      SegmentDirectory.Writer segmentWriter, FSTType fstType) {
     _indexDir = indexDir;
     _segmentMetadata = segmentMetadata;
     _segmentWriter = segmentWriter;
     _columnsToAddIdx = new HashSet<>(indexLoadingConfig.getFSTIndexColumns());
+    _fstType = fstType;
   }
 
   @Override
@@ -130,13 +135,20 @@ public class LuceneFSTIndexHandler implements IndexHandler {
 
     LOGGER.info("Creating new FST index for column: {} in segment: {}, cardinality: {}", column, segmentName,
         columnMetadata.getCardinality());
-    LuceneFSTIndexCreator luceneFSTIndexCreator = new LuceneFSTIndexCreator(_indexDir, column, null);
+
+    TextIndexCreator fstIndexCreator;
+    if (_fstType == FSTType.LUCENE) {
+      fstIndexCreator = new LuceneFSTIndexCreator(_indexDir, column, null);
+    } else {
+      fstIndexCreator = new NativeFSTIndexCreator(_indexDir, column, null);
+    }
+
     try (Dictionary dictionary = LoaderUtils.getDictionary(_segmentWriter, columnMetadata)) {
       for (int dictId = 0; dictId < dictionary.length(); dictId++) {
-        luceneFSTIndexCreator.add(dictionary.getStringValue(dictId));
+        fstIndexCreator.add(dictionary.getStringValue(dictId));
       }
     }
-    luceneFSTIndexCreator.seal();
+    fstIndexCreator.seal();
 
     // For v3, write the generated range index file into the single file and remove it.
     if (_segmentMetadata.getVersion() == SegmentVersion.v3) {
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java
index a053c30..6f67fcb 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java
@@ -31,7 +31,7 @@ public final class FSTHeader {
   /**
    * FST magic (4 bytes).
    */
-  final static int FST_MAGIC = ('\\' << 24) | ('f' << 16) | ('s' << 8) | ('a');
+  public static final int FST_MAGIC = ('\\' << 24) | ('f' << 16) | ('s' << 8) | 'a';
 
   /** FST version number. */
   final byte _version;
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java
index 678105a..99298b1 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java
@@ -47,7 +47,7 @@ public class NativeFSTIndexCreator implements TextIndexCreator {
    * @throws IOException
    */
   public NativeFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) {
-    _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.NATIVE_FST_INDEX_FILE_EXTENSION);
+    _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.FST_INDEX_FILE_EXTENSION);
 
     _fstBuilder = new FSTBuilder();
     _dictId = 0;
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java
new file mode 100644
index 0000000..cc4f153
--- /dev/null
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.creator;
+
+import java.io.File;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexCreator;
+import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexReader;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import static org.apache.pinot.segment.spi.V1Constants.Indexes.FST_INDEX_FILE_EXTENSION;
+
+
+public class NativeFSTIndexCreatorTest {
+  private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "NativeFSTIndexCreatorTest");
+
+  @BeforeClass
+  public void setUp()
+      throws IOException {
+    FileUtils.forceMkdir(INDEX_DIR);
+  }
+
+  @AfterClass
+  public void tearDown()
+      throws IOException {
+    FileUtils.deleteDirectory(INDEX_DIR);
+  }
+
+  @Test
+  public void testIndexWriterReader()
+      throws IOException {
+    String[] uniqueValues = new String[3];
+    uniqueValues[0] = "hello-world";
+    uniqueValues[1] = "hello-world123";
+    uniqueValues[2] = "still";
+
+    try (NativeFSTIndexCreator creator = new NativeFSTIndexCreator(INDEX_DIR, "testFSTColumn", uniqueValues)) {
+      creator.seal();
+    }
+
+    File fstFile = new File(INDEX_DIR, "testFSTColumn" + FST_INDEX_FILE_EXTENSION);
+    try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(fstFile);
+        NativeFSTIndexReader reader = new NativeFSTIndexReader(dataBuffer)) {
+
+      int[] matchedDictIds = reader.getDictIds("hello.*").toArray();
+      Assert.assertEquals(2, matchedDictIds.length);
+      Assert.assertEquals(0, matchedDictIds[0]);
+      Assert.assertEquals(1, matchedDictIds[1]);
+
+      matchedDictIds = reader.getDictIds(".*llo").toArray();
+      Assert.assertEquals(0, matchedDictIds.length);
+    }
+  }
+}
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
index 744f0bc..e037544 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
@@ -44,7 +44,6 @@ public class V1Constants {
     public static final String BITMAP_INVERTED_INDEX_FILE_EXTENSION = ".bitmap.inv";
     public static final String BITMAP_RANGE_INDEX_FILE_EXTENSION = ".bitmap.range";
     public static final String FST_INDEX_FILE_EXTENSION = ".lucene.fst";
-    public static final String NATIVE_FST_INDEX_FILE_EXTENSION = ".native.fst";
     public static final String JSON_INDEX_FILE_EXTENSION = ".json.idx";
     public static final String H3_INDEX_FILE_EXTENSION = ".h3.idx";
     public static final String BLOOM_FILTER_FILE_EXTENSION = ".bloom";
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
index d1b16d7..0ed1d60 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
@@ -38,6 +38,7 @@ import org.apache.pinot.segment.spi.creator.name.FixedSegmentNameGenerator;
 import org.apache.pinot.segment.spi.creator.name.SegmentNameGenerator;
 import org.apache.pinot.segment.spi.creator.name.SimpleSegmentNameGenerator;
 import org.apache.pinot.segment.spi.index.creator.H3IndexConfig;
+import org.apache.pinot.spi.config.table.FSTType;
 import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.config.table.IndexingConfig;
 import org.apache.pinot.spi.config.table.SegmentPartitionConfig;
@@ -91,6 +92,7 @@ public class SegmentGeneratorConfig implements Serializable {
   private String _segmentEndTime = null;
   private SegmentVersion _segmentVersion = SegmentVersion.v3;
   private Schema _schema = null;
+  private FSTType _fstTypeForFSTIndex = FSTType.LUCENE;
   private RecordReaderConfig _readerConfig = null;
   private List<StarTreeIndexConfig> _starTreeIndexConfigs = null;
   private boolean _enableDefaultStarTree = false;
@@ -194,6 +196,8 @@ public class SegmentGeneratorConfig implements Serializable {
       extractH3IndexConfigsFromTableConfig(tableConfig);
       extractCompressionCodecConfigsFromTableConfig(tableConfig);
 
+      _fstTypeForFSTIndex = tableConfig.getIndexingConfig().getFSTIndexType();
+
       _nullHandlingEnabled = indexingConfig.isNullHandlingEnabled();
     }
   }
@@ -503,6 +507,14 @@ public class SegmentGeneratorConfig implements Serializable {
     return _sequenceId;
   }
 
+  public void setFSTIndexType(FSTType fstType) {
+    _fstTypeForFSTIndex = fstType;
+  }
+
+  public FSTType getFSTIndexType() {
+    return _fstTypeForFSTIndex;
+  }
+
   /**
    * This method should be used instead of setPostfix if you are adding a sequence number.
    */
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FSTType.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FSTType.java
new file mode 100644
index 0000000..8f20390
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FSTType.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.config.table;
+
+/**
+ * Type of FST to be used
+ */
+public enum FSTType {
+    LUCENE, NATIVE
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java
index 07219d2..4d32b67 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java
@@ -41,6 +41,7 @@ public class IndexingConfig extends BaseJsonConfig {
   @Deprecated // Moved to {@link IngestionConfig#getStreamIngestionConfig}
   private Map<String, String> _streamConfigs;
   private String _segmentFormatVersion;
+  private FSTType _fstTypeForFSTIndex;
   private String _columnMinMaxValueGeneratorMode;
   private List<String> _noDictionaryColumns; // TODO: replace this with noDictionaryConfig.
   private Map<String, String> _noDictionaryConfig;
@@ -86,6 +87,14 @@ public class IndexingConfig extends BaseJsonConfig {
     return _rangeIndexVersion;
   }
 
+  public void setFSTIndexType(FSTType fstType) {
+    _fstTypeForFSTIndex = fstType;
+  }
+
+  public FSTType getFSTIndexType() {
+    return _fstTypeForFSTIndex;
+  }
+
   public void setRangeIndexVersion(int rangeIndexVersion) {
     _rangeIndexVersion = rangeIndexVersion;
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org