You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2023/11/07 08:33:11 UTC

(pinot) 01/03: upgrade lucene to 9.8.0

This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git

commit 8f69299aefcd41b834e8671ebc55b0eaf4ea2834
Author: Xiang Fu <xi...@gmail.com>
AuthorDate: Fri Oct 27 14:10:38 2023 -0700

    upgrade lucene to 9.8.0
---
 pinot-core/pom.xml                                    |  2 +-
 pinot-segment-local/pom.xml                           |  2 +-
 .../creator/impl/inv/text/LuceneFSTIndexCreator.java  |  2 +-
 .../segment/index/readers/LuceneFSTIndexReader.java   |  3 ++-
 .../pinot/segment/local/utils/fst/FSTBuilder.java     | 19 +++++++++----------
 .../pinot/segment/local/utils/fst/RegexpMatcher.java  |  8 ++++----
 .../pinot/segment/local/utils/fst/FSTBuilderTest.java |  4 ++--
 .../org/apache/pinot/segment/spi/V1Constants.java     |  6 +++---
 pom.xml                                               |  2 +-
 9 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/pinot-core/pom.xml b/pinot-core/pom.xml
index f47444e42e..89ced53e82 100644
--- a/pinot-core/pom.xml
+++ b/pinot-core/pom.xml
@@ -258,7 +258,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-analyzers-common</artifactId>
+      <artifactId>lucene-analysis-common</artifactId>
       <version>${lucene.version}</version>
     </dependency>
   </dependencies>
diff --git a/pinot-segment-local/pom.xml b/pinot-segment-local/pom.xml
index 9724e3bfd0..2d90fbed46 100644
--- a/pinot-segment-local/pom.xml
+++ b/pinot-segment-local/pom.xml
@@ -69,7 +69,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-analyzers-common</artifactId>
+      <artifactId>lucene-analysis-common</artifactId>
       <version>${lucene.version}</version>
     </dependency>
     <dependency>
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java
index ab9d7490e8..695ce4b1ea 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java
@@ -97,7 +97,7 @@ public class LuceneFSTIndexCreator implements FSTIndexCreator {
       fileOutputStream = new FileOutputStream(_fstIndexFile);
       FST<Long> fst = _fstBuilder.done();
       OutputStreamDataOutput d = new OutputStreamDataOutput(fileOutputStream);
-      fst.save(d);
+      fst.save(d, d);
     } finally {
       if (fileOutputStream != null) {
         fileOutputStream.close();
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java
index f859889e7f..6e579562c4 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java
@@ -51,7 +51,8 @@ public class LuceneFSTIndexReader implements TextIndexReader {
     _dataBuffer = pinotDataBuffer;
     _dataBufferIndexInput = new PinotBufferIndexInput(_dataBuffer, 0L, _dataBuffer.size());
 
-    _readFST = new FST(_dataBufferIndexInput, PositiveIntOutputs.getSingleton(), new OffHeapFSTStore());
+    _readFST =
+        new FST(_dataBufferIndexInput, _dataBufferIndexInput, PositiveIntOutputs.getSingleton(), new OffHeapFSTStore());
   }
 
   @Override
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
index c998b7113d..0a4596d173 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
@@ -22,8 +22,8 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.SortedMap;
 import org.apache.lucene.util.IntsRefBuilder;
-import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FSTCompiler;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.slf4j.Logger;
@@ -36,20 +36,19 @@ import org.slf4j.LoggerFactory;
  */
 public class FSTBuilder {
   public static final Logger LOGGER = LoggerFactory.getLogger(FSTBuilder.class);
-  private Builder<Long> _builder = new Builder<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton());
-  private IntsRefBuilder _scratch = new IntsRefBuilder();
+  private final FSTCompiler<Long> _builder = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton());
+  private final IntsRefBuilder _scratch = new IntsRefBuilder();
 
-  public static FST buildFST(SortedMap<String, Integer> input)
+  public static FST<Long> buildFST(SortedMap<String, Integer> input)
       throws IOException {
     PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
-    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, fstOutput);
+    FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, fstOutput);
 
     IntsRefBuilder scratch = new IntsRefBuilder();
     for (Map.Entry<String, Integer> entry : input.entrySet()) {
-      builder.add(Util.toUTF16(entry.getKey(), scratch), entry.getValue().longValue());
+      fstCompiler.add(Util.toUTF16(entry.getKey(), scratch), entry.getValue().longValue());
     }
-    FST<Long> result = builder.finish();
-    return result;
+    return fstCompiler.compile();
   }
 
   public void addEntry(String key, Integer value)
@@ -57,8 +56,8 @@ public class FSTBuilder {
     _builder.add(Util.toUTF16(key, _scratch), value.longValue());
   }
 
-  public FST done()
+  public FST<Long> done()
       throws IOException {
-    return _builder.finish();
+    return _builder.compile();
   }
 }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/RegexpMatcher.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/RegexpMatcher.java
index eb6e0d9866..f761c2b8dd 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/RegexpMatcher.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/RegexpMatcher.java
@@ -122,16 +122,16 @@ public class RegexpMatcher {
             newInput.copyInts(currentInput.get());
             newInput.append(t.min);
             queue.add(new Path<Long>(t.dest, new FST.Arc<Long>().copyFrom(nextArc),
-                _fst.outputs.add(path._output, nextArc.output), newInput));
+                _fst.outputs.add(path._output, nextArc.output()), newInput));
           }
         } else {
           FST.Arc<Long> nextArc = Util.readCeilArc(min, _fst, path._fstNode, scratchArc, fstReader);
-          while (nextArc != null && nextArc.label <= max) {
+          while (nextArc != null && nextArc.label() <= max) {
             final IntsRefBuilder newInput = new IntsRefBuilder();
             newInput.copyInts(currentInput.get());
-            newInput.append(nextArc.label);
+            newInput.append(nextArc.label());
             queue.add(new Path<>(t.dest, new FST.Arc<Long>().copyFrom(nextArc),
-                _fst.outputs.add(path._output, nextArc.output), newInput));
+                _fst.outputs.add(path._output, nextArc.output()), newInput));
             nextArc = nextArc.isLast() ? null : _fst.readNextRealArc(nextArc, fstReader);
           }
         }
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
index 883faeb447..493e7b3449 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
@@ -69,7 +69,7 @@ public class FSTBuilderTest {
     File outputFile = new File(TEMP_DIR, "test.lucene");
     FileOutputStream fileOutputStream = new FileOutputStream(outputFile);
     OutputStreamDataOutput d = new OutputStreamDataOutput(fileOutputStream);
-    fst.save(d);
+    fst.save(d, d);
     fileOutputStream.close();
 
     Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
@@ -78,7 +78,7 @@ public class FSTBuilderTest {
     PinotDataBuffer pinotDataBuffer =
         PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(), ByteOrder.BIG_ENDIAN, "");
     PinotBufferIndexInput indexInput = new PinotBufferIndexInput(pinotDataBuffer, 0L, fstFile.length());
-    FST<Long> readFST = new FST(indexInput, outputs, new OffHeapFSTStore());
+    FST<Long> readFST = new FST(indexInput, indexInput, outputs, new OffHeapFSTStore());
 
     List<Long> results = RegexpMatcher.regexMatch("hello.*123", fst);
     Assert.assertEquals(results.size(), 1);
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
index 9f2c02fddd..8900510ab6 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
@@ -44,14 +44,14 @@ public class V1Constants {
     public static final String UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION = ".mv.fwd";
     public static final String BITMAP_INVERTED_INDEX_FILE_EXTENSION = ".bitmap.inv";
     public static final String BITMAP_RANGE_INDEX_FILE_EXTENSION = ".bitmap.range";
-    public static final String FST_INDEX_FILE_EXTENSION = ".lucene.fst";
+    public static final String FST_INDEX_FILE_EXTENSION = ".lucene.v9.fst";
     public static final String JSON_INDEX_FILE_EXTENSION = ".json.idx";
     public static final String NATIVE_TEXT_INDEX_FILE_EXTENSION = ".nativetext.idx";
     public static final String H3_INDEX_FILE_EXTENSION = ".h3.idx";
     public static final String BLOOM_FILTER_FILE_EXTENSION = ".bloom";
     public static final String NULLVALUE_VECTOR_FILE_EXTENSION = ".bitmap.nullvalue";
-    public static final String LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".lucene.mapping";
-    public static final String LUCENE_TEXT_INDEX_FILE_EXTENSION = ".lucene.index";
+    public static final String LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".lucene.v9.mapping";
+    public static final String LUCENE_TEXT_INDEX_FILE_EXTENSION = ".lucene.v9.index";
   }
 
   public static class MetadataKeys {
diff --git a/pom.xml b/pom.xml
index 7c9ffcb4b3..29a739be48 100644
--- a/pom.xml
+++ b/pom.xml
@@ -138,7 +138,7 @@
     <jsonsmart.version>2.5.0</jsonsmart.version>
     <quartz.version>2.3.2</quartz.version>
     <calcite.version>1.30.0</calcite.version>
-    <lucene.version>8.2.0</lucene.version>
+    <lucene.version>9.8.0</lucene.version>
     <reflections.version>0.9.11</reflections.version>
     <!-- commons-configuration, hadoop-common, hadoop-client use commons-lang -->
     <commons-lang.version>2.6</commons-lang.version>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org