You are viewing a plain-text version of this content. The hyperlink to the canonical version was not preserved in this copy.
Posted to commits@pinot.apache.org by xi...@apache.org on 2023/11/07 08:33:11 UTC
(pinot) 01/03: upgrade lucene to 9.8.0
This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
commit 8f69299aefcd41b834e8671ebc55b0eaf4ea2834
Author: Xiang Fu <xi...@gmail.com>
AuthorDate: Fri Oct 27 14:10:38 2023 -0700
upgrade lucene to 9.8.0
---
pinot-core/pom.xml | 2 +-
pinot-segment-local/pom.xml | 2 +-
.../creator/impl/inv/text/LuceneFSTIndexCreator.java | 2 +-
.../segment/index/readers/LuceneFSTIndexReader.java | 3 ++-
.../pinot/segment/local/utils/fst/FSTBuilder.java | 19 +++++++++----------
.../pinot/segment/local/utils/fst/RegexpMatcher.java | 8 ++++----
.../pinot/segment/local/utils/fst/FSTBuilderTest.java | 4 ++--
.../org/apache/pinot/segment/spi/V1Constants.java | 6 +++---
pom.xml | 2 +-
9 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/pinot-core/pom.xml b/pinot-core/pom.xml
index f47444e42e..89ced53e82 100644
--- a/pinot-core/pom.xml
+++ b/pinot-core/pom.xml
@@ -258,7 +258,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-analyzers-common</artifactId>
+ <artifactId>lucene-analysis-common</artifactId>
<version>${lucene.version}</version>
</dependency>
</dependencies>
diff --git a/pinot-segment-local/pom.xml b/pinot-segment-local/pom.xml
index 9724e3bfd0..2d90fbed46 100644
--- a/pinot-segment-local/pom.xml
+++ b/pinot-segment-local/pom.xml
@@ -69,7 +69,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-analyzers-common</artifactId>
+ <artifactId>lucene-analysis-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java
index ab9d7490e8..695ce4b1ea 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java
@@ -97,7 +97,7 @@ public class LuceneFSTIndexCreator implements FSTIndexCreator {
fileOutputStream = new FileOutputStream(_fstIndexFile);
FST<Long> fst = _fstBuilder.done();
OutputStreamDataOutput d = new OutputStreamDataOutput(fileOutputStream);
- fst.save(d);
+ fst.save(d, d);
} finally {
if (fileOutputStream != null) {
fileOutputStream.close();
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java
index f859889e7f..6e579562c4 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java
@@ -51,7 +51,8 @@ public class LuceneFSTIndexReader implements TextIndexReader {
_dataBuffer = pinotDataBuffer;
_dataBufferIndexInput = new PinotBufferIndexInput(_dataBuffer, 0L, _dataBuffer.size());
- _readFST = new FST(_dataBufferIndexInput, PositiveIntOutputs.getSingleton(), new OffHeapFSTStore());
+ _readFST =
+ new FST(_dataBufferIndexInput, _dataBufferIndexInput, PositiveIntOutputs.getSingleton(), new OffHeapFSTStore());
}
@Override
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
index c998b7113d..0a4596d173 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
@@ -22,8 +22,8 @@ import java.io.IOException;
import java.util.Map;
import java.util.SortedMap;
import org.apache.lucene.util.IntsRefBuilder;
-import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.slf4j.Logger;
@@ -36,20 +36,19 @@ import org.slf4j.LoggerFactory;
*/
public class FSTBuilder {
public static final Logger LOGGER = LoggerFactory.getLogger(FSTBuilder.class);
- private Builder<Long> _builder = new Builder<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton());
- private IntsRefBuilder _scratch = new IntsRefBuilder();
+ private final FSTCompiler<Long> _builder = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton());
+ private final IntsRefBuilder _scratch = new IntsRefBuilder();
- public static FST buildFST(SortedMap<String, Integer> input)
+ public static FST<Long> buildFST(SortedMap<String, Integer> input)
throws IOException {
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
- Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, fstOutput);
+ FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, fstOutput);
IntsRefBuilder scratch = new IntsRefBuilder();
for (Map.Entry<String, Integer> entry : input.entrySet()) {
- builder.add(Util.toUTF16(entry.getKey(), scratch), entry.getValue().longValue());
+ fstCompiler.add(Util.toUTF16(entry.getKey(), scratch), entry.getValue().longValue());
}
- FST<Long> result = builder.finish();
- return result;
+ return fstCompiler.compile();
}
public void addEntry(String key, Integer value)
@@ -57,8 +56,8 @@ public class FSTBuilder {
_builder.add(Util.toUTF16(key, _scratch), value.longValue());
}
- public FST done()
+ public FST<Long> done()
throws IOException {
- return _builder.finish();
+ return _builder.compile();
}
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/RegexpMatcher.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/RegexpMatcher.java
index eb6e0d9866..f761c2b8dd 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/RegexpMatcher.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/RegexpMatcher.java
@@ -122,16 +122,16 @@ public class RegexpMatcher {
newInput.copyInts(currentInput.get());
newInput.append(t.min);
queue.add(new Path<Long>(t.dest, new FST.Arc<Long>().copyFrom(nextArc),
- _fst.outputs.add(path._output, nextArc.output), newInput));
+ _fst.outputs.add(path._output, nextArc.output()), newInput));
}
} else {
FST.Arc<Long> nextArc = Util.readCeilArc(min, _fst, path._fstNode, scratchArc, fstReader);
- while (nextArc != null && nextArc.label <= max) {
+ while (nextArc != null && nextArc.label() <= max) {
final IntsRefBuilder newInput = new IntsRefBuilder();
newInput.copyInts(currentInput.get());
- newInput.append(nextArc.label);
+ newInput.append(nextArc.label());
queue.add(new Path<>(t.dest, new FST.Arc<Long>().copyFrom(nextArc),
- _fst.outputs.add(path._output, nextArc.output), newInput));
+ _fst.outputs.add(path._output, nextArc.output()), newInput));
nextArc = nextArc.isLast() ? null : _fst.readNextRealArc(nextArc, fstReader);
}
}
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
index 883faeb447..493e7b3449 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
@@ -69,7 +69,7 @@ public class FSTBuilderTest {
File outputFile = new File(TEMP_DIR, "test.lucene");
FileOutputStream fileOutputStream = new FileOutputStream(outputFile);
OutputStreamDataOutput d = new OutputStreamDataOutput(fileOutputStream);
- fst.save(d);
+ fst.save(d, d);
fileOutputStream.close();
Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
@@ -78,7 +78,7 @@ public class FSTBuilderTest {
PinotDataBuffer pinotDataBuffer =
PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(), ByteOrder.BIG_ENDIAN, "");
PinotBufferIndexInput indexInput = new PinotBufferIndexInput(pinotDataBuffer, 0L, fstFile.length());
- FST<Long> readFST = new FST(indexInput, outputs, new OffHeapFSTStore());
+ FST<Long> readFST = new FST(indexInput, indexInput, outputs, new OffHeapFSTStore());
List<Long> results = RegexpMatcher.regexMatch("hello.*123", fst);
Assert.assertEquals(results.size(), 1);
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
index 9f2c02fddd..8900510ab6 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
@@ -44,14 +44,14 @@ public class V1Constants {
public static final String UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION = ".mv.fwd";
public static final String BITMAP_INVERTED_INDEX_FILE_EXTENSION = ".bitmap.inv";
public static final String BITMAP_RANGE_INDEX_FILE_EXTENSION = ".bitmap.range";
- public static final String FST_INDEX_FILE_EXTENSION = ".lucene.fst";
+ public static final String FST_INDEX_FILE_EXTENSION = ".lucene.v9.fst";
public static final String JSON_INDEX_FILE_EXTENSION = ".json.idx";
public static final String NATIVE_TEXT_INDEX_FILE_EXTENSION = ".nativetext.idx";
public static final String H3_INDEX_FILE_EXTENSION = ".h3.idx";
public static final String BLOOM_FILTER_FILE_EXTENSION = ".bloom";
public static final String NULLVALUE_VECTOR_FILE_EXTENSION = ".bitmap.nullvalue";
- public static final String LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".lucene.mapping";
- public static final String LUCENE_TEXT_INDEX_FILE_EXTENSION = ".lucene.index";
+ public static final String LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".lucene.v9.mapping";
+ public static final String LUCENE_TEXT_INDEX_FILE_EXTENSION = ".lucene.v9.index";
}
public static class MetadataKeys {
diff --git a/pom.xml b/pom.xml
index 7c9ffcb4b3..29a739be48 100644
--- a/pom.xml
+++ b/pom.xml
@@ -138,7 +138,7 @@
<jsonsmart.version>2.5.0</jsonsmart.version>
<quartz.version>2.3.2</quartz.version>
<calcite.version>1.30.0</calcite.version>
- <lucene.version>8.2.0</lucene.version>
+ <lucene.version>9.8.0</lucene.version>
<reflections.version>0.9.11</reflections.version>
<!-- commons-configuration, hadoop-common, hadoop-client use commons-lang -->
<commons-lang.version>2.6</commons-lang.version>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org