You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2021/10/11 23:52:12 UTC
[lucene] 01/02: Merge remote-tracking branch 'upstream/main' into hnsw
This is an automated email from the ASF dual-hosted git repository.
mayya pushed a commit to branch hnsw
in repository https://gitbox.apache.org/repos/asf/lucene.git
commit 3b4296d57cf15c7afea49462937f8b2bc7f02aab
Merge: 5e42fc2 f67dec1
Author: Mayya Sharipova <ma...@elastic.co>
AuthorDate: Mon Oct 11 19:25:44 2021 -0400
Merge remote-tracking branch 'upstream/main' into hnsw
README.md | 5 +-
build.gradle | 7 +-
dev-tools/doap/lucene.rdf | 7 +
dev-tools/scripts/README.md | 17 +-
dev-tools/scripts/addVersion.py | 51 +-
dev-tools/scripts/buildAndPushRelease.py | 31 +-
dev-tools/scripts/githubPRs.py | 16 +-
dev-tools/scripts/poll-mirrors.py | 4 +-
dev-tools/scripts/releaseWizard.py | 59 +-
dev-tools/scripts/releaseWizard.yaml | 327 ++----
dev-tools/scripts/releasedJirasRegex.py | 16 +-
dev-tools/scripts/scriptutil.py | 2 +-
gradle/documentation/render-javadoc.gradle | 19 +-
gradle/generation/antlr.gradle | 88 ++
gradle/generation/javacc.gradle | 16 +-
gradle/generation/jflex.gradle | 2 +-
gradle/generation/jflex/skeleton.default.txt | 6 +-
.../jflex/skeleton.disable.buffer.expansion.txt | 4 +-
gradle/generation/local-settings.gradle | 62 +-
gradle/generation/regenerate.gradle | 21 +
gradle/generation/snowball/snowball.patch | 6 +-
gradle/publishing/distribution.gradle | 3 +-
gradle/testing/alternative-jdk-support.gradle | 63 +-
gradle/testing/defaults-tests.gradle | 7 +-
gradle/validation/check-environment.gradle | 2 +-
gradle/validation/rat-sources.gradle | 10 +-
gradle/validation/spotless.gradle | 5 +-
gradle/validation/validate-source-patterns.gradle | 53 +-
gradle/wrapper/gradle-wrapper.jar.sha256 | 2 +-
gradle/wrapper/gradle-wrapper.jar.version | 2 +-
gradle/wrapper/gradle-wrapper.properties | 2 +-
gradlew.bat | 24 +-
lucene/CHANGES.txt | 147 ++-
lucene/MIGRATE.md | 11 +
.../checksums/generateClassicTokenizer.json | 4 +-
.../checksums/generateHTMLStripCharFilter.json | 6 +-
.../checksums/generateUAX29URLEmailTokenizer.json | 4 +-
.../checksums/generateWikipediaTokenizer.json | 4 +-
.../common/src/generated/checksums/snowball.json | 2 +-
.../lucene/analysis/ar/ArabicNormalizer.java | 2 +-
.../apache/lucene/analysis/ar/ArabicStemmer.java | 14 +-
.../lucene/analysis/bg/BulgarianStemmer.java | 6 +-
.../lucene/analysis/bn/BengaliNormalizer.java | 2 +-
.../apache/lucene/analysis/bn/BengaliStemmer.java | 2 +-
.../lucene/analysis/charfilter/BaseCharFilter.java | 4 +-
.../analysis/charfilter/HTMLStripCharFilter.java | 8 +-
.../analysis/charfilter/HTMLStripCharFilter.jflex | 2 +-
.../lucene/analysis/cjk/CJKBigramFilter.java | 12 +-
.../lucene/analysis/cjk/CJKWidthCharFilter.java | 6 +-
.../apache/lucene/analysis/cjk/CJKWidthFilter.java | 10 +-
.../lucene/analysis/ckb/SoraniNormalizer.java | 2 +-
.../apache/lucene/analysis/ckb/SoraniStemmer.java | 2 +-
.../analysis/classic/ClassicTokenizerImpl.java | 6 +-
.../analysis/commongrams/CommonGramsFilter.java | 2 +-
.../compound/hyphenation/PatternParser.java | 2 +-
.../analysis/compound/hyphenation/TernaryTree.java | 4 +-
.../lucene/analysis/core/DecimalDigitFilter.java | 2 +-
.../apache/lucene/analysis/cz/CzechStemmer.java | 8 +-
.../apache/lucene/analysis/de/GermanAnalyzer.java | 7 +
.../lucene/analysis/de/GermanLightStemmer.java | 6 +-
.../lucene/analysis/de/GermanMinimalStemmer.java | 2 +-
.../analysis/de/GermanNormalizationFilter.java | 2 +-
.../apache/lucene/analysis/el/GreekStemmer.java | 54 +-
.../analysis/email/UAX29URLEmailTokenizerImpl.java | 4 +-
.../lucene/analysis/en/EnglishMinimalStemmer.java | 2 +-
.../lucene/analysis/es/SpanishLightStemmer.java | 2 +-
.../lucene/analysis/es/SpanishMinimalStemmer.java | 2 +-
.../lucene/analysis/fa/PersianNormalizer.java | 2 +-
.../lucene/analysis/fi/FinnishLightStemmer.java | 12 +-
.../lucene/analysis/fr/FrenchLightStemmer.java | 4 +-
.../lucene/analysis/fr/FrenchMinimalStemmer.java | 2 +-
.../lucene/analysis/gl/GalicianMinimalStemmer.java | 2 +-
.../apache/lucene/analysis/gl/GalicianStemmer.java | 2 +-
.../apache/lucene/analysis/hi/HindiNormalizer.java | 2 +-
.../apache/lucene/analysis/hi/HindiStemmer.java | 2 +-
.../lucene/analysis/hu/HungarianLightStemmer.java | 10 +-
.../hunspell/HunspellStemFilterFactory.java | 2 +-
.../analysis/hunspell/ISO8859_14Decoder.java | 2 +-
.../lucene/analysis/id/IndonesianStemmer.java | 14 +-
.../apache/lucene/analysis/in/IndicNormalizer.java | 6 +-
.../lucene/analysis/it/ItalianLightStemmer.java | 2 +-
.../apache/lucene/analysis/lv/LatvianStemmer.java | 10 +-
.../lucene/analysis/minhash/MinHashFilter.java | 6 +-
.../analysis/miscellaneous/ASCIIFoldingFilter.java | 2 +-
.../analysis/miscellaneous/FingerprintFilter.java | 8 +-
.../miscellaneous/HyphenatedWordsFilter.java | 2 +-
.../miscellaneous/RemoveDuplicatesTokenFilter.java | 4 +-
.../miscellaneous/WordDelimiterFilter.java | 12 +-
.../miscellaneous/WordDelimiterFilterFactory.java | 2 +-
.../miscellaneous/WordDelimiterGraphFilter.java | 2 +-
.../WordDelimiterGraphFilterFactory.java | 2 +-
.../miscellaneous/WordDelimiterIterator.java | 4 +-
.../GermanAnalyzer.java => ne/NepaliAnalyzer.java} | 99 +-
.../apache/lucene/analysis/ne/package-info.java} | 23 +-
.../lucene/analysis/no/NorwegianLightStemmer.java | 2 +-
.../analysis/no/NorwegianMinimalStemmer.java | 2 +-
.../lucene/analysis/payloads/PayloadHelper.java | 18 +-
.../lucene/analysis/pt/PortugueseLightStemmer.java | 6 +-
.../analysis/pt/PortugueseMinimalStemmer.java | 2 +-
.../lucene/analysis/pt/PortugueseStemmer.java | 2 +-
.../apache/lucene/analysis/pt/RSLPStemmerBase.java | 24 +-
.../lucene/analysis/ru/RussianLightStemmer.java | 6 +-
.../lucene/analysis/snowball/SnowballFilter.java | 4 +-
.../analysis/sr/SerbianNormalizationFilter.java | 2 +-
.../sr/SerbianNormalizationRegularFilter.java | 2 +-
.../lucene/analysis/sv/SwedishLightStemmer.java | 2 +-
.../lucene/analysis/sv/SwedishMinimalStemmer.java | 2 +-
.../lucene/analysis/synonym/SolrSynonymParser.java | 8 +-
.../apache/lucene/analysis/synonym/SynonymMap.java | 2 +-
.../analysis/synonym/WordnetSynonymParser.java | 4 +-
.../GermanAnalyzer.java => ta/TamilAnalyzer.java} | 98 +-
.../apache/lucene/analysis/ta/package-info.java} | 23 +-
.../GermanAnalyzer.java => te/TeluguAnalyzer.java} | 103 +-
.../TeluguNormalizationFilter.java} | 45 +-
.../te/TeluguNormalizationFilterFactory.java | 64 ++
.../lucene/analysis/te/TeluguNormalizer.java | 116 ++
.../TeluguStemFilter.java} | 36 +-
.../analysis/te/TeluguStemFilterFactory.java | 58 +
.../apache/lucene/analysis/te/TeluguStemmer.java | 64 ++
.../apache/lucene/analysis/te/package-info.java} | 23 +-
.../lucene/analysis/tr/TurkishLowerCaseFilter.java | 4 +-
.../lucene/analysis/util/CharArrayIterator.java | 4 +-
.../lucene/analysis/util/OpenStringBuilder.java | 8 +-
.../analysis/util/SegmentingTokenizerBase.java | 2 +-
.../apache/lucene/analysis/util/StemmerUtil.java | 10 +-
.../analysis/wikipedia/WikipediaTokenizerImpl.java | 6 +-
.../org/tartarus/snowball/SnowballProgram.java | 6 +-
.../org.apache.lucene.analysis.TokenFilterFactory | 2 +
.../org/apache/lucene/analysis/ne/stopwords.txt | 256 +++++
.../org/apache/lucene/analysis/ta/stopwords.txt | 126 +++
.../org/apache/lucene/analysis/te/stopwords.txt | 51 +
.../apache/lucene/analysis/core/TestAnalyzers.java | 4 +-
.../lucene/analysis/core/TestBugInSomething.java | 2 +-
.../lucene/analysis/core/TestRandomChains.java | 10 +-
.../lucene/analysis/core/TestStopAnalyzer.java | 2 +-
.../lucene/analysis/core/TestTypeTokenFilter.java | 2 +-
.../analysis/email/TestUAX29URLEmailAnalyzer.java | 2 +-
.../analysis/email/TestUAX29URLEmailTokenizer.java | 2 +-
.../miscellaneous/TestCapitalizationFilter.java | 4 +-
.../lucene/analysis/ne/TestNepaliAnalyzer.java | 76 ++
.../query/TestQueryAutoStopWordAnalyzer.java | 4 +-
.../lucene/analysis/shingle/TestShingleFilter.java | 6 +-
.../lucene/analysis/ta/TestTamilAnalyzer.java | 67 ++
.../lucene/analysis/te/TestTeluguAnalyzer.java | 57 +
.../lucene/analysis/te/TestTeluguFilters.java | 79 ++
.../lucene/analysis/te/TestTeluguNormalizer.java | 60 +
.../lucene/analysis/te/TestTeluguStemmer.java | 59 +
.../analysis/util/TestCharArrayIterator.java | 10 +-
.../analysis/util/TestSegmentingTokenizerBase.java | 6 +-
.../analysis/wikipedia/TestWikipediaTokenizer.java | 2 +-
.../lucene/analysis/icu/ICUTransformFilter.java | 4 +-
.../icu/segmentation/BreakIteratorWrapper.java | 4 +-
.../icu/segmentation/CharArrayIterator.java | 4 +-
.../icu/segmentation/CompositeBreakIterator.java | 6 +-
.../analysis/icu/segmentation/ICUTokenizer.java | 2 +-
.../icu/segmentation/ICUTokenizerFactory.java | 2 +-
.../analysis/icu/segmentation/ScriptIterator.java | 6 +-
.../icu/segmentation/TestCharArrayIterator.java | 2 +-
.../icu/segmentation/TestICUTokenizer.java | 8 +-
.../analysis/icu/GenerateUTR30DataFiles.java | 2 +-
.../lucene/analysis/icu/RBBIRuleCompiler.java | 2 +-
.../lucene/analysis/ja/JapaneseAnalyzer.java | 2 +-
.../analysis/ja/JapaneseCompletionAnalyzer.java | 65 ++
.../analysis/ja/JapaneseCompletionFilter.java | 272 +++++
.../ja/JapaneseCompletionFilterFactory.java | 66 ++
.../ja/JapanesePartOfSpeechStopFilterFactory.java | 2 +-
.../analysis/ja/completion/CharSequenceUtils.java | 91 ++
.../analysis/ja/completion/KatakanaRomanizer.java | 193 ++++
.../analysis/ja/completion/package-info.java} | 23 +-
.../lucene/analysis/ja/dict/BinaryDictionary.java | 12 +-
.../apache/lucene/analysis/ja/dict/Dictionary.java | 6 +-
.../lucene/analysis/ja/dict/TokenInfoFST.java | 4 +-
.../lucene/analysis/ja/dict/UnknownDictionary.java | 2 +-
.../lucene/analysis/ja/dict/UserDictionary.java | 10 +-
.../org.apache.lucene.analysis.TokenFilterFactory | 1 +
.../lucene/analysis/ja/completion/romaji_map.txt | 344 ++++++
.../analysis/ja/dict/TokenInfoDictionary$fst.dat | Bin 1686422 -> 1686422 bytes
.../ja/TestJapaneseCompletionAnalyzer.java | 72 ++
.../analysis/ja/TestJapaneseCompletionFilter.java | 271 +++++
.../ja/TestJapaneseCompletionFilterFactory.java | 59 +
.../ja/completion/TestKatakanaRomanizer.java | 68 ++
.../analysis/morfologik/MorfologikFilter.java | 2 +-
.../apache/lucene/analysis/ko/KoreanTokenizer.java | 18 +-
.../analysis/ko/dict/TokenInfoDictionary$fst.dat | Bin 5632456 -> 5632456 bytes
.../analysis/phonetic/TestPhoneticFilter.java | 4 +-
.../analysis/cn/smart/hhmm/AbstractDictionary.java | 4 +-
.../analysis/cn/smart/hhmm/BigramDictionary.java | 6 +-
.../cn/smart/TestSmartChineseAnalyzer.java | 18 +-
.../src/java/org/egothor/stemmer/Compile.java | 2 +-
.../stempel/src/java/org/egothor/stemmer/Diff.java | 6 +-
.../src/java/org/egothor/stemmer/Gener.java | 4 +-
.../stempel/src/java/org/egothor/stemmer/Lift.java | 2 +-
.../src/java/org/egothor/stemmer/MultiTrie2.java | 8 +-
.../src/java/org/egothor/stemmer/Optimizer.java | 2 +-
.../src/java/org/egothor/stemmer/Reduce.java | 6 +-
.../stempel/src/java/org/egothor/stemmer/Trie.java | 4 +-
.../src/test/org/egothor/stemmer/TestStemmer.java | 26 +-
.../lucene50/Lucene50LiveDocsFormat.java | 2 +-
.../lucene50/Lucene50SkipReader.java | 10 +-
.../lucene84/Lucene84SkipReader.java | 10 +-
.../lucene87/LZ4WithPresetDictCompressionMode.java | 4 +-
.../lucene87/Lucene87StoredFieldsFormat.java | 4 +-
.../backward_codecs/packed/LegacyDirectWriter.java | 2 +-
.../lucene50/TestBlockPostingsFormat3.java | 2 +-
.../lucene70/TestLucene70DocValuesFormat.java | 6 +-
.../BaseLucene80DocValuesFormatTestCase.java | 6 +-
.../lucene87/Lucene87RWStoredFieldsFormat.java | 2 -
.../packed/TestLegacyDirectPacked.java | 6 +-
.../backward_index/TestBackwardsCompatibility.java | 9 +-
.../backward_index/TestManyPointsInOldIndex.java | 2 +-
.../lucene/backward_index/index.8.10.0-cfs.zip | Bin 0 -> 16967 bytes
.../lucene/backward_index/index.8.10.0-nocfs.zip | Bin 0 -> 16973 bytes
.../apache/lucene/backward_index/sorted.8.10.0.zip | Bin 0 -> 142502 bytes
.../byTask/feeds/SimpleSloppyPhraseQueryMaker.java | 2 +-
.../benchmark/byTask/feeds/TrecContentSource.java | 2 +-
.../benchmark/byTask/feeds/TrecDocParser.java | 2 +-
.../benchmark/byTask/tasks/AddIndexesTask.java | 2 +-
.../lucene/benchmark/byTask/tasks/PerfTask.java | 2 +-
.../lucene/benchmark/byTask/tasks/ReportTask.java | 2 +-
.../benchmark/byTask/tasks/TaskSequence.java | 2 +-
.../lucene/benchmark/byTask/utils/Config.java | 30 +-
.../lucene/benchmark/byTask/utils/Format.java | 2 +-
.../org/apache/lucene/benchmark/quality/Judge.java | 2 +-
.../lucene/benchmark/quality/QualityBenchmark.java | 8 +-
.../lucene/benchmark/quality/QualityStats.java | 2 +-
.../lucene/benchmark/quality/trec/QueryDriver.java | 4 +-
.../benchmark/quality/trec/Trec1MQReader.java | 2 +-
.../benchmark/quality/trec/TrecTopicsReader.java | 2 +-
.../quality/utils/QualityQueriesFinder.java | 8 +-
.../benchmark/quality/utils/SimpleQQParser.java | 4 +-
.../benchmark/quality/utils/SubmissionReport.java | 2 +-
.../apache/lucene/benchmark/BenchmarkTestCase.java | 2 +-
.../benchmark/byTask/TestPerfTasksLogic.java | 42 +-
.../lucene/benchmark/quality/TestQualityRun.java | 8 +-
.../org/apache/lucene/codecs/bloom/FuzzySet.java | 2 +-
.../lucene/codecs/memory/FSTTermsReader.java | 2 +-
.../simpletext/SimpleTextCompoundFormat.java | 14 +-
.../simpletext/SimpleTextDocValuesReader.java | 8 +-
.../simpletext/SimpleTextFieldInfosFormat.java | 2 +-
.../codecs/simpletext/SimpleTextFieldsReader.java | 184 ++-
.../codecs/simpletext/SimpleTextFieldsWriter.java | 53 +-
.../simpletext/SimpleTextKnnVectorsReader.java | 31 +-
.../codecs/simpletext/SimpleTextSkipReader.java | 206 ++++
.../codecs/simpletext/SimpleTextSkipWriter.java | 157 +++
.../simpletext/SimpleTextStoredFieldsReader.java | 4 +-
.../simpletext/SimpleTextTermVectorsReader.java | 16 +-
.../checksums/generateStandardTokenizer.json | 4 +-
.../apache/lucene/analysis/AnalysisSPILoader.java | 6 +-
.../org/apache/lucene/analysis/WordlistLoader.java | 2 +-
.../analysis/standard/StandardTokenizerImpl.java | 4 +-
.../java/org/apache/lucene/codecs/CodecUtil.java | 4 +-
.../apache/lucene/codecs/DocValuesConsumer.java | 259 +++--
.../lucene/codecs/MultiLevelSkipListReader.java | 34 +-
.../lucene/codecs/MultiLevelSkipListWriter.java | 24 +-
.../apache/lucene/codecs/SegmentInfoFormat.java | 2 +-
.../apache/lucene/codecs/TermVectorsReader.java | 12 +-
.../lucene90/LZ4WithPresetDictCompressionMode.java | 4 +-
.../codecs/lucene90/Lucene90DocValuesConsumer.java | 77 +-
.../codecs/lucene90/Lucene90DocValuesProducer.java | 113 ++
.../codecs/lucene90/Lucene90FieldInfosFormat.java | 11 +-
.../codecs/lucene90/Lucene90HnswVectorsFormat.java | 2 +-
.../codecs/lucene90/Lucene90HnswVectorsReader.java | 19 +-
.../codecs/lucene90/Lucene90LiveDocsFormat.java | 2 +-
.../codecs/lucene90/Lucene90PostingsWriter.java | 9 -
.../lucene/codecs/lucene90/Lucene90SkipReader.java | 10 +-
.../lucene90/Lucene90StoredFieldsFormat.java | 4 +-
.../apache/lucene/codecs/lucene90/PForUtil.java | 31 +-
.../org/apache/lucene/document/DoublePoint.java | 4 +-
.../org/apache/lucene/document/FloatPoint.java | 4 +-
.../apache/lucene/document/InetAddressPoint.java | 6 +-
.../java/org/apache/lucene/document/IntPoint.java | 4 +-
.../org/apache/lucene/document/KnnVectorField.java | 5 +-
.../org/apache/lucene/document/LatLonPoint.java | 2 +-
.../document/LatLonPointDistanceFeatureQuery.java | 8 +-
.../lucene/document/LatLonPointDistanceQuery.java | 70 +-
.../apache/lucene/document/LatLonPointQuery.java | 25 +-
.../document/LatLonShapeBoundingBoxQuery.java | 83 +-
.../lucene/document/LongDistanceFeatureQuery.java | 24 +-
.../java/org/apache/lucene/document/LongPoint.java | 4 +-
.../org/apache/lucene/document/XYPointField.java | 2 +-
.../src/java/org/apache/lucene/geo/EdgeTree.java | 4 +-
.../src/java/org/apache/lucene/geo/Polygon2D.java | 4 +-
.../apache/lucene/index/BaseCompositeReader.java | 27 +-
.../org/apache/lucene/index/ByteSliceReader.java | 7 +-
.../java/org/apache/lucene/index/CheckIndex.java | 1028 +++++++++++------
.../java/org/apache/lucene/index/CodecReader.java | 13 +-
.../lucene/index/ConcurrentMergeScheduler.java | 25 +-
.../java/org/apache/lucene/index/DocIDMerger.java | 3 +-
.../java/org/apache/lucene/index/DocValues.java | 89 +-
.../apache/lucene/index/DocValuesLeafReader.java | 2 +-
.../org/apache/lucene/index/FilterLeafReader.java | 5 +-
.../lucene/index/FreqProxTermsWriterPerField.java | 10 +-
.../apache/lucene/index/FrozenBufferedUpdates.java | 2 +-
.../org/apache/lucene/index/IndexFileNames.java | 2 +-
.../java/org/apache/lucene/index/IndexReader.java | 14 +-
.../org/apache/lucene/index/IndexingChain.java | 84 +-
.../java/org/apache/lucene/index/MergePolicy.java | 2 +-
.../apache/lucene/index/MergeReaderWrapper.java | 19 +-
.../java/org/apache/lucene/index/MultiFields.java | 2 +-
.../java/org/apache/lucene/index/MultiSorter.java | 2 +-
.../org/apache/lucene/index/NormValuesWriter.java | 2 +-
.../java/org/apache/lucene/index/OrdinalMap.java | 6 +-
.../apache/lucene/index/ParallelLeafReader.java | 31 +-
.../java/org/apache/lucene/index/PointValues.java | 25 +-
.../apache/lucene/index/SegmentCoreReaders.java | 17 +-
.../java/org/apache/lucene/index/SegmentInfo.java | 15 +
.../java/org/apache/lucene/index/SegmentInfos.java | 8 +-
.../org/apache/lucene/index/SegmentReader.java | 16 +-
.../lucene/index/SortedSetDocValuesWriter.java | 13 +-
.../java/org/apache/lucene/index/TermVectors.java | 33 -
.../lucene/index/VectorSimilarityFunction.java | 42 +-
.../org/apache/lucene/search/BooleanScorer.java | 2 +-
.../apache/lucene/search/ConstantScoreQuery.java | 5 +
.../apache/lucene/search/FieldValueHitQueue.java | 5 -
.../org/apache/lucene/search/IndexSearcher.java | 94 +-
.../org/apache/lucene/search/LRUQueryCache.java | 7 +-
.../apache/lucene/search/MatchAllDocsQuery.java | 5 +
.../org/apache/lucene/search/MatchNoDocsQuery.java | 5 +
.../org/apache/lucene/search/MultiCollector.java | 28 +-
.../java/org/apache/lucene/search/PhraseQuery.java | 4 +-
.../org/apache/lucene/search/PointInSetQuery.java | 22 +-
.../org/apache/lucene/search/PointRangeQuery.java | 75 +-
.../org/apache/lucene/search/ScoringRewrite.java | 2 +-
.../src/java/org/apache/lucene/search/Sort.java | 84 +-
.../java/org/apache/lucene/search/SortField.java | 78 +-
.../lucene/search/SortedNumericSortField.java | 101 +-
.../org/apache/lucene/search/SynonymQuery.java | 4 +-
.../java/org/apache/lucene/search/TermQuery.java | 16 +
.../apache/lucene/search/TopFieldCollector.java | 11 +-
.../src/java/org/apache/lucene/search/Weight.java | 48 +-
.../lucene/search/comparators/DocComparator.java | 2 +-
.../search/comparators/NumericComparator.java | 54 +-
.../lucene/search/similarities/BM25Similarity.java | 2 +-
.../search/similarities/MultiSimilarity.java | 10 +-
.../lucene/search/similarities/SimilarityBase.java | 2 +-
.../search/similarities/TFIDFSimilarity.java | 2 +-
.../org/apache/lucene/store/BufferedChecksum.java | 2 +-
.../apache/lucene/store/ByteArrayDataInput.java | 40 +-
.../apache/lucene/store/ByteArrayDataOutput.java | 22 +
.../apache/lucene/store/ByteBufferIndexInput.java | 4 +-
.../apache/lucene/store/ByteBuffersDataInput.java | 86 +-
.../apache/lucene/store/ByteBuffersDataOutput.java | 48 +-
.../apache/lucene/store/ByteBuffersIndexInput.java | 6 +
.../java/org/apache/lucene/store/DataInput.java | 11 +-
.../java/org/apache/lucene/store/DataOutput.java | 13 +-
.../org/apache/lucene/store/MMapDirectory.java | 2 +-
.../lucene/store/OutputStreamIndexOutput.java | 61 +-
.../org/apache/lucene/store/RandomAccessInput.java | 10 +-
.../lucene/store/RateLimitedIndexOutput.java | 21 +
.../src/java/org/apache/lucene/util/ArrayUtil.java | 38 +
.../src/java/org/apache/lucene/util/BitSet.java | 3 +
.../src/java/org/apache/lucene/util/BitUtil.java | 92 +-
.../java/org/apache/lucene/util/ByteBlockPool.java | 2 +-
.../java/org/apache/lucene/util/FixedBitSet.java | 1 +
.../java/org/apache/lucene/util/NumericUtils.java | 30 +-
.../org/apache/lucene/util/SparseFixedBitSet.java | 61 +-
.../java/org/apache/lucene/util/StringHelper.java | 10 +-
.../org/apache/lucene/util/Unwrappable.java} | 28 +-
.../java/org/apache/lucene/util/VectorUtil.java | 47 +-
.../src/java/org/apache/lucene/util/Version.java | 14 +
.../lucene/util/automaton/LevenshteinAutomata.java | 12 +-
.../apache/lucene/util/bkd/BKDRadixSelector.java | 48 +-
.../java/org/apache/lucene/util/bkd/BKDUtil.java | 104 ++
.../java/org/apache/lucene/util/bkd/BKDWriter.java | 219 +---
.../apache/lucene/util/bkd/HeapPointReader.java | 6 +-
.../apache/lucene/util/bkd/HeapPointWriter.java | 6 +-
.../lucene/util/bkd/MutablePointsReaderUtils.java | 29 +-
.../apache/lucene/util/bkd/OfflinePointReader.java | 6 +-
.../java/org/apache/lucene/util/compress/LZ4.java | 9 +-
.../apache/lucene/util/fst/PositiveIntOutputs.java | 3 +-
.../org/apache/lucene/util/hnsw/HnswGraph.java | 11 +-
.../apache/lucene/util/hnsw/HnswGraphBuilder.java | 16 +-
.../apache/lucene/util/packed/DirectWriter.java | 95 +-
.../apache/lucene/analysis/TestCharArraySet.java | 2 +-
.../analysis/standard/TestStandardAnalyzer.java | 2 +-
.../org/apache/lucene/codecs/TestCodecUtil.java | 8 +-
.../lucene90/TestLucene90DocValuesFormat.java | 6 +-
.../test/org/apache/lucene/document/TestField.java | 28 +-
.../document/TestLatLonPointDistanceSort.java | 2 +-
.../apache/lucene/document/TestLatLonShape.java | 8 +-
.../lucene/document/TestPerFieldConsistency.java | 8 +-
.../org/apache/lucene/document/TestXYShape.java | 4 +-
.../test/org/apache/lucene/geo/TestGeoUtils.java | 6 +-
.../test/org/apache/lucene/geo/TestPolygon2D.java | 20 +-
.../apache/lucene/index/Test2BBinaryDocValues.java | 4 +-
.../apache/lucene/index/Test2BPostingsBytes.java | 2 +-
.../index/Test2BSortedDocValuesFixedSorted.java | 2 +-
.../lucene/index/Test2BSortedDocValuesOrds.java | 2 +-
.../lucene/index/TestAllFilesDetectTruncation.java | 2 +-
.../org/apache/lucene/index/TestCheckIndex.java | 155 +++
.../lucene/index/TestConcurrentMergeScheduler.java | 80 ++
.../apache/lucene/index/TestCustomTermFreq.java | 29 +-
.../lucene/index/TestDemoParallelLeafReader.java | 5 +-
.../apache/lucene/index/TestDocValuesIndexing.java | 70 +-
.../apache/lucene/index/TestDocsAndPositions.java | 14 +-
.../org/apache/lucene/index/TestDuelingCodecs.java | 2 +-
.../lucene/index/TestExitableDirectoryReader.java | 9 +-
.../apache/lucene/index/TestFilterCodecReader.java | 13 +-
.../apache/lucene/index/TestIndexFileDeleter.java | 6 +-
.../org/apache/lucene/index/TestIndexOptions.java | 7 +-
.../org/apache/lucene/index/TestIndexSorting.java | 6 +-
.../org/apache/lucene/index/TestIndexWriter.java | 4 +-
.../lucene/index/TestIndexWriterExceptions.java | 6 +-
.../lucene/index/TestIndexWriterMaxDocs.java | 8 +-
.../lucene/index/TestIndexWriterOnDiskFull.java | 2 +-
.../index/TestIndexWriterThreadsToSegments.java | 4 +-
.../lucene/index/TestNumericDocValuesUpdates.java | 20 +-
.../test/org/apache/lucene/index/TestPayloads.java | 2 +-
.../org/apache/lucene/index/TestPointValues.java | 14 +-
.../apache/lucene/index/TestPostingsOffsets.java | 2 +-
.../lucene/index/TestSameTokenSamePosition.java | 8 +-
.../org/apache/lucene/index/TestSegmentInfos.java | 54 +-
.../lucene/index/TestSegmentToThreadMapping.java | 3 +-
.../apache/lucene/index/TestSwappedIndexFiles.java | 7 +-
.../org/apache/lucene/index/TestTermsEnum.java | 36 +-
.../apache/lucene/index/TestTermsHashPerField.java | 28 +-
.../apache/lucene/index/TestTieredMergePolicy.java | 26 +-
.../apache/lucene/search/TestAutomatonQuery.java | 2 +-
.../org/apache/lucene/search/TestBooleanQuery.java | 40 +
.../search/TestBooleanQueryVisitSubscorers.java | 2 +-
.../org/apache/lucene/search/TestDateSort.java | 3 +-
.../apache/lucene/search/TestDocValuesQueries.java | 41 +-
.../lucene/search/TestDoubleValuesSource.java | 6 +-
.../org/apache/lucene/search/TestFilterWeight.java | 3 +-
.../org/apache/lucene/search/TestFuzzyQuery.java | 8 +-
.../apache/lucene/search/TestIndexSearcher.java | 21 +-
.../apache/lucene/search/TestKnnVectorQuery.java | 157 ++-
.../apache/lucene/search/TestLRUQueryCache.java | 26 +-
.../apache/lucene/search/TestLiveFieldValues.java | 5 +-
.../apache/lucene/search/TestLongValuesSource.java | 6 +-
.../apache/lucene/search/TestMinShouldMatch2.java | 20 +-
.../apache/lucene/search/TestMultiCollector.java | 63 ++
.../apache/lucene/search/TestMultiPhraseQuery.java | 4 +-
.../apache/lucene/search/TestMultiSliceMerge.java | 4 +-
.../lucene/search/TestMultiThreadTermVectors.java | 12 +-
.../org/apache/lucene/search/TestPhraseQuery.java | 2 +-
.../org/apache/lucene/search/TestPointQueries.java | 8 +-
.../apache/lucene/search/TestRegexpRandom2.java | 6 +-
.../lucene/search/TestSloppyPhraseQuery2.java | 2 +-
.../test/org/apache/lucene/search/TestSort.java | 25 +-
.../apache/lucene/search/TestSortOptimization.java | 258 ++++-
.../lucene/search/TestSortedNumericSortField.java | 17 +-
.../lucene/search/TestSortedSetSelector.java | 117 +-
.../lucene/search/TestSortedSetSortField.java | 38 +-
.../apache/lucene/search/TestTermInSetQuery.java | 45 +-
.../org/apache/lucene/search/TestTermQuery.java | 30 +
.../lucene/search/TestTimeLimitingCollector.java | 2 +-
.../org/apache/lucene/search/TestWildcard.java | 10 +-
.../lucene/search/TestXYPointDistanceSort.java | 2 +-
.../lucene/store/BaseDataOutputTestCase.java | 8 +-
.../apache/lucene/store/TestBufferedChecksum.java | 2 +-
.../lucene/store/TestByteArrayDataInput.java | 32 +
.../lucene/store/TestByteBuffersDataOutput.java | 78 +-
.../org/apache/lucene/store/TestMmapDirectory.java | 2 +-
.../store/TestMultiByteBuffersDirectory.java | 49 +
.../org/apache/lucene/store/TestMultiMMap.java | 315 +-----
.../lucene/store/TestOutputStreamIndexOutput.java | 54 +
.../test/org/apache/lucene/util/TestArrayUtil.java | 50 +
.../test/org/apache/lucene/util/TestBytesRef.java | 2 +-
.../org/apache/lucene/util/TestBytesRefHash.java | 12 +-
.../test/org/apache/lucene/util/TestCharsRef.java | 6 +-
.../test/org/apache/lucene/util/TestIntsRef.java | 2 +-
.../test/org/apache/lucene/util/TestLongsRef.java | 2 +-
.../org/apache/lucene/util/TestPagedBytes.java | 2 +-
.../org/apache/lucene/util/TestStringHelper.java | 52 +-
.../org/apache/lucene/util/TestUnicodeUtil.java | 18 +-
.../org/apache/lucene/util/TestVectorUtil.java | 27 +
.../test/org/apache/lucene/util/TestVersion.java | 13 +
.../lucene/util/automaton/TestAutomaton.java | 108 +-
.../util/automaton/TestDeterminizeLexicon.java | 2 +-
.../util/automaton/TestLevenshteinAutomata.java | 12 +-
.../apache/lucene/util/automaton/TestRegExp.java | 2 +-
.../lucene/util/automaton/TestUTF32ToUTF8.java | 2 +-
.../org/apache/lucene/util/bkd/TestBKDUtil.java | 136 +++
.../test/org/apache/lucene/util/fst/TestFSTs.java | 98 +-
.../org/apache/lucene/util/hnsw/TestHnswGraph.java | 42 +-
.../lucene/util/packed/TestDirectPacked.java | 6 +-
.../lucene/demo/facet/DistanceFacetsExample.java | 8 +-
.../src/generated/checksums/generateAntlr.json | 7 +
.../lucene/expressions/js/ExpressionMath.java | 46 +
.../lucene/expressions/js/JavascriptLexer.java | 349 +++---
.../lucene/expressions/js/JavascriptParser.java | 1185 ++++++++++++--------
.../lucene/expressions/js/JavascriptVisitor.java | 85 +-
.../apache/lucene/expressions/js/package-info.java | 2 +-
.../expressions/js/JavascriptCompiler.properties | 3 +-
.../lucene/expressions/TestDemoExpressions.java | 21 +-
.../lucene/expressions/TestExpressionSorts.java | 6 +-
.../lucene/expressions/js/TestExpressionMath.java | 70 ++
.../expressions/js/TestJavascriptFunction.java | 2 +-
.../org/apache/lucene/facet/DrillSideways.java | 14 +-
.../apache/lucene/facet/DrillSidewaysQuery.java | 17 +-
.../java/org/apache/lucene/facet/FacetUtils.java | 84 ++
.../java/org/apache/lucene/facet/FacetsConfig.java | 7 +-
.../apache/lucene/facet/LongValueFacetCounts.java | 14 +-
.../lucene/facet/StringValueFacetCounts.java | 16 +-
.../ConcurrentSortedSetDocValuesFacetCounts.java | 9 +-
.../sortedset/SortedSetDocValuesFacetCounts.java | 13 +-
.../lucene/facet/taxonomy/FloatTaxonomyFacets.java | 12 +-
.../facet/taxonomy/IntAssociationFacetField.java | 11 +-
.../lucene/facet/taxonomy/IntTaxonomyFacets.java | 12 +-
.../TaxonomyFacetSumFloatAssociations.java | 15 +-
.../taxonomy/TaxonomyFacetSumIntAssociations.java | 13 +-
.../lucene/facet/taxonomy/TaxonomyMergeUtils.java | 4 +-
.../lucene/facet/taxonomy/TaxonomyReader.java | 15 +
.../directory/DirectoryTaxonomyReader.java | 153 ++-
.../org/apache/lucene/facet/TestDrillSideways.java | 142 ++-
.../org/apache/lucene/facet/TestFacetUtils.java | 106 ++
.../lucene/facet/TestLongValueFacetCounts.java | 28 +
.../lucene/facet/TestStringValueFacetCounts.java | 35 +
.../sortedset/TestSortedSetDocValuesFacets.java | 837 +++++++-------
.../facet/taxonomy/TestTaxonomyCombined.java | 8 +-
.../facet/taxonomy/directory/TestAddTaxonomy.java | 2 +-
.../directory/TestBackwardsCompatibility.java | 22 +
.../directory/TestDirectoryTaxonomyReader.java | 78 +-
.../directory/TestDirectoryTaxonomyWriter.java | 3 +-
.../lucene/search/highlight/GradientFormatter.java | 2 +-
.../lucene/search/highlight/Highlighter.java | 4 +-
.../search/highlight/TermVectorLeafReader.java | 16 +-
.../lucene/search/matchhighlight/OffsetRange.java | 12 +-
.../search/matchhighlight/PassageSelector.java | 20 +-
.../search/uhighlight/DefaultPassageFormatter.java | 2 +-
.../apache/lucene/search/uhighlight/Passage.java | 8 +-
.../lucene/search/uhighlight/PassageFormatter.java | 2 +-
.../PostingsWithTermVectorsOffsetStrategy.java | 18 +-
.../uhighlight/TermVectorOffsetStrategy.java | 13 +-
.../uhighlight/TokenStreamOffsetStrategy.java | 2 +-
.../search/uhighlight/UnifiedHighlighter.java | 37 +-
.../lucene/search/highlight/TestHighlighter.java | 8 +-
.../matchhighlight/TestMatchHighlighter.java | 3 +-
.../search/matchhighlight/TestPassageSelector.java | 94 +-
.../search/uhighlight/TestUnifiedHighlighter.java | 52 +-
.../uhighlight/TestUnifiedHighlighterMTQ.java | 44 +-
.../uhighlight/TestUnifiedHighlighterRanking.java | 6 +-
.../TestUnifiedHighlighterTermIntervals.java | 52 +-
.../uhighlight/TestUnifiedHighlighterTermVec.java | 30 +-
lucene/licenses/jetty-NOTICE.txt | 32 +-
lucene/licenses/junit-LICENSE-CPL.txt | 24 +-
lucene/licenses/pddl-10.txt | 2 +-
lucene/luke/build.gradle | 7 +-
.../apache/lucene/index/memory/MemoryIndex.java | 23 +-
lucene/misc/native/build.gradle | 5 +-
.../org/apache/lucene/misc/IndexMergeTool.java | 4 +-
.../apache/lucene/misc/store/WindowsDirectory.java | 2 +-
.../misc/util/fst/UpToTwoPositiveIntOutputs.java | 3 +-
.../search/TestDiversifiedTopDocsCollector.java | 2 +-
lucene/packaging/build.gradle | 38 +-
.../queries/payloads/PayloadMatcherFactory.java | 12 +-
.../lucene/queries/function/FunctionTestSetup.java | 4 +-
.../queries/function/TestFieldScoreQuery.java | 2 +-
.../queries/function/TestFunctionScoreQuery.java | 2 +-
.../queries/function/TestIndexReaderFunctions.java | 8 +-
.../queries/function/TestLongNormValueSource.java | 6 +-
.../lucene/queries/function/TestValueSources.java | 6 +-
.../lucene/queries/intervals/TestIntervals.java | 4 +-
.../queries/payloads/TestPayloadExplanations.java | 2 +-
.../queries/spans/TestSpanSearchEquivalence.java | 32 +-
.../queryparser/classic/QueryParserBase.java | 27 +-
.../queryparser/simple/SimpleQueryParser.java | 6 +-
.../queryparser/xml/CorePlusQueriesParser.java | 2 +-
.../xml/builders/LikeThisQueryBuilder.java | 4 +-
.../classic/TestMultiFieldQueryParser.java | 2 +-
.../queryparser/classic/TestQueryParser.java | 30 +-
.../complexPhrase/TestComplexPhraseQuery.java | 2 +-
.../flexible/standard/TestMultiFieldQPHelper.java | 2 +-
.../flexible/standard/TestQPHelper.java | 6 +-
.../queryparser/simple/TestSimpleQueryParser.java | 2 +-
.../surround/query/Test01Exceptions.java | 2 +-
.../queryparser/surround/query/Test02Boolean.java | 2 +-
.../queryparser/surround/query/Test03Distance.java | 2 +-
.../queryparser/util/QueryParserTestBase.java | 10 +-
lucene/replicator/build.gradle | 8 +-
.../lucene/replicator/nrt/SimpleTransLog.java | 12 +-
.../codecs/idversion/IDVersionPostingsFormat.java | 19 +-
.../lucene/sandbox/document/BigIntegerPoint.java | 4 +-
.../lucene/sandbox/document/HalfFloatPoint.java | 10 +-
.../lucene/sandbox/search/CombinedFieldQuery.java | 6 +-
.../sandbox/search/QueryProfilerTimingType.java | 1 +
.../lucene/sandbox/search/QueryProfilerWeight.java | 11 +
.../search/TestQueryProfilerIndexSearcher.java | 41 +-
.../apache/lucene/spatial/StrategyTestCase.java | 2 +-
.../org/apache/lucene/spatial3d/Geo3DPoint.java | 4 +-
.../lucene/search/spell/DirectSpellChecker.java | 4 +-
.../lucene/search/spell/LevenshteinDistance.java | 6 +-
.../search/spell/LuceneLevenshteinDistance.java | 2 +-
.../apache/lucene/search/spell/NGramDistance.java | 6 +-
.../suggest/analyzing/AnalyzingSuggester.java | 2 +-
.../search/suggest/analyzing/FuzzySuggester.java | 2 +-
.../suggest/document/FuzzyCompletionQuery.java | 2 +-
.../search/suggest/fst/ExternalRefSorter.java | 65 +-
.../analyzing/TestAnalyzingInfixSuggester.java | 26 +-
.../suggest/analyzing/TestAnalyzingSuggester.java | 6 +-
.../analyzing/TestBlendedInfixSuggester.java | 18 +-
.../suggest/analyzing/TestFuzzySuggester.java | 10 +-
.../search/suggest/document/TestSuggestField.java | 2 +-
.../search/suggest/fst/TestBytesRefSorters.java | 34 +-
.../search/suggest/fst/TestWFSTCompletion.java | 2 +-
.../lucene/analysis/BaseTokenStreamTestCase.java | 134 +--
.../apache/lucene/analysis/CollationTestBase.java | 2 +-
.../org/apache/lucene/analysis/MockTokenizer.java | 8 +-
.../apache/lucene/analysis/VocabularyAssert.java | 2 +-
.../apache/lucene/geo/BaseGeoPointTestCase.java | 2 +-
.../org/apache/lucene/geo/BaseXYPointTestCase.java | 2 +-
.../java/org/apache/lucene/geo/GeoTestUtil.java | 40 +-
.../java/org/apache/lucene/geo/ShapeTestUtil.java | 8 +-
.../apache/lucene/index/AssertingLeafReader.java | 6 +
.../lucene/index/BaseCompoundFormatTestCase.java | 10 +-
.../lucene/index/BaseDocValuesFormatTestCase.java | 34 +-
.../lucene/index/BaseKnnVectorsFormatTestCase.java | 32 +-
.../lucene/index/BaseNormsFormatTestCase.java | 8 +-
.../index/BaseSegmentInfoFormatTestCase.java | 24 +-
.../index/BaseStoredFieldsFormatTestCase.java | 2 +-
.../apache/lucene/index/FieldFilterLeafReader.java | 23 +-
.../org/apache/lucene/mockfile/FilterPath.java | 13 +-
.../apache/lucene/search/AssertingBulkScorer.java | 19 +-
.../apache/lucene/search/AssertingScorable.java | 30 +-
.../org/apache/lucene/search/AssertingWeight.java | 9 +
.../java/org/apache/lucene/search/CheckHits.java | 2 +-
.../java/org/apache/lucene/search/QueryUtils.java | 18 +-
.../store/BaseChunkedDirectoryTestCase.java} | 256 ++---
.../apache/lucene/store/BaseDirectoryTestCase.java | 34 +-
.../apache/lucene/store/MockDirectoryWrapper.java | 6 +-
.../org/apache/lucene/util/BaseBitSetTestCase.java | 23 +
.../org/apache/lucene/util/LuceneTestCase.java | 20 +-
.../src/java/org/apache/lucene/util/TestUtil.java | 14 +-
.../TestCompressingStoredFieldsFormat.java | 10 +-
.../apache/lucene/mockfile/TestDisableFsyncFS.java | 2 +-
settings.gradle | 7 +
versions.lock | 14 +-
versions.props | 2 +-
629 files changed, 12238 insertions(+), 6010 deletions(-)
diff --cc lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
index e2aced6,56dcf89..a6325e1
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
@@@ -67,7 -65,8 +67,7 @@@ public final class Lucene90HnswVectorsR
Lucene90HnswVectorsReader(SegmentReadState state) throws IOException {
this.fieldInfos = state.fieldInfos;
- int versionMeta = readMetadata(state, Lucene90HnswVectorsFormat.META_EXTENSION);
-
+ int versionMeta = readMetadata(state);
long[] checksumRef = new long[1];
boolean success = false;
try {
diff --cc lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
index c3ffbb1,511f889..30141c0
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
@@@ -21,9 -21,8 +21,9 @@@ import static org.apache.lucene.search.
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
- import java.util.Random;
+ import java.util.SplittableRandom;
import org.apache.lucene.index.KnnGraphValues;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
@@@ -119,70 -97,27 +119,68 @@@ public final class HnswGraph extends Kn
VectorSimilarityFunction similarityFunction,
KnnGraphValues graphValues,
Bits acceptOrds,
- Random random)
+ SplittableRandom random)
throws IOException {
+
int size = graphValues.size();
+ int boundedNumSeed = Math.max(topK, Math.min(numSeed, 2 * size));
+ NeighborQueue results;
+
+ int[] eps = new int[] {graphValues.entryNode()};
+ for (int level = graphValues.numLevels() - 1; level >= 1; level--) {
+ results =
+ HnswGraph.searchLevel(
+ query, 1, level, eps, vectors, similarityFunction, graphValues, null);
+ eps[0] = results.pop();
+ }
+ results =
+ HnswGraph.searchLevel(
+ query, boundedNumSeed, 0, eps, vectors, similarityFunction, graphValues, acceptOrds);
+ while (results.size() > topK) {
+ results.pop();
+ }
+ return results;
+ }
+ /**
+ * Searches for the nearest neighbors of a query vector in a given level
+ *
+ * @param query search query vector
+ * @param topK the number of nearest to query results to return
+ * @param level level to search
+ * @param eps the entry points for search at this level expressed as level 0th ordinals
+ * @param vectors vector values
+ * @param similarityFunction similarity function
+ * @param graphValues the graph values
+ * @param acceptOrds {@link Bits} that represents the allowed document ordinals to match, or
+ * {@code null} if they are all allowed to match.
+ * @return a priority queue holding the closest neighbors found
+ */
+ static NeighborQueue searchLevel(
+ float[] query,
+ int topK,
+ int level,
+ final int[] eps,
+ RandomAccessVectorValues vectors,
+ VectorSimilarityFunction similarityFunction,
+ KnnGraphValues graphValues,
+ Bits acceptOrds)
+ throws IOException {
+
+ int size = graphValues.size();
+ int queueSize = Math.max(eps.length, topK);
// MIN heap, holding the top results
- NeighborQueue results = new NeighborQueue(numSeed, similarityFunction.reversed);
+ NeighborQueue results = new NeighborQueue(queueSize, similarityFunction.reversed);
// MAX heap, from which to pull the candidate nodes
- NeighborQueue candidates = new NeighborQueue(numSeed, !similarityFunction.reversed);
-
+ NeighborQueue candidates = new NeighborQueue(queueSize, !similarityFunction.reversed);
// set of ordinals that have been visited by search on this layer, used to avoid backtracking
SparseFixedBitSet visited = new SparseFixedBitSet(size);
-
- // get initial candidates at random
- int boundedNumSeed = Math.min(numSeed, 2 * size);
- for (int i = 0; i < boundedNumSeed; i++) {
- int entryPoint = random.nextInt(size);
- if (visited.getAndSet(entryPoint) == false) {
- // explore the topK starting points of some random numSeed probes
- float score = similarityFunction.compare(query, vectors.vectorValue(entryPoint));
- candidates.add(entryPoint, score);
- if (acceptOrds == null || acceptOrds.get(entryPoint)) {
- results.add(entryPoint, score);
+ for (int ep : eps) {
- if (visited.get(ep) == false) {
- visited.set(ep);
++ if (visited.getAndSet(ep) == false) {
+ float score = similarityFunction.compare(query, vectors.vectorValue(ep));
+ candidates.add(ep, score);
+ if (acceptOrds == null || acceptOrds.get(ep)) {
+ results.add(ep, score);
}
}
}
@@@ -204,13 -140,12 +202,12 @@@
int friendOrd;
while ((friendOrd = graphValues.nextNeighbor()) != NO_MORE_DOCS) {
assert friendOrd < size : "friendOrd=" + friendOrd + "; size=" + size;
- if (visited.get(friendOrd)) {
+ if (visited.getAndSet(friendOrd)) {
continue;
}
- visited.set(friendOrd);
float score = similarityFunction.compare(query, vectors.vectorValue(friendOrd));
- if (results.size() < numSeed || bound.check(score) == false) {
+ if (results.size() < topK || bound.check(score) == false) {
candidates.add(friendOrd, score);
if (acceptOrds == null || acceptOrds.get(friendOrd)) {
results.insertWithOverflow(friendOrd, score);
diff --cc lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
index f21c8f7,f5cfc6a..4364d1d
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
@@@ -87,12 -84,9 +87,12 @@@ public final class HnswGraphBuilder
}
this.maxConn = maxConn;
this.beamWidth = beamWidth;
- this.hnsw = new HnswGraph(maxConn);
+ // normalization factor for level generation; currently not configurable
+ this.ml = 1 / Math.log(1.0 * maxConn);
- this.random = new Random(seed);
++ this.random = new SplittableRandom(seed);
+ int levelOfFirstNode = getRandomGraphLevel(ml, random);
+ this.hnsw = new HnswGraph(maxConn, levelOfFirstNode);
bound = BoundsChecker.create(similarityFunction.reversed);
- random = new SplittableRandom(seed);
scratch = new NeighborArray(Math.max(beamWidth, maxConn + 1));
}
@@@ -287,12 -266,4 +287,12 @@@
}
return -1;
}
+
- private static int getRandomGraphLevel(double ml, Random random) {
- float randFloat;
++ private static int getRandomGraphLevel(double ml, SplittableRandom random) {
++ double randDouble;
+ do {
- randFloat = random.nextFloat(); // avoid 0 value, as log(0) is undefined
- } while (randFloat == 0.0f);
- return ((int) (-log(randFloat) * ml));
++ randDouble = random.nextDouble(); // avoid 0 value, as log(0) is undefined
++ } while (randDouble == 0.0);
++ return ((int) (-log(randDouble) * ml));
+ }
}
diff --cc lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
index 6d2f827,16a3e60..9fd6838
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
@@@ -179,26 -153,27 +183,27 @@@ public class TestHnswGraph extends Luce
// We expect to get approximately 100% recall;
// the lowest docIds are closest to zero; sum(0,9) = 45
assertTrue("sum(result docs)=" + sum, sum < 75);
+
for (int i = 0; i < nDoc; i++) {
- NeighborArray neighbors = hnsw.getNeighbors(i);
+ NeighborArray neighbors = hnsw.getNeighbors(0, i);
- int[] nodes = neighbors.node;
+ int[] nnodes = neighbors.node;
for (int j = 0; j < neighbors.size(); j++) {
// all neighbors should be valid node ids.
- assertTrue(nodes[j] < nDoc);
+ assertTrue(nnodes[j] < nDoc);
}
}
}
public void testSearchWithAcceptOrds() throws IOException {
int nDoc = 100;
+ int maxConn = 16;
CircularVectorValues vectors = new CircularVectorValues(nDoc);
- // the first 10 docs must not be deleted to ensure the expected recall
- Bits acceptOrds = createRandomAcceptOrds(10, vectors.size);
HnswGraphBuilder builder =
new HnswGraphBuilder(
- vectors, VectorSimilarityFunction.DOT_PRODUCT, 16, 100, random().nextInt());
+ vectors, VectorSimilarityFunction.DOT_PRODUCT, maxConn, 100, random().nextInt());
HnswGraph hnsw = builder.build(vectors);
-
+ // the first 10 docs must not be deleted to ensure the expected recall
+ Bits acceptOrds = createRandomAcceptOrds(10, vectors.size);
NeighborQueue nn =
HnswGraph.search(
new float[] {1, 0},
@@@ -516,7 -507,10 +530,11 @@@
}
}
- /** Generate a random bitset where each entry has a 2/3 probability of being set. */
++
+ /**
+ * Generate a random bitset where before startIndex all bits are set, and after startIndex each
+ * entry has a 2/3 probability of being set.
+ */
private static Bits createRandomAcceptOrds(int startIndex, int length) {
FixedBitSet bits = new FixedBitSet(length);
// all bits are set before startIndex