You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2021/10/11 23:52:11 UTC
[lucene] branch hnsw updated (5e42fc2 -> 43083ed)
This is an automated email from the ASF dual-hosted git repository.
mayya pushed a change to branch hnsw
in repository https://gitbox.apache.org/repos/asf/lucene.git.
from 5e42fc2 Disk write and read of hnsw graph (#315)
add bac22d6 Adding initial patch by Gautam Worah
add 523cea2 Revert "Adding initial patch by Gautam Worah" (restore pristine main)
add 3ff4263 Upgrade gradle to 7.1.1
add 72f3737 Upgrade palantir's plugin.
add 68cf86b Experiments with the new apis.
add 2b0378c Use JavaInfo instead of toolchains. Internal but works and is free of toolchain's quirks.
add 26eb84a Fix immutable properties. Fix ant uri namespace no longer working (seems like gradle regression).
add fdccdee Move logging to info level.
add 0cbafa4 Fix gradle error hints.
add a8d4f65 Upgrade to gradle 7.2
add 0d07104 Piggyback spotless upgrade to 5.14.3
add 45868a5 LUCENE-9990: upgrade to gradle 7.2.
add 8917fbe LUCENE-9613, LUCENE-10067: Add more specialization for the ordinals case.
add 88588e3 LUCENE-10052: cutover more tests to newBytesRef, and finally catches a fly (FSTTermsReader.IntersectEnum was illegally ignoring BytesRef.offset, yay!) (#258)
add 8ac2673 LUCENE-10003: No C style array declaration (#206)
add 2d7590a LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)
add 39a2fc6 LUCENE-10066: Build does not work with JDK16 as gradle's runtime (#259)
add f6e3b08 LUCENE-10072: Regenerate FST dictionaries after LUCENE-9047. (#265)
add f1fdd24 LUCENE-9917: Smaller block sizes for BEST_SPEED. (#257)
add dbf7e18 LUCENE-10060: Ensure DrillSidewaysQuery instances never get cached (#261)
add 3b3f960 Fix a DrillSideways unit test I broke when adding more tests in LUCENE-10060 (#268)
add e470535 LUCENE-9654: Expressions module grammar antlr code regeneration (#269)
add 6ade29c LUCENE-10035: Simple text codec add multi level skip list data (#224)
add 9c7f0d4 LUCENE-10063: implement SimpleTextKnnvectorsReader.search
add 424192e LUCENE-9662: CheckIndex should be concurrent - parallelizing index check across segments (#128)
add e3e54c9 LUCENE-10063: test fixes relating to SimpleTextKnnVectorsReader (#273)
add ee7a719 LUCENE-10082: add detail to schema inconsistency error messages
add 3423243 LUCENE-9662: fix test failure from merging away soft-deletes (#276)
add 1036c70 LUCENE-9476: Add getBulkPath API to DirectoryTaxonomyReader for faster ordinal -> FacetLabel lookup (#179)
add 544dbbe LUCENE-10083: Analyzer and stemmer for Telugu language (#275)
add b0611a1 LUCENE-10083: add CHANGES entry for Telugu analyzer
add eb2509c LUCENE-10035: Fix CHANGES entry.
add 54179e9 LUCENE-10063 Correct BaseKnnVectorsFormatTestCase.testRandomWithUpdatesAndGraph (#278)
add 059d06c Fix gpg key download in release wizard. (#279)
add 44e9f5d LUCENE-9620 Add Weight#count(LeafReaderContext) (#242)
add d4e4fe2 Revert "LUCENE-9959: Add non thread local based API for term vector reader usage (#180)" (#280)
add de661d6 LUCENE-9620: Address profiling test failures.
add 4bb018e LUCENE-9620: Fix TestTermQuery failure.
add b3ce44c LUCENE-9620: Implement AssertingWeight#count.
add 34f37d0 LUCENE-10035: move CHANGES.txt entry from 9.0 to 8.10
add 4df8d64 LUCENE-10081: KoreanTokenizer should check the max backtrace gap on whitespaces (#272)
add bc161e6 LUCENE-10040 Correct TestHnswGraph.testSearchWithAcceptOrds (#277)
add 7eb35be LUCENE-10087: Validate number of dimensions and bytes per dimension for numeric SortFields. (#283)
add ee0695e LUCENE-10092: fix test bug by forceMerging the index down to one segment
add cc58c51 LUCENE-10089 Disable numeric sort optim when needed (#286)
add 42242b1 add a small test to TestVersion to confirm we handle non-floating-point release numbers correctly
add 7f8607b LUCENE-9662: Update concurrent index checking usage instructions and default thread count to CPU cores (#281)
add 1bb5285 LUCENE-10094: Delegate count() from CachingWrapperWeight (#289)
add cc8c428 LUCENE-10094: Fix test bug
add 8bce765 LUCENE-10095: Nepali Analyzer (#290)
add 24aa45d LUCENE-10096: Tamil Analyzer (#292)
add 56968b7 LUCENE-10098: add note/link to GermanAnalyzer for decompounding nouns. (#294)
add 1953757 LUCENE-10089: Disable numeric sort optimization early (#291)
add 3802bdc LUCENE-10101: Use getField() instead of getDeclaredField() to minimize security impact by analysis SPI discovery (#298)
add 3c6d4a00 LUCENE-10104, SOLR-15631: Upgrade forbiddenapis to version 3.2
add 2609373 LUCENE-8638: Expressions haversin() method should continue to return its value in km (#299)
add 1586933 Merge branch 'main' of https://gitbox.apache.org/repos/asf/lucene into main
add b7a286d LUCENE-10106: Sort optimization wrongly skip first docs (#300)
add de45b68 LUCENE-9448, LUCENE-9990: fix Luke's launcher task.
add 4e86df9 LUCENE-10102: Add JapaneseCompletionFilter for Input Method-aware auto-completion (#297)
add deff5a1 LUCENE-10070: Skip deleted documents during facet counting for all documents (#293)
add 5dfbef3 LUCENE-10102: Fix JapaneseCompletionFilter javadoc
add f3c3b90 LUCENE-9047: fix typo in javadocs (still referred to big endian)
add 8b95e51 Add additional docs refs (nightly, build system help/) to README.md (#302)
add 6c1e592 LUCENE-10102: do not call incrementToken() against already consumed input stream.
add ccf0d54 LUCENE-10110: MultiCollector should conditionally wrap single leaf collector (#303)
add 075d801 LUCENE-10114: Remove unused byte order mark in Lucene90PostingsWriter (#309)
add 4bcd64c LUCENE-9620: Fix test bug.
add c57d6e5 LUCENE-10113: Use VarHandles to access int/long/short types in byte arrays (e.g. ByteArrayDataInput) (#308)
add 57524c6 LUCENE-9809: replace 'master' with 'main' in release wizard (#305)
add 5871ea7 LUCENE-10112: Improve LZ4 Compression performance with direct primitive read/writes (#310)
add b2a04a4 LUCENE-10069: Adjust TestKnnVectorQuery#testRandom to stop failures
add a757870 LUCENE-10115: Add a fuzzy parsing extension point for custom query parsers
add a7bddfa LUCENE-10111: Missing calculating the bytes used of DocsWithFieldSet in NormValuesWriter (#307)
add ed7fb8d LUCENE-10116: Missing calculating the bytes used of DocsWithFieldSet and currentValues in SortedSetDocValuesWriter (#316)
add fc47536 Only pass "--illegal-access=deny" up to JDK-15, later versions deprecate the option and default to "deny"
add 7390d1a LUCENE-10119: Do not set single sort with search after (#317)
add eb44d1e Add slightly more language in the README Contributing section (#318)
add eaa4210 LUCENE-10109: Bump default beam width for HNSW (#312)
add 849d5fc LUCENE-10125: Optimize primitive writes in OutputStreamIndexOutput (#321)
add 1ebd193 Move CHANGES entry for LUCENE-10070 under 8.11 after backport (#323)
add 7357bdc LUCENE-10123: Handling of singletons in DocValuesConsumer. (#320)
add 6ac3110 LUCENE-10128: avoid costly reflection in SparseFixedBitSet ctor
add 8f3f2ea LUCENE-10127: Minor speedup to doc values writes. (#325)
add 9f80b4d LUCENE-10125: Speed up computation of exceptions. (#322)
add 5ab900e LUCENE-10126: Fix competitiveIterator wrongly skip documents (#324)
add a73848c DOAP changes for release 8.10.0
add cb15388 LUCENE-10126: Fix AssertingBulkScorer
add e56995d LUCENE-10126: Remove chunk scoring in AssertingBulkScorer
add 84e4050 LUCENE-10125: Speed up DirectWriter. (#327)
add 88b264a LUCENE-10126 Add extra test on _doc sort (#326)
add d2b88b7 LUCENE-10134: clean up the test from leaking threads and resources if an error occurs somewhere - this obscures the original cause of the problem.
add 0c13a52 Correct test error that allowed an empty array.
add 3e568b9 Support addition of diagnostics by custom merge policies (#329)
add ca810e7 LUCENE-10138: Use maven central to resolve third-party gradle plugins (#336)
add 797cfbf LUCENE-10118: Improve CMS infostream messages (#337)
add 1bb4554 LUCENE-10135: Correct passage selector behavior for long matching snippets (#334)
add 3aa0676 LUCENE-9713: apply source validation to txt files outside of src/* folders. Fix offenders. (#339)
add 93c66e1 LUCENE-9713: exclude .idea/ (sync with Solr's version).
add 4d0fabf LUCENE-9713: we don't need those symbol-escape checks. They're valid adoc and we don't produce PDFs.
add 4c97b9e LUCENE-10131: Add backcompat indices for 8.10 and add LUCENE_8_10_0 to Version (#343)
add cb366d0 LUCENE-10134: Move initialization of liveDocs bits outside the constructor to avoid AssertionError (#345)
add 5748743 LUCENE-10126: Re-introduce chunk scoring logic in tests (#331)
add d395435 LUCENE-10130: HnswGraph could make use of a SparseFixedBitSet.getAndSet
add 3dee08a LUCENE-10130: small optimizations to SparseFixedBitSet set() codepath
add b4fcdd9 LUCENE-10142: use a better RNG for HNSW vectors
add 2e57a40 LUCENE-10139: ExternalRefSorter returns a covariant with a subtype of BytesRefIterator that is Closeable. (#340)
add 04fb8c0 LUCENE-10118: Test fix
add 18fc6c1 LUCENE-10145: Speed up byte[] comparisons using VarHandles. (#349)
add c18e623 LUCENE-10106: Add CHANGES entry
add 45e8f63 LUCENE-10119: Add CHANGES entry
add 92a53d3 LUCENE-10126: Add CHANGES entry
add 5d2a031 LUCENE-10134: Add CHANGES entry (#351)
add 9e0f375 LUCENE-10143: Delegate primitive writes in RateLimitedIndexOutput (#352)
add 321d274 Fix DataInput/Output/RandomAccessInput javadocs, MIGRATE.txt to document endianness
add 5cd0d68 LUCENE-9488 Assemble source tar, with checksum and signing (#353)
add 674b66d LUCENE-9809 Adapt Release Wizard to only release Lucene (#344)
add feac4cd LUCENE-10182: No longer check dvGen. (#350)
add b20ffa5 LUCENE-10152 Fix sha512 file syntax (#356)
add 9e9c3bd LUCENE-9325: Make Sort final (#338)
add 5511bce LUCENE-10153: Speed up BKDWriter using VarHandles. (#357)
add ba75dc5 LUCENE-10150: override ByteBuffersDataInput readLong/readInt/readShort
add 9b1fc0e LUCENE-10147: ensure that KnnVectorQuery scores are positive (#361)
add a613021 LUCENE-10136: allow 'var' declarations in source code (be reasonable though). (#368)
add 61c15c8 LUCENE-10150: override readLongs() in ByteBuffersDataInput (#363)
add 6c6a3bd LUCENE-10155: Refactor TestMultiMMap into a BaseChunkedDirectoryTestCase (#360)
add c1fe9ef LUCENE-10160: improve assert to be easier to debug
add 6f232b6 Add CHANGES entry for 8.10.1
add c94aca7 LUCENE-10158: Add a new interface Unwrappable to the utils package to ease migration to new MMAPDirectory and its testing (#369)
add ed69f60 Update CHANGES entry for 8.10.1
add f486115 LUCENE-10146: Add VectorSimilarityFunction.COSINE (#366)
add f67dec1 LUCENE-10164: lucene/replicator should only have jetty as a test dependency (#373)
new 3b4296d Merge remote-tracking branch 'upstream/main' into hnsw
new 43083ed Format changes
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
README.md | 5 +-
build.gradle | 7 +-
dev-tools/doap/lucene.rdf | 7 +
dev-tools/scripts/README.md | 17 +-
dev-tools/scripts/addVersion.py | 51 +-
dev-tools/scripts/buildAndPushRelease.py | 31 +-
dev-tools/scripts/githubPRs.py | 16 +-
dev-tools/scripts/poll-mirrors.py | 4 +-
dev-tools/scripts/releaseWizard.py | 59 +-
dev-tools/scripts/releaseWizard.yaml | 327 ++----
dev-tools/scripts/releasedJirasRegex.py | 16 +-
dev-tools/scripts/scriptutil.py | 2 +-
gradle/documentation/render-javadoc.gradle | 19 +-
gradle/generation/antlr.gradle | 88 ++
gradle/generation/javacc.gradle | 16 +-
gradle/generation/jflex.gradle | 2 +-
gradle/generation/jflex/skeleton.default.txt | 6 +-
.../jflex/skeleton.disable.buffer.expansion.txt | 4 +-
gradle/generation/local-settings.gradle | 62 +-
gradle/generation/regenerate.gradle | 21 +
gradle/generation/snowball/snowball.patch | 6 +-
gradle/publishing/distribution.gradle | 3 +-
gradle/testing/alternative-jdk-support.gradle | 63 +-
gradle/testing/defaults-tests.gradle | 7 +-
gradle/validation/check-environment.gradle | 2 +-
gradle/validation/rat-sources.gradle | 10 +-
gradle/validation/spotless.gradle | 5 +-
gradle/validation/validate-source-patterns.gradle | 53 +-
gradle/wrapper/gradle-wrapper.jar.sha256 | 2 +-
gradle/wrapper/gradle-wrapper.jar.version | 2 +-
gradle/wrapper/gradle-wrapper.properties | 2 +-
gradlew.bat | 24 +-
lucene/CHANGES.txt | 147 ++-
lucene/MIGRATE.md | 11 +
.../checksums/generateClassicTokenizer.json | 4 +-
.../checksums/generateHTMLStripCharFilter.json | 6 +-
.../checksums/generateUAX29URLEmailTokenizer.json | 4 +-
.../checksums/generateWikipediaTokenizer.json | 4 +-
.../common/src/generated/checksums/snowball.json | 2 +-
.../lucene/analysis/ar/ArabicNormalizer.java | 2 +-
.../apache/lucene/analysis/ar/ArabicStemmer.java | 14 +-
.../lucene/analysis/bg/BulgarianStemmer.java | 6 +-
.../lucene/analysis/bn/BengaliNormalizer.java | 2 +-
.../apache/lucene/analysis/bn/BengaliStemmer.java | 2 +-
.../lucene/analysis/charfilter/BaseCharFilter.java | 4 +-
.../analysis/charfilter/HTMLStripCharFilter.java | 8 +-
.../analysis/charfilter/HTMLStripCharFilter.jflex | 2 +-
.../lucene/analysis/cjk/CJKBigramFilter.java | 12 +-
.../lucene/analysis/cjk/CJKWidthCharFilter.java | 6 +-
.../apache/lucene/analysis/cjk/CJKWidthFilter.java | 10 +-
.../lucene/analysis/ckb/SoraniNormalizer.java | 2 +-
.../apache/lucene/analysis/ckb/SoraniStemmer.java | 2 +-
.../analysis/classic/ClassicTokenizerImpl.java | 6 +-
.../analysis/commongrams/CommonGramsFilter.java | 2 +-
.../compound/hyphenation/PatternParser.java | 2 +-
.../analysis/compound/hyphenation/TernaryTree.java | 4 +-
.../lucene/analysis/core/DecimalDigitFilter.java | 2 +-
.../apache/lucene/analysis/cz/CzechStemmer.java | 8 +-
.../apache/lucene/analysis/de/GermanAnalyzer.java | 7 +
.../lucene/analysis/de/GermanLightStemmer.java | 6 +-
.../lucene/analysis/de/GermanMinimalStemmer.java | 2 +-
.../analysis/de/GermanNormalizationFilter.java | 2 +-
.../apache/lucene/analysis/el/GreekStemmer.java | 54 +-
.../analysis/email/UAX29URLEmailTokenizerImpl.java | 4 +-
.../lucene/analysis/en/EnglishMinimalStemmer.java | 2 +-
.../lucene/analysis/es/SpanishLightStemmer.java | 2 +-
.../lucene/analysis/es/SpanishMinimalStemmer.java | 2 +-
.../lucene/analysis/fa/PersianNormalizer.java | 2 +-
.../lucene/analysis/fi/FinnishLightStemmer.java | 12 +-
.../lucene/analysis/fr/FrenchLightStemmer.java | 4 +-
.../lucene/analysis/fr/FrenchMinimalStemmer.java | 2 +-
.../lucene/analysis/gl/GalicianMinimalStemmer.java | 2 +-
.../apache/lucene/analysis/gl/GalicianStemmer.java | 2 +-
.../apache/lucene/analysis/hi/HindiNormalizer.java | 2 +-
.../apache/lucene/analysis/hi/HindiStemmer.java | 2 +-
.../lucene/analysis/hu/HungarianLightStemmer.java | 10 +-
.../hunspell/HunspellStemFilterFactory.java | 2 +-
.../analysis/hunspell/ISO8859_14Decoder.java | 2 +-
.../lucene/analysis/id/IndonesianStemmer.java | 14 +-
.../apache/lucene/analysis/in/IndicNormalizer.java | 6 +-
.../lucene/analysis/it/ItalianLightStemmer.java | 2 +-
.../apache/lucene/analysis/lv/LatvianStemmer.java | 10 +-
.../lucene/analysis/minhash/MinHashFilter.java | 6 +-
.../analysis/miscellaneous/ASCIIFoldingFilter.java | 2 +-
.../analysis/miscellaneous/FingerprintFilter.java | 8 +-
.../miscellaneous/HyphenatedWordsFilter.java | 2 +-
.../miscellaneous/RemoveDuplicatesTokenFilter.java | 4 +-
.../miscellaneous/WordDelimiterFilter.java | 12 +-
.../miscellaneous/WordDelimiterFilterFactory.java | 2 +-
.../miscellaneous/WordDelimiterGraphFilter.java | 2 +-
.../WordDelimiterGraphFilterFactory.java | 2 +-
.../miscellaneous/WordDelimiterIterator.java | 4 +-
.../GermanAnalyzer.java => ne/NepaliAnalyzer.java} | 99 +-
.../apache/lucene/analysis/ne/package-info.java} | 23 +-
.../lucene/analysis/no/NorwegianLightStemmer.java | 2 +-
.../analysis/no/NorwegianMinimalStemmer.java | 2 +-
.../lucene/analysis/payloads/PayloadHelper.java | 18 +-
.../lucene/analysis/pt/PortugueseLightStemmer.java | 6 +-
.../analysis/pt/PortugueseMinimalStemmer.java | 2 +-
.../lucene/analysis/pt/PortugueseStemmer.java | 2 +-
.../apache/lucene/analysis/pt/RSLPStemmerBase.java | 24 +-
.../lucene/analysis/ru/RussianLightStemmer.java | 6 +-
.../lucene/analysis/snowball/SnowballFilter.java | 4 +-
.../analysis/sr/SerbianNormalizationFilter.java | 2 +-
.../sr/SerbianNormalizationRegularFilter.java | 2 +-
.../lucene/analysis/sv/SwedishLightStemmer.java | 2 +-
.../lucene/analysis/sv/SwedishMinimalStemmer.java | 2 +-
.../lucene/analysis/synonym/SolrSynonymParser.java | 8 +-
.../apache/lucene/analysis/synonym/SynonymMap.java | 2 +-
.../analysis/synonym/WordnetSynonymParser.java | 4 +-
.../GermanAnalyzer.java => ta/TamilAnalyzer.java} | 98 +-
.../apache/lucene/analysis/ta/package-info.java} | 23 +-
.../GermanAnalyzer.java => te/TeluguAnalyzer.java} | 103 +-
.../TeluguNormalizationFilter.java} | 45 +-
.../te/TeluguNormalizationFilterFactory.java | 64 ++
.../lucene/analysis/te/TeluguNormalizer.java | 116 ++
.../TeluguStemFilter.java} | 36 +-
.../analysis/te/TeluguStemFilterFactory.java | 58 +
.../apache/lucene/analysis/te/TeluguStemmer.java | 64 ++
.../apache/lucene/analysis/te/package-info.java} | 23 +-
.../lucene/analysis/tr/TurkishLowerCaseFilter.java | 4 +-
.../lucene/analysis/util/CharArrayIterator.java | 4 +-
.../lucene/analysis/util/OpenStringBuilder.java | 8 +-
.../analysis/util/SegmentingTokenizerBase.java | 2 +-
.../apache/lucene/analysis/util/StemmerUtil.java | 10 +-
.../analysis/wikipedia/WikipediaTokenizerImpl.java | 6 +-
.../org/tartarus/snowball/SnowballProgram.java | 6 +-
.../org.apache.lucene.analysis.TokenFilterFactory | 2 +
.../org/apache/lucene/analysis/ne/stopwords.txt | 256 +++++
.../org/apache/lucene/analysis/ta/stopwords.txt | 126 +++
.../org/apache/lucene/analysis/te/stopwords.txt | 51 +
.../apache/lucene/analysis/core/TestAnalyzers.java | 4 +-
.../lucene/analysis/core/TestBugInSomething.java | 2 +-
.../lucene/analysis/core/TestRandomChains.java | 10 +-
.../lucene/analysis/core/TestStopAnalyzer.java | 2 +-
.../lucene/analysis/core/TestTypeTokenFilter.java | 2 +-
.../analysis/email/TestUAX29URLEmailAnalyzer.java | 2 +-
.../analysis/email/TestUAX29URLEmailTokenizer.java | 2 +-
.../miscellaneous/TestCapitalizationFilter.java | 4 +-
.../lucene/analysis/ne/TestNepaliAnalyzer.java | 76 ++
.../query/TestQueryAutoStopWordAnalyzer.java | 4 +-
.../lucene/analysis/shingle/TestShingleFilter.java | 6 +-
.../lucene/analysis/ta/TestTamilAnalyzer.java | 67 ++
.../lucene/analysis/te/TestTeluguAnalyzer.java | 57 +
.../lucene/analysis/te/TestTeluguFilters.java | 79 ++
.../lucene/analysis/te/TestTeluguNormalizer.java | 60 +
.../lucene/analysis/te/TestTeluguStemmer.java | 59 +
.../analysis/util/TestCharArrayIterator.java | 10 +-
.../analysis/util/TestSegmentingTokenizerBase.java | 6 +-
.../analysis/wikipedia/TestWikipediaTokenizer.java | 2 +-
.../lucene/analysis/icu/ICUTransformFilter.java | 4 +-
.../icu/segmentation/BreakIteratorWrapper.java | 4 +-
.../icu/segmentation/CharArrayIterator.java | 4 +-
.../icu/segmentation/CompositeBreakIterator.java | 6 +-
.../analysis/icu/segmentation/ICUTokenizer.java | 2 +-
.../icu/segmentation/ICUTokenizerFactory.java | 2 +-
.../analysis/icu/segmentation/ScriptIterator.java | 6 +-
.../icu/segmentation/TestCharArrayIterator.java | 2 +-
.../icu/segmentation/TestICUTokenizer.java | 8 +-
.../analysis/icu/GenerateUTR30DataFiles.java | 2 +-
.../lucene/analysis/icu/RBBIRuleCompiler.java | 2 +-
.../lucene/analysis/ja/JapaneseAnalyzer.java | 2 +-
.../analysis/ja/JapaneseCompletionAnalyzer.java | 65 ++
.../analysis/ja/JapaneseCompletionFilter.java | 272 +++++
.../ja/JapaneseCompletionFilterFactory.java | 66 ++
.../ja/JapanesePartOfSpeechStopFilterFactory.java | 2 +-
.../analysis/ja/completion/CharSequenceUtils.java | 91 ++
.../analysis/ja/completion/KatakanaRomanizer.java | 193 ++++
.../analysis/ja/completion/package-info.java} | 23 +-
.../lucene/analysis/ja/dict/BinaryDictionary.java | 12 +-
.../apache/lucene/analysis/ja/dict/Dictionary.java | 6 +-
.../lucene/analysis/ja/dict/TokenInfoFST.java | 4 +-
.../lucene/analysis/ja/dict/UnknownDictionary.java | 2 +-
.../lucene/analysis/ja/dict/UserDictionary.java | 10 +-
.../org.apache.lucene.analysis.TokenFilterFactory | 1 +
.../lucene/analysis/ja/completion/romaji_map.txt | 344 ++++++
.../analysis/ja/dict/TokenInfoDictionary$fst.dat | Bin 1686422 -> 1686422 bytes
.../ja/TestJapaneseCompletionAnalyzer.java | 72 ++
.../analysis/ja/TestJapaneseCompletionFilter.java | 271 +++++
.../ja/TestJapaneseCompletionFilterFactory.java | 59 +
.../ja/completion/TestKatakanaRomanizer.java | 68 ++
.../analysis/morfologik/MorfologikFilter.java | 2 +-
.../apache/lucene/analysis/ko/KoreanTokenizer.java | 18 +-
.../analysis/ko/dict/TokenInfoDictionary$fst.dat | Bin 5632456 -> 5632456 bytes
.../analysis/phonetic/TestPhoneticFilter.java | 4 +-
.../analysis/cn/smart/hhmm/AbstractDictionary.java | 4 +-
.../analysis/cn/smart/hhmm/BigramDictionary.java | 6 +-
.../cn/smart/TestSmartChineseAnalyzer.java | 18 +-
.../src/java/org/egothor/stemmer/Compile.java | 2 +-
.../stempel/src/java/org/egothor/stemmer/Diff.java | 6 +-
.../src/java/org/egothor/stemmer/Gener.java | 4 +-
.../stempel/src/java/org/egothor/stemmer/Lift.java | 2 +-
.../src/java/org/egothor/stemmer/MultiTrie2.java | 8 +-
.../src/java/org/egothor/stemmer/Optimizer.java | 2 +-
.../src/java/org/egothor/stemmer/Reduce.java | 6 +-
.../stempel/src/java/org/egothor/stemmer/Trie.java | 4 +-
.../src/test/org/egothor/stemmer/TestStemmer.java | 26 +-
.../lucene50/Lucene50LiveDocsFormat.java | 2 +-
.../lucene50/Lucene50SkipReader.java | 10 +-
.../lucene84/Lucene84SkipReader.java | 10 +-
.../lucene87/LZ4WithPresetDictCompressionMode.java | 4 +-
.../lucene87/Lucene87StoredFieldsFormat.java | 4 +-
.../backward_codecs/packed/LegacyDirectWriter.java | 2 +-
.../lucene50/TestBlockPostingsFormat3.java | 2 +-
.../lucene70/TestLucene70DocValuesFormat.java | 6 +-
.../BaseLucene80DocValuesFormatTestCase.java | 6 +-
.../lucene87/Lucene87RWStoredFieldsFormat.java | 2 -
.../packed/TestLegacyDirectPacked.java | 6 +-
.../backward_index/TestBackwardsCompatibility.java | 9 +-
.../backward_index/TestManyPointsInOldIndex.java | 2 +-
.../lucene/backward_index/index.8.10.0-cfs.zip | Bin 0 -> 16967 bytes
.../lucene/backward_index/index.8.10.0-nocfs.zip | Bin 0 -> 16973 bytes
.../apache/lucene/backward_index/sorted.8.10.0.zip | Bin 0 -> 142502 bytes
.../byTask/feeds/SimpleSloppyPhraseQueryMaker.java | 2 +-
.../benchmark/byTask/feeds/TrecContentSource.java | 2 +-
.../benchmark/byTask/feeds/TrecDocParser.java | 2 +-
.../benchmark/byTask/tasks/AddIndexesTask.java | 2 +-
.../lucene/benchmark/byTask/tasks/PerfTask.java | 2 +-
.../lucene/benchmark/byTask/tasks/ReportTask.java | 2 +-
.../benchmark/byTask/tasks/TaskSequence.java | 2 +-
.../lucene/benchmark/byTask/utils/Config.java | 30 +-
.../lucene/benchmark/byTask/utils/Format.java | 2 +-
.../org/apache/lucene/benchmark/quality/Judge.java | 2 +-
.../lucene/benchmark/quality/QualityBenchmark.java | 8 +-
.../lucene/benchmark/quality/QualityStats.java | 2 +-
.../lucene/benchmark/quality/trec/QueryDriver.java | 4 +-
.../benchmark/quality/trec/Trec1MQReader.java | 2 +-
.../benchmark/quality/trec/TrecTopicsReader.java | 2 +-
.../quality/utils/QualityQueriesFinder.java | 8 +-
.../benchmark/quality/utils/SimpleQQParser.java | 4 +-
.../benchmark/quality/utils/SubmissionReport.java | 2 +-
.../apache/lucene/benchmark/BenchmarkTestCase.java | 2 +-
.../benchmark/byTask/TestPerfTasksLogic.java | 42 +-
.../lucene/benchmark/quality/TestQualityRun.java | 8 +-
.../org/apache/lucene/codecs/bloom/FuzzySet.java | 2 +-
.../lucene/codecs/memory/FSTTermsReader.java | 2 +-
.../simpletext/SimpleTextCompoundFormat.java | 14 +-
.../simpletext/SimpleTextDocValuesReader.java | 8 +-
.../simpletext/SimpleTextFieldInfosFormat.java | 2 +-
.../codecs/simpletext/SimpleTextFieldsReader.java | 184 ++-
.../codecs/simpletext/SimpleTextFieldsWriter.java | 53 +-
.../simpletext/SimpleTextKnnVectorsReader.java | 31 +-
.../codecs/simpletext/SimpleTextSkipReader.java | 206 ++++
.../codecs/simpletext/SimpleTextSkipWriter.java | 157 +++
.../simpletext/SimpleTextStoredFieldsReader.java | 4 +-
.../simpletext/SimpleTextTermVectorsReader.java | 16 +-
.../checksums/generateStandardTokenizer.json | 4 +-
.../apache/lucene/analysis/AnalysisSPILoader.java | 6 +-
.../org/apache/lucene/analysis/WordlistLoader.java | 2 +-
.../analysis/standard/StandardTokenizerImpl.java | 4 +-
.../java/org/apache/lucene/codecs/CodecUtil.java | 4 +-
.../apache/lucene/codecs/DocValuesConsumer.java | 259 +++--
.../lucene/codecs/MultiLevelSkipListReader.java | 34 +-
.../lucene/codecs/MultiLevelSkipListWriter.java | 24 +-
.../apache/lucene/codecs/SegmentInfoFormat.java | 2 +-
.../apache/lucene/codecs/TermVectorsReader.java | 12 +-
.../lucene90/LZ4WithPresetDictCompressionMode.java | 4 +-
.../codecs/lucene90/Lucene90DocValuesConsumer.java | 77 +-
.../codecs/lucene90/Lucene90DocValuesProducer.java | 113 ++
.../codecs/lucene90/Lucene90FieldInfosFormat.java | 11 +-
.../codecs/lucene90/Lucene90HnswVectorsFormat.java | 2 +-
.../codecs/lucene90/Lucene90HnswVectorsReader.java | 19 +-
.../codecs/lucene90/Lucene90LiveDocsFormat.java | 2 +-
.../codecs/lucene90/Lucene90PostingsWriter.java | 9 -
.../lucene/codecs/lucene90/Lucene90SkipReader.java | 10 +-
.../lucene90/Lucene90StoredFieldsFormat.java | 4 +-
.../apache/lucene/codecs/lucene90/PForUtil.java | 31 +-
.../org/apache/lucene/document/DoublePoint.java | 4 +-
.../org/apache/lucene/document/FloatPoint.java | 4 +-
.../apache/lucene/document/InetAddressPoint.java | 6 +-
.../java/org/apache/lucene/document/IntPoint.java | 4 +-
.../org/apache/lucene/document/KnnVectorField.java | 5 +-
.../org/apache/lucene/document/LatLonPoint.java | 2 +-
.../document/LatLonPointDistanceFeatureQuery.java | 8 +-
.../lucene/document/LatLonPointDistanceQuery.java | 70 +-
.../apache/lucene/document/LatLonPointQuery.java | 25 +-
.../document/LatLonShapeBoundingBoxQuery.java | 83 +-
.../lucene/document/LongDistanceFeatureQuery.java | 24 +-
.../java/org/apache/lucene/document/LongPoint.java | 4 +-
.../org/apache/lucene/document/XYPointField.java | 2 +-
.../src/java/org/apache/lucene/geo/EdgeTree.java | 4 +-
.../src/java/org/apache/lucene/geo/Polygon2D.java | 4 +-
.../apache/lucene/index/BaseCompositeReader.java | 27 +-
.../org/apache/lucene/index/ByteSliceReader.java | 7 +-
.../java/org/apache/lucene/index/CheckIndex.java | 1028 +++++++++++------
.../java/org/apache/lucene/index/CodecReader.java | 13 +-
.../lucene/index/ConcurrentMergeScheduler.java | 25 +-
.../java/org/apache/lucene/index/DocIDMerger.java | 3 +-
.../java/org/apache/lucene/index/DocValues.java | 89 +-
.../apache/lucene/index/DocValuesLeafReader.java | 2 +-
.../org/apache/lucene/index/FilterLeafReader.java | 5 +-
.../lucene/index/FreqProxTermsWriterPerField.java | 10 +-
.../apache/lucene/index/FrozenBufferedUpdates.java | 2 +-
.../org/apache/lucene/index/IndexFileNames.java | 2 +-
.../java/org/apache/lucene/index/IndexReader.java | 14 +-
.../org/apache/lucene/index/IndexingChain.java | 84 +-
.../java/org/apache/lucene/index/MergePolicy.java | 2 +-
.../apache/lucene/index/MergeReaderWrapper.java | 19 +-
.../java/org/apache/lucene/index/MultiFields.java | 2 +-
.../java/org/apache/lucene/index/MultiSorter.java | 2 +-
.../org/apache/lucene/index/NormValuesWriter.java | 2 +-
.../java/org/apache/lucene/index/OrdinalMap.java | 6 +-
.../apache/lucene/index/ParallelLeafReader.java | 31 +-
.../java/org/apache/lucene/index/PointValues.java | 25 +-
.../apache/lucene/index/SegmentCoreReaders.java | 17 +-
.../java/org/apache/lucene/index/SegmentInfo.java | 15 +
.../java/org/apache/lucene/index/SegmentInfos.java | 8 +-
.../org/apache/lucene/index/SegmentReader.java | 16 +-
.../lucene/index/SortedSetDocValuesWriter.java | 13 +-
.../java/org/apache/lucene/index/TermVectors.java | 33 -
.../lucene/index/VectorSimilarityFunction.java | 42 +-
.../org/apache/lucene/search/BooleanScorer.java | 2 +-
.../apache/lucene/search/ConstantScoreQuery.java | 5 +
.../apache/lucene/search/FieldValueHitQueue.java | 5 -
.../org/apache/lucene/search/IndexSearcher.java | 94 +-
.../org/apache/lucene/search/LRUQueryCache.java | 7 +-
.../apache/lucene/search/MatchAllDocsQuery.java | 5 +
.../org/apache/lucene/search/MatchNoDocsQuery.java | 5 +
.../org/apache/lucene/search/MultiCollector.java | 28 +-
.../java/org/apache/lucene/search/PhraseQuery.java | 4 +-
.../org/apache/lucene/search/PointInSetQuery.java | 22 +-
.../org/apache/lucene/search/PointRangeQuery.java | 75 +-
.../org/apache/lucene/search/ScoringRewrite.java | 2 +-
.../src/java/org/apache/lucene/search/Sort.java | 84 +-
.../java/org/apache/lucene/search/SortField.java | 78 +-
.../lucene/search/SortedNumericSortField.java | 101 +-
.../org/apache/lucene/search/SynonymQuery.java | 4 +-
.../java/org/apache/lucene/search/TermQuery.java | 16 +
.../apache/lucene/search/TopFieldCollector.java | 11 +-
.../src/java/org/apache/lucene/search/Weight.java | 48 +-
.../lucene/search/comparators/DocComparator.java | 2 +-
.../search/comparators/NumericComparator.java | 54 +-
.../lucene/search/similarities/BM25Similarity.java | 2 +-
.../search/similarities/MultiSimilarity.java | 10 +-
.../lucene/search/similarities/SimilarityBase.java | 2 +-
.../search/similarities/TFIDFSimilarity.java | 2 +-
.../org/apache/lucene/store/BufferedChecksum.java | 2 +-
.../apache/lucene/store/ByteArrayDataInput.java | 40 +-
.../apache/lucene/store/ByteArrayDataOutput.java | 22 +
.../apache/lucene/store/ByteBufferIndexInput.java | 4 +-
.../apache/lucene/store/ByteBuffersDataInput.java | 86 +-
.../apache/lucene/store/ByteBuffersDataOutput.java | 48 +-
.../apache/lucene/store/ByteBuffersIndexInput.java | 6 +
.../java/org/apache/lucene/store/DataInput.java | 11 +-
.../java/org/apache/lucene/store/DataOutput.java | 13 +-
.../org/apache/lucene/store/MMapDirectory.java | 2 +-
.../lucene/store/OutputStreamIndexOutput.java | 61 +-
.../org/apache/lucene/store/RandomAccessInput.java | 10 +-
.../lucene/store/RateLimitedIndexOutput.java | 21 +
.../src/java/org/apache/lucene/util/ArrayUtil.java | 38 +
.../src/java/org/apache/lucene/util/BitSet.java | 3 +
.../src/java/org/apache/lucene/util/BitUtil.java | 92 +-
.../java/org/apache/lucene/util/ByteBlockPool.java | 2 +-
.../java/org/apache/lucene/util/FixedBitSet.java | 1 +
.../java/org/apache/lucene/util/NumericUtils.java | 30 +-
.../org/apache/lucene/util/SparseFixedBitSet.java | 61 +-
.../java/org/apache/lucene/util/StringHelper.java | 10 +-
.../org/apache/lucene/util/Unwrappable.java} | 28 +-
.../java/org/apache/lucene/util/VectorUtil.java | 47 +-
.../src/java/org/apache/lucene/util/Version.java | 14 +
.../lucene/util/automaton/LevenshteinAutomata.java | 12 +-
.../apache/lucene/util/bkd/BKDRadixSelector.java | 48 +-
.../java/org/apache/lucene/util/bkd/BKDUtil.java | 104 ++
.../java/org/apache/lucene/util/bkd/BKDWriter.java | 219 +---
.../apache/lucene/util/bkd/HeapPointReader.java | 6 +-
.../apache/lucene/util/bkd/HeapPointWriter.java | 6 +-
.../lucene/util/bkd/MutablePointsReaderUtils.java | 29 +-
.../apache/lucene/util/bkd/OfflinePointReader.java | 6 +-
.../java/org/apache/lucene/util/compress/LZ4.java | 9 +-
.../apache/lucene/util/fst/PositiveIntOutputs.java | 3 +-
.../org/apache/lucene/util/hnsw/HnswGraph.java | 11 +-
.../apache/lucene/util/hnsw/HnswGraphBuilder.java | 16 +-
.../apache/lucene/util/packed/DirectWriter.java | 95 +-
.../apache/lucene/analysis/TestCharArraySet.java | 2 +-
.../analysis/standard/TestStandardAnalyzer.java | 2 +-
.../org/apache/lucene/codecs/TestCodecUtil.java | 8 +-
.../lucene90/TestLucene90DocValuesFormat.java | 6 +-
.../test/org/apache/lucene/document/TestField.java | 28 +-
.../document/TestLatLonPointDistanceSort.java | 2 +-
.../apache/lucene/document/TestLatLonShape.java | 8 +-
.../lucene/document/TestPerFieldConsistency.java | 8 +-
.../org/apache/lucene/document/TestXYShape.java | 4 +-
.../test/org/apache/lucene/geo/TestGeoUtils.java | 6 +-
.../test/org/apache/lucene/geo/TestPolygon2D.java | 20 +-
.../apache/lucene/index/Test2BBinaryDocValues.java | 4 +-
.../apache/lucene/index/Test2BPostingsBytes.java | 2 +-
.../index/Test2BSortedDocValuesFixedSorted.java | 2 +-
.../lucene/index/Test2BSortedDocValuesOrds.java | 2 +-
.../lucene/index/TestAllFilesDetectTruncation.java | 2 +-
.../org/apache/lucene/index/TestCheckIndex.java | 155 +++
.../lucene/index/TestConcurrentMergeScheduler.java | 80 ++
.../apache/lucene/index/TestCustomTermFreq.java | 29 +-
.../lucene/index/TestDemoParallelLeafReader.java | 5 +-
.../apache/lucene/index/TestDocValuesIndexing.java | 70 +-
.../apache/lucene/index/TestDocsAndPositions.java | 14 +-
.../org/apache/lucene/index/TestDuelingCodecs.java | 2 +-
.../lucene/index/TestExitableDirectoryReader.java | 9 +-
.../apache/lucene/index/TestFilterCodecReader.java | 13 +-
.../apache/lucene/index/TestIndexFileDeleter.java | 6 +-
.../org/apache/lucene/index/TestIndexOptions.java | 7 +-
.../org/apache/lucene/index/TestIndexSorting.java | 6 +-
.../org/apache/lucene/index/TestIndexWriter.java | 4 +-
.../lucene/index/TestIndexWriterExceptions.java | 6 +-
.../lucene/index/TestIndexWriterMaxDocs.java | 8 +-
.../lucene/index/TestIndexWriterOnDiskFull.java | 2 +-
.../index/TestIndexWriterThreadsToSegments.java | 4 +-
.../lucene/index/TestNumericDocValuesUpdates.java | 20 +-
.../test/org/apache/lucene/index/TestPayloads.java | 2 +-
.../org/apache/lucene/index/TestPointValues.java | 14 +-
.../apache/lucene/index/TestPostingsOffsets.java | 2 +-
.../lucene/index/TestSameTokenSamePosition.java | 8 +-
.../org/apache/lucene/index/TestSegmentInfos.java | 54 +-
.../lucene/index/TestSegmentToThreadMapping.java | 3 +-
.../apache/lucene/index/TestSwappedIndexFiles.java | 7 +-
.../org/apache/lucene/index/TestTermsEnum.java | 36 +-
.../apache/lucene/index/TestTermsHashPerField.java | 28 +-
.../apache/lucene/index/TestTieredMergePolicy.java | 26 +-
.../apache/lucene/search/TestAutomatonQuery.java | 2 +-
.../org/apache/lucene/search/TestBooleanQuery.java | 40 +
.../search/TestBooleanQueryVisitSubscorers.java | 2 +-
.../org/apache/lucene/search/TestDateSort.java | 3 +-
.../apache/lucene/search/TestDocValuesQueries.java | 41 +-
.../lucene/search/TestDoubleValuesSource.java | 6 +-
.../org/apache/lucene/search/TestFilterWeight.java | 3 +-
.../org/apache/lucene/search/TestFuzzyQuery.java | 8 +-
.../apache/lucene/search/TestIndexSearcher.java | 21 +-
.../apache/lucene/search/TestKnnVectorQuery.java | 157 ++-
.../apache/lucene/search/TestLRUQueryCache.java | 26 +-
.../apache/lucene/search/TestLiveFieldValues.java | 5 +-
.../apache/lucene/search/TestLongValuesSource.java | 6 +-
.../apache/lucene/search/TestMinShouldMatch2.java | 20 +-
.../apache/lucene/search/TestMultiCollector.java | 63 ++
.../apache/lucene/search/TestMultiPhraseQuery.java | 4 +-
.../apache/lucene/search/TestMultiSliceMerge.java | 4 +-
.../lucene/search/TestMultiThreadTermVectors.java | 12 +-
.../org/apache/lucene/search/TestPhraseQuery.java | 2 +-
.../org/apache/lucene/search/TestPointQueries.java | 8 +-
.../apache/lucene/search/TestRegexpRandom2.java | 6 +-
.../lucene/search/TestSloppyPhraseQuery2.java | 2 +-
.../test/org/apache/lucene/search/TestSort.java | 25 +-
.../apache/lucene/search/TestSortOptimization.java | 258 ++++-
.../lucene/search/TestSortedNumericSortField.java | 17 +-
.../lucene/search/TestSortedSetSelector.java | 117 +-
.../lucene/search/TestSortedSetSortField.java | 38 +-
.../apache/lucene/search/TestTermInSetQuery.java | 45 +-
.../org/apache/lucene/search/TestTermQuery.java | 30 +
.../lucene/search/TestTimeLimitingCollector.java | 2 +-
.../org/apache/lucene/search/TestWildcard.java | 10 +-
.../lucene/search/TestXYPointDistanceSort.java | 2 +-
.../lucene/store/BaseDataOutputTestCase.java | 8 +-
.../apache/lucene/store/TestBufferedChecksum.java | 2 +-
.../lucene/store/TestByteArrayDataInput.java | 32 +
.../lucene/store/TestByteBuffersDataOutput.java | 78 +-
.../org/apache/lucene/store/TestMmapDirectory.java | 2 +-
.../store/TestMultiByteBuffersDirectory.java | 49 +
.../org/apache/lucene/store/TestMultiMMap.java | 315 +-----
.../lucene/store/TestOutputStreamIndexOutput.java | 54 +
.../test/org/apache/lucene/util/TestArrayUtil.java | 50 +
.../test/org/apache/lucene/util/TestBytesRef.java | 2 +-
.../org/apache/lucene/util/TestBytesRefHash.java | 12 +-
.../test/org/apache/lucene/util/TestCharsRef.java | 6 +-
.../test/org/apache/lucene/util/TestIntsRef.java | 2 +-
.../test/org/apache/lucene/util/TestLongsRef.java | 2 +-
.../org/apache/lucene/util/TestPagedBytes.java | 2 +-
.../org/apache/lucene/util/TestStringHelper.java | 52 +-
.../org/apache/lucene/util/TestUnicodeUtil.java | 18 +-
.../org/apache/lucene/util/TestVectorUtil.java | 27 +
.../test/org/apache/lucene/util/TestVersion.java | 13 +
.../lucene/util/automaton/TestAutomaton.java | 108 +-
.../util/automaton/TestDeterminizeLexicon.java | 2 +-
.../util/automaton/TestLevenshteinAutomata.java | 12 +-
.../apache/lucene/util/automaton/TestRegExp.java | 2 +-
.../lucene/util/automaton/TestUTF32ToUTF8.java | 2 +-
.../org/apache/lucene/util/bkd/TestBKDUtil.java | 136 +++
.../test/org/apache/lucene/util/fst/TestFSTs.java | 98 +-
.../org/apache/lucene/util/hnsw/TestHnswGraph.java | 41 +-
.../lucene/util/packed/TestDirectPacked.java | 6 +-
.../lucene/demo/facet/DistanceFacetsExample.java | 8 +-
.../src/generated/checksums/generateAntlr.json | 7 +
.../lucene/expressions/js/ExpressionMath.java | 46 +
.../lucene/expressions/js/JavascriptLexer.java | 349 +++---
.../lucene/expressions/js/JavascriptParser.java | 1185 ++++++++++++--------
.../lucene/expressions/js/JavascriptVisitor.java | 85 +-
.../apache/lucene/expressions/js/package-info.java | 2 +-
.../expressions/js/JavascriptCompiler.properties | 3 +-
.../lucene/expressions/TestDemoExpressions.java | 21 +-
.../lucene/expressions/TestExpressionSorts.java | 6 +-
.../lucene/expressions/js/TestExpressionMath.java | 70 ++
.../expressions/js/TestJavascriptFunction.java | 2 +-
.../org/apache/lucene/facet/DrillSideways.java | 14 +-
.../apache/lucene/facet/DrillSidewaysQuery.java | 17 +-
.../java/org/apache/lucene/facet/FacetUtils.java | 84 ++
.../java/org/apache/lucene/facet/FacetsConfig.java | 7 +-
.../apache/lucene/facet/LongValueFacetCounts.java | 14 +-
.../lucene/facet/StringValueFacetCounts.java | 16 +-
.../ConcurrentSortedSetDocValuesFacetCounts.java | 9 +-
.../sortedset/SortedSetDocValuesFacetCounts.java | 13 +-
.../lucene/facet/taxonomy/FloatTaxonomyFacets.java | 12 +-
.../facet/taxonomy/IntAssociationFacetField.java | 11 +-
.../lucene/facet/taxonomy/IntTaxonomyFacets.java | 12 +-
.../TaxonomyFacetSumFloatAssociations.java | 15 +-
.../taxonomy/TaxonomyFacetSumIntAssociations.java | 13 +-
.../lucene/facet/taxonomy/TaxonomyMergeUtils.java | 4 +-
.../lucene/facet/taxonomy/TaxonomyReader.java | 15 +
.../directory/DirectoryTaxonomyReader.java | 153 ++-
.../org/apache/lucene/facet/TestDrillSideways.java | 142 ++-
.../org/apache/lucene/facet/TestFacetUtils.java | 106 ++
.../lucene/facet/TestLongValueFacetCounts.java | 28 +
.../lucene/facet/TestStringValueFacetCounts.java | 35 +
.../sortedset/TestSortedSetDocValuesFacets.java | 837 +++++++-------
.../facet/taxonomy/TestTaxonomyCombined.java | 8 +-
.../facet/taxonomy/directory/TestAddTaxonomy.java | 2 +-
.../directory/TestBackwardsCompatibility.java | 22 +
.../directory/TestDirectoryTaxonomyReader.java | 78 +-
.../directory/TestDirectoryTaxonomyWriter.java | 3 +-
.../lucene/search/highlight/GradientFormatter.java | 2 +-
.../lucene/search/highlight/Highlighter.java | 4 +-
.../search/highlight/TermVectorLeafReader.java | 16 +-
.../lucene/search/matchhighlight/OffsetRange.java | 12 +-
.../search/matchhighlight/PassageSelector.java | 20 +-
.../search/uhighlight/DefaultPassageFormatter.java | 2 +-
.../apache/lucene/search/uhighlight/Passage.java | 8 +-
.../lucene/search/uhighlight/PassageFormatter.java | 2 +-
.../PostingsWithTermVectorsOffsetStrategy.java | 18 +-
.../uhighlight/TermVectorOffsetStrategy.java | 13 +-
.../uhighlight/TokenStreamOffsetStrategy.java | 2 +-
.../search/uhighlight/UnifiedHighlighter.java | 37 +-
.../lucene/search/highlight/TestHighlighter.java | 8 +-
.../matchhighlight/TestMatchHighlighter.java | 3 +-
.../search/matchhighlight/TestPassageSelector.java | 94 +-
.../search/uhighlight/TestUnifiedHighlighter.java | 52 +-
.../uhighlight/TestUnifiedHighlighterMTQ.java | 44 +-
.../uhighlight/TestUnifiedHighlighterRanking.java | 6 +-
.../TestUnifiedHighlighterTermIntervals.java | 52 +-
.../uhighlight/TestUnifiedHighlighterTermVec.java | 30 +-
lucene/licenses/jetty-NOTICE.txt | 32 +-
lucene/licenses/junit-LICENSE-CPL.txt | 24 +-
lucene/licenses/pddl-10.txt | 2 +-
lucene/luke/build.gradle | 7 +-
.../apache/lucene/index/memory/MemoryIndex.java | 23 +-
lucene/misc/native/build.gradle | 5 +-
.../org/apache/lucene/misc/IndexMergeTool.java | 4 +-
.../apache/lucene/misc/store/WindowsDirectory.java | 2 +-
.../misc/util/fst/UpToTwoPositiveIntOutputs.java | 3 +-
.../search/TestDiversifiedTopDocsCollector.java | 2 +-
lucene/packaging/build.gradle | 38 +-
.../queries/payloads/PayloadMatcherFactory.java | 12 +-
.../lucene/queries/function/FunctionTestSetup.java | 4 +-
.../queries/function/TestFieldScoreQuery.java | 2 +-
.../queries/function/TestFunctionScoreQuery.java | 2 +-
.../queries/function/TestIndexReaderFunctions.java | 8 +-
.../queries/function/TestLongNormValueSource.java | 6 +-
.../lucene/queries/function/TestValueSources.java | 6 +-
.../lucene/queries/intervals/TestIntervals.java | 4 +-
.../queries/payloads/TestPayloadExplanations.java | 2 +-
.../queries/spans/TestSpanSearchEquivalence.java | 32 +-
.../queryparser/classic/QueryParserBase.java | 27 +-
.../queryparser/simple/SimpleQueryParser.java | 6 +-
.../queryparser/xml/CorePlusQueriesParser.java | 2 +-
.../xml/builders/LikeThisQueryBuilder.java | 4 +-
.../classic/TestMultiFieldQueryParser.java | 2 +-
.../queryparser/classic/TestQueryParser.java | 30 +-
.../complexPhrase/TestComplexPhraseQuery.java | 2 +-
.../flexible/standard/TestMultiFieldQPHelper.java | 2 +-
.../flexible/standard/TestQPHelper.java | 6 +-
.../queryparser/simple/TestSimpleQueryParser.java | 2 +-
.../surround/query/Test01Exceptions.java | 2 +-
.../queryparser/surround/query/Test02Boolean.java | 2 +-
.../queryparser/surround/query/Test03Distance.java | 2 +-
.../queryparser/util/QueryParserTestBase.java | 10 +-
lucene/replicator/build.gradle | 8 +-
.../lucene/replicator/nrt/SimpleTransLog.java | 12 +-
.../codecs/idversion/IDVersionPostingsFormat.java | 19 +-
.../lucene/sandbox/document/BigIntegerPoint.java | 4 +-
.../lucene/sandbox/document/HalfFloatPoint.java | 10 +-
.../lucene/sandbox/search/CombinedFieldQuery.java | 6 +-
.../sandbox/search/QueryProfilerTimingType.java | 1 +
.../lucene/sandbox/search/QueryProfilerWeight.java | 11 +
.../search/TestQueryProfilerIndexSearcher.java | 41 +-
.../apache/lucene/spatial/StrategyTestCase.java | 2 +-
.../org/apache/lucene/spatial3d/Geo3DPoint.java | 4 +-
.../lucene/search/spell/DirectSpellChecker.java | 4 +-
.../lucene/search/spell/LevenshteinDistance.java | 6 +-
.../search/spell/LuceneLevenshteinDistance.java | 2 +-
.../apache/lucene/search/spell/NGramDistance.java | 6 +-
.../suggest/analyzing/AnalyzingSuggester.java | 2 +-
.../search/suggest/analyzing/FuzzySuggester.java | 2 +-
.../suggest/document/FuzzyCompletionQuery.java | 2 +-
.../search/suggest/fst/ExternalRefSorter.java | 65 +-
.../analyzing/TestAnalyzingInfixSuggester.java | 26 +-
.../suggest/analyzing/TestAnalyzingSuggester.java | 6 +-
.../analyzing/TestBlendedInfixSuggester.java | 18 +-
.../suggest/analyzing/TestFuzzySuggester.java | 10 +-
.../search/suggest/document/TestSuggestField.java | 2 +-
.../search/suggest/fst/TestBytesRefSorters.java | 34 +-
.../search/suggest/fst/TestWFSTCompletion.java | 2 +-
.../lucene/analysis/BaseTokenStreamTestCase.java | 134 +--
.../apache/lucene/analysis/CollationTestBase.java | 2 +-
.../org/apache/lucene/analysis/MockTokenizer.java | 8 +-
.../apache/lucene/analysis/VocabularyAssert.java | 2 +-
.../apache/lucene/geo/BaseGeoPointTestCase.java | 2 +-
.../org/apache/lucene/geo/BaseXYPointTestCase.java | 2 +-
.../java/org/apache/lucene/geo/GeoTestUtil.java | 40 +-
.../java/org/apache/lucene/geo/ShapeTestUtil.java | 8 +-
.../apache/lucene/index/AssertingLeafReader.java | 6 +
.../lucene/index/BaseCompoundFormatTestCase.java | 10 +-
.../lucene/index/BaseDocValuesFormatTestCase.java | 34 +-
.../lucene/index/BaseKnnVectorsFormatTestCase.java | 32 +-
.../lucene/index/BaseNormsFormatTestCase.java | 8 +-
.../index/BaseSegmentInfoFormatTestCase.java | 24 +-
.../index/BaseStoredFieldsFormatTestCase.java | 2 +-
.../apache/lucene/index/FieldFilterLeafReader.java | 23 +-
.../org/apache/lucene/mockfile/FilterPath.java | 13 +-
.../apache/lucene/search/AssertingBulkScorer.java | 19 +-
.../apache/lucene/search/AssertingScorable.java | 30 +-
.../org/apache/lucene/search/AssertingWeight.java | 9 +
.../java/org/apache/lucene/search/CheckHits.java | 2 +-
.../java/org/apache/lucene/search/QueryUtils.java | 18 +-
.../store/BaseChunkedDirectoryTestCase.java} | 256 ++---
.../apache/lucene/store/BaseDirectoryTestCase.java | 34 +-
.../apache/lucene/store/MockDirectoryWrapper.java | 6 +-
.../org/apache/lucene/util/BaseBitSetTestCase.java | 23 +
.../org/apache/lucene/util/LuceneTestCase.java | 20 +-
.../src/java/org/apache/lucene/util/TestUtil.java | 14 +-
.../TestCompressingStoredFieldsFormat.java | 10 +-
.../apache/lucene/mockfile/TestDisableFsyncFS.java | 2 +-
settings.gradle | 7 +
versions.lock | 14 +-
versions.props | 2 +-
629 files changed, 12237 insertions(+), 6010 deletions(-)
create mode 100644 gradle/generation/antlr.gradle
copy lucene/analysis/common/src/java/org/apache/lucene/analysis/{de/GermanAnalyzer.java => ne/NepaliAnalyzer.java} (54%)
copy lucene/{sandbox/src/java/org/apache/lucene/sandbox/search/QueryProfilerTimingType.java => analysis/common/src/java/org/apache/lucene/analysis/ne/package-info.java} (64%)
copy lucene/analysis/common/src/java/org/apache/lucene/analysis/{de/GermanAnalyzer.java => ta/TamilAnalyzer.java} (54%)
copy lucene/{sandbox/src/java/org/apache/lucene/sandbox/search/QueryProfilerTimingType.java => analysis/common/src/java/org/apache/lucene/analysis/ta/package-info.java} (64%)
copy lucene/analysis/common/src/java/org/apache/lucene/analysis/{de/GermanAnalyzer.java => te/TeluguAnalyzer.java} (50%)
copy lucene/analysis/common/src/java/org/apache/lucene/analysis/{core/DecimalDigitFilter.java => te/TeluguNormalizationFilter.java} (54%)
create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguNormalizationFilterFactory.java
create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguNormalizer.java
copy lucene/analysis/common/src/java/org/apache/lucene/analysis/{core/DecimalDigitFilter.java => te/TeluguStemFilter.java} (52%)
create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguStemFilterFactory.java
create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguStemmer.java
copy lucene/{sandbox/src/java/org/apache/lucene/sandbox/search/QueryProfilerTimingType.java => analysis/common/src/java/org/apache/lucene/analysis/te/package-info.java} (64%)
create mode 100644 lucene/analysis/common/src/resources/org/apache/lucene/analysis/ne/stopwords.txt
create mode 100644 lucene/analysis/common/src/resources/org/apache/lucene/analysis/ta/stopwords.txt
create mode 100644 lucene/analysis/common/src/resources/org/apache/lucene/analysis/te/stopwords.txt
create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/ne/TestNepaliAnalyzer.java
create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/ta/TestTamilAnalyzer.java
create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguAnalyzer.java
create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguFilters.java
create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguNormalizer.java
create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguStemmer.java
create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseCompletionAnalyzer.java
create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseCompletionFilter.java
create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseCompletionFilterFactory.java
create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/KatakanaRomanizer.java
copy lucene/{sandbox/src/java/org/apache/lucene/sandbox/search/QueryProfilerTimingType.java => analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/package-info.java} (64%)
create mode 100644 lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
create mode 100644 lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseCompletionAnalyzer.java
create mode 100644 lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseCompletionFilter.java
create mode 100644 lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseCompletionFilterFactory.java
create mode 100644 lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/completion/TestKatakanaRomanizer.java
create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.8.10.0-cfs.zip
create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.8.10.0-nocfs.zip
create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.8.10.0.zip
create mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipReader.java
create mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipWriter.java
delete mode 100644 lucene/core/src/java/org/apache/lucene/index/TermVectors.java
copy lucene/core/src/{test/org/apache/lucene/store/TestByteArrayDataInput.java => java/org/apache/lucene/util/Unwrappable.java} (59%)
create mode 100644 lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java
create mode 100644 lucene/core/src/test/org/apache/lucene/store/TestMultiByteBuffersDirectory.java
create mode 100644 lucene/core/src/test/org/apache/lucene/store/TestOutputStreamIndexOutput.java
create mode 100644 lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDUtil.java
create mode 100644 lucene/expressions/src/generated/checksums/generateAntlr.json
create mode 100644 lucene/expressions/src/java/org/apache/lucene/expressions/js/ExpressionMath.java
create mode 100644 lucene/expressions/src/test/org/apache/lucene/expressions/js/TestExpressionMath.java
create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/FacetUtils.java
create mode 100644 lucene/facet/src/test/org/apache/lucene/facet/TestFacetUtils.java
copy lucene/{core/src/test/org/apache/lucene/store/TestMultiMMap.java => test-framework/src/java/org/apache/lucene/store/BaseChunkedDirectoryTestCase.java} (53%)
[lucene] 02/02: Format changes
Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
mayya pushed a commit to branch hnsw
in repository https://gitbox.apache.org/repos/asf/lucene.git
commit 43083edfdb41fe650da0201cb080a6d3e13056d3
Author: Mayya Sharipova <ma...@elastic.co>
AuthorDate: Mon Oct 11 19:49:29 2021 -0400
Format changes
---
lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java | 1 -
1 file changed, 1 deletion(-)
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
index 9fd6838..8b05096 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
@@ -530,7 +530,6 @@ public class TestHnswGraph extends LuceneTestCase {
}
}
-
/**
* Generate a random bitset where before startIndex all bits are set, and after startIndex each
* entry has a 2/3 probability of being set.
[lucene] 01/02: Merge remote-tracking branch 'upstream/main' into hnsw
Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
mayya pushed a commit to branch hnsw
in repository https://gitbox.apache.org/repos/asf/lucene.git
commit 3b4296d57cf15c7afea49462937f8b2bc7f02aab
Merge: 5e42fc2 f67dec1
Author: Mayya Sharipova <ma...@elastic.co>
AuthorDate: Mon Oct 11 19:25:44 2021 -0400
Merge remote-tracking branch 'upstream/main' into hnsw
README.md | 5 +-
build.gradle | 7 +-
dev-tools/doap/lucene.rdf | 7 +
dev-tools/scripts/README.md | 17 +-
dev-tools/scripts/addVersion.py | 51 +-
dev-tools/scripts/buildAndPushRelease.py | 31 +-
dev-tools/scripts/githubPRs.py | 16 +-
dev-tools/scripts/poll-mirrors.py | 4 +-
dev-tools/scripts/releaseWizard.py | 59 +-
dev-tools/scripts/releaseWizard.yaml | 327 ++----
dev-tools/scripts/releasedJirasRegex.py | 16 +-
dev-tools/scripts/scriptutil.py | 2 +-
gradle/documentation/render-javadoc.gradle | 19 +-
gradle/generation/antlr.gradle | 88 ++
gradle/generation/javacc.gradle | 16 +-
gradle/generation/jflex.gradle | 2 +-
gradle/generation/jflex/skeleton.default.txt | 6 +-
.../jflex/skeleton.disable.buffer.expansion.txt | 4 +-
gradle/generation/local-settings.gradle | 62 +-
gradle/generation/regenerate.gradle | 21 +
gradle/generation/snowball/snowball.patch | 6 +-
gradle/publishing/distribution.gradle | 3 +-
gradle/testing/alternative-jdk-support.gradle | 63 +-
gradle/testing/defaults-tests.gradle | 7 +-
gradle/validation/check-environment.gradle | 2 +-
gradle/validation/rat-sources.gradle | 10 +-
gradle/validation/spotless.gradle | 5 +-
gradle/validation/validate-source-patterns.gradle | 53 +-
gradle/wrapper/gradle-wrapper.jar.sha256 | 2 +-
gradle/wrapper/gradle-wrapper.jar.version | 2 +-
gradle/wrapper/gradle-wrapper.properties | 2 +-
gradlew.bat | 24 +-
lucene/CHANGES.txt | 147 ++-
lucene/MIGRATE.md | 11 +
.../checksums/generateClassicTokenizer.json | 4 +-
.../checksums/generateHTMLStripCharFilter.json | 6 +-
.../checksums/generateUAX29URLEmailTokenizer.json | 4 +-
.../checksums/generateWikipediaTokenizer.json | 4 +-
.../common/src/generated/checksums/snowball.json | 2 +-
.../lucene/analysis/ar/ArabicNormalizer.java | 2 +-
.../apache/lucene/analysis/ar/ArabicStemmer.java | 14 +-
.../lucene/analysis/bg/BulgarianStemmer.java | 6 +-
.../lucene/analysis/bn/BengaliNormalizer.java | 2 +-
.../apache/lucene/analysis/bn/BengaliStemmer.java | 2 +-
.../lucene/analysis/charfilter/BaseCharFilter.java | 4 +-
.../analysis/charfilter/HTMLStripCharFilter.java | 8 +-
.../analysis/charfilter/HTMLStripCharFilter.jflex | 2 +-
.../lucene/analysis/cjk/CJKBigramFilter.java | 12 +-
.../lucene/analysis/cjk/CJKWidthCharFilter.java | 6 +-
.../apache/lucene/analysis/cjk/CJKWidthFilter.java | 10 +-
.../lucene/analysis/ckb/SoraniNormalizer.java | 2 +-
.../apache/lucene/analysis/ckb/SoraniStemmer.java | 2 +-
.../analysis/classic/ClassicTokenizerImpl.java | 6 +-
.../analysis/commongrams/CommonGramsFilter.java | 2 +-
.../compound/hyphenation/PatternParser.java | 2 +-
.../analysis/compound/hyphenation/TernaryTree.java | 4 +-
.../lucene/analysis/core/DecimalDigitFilter.java | 2 +-
.../apache/lucene/analysis/cz/CzechStemmer.java | 8 +-
.../apache/lucene/analysis/de/GermanAnalyzer.java | 7 +
.../lucene/analysis/de/GermanLightStemmer.java | 6 +-
.../lucene/analysis/de/GermanMinimalStemmer.java | 2 +-
.../analysis/de/GermanNormalizationFilter.java | 2 +-
.../apache/lucene/analysis/el/GreekStemmer.java | 54 +-
.../analysis/email/UAX29URLEmailTokenizerImpl.java | 4 +-
.../lucene/analysis/en/EnglishMinimalStemmer.java | 2 +-
.../lucene/analysis/es/SpanishLightStemmer.java | 2 +-
.../lucene/analysis/es/SpanishMinimalStemmer.java | 2 +-
.../lucene/analysis/fa/PersianNormalizer.java | 2 +-
.../lucene/analysis/fi/FinnishLightStemmer.java | 12 +-
.../lucene/analysis/fr/FrenchLightStemmer.java | 4 +-
.../lucene/analysis/fr/FrenchMinimalStemmer.java | 2 +-
.../lucene/analysis/gl/GalicianMinimalStemmer.java | 2 +-
.../apache/lucene/analysis/gl/GalicianStemmer.java | 2 +-
.../apache/lucene/analysis/hi/HindiNormalizer.java | 2 +-
.../apache/lucene/analysis/hi/HindiStemmer.java | 2 +-
.../lucene/analysis/hu/HungarianLightStemmer.java | 10 +-
.../hunspell/HunspellStemFilterFactory.java | 2 +-
.../analysis/hunspell/ISO8859_14Decoder.java | 2 +-
.../lucene/analysis/id/IndonesianStemmer.java | 14 +-
.../apache/lucene/analysis/in/IndicNormalizer.java | 6 +-
.../lucene/analysis/it/ItalianLightStemmer.java | 2 +-
.../apache/lucene/analysis/lv/LatvianStemmer.java | 10 +-
.../lucene/analysis/minhash/MinHashFilter.java | 6 +-
.../analysis/miscellaneous/ASCIIFoldingFilter.java | 2 +-
.../analysis/miscellaneous/FingerprintFilter.java | 8 +-
.../miscellaneous/HyphenatedWordsFilter.java | 2 +-
.../miscellaneous/RemoveDuplicatesTokenFilter.java | 4 +-
.../miscellaneous/WordDelimiterFilter.java | 12 +-
.../miscellaneous/WordDelimiterFilterFactory.java | 2 +-
.../miscellaneous/WordDelimiterGraphFilter.java | 2 +-
.../WordDelimiterGraphFilterFactory.java | 2 +-
.../miscellaneous/WordDelimiterIterator.java | 4 +-
.../GermanAnalyzer.java => ne/NepaliAnalyzer.java} | 99 +-
.../apache/lucene/analysis/ne/package-info.java} | 23 +-
.../lucene/analysis/no/NorwegianLightStemmer.java | 2 +-
.../analysis/no/NorwegianMinimalStemmer.java | 2 +-
.../lucene/analysis/payloads/PayloadHelper.java | 18 +-
.../lucene/analysis/pt/PortugueseLightStemmer.java | 6 +-
.../analysis/pt/PortugueseMinimalStemmer.java | 2 +-
.../lucene/analysis/pt/PortugueseStemmer.java | 2 +-
.../apache/lucene/analysis/pt/RSLPStemmerBase.java | 24 +-
.../lucene/analysis/ru/RussianLightStemmer.java | 6 +-
.../lucene/analysis/snowball/SnowballFilter.java | 4 +-
.../analysis/sr/SerbianNormalizationFilter.java | 2 +-
.../sr/SerbianNormalizationRegularFilter.java | 2 +-
.../lucene/analysis/sv/SwedishLightStemmer.java | 2 +-
.../lucene/analysis/sv/SwedishMinimalStemmer.java | 2 +-
.../lucene/analysis/synonym/SolrSynonymParser.java | 8 +-
.../apache/lucene/analysis/synonym/SynonymMap.java | 2 +-
.../analysis/synonym/WordnetSynonymParser.java | 4 +-
.../GermanAnalyzer.java => ta/TamilAnalyzer.java} | 98 +-
.../apache/lucene/analysis/ta/package-info.java} | 23 +-
.../GermanAnalyzer.java => te/TeluguAnalyzer.java} | 103 +-
.../TeluguNormalizationFilter.java} | 45 +-
.../te/TeluguNormalizationFilterFactory.java | 64 ++
.../lucene/analysis/te/TeluguNormalizer.java | 116 ++
.../TeluguStemFilter.java} | 36 +-
.../analysis/te/TeluguStemFilterFactory.java | 58 +
.../apache/lucene/analysis/te/TeluguStemmer.java | 64 ++
.../apache/lucene/analysis/te/package-info.java} | 23 +-
.../lucene/analysis/tr/TurkishLowerCaseFilter.java | 4 +-
.../lucene/analysis/util/CharArrayIterator.java | 4 +-
.../lucene/analysis/util/OpenStringBuilder.java | 8 +-
.../analysis/util/SegmentingTokenizerBase.java | 2 +-
.../apache/lucene/analysis/util/StemmerUtil.java | 10 +-
.../analysis/wikipedia/WikipediaTokenizerImpl.java | 6 +-
.../org/tartarus/snowball/SnowballProgram.java | 6 +-
.../org.apache.lucene.analysis.TokenFilterFactory | 2 +
.../org/apache/lucene/analysis/ne/stopwords.txt | 256 +++++
.../org/apache/lucene/analysis/ta/stopwords.txt | 126 +++
.../org/apache/lucene/analysis/te/stopwords.txt | 51 +
.../apache/lucene/analysis/core/TestAnalyzers.java | 4 +-
.../lucene/analysis/core/TestBugInSomething.java | 2 +-
.../lucene/analysis/core/TestRandomChains.java | 10 +-
.../lucene/analysis/core/TestStopAnalyzer.java | 2 +-
.../lucene/analysis/core/TestTypeTokenFilter.java | 2 +-
.../analysis/email/TestUAX29URLEmailAnalyzer.java | 2 +-
.../analysis/email/TestUAX29URLEmailTokenizer.java | 2 +-
.../miscellaneous/TestCapitalizationFilter.java | 4 +-
.../lucene/analysis/ne/TestNepaliAnalyzer.java | 76 ++
.../query/TestQueryAutoStopWordAnalyzer.java | 4 +-
.../lucene/analysis/shingle/TestShingleFilter.java | 6 +-
.../lucene/analysis/ta/TestTamilAnalyzer.java | 67 ++
.../lucene/analysis/te/TestTeluguAnalyzer.java | 57 +
.../lucene/analysis/te/TestTeluguFilters.java | 79 ++
.../lucene/analysis/te/TestTeluguNormalizer.java | 60 +
.../lucene/analysis/te/TestTeluguStemmer.java | 59 +
.../analysis/util/TestCharArrayIterator.java | 10 +-
.../analysis/util/TestSegmentingTokenizerBase.java | 6 +-
.../analysis/wikipedia/TestWikipediaTokenizer.java | 2 +-
.../lucene/analysis/icu/ICUTransformFilter.java | 4 +-
.../icu/segmentation/BreakIteratorWrapper.java | 4 +-
.../icu/segmentation/CharArrayIterator.java | 4 +-
.../icu/segmentation/CompositeBreakIterator.java | 6 +-
.../analysis/icu/segmentation/ICUTokenizer.java | 2 +-
.../icu/segmentation/ICUTokenizerFactory.java | 2 +-
.../analysis/icu/segmentation/ScriptIterator.java | 6 +-
.../icu/segmentation/TestCharArrayIterator.java | 2 +-
.../icu/segmentation/TestICUTokenizer.java | 8 +-
.../analysis/icu/GenerateUTR30DataFiles.java | 2 +-
.../lucene/analysis/icu/RBBIRuleCompiler.java | 2 +-
.../lucene/analysis/ja/JapaneseAnalyzer.java | 2 +-
.../analysis/ja/JapaneseCompletionAnalyzer.java | 65 ++
.../analysis/ja/JapaneseCompletionFilter.java | 272 +++++
.../ja/JapaneseCompletionFilterFactory.java | 66 ++
.../ja/JapanesePartOfSpeechStopFilterFactory.java | 2 +-
.../analysis/ja/completion/CharSequenceUtils.java | 91 ++
.../analysis/ja/completion/KatakanaRomanizer.java | 193 ++++
.../analysis/ja/completion/package-info.java} | 23 +-
.../lucene/analysis/ja/dict/BinaryDictionary.java | 12 +-
.../apache/lucene/analysis/ja/dict/Dictionary.java | 6 +-
.../lucene/analysis/ja/dict/TokenInfoFST.java | 4 +-
.../lucene/analysis/ja/dict/UnknownDictionary.java | 2 +-
.../lucene/analysis/ja/dict/UserDictionary.java | 10 +-
.../org.apache.lucene.analysis.TokenFilterFactory | 1 +
.../lucene/analysis/ja/completion/romaji_map.txt | 344 ++++++
.../analysis/ja/dict/TokenInfoDictionary$fst.dat | Bin 1686422 -> 1686422 bytes
.../ja/TestJapaneseCompletionAnalyzer.java | 72 ++
.../analysis/ja/TestJapaneseCompletionFilter.java | 271 +++++
.../ja/TestJapaneseCompletionFilterFactory.java | 59 +
.../ja/completion/TestKatakanaRomanizer.java | 68 ++
.../analysis/morfologik/MorfologikFilter.java | 2 +-
.../apache/lucene/analysis/ko/KoreanTokenizer.java | 18 +-
.../analysis/ko/dict/TokenInfoDictionary$fst.dat | Bin 5632456 -> 5632456 bytes
.../analysis/phonetic/TestPhoneticFilter.java | 4 +-
.../analysis/cn/smart/hhmm/AbstractDictionary.java | 4 +-
.../analysis/cn/smart/hhmm/BigramDictionary.java | 6 +-
.../cn/smart/TestSmartChineseAnalyzer.java | 18 +-
.../src/java/org/egothor/stemmer/Compile.java | 2 +-
.../stempel/src/java/org/egothor/stemmer/Diff.java | 6 +-
.../src/java/org/egothor/stemmer/Gener.java | 4 +-
.../stempel/src/java/org/egothor/stemmer/Lift.java | 2 +-
.../src/java/org/egothor/stemmer/MultiTrie2.java | 8 +-
.../src/java/org/egothor/stemmer/Optimizer.java | 2 +-
.../src/java/org/egothor/stemmer/Reduce.java | 6 +-
.../stempel/src/java/org/egothor/stemmer/Trie.java | 4 +-
.../src/test/org/egothor/stemmer/TestStemmer.java | 26 +-
.../lucene50/Lucene50LiveDocsFormat.java | 2 +-
.../lucene50/Lucene50SkipReader.java | 10 +-
.../lucene84/Lucene84SkipReader.java | 10 +-
.../lucene87/LZ4WithPresetDictCompressionMode.java | 4 +-
.../lucene87/Lucene87StoredFieldsFormat.java | 4 +-
.../backward_codecs/packed/LegacyDirectWriter.java | 2 +-
.../lucene50/TestBlockPostingsFormat3.java | 2 +-
.../lucene70/TestLucene70DocValuesFormat.java | 6 +-
.../BaseLucene80DocValuesFormatTestCase.java | 6 +-
.../lucene87/Lucene87RWStoredFieldsFormat.java | 2 -
.../packed/TestLegacyDirectPacked.java | 6 +-
.../backward_index/TestBackwardsCompatibility.java | 9 +-
.../backward_index/TestManyPointsInOldIndex.java | 2 +-
.../lucene/backward_index/index.8.10.0-cfs.zip | Bin 0 -> 16967 bytes
.../lucene/backward_index/index.8.10.0-nocfs.zip | Bin 0 -> 16973 bytes
.../apache/lucene/backward_index/sorted.8.10.0.zip | Bin 0 -> 142502 bytes
.../byTask/feeds/SimpleSloppyPhraseQueryMaker.java | 2 +-
.../benchmark/byTask/feeds/TrecContentSource.java | 2 +-
.../benchmark/byTask/feeds/TrecDocParser.java | 2 +-
.../benchmark/byTask/tasks/AddIndexesTask.java | 2 +-
.../lucene/benchmark/byTask/tasks/PerfTask.java | 2 +-
.../lucene/benchmark/byTask/tasks/ReportTask.java | 2 +-
.../benchmark/byTask/tasks/TaskSequence.java | 2 +-
.../lucene/benchmark/byTask/utils/Config.java | 30 +-
.../lucene/benchmark/byTask/utils/Format.java | 2 +-
.../org/apache/lucene/benchmark/quality/Judge.java | 2 +-
.../lucene/benchmark/quality/QualityBenchmark.java | 8 +-
.../lucene/benchmark/quality/QualityStats.java | 2 +-
.../lucene/benchmark/quality/trec/QueryDriver.java | 4 +-
.../benchmark/quality/trec/Trec1MQReader.java | 2 +-
.../benchmark/quality/trec/TrecTopicsReader.java | 2 +-
.../quality/utils/QualityQueriesFinder.java | 8 +-
.../benchmark/quality/utils/SimpleQQParser.java | 4 +-
.../benchmark/quality/utils/SubmissionReport.java | 2 +-
.../apache/lucene/benchmark/BenchmarkTestCase.java | 2 +-
.../benchmark/byTask/TestPerfTasksLogic.java | 42 +-
.../lucene/benchmark/quality/TestQualityRun.java | 8 +-
.../org/apache/lucene/codecs/bloom/FuzzySet.java | 2 +-
.../lucene/codecs/memory/FSTTermsReader.java | 2 +-
.../simpletext/SimpleTextCompoundFormat.java | 14 +-
.../simpletext/SimpleTextDocValuesReader.java | 8 +-
.../simpletext/SimpleTextFieldInfosFormat.java | 2 +-
.../codecs/simpletext/SimpleTextFieldsReader.java | 184 ++-
.../codecs/simpletext/SimpleTextFieldsWriter.java | 53 +-
.../simpletext/SimpleTextKnnVectorsReader.java | 31 +-
.../codecs/simpletext/SimpleTextSkipReader.java | 206 ++++
.../codecs/simpletext/SimpleTextSkipWriter.java | 157 +++
.../simpletext/SimpleTextStoredFieldsReader.java | 4 +-
.../simpletext/SimpleTextTermVectorsReader.java | 16 +-
.../checksums/generateStandardTokenizer.json | 4 +-
.../apache/lucene/analysis/AnalysisSPILoader.java | 6 +-
.../org/apache/lucene/analysis/WordlistLoader.java | 2 +-
.../analysis/standard/StandardTokenizerImpl.java | 4 +-
.../java/org/apache/lucene/codecs/CodecUtil.java | 4 +-
.../apache/lucene/codecs/DocValuesConsumer.java | 259 +++--
.../lucene/codecs/MultiLevelSkipListReader.java | 34 +-
.../lucene/codecs/MultiLevelSkipListWriter.java | 24 +-
.../apache/lucene/codecs/SegmentInfoFormat.java | 2 +-
.../apache/lucene/codecs/TermVectorsReader.java | 12 +-
.../lucene90/LZ4WithPresetDictCompressionMode.java | 4 +-
.../codecs/lucene90/Lucene90DocValuesConsumer.java | 77 +-
.../codecs/lucene90/Lucene90DocValuesProducer.java | 113 ++
.../codecs/lucene90/Lucene90FieldInfosFormat.java | 11 +-
.../codecs/lucene90/Lucene90HnswVectorsFormat.java | 2 +-
.../codecs/lucene90/Lucene90HnswVectorsReader.java | 19 +-
.../codecs/lucene90/Lucene90LiveDocsFormat.java | 2 +-
.../codecs/lucene90/Lucene90PostingsWriter.java | 9 -
.../lucene/codecs/lucene90/Lucene90SkipReader.java | 10 +-
.../lucene90/Lucene90StoredFieldsFormat.java | 4 +-
.../apache/lucene/codecs/lucene90/PForUtil.java | 31 +-
.../org/apache/lucene/document/DoublePoint.java | 4 +-
.../org/apache/lucene/document/FloatPoint.java | 4 +-
.../apache/lucene/document/InetAddressPoint.java | 6 +-
.../java/org/apache/lucene/document/IntPoint.java | 4 +-
.../org/apache/lucene/document/KnnVectorField.java | 5 +-
.../org/apache/lucene/document/LatLonPoint.java | 2 +-
.../document/LatLonPointDistanceFeatureQuery.java | 8 +-
.../lucene/document/LatLonPointDistanceQuery.java | 70 +-
.../apache/lucene/document/LatLonPointQuery.java | 25 +-
.../document/LatLonShapeBoundingBoxQuery.java | 83 +-
.../lucene/document/LongDistanceFeatureQuery.java | 24 +-
.../java/org/apache/lucene/document/LongPoint.java | 4 +-
.../org/apache/lucene/document/XYPointField.java | 2 +-
.../src/java/org/apache/lucene/geo/EdgeTree.java | 4 +-
.../src/java/org/apache/lucene/geo/Polygon2D.java | 4 +-
.../apache/lucene/index/BaseCompositeReader.java | 27 +-
.../org/apache/lucene/index/ByteSliceReader.java | 7 +-
.../java/org/apache/lucene/index/CheckIndex.java | 1028 +++++++++++------
.../java/org/apache/lucene/index/CodecReader.java | 13 +-
.../lucene/index/ConcurrentMergeScheduler.java | 25 +-
.../java/org/apache/lucene/index/DocIDMerger.java | 3 +-
.../java/org/apache/lucene/index/DocValues.java | 89 +-
.../apache/lucene/index/DocValuesLeafReader.java | 2 +-
.../org/apache/lucene/index/FilterLeafReader.java | 5 +-
.../lucene/index/FreqProxTermsWriterPerField.java | 10 +-
.../apache/lucene/index/FrozenBufferedUpdates.java | 2 +-
.../org/apache/lucene/index/IndexFileNames.java | 2 +-
.../java/org/apache/lucene/index/IndexReader.java | 14 +-
.../org/apache/lucene/index/IndexingChain.java | 84 +-
.../java/org/apache/lucene/index/MergePolicy.java | 2 +-
.../apache/lucene/index/MergeReaderWrapper.java | 19 +-
.../java/org/apache/lucene/index/MultiFields.java | 2 +-
.../java/org/apache/lucene/index/MultiSorter.java | 2 +-
.../org/apache/lucene/index/NormValuesWriter.java | 2 +-
.../java/org/apache/lucene/index/OrdinalMap.java | 6 +-
.../apache/lucene/index/ParallelLeafReader.java | 31 +-
.../java/org/apache/lucene/index/PointValues.java | 25 +-
.../apache/lucene/index/SegmentCoreReaders.java | 17 +-
.../java/org/apache/lucene/index/SegmentInfo.java | 15 +
.../java/org/apache/lucene/index/SegmentInfos.java | 8 +-
.../org/apache/lucene/index/SegmentReader.java | 16 +-
.../lucene/index/SortedSetDocValuesWriter.java | 13 +-
.../java/org/apache/lucene/index/TermVectors.java | 33 -
.../lucene/index/VectorSimilarityFunction.java | 42 +-
.../org/apache/lucene/search/BooleanScorer.java | 2 +-
.../apache/lucene/search/ConstantScoreQuery.java | 5 +
.../apache/lucene/search/FieldValueHitQueue.java | 5 -
.../org/apache/lucene/search/IndexSearcher.java | 94 +-
.../org/apache/lucene/search/LRUQueryCache.java | 7 +-
.../apache/lucene/search/MatchAllDocsQuery.java | 5 +
.../org/apache/lucene/search/MatchNoDocsQuery.java | 5 +
.../org/apache/lucene/search/MultiCollector.java | 28 +-
.../java/org/apache/lucene/search/PhraseQuery.java | 4 +-
.../org/apache/lucene/search/PointInSetQuery.java | 22 +-
.../org/apache/lucene/search/PointRangeQuery.java | 75 +-
.../org/apache/lucene/search/ScoringRewrite.java | 2 +-
.../src/java/org/apache/lucene/search/Sort.java | 84 +-
.../java/org/apache/lucene/search/SortField.java | 78 +-
.../lucene/search/SortedNumericSortField.java | 101 +-
.../org/apache/lucene/search/SynonymQuery.java | 4 +-
.../java/org/apache/lucene/search/TermQuery.java | 16 +
.../apache/lucene/search/TopFieldCollector.java | 11 +-
.../src/java/org/apache/lucene/search/Weight.java | 48 +-
.../lucene/search/comparators/DocComparator.java | 2 +-
.../search/comparators/NumericComparator.java | 54 +-
.../lucene/search/similarities/BM25Similarity.java | 2 +-
.../search/similarities/MultiSimilarity.java | 10 +-
.../lucene/search/similarities/SimilarityBase.java | 2 +-
.../search/similarities/TFIDFSimilarity.java | 2 +-
.../org/apache/lucene/store/BufferedChecksum.java | 2 +-
.../apache/lucene/store/ByteArrayDataInput.java | 40 +-
.../apache/lucene/store/ByteArrayDataOutput.java | 22 +
.../apache/lucene/store/ByteBufferIndexInput.java | 4 +-
.../apache/lucene/store/ByteBuffersDataInput.java | 86 +-
.../apache/lucene/store/ByteBuffersDataOutput.java | 48 +-
.../apache/lucene/store/ByteBuffersIndexInput.java | 6 +
.../java/org/apache/lucene/store/DataInput.java | 11 +-
.../java/org/apache/lucene/store/DataOutput.java | 13 +-
.../org/apache/lucene/store/MMapDirectory.java | 2 +-
.../lucene/store/OutputStreamIndexOutput.java | 61 +-
.../org/apache/lucene/store/RandomAccessInput.java | 10 +-
.../lucene/store/RateLimitedIndexOutput.java | 21 +
.../src/java/org/apache/lucene/util/ArrayUtil.java | 38 +
.../src/java/org/apache/lucene/util/BitSet.java | 3 +
.../src/java/org/apache/lucene/util/BitUtil.java | 92 +-
.../java/org/apache/lucene/util/ByteBlockPool.java | 2 +-
.../java/org/apache/lucene/util/FixedBitSet.java | 1 +
.../java/org/apache/lucene/util/NumericUtils.java | 30 +-
.../org/apache/lucene/util/SparseFixedBitSet.java | 61 +-
.../java/org/apache/lucene/util/StringHelper.java | 10 +-
.../org/apache/lucene/util/Unwrappable.java} | 28 +-
.../java/org/apache/lucene/util/VectorUtil.java | 47 +-
.../src/java/org/apache/lucene/util/Version.java | 14 +
.../lucene/util/automaton/LevenshteinAutomata.java | 12 +-
.../apache/lucene/util/bkd/BKDRadixSelector.java | 48 +-
.../java/org/apache/lucene/util/bkd/BKDUtil.java | 104 ++
.../java/org/apache/lucene/util/bkd/BKDWriter.java | 219 +---
.../apache/lucene/util/bkd/HeapPointReader.java | 6 +-
.../apache/lucene/util/bkd/HeapPointWriter.java | 6 +-
.../lucene/util/bkd/MutablePointsReaderUtils.java | 29 +-
.../apache/lucene/util/bkd/OfflinePointReader.java | 6 +-
.../java/org/apache/lucene/util/compress/LZ4.java | 9 +-
.../apache/lucene/util/fst/PositiveIntOutputs.java | 3 +-
.../org/apache/lucene/util/hnsw/HnswGraph.java | 11 +-
.../apache/lucene/util/hnsw/HnswGraphBuilder.java | 16 +-
.../apache/lucene/util/packed/DirectWriter.java | 95 +-
.../apache/lucene/analysis/TestCharArraySet.java | 2 +-
.../analysis/standard/TestStandardAnalyzer.java | 2 +-
.../org/apache/lucene/codecs/TestCodecUtil.java | 8 +-
.../lucene90/TestLucene90DocValuesFormat.java | 6 +-
.../test/org/apache/lucene/document/TestField.java | 28 +-
.../document/TestLatLonPointDistanceSort.java | 2 +-
.../apache/lucene/document/TestLatLonShape.java | 8 +-
.../lucene/document/TestPerFieldConsistency.java | 8 +-
.../org/apache/lucene/document/TestXYShape.java | 4 +-
.../test/org/apache/lucene/geo/TestGeoUtils.java | 6 +-
.../test/org/apache/lucene/geo/TestPolygon2D.java | 20 +-
.../apache/lucene/index/Test2BBinaryDocValues.java | 4 +-
.../apache/lucene/index/Test2BPostingsBytes.java | 2 +-
.../index/Test2BSortedDocValuesFixedSorted.java | 2 +-
.../lucene/index/Test2BSortedDocValuesOrds.java | 2 +-
.../lucene/index/TestAllFilesDetectTruncation.java | 2 +-
.../org/apache/lucene/index/TestCheckIndex.java | 155 +++
.../lucene/index/TestConcurrentMergeScheduler.java | 80 ++
.../apache/lucene/index/TestCustomTermFreq.java | 29 +-
.../lucene/index/TestDemoParallelLeafReader.java | 5 +-
.../apache/lucene/index/TestDocValuesIndexing.java | 70 +-
.../apache/lucene/index/TestDocsAndPositions.java | 14 +-
.../org/apache/lucene/index/TestDuelingCodecs.java | 2 +-
.../lucene/index/TestExitableDirectoryReader.java | 9 +-
.../apache/lucene/index/TestFilterCodecReader.java | 13 +-
.../apache/lucene/index/TestIndexFileDeleter.java | 6 +-
.../org/apache/lucene/index/TestIndexOptions.java | 7 +-
.../org/apache/lucene/index/TestIndexSorting.java | 6 +-
.../org/apache/lucene/index/TestIndexWriter.java | 4 +-
.../lucene/index/TestIndexWriterExceptions.java | 6 +-
.../lucene/index/TestIndexWriterMaxDocs.java | 8 +-
.../lucene/index/TestIndexWriterOnDiskFull.java | 2 +-
.../index/TestIndexWriterThreadsToSegments.java | 4 +-
.../lucene/index/TestNumericDocValuesUpdates.java | 20 +-
.../test/org/apache/lucene/index/TestPayloads.java | 2 +-
.../org/apache/lucene/index/TestPointValues.java | 14 +-
.../apache/lucene/index/TestPostingsOffsets.java | 2 +-
.../lucene/index/TestSameTokenSamePosition.java | 8 +-
.../org/apache/lucene/index/TestSegmentInfos.java | 54 +-
.../lucene/index/TestSegmentToThreadMapping.java | 3 +-
.../apache/lucene/index/TestSwappedIndexFiles.java | 7 +-
.../org/apache/lucene/index/TestTermsEnum.java | 36 +-
.../apache/lucene/index/TestTermsHashPerField.java | 28 +-
.../apache/lucene/index/TestTieredMergePolicy.java | 26 +-
.../apache/lucene/search/TestAutomatonQuery.java | 2 +-
.../org/apache/lucene/search/TestBooleanQuery.java | 40 +
.../search/TestBooleanQueryVisitSubscorers.java | 2 +-
.../org/apache/lucene/search/TestDateSort.java | 3 +-
.../apache/lucene/search/TestDocValuesQueries.java | 41 +-
.../lucene/search/TestDoubleValuesSource.java | 6 +-
.../org/apache/lucene/search/TestFilterWeight.java | 3 +-
.../org/apache/lucene/search/TestFuzzyQuery.java | 8 +-
.../apache/lucene/search/TestIndexSearcher.java | 21 +-
.../apache/lucene/search/TestKnnVectorQuery.java | 157 ++-
.../apache/lucene/search/TestLRUQueryCache.java | 26 +-
.../apache/lucene/search/TestLiveFieldValues.java | 5 +-
.../apache/lucene/search/TestLongValuesSource.java | 6 +-
.../apache/lucene/search/TestMinShouldMatch2.java | 20 +-
.../apache/lucene/search/TestMultiCollector.java | 63 ++
.../apache/lucene/search/TestMultiPhraseQuery.java | 4 +-
.../apache/lucene/search/TestMultiSliceMerge.java | 4 +-
.../lucene/search/TestMultiThreadTermVectors.java | 12 +-
.../org/apache/lucene/search/TestPhraseQuery.java | 2 +-
.../org/apache/lucene/search/TestPointQueries.java | 8 +-
.../apache/lucene/search/TestRegexpRandom2.java | 6 +-
.../lucene/search/TestSloppyPhraseQuery2.java | 2 +-
.../test/org/apache/lucene/search/TestSort.java | 25 +-
.../apache/lucene/search/TestSortOptimization.java | 258 ++++-
.../lucene/search/TestSortedNumericSortField.java | 17 +-
.../lucene/search/TestSortedSetSelector.java | 117 +-
.../lucene/search/TestSortedSetSortField.java | 38 +-
.../apache/lucene/search/TestTermInSetQuery.java | 45 +-
.../org/apache/lucene/search/TestTermQuery.java | 30 +
.../lucene/search/TestTimeLimitingCollector.java | 2 +-
.../org/apache/lucene/search/TestWildcard.java | 10 +-
.../lucene/search/TestXYPointDistanceSort.java | 2 +-
.../lucene/store/BaseDataOutputTestCase.java | 8 +-
.../apache/lucene/store/TestBufferedChecksum.java | 2 +-
.../lucene/store/TestByteArrayDataInput.java | 32 +
.../lucene/store/TestByteBuffersDataOutput.java | 78 +-
.../org/apache/lucene/store/TestMmapDirectory.java | 2 +-
.../store/TestMultiByteBuffersDirectory.java | 49 +
.../org/apache/lucene/store/TestMultiMMap.java | 315 +-----
.../lucene/store/TestOutputStreamIndexOutput.java | 54 +
.../test/org/apache/lucene/util/TestArrayUtil.java | 50 +
.../test/org/apache/lucene/util/TestBytesRef.java | 2 +-
.../org/apache/lucene/util/TestBytesRefHash.java | 12 +-
.../test/org/apache/lucene/util/TestCharsRef.java | 6 +-
.../test/org/apache/lucene/util/TestIntsRef.java | 2 +-
.../test/org/apache/lucene/util/TestLongsRef.java | 2 +-
.../org/apache/lucene/util/TestPagedBytes.java | 2 +-
.../org/apache/lucene/util/TestStringHelper.java | 52 +-
.../org/apache/lucene/util/TestUnicodeUtil.java | 18 +-
.../org/apache/lucene/util/TestVectorUtil.java | 27 +
.../test/org/apache/lucene/util/TestVersion.java | 13 +
.../lucene/util/automaton/TestAutomaton.java | 108 +-
.../util/automaton/TestDeterminizeLexicon.java | 2 +-
.../util/automaton/TestLevenshteinAutomata.java | 12 +-
.../apache/lucene/util/automaton/TestRegExp.java | 2 +-
.../lucene/util/automaton/TestUTF32ToUTF8.java | 2 +-
.../org/apache/lucene/util/bkd/TestBKDUtil.java | 136 +++
.../test/org/apache/lucene/util/fst/TestFSTs.java | 98 +-
.../org/apache/lucene/util/hnsw/TestHnswGraph.java | 42 +-
.../lucene/util/packed/TestDirectPacked.java | 6 +-
.../lucene/demo/facet/DistanceFacetsExample.java | 8 +-
.../src/generated/checksums/generateAntlr.json | 7 +
.../lucene/expressions/js/ExpressionMath.java | 46 +
.../lucene/expressions/js/JavascriptLexer.java | 349 +++---
.../lucene/expressions/js/JavascriptParser.java | 1185 ++++++++++++--------
.../lucene/expressions/js/JavascriptVisitor.java | 85 +-
.../apache/lucene/expressions/js/package-info.java | 2 +-
.../expressions/js/JavascriptCompiler.properties | 3 +-
.../lucene/expressions/TestDemoExpressions.java | 21 +-
.../lucene/expressions/TestExpressionSorts.java | 6 +-
.../lucene/expressions/js/TestExpressionMath.java | 70 ++
.../expressions/js/TestJavascriptFunction.java | 2 +-
.../org/apache/lucene/facet/DrillSideways.java | 14 +-
.../apache/lucene/facet/DrillSidewaysQuery.java | 17 +-
.../java/org/apache/lucene/facet/FacetUtils.java | 84 ++
.../java/org/apache/lucene/facet/FacetsConfig.java | 7 +-
.../apache/lucene/facet/LongValueFacetCounts.java | 14 +-
.../lucene/facet/StringValueFacetCounts.java | 16 +-
.../ConcurrentSortedSetDocValuesFacetCounts.java | 9 +-
.../sortedset/SortedSetDocValuesFacetCounts.java | 13 +-
.../lucene/facet/taxonomy/FloatTaxonomyFacets.java | 12 +-
.../facet/taxonomy/IntAssociationFacetField.java | 11 +-
.../lucene/facet/taxonomy/IntTaxonomyFacets.java | 12 +-
.../TaxonomyFacetSumFloatAssociations.java | 15 +-
.../taxonomy/TaxonomyFacetSumIntAssociations.java | 13 +-
.../lucene/facet/taxonomy/TaxonomyMergeUtils.java | 4 +-
.../lucene/facet/taxonomy/TaxonomyReader.java | 15 +
.../directory/DirectoryTaxonomyReader.java | 153 ++-
.../org/apache/lucene/facet/TestDrillSideways.java | 142 ++-
.../org/apache/lucene/facet/TestFacetUtils.java | 106 ++
.../lucene/facet/TestLongValueFacetCounts.java | 28 +
.../lucene/facet/TestStringValueFacetCounts.java | 35 +
.../sortedset/TestSortedSetDocValuesFacets.java | 837 +++++++-------
.../facet/taxonomy/TestTaxonomyCombined.java | 8 +-
.../facet/taxonomy/directory/TestAddTaxonomy.java | 2 +-
.../directory/TestBackwardsCompatibility.java | 22 +
.../directory/TestDirectoryTaxonomyReader.java | 78 +-
.../directory/TestDirectoryTaxonomyWriter.java | 3 +-
.../lucene/search/highlight/GradientFormatter.java | 2 +-
.../lucene/search/highlight/Highlighter.java | 4 +-
.../search/highlight/TermVectorLeafReader.java | 16 +-
.../lucene/search/matchhighlight/OffsetRange.java | 12 +-
.../search/matchhighlight/PassageSelector.java | 20 +-
.../search/uhighlight/DefaultPassageFormatter.java | 2 +-
.../apache/lucene/search/uhighlight/Passage.java | 8 +-
.../lucene/search/uhighlight/PassageFormatter.java | 2 +-
.../PostingsWithTermVectorsOffsetStrategy.java | 18 +-
.../uhighlight/TermVectorOffsetStrategy.java | 13 +-
.../uhighlight/TokenStreamOffsetStrategy.java | 2 +-
.../search/uhighlight/UnifiedHighlighter.java | 37 +-
.../lucene/search/highlight/TestHighlighter.java | 8 +-
.../matchhighlight/TestMatchHighlighter.java | 3 +-
.../search/matchhighlight/TestPassageSelector.java | 94 +-
.../search/uhighlight/TestUnifiedHighlighter.java | 52 +-
.../uhighlight/TestUnifiedHighlighterMTQ.java | 44 +-
.../uhighlight/TestUnifiedHighlighterRanking.java | 6 +-
.../TestUnifiedHighlighterTermIntervals.java | 52 +-
.../uhighlight/TestUnifiedHighlighterTermVec.java | 30 +-
lucene/licenses/jetty-NOTICE.txt | 32 +-
lucene/licenses/junit-LICENSE-CPL.txt | 24 +-
lucene/licenses/pddl-10.txt | 2 +-
lucene/luke/build.gradle | 7 +-
.../apache/lucene/index/memory/MemoryIndex.java | 23 +-
lucene/misc/native/build.gradle | 5 +-
.../org/apache/lucene/misc/IndexMergeTool.java | 4 +-
.../apache/lucene/misc/store/WindowsDirectory.java | 2 +-
.../misc/util/fst/UpToTwoPositiveIntOutputs.java | 3 +-
.../search/TestDiversifiedTopDocsCollector.java | 2 +-
lucene/packaging/build.gradle | 38 +-
.../queries/payloads/PayloadMatcherFactory.java | 12 +-
.../lucene/queries/function/FunctionTestSetup.java | 4 +-
.../queries/function/TestFieldScoreQuery.java | 2 +-
.../queries/function/TestFunctionScoreQuery.java | 2 +-
.../queries/function/TestIndexReaderFunctions.java | 8 +-
.../queries/function/TestLongNormValueSource.java | 6 +-
.../lucene/queries/function/TestValueSources.java | 6 +-
.../lucene/queries/intervals/TestIntervals.java | 4 +-
.../queries/payloads/TestPayloadExplanations.java | 2 +-
.../queries/spans/TestSpanSearchEquivalence.java | 32 +-
.../queryparser/classic/QueryParserBase.java | 27 +-
.../queryparser/simple/SimpleQueryParser.java | 6 +-
.../queryparser/xml/CorePlusQueriesParser.java | 2 +-
.../xml/builders/LikeThisQueryBuilder.java | 4 +-
.../classic/TestMultiFieldQueryParser.java | 2 +-
.../queryparser/classic/TestQueryParser.java | 30 +-
.../complexPhrase/TestComplexPhraseQuery.java | 2 +-
.../flexible/standard/TestMultiFieldQPHelper.java | 2 +-
.../flexible/standard/TestQPHelper.java | 6 +-
.../queryparser/simple/TestSimpleQueryParser.java | 2 +-
.../surround/query/Test01Exceptions.java | 2 +-
.../queryparser/surround/query/Test02Boolean.java | 2 +-
.../queryparser/surround/query/Test03Distance.java | 2 +-
.../queryparser/util/QueryParserTestBase.java | 10 +-
lucene/replicator/build.gradle | 8 +-
.../lucene/replicator/nrt/SimpleTransLog.java | 12 +-
.../codecs/idversion/IDVersionPostingsFormat.java | 19 +-
.../lucene/sandbox/document/BigIntegerPoint.java | 4 +-
.../lucene/sandbox/document/HalfFloatPoint.java | 10 +-
.../lucene/sandbox/search/CombinedFieldQuery.java | 6 +-
.../sandbox/search/QueryProfilerTimingType.java | 1 +
.../lucene/sandbox/search/QueryProfilerWeight.java | 11 +
.../search/TestQueryProfilerIndexSearcher.java | 41 +-
.../apache/lucene/spatial/StrategyTestCase.java | 2 +-
.../org/apache/lucene/spatial3d/Geo3DPoint.java | 4 +-
.../lucene/search/spell/DirectSpellChecker.java | 4 +-
.../lucene/search/spell/LevenshteinDistance.java | 6 +-
.../search/spell/LuceneLevenshteinDistance.java | 2 +-
.../apache/lucene/search/spell/NGramDistance.java | 6 +-
.../suggest/analyzing/AnalyzingSuggester.java | 2 +-
.../search/suggest/analyzing/FuzzySuggester.java | 2 +-
.../suggest/document/FuzzyCompletionQuery.java | 2 +-
.../search/suggest/fst/ExternalRefSorter.java | 65 +-
.../analyzing/TestAnalyzingInfixSuggester.java | 26 +-
.../suggest/analyzing/TestAnalyzingSuggester.java | 6 +-
.../analyzing/TestBlendedInfixSuggester.java | 18 +-
.../suggest/analyzing/TestFuzzySuggester.java | 10 +-
.../search/suggest/document/TestSuggestField.java | 2 +-
.../search/suggest/fst/TestBytesRefSorters.java | 34 +-
.../search/suggest/fst/TestWFSTCompletion.java | 2 +-
.../lucene/analysis/BaseTokenStreamTestCase.java | 134 +--
.../apache/lucene/analysis/CollationTestBase.java | 2 +-
.../org/apache/lucene/analysis/MockTokenizer.java | 8 +-
.../apache/lucene/analysis/VocabularyAssert.java | 2 +-
.../apache/lucene/geo/BaseGeoPointTestCase.java | 2 +-
.../org/apache/lucene/geo/BaseXYPointTestCase.java | 2 +-
.../java/org/apache/lucene/geo/GeoTestUtil.java | 40 +-
.../java/org/apache/lucene/geo/ShapeTestUtil.java | 8 +-
.../apache/lucene/index/AssertingLeafReader.java | 6 +
.../lucene/index/BaseCompoundFormatTestCase.java | 10 +-
.../lucene/index/BaseDocValuesFormatTestCase.java | 34 +-
.../lucene/index/BaseKnnVectorsFormatTestCase.java | 32 +-
.../lucene/index/BaseNormsFormatTestCase.java | 8 +-
.../index/BaseSegmentInfoFormatTestCase.java | 24 +-
.../index/BaseStoredFieldsFormatTestCase.java | 2 +-
.../apache/lucene/index/FieldFilterLeafReader.java | 23 +-
.../org/apache/lucene/mockfile/FilterPath.java | 13 +-
.../apache/lucene/search/AssertingBulkScorer.java | 19 +-
.../apache/lucene/search/AssertingScorable.java | 30 +-
.../org/apache/lucene/search/AssertingWeight.java | 9 +
.../java/org/apache/lucene/search/CheckHits.java | 2 +-
.../java/org/apache/lucene/search/QueryUtils.java | 18 +-
.../store/BaseChunkedDirectoryTestCase.java} | 256 ++---
.../apache/lucene/store/BaseDirectoryTestCase.java | 34 +-
.../apache/lucene/store/MockDirectoryWrapper.java | 6 +-
.../org/apache/lucene/util/BaseBitSetTestCase.java | 23 +
.../org/apache/lucene/util/LuceneTestCase.java | 20 +-
.../src/java/org/apache/lucene/util/TestUtil.java | 14 +-
.../TestCompressingStoredFieldsFormat.java | 10 +-
.../apache/lucene/mockfile/TestDisableFsyncFS.java | 2 +-
settings.gradle | 7 +
versions.lock | 14 +-
versions.props | 2 +-
629 files changed, 12238 insertions(+), 6010 deletions(-)
diff --cc lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
index e2aced6,56dcf89..a6325e1
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
@@@ -67,7 -65,8 +67,7 @@@ public final class Lucene90HnswVectorsR
Lucene90HnswVectorsReader(SegmentReadState state) throws IOException {
this.fieldInfos = state.fieldInfos;
- int versionMeta = readMetadata(state, Lucene90HnswVectorsFormat.META_EXTENSION);
-
+ int versionMeta = readMetadata(state);
long[] checksumRef = new long[1];
boolean success = false;
try {
diff --cc lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
index c3ffbb1,511f889..30141c0
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
@@@ -21,9 -21,8 +21,9 @@@ import static org.apache.lucene.search.
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
- import java.util.Random;
+ import java.util.SplittableRandom;
import org.apache.lucene.index.KnnGraphValues;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
@@@ -119,70 -97,27 +119,68 @@@ public final class HnswGraph extends Kn
VectorSimilarityFunction similarityFunction,
KnnGraphValues graphValues,
Bits acceptOrds,
- Random random)
+ SplittableRandom random)
throws IOException {
+
int size = graphValues.size();
+ int boundedNumSeed = Math.max(topK, Math.min(numSeed, 2 * size));
+ NeighborQueue results;
+
+ int[] eps = new int[] {graphValues.entryNode()};
+ for (int level = graphValues.numLevels() - 1; level >= 1; level--) {
+ results =
+ HnswGraph.searchLevel(
+ query, 1, level, eps, vectors, similarityFunction, graphValues, null);
+ eps[0] = results.pop();
+ }
+ results =
+ HnswGraph.searchLevel(
+ query, boundedNumSeed, 0, eps, vectors, similarityFunction, graphValues, acceptOrds);
+ while (results.size() > topK) {
+ results.pop();
+ }
+ return results;
+ }
+ /**
+ * Searches for the nearest neighbors of a query vector in a given level.
+ *
+ * @param query search query vector
+ * @param topK the number of results nearest to the query to return
+ * @param level level to search
+ * @param eps the entry points for search at this level, expressed as level-0 ordinals
+ * @param vectors vector values
+ * @param similarityFunction similarity function
+ * @param graphValues the graph values
+ * @param acceptOrds {@link Bits} that represents the allowed document ordinals to match, or
+ * {@code null} if they are all allowed to match.
+ * @return a priority queue holding the closest neighbors found
+ */
+ static NeighborQueue searchLevel(
+ float[] query,
+ int topK,
+ int level,
+ final int[] eps,
+ RandomAccessVectorValues vectors,
+ VectorSimilarityFunction similarityFunction,
+ KnnGraphValues graphValues,
+ Bits acceptOrds)
+ throws IOException {
+
+ int size = graphValues.size();
+ int queueSize = Math.max(eps.length, topK);
// MIN heap, holding the top results
- NeighborQueue results = new NeighborQueue(numSeed, similarityFunction.reversed);
+ NeighborQueue results = new NeighborQueue(queueSize, similarityFunction.reversed);
// MAX heap, from which to pull the candidate nodes
- NeighborQueue candidates = new NeighborQueue(numSeed, !similarityFunction.reversed);
-
+ NeighborQueue candidates = new NeighborQueue(queueSize, !similarityFunction.reversed);
// set of ordinals that have been visited by search on this layer, used to avoid backtracking
SparseFixedBitSet visited = new SparseFixedBitSet(size);
-
- // get initial candidates at random
- int boundedNumSeed = Math.min(numSeed, 2 * size);
- for (int i = 0; i < boundedNumSeed; i++) {
- int entryPoint = random.nextInt(size);
- if (visited.getAndSet(entryPoint) == false) {
- // explore the topK starting points of some random numSeed probes
- float score = similarityFunction.compare(query, vectors.vectorValue(entryPoint));
- candidates.add(entryPoint, score);
- if (acceptOrds == null || acceptOrds.get(entryPoint)) {
- results.add(entryPoint, score);
+ for (int ep : eps) {
- if (visited.get(ep) == false) {
- visited.set(ep);
++ if (visited.getAndSet(ep) == false) {
+ float score = similarityFunction.compare(query, vectors.vectorValue(ep));
+ candidates.add(ep, score);
+ if (acceptOrds == null || acceptOrds.get(ep)) {
+ results.add(ep, score);
}
}
}
@@@ -204,13 -140,12 +202,12 @@@
int friendOrd;
while ((friendOrd = graphValues.nextNeighbor()) != NO_MORE_DOCS) {
assert friendOrd < size : "friendOrd=" + friendOrd + "; size=" + size;
- if (visited.get(friendOrd)) {
+ if (visited.getAndSet(friendOrd)) {
continue;
}
- visited.set(friendOrd);
float score = similarityFunction.compare(query, vectors.vectorValue(friendOrd));
- if (results.size() < numSeed || bound.check(score) == false) {
+ if (results.size() < topK || bound.check(score) == false) {
candidates.add(friendOrd, score);
if (acceptOrds == null || acceptOrds.get(friendOrd)) {
results.insertWithOverflow(friendOrd, score);
diff --cc lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
index f21c8f7,f5cfc6a..4364d1d
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
@@@ -87,12 -84,9 +87,12 @@@ public final class HnswGraphBuilder
}
this.maxConn = maxConn;
this.beamWidth = beamWidth;
- this.hnsw = new HnswGraph(maxConn);
+ // normalization factor for level generation; currently not configurable
+ this.ml = 1 / Math.log(1.0 * maxConn);
- this.random = new Random(seed);
++ this.random = new SplittableRandom(seed);
+ int levelOfFirstNode = getRandomGraphLevel(ml, random);
+ this.hnsw = new HnswGraph(maxConn, levelOfFirstNode);
bound = BoundsChecker.create(similarityFunction.reversed);
- random = new SplittableRandom(seed);
scratch = new NeighborArray(Math.max(beamWidth, maxConn + 1));
}
@@@ -287,12 -266,4 +287,12 @@@
}
return -1;
}
+
- private static int getRandomGraphLevel(double ml, Random random) {
- float randFloat;
++ private static int getRandomGraphLevel(double ml, SplittableRandom random) {
++ double randDouble;
+ do {
- randFloat = random.nextFloat(); // avoid 0 value, as log(0) is undefined
- } while (randFloat == 0.0f);
- return ((int) (-log(randFloat) * ml));
++ randDouble = random.nextDouble(); // avoid 0 value, as log(0) is undefined
++ } while (randDouble == 0.0);
++ return ((int) (-log(randDouble) * ml));
+ }
}
diff --cc lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
index 6d2f827,16a3e60..9fd6838
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
@@@ -179,26 -153,27 +183,27 @@@ public class TestHnswGraph extends Luce
// We expect to get approximately 100% recall;
// the lowest docIds are closest to zero; sum(0,9) = 45
assertTrue("sum(result docs)=" + sum, sum < 75);
+
for (int i = 0; i < nDoc; i++) {
- NeighborArray neighbors = hnsw.getNeighbors(i);
+ NeighborArray neighbors = hnsw.getNeighbors(0, i);
- int[] nodes = neighbors.node;
+ int[] nnodes = neighbors.node;
for (int j = 0; j < neighbors.size(); j++) {
// all neighbors should be valid node ids.
- assertTrue(nodes[j] < nDoc);
+ assertTrue(nnodes[j] < nDoc);
}
}
}
public void testSearchWithAcceptOrds() throws IOException {
int nDoc = 100;
+ int maxConn = 16;
CircularVectorValues vectors = new CircularVectorValues(nDoc);
- // the first 10 docs must not be deleted to ensure the expected recall
- Bits acceptOrds = createRandomAcceptOrds(10, vectors.size);
HnswGraphBuilder builder =
new HnswGraphBuilder(
- vectors, VectorSimilarityFunction.DOT_PRODUCT, 16, 100, random().nextInt());
+ vectors, VectorSimilarityFunction.DOT_PRODUCT, maxConn, 100, random().nextInt());
HnswGraph hnsw = builder.build(vectors);
-
+ // the first 10 docs must not be deleted to ensure the expected recall
+ Bits acceptOrds = createRandomAcceptOrds(10, vectors.size);
NeighborQueue nn =
HnswGraph.search(
new float[] {1, 0},
@@@ -516,7 -507,10 +530,11 @@@
}
}
- /** Generate a random bitset where each entry has a 2/3 probability of being set. */
++
+ /**
+ * Generate a random bitset where before startIndex all bits are set, and after startIndex each
+ * entry has a 2/3 probability of being set.
+ */
private static Bits createRandomAcceptOrds(int startIndex, int length) {
FixedBitSet bits = new FixedBitSet(length);
// all bits are set before startIndex