You are viewing a plain-text version of this content. The link to the canonical version was not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/31 12:44:44 UTC
svn commit: r1614852 [1/9] - in /lucene/dev/trunk:
lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/
lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/
lucene/analy...
Author: rmuir
Date: Thu Jul 31 10:44:39 2014
New Revision: 1614852
URL: http://svn.apache.org/r1614852
Log:
LUCENE-5859: Literally add back dead code to please a bunch of fucking babies
Modified:
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
lucene/dev/trunk/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/ExpressionAggregationFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/MultiCategoryListsFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java
lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml
lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestSort.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Arabic.
@@ -88,18 +89,20 @@ public final class ArabicAnalyzer extend
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public ArabicAnalyzer() {
- this(DefaultSetHolder.DEFAULT_STOP_SET);
+ public ArabicAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
+ * @param matchVersion
+ * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public ArabicAnalyzer(CharArraySet stopwords){
- this(stopwords, CharArraySet.EMPTY_SET);
+ public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords){
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -107,14 +110,17 @@ public final class ArabicAnalyzer extend
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* {@link ArabicStemFilter}.
*
+ * @param matchVersion
+ * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a set of terms not to be stemmed
*/
- public ArabicAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
- super(stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
}
/**
@@ -130,10 +136,10 @@ public final class ArabicAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
- TokenStream result = new LowerCaseFilter(source);
+ final Tokenizer source = new StandardTokenizer(matchVersion);
+ TokenStream result = new LowerCaseFilter(matchVersion, source);
// the order here is important: the stopword list is not normalized!
- result = new StopFilter(result, stopwords);
+ result = new StopFilter( matchVersion, result, stopwords);
// TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
result = new ArabicNormalizationFilter(result);
if(!stemExclusionSet.isEmpty()) {
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.bg;
import java.io.IOException;
import java.io.Reader;
+import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -30,6 +31,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Bulgarian.
@@ -40,7 +42,6 @@ import org.apache.lucene.analysis.util.S
* <p>
*/
public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
-
/**
* File containing default Bulgarian stopwords.
*
@@ -83,15 +84,15 @@ public final class BulgarianAnalyzer ext
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
- public BulgarianAnalyzer() {
- this(DefaultSetHolder.DEFAULT_STOP_SET);
+ public BulgarianAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*/
- public BulgarianAnalyzer(CharArraySet stopwords) {
- this(stopwords, CharArraySet.EMPTY_SET);
+ public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -99,10 +100,10 @@ public final class BulgarianAnalyzer ext
* If a stem exclusion set is provided this analyzer will add a {@link SetKeywordMarkerFilter}
* before {@link BulgarianStemFilter}.
*/
- public BulgarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
- }
+ public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet)); }
/**
* Creates a
@@ -118,10 +119,10 @@ public final class BulgarianAnalyzer ext
*/
@Override
public TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
- TokenStream result = new StandardFilter(source);
- result = new LowerCaseFilter(result);
- result = new StopFilter(result, stopwords);
+ final Tokenizer source = new StandardTokenizer(matchVersion);
+ TokenStream result = new StandardFilter(matchVersion, source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new BulgarianStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -65,7 +65,7 @@ public final class BrazilianAnalyzer ext
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(BrazilianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#");
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -83,29 +83,35 @@ public final class BrazilianAnalyzer ext
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
*/
- public BrazilianAnalyzer() {
- this(DefaultSetHolder.DEFAULT_STOP_SET);
+ public BrazilianAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
+ * @param matchVersion
+ * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public BrazilianAnalyzer(CharArraySet stopwords) {
- super(stopwords);
+ public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+ super(matchVersion, stopwords);
}
/**
* Builds an analyzer with the given stop words and stemming exclusion words
*
+ * @param matchVersion
+ * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public BrazilianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
- this(stopwords);
- excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords,
+ CharArraySet stemExclusionSet) {
+ this(matchVersion, stopwords);
+ excltable = CharArraySet.unmodifiableSet(CharArraySet
+ .copy(matchVersion, stemExclusionSet));
}
/**
@@ -120,10 +126,10 @@ public final class BrazilianAnalyzer ext
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer source = new StandardTokenizer();
- TokenStream result = new LowerCaseFilter(source);
- result = new StandardFilter(result);
- result = new StopFilter(result, stopwords);
+ Tokenizer source = new StandardTokenizer(matchVersion);
+ TokenStream result = new LowerCaseFilter(matchVersion, source);
+ result = new StandardFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
if(excltable != null && !excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
return new TokenStreamComponents(source, new BrazilianStemFilter(result));
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.CatalanStemmer;
/**
@@ -45,7 +46,7 @@ public final class CatalanAnalyzer exten
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(
+ new CharArraySet(Version.LUCENE_CURRENT,
Arrays.asList(
"d", "l", "m", "n", "s", "t"
), true));
@@ -80,17 +81,18 @@ public final class CatalanAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public CatalanAnalyzer() {
- this(DefaultSetHolder.DEFAULT_STOP_SET);
+ public CatalanAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
+ * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public CatalanAnalyzer(CharArraySet stopwords) {
- this(stopwords, CharArraySet.EMPTY_SET);
+ public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -98,12 +100,14 @@ public final class CatalanAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
+ * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public CatalanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
}
/**
@@ -120,11 +124,11 @@ public final class CatalanAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
- TokenStream result = new StandardFilter(source);
+ final Tokenizer source = new StandardTokenizer(matchVersion);
+ TokenStream result = new StandardFilter(matchVersion, source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
- result = new LowerCaseFilter(result);
- result = new StopFilter(result, stopwords);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new CatalanStemmer());
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java Thu Jul 31 10:44:39 2014
@@ -26,6 +26,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -29840,7 +29841,7 @@ public final class HTMLStripCharFilter e
upperCaseVariantsAccepted.put("amp", "AMP");
}
private static final CharArrayMap<Character> entityValues
- = new CharArrayMap<>(253, false);
+ = new CharArrayMap<>(Version.LUCENE_CURRENT, 253, false);
static {
String[] entities = {
"AElig", "\u00C6", "Aacute", "\u00C1", "Acirc", "\u00C2",
@@ -29979,7 +29980,7 @@ public final class HTMLStripCharFilter e
escapeSTYLE = true;
} else {
if (null == this.escapedTags) {
- this.escapedTags = new CharArraySet(16, true);
+ this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
}
this.escapedTags.add(tag);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex Thu Jul 31 10:44:39 2014
@@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -194,7 +195,7 @@ InlineElment = ( [aAbBiIqQsSuU]
escapeSTYLE = true;
} else {
if (null == this.escapedTags) {
- this.escapedTags = new CharArraySet(16, true);
+ this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
}
this.escapedTags.add(tag);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
*/
import java.io.IOException;
+import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -27,6 +28,7 @@ import org.apache.lucene.analysis.core.S
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
/**
* An {@link Analyzer} that tokenizes text with {@link StandardTokenizer},
@@ -35,7 +37,6 @@ import org.apache.lucene.analysis.util.S
* and filters stopwords with {@link StopFilter}
*/
public final class CJKAnalyzer extends StopwordAnalyzerBase {
-
/**
* File containing default CJK stopwords.
* <p/>
@@ -69,27 +70,29 @@ public final class CJKAnalyzer extends S
/**
* Builds an analyzer which removes words in {@link #getDefaultStopSet()}.
*/
- public CJKAnalyzer() {
- this(DefaultSetHolder.DEFAULT_STOP_SET);
+ public CJKAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
+ * @param matchVersion
+ * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public CJKAnalyzer(CharArraySet stopwords){
- super(stopwords);
+ public CJKAnalyzer(Version matchVersion, CharArraySet stopwords){
+ super(matchVersion, stopwords);
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source = new StandardTokenizer(matchVersion);
// run the widthfilter first before bigramming, it sometimes combines characters.
TokenStream result = new CJKWidthFilter(source);
- result = new LowerCaseFilter(result);
+ result = new LowerCaseFilter(matchVersion, result);
result = new CJKBigramFilter(result);
- return new TokenStreamComponents(source, new StopFilter(result, stopwords));
+ return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Sorani Kurdish.
@@ -61,7 +62,7 @@ public final class SoraniAnalyzer extend
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(SoraniAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -73,17 +74,18 @@ public final class SoraniAnalyzer extend
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public SoraniAnalyzer() {
- this(DefaultSetHolder.DEFAULT_STOP_SET);
+ public SoraniAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
+ * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public SoraniAnalyzer(CharArraySet stopwords) {
- this(stopwords, CharArraySet.EMPTY_SET);
+ public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -91,12 +93,14 @@ public final class SoraniAnalyzer extend
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
+ * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public SoraniAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
}
/**
@@ -114,11 +118,11 @@ public final class SoraniAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
- TokenStream result = new StandardFilter(source);
+ final Tokenizer source = new StandardTokenizer(matchVersion);
+ TokenStream result = new StandardFilter(matchVersion, source);
result = new SoraniNormalizationFilter(result);
- result = new LowerCaseFilter(result);
- result = new StopFilter(result, stopwords);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SoraniStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java Thu Jul 31 10:44:39 2014
@@ -78,7 +78,7 @@ public final class CommonGramsFilter ext
* @param input TokenStream input in filter chain
* @param commonWords The set of common words.
*/
- public CommonGramsFilter(TokenStream input, CharArraySet commonWords) {
+ public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) {
super(input);
this.commonWords = commonWords;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java Thu Jul 31 10:44:39 2014
@@ -76,7 +76,7 @@ public class CommonGramsFilterFactory ex
@Override
public TokenFilter create(TokenStream input) {
- CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords);
+ CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
return commonGrams;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
+import java.io.Reader;
+
import org.apache.lucene.analysis.Analyzer;
/**
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Thu Jul 31 10:44:39 2014
@@ -18,11 +18,13 @@ package org.apache.lucene.analysis.core;
*/
import java.io.IOException;
+import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
/**
* Emits the entire input as a single token.
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java Thu Jul 31 10:44:39 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
+import java.io.Reader;
import java.util.Map;
/**
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java Thu Jul 31 10:44:39 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.Version;
/**
* A LetterTokenizer is a tokenizer that divides text at non-letters. That's to
@@ -29,25 +30,41 @@ import org.apache.lucene.util.AttributeF
* Note: this does a decent job for most European languages, but does a terrible
* job for some Asian languages, where words are not separated by spaces.
* </p>
+ * <p>
+ * <a name="version"/>
+ * You must specify the required {@link Version} compatibility when creating
+ * {@link LetterTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
+ * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
+ * </p>
*/
public class LetterTokenizer extends CharTokenizer {
/**
* Construct a new LetterTokenizer.
+ *
+ * @param matchVersion
+ * Lucene version to match. See <a href="#version">above</a>.
*/
- public LetterTokenizer() {
+ public LetterTokenizer(Version matchVersion) {
+ super(matchVersion);
}
/**
* Construct a new LetterTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
+ * @param matchVersion
+ * Lucene version to match. See <a href="#version">above</a>.
* @param factory
* the attribute factory to use for this {@link Tokenizer}
*/
- public LetterTokenizer(AttributeFactory factory) {
- super(factory);
+ public LetterTokenizer(Version matchVersion, AttributeFactory factory) {
+ super(matchVersion, factory);
}
/** Collects only characters which satisfy
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java Thu Jul 31 10:44:39 2014
@@ -36,6 +36,7 @@ public class LetterTokenizerFactory exte
/** Creates a new LetterTokenizerFactory */
public LetterTokenizerFactory(Map<String,String> args) {
super(args);
+ assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -43,6 +44,6 @@ public class LetterTokenizerFactory exte
@Override
public LetterTokenizer create(AttributeFactory factory) {
- return new LetterTokenizer(factory);
+ return new LetterTokenizer(luceneMatchVersion, factory);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java Thu Jul 31 10:44:39 2014
@@ -23,21 +23,30 @@ import org.apache.lucene.analysis.TokenF
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.util.Version;
/**
* Normalizes token text to lower case.
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating LowerCaseFilter:
+ * <ul>
+ * <li> As of 3.1, supplementary characters are properly lowercased.
+ * </ul>
*/
public final class LowerCaseFilter extends TokenFilter {
- private final CharacterUtils charUtils = CharacterUtils.getInstance();
+ private final CharacterUtils charUtils;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Create a new LowerCaseFilter, that normalizes token text to lower case.
*
+ * @param matchVersion See <a href="#version">above</a>
* @param in TokenStream to filter
*/
- public LowerCaseFilter(TokenStream in) {
+ public LowerCaseFilter(Version matchVersion, TokenStream in) {
super(in);
+ charUtils = CharacterUtils.getInstance(matchVersion);
}
@Override
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java Thu Jul 31 10:44:39 2014
@@ -40,6 +40,7 @@ public class LowerCaseFilterFactory exte
/** Creates a new LowerCaseFilterFactory */
public LowerCaseFilterFactory(Map<String,String> args) {
super(args);
+ assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -47,7 +48,7 @@ public class LowerCaseFilterFactory exte
@Override
public LowerCaseFilter create(TokenStream input) {
- return new LowerCaseFilter(input);
+ return new LowerCaseFilter(luceneMatchVersion,input);
}
@Override
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java Thu Jul 31 10:44:39 2014
@@ -17,8 +17,13 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
+import java.io.Reader;
+
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;
/**
* LowerCaseTokenizer performs the function of LetterTokenizer
@@ -30,24 +35,41 @@ import org.apache.lucene.util.AttributeF
* Note: this does a decent job for most European languages, but does a terrible
* job for some Asian languages, where words are not separated by spaces.
* </p>
+ * <p>
+ * <a name="version"/>
+ * You must specify the required {@link Version} compatibility when creating
+ * {@link LowerCaseTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
+ * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
+ * </p>
*/
public final class LowerCaseTokenizer extends LetterTokenizer {
/**
* Construct a new LowerCaseTokenizer.
+ *
+ * @param matchVersion
+ * Lucene version to match. See <a href="#version">above</a>.
+ *
*/
- public LowerCaseTokenizer() {
+ public LowerCaseTokenizer(Version matchVersion) {
+ super(matchVersion);
}
/**
* Construct a new LowerCaseTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
+ * @param matchVersion
+ * Lucene version to match. See <a href="#version">above</a>.
* @param factory
* the attribute factory to use for this {@link Tokenizer}
*/
- public LowerCaseTokenizer(AttributeFactory factory) {
- super(factory);
+ public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory) {
+ super(matchVersion, factory);
}
/** Converts char to lower case
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java Thu Jul 31 10:44:39 2014
@@ -39,6 +39,7 @@ public class LowerCaseTokenizerFactory e
/** Creates a new LowerCaseTokenizerFactory */
public LowerCaseTokenizerFactory(Map<String,String> args) {
super(args);
+ assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -46,7 +47,7 @@ public class LowerCaseTokenizerFactory e
@Override
public LowerCaseTokenizer create(AttributeFactory factory) {
- return new LowerCaseTokenizer(factory);
+ return new LowerCaseTokenizer(luceneMatchVersion, factory);
}
@Override
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -17,22 +17,38 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
+import java.io.Reader;
+
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.Version;
/** An {@link Analyzer} that filters {@link LetterTokenizer}
* with {@link LowerCaseFilter}
+ * <p>
+ * <a name="version"></a>You must specify the required {@link Version} compatibility
+ * when creating {@link CharTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link LowerCaseTokenizer} uses an int based API to normalize and
+ * detect token codepoints. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
+ * <p>
**/
public final class SimpleAnalyzer extends Analyzer {
+ private final Version matchVersion;
+
/**
* Creates a new {@link SimpleAnalyzer}
+ * @param matchVersion Lucene version to match. See <a href="#version">above</a>.
*/
- public SimpleAnalyzer() {
+ public SimpleAnalyzer(Version matchVersion) {
+ this.matchVersion = matchVersion;
}
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
- return new TokenStreamComponents(new LowerCaseTokenizer());
+ return new TokenStreamComponents(new LowerCaseTokenizer(matchVersion));
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -27,10 +27,20 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;
+
+/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
+ *
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopAnalyzer:
+ * <ul>
+ * <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ * supplementary characters in stopwords
+ * <li> As of 2.9, position increments are preserved
+ * </ul>
+*/
-/**
- * Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
- */
public final class StopAnalyzer extends StopwordAnalyzerBase {
/** An unmodifiable set containing some common English words that are not usually useful
@@ -45,35 +55,40 @@ public final class StopAnalyzer extends
"that", "the", "their", "then", "there", "these",
"they", "this", "to", "was", "will", "with"
);
- final CharArraySet stopSet = new CharArraySet(stopWords, false);
+ final CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT,
+ stopWords, false);
ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
}
/** Builds an analyzer which removes words in
* {@link #ENGLISH_STOP_WORDS_SET}.
+ * @param matchVersion See <a href="#version">above</a>
*/
- public StopAnalyzer() {
- this(ENGLISH_STOP_WORDS_SET);
+ public StopAnalyzer(Version matchVersion) {
+ this(matchVersion, ENGLISH_STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given set.
+ * @param matchVersion See <a href="#version">above</a>
* @param stopWords Set of stop words */
- public StopAnalyzer(CharArraySet stopWords) {
- super(stopWords);
+ public StopAnalyzer(Version matchVersion, CharArraySet stopWords) {
+ super(matchVersion, stopWords);
}
/** Builds an analyzer with the stop words from the given file.
- * @see WordlistLoader#getWordSet(Reader)
+ * @see WordlistLoader#getWordSet(Reader, Version)
+ * @param matchVersion See <a href="#version">above</a>
* @param stopwordsFile File to load stop words from */
- public StopAnalyzer(File stopwordsFile) throws IOException {
- this(loadStopwordSet(stopwordsFile));
+ public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
+ this(matchVersion, loadStopwordSet(stopwordsFile, matchVersion));
}
/** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader)
+ * @see WordlistLoader#getWordSet(Reader, Version)
+ * @param matchVersion See <a href="#version">above</a>
* @param stopwords Reader to load stop words from */
- public StopAnalyzer(Reader stopwords) throws IOException {
- this(loadStopwordSet(stopwords));
+ public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
+ this(matchVersion, loadStopwordSet(stopwords, matchVersion));
}
/**
@@ -87,8 +102,9 @@ public final class StopAnalyzer extends
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new LowerCaseTokenizer();
- return new TokenStreamComponents(source, new StopFilter(source, stopwords));
+ final Tokenizer source = new LowerCaseTokenizer(matchVersion);
+ return new TokenStreamComponents(source, new StopFilter(matchVersion,
+ source, stopwords));
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java Thu Jul 31 10:44:39 2014
@@ -24,9 +24,19 @@ import org.apache.lucene.analysis.util.F
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
/**
* Removes stop words from a token stream.
+ *
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopFilter:
+ * <ul>
+ * <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ * supplementary characters in stopwords and position
+ * increments are preserved
+ * </ul>
*/
public final class StopFilter extends FilteringTokenFilter {
@@ -37,14 +47,17 @@ public final class StopFilter extends Fi
* Constructs a filter which removes words from the input TokenStream that are
* named in the Set.
*
+ * @param matchVersion
+ * Lucene version to enable correct Unicode 4.0 behavior in the stop
+ * set if Version &gt; 3.0. See <a href="#version">above</a> for details.
* @param in
* Input stream
* @param stopWords
* A {@link CharArraySet} representing the stopwords.
- * @see #makeStopSet(java.lang.String...)
+ * @see #makeStopSet(Version, java.lang.String...)
*/
- public StopFilter(TokenStream in, CharArraySet stopWords) {
- super(in);
+ public StopFilter(Version matchVersion, TokenStream in, CharArraySet stopWords) {
+ super(matchVersion, in);
this.stopWords = stopWords;
}
@@ -54,11 +67,12 @@ public final class StopFilter extends Fi
* This permits this stopWords construction to be cached once when
* an Analyzer is constructed.
*
+ * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version &gt; 3.0
* @param stopWords An array of stopwords
- * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+ * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
*/
- public static CharArraySet makeStopSet(String... stopWords) {
- return makeStopSet(stopWords, false);
+ public static CharArraySet makeStopSet(Version matchVersion, String... stopWords) {
+ return makeStopSet(matchVersion, stopWords, false);
}
/**
@@ -67,35 +81,38 @@ public final class StopFilter extends Fi
* This permits this stopWords construction to be cached once when
* an Analyzer is constructed.
*
+ * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version &gt; 3.0
* @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
* @return A Set ({@link CharArraySet}) containing the words
- * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+ * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
*/
- public static CharArraySet makeStopSet(List<?> stopWords) {
- return makeStopSet(stopWords, false);
+ public static CharArraySet makeStopSet(Version matchVersion, List<?> stopWords) {
+ return makeStopSet(matchVersion, stopWords, false);
}
/**
* Creates a stopword set from the given stopword array.
*
+ * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version &gt; 3.0
* @param stopWords An array of stopwords
* @param ignoreCase If true, all words are lower cased first.
* @return a Set containing the words
*/
- public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
- CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
+ public static CharArraySet makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) {
+ CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase);
stopSet.addAll(Arrays.asList(stopWords));
return stopSet;
}
/**
* Creates a stopword set from the given stopword list.
+ * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version &gt; 3.0
* @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
* @param ignoreCase if true, all words are lower cased first
* @return A Set ({@link CharArraySet}) containing the words
*/
- public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase){
- CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
+ public static CharArraySet makeStopSet(Version matchVersion, List<?> stopWords, boolean ignoreCase){
+ CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase);
stopSet.addAll(stopWords);
return stopSet;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java Thu Jul 31 10:44:39 2014
@@ -81,6 +81,7 @@ public class StopFilterFactory extends T
/** Creates a new StopFilterFactory */
public StopFilterFactory(Map<String,String> args) {
super(args);
+ assureMatchVersion();
stopWordFiles = get(args, "words");
format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
ignoreCase = getBoolean(args, "ignoreCase", false);
@@ -103,7 +104,7 @@ public class StopFilterFactory extends T
if (null != format) {
throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
}
- stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
+ stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
}
}
@@ -117,7 +118,7 @@ public class StopFilterFactory extends T
@Override
public TokenStream create(TokenStream input) {
- StopFilter stopFilter = new StopFilter(input,stopWords);
+ StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords);
return stopFilter;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java Thu Jul 31 10:44:39 2014
@@ -22,6 +22,7 @@ import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
+import org.apache.lucene.util.Version;
/**
* Removes tokens whose types appear in a set of blocked types from a token stream.
@@ -34,13 +35,14 @@ public final class TypeTokenFilter exten
/**
* Create a new {@link TypeTokenFilter}.
+ * @param version the Lucene match version
* @param input the {@link TokenStream} to consume
* @param stopTypes the types to filter
* @param useWhiteList if true, then tokens whose type is in stopTypes will
* be kept, otherwise they will be filtered out
*/
- public TypeTokenFilter(TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
- super(input);
+ public TypeTokenFilter(Version version, TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
+ super(version, input);
this.stopTypes = stopTypes;
this.useWhiteList = useWhiteList;
}
@@ -48,9 +50,10 @@ public final class TypeTokenFilter exten
/**
* Create a new {@link TypeTokenFilter} that filters tokens out
* (useWhiteList=false).
+ * @see #TypeTokenFilter(Version, TokenStream, Set, boolean)
*/
- public TypeTokenFilter(TokenStream input, Set<String> stopTypes) {
- this(input, stopTypes, false);
+ public TypeTokenFilter(Version version, TokenStream input, Set<String> stopTypes) {
+ this(version, input, stopTypes, false);
}
/**
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java Thu Jul 31 10:44:39 2014
@@ -72,7 +72,7 @@ public class TypeTokenFilterFactory exte
@Override
public TokenStream create(TokenStream input) {
- final TokenStream filter = new TypeTokenFilter(input, stopTypes, useWhitelist);
+ final TokenStream filter = new TypeTokenFilter(luceneMatchVersion, input, stopTypes, useWhitelist);
return filter;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java Thu Jul 31 10:44:39 2014
@@ -23,9 +23,13 @@ import org.apache.lucene.analysis.TokenF
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.util.Version;
/**
* Normalizes token text to UPPER CASE.
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating UpperCaseFilter
*
* <p><b>NOTE:</b> In Unicode, this transformation may lose information when the
* upper case character represents more than one lower case character. Use this filter
@@ -33,16 +37,18 @@ import org.apache.lucene.analysis.util.C
* general search matching
*/
public final class UpperCaseFilter extends TokenFilter {
- private final CharacterUtils charUtils = CharacterUtils.getInstance();
+ private final CharacterUtils charUtils;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Create a new UpperCaseFilter, that normalizes token text to upper case.
*
+ * @param matchVersion See <a href="#version">above</a>
* @param in TokenStream to filter
*/
- public UpperCaseFilter(TokenStream in) {
+ public UpperCaseFilter(Version matchVersion, TokenStream in) {
super(in);
+ charUtils = CharacterUtils.getInstance(matchVersion);
}
@Override
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java Thu Jul 31 10:44:39 2014
@@ -45,6 +45,7 @@ public class UpperCaseFilterFactory exte
/** Creates a new UpperCaseFilterFactory */
public UpperCaseFilterFactory(Map<String,String> args) {
super(args);
+ assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -52,7 +53,7 @@ public class UpperCaseFilterFactory exte
@Override
public UpperCaseFilter create(TokenStream input) {
- return new UpperCaseFilter(input);
+ return new UpperCaseFilter(luceneMatchVersion,input);
}
@Override
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java?rev=1614852&r1=1614851&r2=1614852&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java Thu Jul 31 10:44:39 2014
@@ -17,21 +17,38 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
+import java.io.Reader;
+
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.util.CharTokenizer;
+import org.apache.lucene.util.Version;
/**
* An Analyzer that uses {@link WhitespaceTokenizer}.
+ * <p>
+ * <a name="version"></a>You must specify the required {@link Version} compatibility
+ * when creating {@link CharTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link WhitespaceTokenizer} uses an int based API to normalize and
+ * detect token codepoints. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
+ * <p>
**/
public final class WhitespaceAnalyzer extends Analyzer {
+ private final Version matchVersion;
+
/**
* Creates a new {@link WhitespaceAnalyzer}
+ * @param matchVersion Lucene version to match. See <a href="#version">above</a>.
*/
- public WhitespaceAnalyzer() {
+ public WhitespaceAnalyzer(Version matchVersion) {
+ this.matchVersion = matchVersion;
}
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
- return new TokenStreamComponents(new WhitespaceTokenizer());
+ return new TokenStreamComponents(new WhitespaceTokenizer(matchVersion));
}
}