You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rj...@apache.org on 2014/08/09 00:42:54 UTC
svn commit: r1616901 [1/11] - in /lucene/dev/trunk: lucene/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/
lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/ luc...
Author: rjernst
Date: Fri Aug 8 22:42:48 2014
New Revision: 1616901
URL: http://svn.apache.org/r1616901
Log:
LUCENE-5859: Remove Version from Analyzer constructors
Added:
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43CompoundWordTokenFilterBase.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43DictionaryCompoundWordTokenFilter.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43HyphenationCompoundWordTokenFilter.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenFilter.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java
lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
lucene/dev/trunk/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java
lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
lucene/dev/trunk/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/ExpressionAggregationFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/MultiCategoryListsFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java
lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java
lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/CollationField.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml
lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestSort.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Aug 8 22:42:48 2014
@@ -80,6 +80,11 @@ API Changes
takes the same selectors. Add helper methods to DocValues.java that are better
suited for search code (never return null, etc). (Mike McCandless, Robert Muir)
+* LUCENE-5859: Remove Version from Analyzer constructors. Use Analyzer.setVersion()
+ to set the version an analyzer should use to replicate behavior from a specific
+ release.
+ (Ryan Ernst, Robert Muir)
+
Documentation
* LUCENE-5392: Add/improve analysis package documentation to reflect
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Arabic.
@@ -89,20 +88,18 @@ public final class ArabicAnalyzer extend
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public ArabicAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public ArabicAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords){
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public ArabicAnalyzer(CharArraySet stopwords){
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -110,17 +107,14 @@ public final class ArabicAnalyzer extend
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* {@link ArabicStemFilter}.
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a set of terms not to be stemmed
*/
- public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public ArabicAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -136,10 +130,10 @@ public final class ArabicAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new LowerCaseFilter(source);
// the order here is important: the stopword list is not normalized!
- result = new StopFilter( matchVersion, result, stopwords);
+ result = new StopFilter(result, stopwords);
// TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
result = new ArabicNormalizationFilter(result);
if(!stemExclusionSet.isEmpty()) {
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.bg;
import java.io.IOException;
import java.io.Reader;
-import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -31,7 +30,6 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Bulgarian.
@@ -42,6 +40,7 @@ import org.apache.lucene.util.Version;
* <p>
*/
public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
+
/**
* File containing default Bulgarian stopwords.
*
@@ -84,15 +83,15 @@ public final class BulgarianAnalyzer ext
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
- public BulgarianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public BulgarianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*/
- public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public BulgarianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -100,10 +99,10 @@ public final class BulgarianAnalyzer ext
* If a stem exclusion set is provided this analyzer will add a {@link SetKeywordMarkerFilter}
* before {@link BulgarianStemFilter}.
*/
- public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet)); }
+ public BulgarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
/**
* Creates a
@@ -119,10 +118,10 @@ public final class BulgarianAnalyzer ext
*/
@Override
public TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new BulgarianStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Brazilian Portuguese language.
@@ -44,7 +43,7 @@ import org.apache.lucene.util.Version;
* not be stemmed, but indexed).
* </p>
*
- * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
@@ -65,7 +64,7 @@ public final class BrazilianAnalyzer ext
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(BrazilianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -83,35 +82,29 @@ public final class BrazilianAnalyzer ext
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
*/
- public BrazilianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public BrazilianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- super(matchVersion, stopwords);
+ public BrazilianAnalyzer(CharArraySet stopwords) {
+ super(stopwords);
}
/**
* Builds an analyzer with the given stop words and stemming exclusion words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords,
- CharArraySet stemExclusionSet) {
- this(matchVersion, stopwords);
- excltable = CharArraySet.unmodifiableSet(CharArraySet
- .copy(matchVersion, stemExclusionSet));
+ public BrazilianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ this(stopwords);
+ excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -126,10 +119,10 @@ public final class BrazilianAnalyzer ext
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
- result = new StandardFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ Tokenizer source = new StandardTokenizer();
+ TokenStream result = new LowerCaseFilter(source);
+ result = new StandardFilter(result);
+ result = new StopFilter(result, stopwords);
if(excltable != null && !excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
return new TokenStreamComponents(source, new BrazilianStemFilter(result));
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.CatalanStemmer;
/**
@@ -46,7 +45,7 @@ public final class CatalanAnalyzer exten
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"d", "l", "m", "n", "s", "t"
), true));
@@ -81,18 +80,17 @@ public final class CatalanAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public CatalanAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public CatalanAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public CatalanAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -100,14 +98,12 @@ public final class CatalanAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public CatalanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -124,11 +120,11 @@ public final class CatalanAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new CatalanStemmer());
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java Fri Aug 8 22:42:48 2014
@@ -26,7 +26,6 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -29841,7 +29840,7 @@ public final class HTMLStripCharFilter e
upperCaseVariantsAccepted.put("amp", "AMP");
}
private static final CharArrayMap<Character> entityValues
- = new CharArrayMap<>(Version.LUCENE_CURRENT, 253, false);
+ = new CharArrayMap<>(253, false);
static {
String[] entities = {
"AElig", "\u00C6", "Aacute", "\u00C1", "Acirc", "\u00C2",
@@ -29980,7 +29979,7 @@ public final class HTMLStripCharFilter e
escapeSTYLE = true;
} else {
if (null == this.escapedTags) {
- this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
+ this.escapedTags = new CharArraySet(16, true);
}
this.escapedTags.add(tag);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex Fri Aug 8 22:42:48 2014
@@ -24,7 +24,6 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -195,7 +194,7 @@ InlineElment = ( [aAbBiIqQsSuU]
escapeSTYLE = true;
} else {
if (null == this.escapedTags) {
- this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
+ this.escapedTags = new CharArraySet(16, true);
}
this.escapedTags.add(tag);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cjk;
*/
import java.io.IOException;
-import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -28,7 +27,6 @@ import org.apache.lucene.analysis.core.S
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* An {@link Analyzer} that tokenizes text with {@link StandardTokenizer},
@@ -37,6 +35,7 @@ import org.apache.lucene.util.Version;
* and filters stopwords with {@link StopFilter}
*/
public final class CJKAnalyzer extends StopwordAnalyzerBase {
+
/**
* File containing default CJK stopwords.
* <p/>
@@ -70,29 +69,27 @@ public final class CJKAnalyzer extends S
/**
* Builds an analyzer which removes words in {@link #getDefaultStopSet()}.
*/
- public CJKAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public CJKAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public CJKAnalyzer(Version matchVersion, CharArraySet stopwords){
- super(matchVersion, stopwords);
+ public CJKAnalyzer(CharArraySet stopwords){
+ super(stopwords);
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
+ final Tokenizer source = new StandardTokenizer();
// run the widthfilter first before bigramming, it sometimes combines characters.
TokenStream result = new CJKWidthFilter(source);
- result = new LowerCaseFilter(matchVersion, result);
+ result = new LowerCaseFilter(result);
result = new CJKBigramFilter(result);
- return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+ return new TokenStreamComponents(source, new StopFilter(result, stopwords));
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Sorani Kurdish.
@@ -62,7 +61,7 @@ public final class SoraniAnalyzer extend
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(SoraniAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,18 +73,17 @@ public final class SoraniAnalyzer extend
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public SoraniAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public SoraniAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public SoraniAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -93,14 +91,12 @@ public final class SoraniAnalyzer extend
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public SoraniAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -118,11 +114,11 @@ public final class SoraniAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
result = new SoraniNormalizationFilter(result);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SoraniStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java Fri Aug 8 22:42:48 2014
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
/*
* TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors
@@ -78,7 +77,7 @@ public final class CommonGramsFilter ext
* @param input TokenStream input in filter chain
* @param commonWords The set of common words.
*/
- public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) {
+ public CommonGramsFilter(TokenStream input, CharArraySet commonWords) {
super(input);
this.commonWords = commonWords;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -76,7 +76,7 @@ public class CommonGramsFilterFactory ex
@Override
public TokenFilter create(TokenStream input) {
- CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
+ CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords);
return commonGrams;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Fri Aug 8 22:42:48 2014
@@ -17,31 +17,18 @@ package org.apache.lucene.analysis.compo
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.LinkedList;
-
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
+
+import java.io.IOException;
+import java.util.LinkedList;
/**
* Base class for decomposition token filters.
- * <p>
- *
- * <a name="version"></a>
- * You must specify the required {@link Version} compatibility when creating
- * CompoundWordTokenFilterBase:
- * <ul>
- * <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- * supplementary characters in strings and char arrays provided as compound word
- * dictionaries.
- * <li>As of 4.4, {@link CompoundWordTokenFilterBase} doesn't update offsets.
- * </ul>
*/
public abstract class CompoundWordTokenFilterBase extends TokenFilter {
/**
@@ -59,31 +46,29 @@ public abstract class CompoundWordTokenF
*/
public static final int DEFAULT_MAX_SUBWORD_SIZE = 15;
- protected final Version matchVersion;
protected final CharArraySet dictionary;
protected final LinkedList<CompoundToken> tokens;
protected final int minWordSize;
protected final int minSubwordSize;
protected final int maxSubwordSize;
protected final boolean onlyLongestMatch;
-
+
protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
protected final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-
- private AttributeSource.State current;
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) {
- this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+ private State current;
+
+ protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) {
+ this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
}
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary) {
- this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
+ protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary) {
+ this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
}
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(input);
- this.matchVersion = matchVersion;
this.tokens=new LinkedList<>();
if (minWordSize < 0) {
throw new IllegalArgumentException("minWordSize cannot be negative");
@@ -100,7 +85,7 @@ public abstract class CompoundWordTokenF
this.onlyLongestMatch=onlyLongestMatch;
this.dictionary = dictionary;
}
-
+
@Override
public final boolean incrementToken() throws IOException {
if (!tokens.isEmpty()) {
@@ -141,7 +126,7 @@ public abstract class CompoundWordTokenF
tokens.clear();
current = null;
}
-
+
/**
* Helper class to hold decompounded token information
*/
@@ -154,20 +139,8 @@ public abstract class CompoundWordTokenF
this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length);
// offsets of the original word
- int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
- int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
-
- if (matchVersion.onOrAfter(Version.LUCENE_4_4) ||
- endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) {
- // if length by start + end offsets doesn't match the term text then assume
- // this is a synonym and don't adjust the offsets.
- this.startOffset = startOff;
- this.endOffset = endOff;
- } else {
- final int newStart = startOff + offset;
- this.startOffset = newStart;
- this.endOffset = newStart + length;
- }
+ this.startOffset = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
+ this.endOffset = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java Fri Aug 8 22:42:48 2014
@@ -18,60 +18,39 @@ package org.apache.lucene.analysis.compo
*/
-import java.util.Set;
-
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
/**
- * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
+ * A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
* <p>
* "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
- * "Donaudampfschiff" even when you only enter "schiff".
+ * "Donaudampfschiff" even when you only enter "schiff".
* It uses a brute-force algorithm to achieve this.
* <p>
- * You must specify the required {@link Version} compatibility when creating
- * CompoundWordTokenFilterBase:
- * <ul>
- * <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- * supplementary characters in strings and char arrays provided as compound word
- * dictionaries.
- * </ul>
*/
public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
-
+
/**
* Creates a new {@link DictionaryCompoundWordTokenFilter}
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See <a
- * href="CompoundWordTokenFilterBase.html#version"
- * >CompoundWordTokenFilterBase</a> for details.
+ *
* @param input
- * the {@link TokenStream} to process
+ * the {@link org.apache.lucene.analysis.TokenStream} to process
* @param dictionary
* the word dictionary to match against.
*/
- public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary) {
- super(matchVersion, input, dictionary);
+ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary) {
+ super(input, dictionary);
if (dictionary == null) {
throw new IllegalArgumentException("dictionary cannot be null");
}
}
-
+
/**
* Creates a new {@link DictionaryCompoundWordTokenFilter}
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See <a
- * href="CompoundWordTokenFilterBase.html#version"
- * >CompoundWordTokenFilterBase</a> for details.
+ *
* @param input
- * the {@link TokenStream} to process
+ * the {@link org.apache.lucene.analysis.TokenStream} to process
* @param dictionary
* the word dictionary to match against.
* @param minWordSize
@@ -83,9 +62,9 @@ public class DictionaryCompoundWordToken
* @param onlyLongestMatch
* Add only the longest matching subword to the stream
*/
- public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary,
- int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
- super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary,
+ int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
if (dictionary == null) {
throw new IllegalArgumentException("dictionary cannot be null");
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -22,12 +22,13 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
import java.util.Map;
import java.io.IOException;
/**
- * Factory for {@link DictionaryCompoundWordTokenFilter}.
+ * Factory for {@link Lucene43DictionaryCompoundWordTokenFilter}.
* <pre class="prettyprint">
* <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
@@ -50,9 +51,9 @@ public class DictionaryCompoundWordToken
super(args);
assureMatchVersion();
dictFile = require(args, "dictionary");
- minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
- minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
- maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+ minWordSize = getInt(args, "minWordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+ minSubwordSize = getInt(args, "minSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+ maxSubwordSize = getInt(args, "maxSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -67,8 +68,13 @@ public class DictionaryCompoundWordToken
@Override
public TokenStream create(TokenStream input) {
// if the dictionary is null, it means it was empty
- return dictionary == null ? input : new DictionaryCompoundWordTokenFilter
- (luceneMatchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ if (dictionary == null) {
+ return input;
+ }
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new DictionaryCompoundWordTokenFilter(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ }
+ return new Lucene43DictionaryCompoundWordTokenFilter(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java Fri Aug 8 22:42:48 2014
@@ -17,67 +17,47 @@ package org.apache.lucene.analysis.compo
* limitations under the License.
*/
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;
+import java.io.File;
+import java.io.IOException;
+
/**
- * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
- * <p>
+ * A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
+ *
* "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
* "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation
* grammar and a word dictionary to achieve this.
- * <p>
- * You must specify the required {@link Version} compatibility when creating
- * CompoundWordTokenFilterBase:
- * <ul>
- * <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- * supplementary characters in strings and char arrays provided as compound word
- * dictionaries.
- * </ul>
*/
public class HyphenationCompoundWordTokenFilter extends
CompoundWordTokenFilterBase {
private HyphenationTree hyphenator;
/**
- * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See <a
- * href="CompoundWordTokenFilterBase.html#version"
- * >CompoundWordTokenFilterBase</a> for details.
+ * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
+ *
* @param input
- * the {@link TokenStream} to process
+ * the {@link org.apache.lucene.analysis.TokenStream} to process
* @param hyphenator
* the hyphenation pattern tree to use for hyphenation
* @param dictionary
* the word dictionary to match against.
*/
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator, CharArraySet dictionary) {
- this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
+ public HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, CharArraySet dictionary) {
+ this(input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
/**
* Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See <a
- * href="CompoundWordTokenFilterBase.html#version"
- * >CompoundWordTokenFilterBase</a> for details.
+ *
* @param input
- * the {@link TokenStream} to process
+ * the {@link org.apache.lucene.analysis.TokenStream} to process
* @param hyphenator
* the hyphenation pattern tree to use for hyphenation
* @param dictionary
@@ -91,10 +71,10 @@ public class HyphenationCompoundWordToke
* @param onlyLongestMatch
* Add only the longest matching subword to the stream
*/
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
- int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
- super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
+ public HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
+ int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch);
this.hyphenator = hyphenator;
@@ -103,36 +83,36 @@ public class HyphenationCompoundWordToke
/**
* Create a HyphenationCompoundWordTokenFilter with no dictionary.
* <p>
- * Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
+ * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.util.CharArraySet, int, int, int, boolean)
* HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
* null, minWordSize, minSubwordSize, maxSubwordSize }
*/
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
- int maxSubwordSize) {
- this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
+ public HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
+ int maxSubwordSize) {
+ this(input, hyphenator, null, minWordSize, minSubwordSize,
maxSubwordSize, false);
}
-
+
/**
* Create a HyphenationCompoundWordTokenFilter with no dictionary.
* <p>
- * Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, int, int, int)
- * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
+ * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, int, int, int)
+ * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
* DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE }
*/
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator) {
- this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
+ public HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator) {
+ this(input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
DEFAULT_MAX_SUBWORD_SIZE);
}
/**
* Create a hyphenator tree
- *
+ *
* @param hyphenationFilename the filename of the XML grammar to load
* @return An object representing the hyphenation patterns
- * @throws IOException If there is a low-level I/O error.
+ * @throws java.io.IOException If there is a low-level I/O error.
*/
public static HyphenationTree getHyphenationTree(String hyphenationFilename)
throws IOException {
@@ -141,10 +121,10 @@ public class HyphenationCompoundWordToke
/**
* Create a hyphenator tree
- *
+ *
* @param hyphenationFile the file of the XML grammar to load
* @return An object representing the hyphenation patterns
- * @throws IOException If there is a low-level I/O error.
+ * @throws java.io.IOException If there is a low-level I/O error.
*/
public static HyphenationTree getHyphenationTree(File hyphenationFile)
throws IOException {
@@ -153,10 +133,10 @@ public class HyphenationCompoundWordToke
/**
* Create a hyphenator tree
- *
+ *
* @param hyphenationSource the InputSource pointing to the XML grammar
* @return An object representing the hyphenation patterns
- * @throws IOException If there is a low-level I/O error.
+ * @throws java.io.IOException If there is a low-level I/O error.
*/
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
throws IOException {
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.compo
* limitations under the License.
*/
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.util.CharArraySet;
@@ -28,10 +29,12 @@ import org.apache.lucene.util.IOUtils;
import java.util.Map;
import java.io.IOException;
import java.io.InputStream;
+
+import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;
/**
- * Factory for {@link HyphenationCompoundWordTokenFilter}.
+ * Factory for {@link Lucene43HyphenationCompoundWordTokenFilter}.
* <p>
* This factory accepts the following parameters:
* <ul>
@@ -55,7 +58,7 @@ import org.xml.sax.InputSource;
* </analyzer>
* </fieldType></pre>
*
- * @see HyphenationCompoundWordTokenFilter
+ * @see Lucene43HyphenationCompoundWordTokenFilter
*/
public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary;
@@ -75,9 +78,9 @@ public class HyphenationCompoundWordToke
dictFile = get(args, "dictionary");
encoding = get(args, "encoding");
hypFile = require(args, "hyphenator");
- minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
- minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
- maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+ minWordSize = getInt(args, "minWordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+ minSubwordSize = getInt(args, "minSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+ maxSubwordSize = getInt(args, "maxSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean(args, "onlyLongestMatch", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -96,14 +99,21 @@ public class HyphenationCompoundWordToke
final InputSource is = new InputSource(stream);
is.setEncoding(encoding); // if it's null let xml parser decide
is.setSystemId(hypFile);
- hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+ } else {
+ hyphenator = Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+ }
} finally {
IOUtils.closeWhileHandlingException(stream);
}
}
@Override
- public HyphenationCompoundWordTokenFilter create(TokenStream input) {
- return new HyphenationCompoundWordTokenFilter(luceneMatchVersion, input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ public TokenFilter create(TokenStream input) {
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ }
+ return new Lucene43HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
}