You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rj...@apache.org on 2014/10/08 19:59:18 UTC
svn commit: r1630189 [1/5] - in /lucene/dev/branches/branch_5x/lucene: ./
analysis/common/src/java/org/apache/lucene/analysis/ar/
analysis/common/src/java/org/apache/lucene/analysis/bg/
analysis/common/src/java/org/apache/lucene/analysis/br/ analysis/c...
Author: rjernst
Date: Wed Oct 8 17:59:16 2014
New Revision: 1630189
URL: http://svn.apache.org/r1630189
Log:
LUCENE-5999: Fix backcompat support for StandardTokenizer
Added:
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizer40.java (with props)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
- copied, changed from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchAnalyzer.java
- copied, changed from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/LuceneResourcesWikiPage.html
- copied, changed from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/LuceneResourcesWikiPage.html
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/LuceneResourcesWikiPageURLs.txt
- copied unchanged from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/LuceneResourcesWikiPageURLs.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
- copied, changed from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
- copied, changed from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
- copied, changed from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_1_0.java (with props)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
- copied, changed from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_3_0.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/email.addresses.from.random.text.with.email.addresses.txt
- copied unchanged from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
- copied unchanged from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/random.text.with.email.addresses.txt
- copied unchanged from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/random.text.with.urls.txt
- copied unchanged from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/urls.from.random.text.with.urls.txt
- copied unchanged from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt
Removed:
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/LuceneResourcesWikiPage.html
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/LuceneResourcesWikiPageURLs.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_3_0.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
Modified:
lucene/dev/branches/branch_5x/lucene/CHANGES.txt
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
lucene/dev/branches/branch_5x/lucene/tools/junit4/cached-timehints.txt
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Wed Oct 8 17:59:16 2014
@@ -159,6 +159,9 @@ Bug Fixes
* LUCENE-5980: Don't let document length overflow. (Robert Muir)
+* LUCENE-5999: Fix backcompat support for StandardTokenizer
+ (Ryan Ernst)
+
Documentation
* LUCENE-5392: Add/improve analysis package documentation to reflect
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -25,10 +25,12 @@ import org.apache.lucene.analysis.core.L
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Arabic.
@@ -130,7 +132,12 @@ public final class ArabicAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new LowerCaseFilter(source);
// the order here is important: the stopword list is not normalized!
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -28,8 +28,10 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Bulgarian.
@@ -118,7 +120,12 @@ public final class BulgarianAnalyzer ext
*/
@Override
public TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.analysis.miscel
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Brazilian Portuguese language.
@@ -119,7 +121,12 @@ public final class BrazilianAnalyzer ext
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new LowerCaseFilter(source);
result = new StandardFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,9 +30,11 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.CatalanStemmer;
/**
@@ -120,7 +122,12 @@ public final class CatalanAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(result);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -25,8 +25,10 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
/**
* An {@link Analyzer} that tokenizes text with {@link StandardTokenizer},
@@ -85,7 +87,12 @@ public final class CJKAnalyzer extends S
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
// run the widthfilter first before bigramming, it sometimes combines characters.
TokenStream result = new CJKWidthFilter(source);
result = new LowerCaseFilter(result);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -29,10 +29,12 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Sorani Kurdish.
@@ -114,7 +116,12 @@ public final class SoraniAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new SoraniNormalizationFilter(result);
result = new LowerCaseFilter(result);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -25,10 +25,12 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
import java.io.*;
import java.nio.charset.StandardCharsets;
@@ -116,7 +118,12 @@ public final class CzechAnalyzer extends
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.DanishStemmer;
/**
@@ -115,7 +117,12 @@ public final class DanishAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -32,10 +32,12 @@ import org.apache.lucene.analysis.snowba
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for German language.
@@ -130,7 +132,12 @@ public final class GermanAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -26,8 +26,10 @@ import org.apache.lucene.analysis.core.S
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for the Greek language.
@@ -97,7 +99,12 @@ public final class GreekAnalyzer extends
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new GreekLowerCaseFilter(source);
result = new StandardFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -28,8 +28,10 @@ import org.apache.lucene.analysis.miscel
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for English.
@@ -97,7 +99,12 @@ public final class EnglishAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new EnglishPossessiveFilter(result);
result = new LowerCaseFilter(result);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Spanish.
@@ -114,7 +116,12 @@ public final class SpanishAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -29,8 +29,10 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.BasqueStemmer;
/**
@@ -112,7 +114,12 @@ public final class BasqueAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -27,8 +27,10 @@ import org.apache.lucene.analysis.ar.Ara
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Persian.
@@ -112,7 +114,12 @@ public final class PersianAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new LowerCaseFilter(source);
result = new ArabicNormalizationFilter(result);
/* additional persian-specific normalization */
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.FinnishStemmer;
/**
@@ -115,7 +117,12 @@ public final class FinnishAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -27,11 +27,13 @@ import org.apache.lucene.analysis.snowba
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@@ -134,7 +136,12 @@ public final class FrenchAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(result);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -29,9 +29,11 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.IrishStemmer;
/**
@@ -130,7 +132,12 @@ public final class IrishAnalyzer extends
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new StopFilter(result, HYPHENATIONS);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -29,10 +29,12 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Galician.
@@ -113,7 +115,12 @@ public final class GalicianAnalyzer exte
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -22,6 +22,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
@@ -29,6 +30,7 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
+import org.apache.lucene.util.Version;
/**
* Analyzer for Hindi.
@@ -113,7 +115,12 @@ public final class HindiAnalyzer extends
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new LowerCaseFilter(source);
if (!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.HungarianStemmer;
/**
@@ -115,7 +117,12 @@ public final class HungarianAnalyzer ext
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -29,8 +29,10 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.ArmenianStemmer;
/**
@@ -112,7 +114,12 @@ public final class ArmenianAnalyzer exte
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -27,8 +27,10 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
/**
* Analyzer for Indonesian (Bahasa)
@@ -110,7 +112,12 @@ public final class IndonesianAnalyzer ex
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -31,11 +31,13 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Italian.
@@ -123,7 +125,12 @@ public final class ItalianAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(result);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -29,10 +29,12 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Latvian.
@@ -113,7 +115,12 @@ public final class LatvianAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -28,12 +28,14 @@ import org.apache.lucene.analysis.miscel
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@@ -149,7 +151,12 @@ public final class DutchAnalyzer extends
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stoptable);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.NorwegianStemmer;
/**
@@ -115,7 +117,12 @@ public final class NorwegianAnalyzer ext
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Portuguese.
@@ -114,7 +116,12 @@ public final class PortugueseAnalyzer ex
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -29,8 +29,10 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.RomanianStemmer;
/**
@@ -117,7 +119,12 @@ public final class RomanianAnalyzer exte
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
@@ -34,6 +35,7 @@ import org.apache.lucene.analysis.miscel
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Russian language.
@@ -112,7 +114,12 @@ public final class RussianAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -18,12 +18,15 @@ package org.apache.lucene.analysis.stand
*/
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@@ -82,15 +85,28 @@ public final class StandardAnalyzer exte
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
- final StandardTokenizer src = new StandardTokenizer();
- src.setMaxTokenLength(maxTokenLength);
+ final Tokenizer src;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ StandardTokenizer t = new StandardTokenizer();
+ t.setMaxTokenLength(maxTokenLength);
+ src = t;
+ } else {
+ StandardTokenizer40 t = new StandardTokenizer40();
+ t.setMaxTokenLength(maxTokenLength);
+ src = t;
+ }
TokenStream tok = new StandardFilter(src);
tok = new LowerCaseFilter(tok);
tok = new StopFilter(tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void setReader(final Reader reader) throws IOException {
- src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
+ int m = StandardAnalyzer.this.maxTokenLength;
+ if (src instanceof StandardTokenizer) {
+ ((StandardTokenizer)src).setMaxTokenLength(m);
+ } else {
+ ((StandardTokenizer40)src).setMaxTokenLength(m);
+ }
super.setReader(reader);
}
};
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java Wed Oct 8 17:59:16 2014
@@ -17,8 +17,11 @@ package org.apache.lucene.analysis.stand
* limitations under the License.
*/
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.Version;
import java.util.Map;
@@ -44,9 +47,15 @@ public class StandardTokenizerFactory ex
}
@Override
- public StandardTokenizer create(AttributeFactory factory) {
- StandardTokenizer tokenizer = new StandardTokenizer(factory);
- tokenizer.setMaxTokenLength(maxTokenLength);
- return tokenizer;
+ public Tokenizer create(AttributeFactory factory) {
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_7_0)) {
+ StandardTokenizer tokenizer = new StandardTokenizer(factory);
+ tokenizer.setMaxTokenLength(maxTokenLength);
+ return tokenizer;
+ } else {
+ StandardTokenizer40 tokenizer40 = new StandardTokenizer40(factory);
+ tokenizer40.setMaxTokenLength(maxTokenLength);
+ return tokenizer40;
+ }
}
}
Added: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizer40.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizer40.java?rev=1630189&view=auto
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizer40.java (added)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizer40.java Wed Oct 8 17:59:16 2014
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard.std40;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
+
+/** Backcompat standard tokenizer for Lucene 4.0-4.6. This supports Unicode 6.1.
+ *
+ * @deprecated Use {@link org.apache.lucene.analysis.standard.StandardTokenizer}
+ */
+@Deprecated
+public final class StandardTokenizer40 extends Tokenizer {
+ /** A private instance of the JFlex-constructed scanner */
+ private StandardTokenizerInterface scanner;
+
+ public static final int ALPHANUM = 0;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int APOSTROPHE = 1;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int ACRONYM = 2;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int COMPANY = 3;
+ public static final int EMAIL = 4;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int HOST = 5;
+ public static final int NUM = 6;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int CJ = 7;
+
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int ACRONYM_DEP = 8;
+
+ public static final int SOUTHEAST_ASIAN = 9;
+ public static final int IDEOGRAPHIC = 10;
+ public static final int HIRAGANA = 11;
+ public static final int KATAKANA = 12;
+ public static final int HANGUL = 13;
+
+ /** String token types that correspond to token type int constants */
+ public static final String [] TOKEN_TYPES = new String [] {
+ "<ALPHANUM>",
+ "<APOSTROPHE>",
+ "<ACRONYM>",
+ "<COMPANY>",
+ "<EMAIL>",
+ "<HOST>",
+ "<NUM>",
+ "<CJ>",
+ "<ACRONYM_DEP>",
+ "<SOUTHEAST_ASIAN>",
+ "<IDEOGRAPHIC>",
+ "<HIRAGANA>",
+ "<KATAKANA>",
+ "<HANGUL>"
+ };
+
+ private int skippedPositions;
+
+ private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+ /** Set the max allowed token length. Any token longer
+ * than this is skipped. */
+ public void setMaxTokenLength(int length) {
+ this.maxTokenLength = length;
+ }
+
+ /** @see #setMaxTokenLength */
+ public int getMaxTokenLength() {
+ return maxTokenLength;
+ }
+
+ /**
+ * Creates a new instance of the {@link org.apache.lucene.analysis.standard.std40.StandardTokenizer40}.
+ *
+ * See http://issues.apache.org/jira/browse/LUCENE-1068
+ */
+ public StandardTokenizer40() {
+ init();
+ }
+
+ /**
+ * Creates a new StandardTokenizer40 with a given {@link org.apache.lucene.util.AttributeFactory}
+ */
+ public StandardTokenizer40(AttributeFactory factory) {
+ super(factory);
+ init();
+ }
+
+ private final void init() {
+ this.scanner = new StandardTokenizerImpl40(input);
+ }
+
+ // this tokenizer generates three attributes:
+ // term offset, positionIncrement and type
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.lucene.analysis.TokenStream#next()
+ */
+ @Override
+ public final boolean incrementToken() throws IOException {
+ clearAttributes();
+ skippedPositions = 0;
+
+ while(true) {
+ int tokenType = scanner.getNextToken();
+
+ if (tokenType == StandardTokenizerInterface.YYEOF) {
+ return false;
+ }
+
+ if (scanner.yylength() <= maxTokenLength) {
+ posIncrAtt.setPositionIncrement(skippedPositions+1);
+ scanner.getText(termAtt);
+ final int start = scanner.yychar();
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
+ // This 'if' should be removed in the next release. For now, it converts
+ // invalid acronyms to HOST. When removed, only the 'else' part should
+ // remain.
+ if (tokenType == StandardTokenizer40.ACRONYM_DEP) {
+ typeAtt.setType(StandardTokenizer40.TOKEN_TYPES[StandardTokenizer40.HOST]);
+ termAtt.setLength(termAtt.length() - 1); // remove extra '.'
+ } else {
+ typeAtt.setType(StandardTokenizer40.TOKEN_TYPES[tokenType]);
+ }
+ return true;
+ } else
+ // When we skip a too-long term, we still increment the
+ // position increment
+ skippedPositions++;
+ }
+ }
+
+ @Override
+ public final void end() throws IOException {
+ super.end();
+ // set final offset
+ int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+ offsetAtt.setOffset(finalOffset, finalOffset);
+ // adjust any skipped tokens
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ scanner.yyreset(input);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ scanner.yyreset(input);
+ skippedPositions = 0;
+ }
+}
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.SwedishStemmer;
/**
@@ -115,7 +117,12 @@ public final class SwedishAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopwords);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.core.L
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
@@ -104,7 +105,12 @@ public final class ThaiAnalyzer extends
result = new StopFilter(result, stopwords);
return new TokenStreamComponents(source, result);
} else {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
result = new LowerCaseFilter(result);
result = new ThaiWordFilter(result);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
@@ -117,7 +118,12 @@ public final class TurkishAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer();
+ final Tokenizer source;
+ if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+ source = new StandardTokenizer();
+ } else {
+ source = new StandardTokenizer40();
+ }
TokenStream result = new StandardFilter(source);
if (getVersion().onOrAfter(Version.LUCENE_4_8_0)) {
result = new ApostropheFilter(result);
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
/**
* Test the Arabic Analyzer
@@ -98,4 +99,11 @@ public class TestArabicAnalyzer extends
public void testRandomStrings() throws Exception {
checkRandomData(random(), new ArabicAnalyzer(), 1000*RANDOM_MULTIPLIER);
}
+
+ public void testBackcompat40() throws IOException {
+ ArabicAnalyzer a = new ArabicAnalyzer();
+ a.setVersion(Version.LUCENE_4_6_1);
+ // this is just a test to see the correct unicode version is being used, not actually testing hebrew
+ assertAnalyzesTo(a, "א\"א", new String[] {"א", "א"});
+ }
}
Modified: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java?rev=1630189&r1=1630188&r2=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
/**
* Test the Bulgarian analyzer
@@ -77,4 +78,11 @@ public class TestBulgarianAnalyzer exten
public void testRandomStrings() throws Exception {
checkRandomData(random(), new BulgarianAnalyzer(), 1000*RANDOM_MULTIPLIER);
}
+
+ public void testBackcompat40() throws IOException {
+ BulgarianAnalyzer a = new BulgarianAnalyzer();
+ a.setVersion(Version.LUCENE_4_6_1);
+ // this is just a test to see the correct unicode version is being used, not actually testing hebrew
+ assertAnalyzesTo(a, "א\"א", new String[] {"א", "א"});
+ }
}
Copied: lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java (from r1630182, lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java?p2=lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java&p1=lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java&r1=1630182&r2=1630189&rev=1630189&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java Wed Oct 8 17:59:16 2014
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.core.K
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
/**
* Test the Brazilian Stem Filter, which only modifies the term text.
@@ -35,7 +36,7 @@ import org.apache.lucene.analysis.util.C
* It is very similar to the snowball portuguese algorithm but not exactly the same.
*
*/
-public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
+public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
public void testWithSnowballExamples() throws Exception {
check("boa", "boa");
@@ -176,4 +177,11 @@ public class TestBrazilianStemmer extend
};
checkOneTerm(a, "", "");
}
+
+ public void testBackcompat40() throws IOException {
+ BrazilianAnalyzer a = new BrazilianAnalyzer();
+ a.setVersion(Version.LUCENE_4_6_1);
+ // this is just a test to see the correct unicode version is being used, not actually testing hebrew
+ assertAnalyzesTo(a, "א\"א", new String[] {"א", "א"});
+ }
}