You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rj...@apache.org on 2014/08/09 00:42:54 UTC
svn commit: r1616901 [3/11] - in /lucene/dev/trunk: lucene/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/
lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/ luc...
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for English.
@@ -57,18 +56,17 @@ public final class EnglishAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #getDefaultStopSet}.
*/
- public EnglishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public EnglishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public EnglishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -76,14 +74,12 @@ public final class EnglishAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public EnglishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -101,11 +97,11 @@ public final class EnglishAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new EnglishPossessiveFilter(matchVersion, result);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new EnglishPossessiveFilter(result);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new PorterStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java Fri Aug 8 22:42:48 2014
@@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
/**
* TokenFilter that removes possessives (trailing 's) from words.
@@ -30,8 +29,7 @@ import org.apache.lucene.util.Version;
public final class EnglishPossessiveFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- // NOTE: version now unused
- public EnglishPossessiveFilter(Version version, TokenStream input) {
+ public EnglishPossessiveFilter(TokenStream input) {
super(input);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -39,7 +39,6 @@ public class EnglishPossessiveFilterFact
/** Creates a new EnglishPossessiveFilterFactory */
public EnglishPossessiveFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -47,6 +46,6 @@ public class EnglishPossessiveFilterFact
@Override
public TokenStream create(TokenStream input) {
- return new EnglishPossessiveFilter(luceneMatchVersion, input);
+ return new EnglishPossessiveFilter(input);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java Fri Aug 8 22:42:48 2014
@@ -64,7 +64,6 @@ import org.apache.lucene.analysis.util.O
* <p>Copyright: Copyright 2008, Lucid Imagination, Inc. </p>
* <p>Copyright: Copyright 2003, CIIR University of Massachusetts Amherst (http://ciir.cs.umass.edu) </p>
*/
-import org.apache.lucene.util.Version;
/**
* This class implements the Kstem algorithm
@@ -280,7 +279,7 @@ public class KStemmer {
DictEntry defaultEntry;
DictEntry entry;
- CharArrayMap<DictEntry> d = new CharArrayMap<>(Version.LUCENE_CURRENT, 1000, false);
+ CharArrayMap<DictEntry> d = new CharArrayMap<>(1000, false);
for (int i = 0; i < exceptionWords.length; i++) {
if (!d.containsKey(exceptionWords[i])) {
entry = new DictEntry(exceptionWords[i], true);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Spanish.
@@ -63,7 +62,7 @@ public final class SpanishAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -75,18 +74,17 @@ public final class SpanishAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public SpanishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public SpanishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public SpanishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -94,14 +92,12 @@ public final class SpanishAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public SpanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -118,10 +114,10 @@ public final class SpanishAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SpanishLightStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.BasqueStemmer;
/**
@@ -73,18 +72,17 @@ public final class BasqueAnalyzer extend
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public BasqueAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public BasqueAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public BasqueAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -92,14 +90,12 @@ public final class BasqueAnalyzer extend
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public BasqueAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -116,10 +112,10 @@ public final class BasqueAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new BasqueStemmer());
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.core.S
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Persian.
@@ -87,20 +86,18 @@ public final class PersianAnalyzer exten
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
- public PersianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public PersianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public PersianAnalyzer(Version matchVersion, CharArraySet stopwords){
- super(matchVersion, stopwords);
+ public PersianAnalyzer(CharArraySet stopwords){
+ super(stopwords);
}
/**
@@ -115,8 +112,8 @@ public final class PersianAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new LowerCaseFilter(source);
result = new ArabicNormalizationFilter(result);
/* additional persian-specific normalization */
result = new PersianNormalizationFilter(result);
@@ -124,7 +121,7 @@ public final class PersianAnalyzer exten
* the order here is important: the stopword list is normalized with the
* above!
*/
- return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+ return new TokenStreamComponents(source, new StopFilter(result, stopwords));
}
/**
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.FinnishStemmer;
/**
@@ -64,7 +63,7 @@ public final class FinnishAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,18 +75,17 @@ public final class FinnishAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public FinnishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public FinnishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public FinnishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -95,14 +93,12 @@ public final class FinnishAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public FinnishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -119,10 +115,10 @@ public final class FinnishAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new FinnishStemmer());
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -32,7 +32,6 @@ import org.apache.lucene.analysis.util.E
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@@ -49,7 +48,7 @@ import java.util.Arrays;
* exclusion list is empty by default.
* </p>
*
- * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public final class FrenchAnalyzer extends StopwordAnalyzerBase {
@@ -59,7 +58,7 @@ public final class FrenchAnalyzer extend
/** Default set of articles for ElisionFilter */
public static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
+ new CharArraySet(Arrays.asList(
"l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"), true));
/**
@@ -80,7 +79,7 @@ public final class FrenchAnalyzer extend
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -92,37 +91,33 @@ public final class FrenchAnalyzer extend
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet}).
*/
- public FrenchAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public FrenchAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords){
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public FrenchAnalyzer(CharArraySet stopwords){
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclutionSet
* a stemming exclusion set
*/
- public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords,
+ public FrenchAnalyzer(CharArraySet stopwords,
CharArraySet stemExclutionSet) {
- super(matchVersion, stopwords);
+ super(stopwords);
this.excltable = CharArraySet.unmodifiableSet(CharArraySet
- .copy(matchVersion, stemExclutionSet));
+ .copy(stemExclutionSet));
}
/**
@@ -139,11 +134,11 @@ public final class FrenchAnalyzer extend
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
result = new FrenchLightStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -32,7 +32,6 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.IrishStemmer;
/**
@@ -45,7 +44,7 @@ public final class IrishAnalyzer extends
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"d", "m", "b"
), true));
@@ -56,7 +55,7 @@ public final class IrishAnalyzer extends
* with phrase queries versus tAthair (which would not have a gap).
*/
private static final CharArraySet HYPHENATIONS = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"h", "n", "t"
), true));
@@ -91,18 +90,17 @@ public final class IrishAnalyzer extends
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public IrishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public IrishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public IrishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -110,14 +108,12 @@ public final class IrishAnalyzer extends
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public IrishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -134,12 +130,12 @@ public final class IrishAnalyzer extends
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new StopFilter(matchVersion, result, HYPHENATIONS);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new StopFilter(result, HYPHENATIONS);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new IrishLowerCaseFilter(result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new IrishStemmer());
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Galician.
@@ -62,7 +61,7 @@ public final class GalicianAnalyzer exte
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(GalicianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,18 +73,17 @@ public final class GalicianAnalyzer exte
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public GalicianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public GalicianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public GalicianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -93,14 +91,12 @@ public final class GalicianAnalyzer exte
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public GalicianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -117,10 +113,10 @@ public final class GalicianAnalyzer exte
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new GalicianStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
-import org.apache.lucene.util.Version;
/**
* Analyzer for Hindi.
@@ -75,32 +74,29 @@ public final class HindiAnalyzer extends
/**
* Builds an analyzer with the given stop words
*
- * @param version lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a stemming exclusion set
*/
- public HindiAnalyzer(Version version, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(version, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(
- CharArraySet.copy(matchVersion, stemExclusionSet));
+ public HindiAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
* Builds an analyzer with the given stop words
*
- * @param version lucene compatibility version
* @param stopwords a stopword set
*/
- public HindiAnalyzer(Version version, CharArraySet stopwords) {
- this(version, stopwords, CharArraySet.EMPTY_SET);
+ public HindiAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
- public HindiAnalyzer(Version version) {
- this(version, DefaultSetHolder.DEFAULT_STOP_SET);
+ public HindiAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
@@ -117,13 +113,13 @@ public final class HindiAnalyzer extends
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new LowerCaseFilter(source);
if (!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new IndicNormalizationFilter(result);
result = new HindiNormalizationFilter(result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new StopFilter(result, stopwords);
result = new HindiStemFilter(result);
return new TokenStreamComponents(source, result);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.HungarianStemmer;
/**
@@ -64,7 +63,7 @@ public final class HungarianAnalyzer ext
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,18 +75,17 @@ public final class HungarianAnalyzer ext
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public HungarianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public HungarianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public HungarianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -95,14 +93,12 @@ public final class HungarianAnalyzer ext
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public HungarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -119,10 +115,10 @@ public final class HungarianAnalyzer ext
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new HungarianStemmer());
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java Fri Aug 8 22:42:48 2014
@@ -28,7 +28,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
@@ -215,7 +214,7 @@ final class Stemmer {
if (stems.size() < 2) {
return stems;
}
- CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
+ CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase);
List<CharsRef> deduped = new ArrayList<>();
for (CharsRef s : stems) {
if (!terms.contains(s)) {
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.ArmenianStemmer;
/**
@@ -73,18 +72,17 @@ public final class ArmenianAnalyzer exte
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public ArmenianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public ArmenianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public ArmenianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -92,14 +90,12 @@ public final class ArmenianAnalyzer exte
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public ArmenianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -116,10 +112,10 @@ public final class ArmenianAnalyzer exte
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new ArmenianStemmer());
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* Analyzer for Indonesian (Bahasa)
@@ -69,20 +68,18 @@ public final class IndonesianAnalyzer ex
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public IndonesianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public IndonesianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords){
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public IndonesianAnalyzer(CharArraySet stopwords){
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -90,17 +87,14 @@ public final class IndonesianAnalyzer ex
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* {@link IndonesianStemFilter}.
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a set of terms not to be stemmed
*/
- public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public IndonesianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -116,10 +110,10 @@ public final class IndonesianAnalyzer ex
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if (!stemExclusionSet.isEmpty()) {
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -36,7 +36,6 @@ import org.apache.lucene.analysis.util.E
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Italian.
@@ -48,7 +47,7 @@ public final class ItalianAnalyzer exten
public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
"gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
@@ -72,7 +71,7 @@ public final class ItalianAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -84,18 +83,17 @@ public final class ItalianAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public ItalianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public ItalianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public ItalianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -103,14 +101,12 @@ public final class ItalianAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public ItalianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -127,11 +123,11 @@ public final class ItalianAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new ItalianLightStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java Fri Aug 8 22:42:48 2014
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Latvian.
@@ -62,7 +61,7 @@ public final class LatvianAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(LatvianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,18 +73,17 @@ public final class LatvianAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public LatvianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public LatvianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public LatvianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -93,14 +91,12 @@ public final class LatvianAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public LatvianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -117,10 +113,10 @@ public final class LatvianAnalyzer exten
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new LatvianStemFilter(result);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -82,7 +82,7 @@ public class CapitalizationFilterFactory
boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
Set<String> k = getSet(args, KEEP);
if (k != null) {
- keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
+ keep = new CharArraySet(10, ignoreCase);
keep.addAll(k);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java Fri Aug 8 22:42:48 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.misce
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
/**
* Removes words that are too long or too short from the stream.
@@ -39,13 +38,12 @@ public final class CodepointCountFilter
* Create a new {@link CodepointCountFilter}. This will filter out tokens whose
* {@link CharTermAttribute} is either too short ({@link Character#codePointCount(char[], int, int)}
* < min) or too long ({@link Character#codePointCount(char[], int, int)} > max).
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param min the minimum length
* @param max the maximum length
*/
- public CodepointCountFilter(Version version, TokenStream in, int min, int max) {
- super(version, in);
+ public CodepointCountFilter(TokenStream in, int min, int max) {
+ super(in);
if (min < 0) {
throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -50,6 +50,6 @@ public class CodepointCountFilterFactory
@Override
public CodepointCountFilter create(TokenStream input) {
- return new CodepointCountFilter(luceneMatchVersion, input, min, max);
+ return new CodepointCountFilter(input, min, max);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java Fri Aug 8 22:42:48 2014
@@ -21,7 +21,6 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
/**
* A TokenFilter that only keeps tokens with text contained in the
@@ -37,12 +36,11 @@ public final class KeepWordFilter extend
* Create a new {@link KeepWordFilter}.
* <p><b>NOTE</b>: The words set passed to this constructor will be directly
* used by this filter and should not be modified.
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param words the words to keep
*/
- public KeepWordFilter(Version version, TokenStream in, CharArraySet words) {
- super(version, in);
+ public KeepWordFilter(TokenStream in, CharArraySet words) {
+ super(in);
this.words = words;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -44,7 +44,6 @@ public class KeepWordFilterFactory exten
/** Creates a new KeepWordFilterFactory */
public KeepWordFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
wordFiles = get(args, "words");
ignoreCase = getBoolean(args, "ignoreCase", false);
if (!args.isEmpty()) {
@@ -73,7 +72,7 @@ public class KeepWordFilterFactory exten
if (words == null) {
return input;
} else {
- final TokenStream filter = new KeepWordFilter(luceneMatchVersion, input, words);
+ final TokenStream filter = new KeepWordFilter(input, words);
return filter;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java Fri Aug 8 22:42:48 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.misce
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
/**
* Removes words that are too long or too short from the stream.
@@ -39,13 +38,12 @@ public final class LengthFilter extends
* Create a new {@link LengthFilter}. This will filter out tokens whose
* {@link CharTermAttribute} is either too short ({@link CharTermAttribute#length()}
* < min) or too long ({@link CharTermAttribute#length()} > max).
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param min the minimum length
* @param max the maximum length
*/
- public LengthFilter(Version version, TokenStream in, int min, int max) {
- super(version, in);
+ public LengthFilter(TokenStream in, int min, int max) {
+ super(in);
if (min < 0) {
throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -50,7 +50,7 @@ public class LengthFilterFactory extends
@Override
public LengthFilter create(TokenStream input) {
- final LengthFilter filter = new LengthFilter(luceneMatchVersion, input,min,max);
+ final LengthFilter filter = new LengthFilter(input,min,max);
return filter;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java Fri Aug 8 22:42:48 2014
@@ -22,7 +22,6 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
import java.io.IOException;
@@ -34,8 +33,7 @@ public final class RemoveDuplicatesToken
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
- // use a fixed version, as we don't care about case sensitivity.
- private final CharArraySet previous = new CharArraySet(Version.LUCENE_CURRENT, 8, false);
+ private final CharArraySet previous = new CharArraySet(8, false);
/**
* Creates a new RemoveDuplicatesTokenFilter
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java Fri Aug 8 22:42:48 2014
@@ -20,15 +20,11 @@ package org.apache.lucene.analysis.misce
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.Version;
import java.io.IOException;
/**
* Trims leading and trailing whitespace from Tokens in the stream.
- * <p>As of Lucene 4.4, this filter does not support updateOffsets=true anymore
- * as it can lead to broken token streams.
*/
public final class TrimFilter extends TokenFilter {
@@ -36,10 +32,9 @@ public final class TrimFilter extends To
/**
* Create a new {@link TrimFilter}.
- * @param version the Lucene match version
* @param in the stream to consume
*/
- public TrimFilter(Version version, TokenStream in) {
+ public TrimFilter(TokenStream in) {
super(in);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -47,7 +47,7 @@ public class TrimFilterFactory extends T
@Override
public TrimFilter create(TokenStream input) {
- final TrimFilter filter = new TrimFilter(luceneMatchVersion, input);
+ final TrimFilter filter = new TrimFilter(input);
return filter;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java Fri Aug 8 22:42:48 2014
@@ -30,7 +30,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.Version;
import java.io.IOException;
import java.util.Arrays;
@@ -206,11 +205,8 @@ public final class WordDelimiterFilter e
* @param configurationFlags Flags configuring the filter
* @param protWords If not null is the set of tokens to protect from being delimited
*/
- public WordDelimiterFilter(Version matchVersion, TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) {
+ public WordDelimiterFilter(TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) {
super(in);
- if (!matchVersion.onOrAfter(Version.LUCENE_4_8)) {
- throw new IllegalArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
- }
this.flags = configurationFlags;
this.protWords = protWords;
this.iterator = new WordDelimiterIterator(
@@ -225,8 +221,8 @@ public final class WordDelimiterFilter e
* @param configurationFlags Flags configuring the filter
* @param protWords If not null is the set of tokens to protect from being delimited
*/
- public WordDelimiterFilter(Version matchVersion, TokenStream in, int configurationFlags, CharArraySet protWords) {
- this(matchVersion, in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
+ public WordDelimiterFilter(TokenStream in, int configurationFlags, CharArraySet protWords) {
+ this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
}
@Override
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -119,7 +119,7 @@ public class WordDelimiterFilterFactory
@Override
public TokenFilter create(TokenStream input) {
if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_8)) {
- return new WordDelimiterFilter(luceneMatchVersion, input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
+ return new WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
flags, protectedWords);
} else {
return new Lucene47WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java Fri Aug 8 22:42:48 2014
@@ -18,8 +18,11 @@ package org.apache.lucene.analysis.ngram
*/
import java.util.Map;
+
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
/**
* Creates new instances of {@link EdgeNGramTokenFilter}.
@@ -46,7 +49,10 @@ public class EdgeNGramFilterFactory exte
}
@Override
- public EdgeNGramTokenFilter create(TokenStream input) {
- return new EdgeNGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize);
+ public TokenFilter create(TokenStream input) {
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new EdgeNGramTokenFilter(input, minGramSize, maxGramSize);
+ }
+ return new Lucene43EdgeNGramTokenFilter(input, minGramSize, maxGramSize);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Fri Aug 8 22:42:48 2014
@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.util.Version;
/**
* Tokenizes the given token into n-grams of given size(s).
@@ -59,18 +58,13 @@ public final class EdgeNGramTokenFilter
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
*
- * @param version the Lucene match version
* @param input {@link TokenStream} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public EdgeNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
+ public EdgeNGramTokenFilter(TokenStream input, int minGram, int maxGram) {
super(input);
- if (version == null) {
- throw new IllegalArgumentException("version must not be null");
- }
-
if (minGram < 1) {
throw new IllegalArgumentException("minGram must be greater than zero");
}
@@ -79,9 +73,7 @@ public final class EdgeNGramTokenFilter
throw new IllegalArgumentException("minGram must not be greater than maxGram");
}
- this.charUtils = version.onOrAfter(Version.LUCENE_4_4)
- ? CharacterUtils.getInstance(version)
- : CharacterUtils.getJava4Instance();
+ this.charUtils = CharacterUtils.getInstance();
this.minGram = minGram;
this.maxGram = maxGram;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Fri Aug 8 22:42:48 2014
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.ngram
* limitations under the License.
*/
-import java.io.Reader;
-
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
@@ -38,24 +36,22 @@ public class EdgeNGramTokenizer extends
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param version the Lucene match version
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public EdgeNGramTokenizer(Version version, int minGram, int maxGram) {
- super(version, minGram, maxGram, true);
+ public EdgeNGramTokenizer(int minGram, int maxGram) {
+ super(minGram, maxGram, true);
}
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param version the Lucene match version
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public EdgeNGramTokenizer(Version version, AttributeFactory factory, int minGram, int maxGram) {
- super(version, factory, minGram, maxGram, true);
+ public EdgeNGramTokenizer(AttributeFactory factory, int minGram, int maxGram) {
+ super(factory, minGram, maxGram, true);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java?rev=1616901&r1=1616900&r2=1616901&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java Fri Aug 8 22:42:48 2014
@@ -17,8 +17,10 @@ package org.apache.lucene.analysis.ngram
* limitations under the License.
*/
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.Version;
import java.io.Reader;
import java.util.Map;
@@ -47,7 +49,10 @@ public class EdgeNGramTokenizerFactory e
}
@Override
- public EdgeNGramTokenizer create(AttributeFactory factory) {
- return new EdgeNGramTokenizer(luceneMatchVersion, factory, minGramSize, maxGramSize);
+ public Tokenizer create(AttributeFactory factory) {
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new EdgeNGramTokenizer(factory, minGramSize, maxGramSize);
+ }
+ return new Lucene43NGramTokenizer(factory, minGramSize, maxGramSize);
}
}
Added: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java?rev=1616901&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java Fri Aug 8 22:42:48 2014
@@ -0,0 +1,126 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.util.CharacterUtils;
+
+import java.io.IOException;
+
+/**
+ * Tokenizes the given token into n-grams of given size(s), using pre-4.4 behavior.
+ *
+ * @deprecated Use {@link org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter}.
+ */
+@Deprecated
+public final class Lucene43EdgeNGramTokenFilter extends TokenFilter {
+ public static final int DEFAULT_MAX_GRAM_SIZE = 1;
+ public static final int DEFAULT_MIN_GRAM_SIZE = 1;
+
+ private final CharacterUtils charUtils;
+ private final int minGram;
+ private final int maxGram;
+ private char[] curTermBuffer;
+ private int curTermLength;
+ private int curCodePointCount;
+ private int curGramSize;
+ private int tokStart;
+ private int tokEnd; // only used if the length changed before this filter
+ private int savePosIncr;
+ private int savePosLen;
+
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
+
+ /**
+ * Creates Lucene43EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+ *
+ * @param input {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized
+ * @param minGram the smallest n-gram to generate
+ * @param maxGram the largest n-gram to generate
+ */
+ public Lucene43EdgeNGramTokenFilter(TokenStream input, int minGram, int maxGram) {
+ super(input);
+
+ if (minGram < 1) {
+ throw new IllegalArgumentException("minGram must be greater than zero");
+ }
+
+ if (minGram > maxGram) {
+ throw new IllegalArgumentException("minGram must not be greater than maxGram");
+ }
+
+ this.charUtils = CharacterUtils.getJava4Instance();
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ while (true) {
+ if (curTermBuffer == null) {
+ if (!input.incrementToken()) {
+ return false;
+ } else {
+ curTermBuffer = termAtt.buffer().clone();
+ curTermLength = termAtt.length();
+ curCodePointCount = charUtils.codePointCount(termAtt);
+ curGramSize = minGram;
+ tokStart = offsetAtt.startOffset();
+ tokEnd = offsetAtt.endOffset();
+ savePosIncr += posIncrAtt.getPositionIncrement();
+ savePosLen = posLenAtt.getPositionLength();
+ }
+ }
+ if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit
+ if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
+ // grab gramSize chars from front or back
+ clearAttributes();
+ offsetAtt.setOffset(tokStart, tokEnd);
+ // first ngram gets increment, others don't
+ if (curGramSize == minGram) {
+ posIncrAtt.setPositionIncrement(savePosIncr);
+ savePosIncr = 0;
+ } else {
+ posIncrAtt.setPositionIncrement(0);
+ }
+ posLenAtt.setPositionLength(savePosLen);
+ final int charLength = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
+ termAtt.copyBuffer(curTermBuffer, 0, charLength);
+ curGramSize++;
+ return true;
+ }
+ }
+ curTermBuffer = null;
+ }
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ curTermBuffer = null;
+ savePosIncr = 0;
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java?rev=1616901&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java Fri Aug 8 22:42:48 2014
@@ -0,0 +1,53 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AttributeFactory;
+
+/**
+ * Tokenizes the input from an edge into n-grams of given size(s), using pre-4.4 behavior.
+ *
+ * @deprecated Use {@link org.apache.lucene.analysis.ngram.EdgeNGramTokenizer}.
+ */
+@Deprecated
+public class Lucene43EdgeNGramTokenizer extends Lucene43NGramTokenizer {
+ public static final int DEFAULT_MAX_GRAM_SIZE = 1;
+ public static final int DEFAULT_MIN_GRAM_SIZE = 1;
+
+ /**
+ * Creates Lucene43EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ *
+ * @param minGram the smallest n-gram to generate
+ * @param maxGram the largest n-gram to generate
+ */
+ public Lucene43EdgeNGramTokenizer(int minGram, int maxGram) {
+ super(minGram, maxGram);
+ }
+
+ /**
+ * Creates Lucene43EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ *
+ * @param factory {@link org.apache.lucene.util.AttributeFactory} to use
+ * @param minGram the smallest n-gram to generate
+ * @param maxGram the largest n-gram to generate
+ */
+ public Lucene43EdgeNGramTokenizer(AttributeFactory factory, int minGram, int maxGram) {
+ super(factory, minGram, maxGram);
+ }
+
+}