You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rj...@apache.org on 2014/08/21 05:12:58 UTC
svn commit: r1619283 [3/11] - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/
lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/
lucene/analysis/common/src/java/o...
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -64,7 +64,7 @@ public final class FinnishAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,6 +76,14 @@ public final class FinnishAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public FinnishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #FinnishAnalyzer()}
+ */
+ @Deprecated
public FinnishAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
@@ -83,9 +91,16 @@ public final class FinnishAnalyzer exten
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
+ public FinnishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #FinnishAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -95,10 +110,18 @@ public final class FinnishAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
+ public FinnishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #FinnishAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
@@ -120,10 +143,10 @@ public final class FinnishAnalyzer exten
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new FinnishStemmer());
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -50,15 +50,13 @@ import java.util.Arrays;
* </p>
*
* <a name="version"/>
- * <p>You must specify the required {@link Version}
+ * <p>You may specify the {@link Version}
* compatibility when creating FrenchAnalyzer:
* <ul>
* <li> As of 3.6, FrenchLightStemFilter is used for less aggressive stemming.
* <li> As of 3.1, Snowball stemming is done with SnowballFilter,
* LowerCaseFilter is used prior to StopFilter, and ElisionFilter and
* Snowball stopwords are used by default.
- * <li> As of 2.9, StopFilter preserves position
- * increments
* </ul>
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
@@ -101,7 +99,7 @@ public final class FrenchAnalyzer extend
/** Default set of articles for ElisionFilter */
public static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
+ new CharArraySet(Arrays.asList(
"l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"), true));
/**
@@ -127,7 +125,7 @@ public final class FrenchAnalyzer extend
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -139,6 +137,14 @@ public final class FrenchAnalyzer extend
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet}).
*/
+ public FrenchAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #FrenchAnalyzer()}
+ */
+ @Deprecated
public FrenchAnalyzer(Version matchVersion) {
this(matchVersion,
matchVersion.onOrAfter(Version.LUCENE_3_1) ? DefaultSetHolder.DEFAULT_STOP_SET
@@ -148,11 +154,17 @@ public final class FrenchAnalyzer extend
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
+ public FrenchAnalyzer(CharArraySet stopwords){
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #FrenchAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords){
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -160,13 +172,22 @@ public final class FrenchAnalyzer extend
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclutionSet
* a stemming exclusion set
*/
+ public FrenchAnalyzer(CharArraySet stopwords,
+ CharArraySet stemExclutionSet) {
+ super(stopwords);
+ this.excltable = CharArraySet.unmodifiableSet(CharArraySet
+ .copy(stemExclutionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #FrenchAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords,
CharArraySet stemExclutionSet) {
super(matchVersion, stopwords);
@@ -189,29 +210,29 @@ public final class FrenchAnalyzer extend
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- if (matchVersion.onOrAfter(Version.LUCENE_3_1)) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
+ if (getVersion().onOrAfter(Version.LUCENE_3_1_0)) {
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
- if (matchVersion.onOrAfter(Version.LUCENE_3_6)) {
+ if (getVersion().onOrAfter(Version.LUCENE_3_6_0)) {
result = new FrenchLightStemFilter(result);
} else {
result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
}
return new TokenStreamComponents(source, result);
} else {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
result = new FrenchStemFilter(result);
// Convert to lowercase after stemming!
- return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
+ return new TokenStreamComponents(source, new LowerCaseFilter(getVersion(), result));
}
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -45,7 +45,7 @@ public final class IrishAnalyzer extends
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"d", "m", "b"
), true));
@@ -56,7 +56,7 @@ public final class IrishAnalyzer extends
* with phrase queries versus tAthair (which would not have a gap).
*/
private static final CharArraySet HYPHENATIONS = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"h", "n", "t"
), true));
@@ -91,16 +91,31 @@ public final class IrishAnalyzer extends
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public IrishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #IrishAnalyzer()}
+ */
+ @Deprecated
public IrishAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
- *
- * @param matchVersion lucene compatibility version
+ *
* @param stopwords a stopword set
*/
+ public IrishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #IrishAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -110,10 +125,18 @@ public final class IrishAnalyzer extends
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
+ public IrishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #IrishAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
@@ -135,16 +158,16 @@ public final class IrishAnalyzer extends
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- StopFilter s = new StopFilter(matchVersion, result, HYPHENATIONS);
- if (!matchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ StopFilter s = new StopFilter(getVersion(), result, HYPHENATIONS);
+ if (!getVersion().onOrAfter(Version.LUCENE_4_4)) {
s.setEnablePositionIncrements(false);
}
result = s;
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new IrishLowerCaseFilter(result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new IrishStemmer());
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -62,7 +62,7 @@ public final class GalicianAnalyzer exte
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(GalicianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,6 +74,14 @@ public final class GalicianAnalyzer exte
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public GalicianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #GalicianAnalyzer()}
+ */
+ @Deprecated
public GalicianAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
@@ -81,9 +89,16 @@ public final class GalicianAnalyzer exte
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
+ public GalicianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #GalicianAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -93,10 +108,18 @@ public final class GalicianAnalyzer exte
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
+ public GalicianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #GalicianAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
@@ -118,10 +141,10 @@ public final class GalicianAnalyzer exte
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new GalicianStemFilter(result);
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -83,12 +83,20 @@ public final class HindiAnalyzer extends
/**
* Builds an analyzer with the given stop words
*
- * @param version lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a stemming exclusion set
*/
- public HindiAnalyzer(Version version, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(version, stopwords);
+ public HindiAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #HindiAnalyzer(CharArraySet, CharArraySet)}
+ */
+ @Deprecated
+ public HindiAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(
CharArraySet.copy(matchVersion, stemExclusionSet));
}
@@ -96,9 +104,16 @@ public final class HindiAnalyzer extends
/**
* Builds an analyzer with the given stop words
*
- * @param version lucene compatibility version
* @param stopwords a stopword set
*/
+ public HindiAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #HindiAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public HindiAnalyzer(Version version, CharArraySet stopwords) {
this(version, stopwords, CharArraySet.EMPTY_SET);
}
@@ -107,6 +122,14 @@ public final class HindiAnalyzer extends
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public HindiAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #HindiAnalyzer()}
+ */
+ @Deprecated
public HindiAnalyzer(Version version) {
this(version, DefaultSetHolder.DEFAULT_STOP_SET);
}
@@ -127,17 +150,17 @@ public final class HindiAnalyzer extends
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source;
- if (matchVersion.onOrAfter(Version.LUCENE_3_6)) {
- source = new StandardTokenizer(matchVersion, reader);
+ if (getVersion().onOrAfter(Version.LUCENE_3_6)) {
+ source = new StandardTokenizer(getVersion(), reader);
} else {
- source = new IndicTokenizer(matchVersion, reader);
+ source = new IndicTokenizer(getVersion(), reader);
}
- TokenStream result = new LowerCaseFilter(matchVersion, source);
+ TokenStream result = new LowerCaseFilter(getVersion(), source);
if (!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new IndicNormalizationFilter(result);
result = new HindiNormalizationFilter(result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new StopFilter(getVersion(), result, stopwords);
result = new HindiStemFilter(result);
return new TokenStreamComponents(source, result);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -64,7 +64,7 @@ public final class HungarianAnalyzer ext
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,6 +76,14 @@ public final class HungarianAnalyzer ext
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public HungarianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #HungarianAnalyzer()}
+ */
+ @Deprecated
public HungarianAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
@@ -83,9 +91,16 @@ public final class HungarianAnalyzer ext
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
+ public HungarianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #HungarianAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -95,10 +110,18 @@ public final class HungarianAnalyzer ext
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
+ public HungarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #HungarianAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
@@ -120,10 +143,10 @@ public final class HungarianAnalyzer ext
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new HungarianStemmer());
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java Thu Aug 21 03:12:52 2014
@@ -28,7 +28,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
@@ -215,7 +214,7 @@ final class Stemmer {
if (stems.size() < 2) {
return stems;
}
- CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
+ CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase);
List<CharsRef> deduped = new ArrayList<>();
for (CharsRef s : stems) {
if (!terms.contains(s)) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -73,6 +73,14 @@ public final class ArmenianAnalyzer exte
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public ArmenianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #ArmenianAnalyzer()}
+ */
+ @Deprecated
public ArmenianAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
@@ -80,9 +88,16 @@ public final class ArmenianAnalyzer exte
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
+ public ArmenianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #ArmenianAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -92,10 +107,18 @@ public final class ArmenianAnalyzer exte
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
+ public ArmenianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #ArmenianAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
@@ -117,10 +140,10 @@ public final class ArmenianAnalyzer exte
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new ArmenianStemmer());
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -69,6 +69,14 @@ public final class IndonesianAnalyzer ex
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public IndonesianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #IndonesianAnalyzer()}
+ */
+ @Deprecated
public IndonesianAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
@@ -76,12 +84,18 @@ public final class IndonesianAnalyzer ex
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords){
+ public IndonesianAnalyzer(CharArraySet stopwords){
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #IndonesianAnalyzer(CharArraySet)}
+ */
+ @Deprecated
+ public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -90,13 +104,20 @@ public final class IndonesianAnalyzer ex
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* {@link IndonesianStemFilter}.
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a set of terms not to be stemmed
*/
+ public IndonesianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #IndonesianAnalyzer(CharArraySet, CharArraySet)}
+ */
+ @Deprecated
public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
@@ -117,10 +138,10 @@ public final class IndonesianAnalyzer ex
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stopwords);
if (!stemExclusionSet.isEmpty()) {
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -43,7 +43,7 @@ import org.tartarus.snowball.ext.Italian
* {@link Analyzer} for Italian.
* <p>
* <a name="version"/>
- * <p>You must specify the required {@link Version}
+ * <p>You may specify the {@link Version}
* compatibility when creating ItalianAnalyzer:
* <ul>
* <li> As of 3.6, ItalianLightStemFilter is used for less aggressive stemming.
@@ -58,7 +58,7 @@ public final class ItalianAnalyzer exten
public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
"gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
@@ -82,7 +82,7 @@ public final class ItalianAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -94,6 +94,14 @@ public final class ItalianAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public ItalianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #ItalianAnalyzer()}
+ */
+ @Deprecated
public ItalianAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
@@ -101,9 +109,16 @@ public final class ItalianAnalyzer exten
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
+ public ItalianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #ItalianAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -113,10 +128,18 @@ public final class ItalianAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
+ public ItalianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #ItalianAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
@@ -138,16 +161,16 @@ public final class ItalianAnalyzer exten
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- if (matchVersion.onOrAfter(Version.LUCENE_3_2)) {
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ if (getVersion().onOrAfter(Version.LUCENE_3_2_0)) {
result = new ElisionFilter(result, DEFAULT_ARTICLES);
}
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
- if (matchVersion.onOrAfter(Version.LUCENE_3_6)) {
+ if (getVersion().onOrAfter(Version.LUCENE_3_6_0)) {
result = new ItalianLightStemFilter(result);
} else {
result = new SnowballFilter(result, new ItalianStemmer());
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -62,7 +62,7 @@ public final class LatvianAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(LatvianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,6 +74,14 @@ public final class LatvianAnalyzer exten
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
+ public LatvianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #LatvianAnalyzer()}
+ */
+ @Deprecated
public LatvianAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
@@ -81,9 +89,16 @@ public final class LatvianAnalyzer exten
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
+ public LatvianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * @deprecated Use {@link #LatvianAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@@ -93,10 +108,18 @@ public final class LatvianAnalyzer exten
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
+ public LatvianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
+
+ /**
+ * @deprecated Use {@link #LatvianAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
@@ -118,10 +141,10 @@ public final class LatvianAnalyzer exten
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer(getVersion(), reader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new LatvianStemFilter(result);
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java Thu Aug 21 03:12:52 2014
@@ -82,7 +82,7 @@ public class CapitalizationFilterFactory
boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
Set<String> k = getSet(args, KEEP);
if (k != null) {
- keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
+ keep = new CharArraySet(10, ignoreCase);
keep.addAll(k);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java Thu Aug 21 03:12:52 2014
@@ -39,11 +39,18 @@ public final class CodepointCountFilter
* Create a new {@link CodepointCountFilter}. This will filter out tokens whose
* {@link CharTermAttribute} is either too short ({@link Character#codePointCount(char[], int, int)}
* < min) or too long ({@link Character#codePointCount(char[], int, int)} > max).
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param min the minimum length
* @param max the maximum length
*/
+ public CodepointCountFilter(TokenStream in, int min, int max) {
+ this(Version.LATEST, in, min, max);
+ }
+
+ /**
+ * @deprecated Use {@link #CodepointCountFilter(TokenStream, int, int)}
+ */
+ @Deprecated
public CodepointCountFilter(Version version, TokenStream in, int min, int max) {
super(version, in);
if (min < 0) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java Thu Aug 21 03:12:52 2014
@@ -50,6 +50,6 @@ public class CodepointCountFilterFactory
@Override
public CodepointCountFilter create(TokenStream input) {
- return new CodepointCountFilter(luceneMatchVersion, input, min, max);
+ return new CodepointCountFilter(input, min, max);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java Thu Aug 21 03:12:52 2014
@@ -44,10 +44,17 @@ public final class KeepWordFilter extend
* Create a new {@link KeepWordFilter}.
* <p><b>NOTE</b>: The words set passed to this constructor will be directly
* used by this filter and should not be modified.
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param words the words to keep
*/
+ public KeepWordFilter(TokenStream in, CharArraySet words) {
+ this(Version.LATEST, in, words);
+ }
+
+ /**
+ * @deprecated Use {@link #KeepWordFilter(TokenStream, CharArraySet)}
+ */
+ @Deprecated
public KeepWordFilter(Version version, TokenStream in, CharArraySet words) {
super(version, in);
this.words = words;
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java Thu Aug 21 03:12:52 2014
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
import java.util.Map;
import java.io.IOException;
@@ -45,7 +46,6 @@ public class KeepWordFilterFactory exten
/** Creates a new KeepWordFilterFactory */
public KeepWordFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
wordFiles = get(args, "words");
ignoreCase = getBoolean(args, "ignoreCase", false);
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
@@ -79,9 +79,15 @@ public class KeepWordFilterFactory exten
if (words == null) {
return input;
} else {
- @SuppressWarnings("deprecation")
- final TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
- return filter;
+ if (luceneMatchVersion == null) {
+ @SuppressWarnings("deprecation")
+ final TokenStream filter = new KeepWordFilter(Version.LATEST, enablePositionIncrements, input, words);
+ return filter;
+ } else {
+ @SuppressWarnings("deprecation")
+ final TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
+ return filter;
+ }
}
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java Thu Aug 21 03:12:52 2014
@@ -53,11 +53,18 @@ public final class LengthFilter extends
* Create a new {@link LengthFilter}. This will filter out tokens whose
* {@link CharTermAttribute} is either too short ({@link CharTermAttribute#length()}
* < min) or too long ({@link CharTermAttribute#length()} > max).
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param min the minimum length
* @param max the maximum length
*/
+ public LengthFilter(TokenStream in, int min, int max) {
+ this(Version.LATEST, in, min, max);
+ }
+
+ /**
+ * @deprecated Use {@link #LengthFilter(TokenStream, int, int)}
+ */
+ @Deprecated
public LengthFilter(Version version, TokenStream in, int min, int max) {
super(version, in);
if (min < 0) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java Thu Aug 21 03:12:52 2014
@@ -21,6 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
/**
* Factory for {@link LengthFilter}.
@@ -45,6 +46,10 @@ public class LengthFilterFactory extends
min = requireInt(args, MIN_KEY);
max = requireInt(args, MAX_KEY);
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+ if (enablePositionIncrements == false &&
+ (luceneMatchVersion == null || luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0))) {
+ throw new IllegalArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4");
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -52,6 +57,9 @@ public class LengthFilterFactory extends
@Override
public LengthFilter create(TokenStream input) {
+ if (luceneMatchVersion == null) {
+ return new LengthFilter(input, min, max);
+ }
@SuppressWarnings("deprecation")
final LengthFilter filter = new LengthFilter(luceneMatchVersion, enablePositionIncrements, input,min,max);
return filter;
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java Thu Aug 21 03:12:52 2014
@@ -22,7 +22,6 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
import java.io.IOException;
@@ -33,9 +32,8 @@ public final class RemoveDuplicatesToken
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
-
- // use a fixed version, as we don't care about case sensitivity.
- private final CharArraySet previous = new CharArraySet(Version.LUCENE_3_1, 8, false);
+
+ private final CharArraySet previous = new CharArraySet(8, false);
/**
* Creates a new RemoveDuplicatesTokenFilter
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java Thu Aug 21 03:12:52 2014
@@ -53,6 +53,15 @@ public final class TrimFilter extends To
}
/** Create a new {@link TrimFilter} on top of <code>in</code>. */
+ public TrimFilter(TokenStream in) {
+ super(in);
+ this.updateOffsets = false;
+ }
+
+ /**
+ * @deprecated Use {@link #TrimFilter(TokenStream)}
+ */
+ @Deprecated
public TrimFilter(Version version, TokenStream in) {
this(version, in, false);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java Thu Aug 21 03:12:52 2014
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
/**
* Factory for {@link TrimFilter}.
@@ -43,6 +44,10 @@ public class TrimFilterFactory extends T
public TrimFilterFactory(Map<String,String> args) {
super(args);
updateOffsets = getBoolean(args, "updateOffsets", false);
+ if (updateOffsets &&
+ (luceneMatchVersion == null || luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0))) {
+ throw new IllegalArgumentException("updateOffsets=true is not supported anymore as of Lucene 4.4");
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -50,6 +55,9 @@ public class TrimFilterFactory extends T
@Override
public TrimFilter create(TokenStream input) {
+ if (luceneMatchVersion == null) {
+ return new TrimFilter(input);
+ }
@SuppressWarnings("deprecation")
final TrimFilter filter = new TrimFilter(luceneMatchVersion, input, updateOffsets);
return filter;
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java Thu Aug 21 03:12:52 2014
@@ -206,6 +206,14 @@ public final class WordDelimiterFilter e
* @param configurationFlags Flags configuring the filter
* @param protWords If not null is the set of tokens to protect from being delimited
*/
+ public WordDelimiterFilter(TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) {
+ this(Version.LATEST, in, charTypeTable, configurationFlags, protWords);
+ }
+
+ /**
+ * @deprecated Use {@link #WordDelimiterFilter(TokenStream, byte[], int, CharArraySet)}
+ */
+ @Deprecated
public WordDelimiterFilter(Version matchVersion, TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) {
super(in);
if (!matchVersion.onOrAfter(Version.LUCENE_4_8)) {
@@ -225,6 +233,14 @@ public final class WordDelimiterFilter e
* @param configurationFlags Flags configuring the filter
* @param protWords If not null is the set of tokens to protect from being delimited
*/
+ public WordDelimiterFilter(TokenStream in, int configurationFlags, CharArraySet protWords) {
+ this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
+ }
+
+ /**
+ * @deprecated Use {@link #WordDelimiterFilter(TokenStream, int, CharArraySet)}
+ */
+ @Deprecated
public WordDelimiterFilter(Version matchVersion, TokenStream in, int configurationFlags, CharArraySet protWords) {
this(matchVersion, in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java Thu Aug 21 03:12:52 2014
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
/**
* Creates new instances of {@link EdgeNGramTokenFilter}.
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Thu Aug 21 03:12:52 2014
@@ -96,20 +96,24 @@ public final class EdgeNGramTokenFilter
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
*
- * @param version the <a href="#version">Lucene match version</a>
* @param input {@link TokenStream} holding the input to be tokenized
* @param side the {@link Side} from which to chop off an n-gram
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
+ * @deprecated Use {@link #EdgeNGramTokenFilter(TokenStream, int, int)}
+ */
+ @Deprecated
+ public EdgeNGramTokenFilter(TokenStream input, Side side, int minGram, int maxGram) {
+ this(Version.LATEST, input, side, minGram, maxGram);
+ }
+
+ /**
+ * @deprecated For {@link Version#LUCENE_4_3_0} or below, use {@link Lucene43EdgeNGramTokenFilter}, otherwise use {@link #EdgeNGramTokenFilter(TokenStream, int, int)}
*/
@Deprecated
public EdgeNGramTokenFilter(Version version, TokenStream input, Side side, int minGram, int maxGram) {
super(input);
- if (version == null) {
- throw new IllegalArgumentException("version must not be null");
- }
-
if (version.onOrAfter(Version.LUCENE_4_4) && side == Side.BACK) {
throw new IllegalArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
}
@@ -138,11 +142,19 @@ public final class EdgeNGramTokenFilter
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
*
- * @param version the <a href="#version">Lucene match version</a>
* @param input {@link TokenStream} holding the input to be tokenized
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
+ * @deprecated Use {@link #EdgeNGramTokenFilter(TokenStream, int, int)}
+ */
+ @Deprecated
+ public EdgeNGramTokenFilter(TokenStream input, String sideLabel, int minGram, int maxGram) {
+ this(input, Side.getSide(sideLabel), minGram, maxGram);
+ }
+
+ /**
+ * @deprecated For {@link Version#LUCENE_4_3_0} or below, use {@link Lucene43EdgeNGramTokenFilter}, otherwise use {@link #EdgeNGramTokenFilter(TokenStream, int, int)}
*/
@Deprecated
public EdgeNGramTokenFilter(Version version, TokenStream input, String sideLabel, int minGram, int maxGram) {
@@ -152,11 +164,18 @@ public final class EdgeNGramTokenFilter
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
*
- * @param version the <a href="#version">Lucene match version</a>
* @param input {@link TokenStream} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
+ public EdgeNGramTokenFilter(TokenStream input, int minGram, int maxGram) {
+ this(input, Side.FRONT, minGram, maxGram);
+ }
+
+ /**
+ * @deprecated For {@link Version#LUCENE_4_3_0} or below, use {@link Lucene43EdgeNGramTokenFilter}, otherwise use {@link #EdgeNGramTokenFilter(TokenStream, int, int)}
+ */
+ @Deprecated
public EdgeNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
this(version, input, Side.FRONT, minGram, maxGram);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Thu Aug 21 03:12:52 2014
@@ -45,11 +45,18 @@ public class EdgeNGramTokenizer extends
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param version the <a href="#version">Lucene match version</a>
* @param input {@link Reader} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
+ public EdgeNGramTokenizer(Reader input, int minGram, int maxGram) {
+ super(Version.LATEST, input, minGram, maxGram, true);
+ }
+
+ /**
+ * @deprecated Use {@link #EdgeNGramTokenizer(Reader, int, int)}
+ */
+ @Deprecated
public EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
super(version, input, minGram, maxGram, true);
}
@@ -57,12 +64,19 @@ public class EdgeNGramTokenizer extends
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param version the <a href="#version">Lucene match version</a>
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param input {@link Reader} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
+ public EdgeNGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) {
+ super(Version.LATEST, factory, input, minGram, maxGram, true);
+ }
+
+ /**
+ * @deprecated Use {@link #EdgeNGramTokenizer(AttributeFactory, Reader, int, int)}
+ */
+ @Deprecated
public EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
super(version, factory, input, minGram, maxGram, true);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java Thu Aug 21 03:12:52 2014
@@ -56,7 +56,7 @@ public class EdgeNGramTokenizerFactory e
if (!EdgeNGramTokenFilter.Side.FRONT.getLabel().equals(side)) {
throw new IllegalArgumentException(EdgeNGramTokenizer.class.getSimpleName() + " does not support backward n-grams as of Lucene 4.4");
}
- return new EdgeNGramTokenizer(luceneMatchVersion, input, minGramSize, maxGramSize);
+ return new EdgeNGramTokenizer(input, minGramSize, maxGramSize);
} else {
return new Lucene43EdgeNGramTokenizer(luceneMatchVersion, input, side, minGramSize, maxGramSize);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java Thu Aug 21 03:12:52 2014
@@ -144,11 +144,18 @@ public final class Lucene43EdgeNGramToke
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param version the <a href="#version">Lucene match version</a>
* @param input {@link Reader} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
+ public Lucene43EdgeNGramTokenizer(Reader input, int minGram, int maxGram) {
+ this(Version.LATEST, input, Side.FRONT, minGram, maxGram);
+ }
+
+ /**
+ * @deprecated Use {@link #Lucene43EdgeNGramTokenizer(Reader, int, int)}
+ */
+ @Deprecated
public Lucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
this(version, input, Side.FRONT, minGram, maxGram);
}
@@ -156,12 +163,19 @@ public final class Lucene43EdgeNGramToke
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param version the <a href="#version">Lucene match version</a>
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param input {@link Reader} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
+ public Lucene43EdgeNGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) {
+ this(Version.LATEST, factory, input, Side.FRONT, minGram, maxGram);
+ }
+
+ /**
+ * @deprecated Use {@link #Lucene43EdgeNGramTokenizer(AttributeFactory, Reader, int, int)}
+ */
+ @Deprecated
public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
this(version, factory, input, Side.FRONT, minGram, maxGram);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java Thu Aug 21 03:12:52 2014
@@ -29,7 +29,7 @@ import org.apache.lucene.util.AttributeF
* Old broken version of {@link NGramTokenizer}.
*/
@Deprecated
-public final class Lucene43NGramTokenizer extends Tokenizer {
+public class Lucene43NGramTokenizer extends Tokenizer {
public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java Thu Aug 21 03:12:52 2014
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
/**
* Factory for {@link NGramTokenFilter}.
@@ -49,6 +50,9 @@ public class NGramFilterFactory extends
@Override
public TokenFilter create(TokenStream input) {
+ if (luceneMatchVersion == null) {
+ return new NGramTokenFilter(input, minGramSize, maxGramSize);
+ }
return new NGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Thu Aug 21 03:12:52 2014
@@ -31,17 +31,15 @@ import org.apache.lucene.util.Version;
/**
* Tokenizes the input into n-grams of the given size(s).
- * <a name="version"/>
- * <p>You must specify the required {@link Version} compatibility when
- * creating a {@link NGramTokenFilter}. As of Lucene 4.4, this token filters:<ul>
+ * As of Lucene 4.4, this token filter:<ul>
* <li>handles supplementary characters correctly,</li>
* <li>emits all n-grams for the same token at the same position,</li>
* <li>does not modify offsets,</li>
* <li>sorts n-grams by their offset in the original token first, then
* increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
* "c").</li></ul>
- * <p>You can make this filter use the old behavior by providing a version <
- * {@link Version#LUCENE_4_4} in the constructor but this is not recommended as
+ * <p>You can make this filter use the old behavior by using
+ * {@link org.apache.lucene.analysis.ngram.Lucene43NGramTokenFilter} but this is not recommended as
* it will lead to broken {@link TokenStream}s that will cause highlighting
* bugs.
* <p>If you were using this {@link TokenFilter} to perform partial highlighting,
@@ -74,12 +72,18 @@ public final class NGramTokenFilter exte
/**
* Creates NGramTokenFilter with given min and max n-grams.
- * @param version Lucene version to enable correct position increments.
- * See <a href="#version">above</a> for details.
* @param input {@link TokenStream} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
+ public NGramTokenFilter(TokenStream input, int minGram, int maxGram) {
+ this(Version.LATEST, input, minGram, maxGram);
+ }
+
+ /**
+ * @deprecated Use {@link #NGramTokenFilter(TokenStream, int, int)}
+ */
+ @Deprecated
public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
super(new CodepointCountFilter(version, input, minGram, Integer.MAX_VALUE));
this.version = version;
@@ -108,7 +112,7 @@ public final class NGramTokenFilter exte
};
posLenAtt = new PositionLengthAttribute() {
@Override
- public void setPositionLength(int positionLength) {}
+ public void setPositionLength(int positionLength) {}
@Override
public int getPositionLength() {
return 0;
@@ -119,10 +123,16 @@ public final class NGramTokenFilter exte
/**
* Creates NGramTokenFilter with default min and max n-grams.
- * @param version Lucene version to enable correct position increments.
- * See <a href="#version">above</a> for details.
* @param input {@link TokenStream} holding the input to be tokenized
*/
+ public NGramTokenFilter(TokenStream input) {
+ this(Version.LATEST, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
+ }
+
+ /**
+ * @deprecated Use {@link #NGramTokenFilter(TokenStream)}
+ */
+ @Deprecated
public NGramTokenFilter(Version version, TokenStream input) {
this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
}
@@ -149,7 +159,7 @@ public final class NGramTokenFilter exte
hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
}
}
- if (version.onOrAfter(Version.LUCENE_4_4)) {
+ if (version.onOrAfter(Version.LUCENE_4_4_0)) {
if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) {
++curPos;
curGramSize = minGram;
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Thu Aug 21 03:12:52 2014
@@ -85,11 +85,18 @@ public class NGramTokenizer extends Toke
/**
* Creates NGramTokenizer with given min and max n-grams.
- * @param version the lucene compatibility <a href="#version">version</a>
* @param input {@link Reader} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
+ public NGramTokenizer(Reader input, int minGram, int maxGram) {
+ this(Version.LATEST, input, minGram, maxGram, false);
+ }
+
+ /**
+ * @deprecated For {@link Version#LUCENE_4_3_0} and before, use {@link org.apache.lucene.analysis.ngram.Lucene43NGramTokenizer}, otherwise use {@link #NGramTokenizer(Reader, int, int)}
+ */
+ @Deprecated
public NGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
this(version, input, minGram, maxGram, false);
}
@@ -101,19 +108,25 @@ public class NGramTokenizer extends Toke
/**
* Creates NGramTokenizer with given min and max n-grams.
- * @param version the lucene compatibility <a href="#version">version</a>
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param input {@link Reader} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
+ public NGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) {
+ this(Version.LATEST, factory, input, minGram, maxGram, false);
+ }
+
+ /**
+ * @deprecated For {@link Version#LUCENE_4_3_0} and before, use {@link org.apache.lucene.analysis.ngram.Lucene43NGramTokenizer}, otherwise use {@link #NGramTokenizer(AttributeFactory, Reader, int, int)}
+ */
+ @Deprecated
public NGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
this(version, factory, input, minGram, maxGram, false);
}
/**
* Creates NGramTokenizer with default min and max n-grams.
- * @param version the lucene compatibility <a href="#version">version</a>
* @param input {@link Reader} holding the input to be tokenized
*/
public NGramTokenizer(Version version, Reader input) {
@@ -121,10 +134,10 @@ public class NGramTokenizer extends Toke
}
private void init(Version version, int minGram, int maxGram, boolean edgesOnly) {
- if (!version.onOrAfter(Version.LUCENE_4_4)) {
+ if (!version.onOrAfter(Version.LUCENE_4_4_0)) {
throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
}
- charUtils = version.onOrAfter(Version.LUCENE_4_4)
+ charUtils = version.onOrAfter(Version.LUCENE_4_4_0)
? CharacterUtils.getInstance(version)
: CharacterUtils.getJava4Instance();
if (minGram < 1) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=1619283&r1=1619282&r2=1619283&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Thu Aug 21 03:12:52 2014
@@ -53,7 +53,7 @@ import java.nio.charset.StandardCharsets
* </p>
*
* <a name="version"/>
- * <p>You must specify the required {@link Version}
+ * <p>You may specify the {@link Version}
* compatibility when creating DutchAnalyzer:
* <ul>
* <li> As of 3.6, {@link #DutchAnalyzer(Version, CharArraySet)} and
@@ -62,13 +62,12 @@ import java.nio.charset.StandardCharsets
* <li> As of 3.1, Snowball stemming is done with SnowballFilter,
* LowerCaseFilter is used prior to StopFilter, and Snowball
* stopwords are used by default.
- * <li> As of 2.9, StopFilter preserves position
- * increments
* </ul>
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
+// TODO: extend StopwordAnalyzerBase
public final class DutchAnalyzer extends Analyzer {
/** File containing default Dutch stopwords. */
@@ -88,14 +87,14 @@ public final class DutchAnalyzer extends
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
throw new RuntimeException("Unable to load default stopword set");
}
- DEFAULT_STEM_DICT = new CharArrayMap<>(Version.LUCENE_CURRENT, 4, false);
+ DEFAULT_STEM_DICT = new CharArrayMap<>(4, false);
DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet
DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet
DEFAULT_STEM_DICT.put("ei", "eier");
@@ -118,18 +117,33 @@ public final class DutchAnalyzer extends
// null if on 3.1 or later - only for bw compat
private final CharArrayMap<String> origStemdict;
- private final Version matchVersion;
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()})
* and a few default entries for the stem exclusion table.
*
*/
+ public DutchAnalyzer() {
+ this(Version.LATEST);
+ }
+
+ /**
+ * @deprecated Use {@link #DutchAnalyzer()}
+ */
+ @Deprecated
public DutchAnalyzer(Version matchVersion) {
// historically, only this ctor populated the stem dict!!!!!
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
}
-
+
+ public DutchAnalyzer(CharArraySet stopwords){
+ this(Version.LATEST, stopwords);
+ }
+
+ /**
+ * @deprecated Use {@link #DutchAnalyzer(CharArraySet)}
+ */
+ @Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
// historically, this ctor never populated the stem dict!!!!!
// so we populate it only for >= 3.6
@@ -138,7 +152,15 @@ public final class DutchAnalyzer extends
? DefaultSetHolder.DEFAULT_STEM_DICT
: CharArrayMap.<String>emptyMap());
}
-
+
+ public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable){
+ this(Version.LATEST, stopwords, stemExclusionTable);
+ }
+
+ /**
+ * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet)}
+ */
+ @Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
// historically, this ctor never populated the stem dict!!!!!
// so we populate it only for >= 3.6
@@ -147,9 +169,17 @@ public final class DutchAnalyzer extends
? DefaultSetHolder.DEFAULT_STEM_DICT
: CharArrayMap.<String>emptyMap());
}
-
+
+ public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
+ this(Version.LATEST, stopwords, stemExclusionTable, stemOverrideDict);
+ }
+
+ /**
+ * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet,CharArrayMap)}
+ */
+ @Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
- this.matchVersion = matchVersion;
+ setVersion(matchVersion);
this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_3_1)) {
@@ -186,11 +216,11 @@ public final class DutchAnalyzer extends
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader aReader) {
- if (matchVersion.onOrAfter(Version.LUCENE_3_1)) {
- final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stoptable);
+ if (getVersion().onOrAfter(Version.LUCENE_3_1)) {
+ final Tokenizer source = new StandardTokenizer(getVersion(), aReader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new LowerCaseFilter(getVersion(), result);
+ result = new StopFilter(getVersion(), result, stoptable);
if (!excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
if (stemdict != null)
@@ -198,9 +228,9 @@ public final class DutchAnalyzer extends
result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
return new TokenStreamComponents(source, result);
} else {
- final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new StopFilter(matchVersion, result, stoptable);
+ final Tokenizer source = new StandardTokenizer(getVersion(), aReader);
+ TokenStream result = new StandardFilter(getVersion(), source);
+ result = new StopFilter(getVersion(), result, stoptable);
if (!excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
result = new DutchStemFilter(result, origStemdict);