You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/11/30 12:22:46 UTC
svn commit: r1040463 [7/8] - in /lucene/dev/trunk:
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/
lucene/contrib/db/bdb-je/src/java/org/apache/lucene/store/je/
lucene/contrib/db/bdb/src/java/org/apache/lucene/store/db/ luce...
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -24,9 +24,9 @@ import org.apache.lucene.analysis.en.Eng
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
-import java.io.IOException;
import java.io.Reader;
import java.util.Set;
@@ -43,11 +43,11 @@ import java.util.Set;
* <li> As of 3.1, uses {@link TurkishLowerCaseFilter} for Turkish language.
* </ul>
* </p>
- * @deprecated Use the language-specific analyzer in modules/analysis instead.
- * This analyzer will be removed in Lucene 4.0
+ * @deprecated (3.1) Use the language-specific analyzer in modules/analysis instead.
+ * This analyzer will be removed in Lucene 5.0
*/
@Deprecated
-public final class SnowballAnalyzer extends Analyzer {
+public final class SnowballAnalyzer extends ReusableAnalyzerBase {
private String name;
private Set<?> stopSet;
private final Version matchVersion;
@@ -58,16 +58,6 @@ public final class SnowballAnalyzer exte
this.matchVersion = matchVersion;
}
- /**
- * Builds the named analyzer with the given stop words.
- * @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.
- */
- @Deprecated
- public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
- this(matchVersion, name);
- stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
- }
-
/** Builds the named analyzer with the given stop words. */
public SnowballAnalyzer(Version matchVersion, String name, Set<?> stopWords) {
this(matchVersion, name);
@@ -79,9 +69,9 @@ public final class SnowballAnalyzer exte
StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
and a {@link SnowballFilter} */
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new StandardTokenizer(matchVersion, reader);
- result = new StandardFilter(matchVersion, result);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(matchVersion, tokenizer);
// remove the possessive 's for english stemmers
if (matchVersion.onOrAfter(Version.LUCENE_31) &&
(name.equals("English") || name.equals("Porter") || name.equals("Lovins")))
@@ -95,38 +85,6 @@ public final class SnowballAnalyzer exte
result = new StopFilter(matchVersion,
result, stopSet);
result = new SnowballFilter(result, name);
- return result;
- }
-
- private class SavedStreams {
- Tokenizer source;
- TokenStream result;
- }
-
- /** Returns a (possibly reused) {@link StandardTokenizer} filtered by a
- * {@link StandardFilter}, a {@link LowerCaseFilter},
- * a {@link StopFilter}, and a {@link SnowballFilter} */
- @Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader)
- throws IOException {
- SavedStreams streams = (SavedStreams) getPreviousTokenStream();
- if (streams == null) {
- streams = new SavedStreams();
- streams.source = new StandardTokenizer(matchVersion, reader);
- streams.result = new StandardFilter(matchVersion, streams.source);
- // Use a special lowercase filter for turkish, the stemmer expects it.
- if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
- streams.result = new TurkishLowerCaseFilter(streams.result);
- else
- streams.result = new LowerCaseFilter(matchVersion, streams.result);
- if (stopSet != null)
- streams.result = new StopFilter(matchVersion,
- streams.result, stopSet);
- streams.result = new SnowballFilter(streams.result, name);
- setPreviousTokenStream(streams);
- } else {
- streams.source.reset(reader);
- }
- return streams.result;
+ return new TokenStreamComponents(tokenizer, result);
}
}
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -58,12 +58,6 @@ public final class ClassicAnalyzer exten
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
- /**
- * Specifies whether deprecated acronyms should be replaced with HOST type.
- * See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
- */
- private final boolean replaceInvalidAcronym;
-
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
@@ -74,7 +68,6 @@ public final class ClassicAnalyzer exten
* @param stopWords stop words */
public ClassicAnalyzer(Version matchVersion, Set<?> stopWords) {
super(matchVersion, stopWords);
- replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
}
/** Builds an analyzer with the default stop words ({@link
@@ -125,7 +118,6 @@ public final class ClassicAnalyzer exten
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
- src.setReplaceInvalidAcronym(replaceInvalidAcronym);
TokenStream tok = new ClassicFilter(src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Tue Nov 30 11:22:39 2010
@@ -44,14 +44,6 @@ import org.apache.lucene.util.Version;
* not suit your application, please consider copying this source code
* directory to your project and maintaining your own grammar-based tokenizer.
*
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating ClassicAnalyzer:
- * <ul>
- * <li> As of 2.4, Tokens incorrectly identified as acronyms
- * are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1608</a>
- * </ul>
- *
* ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
* As of 3.1, {@link StandardTokenizer} implements Unicode text segmentation,
* as specified by UAX#29.
@@ -70,13 +62,8 @@ public final class ClassicTokenizer exte
public static final int NUM = 6;
public static final int CJ = 7;
- /**
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- * as ACRONYMs.
- */
- @Deprecated
public static final int ACRONYM_DEP = 8;
-
+
/** String token types that correspond to token type int constants */
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
@@ -90,8 +77,6 @@ public final class ClassicTokenizer exte
"<ACRONYM_DEP>"
};
- private boolean replaceInvalidAcronym;
-
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
/** Set the max allowed token length. Any token longer
@@ -134,15 +119,9 @@ public final class ClassicTokenizer exte
init(input, matchVersion);
}
- private final void init(Reader input, Version matchVersion) {
+ private void init(Reader input, Version matchVersion) {
this.scanner = new ClassicTokenizerImpl(input);
-
- if (matchVersion.onOrAfter(Version.LUCENE_24)) {
- replaceInvalidAcronym = true;
- } else {
- replaceInvalidAcronym = false;
- }
- this.input = input;
+ this.input = input;
}
// this tokenizer generates three attributes:
@@ -174,16 +153,10 @@ public final class ClassicTokenizer exte
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
- // This 'if' should be removed in the next release. For now, it converts
- // invalid acronyms to HOST. When removed, only the 'else' part should
- // remain.
+
if (tokenType == ClassicTokenizer.ACRONYM_DEP) {
- if (replaceInvalidAcronym) {
- typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST]);
- termAtt.setLength(termAtt.length() - 1); // remove extra '.'
- } else {
- typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM]);
- }
+ typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST]);
+ termAtt.setLength(termAtt.length() - 1); // remove extra '.'
} else {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[tokenType]);
}
@@ -207,28 +180,4 @@ public final class ClassicTokenizer exte
super.reset(reader);
scanner.yyreset(reader);
}
-
- /**
- * Prior to https://issues.apache.org/jira/browse/LUCENE-1068, ClassicTokenizer mischaracterized as acronyms tokens like www.abc.com
- * when they should have been labeled as hosts instead.
- * @return true if ClassicTokenizer now returns these tokens as Hosts, otherwise false
- *
- * @deprecated Remove in 3.X and make true the only valid value
- */
- @Deprecated
- public boolean isReplaceInvalidAcronym() {
- return replaceInvalidAcronym;
- }
-
- /**
- *
- * @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST.
- * @deprecated Remove in 3.X and make true the only valid value
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- */
- @Deprecated
- public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java Tue Nov 30 11:22:39 2010
@@ -355,11 +355,6 @@ public static final int EMAIL
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
-/**
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- * as ACRONYMs.
- */
-@Deprecated
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex Tue Nov 30 11:22:39 2010
@@ -47,11 +47,6 @@ public static final int EMAIL
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
-/**
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- * as ACRONYMs.
- */
-@Deprecated
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -56,12 +56,6 @@ public final class StandardAnalyzer exte
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
- /**
- * Specifies whether deprecated acronyms should be replaced with HOST type.
- * See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
- */
- private final boolean replaceInvalidAcronym;
-
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
@@ -72,7 +66,6 @@ public final class StandardAnalyzer exte
* @param stopWords stop words */
public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
super(matchVersion, stopWords);
- replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
}
/** Builds an analyzer with the default stop words ({@link
@@ -123,7 +116,6 @@ public final class StandardAnalyzer exte
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
- src.setReplaceInvalidAcronym(replaceInvalidAcronym);
TokenStream tok = new StandardFilter(matchVersion, src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java Tue Nov 30 11:22:39 2010
@@ -31,12 +31,6 @@ import org.apache.lucene.util.Version;
public class StandardFilter extends TokenFilter {
private final Version matchVersion;
- /** @deprecated Use {@link #StandardFilter(Version, TokenStream)} instead. */
- @Deprecated
- public StandardFilter(TokenStream in) {
- this(Version.LUCENE_30, in);
- }
-
public StandardFilter(Version matchVersion, TokenStream in) {
super(in);
this.matchVersion = matchVersion;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Tue Nov 30 11:22:39 2010
@@ -17,6 +17,9 @@
package org.apache.lucene.analysis.standard;
+import java.io.IOException;
+import java.io.Reader;
+
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -25,9 +28,6 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
-import java.io.IOException;
-import java.io.Reader;
-
/** A grammar-based tokenizer constructed with JFlex.
* <p>
* As of Lucene version 3.1, this class implements the Word Break rules from the
@@ -61,28 +61,25 @@ public final class StandardTokenizer ext
private StandardTokenizerInterface scanner;
public static final int ALPHANUM = 0;
- /** @deprecated */
+ /** @deprecated (3.1) */
@Deprecated
public static final int APOSTROPHE = 1;
- /** @deprecated */
+ /** @deprecated (3.1) */
@Deprecated
public static final int ACRONYM = 2;
- /** @deprecated */
+ /** @deprecated (3.1) */
@Deprecated
public static final int COMPANY = 3;
public static final int EMAIL = 4;
- /** @deprecated */
+ /** @deprecated (3.1) */
@Deprecated
public static final int HOST = 5;
public static final int NUM = 6;
- /** @deprecated */
+ /** @deprecated (3.1) */
@Deprecated
public static final int CJ = 7;
- /**
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- * as ACRONYMs.
- */
+ /** @deprecated (3.1) */
@Deprecated
public static final int ACRONYM_DEP = 8;
@@ -108,8 +105,6 @@ public final class StandardTokenizer ext
"<HIRAGANA>"
};
- private boolean replaceInvalidAcronym;
-
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
/** Set the max allowed token length. Any token longer
@@ -155,12 +150,7 @@ public final class StandardTokenizer ext
private final void init(Reader input, Version matchVersion) {
this.scanner = matchVersion.onOrAfter(Version.LUCENE_31) ?
new StandardTokenizerImpl(input) : new ClassicTokenizerImpl(input);
- if (matchVersion.onOrAfter(Version.LUCENE_24)) {
- replaceInvalidAcronym = true;
- } else {
- replaceInvalidAcronym = false;
- }
- this.input = input;
+ this.input = input;
}
// this tokenizer generates three attributes:
@@ -196,12 +186,8 @@ public final class StandardTokenizer ext
// invalid acronyms to HOST. When removed, only the 'else' part should
// remain.
if (tokenType == StandardTokenizer.ACRONYM_DEP) {
- if (replaceInvalidAcronym) {
- typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]);
- termAtt.setLength(termAtt.length() - 1); // remove extra '.'
- } else {
- typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM]);
- }
+ typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]);
+ termAtt.setLength(termAtt.length() - 1); // remove extra '.'
} else {
typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
}
@@ -225,28 +211,4 @@ public final class StandardTokenizer ext
super.reset(reader);
scanner.yyreset(reader);
}
-
- /**
- * Prior to https://issues.apache.org/jira/browse/LUCENE-1068, StandardTokenizer mischaracterized as acronyms tokens like www.abc.com
- * when they should have been labeled as hosts instead.
- * @return true if StandardTokenizer now returns these tokens as Hosts, otherwise false
- *
- * @deprecated Remove in 3.X and make true the only valid value
- */
- @Deprecated
- public boolean isReplaceInvalidAcronym() {
- return replaceInvalidAcronym;
- }
-
- /**
- *
- * @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST.
- * @deprecated Remove in 3.X and make true the only valid value
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- */
- @Deprecated
- public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java Tue Nov 30 11:22:39 2010
@@ -53,9 +53,9 @@ public class SynonymMap {
SynonymMap currMap = this;
for (String str : singleMatch) {
if (currMap.submap==null) {
- // for now hardcode at 2.9, as its what the old code did.
+ // for now hardcode at 4.0, as its what the old code did.
// would be nice to fix, but shouldn't store a version in each submap!!!
- currMap.submap = new CharArrayMap<SynonymMap>(Version.LUCENE_29, 1, ignoreCase());
+ currMap.submap = new CharArrayMap<SynonymMap>(Version.LUCENE_40, 1, ignoreCase());
}
SynonymMap map = currMap.submap.get(str);
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java Tue Nov 30 11:22:39 2010
@@ -17,17 +17,17 @@ package org.apache.lucene.analysis.th;
*/
import java.io.IOException;
-import java.util.Locale;
import java.lang.Character.UnicodeBlock;
-import javax.swing.text.Segment;
import java.text.BreakIterator;
+import java.util.Locale;
+import javax.swing.text.Segment;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@@ -69,14 +69,6 @@ public final class ThaiWordFilter extend
private OffsetAttribute clonedOffsetAtt = null;
private boolean hasMoreTokensInClone = false;
- /** Creates a new ThaiWordFilter that also lowercases non-thai text.
- * @deprecated Use the ctor with {@code matchVersion} instead!
- */
- @Deprecated
- public ThaiWordFilter(TokenStream input) {
- this(Version.LUCENE_30, input);
- }
-
/** Creates a new ThaiWordFilter with the specified match version. */
public ThaiWordFilter(Version matchVersion, TokenStream input) {
super(matchVersion.onOrAfter(Version.LUCENE_31) ?
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java Tue Nov 30 11:22:39 2010
@@ -51,8 +51,7 @@ import org.apache.lucene.util.Version;
* that has a string representation. The add methods will use
* {@link Object#toString} and store the result using a {@code char[]}
* buffer. The same behavior have the {@code contains()} methods.
- * The {@link #iterator()} returns an {@code Iterator<String>}.
- * For type safety also {@link #stringIterator()} is provided.
+ * The {@link #iterator()} returns an {@code Iterator<char[]>}.
*/
public class CharArraySet extends AbstractSet<Object> {
public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());
@@ -93,37 +92,6 @@ public class CharArraySet extends Abstra
addAll(c);
}
- /**
- * Creates a set with enough capacity to hold startSize terms
- *
- * @param startSize
- * the initial capacity
- * @param ignoreCase
- * <code>false</code> if and only if the set should be case sensitive
- * otherwise <code>true</code>.
- * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead
- */
- @Deprecated
- public CharArraySet(int startSize, boolean ignoreCase) {
- this(Version.LUCENE_30, startSize, ignoreCase);
- }
-
- /**
- * Creates a set from a Collection of objects.
- *
- * @param c
- * a collection whose elements to be placed into the set
- * @param ignoreCase
- * <code>false</code> if and only if the set should be case sensitive
- * otherwise <code>true</code>.
- * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead
- */
- @Deprecated
- public CharArraySet(Collection<?> c, boolean ignoreCase) {
- this(Version.LUCENE_30, c.size(), ignoreCase);
- addAll(c);
- }
-
/** Create set from the specified map (internal only), used also by {@link CharArrayMap#keySet()} */
CharArraySet(final CharArrayMap<Object> map){
this.map = map;
@@ -202,24 +170,6 @@ public class CharArraySet extends Abstra
/**
* Returns a copy of the given set as a {@link CharArraySet}. If the given set
* is a {@link CharArraySet} the ignoreCase property will be preserved.
- *
- * @param set
- * a set to copy
- * @return a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase and matchVersion property will be
- * preserved.
- * @deprecated use {@link #copy(Version, Set)} instead.
- */
- @Deprecated
- public static CharArraySet copy(final Set<?> set) {
- if(set == EMPTY_SET)
- return EMPTY_SET;
- return copy(Version.LUCENE_30, set);
- }
-
- /**
- * Returns a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase property will be preserved.
* <p>
* <b>Note:</b> If you intend to create a copy of another {@link CharArraySet} where
* the {@link Version} of the source set differs from its copy
@@ -248,68 +198,13 @@ public class CharArraySet extends Abstra
return new CharArraySet(matchVersion, set, false);
}
- /** The Iterator<String> for this set. Strings are constructed on the fly, so
- * use <code>nextCharArray</code> for more efficient access.
- * @deprecated Use the standard iterator, which returns {@code char[]} instances.
- */
- @Deprecated
- public class CharArraySetIterator implements Iterator<String> {
- int pos=-1;
- char[] next;
- private CharArraySetIterator() {
- goNext();
- }
-
- private void goNext() {
- next = null;
- pos++;
- while (pos < map.keys.length && (next=map.keys[pos]) == null) pos++;
- }
-
- public boolean hasNext() {
- return next != null;
- }
-
- /** do not modify the returned char[] */
- public char[] nextCharArray() {
- char[] ret = next;
- goNext();
- return ret;
- }
-
- /** Returns the next String, as a Set<String> would...
- * use nextCharArray() for better efficiency. */
- public String next() {
- return new String(nextCharArray());
- }
-
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-
- /** returns an iterator of new allocated Strings (an instance of {@link CharArraySetIterator}).
- * @deprecated Use {@link #iterator}, which returns {@code char[]} instances.
- */
- @Deprecated
- public Iterator<String> stringIterator() {
- return new CharArraySetIterator();
- }
-
- /** Returns an {@link Iterator} depending on the version used:
- * <ul>
- * <li>if {@code matchVersion} ≥ 3.1, it returns {@code char[]} instances in this set.</li>
- * <li>if {@code matchVersion} is 3.0 or older, it returns new
- * allocated Strings, so this method violates the Set interface.
- * It is kept this way for backwards compatibility, normally it should
- * return {@code char[]} on {@code next()}</li>
- * </ul>
+ /**
+ * Returns an {@link Iterator} for {@code char[]} instances in this set.
*/
@Override @SuppressWarnings("unchecked")
public Iterator<Object> iterator() {
// use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
- return map.matchVersion.onOrAfter(Version.LUCENE_31) ?
- map.originalKeySet().iterator() : (Iterator) stringIterator();
+ return map.originalKeySet().iterator();
}
@Override
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java Tue Nov 30 11:22:39 2010
@@ -239,13 +239,6 @@ public abstract class SnowballProgram {
return true;
}
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected boolean eq_s(int s_size, String s)
- {
- return eq_s(s_size, (CharSequence)s);
- }
-
protected boolean eq_s_b(int s_size, CharSequence s)
{
if (cursor - limit_backward < s_size) return false;
@@ -257,35 +250,15 @@ public abstract class SnowballProgram {
return true;
}
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected boolean eq_s_b(int s_size, String s)
- {
- return eq_s_b(s_size, (CharSequence)s);
- }
-
protected boolean eq_v(CharSequence s)
{
return eq_s(s.length(), s);
}
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected boolean eq_v(StringBuilder s)
- {
- return eq_s(s.length(), (CharSequence)s);
- }
-
protected boolean eq_v_b(CharSequence s)
{ return eq_s_b(s.length(), s);
}
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected boolean eq_v_b(StringBuilder s)
- { return eq_s_b(s.length(), (CharSequence)s);
- }
-
protected int find_among(Among v[], int v_size)
{
int i = 0;
@@ -456,12 +429,6 @@ public abstract class SnowballProgram {
return adjustment;
}
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected int replace_s(int c_bra, int c_ket, String s) {
- return replace_s(c_bra, c_ket, (CharSequence)s);
- }
-
protected void slice_check()
{
if (bra < 0 ||
@@ -484,20 +451,6 @@ public abstract class SnowballProgram {
replace_s(bra, ket, s);
}
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected void slice_from(String s)
- {
- slice_from((CharSequence)s);
- }
-
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected void slice_from(StringBuilder s)
- {
- slice_from((CharSequence)s);
- }
-
protected void slice_del()
{
slice_from((CharSequence)"");
@@ -510,20 +463,6 @@ public abstract class SnowballProgram {
if (c_bra <= ket) ket += adjustment;
}
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected void insert(int c_bra, int c_ket, String s)
- {
- insert(c_bra, c_ket, (CharSequence)s);
- }
-
- /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
- @Deprecated
- protected void insert(int c_bra, int c_ket, StringBuilder s)
- {
- insert(c_bra, c_ket, (CharSequence)s);
- }
-
/* Copy the slice into the supplied StringBuffer */
protected StringBuilder slice_to(StringBuilder s)
{
Added: lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt?rev=1040463&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt (added)
+++ lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt Tue Nov 30 11:22:39 2010
@@ -0,0 +1,35 @@
+a
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+www
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Tue Nov 30 11:22:39 2010
@@ -19,9 +19,10 @@ package org.apache.lucene.analysis.br;
import java.io.IOException;
import java.io.StringReader;
+import java.util.Collections;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
@@ -135,19 +136,10 @@ public class TestBrazilianStemmer extend
}
public void testStemExclusionTable() throws Exception {
- BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
- a.setStemExclusionTable(new String[] { "quintessência" });
+ BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT, Collections.emptySet(), asSet("quintessência"));
checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
}
- public void testStemExclusionTableBWCompat() throws IOException {
- CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
- set.add("BrasÃlia");
- BrazilianStemFilter filter = new BrazilianStemFilter(
- new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
- assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
- }
-
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("Brasília");
@@ -157,28 +149,6 @@ public class TestBrazilianStemmer extend
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
- public void testWithKeywordAttributeAndExclusionTable() throws IOException {
- CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
- set.add("Brasília");
- CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
- set1.add("Brasilia");
- BrazilianStemFilter filter = new BrazilianStemFilter(
- new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "Brasília Brasilia")), set), set1);
- assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
- }
-
- /*
- * Test that changes to the exclusion table are applied immediately
- * when using reusable token streams.
- */
- public void testExclusionTableReuse() throws Exception {
- BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
- checkReuse(a, "quintessência", "quintessente");
- a.setStemExclusionTable(new String[] { "quintessência" });
- checkReuse(a, "quintessência", "quintessência");
- }
-
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new BrazilianAnalyzer(TEST_VERSION_CURRENT), input, expected);
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Tue Nov 30 11:22:39 2010
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.commo
import java.io.Reader;
import java.io.StringReader;
+import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
@@ -26,18 +27,20 @@ import org.apache.lucene.analysis.TokenF
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
/**
* Tests CommonGrams(Query)Filter
*/
public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
- private static final String[] commonWords = { "s", "a", "b", "c", "d", "the",
- "of" };
+ private static final CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(
+ "s", "a", "b", "c", "d", "the", "of"
+ ), false);
public void testReset() throws Exception {
final String input = "How the s a brown s cow d like A B thing?";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+ CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
assertTrue(cgf.incrementToken());
@@ -58,7 +61,7 @@ public class CommonGramsFilterTest exten
public void testQueryReset() throws Exception {
final String input = "How the s a brown s cow d like A B thing?";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+ CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
@@ -88,7 +91,7 @@ public class CommonGramsFilterTest exten
Analyzer a = new Analyzer() {
@Override
public TokenStream tokenStream(String field, Reader in) {
- return new CommonGramsQueryFilter(new CommonGramsFilter(
+ return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords));
}
};
@@ -157,7 +160,7 @@ public class CommonGramsFilterTest exten
Analyzer a = new Analyzer() {
@Override
public TokenStream tokenStream(String field, Reader in) {
- return new CommonGramsFilter(
+ return new CommonGramsFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords);
}
};
@@ -245,8 +248,7 @@ public class CommonGramsFilterTest exten
public void testCaseSensitive() throws Exception {
final String input = "How The s a brown s cow d like A B thing?";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- Set common = CommonGramsFilter.makeCommonSet(commonWords);
- TokenFilter cgf = new CommonGramsFilter(wt, common, false);
+ TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
"s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow",
"cow_d", "d", "d_like", "like", "A", "B", "thing?"});
@@ -258,7 +260,7 @@ public class CommonGramsFilterTest exten
public void testLastWordisStopWord() throws Exception {
final String input = "dog the";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+ CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "dog_the" });
}
@@ -269,7 +271,7 @@ public class CommonGramsFilterTest exten
public void testFirstWordisStopWord() throws Exception {
final String input = "the dog";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+ CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_dog" });
}
@@ -280,7 +282,7 @@ public class CommonGramsFilterTest exten
public void testOneWordQueryStopWord() throws Exception {
final String input = "the";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+ CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the" });
}
@@ -291,7 +293,7 @@ public class CommonGramsFilterTest exten
public void testOneWordQuery() throws Exception {
final String input = "monster";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+ CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "monster" });
}
@@ -302,7 +304,7 @@ public class CommonGramsFilterTest exten
public void TestFirstAndLastStopWord() throws Exception {
final String input = "the of";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+ CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_of" });
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Tue Nov 30 11:22:39 2010
@@ -18,22 +18,16 @@ package org.apache.lucene.analysis.core;
*/
import java.io.IOException;
-import java.io.StringReader;
import java.io.Reader;
+import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.SimpleAnalyzer;
-import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.Version;
@@ -137,20 +131,6 @@ public class TestAnalyzers extends BaseT
}
/**
- * @deprecated remove this when lucene 3.0 "broken unicode 4" support
- * is no longer needed.
- */
- @Deprecated
- private static class LowerCaseWhitespaceAnalyzerBWComp extends Analyzer {
-
- @Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new LowerCaseFilter(new WhitespaceTokenizer(reader));
- }
-
- }
-
- /**
* Test that LowercaseFilter handles entire unicode range correctly
*/
public void testLowerCaseFilter() throws IOException {
@@ -196,30 +176,6 @@ public class TestAnalyzers extends BaseT
}
- /**
- * Test that LowercaseFilter only works on BMP for back compat,
- * depending upon version
- * @deprecated remove this test when lucene 3.0 "broken unicode 4" support
- * is no longer needed.
- */
- @Deprecated
- public void testLowerCaseFilterBWComp() throws IOException {
- Analyzer a = new LowerCaseWhitespaceAnalyzerBWComp();
- // BMP
- assertAnalyzesTo(a, "AbaCaDabA", new String[] { "abacadaba" });
- // supplementary, no-op
- assertAnalyzesTo(a, "\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16",
- new String[] {"\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16"});
- assertAnalyzesTo(a, "AbaCa\ud801\udc16DabA",
- new String[] { "abaca\ud801\udc16daba" });
- // unpaired lead surrogate
- assertAnalyzesTo(a, "AbaC\uD801AdaBa",
- new String [] { "abac\uD801adaba" });
- // unpaired trail surrogate
- assertAnalyzesTo(a, "AbaC\uDC16AdaBa",
- new String [] { "abac\uDC16adaba" });
- }
-
public void testLowerCaseTokenizer() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT,
@@ -228,6 +184,7 @@ public class TestAnalyzers extends BaseT
"\ud801\udc44test" });
}
+ /** @deprecated (3.1) */
@Deprecated
public void testLowerCaseTokenizerBWCompat() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
@@ -235,7 +192,7 @@ public class TestAnalyzers extends BaseT
reader);
assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test" });
}
-
+
public void testWhitespaceTokenizer() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@@ -244,6 +201,7 @@ public class TestAnalyzers extends BaseT
"\ud801\udc1ctest" });
}
+ /** @deprecated (3.1) */
@Deprecated
public void testWhitespaceTokenizerBWCompat() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -129,12 +129,13 @@ public class TestClassicAnalyzer extends
// the following should be recognized as HOST:
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
- // 2.3 should show the bug
- a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
- assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
-
- // 2.4 should not show the bug
- a2 = new ClassicAnalyzer(Version.LUCENE_24);
+ // 2.3 should show the bug. But, alas, it's obsolete, we don't support it.
+ // a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
+ // assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
+
+ // 2.4 should not show the bug. But, alas, it's also obsolete,
+ // so we check latest released (Robert's gonna break this on 4.0 soon :) )
+ a2 = new ClassicAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -62,17 +62,15 @@ public class TestStopAnalyzer extends Ba
stopWordsSet.add("good");
stopWordsSet.add("test");
stopWordsSet.add("analyzer");
- StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
+ StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer");
TokenStream stream = newStop.tokenStream("test", reader);
assertNotNull(stream);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
- assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
}
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java Tue Nov 30 11:22:39 2010
@@ -16,22 +16,18 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Set;
+
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.English;
import org.apache.lucene.util.Version;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Set;
-import java.util.HashSet;
-
public class TestStopFilter extends BaseTokenStreamTestCase {
@@ -39,7 +35,7 @@ public class TestStopFilter extends Base
public void testExactCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
- Set<String> stopWords = new HashSet<String>(Arrays.asList("is", "the", "Time"));
+ Set<String> stopWords = asSet("is", "the", "Time");
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
@@ -51,7 +47,7 @@ public class TestStopFilter extends Base
public void testIgnoreCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
- Set<Object> stopWords = new HashSet<Object>(Arrays.asList( "is", "the", "Time" ));
+ Set<String> stopWords = asSet( "is", "the", "Time" );
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
@@ -89,7 +85,7 @@ public class TestStopFilter extends Base
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
// with increments
StringReader reader = new StringReader(sb.toString());
- StopFilter stpf = new StopFilter(Version.LUCENE_24, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+ StopFilter stpf = new StopFilter(Version.LUCENE_40, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
doTestStopPositons(stpf,true);
// without increments
reader = new StringReader(sb.toString());
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -35,9 +35,8 @@ import org.apache.lucene.util.Version;
*
*/
public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
-
/**
- * @deprecated Remove this test when support for 3.0 indexes is no longer needed.
+ * @deprecated (3.1) Remove this test when support for 3.0 indexes is no longer needed.
*/
@Deprecated
public void testStopWordLegacy() throws Exception {
@@ -51,7 +50,7 @@ public class TestCzechAnalyzer extends B
}
/**
- * @deprecated Remove this test when support for 3.0 indexes is no longer needed.
+ * @deprecated (3.1) Remove this test when support for 3.0 indexes is no longer needed.
*/
@Deprecated
public void testReusableTokenStreamLegacy() throws Exception {
@@ -66,49 +65,6 @@ public class TestCzechAnalyzer extends B
assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česk", "republik" });
}
- /**
- * An input stream that always throws IOException for testing.
- * @deprecated Remove this class when the loadStopWords method is removed.
- */
- @Deprecated
- private class UnreliableInputStream extends InputStream {
- @Override
- public int read() throws IOException {
- throw new IOException();
- }
- }
-
- /**
- * The loadStopWords method does not throw IOException on error,
- * instead previously it set the stoptable to null (versus empty)
- * this would cause a NPE when it is time to create the StopFilter.
- * @deprecated Remove this test when the loadStopWords method is removed.
- */
- @Deprecated
- public void testInvalidStopWordFile() throws Exception {
- CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_30);
- cz.loadStopWords(new UnreliableInputStream(), "UTF-8");
- assertAnalyzesTo(cz, "Pokud mluvime o volnem",
- new String[] { "pokud", "mluvime", "o", "volnem" });
- }
-
- /**
- * Test that changes to the stop table via loadStopWords are applied immediately
- * when using reusable token streams.
- * @deprecated Remove this test when the loadStopWords method is removed.
- */
- @Deprecated
- public void testStopWordFileReuse() throws Exception {
- CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_30);
- assertAnalyzesToReuse(cz, "Česká Republika",
- new String[] { "česká", "republika" });
-
- InputStream stopwords = getClass().getResourceAsStream("customStopWordFile.txt");
- cz.loadStopWords(stopwords, "UTF-8");
-
- assertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká" });
- }
-
public void testWithStemExclusionSet() throws IOException{
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole");
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.de;
import java.io.IOException;
import java.io.StringReader;
+import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -35,15 +36,6 @@ public class TestGermanAnalyzer extends
checkOneTermReuse(a, "Tischen", "tisch");
}
- public void testExclusionTableBWCompat() throws IOException {
- GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT,
- new StringReader("Fischen Trinken")));
- CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
- set.add("fischen");
- filter.setExclusionSet(set);
- assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
- }
-
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("fischen");
@@ -53,27 +45,8 @@ public class TestGermanAnalyzer extends
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
- public void testWithKeywordAttributeAndExclusionTable() throws IOException {
- CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
- set.add("fischen");
- CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
- set1.add("trinken");
- set1.add("fischen");
- GermanStemFilter filter = new GermanStemFilter(
- new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "Fischen Trinken")), set));
- filter.setExclusionSet(set1);
- assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
- }
-
- /*
- * Test that changes to the exclusion table are applied immediately
- * when using reusable token streams.
- */
- public void testExclusionTableReuse() throws Exception {
- GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
- checkOneTermReuse(a, "tischen", "tisch");
- a.setStemExclusionTable(new String[] { "tischen" });
+ public void testStemExclusionTable() throws Exception {
+ GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, Collections.emptySet(), asSet("tischen"));
checkOneTermReuse(a, "tischen", "tischen");
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java Tue Nov 30 11:22:39 2010
@@ -16,8 +16,8 @@ package org.apache.lucene.analysis.el;
* limitations under the License.
*/
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/**
@@ -52,7 +52,7 @@ public class GreekAnalyzerTest extends B
* Test the analysis of various greek strings.
*
* @throws Exception in case an error occurs
- * @deprecated Remove this test when support for 3.0 is no longer needed
+ * @deprecated (3.1) Remove this test when support for 3.0 is no longer needed
*/
@Deprecated
public void testAnalyzerBWCompat() throws Exception {
@@ -87,15 +87,4 @@ public class GreekAnalyzerTest extends B
assertAnalyzesToReuse(a, "ΠΡÎΫΠÎÎÎΣÎÎΣ ÎÏογοÏ, ο μεÏÏÏÏ ÎºÎ±Î¹ οι άλλοι",
new String[] { "ÏÏοÏ
ÏοθεÏ", "αÏογ", "μεÏÏ", "αλλ" });
}
-
- /**
- * Greek Analyzer didn't call standardFilter, so no normalization of acronyms.
- * check that this is preserved.
- * @deprecated remove this test in Lucene 4.0
- */
- @Deprecated
- public void testAcronymBWCompat() throws Exception {
- Analyzer a = new GreekAnalyzer(Version.LUCENE_30);
- assertAnalyzesTo(a, "Î.Î .Τ.", new String[] { "α.Ï.Ï." });
}
-}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -17,8 +17,8 @@ package org.apache.lucene.analysis.fa;
* limitations under the License.
*/
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
/**
* Test the Persian Analyzer
@@ -215,9 +215,8 @@ public class TestPersianAnalyzer extends
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
- PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, new String[] { "the", "and", "a" });
+ PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, asSet("the", "and", "a"));
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
-
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -116,7 +116,7 @@ public class TestFrenchAnalyzer extends
}
/**
- * @deprecated remove this test for Lucene 4.0
+ * @deprecated (3.1) remove this test for Lucene 5.0
*/
@Deprecated
public void testAnalyzer30() throws Exception {
@@ -224,17 +224,6 @@ public class TestFrenchAnalyzer extends
"captif" });
}
- /*
- * Test that changes to the exclusion table are applied immediately
- * when using reusable token streams.
- */
- public void testExclusionTableReuse() throws Exception {
- FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
- assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
- fa.setStemExclusionTable(new String[] { "habitable" });
- assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
- }
-
public void testExclusionTableViaCtor() throws Exception {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("habitable");
@@ -256,7 +245,7 @@ public class TestFrenchAnalyzer extends
/**
* Prior to 3.1, this analyzer had no lowercase filter.
* stopwords were case sensitive. Preserve this for back compat.
- * @deprecated Remove this test in Lucene 4.0
+ * @deprecated (3.1) Remove this test in Lucene 5.0
*/
@Deprecated
public void testBuggyStopwordsCasing() throws IOException {
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java Tue Nov 30 11:22:39 2010
@@ -24,6 +24,7 @@ import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
/** Test {@link KeepWordFilter} */
public class TestKeepWordFilter extends BaseTokenStreamTestCase {
@@ -38,12 +39,12 @@ public class TestKeepWordFilter extends
// Test Stopwords
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- stream = new KeepWordFilter(stream, words, true);
+ stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
// Now force case
stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- stream = new KeepWordFilter(stream, words, false);
+ stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
assertTokenStreamContents(stream, new String[] { "aaa" });
}
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java Tue Nov 30 11:22:39 2010
@@ -23,12 +23,7 @@ import java.util.Collection;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.tokenattributes.*;
/**
* @version $Id:$
@@ -70,7 +65,7 @@ public class TestTrimFilter extends Base
}
/**
- * @deprecated does not support custom attributes
+ * @deprecated (3.0) does not support custom attributes
*/
@Deprecated
private static class IterTokenStream extends TokenStream {
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java Tue Nov 30 11:22:39 2010
@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -68,18 +67,14 @@ public class TestWordDelimiterFilter ext
// test that subwords and catenated subwords have
// the correct offsets.
- WordDelimiterFilter wdf = new WordDelimiterFilter(
- new SingleTokenTokenStream(new Token("foo-bar", 5, 12)),
- 1,1,0,0,1,1,0);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 12)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf,
new String[] { "foo", "bar", "foobar" },
new int[] { 5, 9, 5 },
new int[] { 8, 12, 12 });
- wdf = new WordDelimiterFilter(
- new SingleTokenTokenStream(new Token("foo-bar", 5, 6)),
- 1,1,0,0,1,1,0);
+ wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 6)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf,
new String[] { "foo", "bar", "foobar" },
@@ -90,10 +85,7 @@ public class TestWordDelimiterFilter ext
@Test
public void testOffsetChange() throws Exception
{
- WordDelimiterFilter wdf = new WordDelimiterFilter(
- new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)),
- 1,1,0,0,1,1,0
- );
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf,
new String[] { "übelkeit" },
@@ -104,10 +96,7 @@ public class TestWordDelimiterFilter ext
@Test
public void testOffsetChange2() throws Exception
{
- WordDelimiterFilter wdf = new WordDelimiterFilter(
- new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)),
- 1,1,0,0,1,1,0
- );
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf,
new String[] { "übelkeit" },
@@ -118,10 +107,7 @@ public class TestWordDelimiterFilter ext
@Test
public void testOffsetChange3() throws Exception
{
- WordDelimiterFilter wdf = new WordDelimiterFilter(
- new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)),
- 1,1,0,0,1,1,0
- );
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf,
new String[] { "übelkeit" },
@@ -132,10 +118,7 @@ public class TestWordDelimiterFilter ext
@Test
public void testOffsetChange4() throws Exception
{
- WordDelimiterFilter wdf = new WordDelimiterFilter(
- new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)),
- 1,1,0,0,1,1,0
- );
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf,
new String[] { "foo", "bar", "foobar"},
@@ -145,7 +128,7 @@ public class TestWordDelimiterFilter ext
public void doSplit(final String input, String... output) throws Exception {
WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
- new StringReader(input)), 1, 1, 0, 0, 0);
+ new StringReader(input)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 0, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf, output);
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Tue Nov 30 11:22:39 2010
@@ -114,7 +114,7 @@ public class TestDutchStemmer extends Ba
}
/**
- * @deprecated remove this test in Lucene 4.0
+ * @deprecated (3.1) remove this test in Lucene 5.0
*/
@Deprecated
public void testOldBuggyStemmer() throws Exception {
@@ -139,19 +139,6 @@ public class TestDutchStemmer extends Ba
checkOneTermReuse(a, "lichamelijkheden", "licham");
}
- /*
- * Test that changes to the exclusion table are applied immediately
- * when using reusable token streams.
- */
- public void testExclusionTableReuse() throws Exception {
- DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
- checkOneTermReuse(a, "lichamelijk", "licham");
- a.setStemExclusionTable(new String[] { "lichamelijk" });
- checkOneTermReuse(a, "lichamelijk", "lichamelijk");
-
-
- }
-
public void testExclusionTableViaCtor() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true);
set.add("lichamelijk");
@@ -178,7 +165,7 @@ public class TestDutchStemmer extends Ba
/**
* Prior to 3.1, this analyzer had no lowercase filter.
* stopwords were case sensitive. Preserve this for back compat.
- * @deprecated Remove this test in Lucene 4.0
+ * @deprecated (3.1) Remove this test in Lucene 5.0
*/
@Deprecated
public void testBuggyStopwordsCasing() throws IOException {
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Tue Nov 30 11:22:39 2010
@@ -26,9 +26,9 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class TestPatternTokenizer extends BaseTokenStreamTestCase
@@ -96,9 +96,7 @@ public class TestPatternTokenizer extend
/**
* TODO: rewrite tests not to use string comparison.
- * @deprecated only tests TermAttribute!
*/
- @Deprecated
private static String tsToString(TokenStream in) throws IOException {
StringBuilder out = new StringBuilder();
CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Tue Nov 30 11:22:39 2010
@@ -22,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
public class TestReverseStringFilter extends BaseTokenStreamTestCase {
public void testFilter() throws Exception {
@@ -53,9 +54,11 @@ public class TestReverseStringFilter ext
/**
* Test the broken 3.0 behavior, for back compat
+ * @deprecated (3.1) Remove in Lucene 5.0
*/
+ @Deprecated
public void testBackCompat() throws Exception {
- assertEquals("\uDF05\uD866\uDF05\uD866", ReverseStringFilter.reverse("ð©¬
ð©¬
"));
+ assertEquals("\uDF05\uD866\uDF05\uD866", ReverseStringFilter.reverse(Version.LUCENE_30, "ð©¬
ð©¬
"));
}
public void testReverseSupplementary() throws Exception {
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -18,12 +18,9 @@ package org.apache.lucene.analysis.ru;
*/
import java.io.IOException;
-import java.io.InputStreamReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
@@ -31,65 +28,16 @@ import org.apache.lucene.util.Version;
* Test case for RussianAnalyzer.
*/
-public class TestRussianAnalyzer extends BaseTokenStreamTestCase
-{
- private InputStreamReader inWords;
+public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
- private InputStreamReader sampleUnicode;
-
- /**
- * @deprecated remove this test and its datafiles in Lucene 4.0
- * the Snowball version has its own data tests.
- */
- @Deprecated
- public void testUnicode30() throws IOException
- {
- RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_30);
- inWords =
- new InputStreamReader(
- getClass().getResourceAsStream("testUTF8.txt"),
- "UTF-8");
-
- sampleUnicode =
- new InputStreamReader(
- getClass().getResourceAsStream("resUTF8.htm"),
- "UTF-8");
-
- TokenStream in = ra.tokenStream("all", inWords);
-
- RussianLetterTokenizer sample =
- new RussianLetterTokenizer(TEST_VERSION_CURRENT,
- sampleUnicode);
-
- CharTermAttribute text = in.getAttribute(CharTermAttribute.class);
- CharTermAttribute sampleText = sample.getAttribute(CharTermAttribute.class);
-
- for (;;)
- {
- if (in.incrementToken() == false)
- break;
-
- boolean nextSampleToken = sample.incrementToken();
- assertEquals(
- "Unicode",
- text.toString(),
- nextSampleToken == false
- ? null
- : sampleText.toString());
- }
-
- inWords.close();
- sampleUnicode.close();
- }
-
- /** Check that RussianAnalyzer doesnt discard any numbers */
+ /** Check that RussianAnalyzer doesnt discard any numbers */
public void testDigitsInRussianCharset() throws IOException
{
RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" });
}
- /** @deprecated remove this test in Lucene 4.0: stopwords changed */
+ /** @deprecated (3.1) remove this test in Lucene 5.0: stopwords changed */
@Deprecated
public void testReusableTokenStream30() throws Exception {
Analyzer a = new RussianAnalyzer(Version.LUCENE_30);
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java Tue Nov 30 11:22:39 2010
@@ -25,7 +25,7 @@ import org.apache.lucene.util.Version;
/**
* Testcase for {@link RussianLetterTokenizer}
- * @deprecated Remove this test class in Lucene 4.0
+ * @deprecated (3.1) Remove this test class in Lucene 5.0
*/
@Deprecated
public class TestRussianLetterTokenizer extends BaseTokenStreamTestCase {