You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/07/28 13:28:27 UTC
svn commit: r1366643 [7/19] - in /lucene/dev/branches/lucene3312: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/copyright/
dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/ dev-tools/maven/
dev-tools/maven/lucene/benchmark/ dev-tools/maven...
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Sat Jul 28 11:27:51 2012
@@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokena
*/
%%
-%unicode 6.0
+%unicode 6.1
%integer
%final
%public
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Sat Jul 28 11:27:51 2012
@@ -59,7 +59,7 @@ public abstract class AbstractAnalysisFa
* to inform user, that for this factory a {@link #luceneMatchVersion} is required */
protected final void assureMatchVersion() {
if (luceneMatchVersion == null) {
- throw new InitializationException("Configuration Error: Factory '" + this.getClass().getName() +
+ throw new IllegalArgumentException("Configuration Error: Factory '" + this.getClass().getName() +
"' needs a 'luceneMatchVersion' parameter");
}
}
@@ -86,7 +86,7 @@ public abstract class AbstractAnalysisFa
if (useDefault) {
return defaultVal;
}
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Integer.parseInt(s);
}
@@ -99,7 +99,7 @@ public abstract class AbstractAnalysisFa
String s = args.get(name);
if (s==null) {
if (useDefault) return defaultVal;
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Boolean.parseBoolean(s);
}
@@ -108,11 +108,11 @@ public abstract class AbstractAnalysisFa
try {
String pat = args.get(name);
if (null == pat) {
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Pattern.compile(args.get(name));
} catch (PatternSyntaxException e) {
- throw new InitializationException
+ throw new IllegalArgumentException
("Configuration Error: '" + name + "' can not be parsed in " +
this.getClass().getSimpleName(), e);
}
@@ -129,13 +129,17 @@ public abstract class AbstractAnalysisFa
words = new CharArraySet(luceneMatchVersion,
files.size() * 10, ignoreCase);
for (String file : files) {
- List<String> wlist = loader.getLines(file.trim());
+ List<String> wlist = getLines(loader, file.trim());
words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
ignoreCase));
}
}
return words;
}
+
+ protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
+ return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
+ }
/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
* except the input is in snowball format. */
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java Sat Jul 28 11:27:51 2012
@@ -17,13 +17,50 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
-import org.apache.lucene.analysis.CharStream;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.CharFilter;
/**
- * Abstract parent class for analysis factories that create {@link CharStream}
+ * Abstract parent class for analysis factories that create {@link CharFilter}
* instances.
*/
public abstract class CharFilterFactory extends AbstractAnalysisFactory {
- public abstract CharStream create(CharStream input);
+ private static final AnalysisSPILoader<CharFilterFactory> loader =
+ new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class);
+
+ /** looks up a charfilter by name from context classpath */
+ public static CharFilterFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a charfilter class by name from context classpath */
+ public static Class<? extends CharFilterFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available charfilter names */
+ public static Set<String> availableCharFilters() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableCharFilters()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadCharFilters(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
+ /** Wraps the given Reader with a CharFilter. */
+ public abstract Reader create(Reader input);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Sat Jul 28 11:27:51 2012
@@ -162,8 +162,8 @@ public abstract class CharTokenizer exte
}
@Override
- public void reset(Reader input) throws IOException {
- super.reset(input);
+ public void setReader(Reader input) throws IOException {
+ super.setReader(input);
bufferIndex = 0;
offset = 0;
dataLen = 0;
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java Sat Jul 28 11:27:51 2012
@@ -19,29 +19,20 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.InputStream;
-import java.util.List;
/**
* Abstraction for loading resources (streams, files, and classes).
*/
public interface ResourceLoader {
+ /**
+ * Opens a named resource
+ */
public InputStream openResource(String resource) throws IOException;
/**
- * Accesses a resource by name and returns the (non comment) lines
- * containing data.
- *
- * <p>
- * A comment line is any line that starts with the character "#"
- * </p>
- *
- * @param resource
- * @return a list of non-blank non-comment lines with whitespace trimmed
- * from front and back.
- * @throws IOException
+ * Creates a class of the name and expected type
*/
- public List<String> getLines(String resource) throws IOException;
-
- public <T> T newInstance(String cname, Class<T> expectedType, String ... subpackages);
+ // TODO: fix exception handling
+ public <T> T newInstance(String cname, Class<T> expectedType);
}
\ No newline at end of file
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java Sat Jul 28 11:27:51 2012
@@ -17,6 +17,8 @@
package org.apache.lucene.analysis.util;
+import java.io.IOException;
+
/**
* Interface for a component that needs to be initialized by
* an implementation of {@link ResourceLoader}.
@@ -25,5 +27,5 @@ package org.apache.lucene.analysis.util;
*/
public interface ResourceLoaderAware {
- void inform(ResourceLoader loader);
+ void inform(ResourceLoader loader) throws IOException;
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java Sat Jul 28 11:27:51 2012
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
+import java.util.Set;
+
import org.apache.lucene.analysis.TokenStream;
/**
@@ -25,6 +27,40 @@ import org.apache.lucene.analysis.TokenS
*/
public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
+ private static final AnalysisSPILoader<TokenFilterFactory> loader =
+ new AnalysisSPILoader<TokenFilterFactory>(TokenFilterFactory.class,
+ new String[] { "TokenFilterFactory", "FilterFactory" });
+
+ /** looks up a tokenfilter by name from context classpath */
+ public static TokenFilterFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a tokenfilter class by name from context classpath */
+ public static Class<? extends TokenFilterFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available tokenfilter names from context classpath */
+ public static Set<String> availableTokenFilters() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableTokenFilters()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadTokenFilters(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
/** Transform the specified input TokenStream */
public abstract TokenStream create(TokenStream input);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java Sat Jul 28 11:27:51 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
import org.apache.lucene.analysis.Tokenizer;
import java.io.Reader;
+import java.util.Set;
/**
* Abstract parent class for analysis factories that create {@link Tokenizer}
@@ -27,6 +28,39 @@ import java.io.Reader;
*/
public abstract class TokenizerFactory extends AbstractAnalysisFactory {
+ private static final AnalysisSPILoader<TokenizerFactory> loader =
+ new AnalysisSPILoader<TokenizerFactory>(TokenizerFactory.class);
+
+ /** looks up a tokenizer by name from context classpath */
+ public static TokenizerFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a tokenizer class by name from context classpath */
+ public static Class<? extends TokenizerFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available tokenizer names from context classpath */
+ public static Set<String> availableTokenizers() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableTokenizers()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadTokenizers(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
/** Creates a TokenStream of the specified input */
public abstract Tokenizer create(Reader input);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Sat Jul 28 11:27:51 2012
@@ -19,7 +19,11 @@ package org.apache.lucene.analysis.util;
import java.io.BufferedReader;
import java.io.IOException;
+import java.io.InputStream;
import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
@@ -194,6 +198,47 @@ public class WordlistLoader {
return result;
}
+ /**
+ * Accesses a resource by name and returns the (non comment) lines containing
+ * data using the given character encoding.
+ *
+ * <p>
+ * A comment line is any line that starts with the character "#"
+ * </p>
+ *
+ * @return a list of non-blank non-comment lines with whitespace trimmed
+ * @throws IOException
+ */
+ public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
+ BufferedReader input = null;
+ ArrayList<String> lines;
+ boolean success = false;
+ try {
+ input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
+
+ lines = new ArrayList<String>();
+ for (String word=null; (word=input.readLine())!=null;) {
+ // skip initial bom marker
+ if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
+ word = word.substring(1);
+ // skip comments
+ if (word.startsWith("#")) continue;
+ word=word.trim();
+ // skip blank lines
+ if (word.length()==0) continue;
+ lines.add(word);
+ }
+ success = true;
+ return lines;
+ } finally {
+ if (success) {
+ IOUtils.close(input);
+ } else {
+ IOUtils.closeWhileHandlingException(input);
+ }
+ }
+ }
+
private static BufferedReader getBufferedReader(Reader reader) {
return (reader instanceof BufferedReader) ? (BufferedReader) reader
: new BufferedReader(reader);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Sat Jul 28 11:27:51 2012
@@ -325,8 +325,8 @@ public final class WikipediaTokenizer ex
}
@Override
- public void reset(Reader reader) throws IOException {
- super.reset(reader);
+ public void setReader(Reader reader) throws IOException {
+ super.setReader(reader);
scanner.yyreset(input);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Sat Jul 28 11:27:51 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
package org.apache.lucene.analysis.wikipedia;
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokena
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 08.07.12 17:00 from the specification file
- * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 7/15/12 1:57 AM from the specification file
+ * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java Sat Jul 28 11:27:51 2012
@@ -435,7 +435,7 @@ public abstract class SnowballProgram {
bra > ket ||
ket > limit)
{
- System.err.println("faulty slice operation");
+ throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
// FIXME: report error somehow.
/*
fprintf(stderr, "faulty slice operation:\n");
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/overview.html?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/overview.html (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/overview.html Sat Jul 28 11:27:51 2012
@@ -24,7 +24,7 @@
For an introduction to Lucene's analysis API, see the {@link org.apache.lucene.analysis} package documentation.
</p>
<p>
- This module contains concrete components ({@link org.apache.lucene.analysis.charfilter.CharFilter}s,
+ This module contains concrete components ({@link org.apache.lucene.analysis.CharFilter}s,
{@link org.apache.lucene.analysis.Tokenizer}s, and ({@link org.apache.lucene.analysis.TokenFilter}s) for
analyzing different types of content. It also provides a number of {@link org.apache.lucene.analysis.Analyzer}s
for different languages that you can use to get started quickly.
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java Sat Jul 28 11:27:51 2012
@@ -29,7 +29,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util._TestUtil;
@@ -46,7 +45,7 @@ public class HTMLStripCharFilterTest ext
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new HTMLStripCharFilter(CharReader.get(reader));
+ return new HTMLStripCharFilter(reader);
}
};
}
@@ -60,7 +59,7 @@ public class HTMLStripCharFilterTest ext
String gold = "\nthis is some text\n here is a link and " +
"another link. " +
"This is an entity: & plus a <. Here is an &. ";
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new StringReader(html)));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new StringReader(html));
StringBuilder builder = new StringBuilder();
int ch = -1;
char [] goldArray = gold.toCharArray();
@@ -79,7 +78,7 @@ public class HTMLStripCharFilterTest ext
//Some sanity checks, but not a full-fledged check
public void testHTML() throws Exception {
InputStream stream = getClass().getResourceAsStream("htmlStripReaderTest.html");
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new InputStreamReader(stream, "UTF-8")));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, "UTF-8"));
StringBuilder builder = new StringBuilder();
int ch = -1;
while ((ch = reader.read()) != -1){
@@ -96,7 +95,7 @@ public class HTMLStripCharFilterTest ext
public void testMSWord14GeneratedHTML() throws Exception {
InputStream stream = getClass().getResourceAsStream("MS-Word 14 generated.htm");
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new InputStreamReader(stream, "UTF-8")));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, "UTF-8"));
String gold = "This is a test";
StringBuilder builder = new StringBuilder();
int ch = 0;
@@ -117,7 +116,7 @@ public class HTMLStripCharFilterTest ext
String gold = "\u0393";
Set<String> set = new HashSet<String>();
set.add("reserved");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)), set);
+ Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -132,7 +131,7 @@ public class HTMLStripCharFilterTest ext
String gold = " <foo> \u00DCbermensch = \u0393 bar \u0393";
Set<String> set = new HashSet<String>();
set.add("reserved");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)), set);
+ Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -147,7 +146,7 @@ public class HTMLStripCharFilterTest ext
String gold = " <junk/> ! @ and â";
Set<String> set = new HashSet<String>();
set.add("reserved");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)), set);
+ Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -161,7 +160,7 @@ public class HTMLStripCharFilterTest ext
String test = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
Set<String> set = new HashSet<String>();
set.add("reserved");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)), set);
+ Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -346,7 +345,7 @@ public class HTMLStripCharFilterTest ext
for (int i = 0 ; i < testGold.length ; i += 2) {
String test = testGold[i];
String gold = testGold[i + 1];
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -370,7 +369,7 @@ public class HTMLStripCharFilterTest ext
testBuilder.append("-->foo");
String gold = "foo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(testBuilder.toString())));
+ Reader reader = new HTMLStripCharFilter(new StringReader(testBuilder.toString()));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -388,7 +387,7 @@ public class HTMLStripCharFilterTest ext
appendChars(testBuilder, HTMLStripCharFilter.getInitialBufferSize() + 500);
testBuilder.append("?>");
gold = "";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(testBuilder.toString())));
+ reader = new HTMLStripCharFilter(new StringReader(testBuilder.toString()));
ch = 0;
builder = new StringBuilder();
try {
@@ -406,7 +405,7 @@ public class HTMLStripCharFilterTest ext
appendChars(testBuilder, HTMLStripCharFilter.getInitialBufferSize() + 500);
testBuilder.append("/>");
gold = "";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(testBuilder.toString())));
+ reader = new HTMLStripCharFilter(new StringReader(testBuilder.toString()));
ch = 0;
builder = new StringBuilder();
try {
@@ -430,7 +429,7 @@ public class HTMLStripCharFilterTest ext
private void processBuffer(String test, String assertMsg) throws IOException {
// System.out.println("-------------------processBuffer----------");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(test))));//force the use of BufferedReader
+ Reader reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(test)));//force the use of BufferedReader
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -448,7 +447,7 @@ public class HTMLStripCharFilterTest ext
String test = "<!--- three dashes, still a valid comment ---> ";
String gold = " ";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(test))));//force the use of BufferedReader
+ Reader reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(test)));//force the use of BufferedReader
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -464,7 +463,7 @@ public class HTMLStripCharFilterTest ext
public void doTestOffsets(String in) throws Exception {
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(in))));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(in)));
int ch = 0;
int off = 0; // offset in the reader
int strOff = -1; // offset in the original string
@@ -491,7 +490,7 @@ public class HTMLStripCharFilterTest ext
static void assertLegalOffsets(String in) throws Exception {
int length = in.length();
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(in))));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(in)));
int ch = 0;
int off = 0;
while ((ch = reader.read()) != -1) {
@@ -526,7 +525,7 @@ public class HTMLStripCharFilterTest ext
+ " alt = \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}' -->\"\n\n"
+ " title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
String gold = "onetwo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -540,7 +539,7 @@ public class HTMLStripCharFilterTest ext
test = "one<script><!-- <!--#config comment=\"<!-- \\\"comment\\\"-->\"--> --></script>two";
gold = "one\ntwo";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ reader = new HTMLStripCharFilter(new StringReader(test));
ch = 0;
builder = new StringBuilder();
try {
@@ -557,7 +556,7 @@ public class HTMLStripCharFilterTest ext
public void testScriptQuotes() throws Exception {
String test = "one<script attr= bare><!-- action('<!-- comment -->', \"\\\"-->\\\"\"); --></script>two";
String gold = "one\ntwo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -572,7 +571,7 @@ public class HTMLStripCharFilterTest ext
test = "hello<script><!-- f('<!--internal--></script>'); --></script>";
gold = "hello\n";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ reader = new HTMLStripCharFilter(new StringReader(test));
ch = 0;
builder = new StringBuilder();
try {
@@ -591,7 +590,7 @@ public class HTMLStripCharFilterTest ext
String gold = "one<script no-value-attr></script>two";
Set<String> escapedTags = new HashSet<String>(Arrays.asList("SCRIPT"));
Reader reader = new HTMLStripCharFilter
- (CharReader.get(new StringReader(test)), escapedTags);
+ (new StringReader(test), escapedTags);
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -612,7 +611,7 @@ public class HTMLStripCharFilterTest ext
+ "-->\n"
+ "</style>two";
String gold = "one\ntwo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -631,7 +630,7 @@ public class HTMLStripCharFilterTest ext
String gold = "one<style type=\"text/css\"></style>two";
Set<String> escapedTags = new HashSet<String>(Arrays.asList("STYLE"));
Reader reader = new HTMLStripCharFilter
- (CharReader.get(new StringReader(test)), escapedTags);
+ (new StringReader(test), escapedTags);
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -656,7 +655,7 @@ public class HTMLStripCharFilterTest ext
for (int i = 0 ; i < testGold.length ; i += 2) {
String test = testGold[i];
String gold = testGold[i + 1];
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -671,7 +670,7 @@ public class HTMLStripCharFilterTest ext
String gold = "one<BR class='whatever'>two</\nBR\n>";
Set<String> escapedTags = new HashSet<String>(Arrays.asList("BR"));
Reader reader = new HTMLStripCharFilter
- (CharReader.get(new StringReader(test)), escapedTags);
+ (new StringReader(test), escapedTags);
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -688,7 +687,7 @@ public class HTMLStripCharFilterTest ext
public void testInlineTagsNoSpace() throws Exception {
String test = "one<sPAn class=\"invisible\">two<sup>2<sup>e</sup></sup>.</SpaN>three";
String gold = "onetwo2e.three";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -705,7 +704,7 @@ public class HTMLStripCharFilterTest ext
public void testCDATA() throws Exception {
String test = "one<![CDATA[<one><two>three<four></four></two></one>]]>two";
String gold = "one<one><two>three<four></four></two></one>two";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -720,7 +719,7 @@ public class HTMLStripCharFilterTest ext
test = "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five";
gold = "onetwo<![CDATA[three]]>fourfive";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ reader = new HTMLStripCharFilter(new StringReader(test));
ch = 0;
builder = new StringBuilder();
try {
@@ -737,7 +736,7 @@ public class HTMLStripCharFilterTest ext
public void testUppercaseCharacterEntityVariants() throws Exception {
String test = " "-©>><<®&";
String gold = " \"-\u00A9>><<\u00AE&";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -754,7 +753,7 @@ public class HTMLStripCharFilterTest ext
public void testMSWordMalformedProcessingInstruction() throws Exception {
String test = "one<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />two";
String gold = "onetwo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -771,7 +770,7 @@ public class HTMLStripCharFilterTest ext
public void testSupplementaryCharsInTags() throws Exception {
String test = "one<𩬅艱鍟䇹愯瀛>two<瀛愯𩬅>three 瀛愯𩬅</瀛愯𩬅>four</𩬅艱鍟䇹愯瀛>five<𠀀𠀀>six<𠀀𠀀/>seven";
String gold = "one\ntwo\nthree 瀛愯𩬅\nfour\nfive\nsix\nseven";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -822,7 +821,7 @@ public class HTMLStripCharFilterTest ext
}
}
Reader reader = new HTMLStripCharFilter
- (CharReader.get(new StringReader(text.toString())));
+ (new StringReader(text.toString()));
while (reader.read() != -1);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java Sat Jul 28 11:27:51 2012
@@ -29,8 +29,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -60,7 +59,7 @@ public class TestMappingCharFilter exten
}
public void testReaderReset() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
char[] buf = new char[10];
int len = cs.read(buf, 0, 10);
assertEquals( 1, len );
@@ -76,55 +75,55 @@ public class TestMappingCharFilter exten
}
public void testNothingChange() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1}, 1);
}
public void test1to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1}, 1);
}
public void test1to2() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1}, 1);
}
public void test1to3() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1}, 1);
}
public void test2to4() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2}, 2);
}
public void test2to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2}, 2);
}
public void test3to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3}, 3);
}
public void test4to2() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4}, 4);
}
public void test5to0() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
}
@@ -149,7 +148,7 @@ public class TestMappingCharFilter exten
//
public void testTokenStream() throws Exception {
String testString = "h i j k ll cccc bbb aa";
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( testString ) ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( testString ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[]{"i","i","jj","kkk","llll","cc","b","a"},
@@ -171,8 +170,8 @@ public class TestMappingCharFilter exten
// h,8,9 => i,8,9
public void testChained() throws Exception {
String testString = "aaaa ll h";
- CharStream cs = new MappingCharFilter( normMap,
- new MappingCharFilter( normMap, CharReader.get( new StringReader( testString ) ) ) );
+ CharFilter cs = new MappingCharFilter( normMap,
+ new MappingCharFilter( normMap, new StringReader( testString ) ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[]{"a","llllllll","i"},
@@ -193,7 +192,7 @@ public class TestMappingCharFilter exten
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(normMap, CharReader.get(reader));
+ return new MappingCharFilter(normMap, reader);
}
};
@@ -219,7 +218,7 @@ public class TestMappingCharFilter exten
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(map, CharReader.get(reader));
+ return new MappingCharFilter(map, reader);
}
};
@@ -241,7 +240,7 @@ public class TestMappingCharFilter exten
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(map, CharReader.get(reader));
+ return new MappingCharFilter(map, reader);
}
};
int numRounds = 100;
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java Sat Jul 28 11:27:51 2012
@@ -23,7 +23,6 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -216,7 +215,7 @@ public class TestCJKAnalyzer extends Bas
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(norm, CharReader.get(reader));
+ return new MappingCharFilter(norm, reader);
}
};
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Sat Jul 28 11:27:51 2012
@@ -48,7 +48,7 @@ public class CommonGramsFilterTest exten
assertTrue(cgf.incrementToken());
assertEquals("the_s", term.toString());
- wt.reset(new StringReader(input));
+ wt.setReader(new StringReader(input));
cgf.reset();
assertTrue(cgf.incrementToken());
assertEquals("How", term.toString());
@@ -66,7 +66,7 @@ public class CommonGramsFilterTest exten
assertTrue(nsf.incrementToken());
assertEquals("the_s", term.toString());
- wt.reset(new StringReader(input));
+ wt.setReader(new StringReader(input));
nsf.reset();
assertTrue(nsf.incrementToken());
assertEquals("How_the", term.toString());
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Sat Jul 28 11:27:51 2012
@@ -24,7 +24,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -240,7 +239,7 @@ public class TestCompoundWordTokenFilter
assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
assertTrue(tf.incrementToken());
assertEquals("Rind", termAtt.toString());
- wsTokenizer.reset(new StringReader("Rindfleischüberwachungsgesetz"));
+ wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
tf.reset();
assertTrue(tf.incrementToken());
assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
@@ -327,7 +326,7 @@ public class TestCompoundWordTokenFilter
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(normMap, CharReader.get(reader));
+ return new MappingCharFilter(normMap, reader);
}
};
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Sat Jul 28 11:27:51 2012
@@ -163,7 +163,7 @@ public class TestAnalyzers extends BaseT
filter.reset();
String highSurEndingUpper = "BogustermBoguster\ud801";
String highSurEndingLower = "bogustermboguster\ud801";
- tokenizer.reset(new StringReader(highSurEndingUpper));
+ tokenizer.setReader(new StringReader(highSurEndingUpper));
assertTokenStreamContents(filter, new String[] {highSurEndingLower});
assertTrue(filter.hasAttribute(CharTermAttribute.class));
char[] termBuffer = filter.getAttribute(CharTermAttribute.class).buffer();
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java Sat Jul 28 11:27:51 2012
@@ -1,11 +1,12 @@
package org.apache.lucene.analysis.core;
import java.io.Reader;
+import java.io.StringReader;
import java.nio.CharBuffer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockCharFilter;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@@ -64,7 +65,7 @@ public class TestBugInSomething extends
checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj");
}
- CharStream wrappedStream = new CharStream() {
+ CharFilter wrappedStream = new CharFilter(new StringReader("bogus")) {
@Override
public void mark(int readAheadLimit) {
@@ -107,8 +108,8 @@ public class TestBugInSomething extends
}
@Override
- public int correctOffset(int currentOff) {
- throw new UnsupportedOperationException("correctOffset(int)");
+ public int correct(int currentOff) {
+ throw new UnsupportedOperationException("correct(int)");
}
@Override
@@ -123,7 +124,7 @@ public class TestBugInSomething extends
};
public void testWrapping() throws Exception {
- CharStream cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
+ CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
try {
cs.mark(1);
fail();
@@ -177,7 +178,7 @@ public class TestBugInSomething extends
cs.correctOffset(1);
fail();
} catch (Exception e) {
- assertEquals("correctOffset(int)", e.getMessage());
+ assertEquals("correct(int)", e.getMessage());
}
try {
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Sat Jul 28 11:27:51 2012
@@ -44,8 +44,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.EmptyTokenizer;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
@@ -101,7 +100,7 @@ public class TestRandomChains extends Ba
static List<Constructor<? extends Tokenizer>> tokenizers;
static List<Constructor<? extends TokenFilter>> tokenfilters;
- static List<Constructor<? extends CharStream>> charfilters;
+ static List<Constructor<? extends CharFilter>> charfilters;
// TODO: fix those and remove
private static final Set<Class<?>> brokenComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
@@ -170,7 +169,7 @@ public class TestRandomChains extends Ba
getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
tokenizers = new ArrayList<Constructor<? extends Tokenizer>>();
tokenfilters = new ArrayList<Constructor<? extends TokenFilter>>();
- charfilters = new ArrayList<Constructor<? extends CharStream>>();
+ charfilters = new ArrayList<Constructor<? extends CharFilter>>();
for (final Class<?> c : analysisClasses) {
final int modifiers = c.getModifiers();
if (
@@ -179,7 +178,7 @@ public class TestRandomChains extends Ba
|| c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()
|| brokenComponents.contains(c)
|| c.isAnnotationPresent(Deprecated.class)
- || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharStream.class.isAssignableFrom(c))
+ || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))
) {
continue;
}
@@ -197,10 +196,10 @@ public class TestRandomChains extends Ba
assertTrue(ctor.toGenericString() + " has unsupported parameter types",
allowedTokenFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes())));
tokenfilters.add(castConstructor(TokenFilter.class, ctor));
- } else if (CharStream.class.isAssignableFrom(c)) {
+ } else if (CharFilter.class.isAssignableFrom(c)) {
assertTrue(ctor.toGenericString() + " has unsupported parameter types",
allowedCharFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes())));
- charfilters.add(castConstructor(CharStream.class, ctor));
+ charfilters.add(castConstructor(CharFilter.class, ctor));
} else {
fail("Cannot get here");
}
@@ -236,7 +235,7 @@ public class TestRandomChains extends Ba
private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) {
return (Constructor<T>) ctor;
}
- private static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
+ static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
final ClassLoader cld = TestRandomChains.class.getClassLoader();
final String path = pckgname.replace('.', '/');
final Enumeration<URL> resources = cld.getResources(path);
@@ -524,7 +523,6 @@ public class TestRandomChains extends Ba
allowedCharFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
allowedCharFilterArgs.addAll(argProducers.keySet());
allowedCharFilterArgs.add(Reader.class);
- allowedCharFilterArgs.add(CharStream.class);
}
@SuppressWarnings("unchecked")
@@ -560,8 +558,6 @@ public class TestRandomChains extends Ba
Class<?> paramType = paramTypes[i];
if (paramType == Reader.class) {
args[i] = reader;
- } else if (paramType == CharStream.class) {
- args[i] = CharReader.get(reader);
} else {
args[i] = newRandomArg(random, paramType);
}
@@ -701,7 +697,7 @@ public class TestRandomChains extends Ba
int numFilters = random.nextInt(3);
for (int i = 0; i < numFilters; i++) {
while (true) {
- final Constructor<? extends CharStream> ctor = charfilters.get(random.nextInt(charfilters.size()));
+ final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
reader = createComponent(ctor, args, descr);
if (reader != null) {
@@ -760,24 +756,16 @@ public class TestRandomChains extends Ba
}
}
- // wants charfilter to be a filterreader...
- // do *NOT*, do *NOT* refactor me to be a charfilter: LUCENE-3990
- static class CheckThatYouDidntReadAnythingReaderWrapper extends CharStream {
+ static class CheckThatYouDidntReadAnythingReaderWrapper extends CharFilter {
boolean readSomething;
- CharStream in;
CheckThatYouDidntReadAnythingReaderWrapper(Reader in) {
- this.in = CharReader.get(in);
+ super(in);
}
@Override
- public int correctOffset(int currentOff) {
- return in.correctOffset(currentOff);
- }
-
- @Override
- public void close() throws IOException {
- in.close();
+ public int correct(int currentOff) {
+ return currentOff; // we don't change any offsets
}
@Override
@@ -799,32 +787,12 @@ public class TestRandomChains extends Ba
}
@Override
- public void mark(int readAheadLimit) throws IOException {
- in.mark(readAheadLimit);
- }
-
- @Override
- public boolean markSupported() {
- return in.markSupported();
- }
-
- @Override
public int read(char[] cbuf) throws IOException {
readSomething = true;
return in.read(cbuf);
}
@Override
- public boolean ready() throws IOException {
- return in.ready();
- }
-
- @Override
- public void reset() throws IOException {
- in.reset();
- }
-
- @Override
public long skip(long n) throws IOException {
readSomething = true;
return in.skip(n);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Sat Jul 28 11:27:51 2012
@@ -202,7 +202,7 @@ public class TestStandardAnalyzer extend
}
public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
+ WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
wordBreakTest.test(a);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Sat Jul 28 11:27:51 2012
@@ -424,7 +424,7 @@ public class TestUAX29URLEmailTokenizer
}
public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
+ WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
wordBreakTest.test(a);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java Sat Jul 28 11:27:51 2012
@@ -23,8 +23,11 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -46,6 +49,19 @@ public class TestGermanLightStemFilter e
assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt");
}
+ public void testKeyword() throws IOException {
+ final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new GermanLightStemFilter(sink));
+ }
+ };
+ checkOneTerm(a, "sängerinnen", "sängerinnen");
+ }
+
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java Sat Jul 28 11:27:51 2012
@@ -23,8 +23,11 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -53,6 +56,19 @@ public class TestGermanMinimalStemFilter
checkOneTerm(analyzer, "äpfel", "apfel");
}
+ public void testKeyword() throws IOException {
+ final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new GermanMinimalStemFilter(sink));
+ }
+ };
+ checkOneTerm(a, "sängerinnen", "sängerinnen");
+ }
+
/** Test against a vocabulary from the reference impl */
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt");
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java Sat Jul 28 11:27:51 2012
@@ -23,9 +23,13 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -58,6 +62,19 @@ public class TestGermanStemFilter extend
assertAnalyzesTo(analyzer, "", new String[] { "" });
}
+ public void testKeyword() throws IOException {
+ final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new GermanStemFilter(sink));
+ }
+ };
+ checkOneTerm(a, "sängerinnen", "sängerinnen");
+ }
+
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java Sat Jul 28 11:27:51 2012
@@ -23,8 +23,11 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -46,6 +49,19 @@ public class TestFinnishLightStemFilter
assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt");
}
+ public void testKeyword() throws IOException {
+ final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false);
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new FinnishLightStemFilter(sink));
+ }
+ };
+ checkOneTerm(a, "edeltäjistään", "edeltäjistään");
+ }
+
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java Sat Jul 28 11:27:51 2012
@@ -23,8 +23,11 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -176,6 +179,19 @@ public class TestFrenchLightStemFilter e
assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt");
}
+ public void testKeyword() throws IOException {
+ final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false);
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new FrenchLightStemFilter(sink));
+ }
+ };
+ checkOneTerm(a, "chevaux", "chevaux");
+ }
+
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java Sat Jul 28 11:27:51 2012
@@ -23,8 +23,11 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -55,6 +58,19 @@ public class TestFrenchMinimalStemFilter
checkOneTerm(analyzer, "baron", "baron");
}
+ public void testKeyword() throws IOException {
+ final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false);
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new FrenchMinimalStemFilter(sink));
+ }
+ };
+ checkOneTerm(a, "chevaux", "chevaux");
+ }
+
/** Test against a vocabulary from the reference impl */
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt");
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java Sat Jul 28 11:27:51 2012
@@ -23,8 +23,11 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
/**
* Simple tests for {@link GalicianMinimalStemmer}
@@ -50,6 +53,19 @@ public class TestGalicianMinimalStemFilt
checkOneTerm(a, "barcelonês", "barcelonês");
}
+ public void testKeyword() throws IOException {
+ final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("elefantes"), false);
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new GalicianMinimalStemFilter(sink));
+ }
+ };
+ checkOneTerm(a, "elefantes", "elefantes");
+ }
+
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java Sat Jul 28 11:27:51 2012
@@ -23,8 +23,11 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -46,6 +49,19 @@ public class TestHungarianLightStemFilte
assertVocabulary(analyzer, getDataFile("hulighttestdata.zip"), "hulight.txt");
}
+ public void testKeyword() throws IOException {
+ final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false);
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new HungarianLightStemFilter(sink));
+ }
+ };
+ checkOneTerm(a, "babakocsi", "babakocsi");
+ }
+
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java Sat Jul 28 11:27:51 2012
@@ -17,19 +17,98 @@ package org.apache.lucene.analysis.hunsp
* limitations under the License.
*/
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.Version;
-import org.junit.Assert;
-import org.junit.Test;
-
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.Arrays;
-import static junit.framework.Assert.assertEquals;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Assert;
+import org.junit.Test;
public class HunspellDictionaryTest extends LuceneTestCase {
+
+ private class CloseCheckInputStream extends InputStream {
+ private InputStream delegate;
+
+ private boolean closed = false;
+
+ public CloseCheckInputStream(InputStream delegate) {
+ super();
+ this.delegate = delegate;
+ }
+
+ public int read() throws IOException {
+ return delegate.read();
+ }
+
+ public int hashCode() {
+ return delegate.hashCode();
+ }
+
+ public int read(byte[] b) throws IOException {
+ return delegate.read(b);
+ }
+
+ public boolean equals(Object obj) {
+ return delegate.equals(obj);
+ }
+
+ public int read(byte[] b, int off, int len) throws IOException {
+ return delegate.read(b, off, len);
+ }
+
+ public long skip(long n) throws IOException {
+ return delegate.skip(n);
+ }
+
+ public String toString() {
+ return delegate.toString();
+ }
+
+ public int available() throws IOException {
+ return delegate.available();
+ }
+
+ public void close() throws IOException {
+ this.closed = true;
+ delegate.close();
+ }
+
+ public void mark(int readlimit) {
+ delegate.mark(readlimit);
+ }
+
+ public void reset() throws IOException {
+ delegate.reset();
+ }
+
+ public boolean markSupported() {
+ return delegate.markSupported();
+ }
+
+ public boolean isClosed() {
+ return this.closed;
+ }
+
+ }
+
+ @Test
+ public void testResourceCleanup() throws IOException, ParseException {
+ CloseCheckInputStream affixStream = new CloseCheckInputStream(getClass().getResourceAsStream("testCompressed.aff"));
+ CloseCheckInputStream dictStream = new CloseCheckInputStream(getClass().getResourceAsStream("testCompressed.dic"));
+
+ new HunspellDictionary(affixStream, dictStream, TEST_VERSION_CURRENT);
+
+ assertFalse(affixStream.isClosed());
+ assertFalse(dictStream.isClosed());
+
+ affixStream.close();
+ dictStream.close();
+
+ assertTrue(affixStream.isClosed());
+ assertTrue(dictStream.isClosed());
+ }
@Test
public void testHunspellDictionary_loadDicAff() throws IOException, ParseException {
@@ -40,7 +119,7 @@ public class HunspellDictionaryTest exte
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size());
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());
-
+
affixStream.close();
dictStream.close();
}
@@ -54,7 +133,7 @@ public class HunspellDictionaryTest exte
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size());
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());
-
+
affixStream.close();
dictStream.close();
}
@@ -69,7 +148,9 @@ public class HunspellDictionaryTest exte
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());
//strict parsing disabled: malformed rule is not loaded
- assertNull(dictionary.lookupPrefix(new char[]{'a'}, 0, 1));
+ assertNull(dictionary.lookupPrefix(new char[]{'a'}, 0, 1));
+ affixStream.close();
+ dictStream.close();
affixStream = getClass().getResourceAsStream("testWrongAffixRule.aff");
dictStream = getClass().getResourceAsStream("test.dic");
@@ -81,7 +162,7 @@ public class HunspellDictionaryTest exte
Assert.assertEquals("The affix file contains a rule with less than five elements", e.getMessage());
Assert.assertEquals(23, e.getErrorOffset());
}
-
+
affixStream.close();
dictStream.close();
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java Sat Jul 28 11:27:51 2012
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.junit.AfterClass;
import org.junit.BeforeClass;
public class HunspellStemFilterTest extends BaseTokenStreamTestCase {
@@ -39,6 +40,10 @@ public class HunspellStemFilterTest ext
public static void beforeClass() throws IOException, ParseException {
DICTIONARY = createDict(true);
}
+ @AfterClass
+ public static void afterClass() {
+ DICTIONARY = null;
+ }
public static HunspellDictionary createDict(boolean ignoreCase) throws IOException, ParseException {
InputStream affixStream = HunspellStemmerTest.class.getResourceAsStream("test.aff");
InputStream dictStream = HunspellStemmerTest.class.getResourceAsStream("test.dic");
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java Sat Jul 28 11:27:51 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.hunsp
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
+import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -37,6 +38,11 @@ public class HunspellStemmerTest extends
public static void beforeClass() throws IOException, ParseException {
createStemmer(true);
}
+
+ @AfterClass
+ public static void afterClass() {
+ stemmer = null;
+ }
@Test
public void testStem_simpleSuffix() {
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java?rev=1366643&r1=1366642&r2=1366643&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java Sat Jul 28 11:27:51 2012
@@ -68,7 +68,7 @@ public class TestPerFieldAnalyzerWrapper
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MockCharFilter(CharReader.get(reader), 7);
+ return new MockCharFilter(reader, 7);
}
};
assertAnalyzesTo(a, "ab",