You are viewing a plain-text version of this content; the canonical hyperlink from the original HTML page is not preserved in this rendering.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/05/04 19:07:29 UTC
svn commit: r940962 - in /lucene/dev/trunk: lucene/contrib/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/
lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/
solr/src/java/org/apache/s...
Author: rmuir
Date: Tue May 4 17:07:28 2010
New Revision: 940962
URL: http://svn.apache.org/viewvc?rev=940962&view=rev
Log:
LUCENE-2413: Consolidate KeepWords,HyphenatedWords,Trim filters to contrib/analyzers
Added:
lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java
- copied, changed from r940912, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
- copied, changed from r940912, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
- copied, changed from r940912, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilter.java
lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java
- copied, changed from r940912, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java
lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
- copied, changed from r940912, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java
lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
- copied, changed from r940912, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilterFactory.java
- copied, changed from r940912, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java
Removed:
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilter.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java
Modified:
lucene/dev/trunk/lucene/contrib/CHANGES.txt
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java
Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=940962&r1=940961&r2=940962&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue May 4 17:07:28 2010
@@ -57,6 +57,12 @@ New features
into subwords and performs optional transformations on subword groups.
- o.a.l.analysis.miscellaneous.RemoveDuplicatesTokenFilter: TokenFilter which
filters out Tokens at the same position and Term text as the previous token.
+ - o.a.l.analysis.miscellaneous.TrimFilter: Trims leading and trailing whitespace
+ from Tokens in the stream.
+ - o.a.l.analysis.miscellaneous.KeepWordFilter: A TokenFilter that only keeps tokens
+ with text contained in the required words (inverse of StopFilter).
+ - o.a.l.analysis.miscellaneous.HyphenatedWordsFilter: A TokenFilter that puts
+ hyphenated words broken into two lines back together.
- o.a.l.analysis.pattern: Package for pattern-based analysis, containing a
CharFilter, Tokenizer, and Tokenfilter for transforming text with regexes.
(... in progress)
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java (from r940912, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java&p1=lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java&r1=940912&r2=940962&rev=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java Tue May 4 17:07:28 2010
@@ -1,4 +1,4 @@
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.miscellaneous;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java (from r940912, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java&p1=lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java&r1=940912&r2=940962&rev=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java Tue May 4 17:07:28 2010
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -29,14 +29,13 @@ import java.util.Set;
* A TokenFilter that only keeps tokens with text contained in the
* required words. This filter behaves like the inverse of StopFilter.
*
- * @version $Id$
* @since solr 1.3
*/
public final class KeepWordFilter extends TokenFilter {
private final CharArraySet words;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- /** @deprecated Use {@link #KeepWordFilter(TokenStream, Set, boolean)} instead */
+ /** @deprecated Use {@link #KeepWordFilter(TokenStream, CharArraySet)} instead */
@Deprecated
public KeepWordFilter(TokenStream in, Set<String> words, boolean ignoreCase ) {
this(in, new CharArraySet(words, ignoreCase));
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java (from r940912, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java&p1=lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilter.java&r1=940912&r2=940962&rev=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java Tue May 4 17:07:28 2010
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -26,8 +26,6 @@ import java.io.IOException;
/**
* Trims leading and trailing whitespace from Tokens in the stream.
- *
- * @version $Id:$
*/
public final class TrimFilter extends TokenFilter {
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java (from r940912, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java&p1=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java&r1=940912&r2=940962&rev=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java Tue May 4 17:07:28 2010
@@ -15,23 +15,23 @@
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.miscellaneous;
import java.io.StringReader;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* HyphenatedWordsFilter test
*/
-public class TestHyphenatedWordsFilter extends BaseTokenTestCase {
+public class TestHyphenatedWordsFilter extends BaseTokenStreamTestCase {
public void testHyphenatedWords() throws Exception {
String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecologi-\ncal";
// first test
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
- HyphenatedWordsFilterFactory factory = new HyphenatedWordsFilterFactory();
- ts = factory.create(ts);
+ TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ ts = new HyphenatedWordsFilter(ts);
assertTokenStreamContents(ts,
new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecological" });
}
@@ -42,9 +42,8 @@ public class TestHyphenatedWordsFilter e
public void testHyphenAtEnd() throws Exception {
String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecology-";
// first test
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
- HyphenatedWordsFilterFactory factory = new HyphenatedWordsFilterFactory();
- ts = factory.create(ts);
+ TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ ts = new HyphenatedWordsFilter(ts);
assertTokenStreamContents(ts,
new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecology-" });
}
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java (from r940912, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java&p1=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java&r1=940912&r2=940962&rev=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java Tue May 4 17:07:28 2010
@@ -15,24 +15,18 @@
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.miscellaneous;
import java.io.StringReader;
-import java.util.HashMap;
import java.util.HashSet;
-import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.solr.common.ResourceLoader;
-import org.apache.solr.core.SolrResourceLoader;
-
-/**
- * @version $Id$
- */
-public class TestKeepWordFilter extends BaseTokenTestCase {
+/** Test {@link KeepWordFilter} */
+public class TestKeepWordFilter extends BaseTokenStreamTestCase {
public void testStopAndGo() throws Exception
{
@@ -41,39 +35,15 @@ public class TestKeepWordFilter extends
words.add( "bbb" );
String input = "aaa BBB ccc ddd EEE";
- Map<String,String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
- ResourceLoader loader = new SolrResourceLoader(null, null);
// Test Stopwords
- KeepWordFilterFactory factory = new KeepWordFilterFactory();
- args.put( "ignoreCase", "true" );
- factory.init( args );
- factory.inform( loader );
- factory.setWords( words );
- assertTrue(factory.isIgnoreCase());
- TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
+ TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ stream = new KeepWordFilter(stream, words, true);
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
-
- // Test Stopwords (ignoreCase via the setter instead)
- factory = new KeepWordFilterFactory();
- args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
- factory.init( args );
- factory.inform( loader );
- factory.setIgnoreCase(true);
- factory.setWords( words );
- assertTrue(factory.isIgnoreCase());
- stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
- assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
-
+
// Now force case
- factory = new KeepWordFilterFactory();
- args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
- args.put( "ignoreCase", "false" );
- factory.init( args );
- factory.inform( loader );
- factory.setWords( words );
- assertFalse(factory.isIgnoreCase());
- stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
+ stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ stream = new KeepWordFilter(stream, words, false);
assertTokenStreamContents(stream, new String[] { "aaa" });
}
}
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java (from r940912, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java&p1=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java&r1=940912&r2=940962&rev=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java Tue May 4 17:07:28 2010
@@ -15,13 +15,12 @@
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
@@ -34,7 +33,7 @@ import org.apache.lucene.analysis.tokena
/**
* @version $Id:$
*/
-public class TestTrimFilter extends BaseTokenTestCase {
+public class TestTrimFilter extends BaseTokenStreamTestCase {
public void testTrim() throws Exception {
char[] a = " a ".toCharArray();
@@ -42,15 +41,13 @@ public class TestTrimFilter extends Base
char[] ccc = "cCc".toCharArray();
char[] whitespace = " ".toCharArray();
char[] empty = "".toCharArray();
- TrimFilterFactory factory = new TrimFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("updateOffsets", "false");
- factory.init(args);
- TokenStream ts = factory.create(new IterTokenStream(new Token(a, 0, a.length, 1, 5),
+
+ TokenStream ts = new IterTokenStream(new Token(a, 0, a.length, 1, 5),
new Token(b, 0, b.length, 6, 10),
new Token(ccc, 0, ccc.length, 11, 15),
new Token(whitespace, 0, whitespace.length, 16, 20),
- new Token(empty, 0, empty.length, 21, 21)));
+ new Token(empty, 0, empty.length, 21, 21));
+ ts = new TrimFilter(ts, false);
assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""});
@@ -58,15 +55,12 @@ public class TestTrimFilter extends Base
b = "b ".toCharArray();
ccc = " c ".toCharArray();
whitespace = " ".toCharArray();
- factory = new TrimFilterFactory();
- args = new HashMap<String,String>();
- args.put("updateOffsets", "true");
- factory.init(args);
- ts = factory.create(new IterTokenStream(
+ ts = new IterTokenStream(
new Token(a, 0, a.length, 0, 2),
new Token(b, 0, b.length, 0, 2),
new Token(ccc, 0, ccc.length, 0, 3),
- new Token(whitespace, 0, whitespace.length, 0, 3)));
+ new Token(whitespace, 0, whitespace.length, 0, 3));
+ ts = new TrimFilter(ts, true);
assertTokenStreamContents(ts,
new String[] { "a", "b", "c", "" },
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java?rev=940962&r1=940961&r2=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java Tue May 4 17:07:28 2010
@@ -18,6 +18,7 @@ package org.apache.solr.analysis;
*/
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
import org.apache.solr.analysis.BaseTokenFilterFactory;
/**
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java?rev=940962&r1=940961&r2=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java Tue May 4 17:07:28 2010
@@ -21,6 +21,7 @@ import org.apache.solr.common.ResourceLo
import org.apache.solr.util.plugin.ResourceLoaderAware;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
import java.util.Set;
import java.io.IOException;
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java?rev=940962&r1=940961&r2=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java Tue May 4 17:07:28 2010
@@ -20,6 +20,7 @@ package org.apache.solr.analysis;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.solr.common.SolrException;
/**
Copied: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilterFactory.java (from r940912, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilterFactory.java?p2=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilterFactory.java&p1=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java&r1=940912&r2=940962&rev=940962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilterFactory.java Tue May 4 17:07:28 2010
@@ -18,10 +18,12 @@
package org.apache.solr.analysis;
import java.io.IOException;
+import java.io.StringReader;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
+import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
@@ -32,85 +34,15 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
- * @version $Id:$
+ * Simple tests to ensure this factory is working
*/
-public class TestTrimFilter extends BaseTokenTestCase {
-
- public void testTrim() throws Exception {
- char[] a = " a ".toCharArray();
- char[] b = "b ".toCharArray();
- char[] ccc = "cCc".toCharArray();
- char[] whitespace = " ".toCharArray();
- char[] empty = "".toCharArray();
+public class TestTrimFilterFactory extends BaseTokenTestCase {
+ public void testTrimming() throws Exception {
TrimFilterFactory factory = new TrimFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("updateOffsets", "false");
factory.init(args);
- TokenStream ts = factory.create(new IterTokenStream(new Token(a, 0, a.length, 1, 5),
- new Token(b, 0, b.length, 6, 10),
- new Token(ccc, 0, ccc.length, 11, 15),
- new Token(whitespace, 0, whitespace.length, 16, 20),
- new Token(empty, 0, empty.length, 21, 21)));
-
- assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""});
-
- a = " a".toCharArray();
- b = "b ".toCharArray();
- ccc = " c ".toCharArray();
- whitespace = " ".toCharArray();
- factory = new TrimFilterFactory();
- args = new HashMap<String,String>();
- args.put("updateOffsets", "true");
- factory.init(args);
- ts = factory.create(new IterTokenStream(
- new Token(a, 0, a.length, 0, 2),
- new Token(b, 0, b.length, 0, 2),
- new Token(ccc, 0, ccc.length, 0, 3),
- new Token(whitespace, 0, whitespace.length, 0, 3)));
-
- assertTokenStreamContents(ts,
- new String[] { "a", "b", "c", "" },
- new int[] { 1, 0, 1, 3 },
- new int[] { 2, 1, 2, 3 },
- new int[] { 1, 1, 1, 1 });
- }
-
- /**
- * @deprecated does not support custom attributes
- */
- private static class IterTokenStream extends TokenStream {
- final Token tokens[];
- int index = 0;
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
- FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
- TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
- PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
-
- public IterTokenStream(Token... tokens) {
- super();
- this.tokens = tokens;
- }
-
- public IterTokenStream(Collection<Token> tokens) {
- this(tokens.toArray(new Token[tokens.size()]));
- }
-
- public boolean incrementToken() throws IOException {
- if (index >= tokens.length)
- return false;
- else {
- clearAttributes();
- Token token = tokens[index++];
- termAtt.setEmpty().append(token.term());
- offsetAtt.setOffset(token.startOffset(), token.endOffset());
- posIncAtt.setPositionIncrement(token.getPositionIncrement());
- flagsAtt.setFlags(token.getFlags());
- typeAtt.setType(token.type());
- payloadAtt.setPayload(token.getPayload());
- return true;
- }
- }
+ TokenStream ts = factory.create(new KeywordTokenizer(new StringReader("trim me ")));
+ assertTokenStreamContents(ts, new String[] { "trim me" });
}
}