You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/05/04 11:55:43 UTC
svn commit: r940788 - in /lucene/dev/trunk: lucene/contrib/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/
lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/
solr/src/java/org/apache/s...
Author: rmuir
Date: Tue May 4 09:55:43 2010
New Revision: 940788
URL: http://svn.apache.org/viewvc?rev=940788&view=rev
Log:
LUCENE-2413: consolidate RemoveDuplicatesTokenFilter to contrib/analyzers
Added:
lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
- copied, changed from r940782, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java
- copied, changed from r940782, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java
- copied, changed from r940782, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
Removed:
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
Modified:
lucene/dev/trunk/lucene/contrib/CHANGES.txt
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java
Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=940788&r1=940787&r2=940788&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue May 4 09:55:43 2010
@@ -163,6 +163,8 @@ New features
constructs.
- o.a.l.analysis.miscellaneous.WordDelimiterFilter: TokenFilter that splits words
into subwords and performs optional transformations on subword groups.
+ - o.a.l.analysis.miscellaneous.RemoveDuplicatesTokenFilter: TokenFilter which
+ filters out Tokens that have the same position and term text as the previous token.
(... in progress)
Build
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java (from r940782, lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java&p1=lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java&r1=940782&r2=940788&rev=940788&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java Tue May 4 09:55:43 2010
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
Copied: lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java (from r940782, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java?p2=lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java&p1=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java&r1=940782&r2=940788&rev=940788&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java Tue May 4 09:55:43 2010
@@ -15,8 +15,9 @@
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.miscellaneous;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -26,7 +27,7 @@ import org.apache.lucene.analysis.tokena
import java.util.Iterator;
import java.util.Arrays;
-public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase {
+public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase {
public static Token tok(int pos, String t, int start, int end) {
Token tok = new Token(t,start,end);
@@ -41,8 +42,7 @@ public class TestRemoveDuplicatesTokenFi
throws Exception {
final Iterator<Token> toks = Arrays.asList(tokens).iterator();
- RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory();
- final TokenStream ts = factory.create
+ final TokenStream ts = new RemoveDuplicatesTokenFilter(
(new TokenStream() {
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@@ -59,7 +59,7 @@ public class TestRemoveDuplicatesTokenFi
return false;
}
}
- });
+ }));
assertTokenStreamContents(ts, expected.split("\\s"));
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java?rev=940788&r1=940787&r2=940788&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java Tue May 4 09:55:43 2010
@@ -18,6 +18,7 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
/**
* @version $Id:$
Copied: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java (from r940782, lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java?p2=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java&p1=lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java&r1=940782&r2=940788&rev=940788&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java Tue May 4 09:55:43 2010
@@ -26,7 +26,8 @@ import org.apache.lucene.analysis.tokena
import java.util.Iterator;
import java.util.Arrays;
-public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase {
+/** Simple tests to ensure this factory is working */
+public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenTestCase {
public static Token tok(int pos, String t, int start, int end) {
Token tok = new Token(t,start,end);
@@ -63,23 +64,8 @@ public class TestRemoveDuplicatesTokenFi
assertTokenStreamContents(ts, expected.split("\\s"));
}
-
- public void testNoDups() throws Exception {
-
- testDups("A B B C D E"
- ,tok(1,"A", 0, 4)
- ,tok(1,"B", 5, 10)
- ,tok(1,"B",11, 15)
- ,tok(1,"C",16, 20)
- ,tok(0,"D",16, 20)
- ,tok(1,"E",21, 25)
- );
-
- }
-
-
+
public void testSimpleDups() throws Exception {
-
testDups("A B C D E"
,tok(1,"A", 0, 4)
,tok(1,"B", 5, 10)
@@ -87,34 +73,6 @@ public class TestRemoveDuplicatesTokenFi
,tok(1,"C",16, 20)
,tok(0,"D",16, 20)
,tok(1,"E",21, 25)
- );
-
+ );
}
-
- public void testComplexDups() throws Exception {
-
- testDups("A B C D E F G H I J K"
- ,tok(1,"A")
- ,tok(1,"B")
- ,tok(0,"B")
- ,tok(1,"C")
- ,tok(1,"D")
- ,tok(0,"D")
- ,tok(0,"D")
- ,tok(1,"E")
- ,tok(1,"F")
- ,tok(0,"F")
- ,tok(1,"G")
- ,tok(0,"H")
- ,tok(0,"H")
- ,tok(1,"I")
- ,tok(1,"J")
- ,tok(0,"K")
- ,tok(0,"J")
- );
-
- }
-
-
-
}