You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/04/01 06:30:37 UTC
svn commit: r1583527 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/
lucene/analysis/common/src/resources/META-INF/services/
lucene/analysis/common/src/test/org/...
Author: rmuir
Date: Tue Apr 1 04:30:36 2014
New Revision: 1583527
URL: http://svn.apache.org/r1583527
Log:
LUCENE-5558: Add TruncateTokenFilter
Added:
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java
- copied unchanged from r1583525, lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterFactory.java
- copied unchanged from r1583525, lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterFactory.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java
- copied, changed from r1583525, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java
- copied, changed from r1583525, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1583527&r1=1583526&r2=1583527&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Tue Apr 1 04:30:36 2014
@@ -82,6 +82,9 @@ New Features
resort the hits from a first pass search using a Sort or an
Expression. (Simon Willnauer, Robert Muir, Mike McCandless)
+* LUCENE-5558: Add TruncateTokenFilter which truncates terms to
+ the specified length. (Ahmet Arslan via Robert Muir)
+
API Changes
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory?rev=1583527&r1=1583526&r2=1583527&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory Tue Apr 1 04:30:36 2014
@@ -70,6 +70,7 @@ org.apache.lucene.analysis.miscellaneous
org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory
org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory
org.apache.lucene.analysis.miscellaneous.TrimFilterFactory
+org.apache.lucene.analysis.miscellaneous.TruncateTokenFilterFactory
org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory
org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory
org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilterFactory
Copied: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java (from r1583525, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java?p2=lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java&p1=lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java&r1=1583525&r2=1583527&rev=1583527&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java Tue Apr 1 04:30:36 2014
@@ -17,7 +17,10 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
+import java.io.StringReader;
+
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.junit.Test;
@@ -27,13 +30,13 @@ import org.junit.Test;
public class TestTruncateTokenFilter extends BaseTokenStreamTestCase {
public void testTruncating() throws Exception {
- TokenStream stream = whitespaceMockTokenizer("abcdefg 1234567 ABCDEFG abcde abc 12345 123");
+ TokenStream stream = new MockTokenizer(new StringReader("abcdefg 1234567 ABCDEFG abcde abc 12345 123"), MockTokenizer.WHITESPACE, false);
stream = new TruncateTokenFilter(stream, 5);
assertTokenStreamContents(stream, new String[]{"abcde", "12345", "ABCDE", "abcde", "abc", "12345", "123"});
}
@Test(expected = IllegalArgumentException.class)
public void testNonPositiveLength() throws Exception {
- new TruncateTokenFilter(whitespaceMockTokenizer("length must be a positive number"), -48);
+ new TruncateTokenFilter(new MockTokenizer(new StringReader("length must be a positive number")), -48);
}
}
Copied: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java (from r1583525, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java?p2=lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java&p1=lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java&r1=1583525&r2=1583527&rev=1583527&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java Tue Apr 1 04:30:36 2014
@@ -34,8 +34,7 @@ public class TestTruncateTokenFilterFact
*/
public void testTruncating() throws Exception {
Reader reader = new StringReader("abcdefg 1234567 ABCDEFG abcde abc 12345 123");
- TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- ((Tokenizer) stream).setReader(reader);
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
stream = tokenFilterFactory("Truncate",
TruncateTokenFilterFactory.PREFIX_LENGTH_KEY, "5").create(stream);
assertTokenStreamContents(stream, new String[]{"abcde", "12345", "ABCDE", "abcde", "abc", "12345", "123"});