You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2009/09/08 15:07:08 UTC
svn commit: r812494 - in /lucene/solr/trunk: CHANGES.txt
src/java/org/apache/solr/analysis/TrimFilter.java
src/test/org/apache/solr/analysis/TestTrimFilter.java
Author: gsingers
Date: Tue Sep 8 13:07:08 2009
New Revision: 812494
URL: http://svn.apache.org/viewvc?rev=812494&view=rev
Log:
SOLR-1400: handle zero length term buffer in TrimFilter
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java
lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=812494&r1=812493&r2=812494&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Tue Sep 8 13:07:08 2009
@@ -532,6 +532,8 @@
63. SOLR-1398: Add offset corrections in PatternTokenizerFactory.
(Anders Melchiorsen, koji)
+64. SOLR-1400: Properly handle zero-length tokens in TrimFilter (Peter Wolanin, gsingers)
+
Other Changes
----------------------
1. Upgraded to Lucene 2.4.0 (yonik)
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java?rev=812494&r1=812493&r2=812494&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java Tue Sep 8 13:07:08 2009
@@ -51,6 +51,11 @@
char[] termBuffer = termAtt.termBuffer();
int len = termAtt.termLength();
+ //TODO: Is this the right behavior, or should we return false? Currently a whitespace-only term (" ") returns true,
+ //so I think a zero-length term should also return true.
+ if (len == 0){
+ return true;
+ }
int start = 0;
int end = 0;
int endOff = 0;
Modified: lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java?rev=812494&r1=812493&r2=812494&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java Tue Sep 8 13:07:08 2009
@@ -17,50 +17,64 @@
package org.apache.solr.analysis;
-import java.util.List;
-
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+import java.util.List;
/**
* @version $Id:$
*/
public class TestTrimFilter extends BaseTokenTestCase {
-
+
public void testTrim() throws Exception {
+ char[] a = " a ".toCharArray();
+ char[] b = "b ".toCharArray();
+ char[] ccc = "cCc".toCharArray();
+ char[] whitespace = " ".toCharArray();
+ char[] empty = "".toCharArray();
TokenStream ts = new TrimFilter
- (new IterTokenStream(new Token(" a ", 1, 5),
- new Token("b ",6,10),
- new Token("cCc",11,15),
- new Token(" ",16,20)), false );
-
- Token token = ts.next();
+ (new IterTokenStream(new Token(a, 0, a.length, 1, 5),
+ new Token(b, 0, b.length, 6, 10),
+ new Token(ccc, 0, ccc.length, 11, 15),
+ new Token(whitespace, 0, whitespace.length, 16, 20),
+ new Token(empty, 0, empty.length, 21, 21)), false);
+
+ TermAttribute token;
+ assertTrue(ts.incrementToken());
+ token = (TermAttribute) ts.getAttribute(TermAttribute.class);
assertEquals("a", new String(token.termBuffer(), 0, token.termLength()));
- token = ts.next();
+ assertTrue(ts.incrementToken());
assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
- token = ts.next();
+ assertTrue(ts.incrementToken());
assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
- token = ts.next();
+ assertTrue(ts.incrementToken());
assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
- token = ts.next();
- assertNull(token);
-
- ts = new TrimFilter( new IterTokenStream(
- new Token(" a", 0,2),
- new Token("b ", 0,2),
- new Token(" c ",0,3),
- new Token(" ",0,3)), true );
+ assertTrue(ts.incrementToken());
+ assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
+ assertFalse(ts.incrementToken());
+
+ a = " a".toCharArray();
+ b = "b ".toCharArray();
+ ccc = " c ".toCharArray();
+ whitespace = " ".toCharArray();
+ ts = new TrimFilter(new IterTokenStream(
+ new Token(a, 0, a.length, 0, 2),
+ new Token(b, 0, b.length, 0, 2),
+ new Token(ccc, 0, ccc.length, 0, 3),
+ new Token(whitespace, 0, whitespace.length, 0, 3)), true);
- List<Token> expect = tokens( "a,1,1,2 b,1,0,1 c,1,1,2 ,1,3,3" );
+ List<Token> expect = tokens("a,1,1,2 b,1,0,1 c,1,1,2 ,1,3,3");
List<Token> real = getTokens(ts);
- for( Token t : expect ) {
- System.out.println( "TEST:" + t );
+ for (Token t : expect) {
+ System.out.println("TEST:" + t);
}
- for( Token t : real ) {
- System.out.println( "REAL:" + t );
+ for (Token t : real) {
+ System.out.println("REAL:" + t);
}
- assertTokEqualOff( expect, real );
+ assertTokEqualOff(expect, real);
}
}