You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2009/09/08 15:07:08 UTC

svn commit: r812494 - in /lucene/solr/trunk: CHANGES.txt src/java/org/apache/solr/analysis/TrimFilter.java src/test/org/apache/solr/analysis/TestTrimFilter.java

Author: gsingers
Date: Tue Sep  8 13:07:08 2009
New Revision: 812494

URL: http://svn.apache.org/viewvc?rev=812494&view=rev
Log:
SOLR-1400: handle zero length term buffer in TrimFilter

Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java
    lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=812494&r1=812493&r2=812494&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Tue Sep  8 13:07:08 2009
@@ -532,6 +532,8 @@
 63. SOLR-1398: Add offset corrections in PatternTokenizerFactory.
     (Anders Melchiorsen, koji)
 
+64. SOLR-1400: Properly handle zero-length tokens in TrimFilter (Peter Wolanin, gsingers)
+
 Other Changes
 ----------------------
  1. Upgraded to Lucene 2.4.0 (yonik)

Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java?rev=812494&r1=812493&r2=812494&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java Tue Sep  8 13:07:08 2009
@@ -51,6 +51,11 @@
 
     char[] termBuffer = termAtt.termBuffer();
     int len = termAtt.termLength();
+    //TODO: Is this the right behavior, or should we return false?  Currently a whitespace-only term such as "  " returns
+    //true, so a zero-length term should arguably return true as well.
+    if (len == 0){
+      return true;
+    }
     int start = 0;
     int end = 0;
     int endOff = 0;

Modified: lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java?rev=812494&r1=812493&r2=812494&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/analysis/TestTrimFilter.java Tue Sep  8 13:07:08 2009
@@ -17,50 +17,64 @@
 
 package org.apache.solr.analysis;
 
-import java.util.List;
-
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+import java.util.List;
 
 
 /**
  * @version $Id:$
  */
 public class TestTrimFilter extends BaseTokenTestCase {
-  
+
   public void testTrim() throws Exception {
+    char[] a = " a ".toCharArray();
+    char[] b = "b   ".toCharArray();
+    char[] ccc = "cCc".toCharArray();
+    char[] whitespace = "   ".toCharArray();
+    char[] empty = "".toCharArray();
     TokenStream ts = new TrimFilter
-      (new IterTokenStream(new Token(" a ", 1, 5),
-                           new Token("b   ",6,10),
-                           new Token("cCc",11,15),
-                           new Token("   ",16,20)), false );
-
-    Token token = ts.next();
+            (new IterTokenStream(new Token(a, 0, a.length, 1, 5),
+                    new Token(b, 0, b.length, 6, 10),
+                    new Token(ccc, 0, ccc.length, 11, 15),
+                    new Token(whitespace, 0, whitespace.length, 16, 20),
+                    new Token(empty, 0, empty.length, 21, 21)), false);
+
+    TermAttribute token;
+    assertTrue(ts.incrementToken());
+    token = (TermAttribute) ts.getAttribute(TermAttribute.class);
     assertEquals("a", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
+    assertTrue(ts.incrementToken());
     assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertNull(token);
-    
-    ts = new TrimFilter( new IterTokenStream(
-           new Token(" a", 0,2),
-           new Token("b ", 0,2),
-           new Token(" c ",0,3),
-           new Token("   ",0,3)), true );
+    assertTrue(ts.incrementToken());
+    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
+    assertFalse(ts.incrementToken());
+
+    a = " a".toCharArray();
+    b = "b ".toCharArray();
+    ccc = " c ".toCharArray();
+    whitespace = "   ".toCharArray();
+    ts = new TrimFilter(new IterTokenStream(
+            new Token(a, 0, a.length, 0, 2),
+            new Token(b, 0, b.length, 0, 2),
+            new Token(ccc, 0, ccc.length, 0, 3),
+            new Token(whitespace, 0, whitespace.length, 0, 3)), true);
     
-    List<Token> expect = tokens( "a,1,1,2 b,1,0,1 c,1,1,2 ,1,3,3" );
+    List<Token> expect = tokens("a,1,1,2 b,1,0,1 c,1,1,2 ,1,3,3");
     List<Token> real = getTokens(ts);
-    for( Token t : expect ) {
-      System.out.println( "TEST:" + t );
+    for (Token t : expect) {
+      System.out.println("TEST:" + t);
     }
-    for( Token t : real ) {
-      System.out.println( "REAL:" + t );
+    for (Token t : real) {
+      System.out.println("REAL:" + t);
     }
-    assertTokEqualOff( expect, real );
+    assertTokEqualOff(expect, real);
   }
 
 }