You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2012/01/25 17:21:33 UTC

svn commit: r1235810 - in /lucene/dev/trunk/solr/core/src: java/org/apache/solr/analysis/TokenizerChain.java test/org/apache/solr/BasicFunctionalityTest.java

Author: yonik
Date: Wed Jan 25 16:21:33 2012
New Revision: 1235810

URL: http://svn.apache.org/viewvc?rev=1235810&view=rev
Log:
LUCENE-3721: CharFilters were not being invoked in Solr

Modified:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java?rev=1235810&r1=1235809&r2=1235810&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java Wed Jan 25 16:21:33 2012
@@ -19,6 +19,7 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.*;
 
+import java.io.IOException;
 import java.io.Reader;
 
 /**
@@ -48,6 +49,21 @@ public final class TokenizerChain extend
   public TokenizerFactory getTokenizerFactory() { return tokenizer; }
   public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
 
+  class SolrTokenStreamComponents extends TokenStreamComponents {
+    public SolrTokenStreamComponents(final Tokenizer source, final TokenStream result) {
+      super(source, result);
+    }
+
+    @Override
+    protected void reset(Reader reader) throws IOException {
+      // the token filters are currently reset by the indexing process, so only
+      // the tokenizer needs to be reset.
+      Reader r = initReader(reader);
+      super.reset(r);
+    }
+  }
+  
+  
   @Override
   public Reader initReader(Reader reader) {
     if (charFilters != null && charFilters.length > 0) {
@@ -62,12 +78,12 @@ public final class TokenizerChain extend
 
   @Override
   protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
-    Tokenizer tk = tokenizer.create(aReader);
+    Tokenizer tk = tokenizer.create( initReader(aReader) );
     TokenStream ts = tk;
     for (TokenFilterFactory filter : filters) {
       ts = filter.create(ts);
     }
-    return new TokenStreamComponents(tk, ts);
+    return new SolrTokenStreamComponents(tk, ts);
   }
 
   @Override

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java?rev=1235810&r1=1235809&r2=1235810&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java Wed Jan 25 16:21:33 2012
@@ -222,6 +222,19 @@ public class BasicFunctionalityTest exte
             );
   }
 
+  @Test
+  public void testHTMLStrip() {
+    assertU(add(doc("id","200", "HTMLwhitetok","ABC")));
+    assertU(add(doc("id","201", "HTMLwhitetok","ABC")));      // do it again to make sure reuse is working
+    assertU(commit());
+    assertQ(req("q","HTMLwhitetok:ABC")
+        ,"//*[@numFound='2']"
+    );
+    assertQ(req("q","HTMLwhitetok:ABC")
+        ,"//*[@numFound='2']"
+    );
+  }
+
 
   @Test
   public void testClientErrorOnMalformedNumbers() throws Exception {