You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2012/01/25 17:21:33 UTC
svn commit: r1235810 - in /lucene/dev/trunk/solr/core/src:
java/org/apache/solr/analysis/TokenizerChain.java
test/org/apache/solr/BasicFunctionalityTest.java
Author: yonik
Date: Wed Jan 25 16:21:33 2012
New Revision: 1235810
URL: http://svn.apache.org/viewvc?rev=1235810&view=rev
Log:
LUCENE-3721: CharFilters were not being invoked in Solr
Modified:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java?rev=1235810&r1=1235809&r2=1235810&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java Wed Jan 25 16:21:33 2012
@@ -19,6 +19,7 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.*;
+import java.io.IOException;
import java.io.Reader;
/**
@@ -48,6 +49,21 @@ public final class TokenizerChain extend
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
+ class SolrTokenStreamComponents extends TokenStreamComponents {
+ public SolrTokenStreamComponents(final Tokenizer source, final TokenStream result) {
+ super(source, result);
+ }
+
+ @Override
+ protected void reset(Reader reader) throws IOException {
+ // the token filters are currently reset by the indexing process, so only
+ // the tokenizer needs to be reset (on the reader wrapped by initReader).
+ Reader r = initReader(reader);
+ super.reset(r);
+ }
+ }
+
+
@Override
public Reader initReader(Reader reader) {
if (charFilters != null && charFilters.length > 0) {
@@ -62,12 +78,12 @@ public final class TokenizerChain extend
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
- Tokenizer tk = tokenizer.create(aReader);
+ Tokenizer tk = tokenizer.create( initReader(aReader) );
TokenStream ts = tk;
for (TokenFilterFactory filter : filters) {
ts = filter.create(ts);
}
- return new TokenStreamComponents(tk, ts);
+ return new SolrTokenStreamComponents(tk, ts);
}
@Override
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java?rev=1235810&r1=1235809&r2=1235810&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java Wed Jan 25 16:21:33 2012
@@ -222,6 +222,19 @@ public class BasicFunctionalityTest exte
);
}
+ @Test
+ public void testHTMLStrip() {
+ assertU(add(doc("id","200", "HTMLwhitetok","ABC")));
+ assertU(add(doc("id","201", "HTMLwhitetok","ABC"))); // do it again to make sure reuse is working
+ assertU(commit());
+ assertQ(req("q","HTMLwhitetok:ABC")
+ ,"//*[@numFound='2']"
+ );
+ assertQ(req("q","HTMLwhitetok:ABC")
+ ,"//*[@numFound='2']"
+ );
+ }
+
@Test
public void testClientErrorOnMalformedNumbers() throws Exception {