You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/06/13 13:22:41 UTC
svn commit: r1349758 - in /lucene/dev/trunk:
lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/
lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/
lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/ lucene/an...
Author: rmuir
Date: Wed Jun 13 11:22:40 2012
New Revision: 1349758
URL: http://svn.apache.org/viewvc?rev=1349758&view=rev
Log:
LUCENE-4142: AnalyzerWrapper doesn't work with CharFilters
Modified:
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java Wed Jun 13 11:22:40 2012
@@ -133,7 +133,7 @@ public final class PersianAnalyzer exten
* Wraps the Reader with {@link PersianCharFilter}
*/
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new PersianCharFilter(CharReader.get(reader));
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java Wed Jun 13 11:22:40 2012
@@ -45,7 +45,7 @@ public class HTMLStripCharFilterTest ext
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new HTMLStripCharFilter(CharReader.get(reader));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java Wed Jun 13 11:22:40 2012
@@ -192,7 +192,7 @@ public class TestMappingCharFilter exten
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new MappingCharFilter(normMap, CharReader.get(reader));
}
};
@@ -218,7 +218,7 @@ public class TestMappingCharFilter exten
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new MappingCharFilter(map, CharReader.get(reader));
}
};
@@ -240,7 +240,7 @@ public class TestMappingCharFilter exten
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new MappingCharFilter(map, CharReader.get(reader));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java Wed Jun 13 11:22:40 2012
@@ -215,7 +215,7 @@ public class TestCJKAnalyzer extends Bas
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new MappingCharFilter(norm, CharReader.get(reader));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Wed Jun 13 11:22:40 2012
@@ -326,7 +326,7 @@ public class TestCompoundWordTokenFilter
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new MappingCharFilter(normMap, CharReader.get(reader));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java Wed Jun 13 11:22:40 2012
@@ -56,7 +56,7 @@ public class TestBugInSomething extends
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
reader = new MockCharFilter(reader, 0);
reader = new MappingCharFilter(map, reader);
return reader;
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Wed Jun 13 11:22:40 2012
@@ -609,7 +609,7 @@ public class TestRandomChains extends Ba
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
Random random = new Random(seed);
CharFilterSpec charfilterspec = newCharFilterChain(random, reader);
return charfilterspec.reader;
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java Wed Jun 13 11:22:40 2012
@@ -1,12 +1,15 @@
package org.apache.lucene.analysis.miscellaneous;
+import java.io.Reader;
import java.io.StringReader;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/*
@@ -56,4 +59,32 @@ public class TestPerFieldAnalzyerWrapper
"qwerty",
termAtt.toString());
}
+
+ public void testCharFilters() throws Exception {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader));
+ }
+
+ @Override
+ protected Reader initReader(String fieldName, Reader reader) {
+ return new MockCharFilter(CharReader.get(reader), 7);
+ }
+ };
+ assertAnalyzesTo(a, "ab",
+ new String[] { "aab" },
+ new int[] { 0 },
+ new int[] { 2 }
+ );
+
+ // now wrap in PFAW
+ PerFieldAnalyzerWrapper p = new PerFieldAnalyzerWrapper(a, Collections.<String,Analyzer>emptyMap());
+
+ assertAnalyzesTo(p, "ab",
+ new String[] { "aab" },
+ new int[] { 0 },
+ new int[] { 2 }
+ );
+ }
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java Wed Jun 13 11:22:40 2012
@@ -314,7 +314,7 @@ public class TestPatternReplaceCharFilte
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new PatternReplaceCharFilter(p, replacement, CharReader.get(reader));
}
};
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java Wed Jun 13 11:22:40 2012
@@ -78,7 +78,7 @@ public abstract class Analyzer {
public final TokenStream tokenStream(final String fieldName,
final Reader reader) throws IOException {
TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
- final Reader r = initReader(reader);
+ final Reader r = initReader(fieldName, reader);
if (components == null) {
components = createComponents(fieldName, r);
reuseStrategy.setReusableComponents(fieldName, components);
@@ -91,7 +91,7 @@ public abstract class Analyzer {
/**
* Override this if you want to add a CharFilter chain.
*/
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return reader;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java Wed Jun 13 11:22:40 2012
@@ -86,4 +86,9 @@ public abstract class AnalyzerWrapper ex
public final int getOffsetGap(IndexableField field) {
return getWrappedAnalyzer(field.name()).getOffsetGap(field);
}
+
+ @Override
+ public final Reader initReader(String fieldName, Reader reader) {
+ return getWrappedAnalyzer(fieldName).initReader(fieldName, reader);
+ }
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java Wed Jun 13 11:22:40 2012
@@ -32,7 +32,7 @@ public class TestMockCharFilter extends
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new MockCharFilter(CharReader.get(reader), 7);
}
};
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java Wed Jun 13 11:22:40 2012
@@ -44,7 +44,7 @@ public abstract class SolrAnalyzer exten
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return charStream(reader);
}
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java Wed Jun 13 11:22:40 2012
@@ -57,18 +57,19 @@ public final class TokenizerChain extend
super(source, result);
}
+ // TODO: what is going on here?
@Override
protected void reset(Reader reader) throws IOException {
// the tokenizers are currently reset by the indexing process, so only
// the tokenizer needs to be reset.
- Reader r = initReader(reader);
+ Reader r = initReader(null, reader);
super.reset(r);
}
}
@Override
- public Reader initReader(Reader reader) {
+ public Reader initReader(String fieldName, Reader reader) {
if (charFilters != null && charFilters.length > 0) {
CharStream cs = CharReader.get( reader );
for (CharFilterFactory charFilter : charFilters) {
@@ -81,7 +82,7 @@ public final class TokenizerChain extend
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
- Tokenizer tk = tokenizer.create( initReader(aReader) );
+ Tokenizer tk = tokenizer.create( initReader(fieldName, aReader) );
TokenStream ts = tk;
for (TokenFilterFactory filter : filters) {
ts = filter.create(ts);
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java Wed Jun 13 11:22:40 2012
@@ -112,7 +112,7 @@ public abstract class AnalysisRequestHan
}
}
- TokenStream tokenStream = tfac.create(tokenizerChain.initReader(new StringReader(value)));
+ TokenStream tokenStream = tfac.create(tokenizerChain.initReader(null, new StringReader(value)));
List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java?rev=1349758&r1=1349757&r2=1349758&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java Wed Jun 13 11:22:40 2012
@@ -267,7 +267,7 @@ public class LegacyHTMLStripCharFilterTe
}
@Override
- protected Reader initReader(Reader reader) {
+ protected Reader initReader(String fieldName, Reader reader) {
return new LegacyHTMLStripCharFilter(CharReader.get(new BufferedReader(reader)));
}
};