You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/07/17 13:09:27 UTC
svn commit: r1362446 [1/2] - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/common/src/java/
lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/
lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/ lucene/ana...
Author: rmuir
Date: Tue Jul 17 11:09:24 2012
New Revision: 1362446
URL: http://svn.apache.org/viewvc?rev=1362446&view=rev
Log:
LUCENE-4228: refactor CharFilter to be a java.io.FilterReader
Added:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
- copied unchanged from r1362442, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestCharFilter.java
- copied unchanged from r1362442, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestCharFilter.java
Removed:
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestCharFilter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/CharReader.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/CharStream.java
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/overview.html
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java
lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/JapaneseIterationMarkCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/processor/HTMLStripFieldUpdateProcessorFactory.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/analysis/TestArabicFilters.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/analysis/TestHTMLStripCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/analysis/TestJapaneseIterationMarkCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilterFactory.java
lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/analysis/MockCharFilterFactory.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Tue Jul 17 11:09:24 2012
@@ -42,6 +42,10 @@ API Changes
reusing the object. But the name was often confused with TokenStream.reset().
(Robert Muir)
+* LUCENE-4228: Refactored CharFilter to extend java.io.FilterReader. CharFilters
+ filter another reader and you override correct() for offset correction.
+ (Robert Muir)
+
Optimizations
* LUCENE-4171: Performance improvements to Packed64.
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java Tue Jul 17 11:09:24 2012
@@ -17,9 +17,10 @@
package org.apache.lucene.analysis.charfilter;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.util.ArrayUtil;
+import java.io.Reader;
import java.util.Arrays;
/**
@@ -34,7 +35,7 @@ public abstract class BaseCharFilter ext
private int diffs[];
private int size = 0;
- public BaseCharFilter(CharStream in) {
+ public BaseCharFilter(Reader in) {
super(in);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java Tue Jul 17 11:09:24 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/18/12 12:24 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/16/12 4:05 PM */
package org.apache.lucene.analysis.charfilter;
@@ -20,13 +20,13 @@ package org.apache.lucene.analysis.charf
*/
import java.io.IOException;
+import java.io.Reader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.util.Version;
-import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.O
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 5/18/12 12:24 PM from the specification file
- * <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
+ * on 7/16/12 4:05 PM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
*/
public final class HTMLStripCharFilter extends BaseCharFilter {
@@ -30647,7 +30647,7 @@ public final class HTMLStripCharFilter e
/**
* @param source
*/
- public HTMLStripCharFilter(CharStream source) {
+ public HTMLStripCharFilter(Reader source) {
super(source);
this.zzReader = source;
}
@@ -30657,7 +30657,7 @@ public final class HTMLStripCharFilter e
* @param escapedTags Tags in this set (both start and end tags)
* will not be filtered out.
*/
- public HTMLStripCharFilter(CharStream source, Set<String> escapedTags) {
+ public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
super(source);
this.zzReader = source;
if (null != escapedTags) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex Tue Jul 17 11:09:24 2012
@@ -18,13 +18,13 @@ package org.apache.lucene.analysis.charf
*/
import java.io.IOException;
+import java.io.Reader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.util.Version;
-import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -173,7 +173,7 @@ InlineElment = ( [aAbBiIqQsSuU]
/**
* @param source
*/
- public HTMLStripCharFilter(CharStream source) {
+ public HTMLStripCharFilter(Reader source) {
super(source);
this.zzReader = source;
}
@@ -183,7 +183,7 @@ InlineElment = ( [aAbBiIqQsSuU]
* @param escapedTags Tags in this set (both start and end tags)
* will not be filtered out.
*/
- public HTMLStripCharFilter(CharStream source, Set<String> escapedTags) {
+ public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
super(source);
this.zzReader = source;
if (null != escapedTags) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java Tue Jul 17 11:09:24 2012
@@ -21,8 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import java.util.Map;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter; // javadocs
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.RollingCharBuffer;
import org.apache.lucene.util.fst.CharSequenceOutputs;
@@ -51,8 +50,8 @@ public class MappingCharFilter extends B
private int replacementPointer;
private int inputOff;
- /** Default constructor that takes a {@link CharStream}. */
- public MappingCharFilter(NormalizeCharMap normMap, CharStream in) {
+ /** Default constructor that takes a {@link Reader}. */
+ public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
super(in);
buffer.reset(in);
@@ -66,15 +65,10 @@ public class MappingCharFilter extends B
}
}
- /** Easy-use constructor that takes a {@link Reader}. */
- public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
- this(normMap, CharReader.get(in));
- }
-
@Override
public void reset() throws IOException {
super.reset();
- buffer.reset(input);
+ buffer.reset(in);
replacement = null;
inputOff = 0;
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java Tue Jul 17 11:09:24 2012
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
@@ -141,7 +140,7 @@ public final class PersianAnalyzer exten
@Override
protected Reader initReader(String fieldName, Reader reader) {
return matchVersion.onOrAfter(Version.LUCENE_31) ?
- new PersianCharFilter(CharReader.get(reader)) :
+ new PersianCharFilter(reader) :
reader;
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java Tue Jul 17 11:09:24 2012
@@ -18,9 +18,9 @@ package org.apache.lucene.analysis.fa;
*/
import java.io.IOException;
+import java.io.Reader;
-import org.apache.lucene.analysis.CharStream;
-import org.apache.lucene.analysis.charfilter.CharFilter;
+import org.apache.lucene.analysis.CharFilter;
/**
* CharFilter that replaces instances of Zero-width non-joiner with an
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.charfi
*/
public class PersianCharFilter extends CharFilter {
- public PersianCharFilter(CharStream in) {
+ public PersianCharFilter(Reader in) {
super(in);
}
@@ -45,4 +45,9 @@ public class PersianCharFilter extends C
}
return charsRead;
}
+
+ @Override
+ protected int correct(int currentOff) {
+ return currentOff; // we don't change the length of the string
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java Tue Jul 17 11:09:24 2012
@@ -23,7 +23,6 @@ import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.charfilter.BaseCharFilter;
/**
@@ -56,7 +55,7 @@ public class PatternReplaceCharFilter ex
private final String replacement;
private Reader transformedInput;
- public PatternReplaceCharFilter(Pattern pattern, String replacement, CharStream in) {
+ public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in) {
super(in);
this.pattern = pattern;
this.replacement = replacement;
@@ -64,7 +63,7 @@ public class PatternReplaceCharFilter ex
@Deprecated
public PatternReplaceCharFilter(Pattern pattern, String replacement,
- int maxBlockChars, String blockDelimiter, CharStream in) {
+ int maxBlockChars, String blockDelimiter, Reader in) {
this(pattern, replacement, in);
}
@@ -72,16 +71,29 @@ public class PatternReplaceCharFilter ex
public int read(char[] cbuf, int off, int len) throws IOException {
// Buffer all input on the first call.
if (transformedInput == null) {
- StringBuilder buffered = new StringBuilder();
- char [] temp = new char [1024];
- for (int cnt = input.read(temp); cnt > 0; cnt = input.read(temp)) {
- buffered.append(temp, 0, cnt);
- }
- transformedInput = new StringReader(processPattern(buffered).toString());
+ fill();
}
return transformedInput.read(cbuf, off, len);
}
+
+ private void fill() throws IOException {
+ StringBuilder buffered = new StringBuilder();
+ char [] temp = new char [1024];
+ for (int cnt = in.read(temp); cnt > 0; cnt = in.read(temp)) {
+ buffered.append(temp, 0, cnt);
+ }
+ transformedInput = new StringReader(processPattern(buffered).toString());
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (transformedInput == null) {
+ fill();
+ }
+
+ return transformedInput.read();
+ }
@Override
protected int correct(int currentOff) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java Tue Jul 17 11:09:24 2012
@@ -114,9 +114,9 @@ public final class ClassicAnalyzer exten
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
- protected void reset(final Reader reader) throws IOException {
+ protected void setReader(final Reader reader) throws IOException {
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
- super.reset(reader);
+ super.setReader(reader);
}
};
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Tue Jul 17 11:09:24 2012
@@ -115,9 +115,9 @@ public final class StandardAnalyzer exte
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
- protected void reset(final Reader reader) throws IOException {
+ protected void setReader(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
- super.reset(reader);
+ super.setReader(reader);
}
};
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java Tue Jul 17 11:09:24 2012
@@ -104,9 +104,9 @@ public final class UAX29URLEmailAnalyzer
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
- protected void reset(final Reader reader) throws IOException {
+ protected void setReader(final Reader reader) throws IOException {
src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
- super.reset(reader);
+ super.setReader(reader);
}
};
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java Tue Jul 17 11:09:24 2012
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
-import org.apache.lucene.analysis.CharStream;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.CharFilter;
/**
- * Abstract parent class for analysis factories that create {@link CharStream}
+ * Abstract parent class for analysis factories that create {@link CharFilter}
* instances.
*/
public abstract class CharFilterFactory extends AbstractAnalysisFactory {
- public abstract CharStream create(CharStream input);
+ public abstract CharFilter create(Reader input);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/overview.html?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/overview.html (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/overview.html Tue Jul 17 11:09:24 2012
@@ -24,7 +24,7 @@
For an introduction to Lucene's analysis API, see the {@link org.apache.lucene.analysis} package documentation.
</p>
<p>
- This module contains concrete components ({@link org.apache.lucene.analysis.charfilter.CharFilter}s,
+ This module contains concrete components ({@link org.apache.lucene.analysis.CharFilter}s,
{@link org.apache.lucene.analysis.Tokenizer}s, and ({@link org.apache.lucene.analysis.TokenFilter}s) for
analyzing different types of content. It also provides a number of {@link org.apache.lucene.analysis.Analyzer}s
for different languages that you can use to get started quickly.
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java Tue Jul 17 11:09:24 2012
@@ -29,7 +29,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util._TestUtil;
@@ -46,7 +45,7 @@ public class HTMLStripCharFilterTest ext
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new HTMLStripCharFilter(CharReader.get(reader));
+ return new HTMLStripCharFilter(reader);
}
};
}
@@ -60,7 +59,7 @@ public class HTMLStripCharFilterTest ext
String gold = "\nthis is some text\n here is a link and " +
"another link. " +
"This is an entity: & plus a <. Here is an &. ";
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new StringReader(html)));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new StringReader(html));
StringBuilder builder = new StringBuilder();
int ch = -1;
char [] goldArray = gold.toCharArray();
@@ -79,7 +78,7 @@ public class HTMLStripCharFilterTest ext
//Some sanity checks, but not a full-fledged check
public void testHTML() throws Exception {
InputStream stream = getClass().getResourceAsStream("htmlStripReaderTest.html");
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new InputStreamReader(stream, "UTF-8")));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, "UTF-8"));
StringBuilder builder = new StringBuilder();
int ch = -1;
while ((ch = reader.read()) != -1){
@@ -96,7 +95,7 @@ public class HTMLStripCharFilterTest ext
public void testMSWord14GeneratedHTML() throws Exception {
InputStream stream = getClass().getResourceAsStream("MS-Word 14 generated.htm");
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new InputStreamReader(stream, "UTF-8")));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, "UTF-8"));
String gold = "This is a test";
StringBuilder builder = new StringBuilder();
int ch = 0;
@@ -117,7 +116,7 @@ public class HTMLStripCharFilterTest ext
String gold = "\u0393";
Set<String> set = new HashSet<String>();
set.add("reserved");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)), set);
+ Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -132,7 +131,7 @@ public class HTMLStripCharFilterTest ext
String gold = " <foo> \u00DCbermensch = \u0393 bar \u0393";
Set<String> set = new HashSet<String>();
set.add("reserved");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)), set);
+ Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -147,7 +146,7 @@ public class HTMLStripCharFilterTest ext
String gold = " <junk/> ! @ and ’";
Set<String> set = new HashSet<String>();
set.add("reserved");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)), set);
+ Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -161,7 +160,7 @@ public class HTMLStripCharFilterTest ext
String test = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
Set<String> set = new HashSet<String>();
set.add("reserved");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)), set);
+ Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -346,7 +345,7 @@ public class HTMLStripCharFilterTest ext
for (int i = 0 ; i < testGold.length ; i += 2) {
String test = testGold[i];
String gold = testGold[i + 1];
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -370,7 +369,7 @@ public class HTMLStripCharFilterTest ext
testBuilder.append("-->foo");
String gold = "foo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(testBuilder.toString())));
+ Reader reader = new HTMLStripCharFilter(new StringReader(testBuilder.toString()));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -388,7 +387,7 @@ public class HTMLStripCharFilterTest ext
appendChars(testBuilder, HTMLStripCharFilter.getInitialBufferSize() + 500);
testBuilder.append("?>");
gold = "";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(testBuilder.toString())));
+ reader = new HTMLStripCharFilter(new StringReader(testBuilder.toString()));
ch = 0;
builder = new StringBuilder();
try {
@@ -406,7 +405,7 @@ public class HTMLStripCharFilterTest ext
appendChars(testBuilder, HTMLStripCharFilter.getInitialBufferSize() + 500);
testBuilder.append("/>");
gold = "";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(testBuilder.toString())));
+ reader = new HTMLStripCharFilter(new StringReader(testBuilder.toString()));
ch = 0;
builder = new StringBuilder();
try {
@@ -430,7 +429,7 @@ public class HTMLStripCharFilterTest ext
private void processBuffer(String test, String assertMsg) throws IOException {
// System.out.println("-------------------processBuffer----------");
- Reader reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(test))));//force the use of BufferedReader
+ Reader reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(test)));//force the use of BufferedReader
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -448,7 +447,7 @@ public class HTMLStripCharFilterTest ext
String test = "<!--- three dashes, still a valid comment ---> ";
String gold = " ";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(test))));//force the use of BufferedReader
+ Reader reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(test)));//force the use of BufferedReader
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -464,7 +463,7 @@ public class HTMLStripCharFilterTest ext
public void doTestOffsets(String in) throws Exception {
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(in))));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(in)));
int ch = 0;
int off = 0; // offset in the reader
int strOff = -1; // offset in the original string
@@ -491,7 +490,7 @@ public class HTMLStripCharFilterTest ext
static void assertLegalOffsets(String in) throws Exception {
int length = in.length();
- HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new BufferedReader(new StringReader(in))));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new BufferedReader(new StringReader(in)));
int ch = 0;
int off = 0;
while ((ch = reader.read()) != -1) {
@@ -526,7 +525,7 @@ public class HTMLStripCharFilterTest ext
+ " alt = \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}' -->\"\n\n"
+ " title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
String gold = "onetwo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -540,7 +539,7 @@ public class HTMLStripCharFilterTest ext
test = "one<script><!-- <!--#config comment=\"<!-- \\\"comment\\\"-->\"--> --></script>two";
gold = "one\ntwo";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ reader = new HTMLStripCharFilter(new StringReader(test));
ch = 0;
builder = new StringBuilder();
try {
@@ -557,7 +556,7 @@ public class HTMLStripCharFilterTest ext
public void testScriptQuotes() throws Exception {
String test = "one<script attr= bare><!-- action('<!-- comment -->', \"\\\"-->\\\"\"); --></script>two";
String gold = "one\ntwo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -572,7 +571,7 @@ public class HTMLStripCharFilterTest ext
test = "hello<script><!-- f('<!--internal--></script>'); --></script>";
gold = "hello\n";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ reader = new HTMLStripCharFilter(new StringReader(test));
ch = 0;
builder = new StringBuilder();
try {
@@ -591,7 +590,7 @@ public class HTMLStripCharFilterTest ext
String gold = "one<script no-value-attr></script>two";
Set<String> escapedTags = new HashSet<String>(Arrays.asList("SCRIPT"));
Reader reader = new HTMLStripCharFilter
- (CharReader.get(new StringReader(test)), escapedTags);
+ (new StringReader(test), escapedTags);
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -612,7 +611,7 @@ public class HTMLStripCharFilterTest ext
+ "-->\n"
+ "</style>two";
String gold = "one\ntwo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -631,7 +630,7 @@ public class HTMLStripCharFilterTest ext
String gold = "one<style type=\"text/css\"></style>two";
Set<String> escapedTags = new HashSet<String>(Arrays.asList("STYLE"));
Reader reader = new HTMLStripCharFilter
- (CharReader.get(new StringReader(test)), escapedTags);
+ (new StringReader(test), escapedTags);
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -656,7 +655,7 @@ public class HTMLStripCharFilterTest ext
for (int i = 0 ; i < testGold.length ; i += 2) {
String test = testGold[i];
String gold = testGold[i + 1];
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
StringBuilder builder = new StringBuilder();
int ch = 0;
while ((ch = reader.read()) != -1){
@@ -671,7 +670,7 @@ public class HTMLStripCharFilterTest ext
String gold = "one<BR class='whatever'>two</\nBR\n>";
Set<String> escapedTags = new HashSet<String>(Arrays.asList("BR"));
Reader reader = new HTMLStripCharFilter
- (CharReader.get(new StringReader(test)), escapedTags);
+ (new StringReader(test), escapedTags);
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -688,7 +687,7 @@ public class HTMLStripCharFilterTest ext
public void testInlineTagsNoSpace() throws Exception {
String test = "one<sPAn class=\"invisible\">two<sup>2<sup>e</sup></sup>.</SpaN>three";
String gold = "onetwo2e.three";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -705,7 +704,7 @@ public class HTMLStripCharFilterTest ext
public void testCDATA() throws Exception {
String test = "one<![CDATA[<one><two>three<four></four></two></one>]]>two";
String gold = "one<one><two>three<four></four></two></one>two";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -720,7 +719,7 @@ public class HTMLStripCharFilterTest ext
test = "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five";
gold = "onetwo<![CDATA[three]]>fourfive";
- reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ reader = new HTMLStripCharFilter(new StringReader(test));
ch = 0;
builder = new StringBuilder();
try {
@@ -737,7 +736,7 @@ public class HTMLStripCharFilterTest ext
public void testUppercaseCharacterEntityVariants() throws Exception {
String test = " &QUOT;-&COPY;&GT;>&LT;<&REG;&AMP;";
String gold = " \"-\u00A9>><<\u00AE&";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -754,7 +753,7 @@ public class HTMLStripCharFilterTest ext
public void testMSWordMalformedProcessingInstruction() throws Exception {
String test = "one<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />two";
String gold = "onetwo";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -771,7 +770,7 @@ public class HTMLStripCharFilterTest ext
public void testSupplementaryCharsInTags() throws Exception {
String test = "one<𩬅艱鍟䇹愯瀛>two<瀛愯𩬅>three 瀛愯𩬅</瀛愯𩬅>four</𩬅艱鍟䇹愯瀛>five<𠀀𠀀>six<𠀀𠀀/>seven";
String gold = "one\ntwo\nthree 瀛愯𩬅\nfour\nfive\nsix\nseven";
- Reader reader = new HTMLStripCharFilter(CharReader.get(new StringReader(test)));
+ Reader reader = new HTMLStripCharFilter(new StringReader(test));
int ch = 0;
StringBuilder builder = new StringBuilder();
try {
@@ -822,7 +821,7 @@ public class HTMLStripCharFilterTest ext
}
}
Reader reader = new HTMLStripCharFilter
- (CharReader.get(new StringReader(text.toString())));
+ (new StringReader(text.toString()));
while (reader.read() != -1);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java Tue Jul 17 11:09:24 2012
@@ -29,8 +29,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -60,7 +59,7 @@ public class TestMappingCharFilter exten
}
public void testReaderReset() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
char[] buf = new char[10];
int len = cs.read(buf, 0, 10);
assertEquals( 1, len );
@@ -76,55 +75,55 @@ public class TestMappingCharFilter exten
}
public void testNothingChange() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1}, 1);
}
public void test1to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1}, 1);
}
public void test1to2() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1}, 1);
}
public void test1to3() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1}, 1);
}
public void test2to4() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2}, 2);
}
public void test2to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2}, 2);
}
public void test3to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3}, 3);
}
public void test4to2() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4}, 4);
}
public void test5to0() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
}
@@ -149,7 +148,7 @@ public class TestMappingCharFilter exten
//
public void testTokenStream() throws Exception {
String testString = "h i j k ll cccc bbb aa";
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( testString ) ) );
+ CharFilter cs = new MappingCharFilter( normMap, new StringReader( testString ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[]{"i","i","jj","kkk","llll","cc","b","a"},
@@ -171,8 +170,8 @@ public class TestMappingCharFilter exten
// h,8,9 => i,8,9
public void testChained() throws Exception {
String testString = "aaaa ll h";
- CharStream cs = new MappingCharFilter( normMap,
- new MappingCharFilter( normMap, CharReader.get( new StringReader( testString ) ) ) );
+ CharFilter cs = new MappingCharFilter( normMap,
+ new MappingCharFilter( normMap, new StringReader( testString ) ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[]{"a","llllllll","i"},
@@ -193,7 +192,7 @@ public class TestMappingCharFilter exten
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(normMap, CharReader.get(reader));
+ return new MappingCharFilter(normMap, reader);
}
};
@@ -219,7 +218,7 @@ public class TestMappingCharFilter exten
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(map, CharReader.get(reader));
+ return new MappingCharFilter(map, reader);
}
};
@@ -241,7 +240,7 @@ public class TestMappingCharFilter exten
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(map, CharReader.get(reader));
+ return new MappingCharFilter(map, reader);
}
};
int numRounds = 100;
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java Tue Jul 17 11:09:24 2012
@@ -23,7 +23,6 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -216,7 +215,7 @@ public class TestCJKAnalyzer extends Bas
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(norm, CharReader.get(reader));
+ return new MappingCharFilter(norm, reader);
}
};
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Tue Jul 17 11:09:24 2012
@@ -24,7 +24,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -327,7 +326,7 @@ public class TestCompoundWordTokenFilter
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MappingCharFilter(normMap, CharReader.get(reader));
+ return new MappingCharFilter(normMap, reader);
}
};
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java Tue Jul 17 11:09:24 2012
@@ -1,11 +1,12 @@
package org.apache.lucene.analysis.core;
import java.io.Reader;
+import java.io.StringReader;
import java.nio.CharBuffer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockCharFilter;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@@ -64,7 +65,7 @@ public class TestBugInSomething extends
checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj");
}
- CharStream wrappedStream = new CharStream() {
+ CharFilter wrappedStream = new CharFilter(new StringReader("bogus")) {
@Override
public void mark(int readAheadLimit) {
@@ -107,8 +108,8 @@ public class TestBugInSomething extends
}
@Override
- public int correctOffset(int currentOff) {
- throw new UnsupportedOperationException("correctOffset(int)");
+ public int correct(int currentOff) {
+ throw new UnsupportedOperationException("correct(int)");
}
@Override
@@ -123,7 +124,7 @@ public class TestBugInSomething extends
};
public void testWrapping() throws Exception {
- CharStream cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
+ CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
try {
cs.mark(1);
fail();
@@ -177,7 +178,7 @@ public class TestBugInSomething extends
cs.correctOffset(1);
fail();
} catch (Exception e) {
- assertEquals("correctOffset(int)", e.getMessage());
+ assertEquals("correct(int)", e.getMessage());
}
try {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Tue Jul 17 11:09:24 2012
@@ -44,8 +44,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.EmptyTokenizer;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
@@ -101,7 +100,7 @@ public class TestRandomChains extends Ba
static List<Constructor<? extends Tokenizer>> tokenizers;
static List<Constructor<? extends TokenFilter>> tokenfilters;
- static List<Constructor<? extends CharStream>> charfilters;
+ static List<Constructor<? extends CharFilter>> charfilters;
// TODO: fix those and remove
private static final Set<Class<?>> brokenComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
@@ -170,7 +169,7 @@ public class TestRandomChains extends Ba
getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
tokenizers = new ArrayList<Constructor<? extends Tokenizer>>();
tokenfilters = new ArrayList<Constructor<? extends TokenFilter>>();
- charfilters = new ArrayList<Constructor<? extends CharStream>>();
+ charfilters = new ArrayList<Constructor<? extends CharFilter>>();
for (final Class<?> c : analysisClasses) {
final int modifiers = c.getModifiers();
if (
@@ -179,7 +178,7 @@ public class TestRandomChains extends Ba
|| c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()
|| brokenComponents.contains(c)
|| c.isAnnotationPresent(Deprecated.class)
- || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharStream.class.isAssignableFrom(c))
+ || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))
) {
continue;
}
@@ -197,10 +196,10 @@ public class TestRandomChains extends Ba
assertTrue(ctor.toGenericString() + " has unsupported parameter types",
allowedTokenFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes())));
tokenfilters.add(castConstructor(TokenFilter.class, ctor));
- } else if (CharStream.class.isAssignableFrom(c)) {
+ } else if (CharFilter.class.isAssignableFrom(c)) {
assertTrue(ctor.toGenericString() + " has unsupported parameter types",
allowedCharFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes())));
- charfilters.add(castConstructor(CharStream.class, ctor));
+ charfilters.add(castConstructor(CharFilter.class, ctor));
} else {
fail("Cannot get here");
}
@@ -524,7 +523,6 @@ public class TestRandomChains extends Ba
allowedCharFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
allowedCharFilterArgs.addAll(argProducers.keySet());
allowedCharFilterArgs.add(Reader.class);
- allowedCharFilterArgs.add(CharStream.class);
}
@SuppressWarnings("unchecked")
@@ -560,8 +558,6 @@ public class TestRandomChains extends Ba
Class<?> paramType = paramTypes[i];
if (paramType == Reader.class) {
args[i] = reader;
- } else if (paramType == CharStream.class) {
- args[i] = CharReader.get(reader);
} else {
args[i] = newRandomArg(random, paramType);
}
@@ -701,7 +697,7 @@ public class TestRandomChains extends Ba
int numFilters = random.nextInt(3);
for (int i = 0; i < numFilters; i++) {
while (true) {
- final Constructor<? extends CharStream> ctor = charfilters.get(random.nextInt(charfilters.size()));
+ final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
reader = createComponent(ctor, args, descr);
if (reader != null) {
@@ -760,24 +756,16 @@ public class TestRandomChains extends Ba
}
}
- // wants charfilter to be a filterreader...
- // do *NOT*, do *NOT* refactor me to be a charfilter: LUCENE-3990
- static class CheckThatYouDidntReadAnythingReaderWrapper extends CharStream {
+ static class CheckThatYouDidntReadAnythingReaderWrapper extends CharFilter {
boolean readSomething;
- CharStream in;
CheckThatYouDidntReadAnythingReaderWrapper(Reader in) {
- this.in = CharReader.get(in);
+ super(in);
}
@Override
- public int correctOffset(int currentOff) {
- return in.correctOffset(currentOff);
- }
-
- @Override
- public void close() throws IOException {
- in.close();
+ public int correct(int currentOff) {
+ return currentOff; // we don't change any offsets
}
@Override
@@ -799,32 +787,12 @@ public class TestRandomChains extends Ba
}
@Override
- public void mark(int readAheadLimit) throws IOException {
- in.mark(readAheadLimit);
- }
-
- @Override
- public boolean markSupported() {
- return in.markSupported();
- }
-
- @Override
public int read(char[] cbuf) throws IOException {
readSomething = true;
return in.read(cbuf);
}
@Override
- public boolean ready() throws IOException {
- return in.ready();
- }
-
- @Override
- public void reset() throws IOException {
- in.reset();
- }
-
- @Override
public long skip(long n) throws IOException {
readSomething = true;
return in.skip(n);
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java Tue Jul 17 11:09:24 2012
@@ -68,7 +68,7 @@ public class TestPerFieldAnalyzerWrapper
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MockCharFilter(CharReader.get(reader), 7);
+ return new MockCharFilter(reader, 7);
}
};
assertAnalyzesTo(a, "ab",
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java Tue Jul 17 11:09:24 2012
@@ -23,7 +23,6 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
@@ -123,7 +122,7 @@ public class TestPathHierarchyTokenizer
builder.add("\\", "/");
NormalizeCharMap normMap = builder.build();
String path = "c:\\a\\b\\c";
- CharStream cs = new MappingCharFilter(normMap, new StringReader(path));
+ Reader cs = new MappingCharFilter(normMap, new StringReader(path));
PathHierarchyTokenizer t = new PathHierarchyTokenizer( cs );
assertTokenStreamContents(t,
new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java Tue Jul 17 11:09:24 2012
@@ -26,8 +26,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -102,8 +101,8 @@ public class TestPatternReplaceCharFilte
private void checkOutput(String input, String pattern, String replacement,
String expectedOutput, String expectedIndexMatchedOutput) throws IOException {
- CharStream cs = new PatternReplaceCharFilter(pattern(pattern), replacement,
- CharReader.get(new StringReader(input)));
+ CharFilter cs = new PatternReplaceCharFilter(pattern(pattern), replacement,
+ new StringReader(input));
StringBuilder output = new StringBuilder();
for (int chr = cs.read(); chr > 0; chr = cs.read()) {
@@ -138,8 +137,8 @@ public class TestPatternReplaceCharFilte
// this is test.
public void testNothingChange() throws IOException {
final String BLOCK = "this is test.";
- CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1$2$3",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1$2$3",
+ new StringReader( BLOCK ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "this", "is", "test." },
@@ -152,8 +151,8 @@ public class TestPatternReplaceCharFilte
// aa bb cc
public void testReplaceByEmpty() throws IOException {
final String BLOCK = "aa bb cc";
- CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "",
+ new StringReader( BLOCK ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[] {});
}
@@ -163,8 +162,8 @@ public class TestPatternReplaceCharFilte
// aa#bb#cc
public void test1block1matchSameLength() throws IOException {
final String BLOCK = "aa bb cc";
- CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2#$3",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2#$3",
+ new StringReader( BLOCK ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa#bb#cc" },
@@ -179,8 +178,8 @@ public class TestPatternReplaceCharFilte
// aa##bb###cc dd
public void test1block1matchLonger() throws IOException {
final String BLOCK = "aa bb cc dd";
- CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1##$2###$3",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1##$2###$3",
+ new StringReader( BLOCK ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa##bb###cc", "dd" },
@@ -194,8 +193,8 @@ public class TestPatternReplaceCharFilte
// aa aa
public void test1block2matchLonger() throws IOException {
final String BLOCK = " a a";
- CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("a"), "aa",
+ new StringReader( BLOCK ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa", "aa" },
@@ -210,8 +209,8 @@ public class TestPatternReplaceCharFilte
// aa#bb dd
public void test1block1matchShorter() throws IOException {
final String BLOCK = "aa bb cc dd";
- CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2",
+ new StringReader( BLOCK ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa#bb", "dd" },
@@ -226,8 +225,8 @@ public class TestPatternReplaceCharFilte
// aa bb cc --- aa bb aa bb cc
public void test1blockMultiMatches() throws IOException {
final String BLOCK = " aa bb cc --- aa bb aa bb cc";
- CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1 $2 $3",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1 $2 $3",
+ new StringReader( BLOCK ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" },
@@ -246,8 +245,8 @@ public class TestPatternReplaceCharFilte
public void test2blocksMultiMatches() throws IOException {
final String BLOCK = " aa bb cc --- aa bb aa. bb aa bb cc";
- CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)"), "$1##$2",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)"), "$1##$2",
+ new StringReader( BLOCK ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" },
@@ -262,8 +261,8 @@ public class TestPatternReplaceCharFilte
// aa b - c . --- b aa . c c b
public void testChain() throws IOException {
final String BLOCK = " a bb - ccc . --- bb a . ccc ccc bb";
- CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa",
- CharReader.get( new StringReader( BLOCK ) ) );
+ CharFilter cs = new PatternReplaceCharFilter( pattern("a"), "aa",
+ new StringReader( BLOCK ) );
cs = new PatternReplaceCharFilter( pattern("bb"), "b", cs );
cs = new PatternReplaceCharFilter( pattern("ccc"), "c", cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
@@ -315,7 +314,7 @@ public class TestPatternReplaceCharFilte
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new PatternReplaceCharFilter(p, replacement, CharReader.get(reader));
+ return new PatternReplaceCharFilter(p, replacement, reader);
}
};
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Tue Jul 17 11:09:24 2012
@@ -26,8 +26,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
@@ -83,7 +82,7 @@ public class TestPatternTokenizer extend
NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
builder.add("&uuml;", "ü");
NormalizeCharMap normMap = builder.build();
- CharStream charStream = new MappingCharFilter( normMap, CharReader.get( new StringReader( INPUT ) ) );
+ CharFilter charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) );
// create PatternTokenizer
TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
@@ -93,7 +92,7 @@ public class TestPatternTokenizer extend
new int[] { 12, 25, 28, 33 },
INPUT.length());
- charStream = new MappingCharFilter( normMap, CharReader.get( new StringReader( INPUT ) ) );
+ charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) );
stream = new PatternTokenizer(charStream, Pattern.compile("Günther"), 0);
assertTokenStreamContents(stream,
new String[] { "Günther", "Günther" },
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java Tue Jul 17 11:09:24 2012
@@ -17,11 +17,11 @@ package org.apache.lucene.analysis.ja;
* limitations under the License.
*/
-import org.apache.lucene.analysis.CharStream;
-import org.apache.lucene.analysis.charfilter.CharFilter;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.util.RollingCharBuffer;
import java.io.IOException;
+import java.io.Reader;
/**
* Normalizes Japanese horizontal iteration marks (odoriji) to their expanded form.
@@ -147,7 +147,7 @@ public class JapaneseIterationMarkCharFi
*
* @param input char stream
*/
- public JapaneseIterationMarkCharFilter(CharStream input) {
+ public JapaneseIterationMarkCharFilter(Reader input) {
this(input, NORMALIZE_KANJI_DEFAULT, NORMALIZE_KANA_DEFAULT);
}
@@ -159,7 +159,7 @@ public class JapaneseIterationMarkCharFi
* @param normalizeKanji indicates whether kanji iteration marks should be normalized
* @param normalizeKana indicates whether kana iteration marks should be normalized
*/
- public JapaneseIterationMarkCharFilter(CharStream input, boolean normalizeKanji, boolean normalizeKana) {
+ public JapaneseIterationMarkCharFilter(Reader input, boolean normalizeKanji, boolean normalizeKana) {
super(input);
this.normalizeKanji = normalizeKanji;
this.normalizeKana = normalizeKana;
@@ -453,4 +453,10 @@ public class JapaneseIterationMarkCharFi
private boolean inside(char c, char[] map, char offset) {
return c >= offset && c < offset + map.length;
}
+
+
+ @Override
+ protected int correct(int currentOff) {
+ return currentOff; // this filter doesn't change the length of strings
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java Tue Jul 17 11:09:24 2012
@@ -19,11 +19,9 @@ package org.apache.lucene.analysis.ja;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.charfilter.CharFilter;
import java.io.IOException;
import java.io.Reader;
@@ -40,7 +38,7 @@ public class TestJapaneseIterationMarkCh
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new JapaneseIterationMarkCharFilter(CharReader.get(reader));
+ return new JapaneseIterationMarkCharFilter(reader);
}
};
@@ -53,7 +51,7 @@ public class TestJapaneseIterationMarkCh
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new JapaneseIterationMarkCharFilter(CharReader.get(reader));
+ return new JapaneseIterationMarkCharFilter(reader);
}
};
@@ -138,7 +136,7 @@ public class TestJapaneseIterationMarkCh
public void testKanjiOnly() throws IOException {
// Test kanji only repetition marks
CharFilter filter = new JapaneseIterationMarkCharFilter(
- CharReader.get(new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。")),
+ new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
true, // kanji
false // no kana
);
@@ -148,7 +146,7 @@ public class TestJapaneseIterationMarkCh
public void testKanaOnly() throws IOException {
// Test kana only repetition marks
CharFilter filter = new JapaneseIterationMarkCharFilter(
- CharReader.get(new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。")),
+ new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
false, // no kanji
true // kana
);
@@ -158,7 +156,7 @@ public class TestJapaneseIterationMarkCh
public void testNone() throws IOException {
// Test no repetition marks
CharFilter filter = new JapaneseIterationMarkCharFilter(
- CharReader.get(new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。")),
+ new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
false, // no kanji
false // no kana
);
@@ -210,7 +208,7 @@ public class TestJapaneseIterationMarkCh
assertEquals(expected, actual);
}
- private String readFully(CharStream stream) throws IOException {
+ private String readFully(Reader stream) throws IOException {
StringBuffer buffer = new StringBuffer();
int ch;
while ((ch = stream.read()) != -1) {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java Tue Jul 17 11:09:24 2012
@@ -69,7 +69,7 @@ public abstract class Analyzer {
* instance of {@link TokenStreamComponents}. It returns the sink of the
* components and stores the components internally. Subsequent calls to this
* method will reuse the previously stored components after resetting them
- * through {@link TokenStreamComponents#reset(Reader)}.
+ * through {@link TokenStreamComponents#setReader(Reader)}.
* </p>
*
* @param fieldName the name of the field the created TokenStream is used for
@@ -83,7 +83,7 @@ public abstract class Analyzer {
components = createComponents(fieldName, r);
reuseStrategy.setReusableComponents(fieldName, components);
} else {
- components.reset(r);
+ components.setReader(r);
}
return components.getTokenStream();
}
@@ -181,7 +181,7 @@ public abstract class Analyzer {
* @throws IOException
* if the component's reset method throws an {@link IOException}
*/
- protected void reset(final Reader reader) throws IOException {
+ protected void setReader(final Reader reader) throws IOException {
source.setReader(reader);
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java Tue Jul 17 11:09:24 2012
@@ -65,15 +65,15 @@ public abstract class Tokenizer extends
}
}
- /** Return the corrected offset. If {@link #input} is a {@link CharStream} subclass
- * this method calls {@link CharStream#correctOffset}, else returns <code>currentOff</code>.
+ /** Return the corrected offset. If {@link #input} is a {@link CharFilter} subclass
+ * this method calls {@link CharFilter#correctOffset}, else returns <code>currentOff</code>.
* @param currentOff offset as seen in the output
* @return corrected offset based on the input
- * @see CharStream#correctOffset
+ * @see CharFilter#correctOffset
*/
protected final int correctOffset(int currentOff) {
assert input != null: "this tokenizer is closed";
- return (input instanceof CharStream) ? ((CharStream) input).correctOffset(currentOff) : currentOff;
+ return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff;
}
/** Expert: Reset the tokenizer to a new reader. Typically, an
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Tue Jul 17 11:09:24 2012
@@ -123,7 +123,7 @@ public class TestMockAnalyzer extends Ba
for (int i = 0; i < num; i++) {
String s = _TestUtil.randomHtmlishString(random(), 20);
StringReader reader = new StringReader(s);
- MockCharFilter charfilter = new MockCharFilter(CharReader.get(reader), 2);
+ MockCharFilter charfilter = new MockCharFilter(reader, 2);
MockAnalyzer analyzer = new MockAnalyzer(random());
TokenStream ts = analyzer.tokenStream("bogus", charfilter);
ts.reset();
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/analysis/TestMockCharFilter.java Tue Jul 17 11:09:24 2012
@@ -33,7 +33,7 @@ public class TestMockCharFilter extends
@Override
protected Reader initReader(String fieldName, Reader reader) {
- return new MockCharFilter(CharReader.get(reader), 7);
+ return new MockCharFilter(reader, 7);
}
};
Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java Tue Jul 17 11:09:24 2012
@@ -24,13 +24,12 @@ import java.util.TreeMap;
/** the purpose of this charfilter is to send offsets out of bounds
if the analyzer doesn't use correctOffset or does incorrect offset math. */
-public class MockCharFilter extends CharStream {
- final CharStream in;
+public class MockCharFilter extends CharFilter {
final int remainder;
// for testing only
public MockCharFilter(Reader in, int remainder) {
- this.in = CharReader.get(in);
+ super(in);
// TODO: instead of fixed remainder... maybe a fixed
// random seed?
this.remainder = remainder;
@@ -94,11 +93,11 @@ public class MockCharFilter extends Char
}
@Override
- public int correctOffset(int currentOff) {
+ public int correct(int currentOff) {
SortedMap<Integer,Integer> subMap = corrections.subMap(0, currentOff+1);
int ret = subMap.isEmpty() ? currentOff : currentOff + subMap.get(subMap.lastKey());
assert ret >= 0 : "currentOff=" + currentOff + ",diff=" + (ret-currentOff);
- return in.correctOffset(ret); // chain the call
+ return ret;
}
protected void addOffCorrectMap(int off, int cumulativeDiff) {
Modified: lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java?rev=1362446&r1=1362445&r2=1362446&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java Tue Jul 17 11:09:24 2012
@@ -17,7 +17,6 @@
package org.apache.solr.handler.dataimport;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
-import org.apache.lucene.analysis.CharReader;
import java.io.IOException;
import java.io.StringReader;
@@ -73,7 +72,7 @@ public class HTMLStripTransformer extend
StringBuilder out = new StringBuilder();
StringReader strReader = new StringReader(value);
try {
- HTMLStripCharFilter html = new HTMLStripCharFilter(CharReader.get(strReader.markSupported() ? strReader : new BufferedReader(strReader)));
+ HTMLStripCharFilter html = new HTMLStripCharFilter(strReader.markSupported() ? strReader : new BufferedReader(strReader));
char[] cbuf = new char[1024 * 10];
while (true) {
int count = html.read(cbuf);