You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/29 16:56:28 UTC
svn commit: r1378593 - in /lucene/dev/trunk: ./ lucene/ lucene/core/
lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
lucene/core/src/java/org/apache/lucene/analysis/package.html
Author: rmuir
Date: Wed Aug 29 14:56:28 2012
New Revision: 1378593
URL: http://svn.apache.org/viewvc?rev=1378593&view=rev
Log:
Add an example that builds a CharFilter chain in Analyzer
Modified:
lucene/dev/trunk/ (props changed)
lucene/dev/trunk/lucene/ (props changed)
lucene/dev/trunk/lucene/core/ (props changed)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/package.html
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java?rev=1378593&r1=1378592&r2=1378593&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java Wed Aug 29 14:56:28 2012
@@ -33,6 +33,9 @@ import java.io.Reader;
* You can optionally provide more efficient implementations of additional methods
* like {@link #read()}, {@link #read(char[])}, {@link #read(java.nio.CharBuffer)},
* but this is not required.
+ * <p>
+ * For examples and integration with {@link Analyzer}, see the
+ * {@link org.apache.lucene.analysis Analysis package documentation}.
*/
// the way java.io.FilterReader should work!
public abstract class CharFilter extends Reader {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/package.html?rev=1378593&r1=1378592&r2=1378593&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/package.html Wed Aug 29 14:56:28 2012
@@ -817,5 +817,30 @@ As a small hint, this is how the new Att
...
</pre>
+<h4>Adding a CharFilter chain</h4>
+Analyzers take Java {@link java.io.Reader}s as input. You can, of course, wrap your Readers with {@link java.io.FilterReader}s
+to manipulate content, but this has the significant disadvantage that character offsets may become inconsistent with your original
+text.
+<p>
+{@link org.apache.lucene.analysis.CharFilter} is designed to allow you to pre-process input like a FilterReader would, while also
+preserving the original offsets associated with those characters. This way, mechanisms such as highlighting still work correctly.
+CharFilters can be chained.
+<p>
+Example:
+<pre class="prettyprint">
+public class MyAnalyzer extends Analyzer {
+
+ {@literal @Override}
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MyTokenizer(reader));
+ }
+
+ {@literal @Override}
+ protected Reader initReader(String fieldName, Reader reader) {
+ // wrap the Reader in a CharFilter chain.
+ return new SecondCharFilter(new FirstCharFilter(reader));
+ }
+}
+</pre>
</body>
</html>