You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/29 16:55:57 UTC

svn commit: r1378591 - in /lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis: CharFilter.java package.html

Author: rmuir
Date: Wed Aug 29 14:55:56 2012
New Revision: 1378591

URL: http://svn.apache.org/viewvc?rev=1378591&view=rev
Log:
Add an example that builds a CharFilter chain in Analyzer

Modified:
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/package.html

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java?rev=1378591&r1=1378590&r2=1378591&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java Wed Aug 29 14:55:56 2012
@@ -33,6 +33,9 @@ import java.io.Reader;
  * You can optionally provide more efficient implementations of additional methods 
  * like {@link #read()}, {@link #read(char[])}, {@link #read(java.nio.CharBuffer)},
  * but this is not required.
+ * <p>
+ * For examples and integration with {@link Analyzer}, see the 
+ * {@link org.apache.lucene.analysis Analysis package documentation}.
  */
 // the way java.io.FilterReader should work!
 public abstract class CharFilter extends Reader {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/package.html?rev=1378591&r1=1378590&r2=1378591&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/package.html Wed Aug 29 14:55:56 2012
@@ -817,5 +817,30 @@ As a small hint, this is how the new Att
 
   ...
 </pre>
+<h4>Adding a CharFilter chain</h4>
+Analyzers take Java {@link java.io.Reader}s as input. Of course, you can wrap your Readers with {@link java.io.FilterReader}s
+to manipulate content, but this has the big disadvantage that character offsets may become inconsistent with your original
+text.
+<p>
+{@link org.apache.lucene.analysis.CharFilter} is designed to allow you to pre-process input like a FilterReader would, but also
+preserve the original offsets associated with those characters. This way, mechanisms like highlighting still work correctly.
+CharFilters can be chained: each one wraps another Reader, so the innermost CharFilter is applied to the input first.
+<p>
+Example:
+<pre class="prettyprint">
+public class MyAnalyzer extends Analyzer {
+
+  {@literal @Override}
+  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    return new TokenStreamComponents(new MyTokenizer(reader));
+  }
+  
+  {@literal @Override}
+  protected Reader initReader(String fieldName, Reader reader) {
+    // wrap the Reader in a CharFilter chain.
+    return new SecondCharFilter(new FirstCharFilter(reader));
+  }
+}
+</pre>
 </body>
 </html>