You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by rm...@apache.org on 2009/08/19 14:07:16 UTC
svn commit: r805769 - in /lucene/java/trunk/contrib: CHANGES.txt
analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
Author: rmuir
Date: Wed Aug 19 12:07:15 2009
New Revision: 805769
URL: http://svn.apache.org/viewvc?rev=805769&view=rev
Log:
LUCENE-1813: Add option to ReverseStringFilter to mark reversed tokens
Modified:
lucene/java/trunk/contrib/CHANGES.txt
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=805769&r1=805768&r2=805769&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Wed Aug 19 12:07:15 2009
@@ -147,6 +147,9 @@
16. LUCENE-1628: Added Persian analyzer. (Robert Muir)
+17. LUCENE-1813: Add option to ReverseStringFilter to mark reversed tokens.
+ (Andrzej Bialecki via Robert Muir)
+
Optimizations
1. LUCENE-1643: Re-use the collation key (RawCollationKey) for
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java?rev=805769&r1=805768&r2=805769&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java Wed Aug 19 12:07:15 2009
@@ -24,22 +24,82 @@
import java.io.IOException;
/**
- * Reverse token string e.g. "country" => "yrtnuoc".
- *
+ * Reverse token string, for example "country" => "yrtnuoc".
+ * <p>
+ * If <code>marker</code> is supplied, then tokens will also be prepended with
+ * that character. For example, with a marker of \u0001, "country" =>
+ * "\u0001yrtnuoc". This is useful when implementing efficient leading
+ * wildcard search.
+ * </p>
+ *
* @version $Id$
*/
public final class ReverseStringFilter extends TokenFilter {
private TermAttribute termAtt;
-
+ private final char marker;
+ private static final char NOMARKER = '\uFFFF';
+
+ /**
+ * Example marker character: U+0001 (START OF HEADING)
+ */
+ public static final char START_OF_HEADING_MARKER = '\u0001';
+
+ /**
+ * Example marker character: U+001F (INFORMATION SEPARATOR ONE)
+ */
+ public static final char INFORMATION_SEPARATOR_MARKER = '\u001F';
+
+ /**
+ * Example marker character: U+EC00 (PRIVATE USE AREA: EC00)
+ */
+ public static final char PUA_EC00_MARKER = '\uEC00';
+
+ /**
+ * Example marker character: U+200F (RIGHT-TO-LEFT MARK)
+ */
+ public static final char RTL_DIRECTION_MARKER = '\u200F';
+
+ /**
+ * Create a new ReverseStringFilter that reverses all tokens in the
+ * supplied {@link TokenStream}.
+ * <p>
+ * The reversed tokens will not be marked.
+ * </p>
+ *
+ * @param in {@link TokenStream} to filter
+ */
public ReverseStringFilter(TokenStream in) {
+ this(in, NOMARKER);
+ }
+
+ /**
+ * Create a new ReverseStringFilter that reverses and marks all tokens in the
+ * supplied {@link TokenStream}.
+ * <p>
+ * The reversed tokens will be prepended (marked) by the <code>marker</code>
+ * character.
+ * </p>
+ *
+ * @param in {@link TokenStream} to filter
+ * @param marker A character used to mark reversed tokens
+ */
+ public ReverseStringFilter(TokenStream in, char marker) {
super(in);
+ this.marker = marker;
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- reverse( termAtt.termBuffer(), termAtt.termLength() );
+ int len = termAtt.termLength();
+ if (marker != NOMARKER) {
+ len++;
+ termAtt.resizeTermBuffer(len);
+ termAtt.termBuffer()[len - 1] = marker;
+ }
+ reverse( termAtt.termBuffer(), len );
+ termAtt.setTermLength(len);
return true;
} else {
return false;
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=805769&r1=805768&r2=805769&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Wed Aug 19 12:07:15 2009
@@ -42,6 +42,25 @@
assertEquals("yad", text.term());
assertFalse(filter.incrementToken());
}
+
+ public void testFilterWithMark() throws Exception {
+ TokenStream stream = new WhitespaceTokenizer(new StringReader(
+ "Do have a nice day")); // 1-4 length string
+ ReverseStringFilter filter = new ReverseStringFilter(stream, '\u0001');
+ TermAttribute text = (TermAttribute) filter
+ .getAttribute(TermAttribute.class);
+ assertTrue(filter.incrementToken());
+ assertEquals("\u0001oD", text.term());
+ assertTrue(filter.incrementToken());
+ assertEquals("\u0001evah", text.term());
+ assertTrue(filter.incrementToken());
+ assertEquals("\u0001a", text.term());
+ assertTrue(filter.incrementToken());
+ assertEquals("\u0001ecin", text.term());
+ assertTrue(filter.incrementToken());
+ assertEquals("\u0001yad", text.term());
+ assertFalse(filter.incrementToken());
+ }
public void testReverseString() throws Exception {
assertEquals( "A", ReverseStringFilter.reverse( "A" ) );