You are viewing a plain text version of this content. The canonical version is available in the original mailing-list archive.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2008/11/04 23:19:47 UTC
svn commit: r711450 - in /lucene/solr/trunk: CHANGES.txt
src/java/org/apache/solr/analysis/WordDelimiterFilter.java
src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
Author: yonik
Date: Tue Nov 4 14:19:46 2008
New Revision: 711450
URL: http://svn.apache.org/viewvc?rev=711450&view=rev
Log:
SOLR-751: WordDelimiterFilter didn't adjust the start offset of single tokens
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
lucene/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=711450&r1=711449&r2=711450&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Tue Nov 4 14:19:46 2008
@@ -95,6 +95,10 @@
4. SOLR-805: DisMax queries are not being cached in QueryResultCache (Todd Feak via koji)
+ 5. SOLR-751: WordDelimiterFilter didn't adjust the start offset of single
+ tokens that started with delimiters, leading to incorrect highlighting.
+ (Stefan Oestreicher via yonik)
+
Other Changes
----------------------
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java?rev=711450&r1=711449&r2=711450&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java Tue Nov 4 14:19:46 2008
@@ -373,6 +373,7 @@
// just adjust the text w/o changing the rest
// of the original token
t.setTermBuffer(termBuffer, start, len-start);
+ t.setStartOffset(t.startOffset() + start);
return t;
}
Modified: lucene/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java?rev=711450&r1=711449&r2=711450&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java Tue Nov 4 14:19:46 2008
@@ -183,4 +183,99 @@
assertEquals(6, t.endOffset());
}
}
+
+ public void testOffsetChange() throws Exception
+ {
+ WordDelimiterFilter wdf = new WordDelimiterFilter(
+ new TokenStream() {
+ Token t;
+ public Token next() {
+ if (t != null) return null;
+ t = new Token("übelkeit)", 7, 16);
+ return t;
+ }
+ },
+ 1,1,0,0,1,1,0
+ );
+
+ Token t = wdf.next();
+
+ assertNotNull(t);
+ assertEquals("übelkeit", t.term());
+ assertEquals(7, t.startOffset());
+ assertEquals(15, t.endOffset());
+ }
+
+ public void testOffsetChange2() throws Exception
+ {
+ WordDelimiterFilter wdf = new WordDelimiterFilter(
+ new TokenStream() {
+ Token t;
+ public Token next() {
+ if (t != null) return null;
+ t = new Token("(übelkeit", 7, 17);
+ return t;
+ }
+ },
+ 1,1,0,0,1,1,0
+ );
+
+ Token t = wdf.next();
+
+ assertNotNull(t);
+ assertEquals("übelkeit", t.term());
+ assertEquals(8, t.startOffset());
+ assertEquals(17, t.endOffset());
+ }
+
+ public void testOffsetChange3() throws Exception
+ {
+ WordDelimiterFilter wdf = new WordDelimiterFilter(
+ new TokenStream() {
+ Token t;
+ public Token next() {
+ if (t != null) return null;
+ t = new Token("(übelkeit", 7, 16);
+ return t;
+ }
+ },
+ 1,1,0,0,1,1,0
+ );
+
+ Token t = wdf.next();
+
+ assertNotNull(t);
+ assertEquals("übelkeit", t.term());
+ assertEquals(8, t.startOffset());
+ assertEquals(16, t.endOffset());
+ }
+
+ public void testOffsetChange4() throws Exception
+ {
+ WordDelimiterFilter wdf = new WordDelimiterFilter(
+ new TokenStream() {
+ private Token t;
+ public Token next() {
+ if (t != null) return null;
+ t = new Token("(foo,bar)", 7, 16);
+ return t;
+ }
+ },
+ 1,1,0,0,1,1,0
+ );
+
+ Token t = wdf.next();
+
+ assertNotNull(t);
+ assertEquals("foo", t.term());
+ assertEquals(8, t.startOffset());
+ assertEquals(11, t.endOffset());
+
+ t = wdf.next();
+
+ assertNotNull(t);
+ assertEquals("bar", t.term());
+ assertEquals(12, t.startOffset());
+ assertEquals(15, t.endOffset());
+ }
}