You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2007/03/02 00:15:22 UTC
svn commit: r513517 - in /lucene/solr/trunk: CHANGES.txt
src/java/org/apache/solr/analysis/SynonymFilter.java
src/test/org/apache/solr/analysis/TestSynonymFilter.java
Author: yonik
Date: Thu Mar 1 13:36:36 2007
New Revision: 513517
URL: http://svn.apache.org/viewvc?view=rev&rev=513517
Log:
fix synonymfilter offsets: SOLR-167
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?view=diff&rev=513517&r1=513516&r2=513517
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Thu Mar 1 13:36:36 2007
@@ -175,6 +175,9 @@
7. SOLR-168: Fix display positioning of multiple tokens at the same
position in analysis.jsp (yonik)
+
+ 8. SOLR-167: The SynonymFilter sometimes generated incorrect offsets when
+ multi token synonyms were matched in the source text. (yonik)
Other Changes
1. Updated to Lucene 2.1
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java?view=diff&rev=513517&r1=513516&r2=513517
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java Thu Mar 1 13:36:36 2007
@@ -86,8 +86,7 @@
// OK, we matched a token, so find the longest match.
- // since matched is only used for matches >= 2, defer creation until now
- if (matched==null) matched=new LinkedList();
+ matched = new LinkedList();
SynonymMap result = match((SynonymMap)o);
Modified: lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java?view=diff&rev=513517&r1=513516&r2=513517
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/analysis/TestSynonymFilter.java Thu Mar 1 13:36:36 2007
@@ -43,6 +43,7 @@
* a b c => returns List<Token> [a,b,c]
* a/b => tokens a and b share the same spot (b.positionIncrement=0)
* a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
+ * a,1,10,11 => "a" with positionIncrement=1, startOffset=10, endOffset=11
*/
public List tokens(String str) {
String[] arr = str.split(" ");
@@ -50,8 +51,32 @@
for (int i=0; i<arr.length; i++) {
String[] toks = arr[i].split("/");
String[] params = toks[0].split(",");
- Token t = new Token(params[0],0,0,"TEST");
- if (params.length > 1) t.setPositionIncrement(Integer.parseInt(params[1]));
+
+ int posInc;
+ int start;
+ int end;
+
+ if (params.length > 1) {
+ posInc = Integer.parseInt(params[1]);
+ } else {
+ posInc = 1;
+ }
+
+ if (params.length > 2) {
+ start = Integer.parseInt(params[2]);
+ } else {
+ start = 0;
+ }
+
+ if (params.length > 3) {
+ end = Integer.parseInt(params[3]);
+ } else {
+ end = start + params[0].length();
+ }
+
+ Token t = new Token(params[0],start,end,"TEST");
+ t.setPositionIncrement(posInc);
+
result.add(t);
for (int j=1; j<toks.length; j++) {
t = new Token(toks[j],0,0,"TEST");
@@ -91,27 +116,42 @@
public void assertTokEqual(List a, List b) {
- assertTokEq(a,b);
- assertTokEq(b,a);
+ assertTokEq(a,b,false);
+ assertTokEq(b,a,false);
}
- private void assertTokEq(List a, List b) {
+ public void assertTokEqualOff(List a, List b) {
+ assertTokEq(a,b,true);
+ assertTokEq(b,a,true);
+ }
+
+ private void assertTokEq(List a, List b, boolean checkOff) {
int pos=0;
for (Iterator iter = a.iterator(); iter.hasNext();) {
Token tok = (Token)iter.next();
pos += tok.getPositionIncrement();
- if (!tokAt(b, tok.termText(), pos)) {
+ if (!tokAt(b, tok.termText(), pos
+ , checkOff ? tok.startOffset() : -1
+ , checkOff ? tok.endOffset() : -1
+ ))
+ {
fail(a + "!=" + b);
}
}
}
- public boolean tokAt(List lst, String val, int tokPos) {
+ public boolean tokAt(List lst, String val, int tokPos, int startOff, int endOff) {
int pos=0;
for (Iterator iter = lst.iterator(); iter.hasNext();) {
Token tok = (Token)iter.next();
pos += tok.getPositionIncrement();
- if (pos==tokPos && tok.termText().equals(val)) return true;
+ if (pos==tokPos && tok.termText().equals(val)
+ && (startOff==-1 || tok.startOffset()==startOff)
+ && (endOff==-1 || tok.endOffset()==endOff)
+ )
+ {
+ return true;
+ }
}
return false;
}
@@ -281,5 +321,25 @@
assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0/c c2,2"));
}
+
+ public void testOffsetBug() throws IOException {
+ // With the following rules:
+ // a a=>b
+ // x=>y
+ // analysing "a x" causes "y" to have a bad offset (end less than start)
+ // SOLR-167
+ SynonymMap map = new SynonymMap();
+
+ boolean orig = false;
+ boolean merge = true;
+
+ map.add(strings("a a"), tokens("b"), orig, merge);
+ map.add(strings("x"), tokens("y"), orig, merge);
+
+ System.out.println(getTokList(map,"a,1,0,1 a,1,2,3 x,1,4,5",false));
+
+ // "a a x" => "b y"
+ assertTokEqualOff(getTokList(map,"a,1,0,1 a,1,2,3 x,1,4,5",false), tokens("b,1,0,3 y,1,4,5"));
+ }
}