You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ko...@apache.org on 2010/10/29 18:48:07 UTC
svn commit: r1028833 - in /lucene/java/branches/lucene_2_9/contrib: ./
fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/
fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/
Author: koji
Date: Fri Oct 29 16:48:06 2010
New Revision: 1028833
URL: http://svn.apache.org/viewvc?rev=1028833&view=rev
Log:
LUCENE-2278: FastVectorHighlighter: highlighted term is out of alignment in multi-valued NOT_ANALYZED field
Modified:
lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
Modified: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt?rev=1028833&r1=1028832&r2=1028833&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/CHANGES.txt Fri Oct 29 16:48:06 2010
@@ -10,6 +10,9 @@ Bug Fixes
* LUCENE-2284: MatchAllDocsQueryNode toString() created an invalid XML tag.
(Frank Wesemann via Robert Muir)
+ * LUCENE-2278: FastVectorHighlighter: Highlighted term is out of alignment
+ in multi-valued NOT_ANALYZED field. (Koji Sekiguchi)
+
Documentation
* LUCENE-2055: Add documentation noting that the Dutch and French stemmers
Modified: lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=1028833&r1=1028832&r2=1028833&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Fri Oct 29 16:48:06 2010
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
@@ -72,7 +73,7 @@ public abstract class BaseFragmentsBuild
List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
List<String> fragments = new ArrayList<String>( maxNumFragments );
- String[] values = getFieldValues( reader, docId, fieldName );
+ Field[] values = getFields( reader, docId, fieldName );
if( values.length == 0 ) return null;
StringBuilder buffer = new StringBuilder();
int[] nextValueIndex = { 0 };
@@ -83,15 +84,31 @@ public abstract class BaseFragmentsBuild
return fragments.toArray( new String[fragments.size()] );
}
+ @Deprecated
protected String[] getFieldValues( IndexReader reader, int docId, String fieldName) throws IOException {
Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
return doc.getValues( fieldName ); // according to Document class javadoc, this never returns null
}
+
+ protected Field[] getFields( IndexReader reader, int docId, String fieldName) throws IOException {
+ // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
+ Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
+ return doc.getFields( fieldName ); // according to Document class javadoc, this never returns null
+ }
+ @Deprecated
protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){
- StringBuilder fragment = new StringBuilder();
final int s = fragInfo.startOffset;
- String src = getFragmentSource( buffer, index, values, s, fragInfo.endOffset );
+ return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+ }
+
+ protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo ){
+ final int s = fragInfo.startOffset;
+ return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+ }
+
+ private String makeFragment( WeightedFragInfo fragInfo, String src, int s ){
+ StringBuilder fragment = new StringBuilder();
int srcIndex = 0;
for( SubInfo subInfo : fragInfo.subInfos ){
for( Toffs to : subInfo.termsOffsets ){
@@ -104,6 +121,7 @@ public abstract class BaseFragmentsBuild
return fragment.toString();
}
+ @Deprecated
protected String getFragmentSource( StringBuilder buffer, int[] index, String[] values,
int startOffset, int endOffset ){
while( buffer.length() < endOffset && index[0] < values.length ){
@@ -114,6 +132,17 @@ public abstract class BaseFragmentsBuild
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
return buffer.substring( startOffset, eo );
}
+
+ protected String getFragmentSource( StringBuilder buffer, int[] index, Field[] values,
+ int startOffset, int endOffset ){
+ while( buffer.length() < endOffset && index[0] < values.length ){
+ if( index[0] > 0 && values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 )
+ buffer.append( ' ' );
+ buffer.append( values[index[0]++].stringValue() );
+ }
+ int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
+ return buffer.substring( startOffset, eo );
+ }
protected String getPreTag( int num ){
return preTags.length > num ? preTags[num] : preTags[0];
Modified: lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1028833&r1=1028832&r2=1028833&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Fri Oct 29 16:48:06 2010
@@ -24,6 +24,7 @@ import java.util.Collection;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -54,6 +55,7 @@ public abstract class AbstractTestCase e
protected Directory dir;
protected Analyzer analyzerW;
protected Analyzer analyzerB;
+ protected Analyzer analyzerK;
protected IndexReader reader;
protected QueryParser paW;
protected QueryParser paB;
@@ -75,9 +77,16 @@ public abstract class AbstractTestCase e
"\nWhen you talk about processing speed, the"
};
+ protected static final String[] strMVValues = {
+ "abc",
+ "defg",
+ "hijkl"
+ };
+
protected void setUp() throws Exception {
analyzerW = new WhitespaceAnalyzer();
analyzerB = new BigramAnalyzer();
+ analyzerK = new KeywordAnalyzer();
paW = new QueryParser( F, analyzerW );
paB = new QueryParser( F, analyzerB );
dir = new RAMDirectory();
@@ -280,6 +289,7 @@ public abstract class AbstractTestCase e
make1dmfIndex( analyzerB, values );
}
+ // make 1 doc with multi valued field
protected void make1dmfIndex( Analyzer analyzer, String... values ) throws Exception {
IndexWriter writer = new IndexWriter( dir, analyzer, true, MaxFieldLength.LIMITED );
Document doc = new Document();
@@ -291,6 +301,18 @@ public abstract class AbstractTestCase e
reader = IndexReader.open( dir );
}
+ // make 1 doc with multi valued & not analyzed field
+ protected void make1dmfIndexNA( String... values ) throws Exception {
+ IndexWriter writer = new IndexWriter( dir, analyzerK, true, MaxFieldLength.LIMITED );
+ Document doc = new Document();
+ for( String value: values )
+ doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+ writer.addDocument( doc );
+ writer.close();
+
+ reader = IndexReader.open( dir, true );
+ }
+
protected void makeIndexShortMV() throws Exception {
// 012345
@@ -352,4 +374,18 @@ public abstract class AbstractTestCase e
make1dmfIndexB( biMVValues );
}
+
+ protected void makeIndexStrMV() throws Exception {
+
+ //  0123
+ // "abc"
+
+ //  34567
+ // "defg"
+
+ //     111
+ //  789012
+ // "hijkl"
+ make1dmfIndexNA( strMVValues );
+ }
}
Modified: lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1028833&r1=1028832&r2=1028833&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Fri Oct 29 16:48:06 2010
@@ -127,4 +127,16 @@ public class SimpleFragmentsBuilderTest
reader = IndexReader.open( dir );
}
+
+ public void test1StrMV() throws Exception {
+ makeIndexStrMV();
+
+ FieldQuery fq = new FieldQuery( tq( "defg" ), true, true );
+ FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
+ FieldPhraseList fpl = new FieldPhraseList( stack, fq );
+ SimpleFragListBuilder sflb = new SimpleFragListBuilder();
+ FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
+ SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+ assertEquals( "abc<b>defg</b>hijkl", sfb.createFragment( reader, 0, F, ffl ) );
+ }
}