You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ko...@apache.org on 2010/10/29 18:48:07 UTC

svn commit: r1028833 - in /lucene/java/branches/lucene_2_9/contrib: ./ fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/ fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/

Author: koji
Date: Fri Oct 29 16:48:06 2010
New Revision: 1028833

URL: http://svn.apache.org/viewvc?rev=1028833&view=rev
Log:
LUCENE-2278: FastVectorHighlighter: highlighted term is out of alignment in multi-valued NOT_ANALYZED field

Modified:
    lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
    lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
    lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
    lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java

Modified: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt?rev=1028833&r1=1028832&r2=1028833&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/CHANGES.txt Fri Oct 29 16:48:06 2010
@@ -10,6 +10,9 @@ Bug Fixes
  * LUCENE-2284: MatchAllDocsQueryNode toString() created an invalid XML tag.
    (Frank Wesemann via Robert Muir)
 
+ * LUCENE-2278: FastVectorHighlighter: Highlighted term is out of alignment
+   in multi-valued NOT_ANALYZED field. (Koji Sekiguchi)
+
 Documentation
 
  * LUCENE-2055: Add documentation noting that the Dutch and French stemmers

Modified: lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=1028833&r1=1028832&r2=1028833&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Fri Oct 29 16:48:06 2010
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.MapFieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
@@ -72,7 +73,7 @@ public abstract class BaseFragmentsBuild
     List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
     
     List<String> fragments = new ArrayList<String>( maxNumFragments );
-    String[] values = getFieldValues( reader, docId, fieldName );
+    Field[] values = getFields( reader, docId, fieldName );
     if( values.length == 0 ) return null;
     StringBuilder buffer = new StringBuilder();
     int[] nextValueIndex = { 0 };
@@ -83,15 +84,31 @@ public abstract class BaseFragmentsBuild
     return fragments.toArray( new String[fragments.size()] );
   }
   
+  @Deprecated
   protected String[] getFieldValues( IndexReader reader, int docId, String fieldName) throws IOException {
     Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
     return doc.getValues( fieldName ); // according to Document class javadoc, this never returns null
   }
+  
+  protected Field[] getFields( IndexReader reader, int docId, String fieldName) throws IOException {
+    // TODO confirm: according to the Document javadoc, doc.getFields(fieldName) should not be used with lazily loaded fields
+    Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
+    return doc.getFields( fieldName ); // according to Document class javadoc, this never returns null
+  }
 
+  @Deprecated
   protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){
-    StringBuilder fragment = new StringBuilder();
     final int s = fragInfo.startOffset;
-    String src = getFragmentSource( buffer, index, values, s, fragInfo.endOffset );
+    return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+  }
+
+  protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo ){
+    final int s = fragInfo.startOffset;
+    return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+  }
+  
+  private String makeFragment( WeightedFragInfo fragInfo, String src, int s ){
+    StringBuilder fragment = new StringBuilder();
     int srcIndex = 0;
     for( SubInfo subInfo : fragInfo.subInfos ){
       for( Toffs to : subInfo.termsOffsets ){
@@ -104,6 +121,7 @@ public abstract class BaseFragmentsBuild
     return fragment.toString();
   }
   
+  @Deprecated
   protected String getFragmentSource( StringBuilder buffer, int[] index, String[] values,
       int startOffset, int endOffset ){
     while( buffer.length() < endOffset && index[0] < values.length ){
@@ -114,6 +132,17 @@ public abstract class BaseFragmentsBuild
     int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
     return buffer.substring( startOffset, eo );
   }
+
+  protected String getFragmentSource( StringBuilder buffer, int[] index, Field[] values,
+      int startOffset, int endOffset ){
+    while( buffer.length() < endOffset && index[0] < values.length ){
+      if( index[0] > 0 && values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 )
+        buffer.append( ' ' );
+      buffer.append( values[index[0]++].stringValue() );
+    }
+    int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
+    return buffer.substring( startOffset, eo );
+  }
   
   protected String getPreTag( int num ){
     return preTags.length > num ? preTags[num] : preTags[0];

Modified: lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1028833&r1=1028832&r2=1028833&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Fri Oct 29 16:48:06 2010
@@ -24,6 +24,7 @@ import java.util.Collection;
 import junit.framework.TestCase;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -54,6 +55,7 @@ public abstract class AbstractTestCase e
   protected Directory dir;
   protected Analyzer analyzerW;
   protected Analyzer analyzerB;
+  protected Analyzer analyzerK;
   protected IndexReader reader;  
   protected QueryParser paW;
   protected QueryParser paB;
@@ -75,9 +77,16 @@ public abstract class AbstractTestCase e
     "\nWhen you talk about processing speed, the"
   };
 
+  protected static final String[] strMVValues = {                                                           
+    "abc",                                                                                                  
+    "defg",                                                                                                 
+    "hijkl"                                                                                                 
+  };                                                                                                        
+  
   protected void setUp() throws Exception {
     analyzerW = new WhitespaceAnalyzer();
     analyzerB = new BigramAnalyzer();
+    analyzerK = new KeywordAnalyzer(); 
     paW = new QueryParser( F, analyzerW );
     paB = new QueryParser( F, analyzerB );
     dir = new RAMDirectory();
@@ -280,6 +289,7 @@ public abstract class AbstractTestCase e
     make1dmfIndex( analyzerB, values );
   }
   
+  // make 1 doc with a multi-valued field
   protected void make1dmfIndex( Analyzer analyzer, String... values ) throws Exception {
     IndexWriter writer = new IndexWriter( dir, analyzer, true, MaxFieldLength.LIMITED );
     Document doc = new Document();
@@ -291,6 +301,18 @@ public abstract class AbstractTestCase e
     reader = IndexReader.open( dir );
   }
   
+  // make 1 doc with a multi-valued, not-analyzed field
+  protected void make1dmfIndexNA( String... values ) throws Exception {
+    IndexWriter writer = new IndexWriter( dir, analyzerK, true, MaxFieldLength.LIMITED );
+    Document doc = new Document();
+    for( String value: values )
+      doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+    writer.addDocument( doc );
+    writer.close();
+
+    reader = IndexReader.open( dir, true );
+  }
+  
   protected void makeIndexShortMV() throws Exception {
 
     //  012345
@@ -352,4 +374,18 @@ public abstract class AbstractTestCase e
 
     make1dmfIndexB( biMVValues );
   }
+  
+  protected void makeIndexStrMV() throws Exception {
+
+    //  0123
+    // "abc"
+    
+    //  34567
+    // "defg"
+
+    //     111
+    //  789012
+    // "hijkl"
+    make1dmfIndexNA( strMVValues );
+  }
 }

Modified: lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1028833&r1=1028832&r2=1028833&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Fri Oct 29 16:48:06 2010
@@ -127,4 +127,16 @@ public class SimpleFragmentsBuilderTest 
 
     reader = IndexReader.open( dir );
   }
+  
+  public void test1StrMV() throws Exception {
+    makeIndexStrMV();
+
+    FieldQuery fq = new FieldQuery( tq( "defg" ), true, true );
+    FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
+    FieldPhraseList fpl = new FieldPhraseList( stack, fq );
+    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
+    FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
+    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+    assertEquals( "abc<b>defg</b>hijkl", sfb.createFragment( reader, 0, F, ffl ) );
+  }
 }