You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ko...@apache.org on 2010/08/17 04:39:40 UTC

svn commit: r986173 - in /lucene/dev/trunk/lucene/contrib: CHANGES.txt highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java

Author: koji
Date: Tue Aug 17 02:39:40 2010
New Revision: 986173

URL: http://svn.apache.org/viewvc?rev=986173&view=rev
Log:
LUCENE-2603: FastVectorHighlighter: add a method to set an arbitrary char that is used when concatenating multiValued data

Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
    lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=986173&r1=986172&r2=986173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Aug 17 02:39:40 2010
@@ -112,6 +112,10 @@ API Changes
    such as stopword lists and any language-specific processing in addition to
    stemming. Add Turkish and Romanian stopwords lists to support this.
    (Robert Muir, Uwe Schindler, Simon Willnauer)
+
+ * LUCENE-2603: Add setMultiValuedSeparator(char) method to set an arbitrary
+   char that is used when concatenating multiValued data. Default is a space
+   (' '). It is applied to ANALYZED fields only. (Koji Sekiguchi)
    
 New features
 

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=986173&r1=986172&r2=986173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Tue Aug 17 02:39:40 2010
@@ -44,6 +44,7 @@ public abstract class BaseFragmentsBuild
     "<b style=\"background:turquoise\">", "<b style=\"background:powderblue\">"
   };
   public static final String[] COLORED_POST_TAGS = { "</b>" };
+  private char multiValuedSeparator = ' ';
   
   protected BaseFragmentsBuilder(){
     this( new String[]{ "<b>" }, new String[]{ "</b>" } );
@@ -155,7 +156,7 @@ public abstract class BaseFragmentsBuild
       int startOffset, int endOffset ){
     while( buffer.length() < endOffset && index[0] < values.length ){
       if( index[0] > 0 && values[index[0]].length() > 0 )
-        buffer.append( ' ' );
+        buffer.append( multiValuedSeparator );
       buffer.append( values[index[0]++] );
     }
     int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
@@ -166,12 +167,20 @@ public abstract class BaseFragmentsBuild
       int startOffset, int endOffset ){
     while( buffer.length() < endOffset && index[0] < values.length ){
       if( index[0] > 0 && values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 )
-        buffer.append( ' ' );
+        buffer.append( multiValuedSeparator );
       buffer.append( values[index[0]++].stringValue() );
     }
     int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
     return buffer.substring( startOffset, eo );
   }
+  
+  public void setMultiValuedSeparator( char separator ){
+    multiValuedSeparator = separator;
+  }
+  
+  public char getMultiValuedSeparator(){
+    return multiValuedSeparator;
+  }
 
   protected String getPreTag( int num ){
     return getPreTag( preTags, num );

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=986173&r1=986172&r2=986173&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Tue Aug 17 02:39:40 2010
@@ -148,6 +148,21 @@ public class SimpleFragmentsBuilderTest 
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
     FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
     SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+    // '/' separator doesn't affect the snippet because the field is NOT_ANALYZED
+    sfb.setMultiValuedSeparator( '/' );
     assertEquals( "abc<b>defg</b>hijkl", sfb.createFragment( reader, 0, F, ffl ) );
   }
+  
+  public void testMVSeparator() throws Exception {
+    makeIndexShortMV();
+
+    FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
+    FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
+    FieldPhraseList fpl = new FieldPhraseList( stack, fq );
+    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
+    FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
+    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+    sfb.setMultiValuedSeparator( '/' );
+    assertEquals( "a b c/<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
+  }
 }