You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ko...@apache.org on 2012/06/04 17:18:26 UTC

svn commit: r1346006 - in /lucene/dev/branches/branch_4x: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/common/ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ lucene/analysis/common/src/java/org/apache/lucene/analys...

Author: koji
Date: Mon Jun  4 15:18:23 2012
New Revision: 1346006

URL: http://svn.apache.org/viewvc?rev=1346006&view=rev
Log:
LUCENE-4107: FVH: make FieldFragList plugable, part of LUCENE-3440

Added:
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java
      - copied unchanged from r1346001, lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFieldFragList.java
      - copied unchanged from r1346001, lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFieldFragList.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/BUILD.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/MIGRATE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/README.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/package.html   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/package.html   (props changed)
    lucene/dev/branches/branch_4x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_4x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/demo/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/grouping/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java
    lucene/dev/branches/branch_4x/lucene/ivy-settings.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/join/   (props changed)
    lucene/dev/branches/branch_4x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/module-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/site/   (props changed)
    lucene/dev/branches/branch_4x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/README.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/cloud-dev/   (props changed)
    lucene/dev/branches/branch_4x/solr/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/example/   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpclient-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpclient-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpcore-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpcore-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpmime-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpmime-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/scripts/   (props changed)
    lucene/dev/branches/branch_4x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_4x/solr/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/solr/testlogging.properties   (props changed)
    lucene/dev/branches/branch_4x/solr/webapp/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java?rev=1346006&r1=1346005&r2=1346006&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java Mon Jun  4 15:18:23 2012
@@ -27,7 +27,7 @@ import org.apache.lucene.search.vectorhi
  * FieldFragList has a list of "frag info" that is used by FragmentsBuilder class
  * to create fragments (snippets).
  */
-public class FieldFragList {
+public abstract class FieldFragList {
 
   private List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
 
@@ -46,9 +46,7 @@ public class FieldFragList {
    * @param endOffset end offset of the fragment
    * @param phraseInfoList list of WeightedPhraseInfo objects
    */
-  public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ){
-    fragInfos.add( new WeightedFragInfo( startOffset, endOffset, phraseInfoList ) );
-  }
+  public abstract void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList );
   
   /**
    * return the list of WeightedFragInfos.
@@ -66,15 +64,11 @@ public class FieldFragList {
     private int startOffset;
     private int endOffset;
 
-    public WeightedFragInfo( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ){
+    public WeightedFragInfo( int startOffset, int endOffset, List<SubInfo> subInfos, float totalBoost ){
       this.startOffset = startOffset;
       this.endOffset = endOffset;
-      subInfos = new ArrayList<SubInfo>();
-      for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
-        SubInfo subInfo = new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() );
-        subInfos.add( subInfo );
-        totalBoost += phraseInfo.getBoost();
-      }
+      this.totalBoost = totalBoost;
+      this.subInfos = subInfos;
     }
     
     public List<SubInfo> getSubInfos(){

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=1346006&r1=1346005&r2=1346006&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Mon Jun  4 15:18:23 2012
@@ -42,13 +42,22 @@ public class FieldPhraseList {
   }
   
   /**
+   * return the list of WeightedPhraseInfo.
+   * 
+   * @return phraseList.
+   */ 
+  public List<WeightedPhraseInfo> getPhraseList() {
+    return phraseList; 
+  }
+  
+  /**
    * a constructor.
    * 
    * @param fieldTermStack FieldTermStack object
    * @param fieldQuery FieldQuery object
    * @param phraseLimit maximum size of phraseList
    */
-  public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit){
+  public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){
     final String field = fieldTermStack.getFieldName();
 
     LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java?rev=1346006&r1=1346005&r2=1346006&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java Mon Jun  4 15:18:23 2012
@@ -17,83 +17,25 @@ package org.apache.lucene.search.vectorh
  * limitations under the License.
  */
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
 
 /**
  * A simple implementation of {@link FragListBuilder}.
  */
-public class SimpleFragListBuilder implements FragListBuilder {
+public class SimpleFragListBuilder extends BaseFragListBuilder {
   
-  public static final int MARGIN_DEFAULT = 6;
-  public static final int MIN_FRAG_CHAR_SIZE_FACTOR = 3;
-
-  final int margin;
-  final int minFragCharSize;
-
-  public SimpleFragListBuilder( int margin ){
-    if( margin < 0 )
-      throw new IllegalArgumentException( "margin(" + margin + ") is too small. It must be 0 or higher." );
-
-    this.margin = margin;
-    this.minFragCharSize = Math.max( 1, margin * MIN_FRAG_CHAR_SIZE_FACTOR );
+  public SimpleFragListBuilder() {
+    super();
   }
 
-  public SimpleFragListBuilder(){
-    this( MARGIN_DEFAULT );
+  public SimpleFragListBuilder(int margin) {
+    super(margin);
   }
 
-  public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize) {
-    if( fragCharSize < minFragCharSize )
-      throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " +
-          minFragCharSize + " or higher." );
-
-    FieldFragList ffl = new FieldFragList( fragCharSize );
-
-    List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
-    Iterator<WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator();
-    WeightedPhraseInfo phraseInfo = null;
-    int startOffset = 0;
-    boolean taken = false;
-    while( true ){
-      if( !taken ){
-        if( !ite.hasNext() ) break;
-        phraseInfo = ite.next();
-      }
-      taken = false;
-      if( phraseInfo == null ) break;
-
-      // if the phrase violates the border of previous fragment, discard it and try next phrase
-      if( phraseInfo.getStartOffset() < startOffset ) continue;
-
-      wpil.clear();
-      wpil.add( phraseInfo );
-      int st = phraseInfo.getStartOffset() - margin < startOffset ?
-          startOffset : phraseInfo.getStartOffset() - margin;
-      int en = st + fragCharSize;
-      if( phraseInfo.getEndOffset() > en )
-        en = phraseInfo.getEndOffset();
-      startOffset = en;
-
-      while( true ){
-        if( ite.hasNext() ){
-          phraseInfo = ite.next();
-          taken = true;
-          if( phraseInfo == null ) break;
-        }
-        else
-          break;
-        if( phraseInfo.getEndOffset() <= en )
-          wpil.add( phraseInfo );
-        else
-          break;
-      }
-      ffl.add( st, en, wpil );
-    }
-    return ffl;
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.vectorhighlight.FragListBuilder#createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
+   */ 
+  public FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, int fragCharSize ){
+    return createFieldFragList( fieldPhraseList, new SimpleFieldFragList( fragCharSize ), fragCharSize );
   }
 
 }

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java?rev=1346006&r1=1346005&r2=1346006&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java Mon Jun  4 15:18:23 2012
@@ -38,7 +38,7 @@ public class SingleFragListBuilder imple
   public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList,
       int fragCharSize) {
 
-    FieldFragList ffl = new FieldFragList( fragCharSize );
+    FieldFragList ffl = new SimpleFieldFragList( fragCharSize );
 
     List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
     Iterator<WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator();