You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/09 15:06:52 UTC

svn commit: r792542 [1/3] - in /lucene/java/trunk: ./ contrib/ contrib/fast-vector-highlighter/ contrib/fast-vector-highlighter/src/ contrib/fast-vector-highlighter/src/java/ contrib/fast-vector-highlighter/src/java/org/ contrib/fast-vector-highlighter...

Author: mikemccand
Date: Thu Jul  9 13:06:51 2009
New Revision: 792542

URL: http://svn.apache.org/viewvc?rev=792542&view=rev
Log:
LUCENE-1522: adding new Fast Vector Highlighter contrib

Added:
    lucene/java/trunk/contrib/fast-vector-highlighter/
    lucene/java/trunk/contrib/fast-vector-highlighter/build.xml   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragListBuilder.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilder.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilder.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java   (with props)
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java   (with props)
Modified:
    lucene/java/trunk/build.xml
    lucene/java/trunk/contrib/CHANGES.txt
    lucene/java/trunk/docs/benchmarks.html
    lucene/java/trunk/docs/broken-links.xml
    lucene/java/trunk/docs/contributions.html
    lucene/java/trunk/docs/demo.html
    lucene/java/trunk/docs/demo2.html
    lucene/java/trunk/docs/demo3.html
    lucene/java/trunk/docs/demo4.html
    lucene/java/trunk/docs/fileformats.html
    lucene/java/trunk/docs/gettingstarted.html
    lucene/java/trunk/docs/index.html
    lucene/java/trunk/docs/linkmap.html
    lucene/java/trunk/docs/linkmap.pdf
    lucene/java/trunk/docs/lucene-sandbox/index.html
    lucene/java/trunk/docs/lucene-sandbox/index.pdf
    lucene/java/trunk/docs/queryparsersyntax.html
    lucene/java/trunk/docs/scoring.html
    lucene/java/trunk/src/site/src/documentation/content/xdocs/lucene-sandbox/index.xml
    lucene/java/trunk/src/site/src/documentation/content/xdocs/site.xml

Modified: lucene/java/trunk/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/build.xml?rev=792542&r1=792541&r2=792542&view=diff
==============================================================================
--- lucene/java/trunk/build.xml (original)
+++ lucene/java/trunk/build.xml Thu Jul  9 13:06:51 2009
@@ -312,6 +312,7 @@
           <packageset dir="contrib/collation/src/java"/>
           <packageset dir="contrib/db/bdb-je/src/java"/>
           <packageset dir="contrib/db/bdb/src/java"/>
+          <packageset dir="contrib/fast-vector-highlighter/src/java"/>
           <packageset dir="contrib/highlighter/src/java"/>
           <packageset dir="contrib/instantiated/src/java"/>
           <packageset dir="contrib/lucli/src/java"/>
@@ -343,6 +344,7 @@
           <group title="contrib: Benchmark" packages="org.apache.lucene.benchmark*"/>
           <group title="contrib: Collation" packages="org.apache.lucene.collation*"/>
           <group title="contrib: DB" packages="org.apache.lucene.store.db*:org.apache.lucene.store.je*:com.sleepycat*"/>
+          <group title="contrib: Fast Vector Highlighter" packages="org.apache.lucene.search.vectorhighlight*"/>
           <group title="contrib: Highlighter" packages="org.apache.lucene.search.highlight*"/>
           <group title="contrib: Instantiated" packages="org.apache.lucene.store.instantiated*"/>
           <group title="contrib: Lucli" packages="lucli*"/>

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=792542&r1=792541&r2=792542&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Thu Jul  9 13:06:51 2009
@@ -65,6 +65,9 @@
  7. LUCENE-1704: Allow specifying the Tidy configuration file when
     parsing HTML docs with contrib/ant.  (Keith Sprochi via Mike
     McCandless)
+
+ 8. LUCENE-1522: Added contrib/fast-vector-highlighter, a new alternative
+    highlighter.  (Koji Sekiguchi via Mike McCandless)
  
 Optimizations
 

Added: lucene/java/trunk/contrib/fast-vector-highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/build.xml?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/build.xml (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/build.xml Thu Jul  9 13:06:51 2009
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+ 
+        http://www.apache.org/licenses/LICENSE-2.0
+ 
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+<project name="fast-vector-highlighter" default="default">
+
+  <description>
+    Hits highlighter using TermVectors
+  </description>
+
+  <property name="javac.source" value="1.5" />
+  <property name="javac.target" value="1.5" />
+
+  <import file="../contrib-build.xml"/>
+
+  <property name="analyzers.jar" location="${common.dir}/build/contrib/analyzers/lucene-analyzers-${version}.jar"/>
+  <available property="analyzers.jar.present" type="file" file="${analyzers.jar}"/>
+
+  <path id="classpath">
+    <pathelement path="${lucene.jar}"/>
+    <pathelement path="${analyzers.jar}"/>
+    <pathelement path="${project.classpath}"/>
+  </path>
+
+  <target name="compile-core" depends="build-analyzers, common.compile-core" />
+
+  <target name="build-analyzers" unless="analyzers.jar.present">
+    <echo>Fast Vector Highlighter building dependency ${analyzers.jar}</echo>
+    <ant antfile="../analyzers/build.xml" target="default" inheritall="false" dir="../analyzers" />
+  </target>
+
+</project>

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,179 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.MapFieldSelector;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
+import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+
+/**
+ * Base class for FragmentsBuilder implementations. It reads the values of the
+ * field from the document, cuts out the text of each fragment and wraps the
+ * matched terms in pre/post tags. Subclasses choose the order of the fragments
+ * by implementing getWeightedFragInfoList.
+ */
+public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
+
+  protected String[] preTags, postTags;
+  public static final String[] COLORED_PRE_TAGS = {
+    "<b style=\"background:yellow\">", "<b style=\"background:lawngreen\">", "<b style=\"background:aquamarine\">",
+    "<b style=\"background:magenta\">", "<b style=\"background:palegreen\">", "<b style=\"background:coral\">",
+    "<b style=\"background:wheat\">", "<b style=\"background:khaki\">", "<b style=\"background:lime\">",
+    "<b style=\"background:deepskyblue\">"
+  };
+  public static final String[] COLORED_POST_TAGS = { "</b>" };
+  
+  /** Creates a builder that wraps every match in {@code <b>} and {@code </b>}. */
+  protected BaseFragmentsBuilder(){
+    this( new String[]{ "<b>" }, new String[]{ "</b>" } );
+  }
+  
+  /**
+   * @param preTags tags inserted before a match, selected by the phrase's seqnum
+   * @param postTags tags inserted after a match, selected by the phrase's seqnum
+   */
+  protected BaseFragmentsBuilder( String[] preTags, String[] postTags ){
+    this.preTags = preTags;
+    this.postTags = postTags;
+  }
+  
+  /**
+   * Validates a tags argument.
+   *
+   * @return tags itself when it is a String or a String[]
+   * @throws IllegalArgumentException for any other type, including null
+   */
+  static Object checkTagsArgument( Object tags ){
+    if( tags instanceof String ) return tags;
+    else if( tags instanceof String[] ) return tags;
+    throw new IllegalArgumentException( "type of preTags/postTags must be a String or String[]" );
+  }
+  
+  /**
+   * Returns the fragment infos in the order in which fragments are to be built.
+   *
+   * @param src the fragInfos of the FieldFragList being rendered
+   */
+  public abstract List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src );
+  
+  /**
+   * Builds the first fragment only.
+   *
+   * @return the fragment, or null when there is none
+   */
+  public String createFragment( IndexReader reader, int docId,
+      String fieldName, FieldFragList fieldFragList ) throws IOException {
+    String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1 );
+    if( fragments == null || fragments.length == 0 ) return null;
+    return fragments[0];
+  }
+
+  /**
+   * Builds at most maxNumFragments fragments, in the order given by
+   * getWeightedFragInfoList.
+   *
+   * @param maxNumFragments upper bound on the number of fragments; 0 yields an empty array
+   * @return the fragments; the array may be shorter than maxNumFragments
+   * @throws IllegalArgumentException if maxNumFragments is negative
+   */
+  public String[] createFragments( IndexReader reader, int docId,
+      String fieldName, FieldFragList fieldFragList, int maxNumFragments )
+      throws IOException {
+    if( maxNumFragments < 0 )
+      throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must not be negative." );
+
+    List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
+    
+    // Size the result by what can actually be built: maxNumFragments is only an
+    // upper bound and may be huge, which must not preallocate a huge backing array.
+    final int numFragments = Math.min( maxNumFragments, fragInfos.size() );
+    List<String> fragments = new ArrayList<String>( numFragments );
+    String[] values = getFieldValues( reader, docId, fieldName );
+    StringBuilder buffer = new StringBuilder();
+    int[] nextValueIndex = { 0 };
+    for( int n = 0; n < numFragments; n++ ){
+      WeightedFragInfo fragInfo = fragInfos.get( n );
+      fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo ) );
+    }
+    return fragments.toArray( new String[fragments.size()] );
+  }
+  
+  /** Loads the given field of the document and returns all of its values. */
+  protected String[] getFieldValues( IndexReader reader, int docId, String fieldName) throws IOException {
+    Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
+    return doc.getValues( fieldName );
+  }
+
+  /**
+   * Cuts the text of one fragment out of the field values and surrounds every
+   * matched range with the pre/post tag selected by its phrase's seqnum.
+   *
+   * @param buffer accumulates the concatenated field values across calls
+   * @param index one-element array holding the index of the next value to append
+   */
+  protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){
+    StringBuilder fragment = new StringBuilder();
+    final int s = fragInfo.startOffset;
+    String src = getFragmentSource( buffer, index, values, s, fragInfo.endOffset );
+    int srcIndex = 0;
+    for( SubInfo subInfo : fragInfo.subInfos ){
+      for( Toffs to : subInfo.termsOffsets ){
+        // offsets are relative to the whole field text while src starts at s
+        fragment.append( src.substring( srcIndex, to.startOffset - s ) ).append( getPreTag( subInfo.seqnum ) )
+          .append( src.substring( to.startOffset - s, to.endOffset - s ) ).append( getPostTag( subInfo.seqnum ) );
+        srcIndex = to.endOffset - s;
+      }
+    }
+    fragment.append( src.substring( srcIndex ) );
+    return fragment.toString();
+  }
+  
+  /**
+   * Appends field values to buffer, separated by a single space, until buffer
+   * reaches endOffset or the values run out; no space is put before an empty value.
+   *
+   * @return the buffer content from startOffset to endOffset, cut at the buffer length
+   */
+  protected String getFragmentSource( StringBuilder buffer, int[] index, String[] values,
+      int startOffset, int endOffset ){
+    while( buffer.length() < endOffset && index[0] < values.length ){
+      if( index[0] > 0 && values[index[0]].length() > 0 )
+        buffer.append( ' ' );
+      buffer.append( values[index[0]++] );
+    }
+    int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
+    return buffer.substring( startOffset, eo );
+  }
+  
+  /** Returns preTags[num], or the first pre tag when num is beyond the array. */
+  protected String getPreTag( int num ){
+    return preTags.length > num ? preTags[num] : preTags[0];
+  }
+  
+  /** Returns postTags[num], or the first post tag when num is beyond the array. */
+  protected String getPostTag( int num ){
+    return postTags.length > num ? postTags[num] : postTags[0];
+  }
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,137 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
+
+/**
+ * Another highlighter implementation.
+ * It builds fragments with a pluggable FragListBuilder and FragmentsBuilder.
+ */
+public class FastVectorHighlighter {
+
+  public static final boolean DEFAULT_PHRASE_HIGHLIGHT = true;
+  public static final boolean DEFAULT_FIELD_MATCH = true;
+  private final boolean phraseHighlight;
+  private final boolean fieldMatch;
+  private final FragListBuilder fragListBuilder;
+  private final FragmentsBuilder fragmentsBuilder;
+
+  /**
+   * the default constructor. Uses DEFAULT_PHRASE_HIGHLIGHT and DEFAULT_FIELD_MATCH.
+   */
+  public FastVectorHighlighter(){
+    this( DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH );
+  }
+
+  /**
+   * a constructor. Using SimpleFragListBuilder and ScoreOrderFragmentsBuilder.
+   * 
+   * @param phraseHighlight true or false for phrase highlighting
+   * @param fieldMatch true or false for field matching
+   */
+  public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch ){
+    this( phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder() );
+  }
+
+  /**
+   * a constructor. A FragListBuilder and a FragmentsBuilder can be specified (plugins).
+   * 
+   * @param phraseHighlight true or false for phrase highlighting
+   * @param fieldMatch true or false for field matching
+   * @param fragListBuilder an instance of FragListBuilder
+   * @param fragmentsBuilder an instance of FragmentsBuilder
+   */
+  public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch,
+      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder ){
+    this.phraseHighlight = phraseHighlight;
+    this.fieldMatch = fieldMatch;
+    this.fragListBuilder = fragListBuilder;
+    this.fragmentsBuilder = fragmentsBuilder;
+  }
+
+  /**
+   * create a FieldQuery object using this highlighter's phraseHighlight and fieldMatch settings.
+   * 
+   * @param query a query
+   * @return the created FieldQuery object
+   */
+  public FieldQuery getFieldQuery( Query query ){
+    return new FieldQuery( query, phraseHighlight, fieldMatch );
+  }
+
+  /**
+   * return the best fragment.
+   * 
+   * @param fieldQuery FieldQuery object
+   * @param reader IndexReader of the index
+   * @param docId document id to be highlighted
+   * @param fieldName field of the document to be highlighted
+   * @param fragCharSize the length (number of chars) of a fragment
+   * @return the best fragment (snippet) string
+   * @throws IOException if thrown while the term stack or the fragment is built
+   */
+  public final String getBestFragment( final FieldQuery fieldQuery, IndexReader reader, int docId,
+      String fieldName, int fragCharSize ) throws IOException {
+    FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize );
+    return fragmentsBuilder.createFragment( reader, docId, fieldName, fieldFragList );
+  }
+
+  /**
+   * return the best fragments.
+   * 
+   * @param fieldQuery FieldQuery object
+   * @param reader IndexReader of the index
+   * @param docId document id to be highlighted
+   * @param fieldName field of the document to be highlighted
+   * @param fragCharSize the length (number of chars) of a fragment
+   * @param maxNumFragments maximum number of fragments
+   * @return created fragments or null when no fragments created.
+   *         size of the array can be less than maxNumFragments
+   * @throws IOException if thrown while the term stack or the fragments are built
+   */
+  public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
+      String fieldName, int fragCharSize, int maxNumFragments ) throws IOException {
+    FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize );
+    return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments );
+  }
+  
+  private FieldFragList getFieldFragList( final FieldQuery fieldQuery, IndexReader reader, int docId,
+      String fieldName, int fragCharSize ) throws IOException {
+    FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, fieldName, fieldQuery );
+    FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery );
+    return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize );
+  }
+
+  /**
+   * return whether phraseHighlight or not.
+   * 
+   * @return the phraseHighlight flag given to the constructor
+   */
+  public boolean isPhraseHighlight(){ return phraseHighlight; }
+
+  /**
+   * return whether fieldMatch or not.
+   * 
+   * @return the fieldMatch flag given to the constructor
+   */
+  public boolean isFieldMatch(){ return fieldMatch; }
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,103 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
+import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+
+/**
+ * FieldFragList has a list of "frag info" that is used by FragmentsBuilder class
+ * to create fragments (snippets).
+ */
+public class FieldFragList {
+
+  private final int fragCharSize;  // set by the constructor; not read anywhere in this class
+  List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();  // one entry per add() call, in call order
+
+  /**
+   * a constructor. The list of frag infos starts out empty.
+   * 
+   * @param fragCharSize the length (number of chars) of a fragment
+   */
+  public FieldFragList( int fragCharSize ){
+    this.fragCharSize = fragCharSize;
+  }
+
+  /**
+   * convert the list of WeightedPhraseInfo to WeightedFragInfo, then add it to the fragInfos
+   * 
+   * @param startOffset start offset of the fragment
+   * @param endOffset end offset of the fragment
+   * @param phraseInfoList list of WeightedPhraseInfo objects
+   */
+  public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ){
+    fragInfos.add( new WeightedFragInfo( startOffset, endOffset, phraseInfoList ) );
+  }
+  
+  public static class WeightedFragInfo {
+
+    List<SubInfo> subInfos;  // one SubInfo per phrase of the fragment, in phraseInfoList order
+    float totalBoost;  // sum of the boosts of all phrases of the fragment
+    int startOffset;
+    int endOffset;
+
+    public WeightedFragInfo( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ){
+      this.startOffset = startOffset;
+      this.endOffset = endOffset;
+      subInfos = new ArrayList<SubInfo>();
+      for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
+        SubInfo subInfo = new SubInfo( phraseInfo.text, phraseInfo.termsOffsets, phraseInfo.seqnum );
+        subInfos.add( subInfo );
+        totalBoost += phraseInfo.boost;
+      }
+    }
+    
+    public String toString(){  // format: subInfos=(<subInfo>...)/<totalBoost>(<startOffset>,<endOffset>)
+      StringBuilder sb = new StringBuilder();
+      sb.append( "subInfos=(" );
+      for( SubInfo si : subInfos )
+        sb.append( si.toString() );
+      sb.append( ")/" ).append( totalBoost ).append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
+      return sb.toString();
+    }
+    
+    static class SubInfo {
+      final String text;  // unnecessary member, just exists for debugging purpose
+      final List<Toffs> termsOffsets;   // usually termsOffsets.size() == 1,
+                              // but if position-gap > 1 and slop > 0 then size() could be greater than 1
+      int seqnum;  // copied from WeightedPhraseInfo.seqnum
+      SubInfo( String text, List<Toffs> termsOffsets, int seqnum ){
+        this.text = text;
+        this.termsOffsets = termsOffsets;  // the list is shared with the WeightedPhraseInfo, not copied
+        this.seqnum = seqnum;
+      }
+      
+      public String toString(){  // format: <text>(<toffs>...)
+        StringBuilder sb = new StringBuilder();
+        sb.append( text ).append( '(' );
+        for( Toffs to : termsOffsets )
+          sb.append( to.toString() );
+        sb.append( ')' );
+        return sb.toString();
+      }
+    }
+  }
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,200 @@
+package org.apache.lucene.search.vectorhighlight;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
+import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+
+/**
+ * FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
+ * to create a FieldFragList object.
+ */
+public class FieldPhraseList {
+
+  LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
+
+  /**
+   * Pops the terms off the stack and collects the query terms and phrases found among them.
+   * 
+   * @param fieldTermStack FieldTermStack object
+   * @param fieldQuery FieldQuery object
+   */
+  public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery ){
+    final String field = fieldTermStack.getFieldName();
+
+    LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
+    QueryPhraseMap currMap = null;
+    QueryPhraseMap nextMap = null;
+    while( !fieldTermStack.isEmpty() ){
+      
+      phraseCandidate.clear();
+
+      TermInfo ti = fieldTermStack.pop();
+      currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
+
+      // if not found, discard top TermInfo from stack, then try next element
+      if( currMap == null ) continue;
+      
+      // if found, search the longest phrase
+      phraseCandidate.add( ti );
+      while( true ){
+        // NOTE(review): the null checks below assume pop() returns null on an empty stack -- confirm in FieldTermStack
+        ti = fieldTermStack.pop();
+        nextMap = null;
+        if( ti != null )
+          nextMap = currMap.getTermMap( ti.getText() );
+        if( ti == null || nextMap == null ){
+          if( ti != null )
+            fieldTermStack.push( ti );
+          if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
+            addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
+          }
+          else{
+            // the candidate is not a valid phrase: hand its last term back to the
+            // stack and look the shorter candidate up again, until one matches
+            while( phraseCandidate.size() > 1 ){
+              fieldTermStack.push( phraseCandidate.removeLast() );
+              currMap = fieldQuery.searchPhrase( field, phraseCandidate );
+              if( currMap != null ){
+                addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
+                break;
+              }
+            }
+          }
+          break;
+        }
+        else{
+          phraseCandidate.add( ti );
+          currMap = nextMap;
+        }
+      }
+    }
+  }
+  
+  /** Appends wpi unless its offset range overlaps a phrase that was added earlier. */
+  void addIfNoOverlap( WeightedPhraseInfo wpi ){
+    for( WeightedPhraseInfo existWpi : phraseList ){
+      if( existWpi.isOffsetOverlap( wpi ) ) return;
+    }
+    phraseList.add( wpi );
+  }
+  
+  /** A term or phrase found in the field, with its boost, sequence number and offset ranges. */
+  public static class WeightedPhraseInfo {
+
+    String text;  // unnecessary member, just exists for debugging purpose
+    List<Toffs> termsOffsets;   // usually termsOffsets.size() == 1,
+                            // but if position-gap > 1 and slop > 0 then size() could be greater than 1
+    float boost;  // query boost
+    int seqnum;
+    
+    /** Same as the three-argument constructor with number 0. */
+    public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
+      this( terms, boost, 0 );
+    }
+    
+    /**
+     * @param terms the terms of the phrase in order; must not be empty
+     * @param boost query boost
+     * @param number value stored as seqnum (callers pass the term or phrase number of the query)
+     */
+    public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int number ){
+      this.boost = boost;
+      this.seqnum = number;
+      termsOffsets = new ArrayList<Toffs>( terms.size() );
+      TermInfo ti = terms.get( 0 );
+      termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
+      if( terms.size() == 1 ){
+        text = ti.getText();
+        return;
+      }
+      StringBuilder sb = new StringBuilder();
+      sb.append( ti.getText() );
+      int pos = ti.getPosition();
+      for( int i = 1; i < terms.size(); i++ ){
+        ti = terms.get( i );
+        sb.append( ti.getText() );
+        // consecutive position: extend the current range instead of starting a new one
+        if( ti.getPosition() - pos == 1 ){
+          Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
+          to.setEndOffset( ti.getEndOffset() );
+        }
+        else{
+          termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
+        }
+        pos = ti.getPosition();
+      }
+      text = sb.toString();
+    }
+    
+    /** Returns the start offset of the first range. */
+    public int getStartOffset(){
+      return termsOffsets.get( 0 ).startOffset;
+    }
+    
+    /** Returns the end offset of the last range. */
+    public int getEndOffset(){
+      return termsOffsets.get( termsOffsets.size() - 1 ).endOffset;
+    }
+    
+    /**
+     * Tells whether this phrase and other share at least one character.
+     * End offsets are exclusive (fragments are cut with substring( start, end )),
+     * so two phrases that merely touch, e.g. (0,2) and (2,4), do not overlap.
+     */
+    public boolean isOffsetOverlap( WeightedPhraseInfo other ){
+      int so = getStartOffset();
+      int eo = getEndOffset();
+      int oso = other.getStartOffset();
+      int oeo = other.getEndOffset();
+      return so < oeo && oso < eo;
+    }
+    
+    /** Debug representation: text(boost)(ranges). */
+    public String toString(){
+      StringBuilder sb = new StringBuilder();
+      sb.append( text ).append( '(' ).append( boost ).append( ")(" );
+      for( Toffs to : termsOffsets ){
+        sb.append( to );
+      }
+      sb.append( ')' );
+      return sb.toString();
+    }
+    
+    /** A character range of the field text, from startOffset up to (not including) endOffset. */
+    public static class Toffs {
+      int startOffset;
+      int endOffset;
+      public Toffs( int startOffset, int endOffset ){
+        this.startOffset = startOffset;
+        this.endOffset = endOffset;
+      }
+      void setEndOffset( int endOffset ){
+        this.endOffset = endOffset;
+      }
+      public String toString(){
+        StringBuilder sb = new StringBuilder();
+        sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
+        return sb.toString();
+      }
+    }
+  }
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,391 @@
+package org.apache.lucene.search.vectorhighlight;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+
+/**
+ * FieldQuery breaks a query object down into terms/phrases and keeps
+ * them in a QueryPhraseMap structure.
+ */
+public class FieldQuery {
+
+  final boolean fieldMatch;
+
+  // fieldMatch==true,  Map<fieldName,QueryPhraseMap>
+  // fieldMatch==false, Map<null,QueryPhraseMap>
+  Map<String, QueryPhraseMap> rootMaps = new HashMap<String, QueryPhraseMap>();
+
+  // fieldMatch==true,  Map<fieldName,setOfTermsInQueries>
+  // fieldMatch==false, Map<null,setOfTermsInQueries>
+  Map<String, Set<String>> termSetMap = new HashMap<String, Set<String>>();
+
+  int termOrPhraseNumber; // used for colored tag support
+
+  /**
+   * Flattens <code>query</code>, saves its terms, expands overlapping phrases
+   * and registers every resulting query in the root map selected by its key.
+   *
+   * @param query the query to break down
+   * @param phraseHighlight if false, every term of a multi-term phrase is
+   *        additionally registered as a stand-alone term with the phrase's boost
+   * @param fieldMatch if true, root maps and term sets are keyed by field name;
+   *        otherwise a single null key is used
+   */
+  FieldQuery( Query query, boolean phraseHighlight, boolean fieldMatch ){
+    this.fieldMatch = fieldMatch;
+    Set<Query> flatQueries = new HashSet<Query>();
+    flatten( query, flatQueries );
+    saveTerms( flatQueries );
+
+    for( Query expanded : expand( flatQueries ) ){
+      QueryPhraseMap rootMap = getRootMap( expanded );
+      rootMap.add( expanded );
+      // only phrases need the extra per-term registration, and only without phrase highlighting
+      if( phraseHighlight || !( expanded instanceof PhraseQuery ) ) continue;
+      Term[] phraseTerms = ( (PhraseQuery)expanded ).getTerms();
+      if( phraseTerms.length <= 1 ) continue;
+      for( Term term : phraseTerms )
+        rootMap.addTerm( term, expanded.getBoost() );
+    }
+  }
+  
+  /**
+   * Recursively breaks <code>sourceQuery</code> down into TermQuery and
+   * multi-term PhraseQuery objects and collects them in <code>flatQueries</code>.
+   * <ul>
+   * <li>BooleanQuery: every non-prohibited clause is flattened in turn.</li>
+   * <li>TermQuery: added as is.</li>
+   * <li>PhraseQuery with two or more terms: added as is.</li>
+   * <li>PhraseQuery with exactly one term: replaced by an equivalent TermQuery
+   *     that keeps the boost of the phrase.</li>
+   * <li>anything else (including a PhraseQuery without terms): discarded.</li>
+   * </ul>
+   * A query that is already contained in <code>flatQueries</code> is not added again.
+   *
+   * @param sourceQuery the query to flatten
+   * @param flatQueries the collection that receives the flattened queries
+   */
+  void flatten( Query sourceQuery, Collection<Query> flatQueries ){
+    if( sourceQuery instanceof BooleanQuery ){
+      BooleanQuery bq = (BooleanQuery)sourceQuery;
+      for( BooleanClause clause : bq.getClauses() ){
+        if( !clause.isProhibited() )
+          flatten( clause.getQuery(), flatQueries );
+      }
+    }
+    else if( sourceQuery instanceof TermQuery ){
+      if( !flatQueries.contains( sourceQuery ) )
+        flatQueries.add( sourceQuery );
+    }
+    else if( sourceQuery instanceof PhraseQuery ){
+      if( !flatQueries.contains( sourceQuery ) ){
+        PhraseQuery pq = (PhraseQuery)sourceQuery;
+        Term[] terms = pq.getTerms();
+        if( terms.length > 1 )
+          flatQueries.add( pq );
+        else if( terms.length == 1 ){
+          // the replacement must carry the phrase's boost, otherwise a boosted
+          // single-term phrase would silently fall back to the default boost
+          Query flat = new TermQuery( terms[0] );
+          flat.setBoost( pq.getBoost() );
+          if( !flatQueries.contains( flat ) )
+            flatQueries.add( flat );
+        }
+      }
+    }
+    // else discard queries
+  }
+  
+  /*
+   * Create expandQueries from flatQueries.
+   *
+   * expandQueries := flatQueries + overlapped phrase queries
+   *
+   * ex1) flatQueries={a,b,c}
+   *      => expandQueries={a,b,c}
+   * ex2) flatQueries={a,"b c","c d"}
+   *      => expandQueries={a,"b c","c d","b c d"}
+   *
+   * Note that flatQueries is consumed: each query is removed from it as soon
+   * as it has been visited, so the collection is empty when this method returns.
+   * Only pairs of PhraseQuery objects are checked for overlap.
+   */
+  Collection<Query> expand( Collection<Query> flatQueries ){
+    Set<Query> expandQueries = new HashSet<Query>();
+    for( Iterator<Query> i = flatQueries.iterator(); i.hasNext(); ){
+      Query query = i.next();
+      i.remove();   // removed before the inner loop, so that loop only sees the queries not visited yet
+      expandQueries.add( query );
+      if( !( query instanceof PhraseQuery ) ) continue;
+      for( Iterator<Query> j = flatQueries.iterator(); j.hasNext(); ){
+        Query qj = j.next();
+        if( !( qj instanceof PhraseQuery ) ) continue;
+        checkOverlap( expandQueries, (PhraseQuery)query, (PhraseQuery)qj );   // tries both directions
+      }
+    }
+    return expandQueries;
+  }
+
+  /*
+   * Check if PhraseQuery A and B have overlapped part.
+   * 
+   * ex1) A="a b", B="b c" => overlap; expandQueries={"a b c"}
+   * ex2) A="b c", B="a b" => overlap; expandQueries={"a b c"}
+   * ex3) A="a b", B="c d" => no overlap; expandQueries={}
+   */
+  private void checkOverlap( Collection<Query> expandQueries, PhraseQuery a, PhraseQuery b ){
+    if( a.getSlop() != b.getSlop() ) return;
+    Term[] ats = a.getTerms();
+    Term[] bts = b.getTerms();
+    if( fieldMatch && !ats[0].field().equals( bts[0].field() ) ) return;
+    checkOverlap( expandQueries, ats, bts, a.getSlop(), a.getBoost() );
+    checkOverlap( expandQueries, bts, ats, b.getSlop(), b.getBoost() );
+  }
+
+  /*
+   * Check if a tail of src is a proper head of dest and, if so, create the merged
+   * PhraseQuery (all of src followed by the rest of dest) and add it to expandQueries.
+   * The merged query uses the field of src[0] for the terms taken from dest, and the
+   * given slop and boost.
+   *
+   * ex1) src="a b", dest="c d"       => no overlap
+   * ex2) src="a b", dest="a b c"     => no overlap
+   * ex3) src="a b", dest="b c"       => overlap; expandQueries={"a b c"}
+   * ex4) src="a b c", dest="b c d"   => overlap; expandQueries={"a b c d"}
+   * ex5) src="a b c", dest="b c"     => no overlap
+   * ex6) src="a b c", dest="b"       => no overlap
+   * ex7) src="a a a a", dest="a a a" => overlap;
+   *                                     expandQueries={"a a a a a","a a a a a a"}
+   * ex8) src="a b c d", dest="b c"   => no overlap (dest lies inside src)
+   */
+  private void checkOverlap( Collection<Query> expandQueries, Term[] src, Term[] dest, int slop, float boost ){
+    // beginning from 1 (not 0) is safe because that the PhraseQuery has multiple terms
+    // is guaranteed in flatten() method (if PhraseQuery has only one term, flatten()
+    // converts PhraseQuery to TermQuery)
+    for( int i = 1; i < src.length; i++ ){
+      // the tail src[i..] has to be strictly shorter than dest to leave something to append;
+      // testing this first also keeps dest[j-i] below inside the array (see ex8)
+      if( src.length - i >= dest.length ) continue;
+      boolean overlap = true;
+      for( int j = i; j < src.length; j++ ){
+        if( !src[j].text().equals( dest[j-i].text() ) ){
+          overlap = false;
+          break;
+        }
+      }
+      if( overlap ){
+        PhraseQuery pq = new PhraseQuery();
+        for( Term srcTerm : src )
+          pq.add( srcTerm );
+        // append the part of dest that follows the overlapped head
+        for( int k = src.length - i; k < dest.length; k++ ){
+          pq.add( new Term( src[0].field(), dest[k].text() ) );
+        }
+        pq.setSlop( slop );
+        pq.setBoost( boost );
+        if(!expandQueries.contains( pq ) )
+          expandQueries.add( pq );
+      }
+    }
+  }
+  
+  QueryPhraseMap getRootMap( Query query ){
+    String key = getKey( query );
+    QueryPhraseMap map = rootMaps.get( key );
+    if( map == null ){
+      map = new QueryPhraseMap( this );
+      rootMaps.put( key, map );
+    }
+    return map;
+  }
+  
+  /*
+   * Return the 'key' of the Query: null if not fieldMatch, otherwise the field
+   * of the term (TermQuery) or of the first term (PhraseQuery).
+   * Any other query type is rejected with a RuntimeException.
+   */
+  private String getKey( Query query ){
+    if( !fieldMatch ) return null;
+    if( query instanceof TermQuery ){
+      TermQuery tq = (TermQuery)query;
+      return tq.getTerm().field();
+    }
+    if( query instanceof PhraseQuery ){
+      Term[] terms = ( (PhraseQuery)query ).getTerms();
+      return terms[0].field();
+    }
+    throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
+  }
+
+  /*
+   * Collect the term texts of all flattened queries into termSetMap,
+   * one set per key (field name if fieldMatch, null otherwise).
+   *
+   * ex1) q=name:john
+   *      - fieldMatch==true
+   *          termSetMap=Map<"name",Set<"john">>
+   *      - fieldMatch==false
+   *          termSetMap=Map<null,Set<"john">>
+   *
+   * ex2) q=name:john title:manager
+   *      - fieldMatch==true
+   *          termSetMap=Map<"name",Set<"john">,
+   *                         "title",Set<"manager">>
+   *      - fieldMatch==false
+   *          termSetMap=Map<null,Set<"john","manager">>
+   *
+   * ex3) q=name:"john lennon"
+   *      - fieldMatch==true
+   *          termSetMap=Map<"name",Set<"john","lennon">>
+   *      - fieldMatch==false
+   *          termSetMap=Map<null,Set<"john","lennon">>
+   */
+  void saveTerms( Collection<Query> flatQueries ){
+    for( Iterator<Query> it = flatQueries.iterator(); it.hasNext(); ){
+      Query query = it.next();
+      Set<String> termSet = getTermSet( query );
+      if( query instanceof TermQuery ){
+        termSet.add( ((TermQuery)query).getTerm().text() );
+        continue;
+      }
+      if( !( query instanceof PhraseQuery ) )
+        throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
+      Term[] phraseTerms = ((PhraseQuery)query).getTerms();
+      for( int i = 0; i < phraseTerms.length; i++ )
+        termSet.add( phraseTerms[i].text() );
+    }
+  }
+  
+  private Set<String> getTermSet( Query query ){
+    String key = getKey( query );
+    Set<String> set = termSetMap.get( key );
+    if( set == null ){
+      set = new HashSet<String>();
+      termSetMap.put( key, set );
+    }
+    return set;
+  }
+  
+  /*
+   * Return the term set saved for the field (for the null key if not fieldMatch),
+   * or null if nothing has been saved under that key.
+   */
+  Set<String> getTermSet( String field ){
+    String key = fieldMatch ? field : null;
+    return termSetMap.get( key );
+  }
+
+  /**
+   * Looks up the entry of a single term directly below the root map of a field.
+   *
+   * @param fieldName name of the field; when fieldMatch is false the root map
+   *        stored under the null key is used regardless of this value
+   * @param term text of the term to look up
+   * @return the QueryPhraseMap stored for the term, or null if there is no
+   *         root map for the field or no entry for the term
+   */
+  public QueryPhraseMap getFieldTermMap( String fieldName, String term ){
+    QueryPhraseMap rootMap = getRootMap( fieldName );
+    if( rootMap == null ) return null;
+    return rootMap.subMap.get( term );
+  }
+
+  /**
+   * Searches the root map of a field for the given phrase candidate.
+   *
+   * @param fieldName name of the field; when fieldMatch is false the root map
+   *        stored under the null key is used regardless of this value
+   * @param phraseCandidate the terms of the candidate, in order
+   * @return the result of {@link QueryPhraseMap#searchPhrase(List)} on the
+   *         root map, or null if there is no root map for the field
+   */
+  public QueryPhraseMap searchPhrase( String fieldName, final List<TermInfo> phraseCandidate ){
+    QueryPhraseMap root = getRootMap( fieldName );
+    return root == null ? null : root.searchPhrase( phraseCandidate );
+  }
+  
+  /*
+   * Return the root map saved for the field (for the null key if not fieldMatch),
+   * or null if there is none. Unlike getRootMap( Query ), nothing is created.
+   */
+  private QueryPhraseMap getRootMap( String fieldName ){
+    String key = fieldMatch ? fieldName : null;
+    return rootMaps.get( key );
+  }
+  
+  /*
+   * Return the current termOrPhraseNumber and then increment the counter.
+   */
+  int nextTermOrPhraseNumber(){
+    int current = termOrPhraseNumber;
+    termOrPhraseNumber = current + 1;
+    return current;
+  }
+  
+  public static class QueryPhraseMap {
+
+    boolean terminal;
+    int slop;   // valid if terminal == true and phraseHighlight == true
+    float boost;  // valid if terminal == true
+    int termOrPhraseNumber;   // valid if terminal == true
+    FieldQuery fieldQuery;
+    Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
+    
+    public QueryPhraseMap( FieldQuery fieldQuery ){
+      this.fieldQuery = fieldQuery;
+    }
+
+    void addTerm( Term term, float boost ){
+      QueryPhraseMap map = getOrNewMap( subMap, term.text() );
+      map.markTerminal( boost );
+    }
+    
+    private QueryPhraseMap getOrNewMap( Map<String, QueryPhraseMap> subMap, String term ){
+      QueryPhraseMap map = subMap.get( term );
+      if( map == null ){
+        map = new QueryPhraseMap( fieldQuery );
+        subMap.put( term, map );
+      }
+      return map;
+    }
+
+    void add( Query query ){
+      if( query instanceof TermQuery ){
+        addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
+      }
+      else if( query instanceof PhraseQuery ){
+        PhraseQuery pq = (PhraseQuery)query;
+        Term[] terms = pq.getTerms();
+        Map<String, QueryPhraseMap> map = subMap;
+        QueryPhraseMap qpm = null;
+        for( Term term : terms ){
+          qpm = getOrNewMap( map, term.text() );
+          map = qpm.subMap;
+        }
+        qpm.markTerminal( pq.getSlop(), pq.getBoost() );
+      }
+      else
+        throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
+    }
+    
+    public QueryPhraseMap getTermMap( String term ){
+      return subMap.get( term );
+    }
+    
+    private void markTerminal( float boost ){
+      markTerminal( 0, boost );
+    }
+    
+    private void markTerminal( int slop, float boost ){
+      this.terminal = true;
+      this.slop = slop;
+      this.boost = boost;
+      this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
+    }
+    
+    public boolean isTerminal(){
+      return terminal;
+    }
+    
+    public int getSlop(){
+      return slop;
+    }
+    
+    public float getBoost(){
+      return boost;
+    }
+    
+    public int getTermOrPhraseNumber(){
+      return termOrPhraseNumber;
+    }
+    
+    public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
+      QueryPhraseMap currMap = this;
+      for( TermInfo ti : phraseCandidate ){
+        currMap = currMap.subMap.get( ti.getText() );
+        if( currMap == null ) return null;
+      }
+      return currMap.isValidTermOrPhrase( phraseCandidate ) ? currMap : null;
+    }
+    
+    public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
+      // check terminal
+      if( !terminal ) return false;
+
+      // if the candidate is a term, it is valid
+      if( phraseCandidate.size() == 1 ) return true;
+
+      // else check whether the candidate is valid phrase
+      // compare position-gaps between terms to slop
+      int pos = phraseCandidate.get( 0 ).getPosition();
+      for( int i = 1; i < phraseCandidate.size(); i++ ){
+        int nextPos = phraseCandidate.get( i ).getPosition();
+        if( Math.abs( nextPos - pos - 1 ) > slop ) return false;
+        pos = nextPos;
+      }
+      return true;
+    }
+  }
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,171 @@
+package org.apache.lucene.search.vectorhighlight;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
+ * of the document to be highlighted.
+ */
+public class FieldTermStack {
+
+  private final String fieldName;
+  LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
+
+  /**
+   * A small manual driver: indexes one document into a RAMDirectory and builds
+   * a FieldTermStack for it. Nothing is printed.
+   */
+  public static void main( String[] args ) throws Exception {
+    Analyzer analyzer = new WhitespaceAnalyzer();
+    QueryParser parser = new QueryParser( "f", analyzer );
+    Query query = parser.parse( "a x:b" );
+    FieldQuery fieldQuery = new FieldQuery( query, true, false );
+
+    Directory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter( dir, analyzer, MaxFieldLength.LIMITED );
+    Document doc = new Document();
+    doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+    doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+    writer.addDocument( doc );
+    writer.close();
+
+    IndexReader reader = IndexReader.open( dir );
+    try{
+      new FieldTermStack( reader, 0, "f", fieldQuery );
+    }
+    finally{
+      // close the reader even if building the stack fails
+      reader.close();
+    }
+  }
+
+  /**
+   * a constructor.
+   * The stack is left empty when the field has no term vector, when the term
+   * vector is not a TermPositionVector, or when fieldQuery has no term set for
+   * the field.
+   *
+   * @param reader IndexReader of the index
+   * @param docId document id to be highlighted
+   * @param fieldName field of the document to be highlighted
+   * @param fieldQuery FieldQuery object
+   * @throws IOException
+   */
+  public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException {
+    this.fieldName = fieldName;
+
+    TermFreqVector tfv = reader.getTermFreqVector( docId, fieldName );
+    // instanceof is false for null too, so this covers both "no term vector" and
+    // "term vector without positions/offsets": just return to make null snippets
+    if( !( tfv instanceof TermPositionVector ) ) return;
+    TermPositionVector tpv = (TermPositionVector)tfv;
+
+    Set<String> termSet = fieldQuery.getTermSet( fieldName );
+    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
+    if( termSet == null ) return;
+
+    for( String term : tpv.getTerms() ){
+      if( !termSet.contains( term ) ) continue;
+      int index = tpv.indexOf( term );
+      TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
+      if( tvois == null ) return; // just return to make null snippets
+      int[] poss = tpv.getTermPositions( index );
+      if( poss == null ) return; // just return to make null snippets
+      for( int i = 0; i < tvois.length; i++ )
+        termList.add( new TermInfo( term, tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
+    }
+
+    // sort by position
+    Collections.sort( termList );
+  }
+
+  /**
+   * @return field name
+   */
+  public String getFieldName(){
+    return fieldName;
+  }
+
+  /**
+   * @return the top TermInfo object of the stack, or null if the stack is empty
+   */
+  public TermInfo pop(){
+    return termList.poll();
+  }
+
+  /**
+   * @param termInfo the TermInfo object to be put on the top of the stack
+   */
+  public void push( TermInfo termInfo ){
+    // termList.push( termInfo );  // avoid Java 1.6 feature
+    termList.addFirst( termInfo );
+  }
+
+  /**
+   * to know whether the stack is empty
+   * 
+   * @return true if the stack is empty, false if not
+   */
+  public boolean isEmpty(){
+    return termList == null || termList.isEmpty();
+  }
+
+  /**
+   * The text, start/end offsets and position of one term occurrence.
+   * The natural ordering is by position only.
+   */
+  public static class TermInfo implements Comparable<TermInfo>{
+
+    final String text;
+    final int startOffset;
+    final int endOffset;
+    final int position;
+
+    TermInfo( String text, int startOffset, int endOffset, int position ){
+      this.text = text;
+      this.startOffset = startOffset;
+      this.endOffset = endOffset;
+      this.position = position;
+    }
+
+    public String getText(){ return text; }
+    public int getStartOffset(){ return startOffset; }
+    public int getEndOffset(){ return endOffset; }
+    public int getPosition(){ return position; }
+
+    /** @return <code>text(startOffset,endOffset,position)</code> */
+    public String toString(){
+      StringBuilder sb = new StringBuilder();
+      sb.append( text ).append( '(' ).append(startOffset).append( ',' ).append( endOffset ).append( ',' ).append( position ).append( ')' );
+      return sb.toString();
+    }
+
+    /**
+     * Orders by position. Compares explicitly rather than by subtraction,
+     * which can overflow for operands of opposite sign.
+     */
+    public int compareTo( TermInfo o ) {
+      if( this.position < o.position ) return -1;
+      return this.position == o.position ? 0 : 1;
+    }
+  }
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragListBuilder.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragListBuilder.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragListBuilder.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,34 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * FragListBuilder is an interface for FieldFragList builder classes.
+ * A FragListBuilder class can be plugged in to Highlighter.
+ */
+public interface FragListBuilder {
+
+  /**
+   * Creates a FieldFragList from the given FieldPhraseList.
+   * 
+   * @param fieldPhraseList FieldPhraseList object
+   * @param fragCharSize the length (number of chars) of a fragment
+   * @return the created FieldFragList object
+   */
+  public FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, int fragCharSize );
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragListBuilder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,57 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * FragmentsBuilder is an interface for fragments (snippets) builder classes.
+ * A FragmentsBuilder class can be plugged in to Highlighter.
+ */
+public interface FragmentsBuilder {
+
+  /**
+   * Creates a single fragment.
+   * 
+   * @param reader IndexReader of the index
+   * @param docId document id to be highlighted
+   * @param fieldName field of the document to be highlighted
+   * @param fieldFragList FieldFragList object
+   * @return a created fragment or null when no fragment created
+   * @throws IOException
+   */
+  public String createFragment( IndexReader reader, int docId, String fieldName,
+      FieldFragList fieldFragList ) throws IOException;
+
+  /**
+   * Creates multiple fragments.
+   * 
+   * @param reader IndexReader of the index
+   * @param docId document id to be highlighted
+   * @param fieldName field of the document to be highlighted
+   * @param fieldFragList FieldFragList object
+   * @param maxNumFragments maximum number of fragments
+   * @return created fragments or null when no fragments created.
+   *         size of the array can be less than maxNumFragments
+   * @throws IOException
+   */
+  public String[] createFragments( IndexReader reader, int docId, String fieldName,
+      FieldFragList fieldFragList, int maxNumFragments ) throws IOException;
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilder.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilder.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilder.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,69 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+
+/**
+ * An implementation of FragmentsBuilder that outputs score-order fragments.
+ */
+public class ScoreOrderFragmentsBuilder extends BaseFragmentsBuilder {
+
+  /**
+   * a constructor.
+   */
+  public ScoreOrderFragmentsBuilder(){
+    super();
+  }
+
+  /**
+   * a constructor.
+   * 
+   * @param preTags array of pre-tags for markup terms.
+   * @param postTags array of post-tags for markup terms.
+   */
+  public ScoreOrderFragmentsBuilder( String[] preTags, String[] postTags ){
+    super( preTags, postTags );
+  }
+
+  /**
+   * Sorts the given list of WeightedFragInfo in place with a
+   * {@link ScoreComparator} and returns that same list.
+   */
+  public List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src ) {
+    Comparator<WeightedFragInfo> byScore = new ScoreComparator();
+    Collections.sort( src, byScore );
+    return src;
+  }
+
+  /**
+   * Orders WeightedFragInfo objects by descending totalBoost and, for equal
+   * totalBoost, by ascending startOffset.
+   */
+  public static class ScoreComparator implements Comparator<WeightedFragInfo> {
+
+    public int compare( WeightedFragInfo o1, WeightedFragInfo o2 ) {
+      // the higher score comes first
+      if( o1.totalBoost > o2.totalBoost ) return -1;
+      if( o1.totalBoost < o2.totalBoost ) return 1;
+      // same score: the fragment that starts earlier comes first
+      if( o1.startOffset < o2.startOffset ) return -1;
+      return o1.startOffset > o2.startOffset ? 1 : 0;
+    }
+  }
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,82 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
+
+/**
+ * A simple implementation of FragListBuilder.
+ * <p>
+ * Phrases are consumed in the iteration order of <code>fieldPhraseList.phraseList</code>.
+ * Every fragment is <code>fragCharSize</code> chars long and begins up to
+ * {@link #MARGIN} chars before the first phrase it holds, but never before the end
+ * of the previous fragment. The phrases that follow are put into the same fragment
+ * as long as they end at or before the fragment end.
+ */
+public class SimpleFragListBuilder implements FragListBuilder {
+  
+  public static final int MARGIN = 6;  // max number of chars kept in front of the first phrase of a fragment
+  public static final int MIN_FRAG_CHAR_SIZE = MARGIN * 3;  // smallest fragCharSize accepted by createFieldFragList
+  /**
+   * Distributes the phrases of the given list over fragments.
+   *
+   * @param fieldPhraseList the phrases to put into fragments
+   * @param fragCharSize length of every fragment in chars
+   * @return the FieldFragList the fragments were added to
+   * @throws IllegalArgumentException if fragCharSize is smaller than MIN_FRAG_CHAR_SIZE
+   */
+  public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize) {
+    if( fragCharSize < MIN_FRAG_CHAR_SIZE )
+      throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " +
+          MIN_FRAG_CHAR_SIZE + " or higher." );
+
+    FieldFragList ffl = new FieldFragList( fragCharSize );
+
+    List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();  // phrases of the fragment being built; cleared and reused for every fragment
+    Iterator<WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator();
+    WeightedPhraseInfo phraseInfo = null;
+    int startOffset = 0;  // end of the previous fragment; the next fragment must not begin before it
+    boolean taken = false;  // true when the inner loop has already fetched phraseInfo from the iterator
+    while( true ){
+      if( !taken ){
+        if( !ite.hasNext() ) break;
+        phraseInfo = ite.next();
+      }
+      taken = false;
+      if( phraseInfo == null ) break;
+
+      // if the phrase starts before the end of the previous fragment, discard it and try next phrase
+      if( phraseInfo.getStartOffset() < startOffset ) continue;
+
+      wpil.clear();
+      wpil.add( phraseInfo );
+      int st = phraseInfo.getStartOffset() - MARGIN < startOffset ?
+          startOffset : phraseInfo.getStartOffset() - MARGIN;  // MARGIN chars before the phrase, but not before the end of the previous fragment
+      int en = st + fragCharSize;
+      startOffset = en;
+
+      while( true ){  // collect the following phrases that end at or before en
+        if( ite.hasNext() ){
+          phraseInfo = ite.next();
+          taken = true;
+          if( phraseInfo == null ) break;
+        }
+        else
+          break;
+        if( phraseInfo.getEndOffset() <= en )
+          wpil.add( phraseInfo );
+        else
+          break;  // does not fit; the outer loop examines this phrase next
+      }
+      ffl.add( st, en, wpil );  // NOTE(review): wpil is cleared and reused afterwards, so add() presumably copies what it needs - confirm
+    }
+    return ffl;
+  }
+
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilder.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilder.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilder.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,53 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+
+/**
+ * A simple implementation of FragmentsBuilder: the fragments are used in the
+ * order in which they are handed in.
+ */
+public class SimpleFragmentsBuilder extends BaseFragmentsBuilder {
+
+  /**
+   * Creates a builder through the no-argument constructor of the base class.
+   */
+  public SimpleFragmentsBuilder() {
+    // the no-argument constructor of the base class is invoked implicitly
+  }
+
+  /**
+   * Creates a builder and hands the given tag arrays to the base class constructor.
+   * 
+   * @param preTags array of pre-tags for markup terms.
+   * @param postTags array of post-tags for markup terms.
+   */
+  public SimpleFragmentsBuilder( String[] preTags, String[] postTags ) {
+    super( preTags, postTags );
+  }
+
+  /**
+   * Returns the source list itself; nothing is reordered or copied.
+   */
+  public List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src ) {
+    final List<WeightedFragInfo> unchanged = src;
+    return unchanged;
+  }
+}

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html Thu Jul  9 13:06:51 2009
@@ -0,0 +1,126 @@
+<html>
+<body>
+This is another highlighter implementation.
+
+<h2>Features</h2>
+<ul>
+<li>fast for large docs</li>
+<li>support N-gram fields</li>
+<li>support phrase-unit highlighting with slops</li>
+<li>need Java 1.5</li>
+<li>highlight fields need to be TermVector.WITH_POSITIONS_OFFSETS</li>
+<li>take into account query boost to score fragments</li>
+<li>support colored highlight tags</li>
+<li>pluggable FragListBuilder</li>
+<li>pluggable FragmentsBuilder</li>
+</ul>
+
+<h2>Algorithm</h2>
+<p>To explain the algorithm, let's use the following sample text
+ (to be highlighted) and user query:</p>
+
+<table border=1>
+<tr>
+<td><b>Sample Text</b></td>
+<td>Lucene is a search engine library.</td>
+</tr>
+<tr>
+<td><b>User Query</b></td>
+<td>Lucene^2 OR "search library"~1</td>
+</tr>
+</table>
+
+<p>The user query is a BooleanQuery that consists of TermQuery("Lucene") 
+with boost of 2 and PhraseQuery("search library") with slop of 1.</p>
+<p>For your convenience, here is the offset and position information for the
+sample text.</p>
+
+<pre>
++--------+-----------------------------------+
+|        |          1111111111222222222233333|
+|  offset|01234567890123456789012345678901234|
++--------+-----------------------------------+
+|document|Lucene is a search engine library. |
++--------+-----------------------------------+
+|position|0      1  2 3      4      5        |
++--------+-----------------------------------+
+</pre>
+
+<h3>Step 1.</h3>
+<p>In Step 1, Fast Vector Highlighter generates {@link org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap} from the user query.
+<code>QueryPhraseMap</code> consists of the following members:</p>
+<pre>
+public class QueryPhraseMap {
+  boolean terminal;
+  int slop;   // valid if terminal == true and phraseHighlight == true
+  float boost;  // valid if terminal == true
+  Map&lt;String, QueryPhraseMap&gt; subMap;
+} 
+</pre>
+<p><code>QueryPhraseMap</code> has subMap. The key of the subMap is a term 
+text in the user query and the value is a subsequent <code>QueryPhraseMap</code>.
+If the query is a term (not phrase), then the subsequent <code>QueryPhraseMap</code>
+is marked as terminal. If the query is a phrase, then the subsequent <code>QueryPhraseMap</code>
+is not a terminal and it has the next term text in the phrase.</p>
+
+<p>From the sample user query, the following <code>QueryPhraseMap</code> 
+will be generated:</p>
+<pre>
+   QueryPhraseMap
++--------+-+  +-------+-+
+|"Lucene"|o+->|boost=2|*|  * : terminal
++--------+-+  +-------+-+
+
++--------+-+  +---------+-+  +-------+------+-+
+|"search"|o+->|"library"|o+->|boost=1|slop=1|*|
++--------+-+  +---------+-+  +-------+------+-+
+</pre>
+
+<h3>Step 2.</h3>
+<p>In Step 2, Fast Vector Highlighter generates {@link org.apache.lucene.search.vectorhighlight.FieldTermStack}. Fast Vector Highlighter uses {@link org.apache.lucene.index.TermFreqVector} data
+(must be stored {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS_OFFSETS})
+to generate it. <code>FieldTermStack</code> keeps only those terms of the field that appear in the user query.
+Therefore, in this sample case, Fast Vector Highlighter generates the following <code>FieldTermStack</code>:</p>
+<pre>
+   FieldTermStack
++------------------+
+|"Lucene"(0,6,0)   |
++------------------+
+|"search"(12,18,3) |
++------------------+
+|"library"(26,33,5)|
++------------------+
+where : "termText"(startOffset,endOffset,position)
+</pre>
+<h3>Step 3.</h3>
+<p>In Step 3, Fast Vector Highlighter generates {@link org.apache.lucene.search.vectorhighlight.FieldPhraseList}
+by reference to <code>QueryPhraseMap</code> and <code>FieldTermStack</code>.</p>
+<pre>
+   FieldPhraseList
++----------------+-----------------+---+
+|"Lucene"        |[(0,6)]          |w=2|
++----------------+-----------------+---+
+|"search library"|[(12,18),(26,33)]|w=1|
++----------------+-----------------+---+
+</pre>
+<p>Each entry is a <code>WeightedPhraseInfo</code>, which consists of
+an array of term offsets and a weight. The weight (Fast Vector Highlighter uses query boost to
+calculate the weight) will be taken into account when Fast Vector Highlighter creates
+{@link org.apache.lucene.search.vectorhighlight.FieldFragList} in the next step.</p>
+<h3>Step 4.</h3>
+<p>In Step 4, Fast Vector Highlighter creates <code>FieldFragList</code> by reference to
+<code>FieldPhraseList</code>. In this sample case, the following
+<code>FieldFragList</code> will be generated:</p>
+<pre>
+   FieldFragList
++---------------------------------+
+|"Lucene"[(0,6)]                  |
+|"search library"[(12,18),(26,33)]|
+|totalBoost=3                     |
++---------------------------------+
+</pre>
+<h3>Step 5.</h3>
+<p>In Step 5, by using <code>FieldFragList</code> and the field stored data,
+Fast Vector Highlighter creates highlighted snippets!</p>
+</body>
+</html>

Propchange: lucene/java/trunk/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=792542&view=auto
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (added)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Thu Jul  9 13:06:51 2009
@@ -0,0 +1,345 @@
+package org.apache.lucene.search.vectorhighlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Collection;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+public abstract class AbstractTestCase extends TestCase {
+
+  protected final String F = "f";  // default field name: used by the query parsers, tq()/pqF() and the index helpers
+  protected final String F1 = "f1";
+  protected final String F2 = "f2";
+  protected Directory dir;  // RAMDirectory created in setUp()
+  protected Analyzer analyzerW;  // WhitespaceAnalyzer, created in setUp()
+  protected Analyzer analyzerB;  // BigramAnalyzer, created in setUp()
+  protected IndexReader reader;  // opened by make1dmfIndex(), closed in tearDown()
+  protected QueryParser paW;  // parser for field F that uses analyzerW
+  protected QueryParser paB;  // parser for field F that uses analyzerB
+  // three short values for a multi-valued field (indexed by makeIndexShortMV)
+  protected static final String[] shortMVValues = {
+    "a b c",
+    "",   // empty data in multi valued field
+    "d e"
+  };
+  // two long values for a multi-valued field (indexed by makeIndexLongMV)
+  protected static final String[] longMVValues = {
+    "Followings are the examples of customizable parameters and actual examples of customization:",
+    "The most search engines use only one of these methods. Even the search engines that says they can use the both methods basically"
+  };
+  
+  // test data for LUCENE-1448 bug (indexed with the bigram analyzer by makeIndexLongMVB)
+  protected static final String[] biMVValues = {
+    "\nLucene/Solr does not require such additional hardware.",
+    "\nWhen you talk about processing speed, the"
+  };
+
+  /**
+   * Creates an empty RAMDirectory plus the whitespace and bigram analyzers,
+   * each with a query parser whose default field is F.
+   */
+  protected void setUp() throws Exception {
+    dir = new RAMDirectory();
+
+    analyzerW = new WhitespaceAnalyzer();
+    paW = new QueryParser( F, analyzerW );
+
+    analyzerB = new BigramAnalyzer();
+    paB = new QueryParser( F, analyzerB );
+  }
+  
+  /**
+   * Closes the reader opened by an index helper, if any, and forgets it.
+   */
+  protected void tearDown() throws Exception {
+    if( reader == null )
+      return;  // no index was made by this test
+    reader.close();
+    reader = null;
+  }
+
+  protected Query tq( String text ){
+    return tq( 1F, text );
+  }
+
+  protected Query tq( float boost, String text ){
+    return tq( boost, F, text );
+  }
+  
+  protected Query tq( String field, String text ){
+    return tq( 1F, field, text );
+  }
+  
+  protected Query tq( float boost, String field, String text ){
+    Query query = new TermQuery( new Term( field, text ) );
+    query.setBoost( boost );
+    return query;
+  }
+  
+  protected Query pqF( String... texts ){
+    return pqF( 1F, texts );
+  }
+  
+  protected Query pqF( float boost, String... texts ){
+    return pqF( boost, 0, texts );
+  }
+  
+  protected Query pqF( float boost, int slop, String... texts ){
+    return pq( boost, slop, F, texts );
+  }
+  
+  protected Query pq( String field, String... texts ){
+    return pq( 1F, 0, field, texts );
+  }
+  
+  protected Query pq( float boost, String field, String... texts ){
+    return pq( boost, 0, field, texts );
+  }
+  
+  protected Query pq( float boost, int slop, String field, String... texts ){
+    PhraseQuery query = new PhraseQuery();
+    for( String text : texts ){
+      query.add( new Term( field, text ) );
+    }
+    query.setBoost( boost );
+    query.setSlop( slop );
+    return query;
+  }
+  
+  protected void assertCollectionQueries( Collection<Query> actual, Query... expected ){
+    assertEquals( expected.length, actual.size() );
+    for( Query query : expected ){
+      assertTrue( actual.contains( query ) );
+    }
+  }
+
+  /** Analyzer that tokenizes every field with a BasicNGramTokenizer in its default setup. */
+  static class BigramAnalyzer extends Analyzer {
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      final TokenStream grams = new BasicNGramTokenizer( reader );
+      return grams;
+    }
+  }
+  
+  static class BasicNGramTokenizer extends Tokenizer {
+    /* Cuts the input at delimiter chars into snippets and returns the n-grams of
+     * every snippet; a snippet shorter than n is returned as one single token. */
+    public static final int DEFAULT_N_SIZE = 2;
+    public static final String DEFAULT_DELIMITERS = " \t\n.,";
+    private final int n;  // gram size
+    private final String delimiters;  // each char of this string separates snippets
+    private int startTerm;  // index in snippet at which the current gram begins
+    private int lenTerm;  // length of the current gram
+    private int startOffset;  // offset in the input at which the current gram begins
+    private int nextStartOffset;  // number of chars read from the input so far
+    private int ch;  // char read last; -1 once the end of the input was reached
+    private String snippet;  // current run of non-delimiter chars
+    private StringBuilder snippetBuffer;
+    private static final int BUFFER_SIZE = 4096;
+    private char[] charBuffer;  // buffer that is filled from input
+    private int charBufferIndex;  // next unread index in charBuffer
+    private int charBufferLen;  // result of the last input.read( charBuffer )
+    
+    public BasicNGramTokenizer( Reader in ){
+      this( in, DEFAULT_N_SIZE );
+    }
+    
+    public BasicNGramTokenizer( Reader in, int n ){
+      this( in, n, DEFAULT_DELIMITERS );
+    }
+    
+    public BasicNGramTokenizer( Reader in, String delimiters ){
+      this( in, DEFAULT_N_SIZE, delimiters );
+    }
+    
+    public BasicNGramTokenizer( Reader in, int n, String delimiters ){
+      super(in);
+      this.n = n;
+      this.delimiters = delimiters;
+      startTerm = 0;
+      nextStartOffset = 0;
+      snippet = null;
+      snippetBuffer = new StringBuilder();
+      charBuffer = new char[BUFFER_SIZE];
+      charBufferIndex = BUFFER_SIZE;  // not below charBufferLen, so the first readCharFromBuffer() call fills the buffer
+      charBufferLen = 0;
+      ch = 0;
+    }
+    /** Puts the next gram into reusableToken and returns it; returns null when there is no gram left. */
+    public Token next( Token reusableToken ) throws IOException {
+      if( !getNextPartialSnippet() )
+        return null;
+      reusableToken.reinit( snippet, startTerm, lenTerm, startOffset, startOffset + lenTerm );  // NOTE(review): assumes the arguments are (text, offset in text, length, start offset, end offset) - confirm against Token
+      return reusableToken;
+    }
+    /** Returns the number of chars read from the input so far. */
+    public int getFinalOffset() {
+      return nextStartOffset;
+    }
+    /** Advances to the next gram of the current snippet or, if it has none left, to the first gram of the next snippet; false if there is none. */
+    protected boolean getNextPartialSnippet() throws IOException {
+      if( snippet != null && snippet.length() >= startTerm + 1 + n ){  // a full n-gram still fits after moving on by one char
+        startTerm++;
+        startOffset++;
+        lenTerm = n;
+        return true;
+      }
+      return getNextSnippet();
+    }
+    /** Reads the next snippet, skipping the delimiters in front of it; false if the input ends before any non-delimiter char. */
+    protected boolean getNextSnippet() throws IOException {
+      startTerm = 0;
+      startOffset = nextStartOffset;
+      snippetBuffer.delete( 0, snippetBuffer.length() );
+      while( true ){
+        if( ch != -1 )  // do not read again once the end of the input was seen
+          ch = readCharFromBuffer();
+        if( ch == -1 ) break;
+        else if( !isDelimiter( ch ) )
+          snippetBuffer.append( (char)ch );
+        else if( snippetBuffer.length() > 0 )  // a delimiter behind snippet chars ends the snippet
+          break;
+        else  // a delimiter in front of the snippet is skipped
+          startOffset++;
+      }
+      if( snippetBuffer.length() == 0 )
+        return false;
+      snippet = snippetBuffer.toString();
+      lenTerm = snippet.length() >= n ? n : snippet.length();  // length of the first gram; below n for a short snippet
+      return true;
+    }
+    /** Returns the next char of the input, reading into charBuffer when it is used up; -1 at the end of the input. */
+    protected int readCharFromBuffer() throws IOException {
+      if( charBufferIndex >= charBufferLen ){
+        charBufferLen = input.read( charBuffer );
+        if( charBufferLen == -1 ){
+          return -1;
+        }
+        charBufferIndex = 0;
+      }
+      int c = (int)charBuffer[charBufferIndex++];
+      nextStartOffset++;  // counts every char that is handed out
+      return c;
+    }
+    /** Returns true if c occurs in delimiters. */
+    protected boolean isDelimiter( int c ){
+      return delimiters.indexOf( c ) >= 0;
+    }
+  }
+
+  protected void make1d1fIndex( String value ) throws Exception {
+    make1dmfIndex( value );
+  }
+  
+  protected void make1d1fIndexB( String value ) throws Exception {
+    make1dmfIndexB( value );
+  }
+  
+  protected void make1dmfIndex( String... values ) throws Exception {
+    make1dmfIndex( analyzerW, values );
+  }
+  
+  protected void make1dmfIndexB( String... values ) throws Exception {
+    make1dmfIndex( analyzerB, values );
+  }
+  
+  /**
+   * Makes an index in dir that holds exactly one document and opens reader on it.
+   * Every value is added as one instance of field F (Store.YES, Index.ANALYZED,
+   * TermVector.WITH_POSITIONS_OFFSETS).
+   *
+   * @param analyzer the analyzer handed to the IndexWriter
+   * @param values the values of field F; one Field instance is added per value
+   * @throws Exception if writing the index or opening the reader fails
+   */
+  protected void make1dmfIndex( Analyzer analyzer, String... values ) throws Exception {
+    IndexWriter writer = new IndexWriter( dir, analyzer, true, MaxFieldLength.LIMITED );
+    try{
+      Document doc = new Document();
+      for( String value: values )
+        doc.add( new Field( F, value, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+      writer.addDocument( doc );
+    }
+    finally{
+      // close the writer even if building or adding the document fails, so that it is never leaked
+      writer.close();
+    }
+
+    reader = IndexReader.open( dir );
+  }
+  
+  protected void makeIndexShortMV() throws Exception {
+    // Indexes shortMVValues. For each value: offsets on the line above, term positions on the line below.
+    //  012345
+    // "a b c"
+    //  0 1 2
+    
+    // ""
+
+    //  6789
+    // "d e"
+    //  3 4
+    make1dmfIndex( shortMVValues );
+  }
+  
+  protected void makeIndexLongMV() throws Exception {  // indexes longMVValues; diagrams: offsets above each value, term positions below
+    //           11111111112222222222333333333344444444445555555555666666666677777777778888888888999
+    // 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
+    // Followings are the examples of customizable parameters and actual examples of customization:
+    // 0          1   2   3        4  5            6          7   8      9        10 11
+    
+    //        1                                                                                                   2
+    // 999999900000000001111111111222222222233333333334444444444555555555566666666667777777777888888888899999999990000000000111111111122
+    // 345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901
+    // The most search engines use only one of these methods. Even the search engines that says they can use the both methods basically
+    // 12  13  (14)   (15)     16  17   18  19 20    21       22   23 (24)   (25)     26   27   28   29  30  31  32   33      34
+    // NOTE(review): the positions in parentheses appear to mark the occurrences of "search engines" - confirm
+    make1dmfIndex( longMVValues );
+  }
+  
+  protected void makeIndexLongMVB() throws Exception {
+    // "*" in the diagrams stands for the LF at the beginning of each value of biMVValues
+    // each entry below a value is a bigram followed by its term position
+    //           1111111111222222222233333333334444444444555555
+    // 01234567890123456789012345678901234567890123456789012345
+    // *Lucene/Solr does not require such additional hardware.
+    //  Lu 0        do 10    re 15   su 21       na 31
+    //   uc 1        oe 11    eq 16   uc 22       al 32
+    //    ce 2        es 12    qu 17   ch 23         ha 33
+    //     en 3          no 13  ui 18     ad 24       ar 34
+    //      ne 4          ot 14  ir 19     dd 25       rd 35
+    //       e/ 5                 re 20     di 26       dw 36
+    //        /S 6                           it 27       wa 37
+    //         So 7                           ti 28       ar 38
+    //          ol 8                           io 29       re 39
+    //           lr 9                           on 30
+
+    // 5555666666666677777777778888888888999999999
+    // 6789012345678901234567890123456789012345678
+    // *When you talk about processing speed, the
+    //  Wh 40         ab 48     es 56         th 65
+    //   he 41         bo 49     ss 57         he 66
+    //    en 42         ou 50     si 58
+    //       yo 43       ut 51     in 59
+    //        ou 44         pr 52   ng 60
+    //           ta 45       ro 53     sp 61
+    //            al 46       oc 54     pe 62
+    //             lk 47       ce 55     ee 63
+    //                                    ed 64
+
+    make1dmfIndexB( biMVValues );
+  }
+}