Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC

svn commit: r1534320 [20/39] - in /lucene/dev/branches/lucene4956: ./ dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/ dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/...

Modified: lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java Mon Oct 21 18:58:24 2013
@@ -73,6 +73,8 @@ public class TestRangeAccumulator extend
       field.setLongValue(l);
       w.addDocument(doc);
     }
+    field.setLongValue(Long.MAX_VALUE);
+    w.addDocument(doc);
 
     IndexReader r = w.getReader();
     w.close();
@@ -82,7 +84,7 @@ public class TestRangeAccumulator extend
         new LongRange("less than or equal to 10", 0L, true, 10L, true),
         new LongRange("over 90", 90L, false, 100L, false),
         new LongRange("90 or above", 90L, true, 100L, false),
-        new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)));
+        new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, true)));
     
     FacetsCollector fc = FacetsCollector.create(a);
 
@@ -90,7 +92,7 @@ public class TestRangeAccumulator extend
     s.search(new MatchAllDocsQuery(), fc);
     List<FacetResult> result = fc.getFacetResults();
     assertEquals(1, result.size());
-    assertEquals("field (0)\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", FacetTestUtils.toSimpleString(result.get(0)));
+    assertEquals("field (0)\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (1)\n", FacetTestUtils.toSimpleString(result.get(0)));
     
     r.close();
     d.close();
@@ -632,5 +634,44 @@ public class TestRangeAccumulator extend
     r.close();
     dir.close();
   }
-}
 
+  // LUCENE-5178
+  public void testMissingValues() throws Exception {
+    assumeTrue("codec does not support docsWithField", defaultCodecSupportsDocsWithField());
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document doc = new Document();
+    NumericDocValuesField field = new NumericDocValuesField("field", 0L);
+    doc.add(field);
+    for(long l=0;l<100;l++) {
+      if (l % 5 == 0) {
+        // Every 5th doc is missing the value:
+        w.addDocument(new Document());
+        continue;
+      }
+      field.setLongValue(l);
+      w.addDocument(doc);
+    }
+
+    IndexReader r = w.getReader();
+    w.close();
+
+    RangeAccumulator a = new RangeAccumulator(new RangeFacetRequest<LongRange>("field",
+        new LongRange("less than 10", 0L, true, 10L, false),
+        new LongRange("less than or equal to 10", 0L, true, 10L, true),
+        new LongRange("over 90", 90L, false, 100L, false),
+        new LongRange("90 or above", 90L, true, 100L, false),
+        new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)));
+    
+    FacetsCollector fc = FacetsCollector.create(a);
+
+    IndexSearcher s = newSearcher(r);
+    s.search(new MatchAllDocsQuery(), fc);
+    List<FacetResult> result = fc.getFacetResults();
+    assertEquals(1, result.size());
+    assertEquals("field (0)\n  less than 10 (8)\n  less than or equal to 10 (8)\n  over 90 (8)\n  90 or above (8)\n  over 1000 (0)\n", FacetTestUtils.toSimpleString(result.get(0)));
+    
+    r.close();
+    d.close();
+  }
+}
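
A note on the test above: documents with no value for the NumericDocValuesField fall into no range at all, which is why the first four buckets in the expected string count 8 apiece rather than the 10/11/9/10 of the earlier test. A minimal sketch of the same counting flow outside the test harness, assuming the 4.x facet API used in this diff (import paths and the "field" name are my best reading of the module layout shown here):

    import java.util.List;

    import org.apache.lucene.facet.range.LongRange;
    import org.apache.lucene.facet.range.RangeAccumulator;
    import org.apache.lucene.facet.range.RangeFacetRequest;
    import org.apache.lucene.facet.search.FacetResult;
    import org.apache.lucene.facet.search.FacetsCollector;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.store.Directory;

    class RangeCountExample {
      // Docs with no value for "field" are counted in no range at all.
      static List<FacetResult> countRanges(Directory dir) throws Exception {
        DirectoryReader reader = DirectoryReader.open(dir);
        try {
          RangeAccumulator accumulator = new RangeAccumulator(
              new RangeFacetRequest<LongRange>("field",
                  // boolean flags pick inclusive (true) or exclusive (false) endpoints
                  new LongRange("less than 10", 0L, true, 10L, false),
                  new LongRange("10 or more", 10L, true, Long.MAX_VALUE, true)));
          FacetsCollector fc = FacetsCollector.create(accumulator);
          new IndexSearcher(reader).search(new MatchAllDocsQuery(), fc);
          return fc.getFacetResults();
        } finally {
          reader.close();
        }
      }
    }

The inclusive/exclusive flags are also why the first hunk above flips the upper bound of "over 1000" to true: with an exclusive Long.MAX_VALUE endpoint, the newly added Long.MAX_VALUE document could never be counted.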

Modified: lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownQueryTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownQueryTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownQueryTest.java Mon Oct 21 18:58:24 2013
@@ -66,6 +66,10 @@ public class DrillDownQueryTest extends 
   @AfterClass
   public static void afterClassDrillDownQueryTest() throws Exception {
     IOUtils.close(reader, taxo, dir, taxoDir);
+    reader = null;
+    taxo = null;
+    dir = null;
+    taxoDir = null;
   }
 
   @BeforeClass
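
Context for the hunk above: the test framework flags static fields that keep large objects alive across test classes, so after IOUtils.close releases the readers and directories the static fields are nulled as well, letting the closed objects be garbage collected. A minimal sketch of the pattern, with an illustrative class and field name:

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.IOUtils;
    import org.junit.AfterClass;

    public abstract class StaticStateTestPattern {
      protected static Directory dir;

      @AfterClass
      public static void afterClass() throws Exception {
        IOUtils.close(dir);
        dir = null; // drop the static reference so the closed Directory can be GC'd
      }
    }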

Modified: lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java Mon Oct 21 18:58:24 2013
@@ -22,13 +22,16 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.facet.FacetTestCase;
 import org.apache.lucene.facet.FacetTestUtils;
+import org.apache.lucene.facet.codecs.facet46.Facet46Codec;
 import org.apache.lucene.facet.index.FacetFields;
 import org.apache.lucene.facet.params.CategoryListParams;
 import org.apache.lucene.facet.params.FacetIndexingParams;
@@ -48,6 +51,8 @@ import org.apache.lucene.search.similari
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util._TestUtil;
 
 public class TestDemoFacets extends FacetTestCase {
 
@@ -248,4 +253,60 @@ public class TestDemoFacets extends Face
     dir.close();
     taxoDir.close();
   }
+  
+  // LUCENE-4583: make sure if we require > 32 KB for one
+  // document, we don't hit an exception when using Facet42DocValuesFormat
+  public void testManyFacetsInOneDocument() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(new Facet46Codec());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    FacetFields facetFields = new FacetFields(taxoWriter);
+
+    int numLabels = _TestUtil.nextInt(random(), 40000, 100000);
+
+    Document doc = new Document();
+    doc.add(newTextField("field", "text", Field.Store.NO));
+    List<CategoryPath> paths = new ArrayList<CategoryPath>();
+    for(int i=0;i<numLabels;i++) {
+      paths.add(new CategoryPath("dim", "" + i));
+    }
+    facetFields.addFields(doc, paths);
+    writer.addDocument(doc);
+
+    // NRT open
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    // NRT open
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+    
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE));
+
+    // Aggregate the facet counts:
+    FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
+
+    // MatchAllDocsQuery is for "browsing" (counts facets
+    // for all non-deleted docs in the index); normally
+    // you'd use a "normal" query, and use MultiCollector to
+    // wrap collecting the "normal" hits and also facets:
+    searcher.search(new MatchAllDocsQuery(), c);
+    List<FacetResult> results = c.getFacetResults();
+    assertEquals(1, results.size());
+    FacetResultNode root = results.get(0).getFacetResultNode();
+    assertEquals(numLabels, root.subResults.size());
+    Set<String> allLabels = new HashSet<String>();
+    for(FacetResultNode childNode : root.subResults) {
+      assertEquals(2, childNode.label.length);
+      allLabels.add(childNode.label.components[1]);
+      assertEquals(1, (int) childNode.value);
+    }
+    assertEquals(numLabels, allLabels.size());
+
+    IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
+  }
 }
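
Context for the new test: LUCENE-4583 concerned the size limit (roughly 32 KB, per the comment above) on a single binary doc-values entry; the facet codecs pack all of a document's category ordinals into one such entry, so a document with tens of thousands of labels used to trip the limit. A condensed sketch of the indexing side, assuming the same 4.x facet API (the 50000 count and method name are illustrative):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.facet.index.FacetFields;
    import org.apache.lucene.facet.taxonomy.CategoryPath;
    import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
    import org.apache.lucene.index.IndexWriter;

    class ManyLabelsExample {
      // Attach tens of thousands of labels to one document; the per-document
      // ordinals payload can exceed 32 KB, which this change makes legal.
      static void addHeavilyFacetedDoc(IndexWriter writer, TaxonomyWriter taxoWriter)
          throws Exception {
        FacetFields facetFields = new FacetFields(taxoWriter);
        List<CategoryPath> paths = new ArrayList<CategoryPath>();
        for (int i = 0; i < 50000; i++) {
          paths.add(new CategoryPath("dim", Integer.toString(i)));
        }
        Document doc = new Document();
        facetFields.addFields(doc, paths); // encodes all ordinals into doc values
        writer.addDocument(doc);
      }
    }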

Modified: lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java Mon Oct 21 18:58:24 2013
@@ -89,14 +89,8 @@ public class TestFacetsCollector extends
     DirectoryReader r = DirectoryReader.open(indexDir);
     DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
     
-    FacetSearchParams sParams = new FacetSearchParams(new SumScoreFacetRequest(new CategoryPath("a"), 10));
-    TaxonomyFacetsAccumulator fa = new TaxonomyFacetsAccumulator(sParams, r, taxo) {
-      @Override
-      public FacetsAggregator getAggregator() {
-        return new SumScoreFacetsAggregator();
-      }
-    };
-    FacetsCollector fc = FacetsCollector.create(fa);
+    FacetSearchParams fsp = new FacetSearchParams(new SumScoreFacetRequest(new CategoryPath("a"), 10));
+    FacetsCollector fc = FacetsCollector.create(fsp, r, taxo);
     TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
     ConstantScoreQuery csq = new ConstantScoreQuery(new MatchAllDocsQuery());
     csq.setBoost(2.0f);
@@ -335,12 +329,7 @@ public class TestFacetsCollector extends
     // assert IntFacetResultHandler
     fsp = new FacetSearchParams(new SumScoreFacetRequest(new CategoryPath("a"), 10));
     if (random().nextBoolean()) {
-      fa = new TaxonomyFacetsAccumulator(fsp, r, taxo) {
-        @Override
-        public FacetsAggregator getAggregator() {
-          return new SumScoreFacetsAggregator();
-        }
-      };
+      fa = new TaxonomyFacetsAccumulator(fsp, r, taxo);
     } else {
       fa = new OldFacetsAccumulator(fsp, r, taxo);
     }
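
The deleted anonymous subclass is no longer needed, apparently because TaxonomyFacetsAccumulator now picks an aggregator appropriate to each request, so a SumScoreFacetRequest can go through the plain create path. A sketch, assuming the package layout used elsewhere in this diff:

    import org.apache.lucene.facet.params.FacetSearchParams;
    import org.apache.lucene.facet.search.FacetsCollector;
    import org.apache.lucene.facet.search.SumScoreFacetRequest;
    import org.apache.lucene.facet.taxonomy.CategoryPath;
    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
    import org.apache.lucene.index.IndexReader;

    class SumScoreExample {
      static FacetsCollector sumScoreCollector(IndexReader r, TaxonomyReader taxo) {
        FacetSearchParams fsp = new FacetSearchParams(
            new SumScoreFacetRequest(new CategoryPath("a"), 10));
        // No accumulator subclass needed: the request implies its aggregator.
        return FacetsCollector.create(fsp, r, taxo);
      }
    }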

Modified: lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java Mon Oct 21 18:58:24 2013
@@ -473,6 +473,9 @@ public class TestDirectoryTaxonomyReader
     int numCategories = atLeast(10);
     int numA = 0, numB = 0;
     Random random = random();
+    // add the two categories for which we'll also add children (so asserts are simpler)
+    taxoWriter.addCategory(new CategoryPath("a"));
+    taxoWriter.addCategory(new CategoryPath("b"));
     for (int i = 0; i < numCategories; i++) {
       if (random.nextBoolean()) {
         taxoWriter.addCategory(new CategoryPath("a", Integer.toString(i)));

Modified: lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Mon Oct 21 18:58:24 2013
@@ -469,4 +469,27 @@ public class TestDirectoryTaxonomyWriter
     
     IOUtils.close(indexDir, taxoDir);
   }
+  
+  @Test
+  public void testReplaceTaxoWithLargeTaxonomy() throws Exception {
+    Directory srcTaxoDir = newDirectory(), targetTaxoDir = newDirectory();
+    
+    // build source, large, taxonomy
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(srcTaxoDir);
+    int ord = taxoWriter.addCategory(new CategoryPath("A/1/1/1/1/1/1", '/'));
+    taxoWriter.close();
+    
+    taxoWriter = new DirectoryTaxonomyWriter(targetTaxoDir);
+    int ordinal = taxoWriter.addCategory(new CategoryPath("B/1", '/'));
+    assertEquals(1, taxoWriter.getParent(ordinal)); // call getParent to initialize taxoArrays
+    taxoWriter.commit();
+    
+    taxoWriter.replaceTaxonomy(srcTaxoDir);
+    assertEquals(ord - 1, taxoWriter.getParent(ord));
+    taxoWriter.close();
+    
+    srcTaxoDir.close();
+    targetTaxoDir.close();
+  }
+  
 }
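
replaceTaxonomy discards whatever the target writer already contains and adopts the source directory's taxonomy wholesale; the getParent call before the replace matters because it forces the writer's cached parent arrays to initialize, which is exactly the state this test verifies gets refreshed. A condensed sketch of the call sequence:

    import org.apache.lucene.facet.taxonomy.CategoryPath;
    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
    import org.apache.lucene.store.Directory;

    class ReplaceTaxonomyExample {
      static void replace(Directory targetTaxoDir, Directory srcTaxoDir) throws Exception {
        DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(targetTaxoDir);
        writer.addCategory(new CategoryPath("B/1", '/'));
        // Discards the target's existing categories and adopts srcTaxoDir's,
        // refreshing any internal parent arrays already initialized.
        writer.replaceTaxonomy(srcTaxoDir);
        writer.close();
      }
    }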

Modified: lucene/dev/branches/lucene4956/lucene/grouping/src/java/org/apache/lucene/search/grouping/function/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/grouping/src/java/org/apache/lucene/search/grouping/function/package.html?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/grouping/src/java/org/apache/lucene/search/grouping/function/package.html (original)
+++ lucene/dev/branches/lucene4956/lucene/grouping/src/java/org/apache/lucene/search/grouping/function/package.html Mon Oct 21 18:58:24 2013
@@ -16,6 +16,6 @@
 -->
 <html>
 <body>
-Support for grouping by {org.apache.lucene.queries.function.ValueSource}.
+Support for grouping by {@link org.apache.lucene.queries.function.ValueSource}.
 </body>
 </html>

Modified: lucene/dev/branches/lucene4956/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/package.html?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/package.html (original)
+++ lucene/dev/branches/lucene4956/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/package.html Mon Oct 21 18:58:24 2013
@@ -16,6 +16,6 @@
 -->
 <html>
 <body>
-Support for grouping by indexed terms via {org.apache.lucene.search.FieldCache}.
+Support for grouping by indexed terms via {@link org.apache.lucene.search.FieldCache}.
 </body>
 </html>

Modified: lucene/dev/branches/lucene4956/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java Mon Oct 21 18:58:24 2013
@@ -302,7 +302,7 @@ public class AllGroupHeadsCollectorTest 
       w.close();
 
       // NOTE: intentional but temporary field cache insanity!
-      final FieldCache.Ints docIdToFieldId = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(r), "id", false);
+      final FieldCache.Ints docIdToFieldId = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(r), "id", false);
       final int[] fieldIdToDocID = new int[numDocs];
       for (int i = 0; i < numDocs; i++) {
         int fieldId = docIdToFieldId.get(i);
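
SlowCompositeReaderWrapper.wrap(reader) is the factory that replaces direct construction here; unlike the constructor, the factory can hand back a reader that is already atomic without wrapping it again. A minimal sketch:

    import java.io.IOException;

    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.SlowCompositeReaderWrapper;

    class WrapExample {
      static AtomicReader asAtomic(DirectoryReader r) throws IOException {
        // Composite readers get wrapped; an already-atomic reader is returned as-is.
        return SlowCompositeReaderWrapper.wrap(r);
      }
    }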

Modified: lucene/dev/branches/lucene4956/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/branches/lucene4956/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Mon Oct 21 18:58:24 2013
@@ -743,7 +743,7 @@ public class TestGrouping extends Lucene
       w.close();
 
       // NOTE: intentional but temporary field cache insanity!
-      final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(r), "id", false);
+      final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(r), "id", false);
       DirectoryReader rBlocks = null;
       Directory dirBlocks = null;
 
@@ -779,7 +779,7 @@ public class TestGrouping extends Lucene
         dirBlocks = newDirectory();
         rBlocks = getDocBlockReader(dirBlocks, groupDocs);
         final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
-        final FieldCache.Ints docIDToIDBlocks = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(rBlocks), "id", false);
+        final FieldCache.Ints docIDToIDBlocks = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(rBlocks), "id", false);
 
         final IndexSearcher sBlocks = newSearcher(rBlocks);
         final ShardState shardsBlocks = new ShardState(sBlocks);

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java Mon Oct 21 18:58:24 2013
@@ -47,33 +47,27 @@ public class SimpleHTMLEncoder implement
     {
       char ch = plainText.charAt(index);
 
-      switch (ch)
-      {
+      switch (ch) {
       case '"':
         result.append("&quot;");
         break;
-
       case '&':
         result.append("&amp;");
         break;
-
       case '<':
         result.append("&lt;");
         break;
-
       case '>':
         result.append("&gt;");
         break;
-
+      case '\'':
+        result.append("&#x27;");
+        break;
+      case '/':
+        result.append("&#x2F;");
+        break;
       default:
-           if (ch < 128)
-           {
-                 result.append(ch);
-             }
-           else
-             {
-                 result.append("&#").append((int)ch).append(";");
-             }
+        result.append(ch);
       }
     }
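
With this change the encoder also escapes single quotes and forward slashes (useful when a snippet lands inside an HTML attribute) and passes characters above ASCII through untouched instead of emitting numeric entities. Expected behavior per the switch above:

    import org.apache.lucene.search.highlight.SimpleHTMLEncoder;

    class EncodeExample {
      public static void main(String[] args) {
        String encoded = new SimpleHTMLEncoder().encodeText("a<b> & 'c'/é");
        // Prints: a&lt;b&gt; &amp; &#x27;c&#x27;&#x2F;é  (é passes through unescaped)
        System.out.println(encoded);
      }
    }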
 

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Oct 21 18:58:24 2013
@@ -422,6 +422,11 @@ public class WeightedSpanTermExtractor {
     public NumericDocValues getNormValues(String field) throws IOException {
       return super.getNormValues(FIELD_NAME);
     }
+
+    @Override
+    public Bits getDocsWithField(String field) throws IOException {
+      return super.getDocsWithField(FIELD_NAME);
+    }
   }
 
   /**

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java Mon Oct 21 18:58:24 2013
@@ -31,8 +31,11 @@ public abstract class PassageFormatter {
    * @param passages top-N passages for the field. Note these are sorted in
    *        the order that they appear in the document for convenience.
    * @param content content for the field.
-   * @return formatted highlight
+   * @return formatted highlight.  Note that for the
+   * non-expert APIs in {@link PostingsHighlighter} that
+   * return String, the toString method on the Object
+   * returned by this method is used to compute the string.
    */
-  public abstract String format(Passage passages[], String content);
+  public abstract Object format(Passage passages[], String content);
 
 }
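
Because format() now returns Object, a subclass can hand back something richer than a pre-rendered String; the non-expert PostingsHighlighter APIs simply call toString() on whatever comes back. A minimal sketch of such a formatter (the class name and the "... " joiner are illustrative, not part of the API):

    import org.apache.lucene.search.postingshighlight.Passage;
    import org.apache.lucene.search.postingshighlight.PassageFormatter;

    // Returns a CharSequence rather than a String; the String APIs call
    // toString() on it, while the expert API hands it back unrendered.
    class RawPassageFormatter extends PassageFormatter {
      @Override
      public Object format(Passage[] passages, String content) {
        StringBuilder sb = new StringBuilder();
        for (Passage p : passages) {
          sb.append(content, p.getStartOffset(), p.getEndOffset()).append("... ");
        }
        return sb;
      }
    }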

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Mon Oct 21 18:58:24 2013
@@ -107,7 +107,7 @@ public class PostingsHighlighter {
   private PassageScorer defaultScorer;
   
   /**
-   * Creates a new highlighter with default parameters.
+   * Creates a new highlighter with {@link #DEFAULT_MAX_LENGTH}.
    */
   public PostingsHighlighter() {
     this(DEFAULT_MAX_LENGTH);
@@ -267,7 +267,7 @@ public class PostingsHighlighter {
 
     return highlightFields(fields, query, searcher, docids, maxPassages);
   }
-    
+
   /**
    * Highlights the top-N passages from multiple fields,
    * for the provided int[] docids.
@@ -280,7 +280,7 @@ public class PostingsHighlighter {
    * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to 
    *        form the highlighted snippets.
    * @return Map keyed on field name, containing the array of formatted snippets 
-   *         corresponding to the documents in <code>topDocs</code>. 
+   *         corresponding to the documents in <code>docidsIn</code>. 
    *         If no highlights were found for a document, the
    *         first {@code maxPassages} from the field will
    *         be returned.
@@ -289,6 +289,45 @@ public class PostingsHighlighter {
    *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
    */
   public Map<String,String[]> highlightFields(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
+    Map<String,String[]> snippets = new HashMap<String,String[]>();
+    for(Map.Entry<String,Object[]> ent : highlightFieldsAsObjects(fieldsIn, query, searcher, docidsIn, maxPassagesIn).entrySet()) {
+      Object[] snippetObjects = ent.getValue();
+      String[] snippetStrings = new String[snippetObjects.length];
+      snippets.put(ent.getKey(), snippetStrings);
+      for(int i=0;i<snippetObjects.length;i++) {
+        Object snippet = snippetObjects[i];
+        if (snippet != null) {
+          snippetStrings[i] = snippet.toString();
+        }
+      }
+    }
+
+    return snippets;
+  }
+
+  /**
+   * Expert: highlights the top-N passages from multiple fields,
+   * for the provided int[] docids, to a custom Object as
+   * returned by the {@link PassageFormatter}.  Use
+   * this API to render to something other than String.
+   * 
+   * @param fieldsIn field names to highlight. 
+   *        Must have a stored string value and also be indexed with offsets.
+   * @param query query to highlight.
+   * @param searcher searcher that was previously used to execute the query.
+   * @param docidsIn the document IDs to highlight.
+   * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to 
+   *        form the highlighted snippets.
+   * @return Map keyed on field name, containing the array of formatted snippets 
+   *         corresponding to the documents in <code>docidsIn</code>. 
+   *         If no highlights were found for a document, the
+   *         first {@code maxPassages} from the field will
+   *         be returned.
+   * @throws IOException if an I/O error occurred during processing
+   * @throws IllegalArgumentException if <code>field</code> was indexed without 
+   *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
+   */
+  protected Map<String,Object[]> highlightFieldsAsObjects(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
     if (fieldsIn.length < 1) {
       throw new IllegalArgumentException("fieldsIn must not be empty");
     }
@@ -335,7 +374,7 @@ public class PostingsHighlighter {
     // pull stored data:
     String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
     
-    Map<String,String[]> highlights = new HashMap<String,String[]>();
+    Map<String,Object[]> highlights = new HashMap<String,Object[]>();
     for (int i = 0; i < fields.length; i++) {
       String field = fields[i];
       int numPassages = maxPassages[i];
@@ -350,9 +389,9 @@ public class PostingsHighlighter {
       for(Term term : fieldTerms) {
         terms[termUpto++] = term.bytes();
       }
-      Map<Integer,String> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages);
+      Map<Integer,Object> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages);
         
-      String[] result = new String[docids.length];
+      Object[] result = new Object[docids.length];
       for (int j = 0; j < docidsIn.length; j++) {
         result[j] = fieldHighlights.get(docidsIn[j]);
       }
@@ -394,8 +433,8 @@ public class PostingsHighlighter {
     return ' ';
   }
     
-  private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {  
-    Map<Integer,String> highlights = new HashMap<Integer,String>();
+  private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {  
+    Map<Integer,Object> highlights = new HashMap<Integer,Object>();
     
     // reuse in the real sense... for docs in same segment we just advance our old enum
     DocsAndPositionsEnum postings[] = null;
@@ -563,7 +602,7 @@ public class PostingsHighlighter {
           start = dp.startOffset();
           end = dp.endOffset();
         }
-        if (start >= current.endOffset) {
+        if (start >= current.endOffset || end > contentLength) {
           pq.offer(off);
           break;
         }
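
Since highlightFieldsAsObjects is protected, rendering to a custom object means subclassing. A sketch that pairs it with the RawPassageFormatter sketched earlier, assuming PostingsHighlighter's protected getFormatter(String) hook:

    import java.io.IOException;
    import java.util.Map;

    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.postingshighlight.PassageFormatter;
    import org.apache.lucene.search.postingshighlight.PostingsHighlighter;

    class ObjectHighlighter extends PostingsHighlighter {
      @Override
      protected PassageFormatter getFormatter(String field) {
        return new RawPassageFormatter(); // the formatter sketched above
      }

      // Re-expose the protected expert method to callers:
      public Map<String,Object[]> highlightAsObjects(String[] fields, Query query,
          IndexSearcher searcher, int[] docids, int[] maxPassages) throws IOException {
        return highlightFieldsAsObjects(fields, query, searcher, docids, maxPassages);
      }
    }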

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java Mon Oct 21 18:58:24 2013
@@ -18,6 +18,8 @@ package org.apache.lucene.search.vectorh
  */
 
 import java.io.IOException;
+import java.util.Iterator;
+import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;
@@ -28,7 +30,6 @@ import org.apache.lucene.search.highligh
  *
  */
 public class FastVectorHighlighter {
-
   public static final boolean DEFAULT_PHRASE_HIGHLIGHT = true;
   public static final boolean DEFAULT_FIELD_MATCH = true;
   private final boolean phraseHighlight;
@@ -186,16 +187,71 @@ public class FastVectorHighlighter {
     return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments,
         preTags, postTags, encoder );
   }
-  
+
+  /**
+   * Return the best fragments.  Matches are scanned from matchedFields and turned into fragments against
+   * storedField.  The highlighting may not make sense if matchedFields has matches with offsets that don't
+   * correspond features in storedField.  It will outright throw a {@code StringIndexOutOfBoundsException}
+   * if matchedFields produces offsets outside of storedField.  As such it is advisable that all
+   * matchedFields share the same source as storedField or are at least a prefix of it.
+   * 
+   * @param fieldQuery {@link FieldQuery} object
+   * @param reader {@link IndexReader} of the index
+   * @param docId document id to be highlighted
+   * @param storedField field of the document that stores the text
+   * @param matchedFields fields of the document to scan for matches
+   * @param fragCharSize the length (number of chars) of a fragment
+   * @param maxNumFragments maximum number of fragments
+   * @param fragListBuilder {@link FragListBuilder} object
+   * @param fragmentsBuilder {@link FragmentsBuilder} object
+   * @param preTags pre-tags to be used to highlight terms
+   * @param postTags post-tags to be used to highlight terms
+   * @param encoder an encoder that generates encoded text
+   * @return created fragments, or null when no fragments were created.
+   *         The size of the array can be less than maxNumFragments.
+   * @throws IOException If there is a low-level I/O error
+   */
+  public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
+      String storedField, Set< String > matchedFields, int fragCharSize, int maxNumFragments,
+      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
+      String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
+    FieldFragList fieldFragList =
+      getFieldFragList( fragListBuilder, fieldQuery, reader, docId, matchedFields, fragCharSize );
+    return fragmentsBuilder.createFragments( reader, docId, storedField, fieldFragList, maxNumFragments,
+        preTags, postTags, encoder );
+  }
+
+  /**
+   * Build a FieldFragList for one field.
+   */
   private FieldFragList getFieldFragList( FragListBuilder fragListBuilder,
       final FieldQuery fieldQuery, IndexReader reader, int docId,
-      String fieldName, int fragCharSize ) throws IOException {
-    FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, fieldName, fieldQuery );
+      String matchedField, int fragCharSize ) throws IOException {
+    FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, matchedField, fieldQuery );
     FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery, phraseLimit );
     return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize );
   }
 
   /**
+   * Build a FieldFragList for more than one field.
+   */
+  private FieldFragList getFieldFragList( FragListBuilder fragListBuilder,
+      final FieldQuery fieldQuery, IndexReader reader, int docId,
+      Set< String > matchedFields, int fragCharSize ) throws IOException {
+    Iterator< String > matchedFieldsItr = matchedFields.iterator();
+    if ( !matchedFieldsItr.hasNext() ) {
+      throw new IllegalArgumentException( "matchedFields must contain at least one field name." );
+    }
+    FieldPhraseList[] toMerge = new FieldPhraseList[ matchedFields.size() ];
+    int i = 0;
+    while ( matchedFieldsItr.hasNext() ) {
+      FieldTermStack stack = new FieldTermStack( reader, docId, matchedFieldsItr.next(), fieldQuery );
+      toMerge[ i++ ] = new FieldPhraseList( stack, fieldQuery, phraseLimit );
+    } 
+    return fragListBuilder.createFieldFragList( new FieldPhraseList( toMerge ), fragCharSize );
+  }
+
+  /**
    * return whether phraseHighlight or not.
    * 
    * @return whether phraseHighlight or not
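
Putting the new overload together: matchedFields names the fields whose term vectors are scanned for hits, while storedField supplies the text the fragments are cut from, so the matched fields should be analyzed variants of the stored field's exact source text. A hedged usage sketch (field names, tags, and fragment sizes are illustrative):

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Set;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.highlight.DefaultEncoder;
    import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
    import org.apache.lucene.search.vectorhighlight.FieldQuery;
    import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
    import org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder;

    class MultiMatchedFieldsExample {
      static String[] highlight(FastVectorHighlighter fvh, Query query,
          IndexReader reader, int docId) throws IOException {
        FieldQuery fieldQuery = fvh.getFieldQuery(query, reader);
        Set<String> matchedFields = new HashSet<String>();
        matchedFields.add("content");         // the stored source field itself
        matchedFields.add("content_stemmed"); // an analyzed variant of the same text
        return fvh.getBestFragments(fieldQuery, reader, docId,
            "content", matchedFields, 100, 3,
            new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder(),
            new String[] { "<b>" }, new String[] { "</b>" }, new DefaultEncoder());
      }
    }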

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Mon Oct 21 18:58:24 2013
@@ -17,18 +17,23 @@ package org.apache.lucene.search.vectorh
  */
 
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 
 import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
 import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util.MergedIterator;
 
 /**
  * FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
  * to create a FieldFragList object.
  */
 public class FieldPhraseList {
-
+  /**
+   * List of non-overlapping WeightedPhraseInfo objects.
+   */
   LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
   
   /**
@@ -60,46 +65,98 @@ public class FieldPhraseList {
   public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){
     final String field = fieldTermStack.getFieldName();
 
-    QueryPhraseMap qpm = fieldQuery.getRootMap(field);
-    if (qpm != null) {
-      LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
-      extractPhrases(fieldTermStack.termList, qpm, phraseCandidate, 0);
-      assert phraseCandidate.size() == 0;
+    LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
+    QueryPhraseMap currMap = null;
+    QueryPhraseMap nextMap = null;
+    while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
+    {      
+      phraseCandidate.clear();
+
+      TermInfo ti = fieldTermStack.pop();
+      currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
+
+      // if not found, discard top TermInfo from stack, then try next element
+      if( currMap == null ) continue;
+      
+      // if found, search the longest phrase
+      phraseCandidate.add( ti );
+      while( true ){
+        ti = fieldTermStack.pop();
+        nextMap = null;
+        if( ti != null )
+          nextMap = currMap.getTermMap( ti.getText() );
+        if( ti == null || nextMap == null ){
+          if( ti != null ) 
+            fieldTermStack.push( ti );
+          if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
+            addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
+          }
+          else{
+            while( phraseCandidate.size() > 1 ){
+              fieldTermStack.push( phraseCandidate.removeLast() );
+              currMap = fieldQuery.searchPhrase( field, phraseCandidate );
+              if( currMap != null ){
+                addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
+                break;
+              }
+            }
+          }
+          break;
+        }
+        else{
+          phraseCandidate.add( ti );
+          currMap = nextMap;
+        }
+      }
     }
   }
 
-  void extractPhrases(LinkedList<TermInfo> terms, QueryPhraseMap currMap, LinkedList<TermInfo> phraseCandidate, int longest) {
-    if (terms.isEmpty()) {
-      if (longest > 0) {
-        addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
-      }
+  /**
+   * Merging constructor.
+   *
+   * @param toMerge FieldPhraseLists to merge to build this one
+   */
+  public FieldPhraseList( FieldPhraseList[] toMerge ) {
+    // Merge all overlapping WeightedPhraseInfos
+    // Step 1.  Sort by startOffset, endOffset, and boost, in that order.
+    @SuppressWarnings( { "rawtypes", "unchecked" } )
+    Iterator< WeightedPhraseInfo >[] allInfos = new Iterator[ toMerge.length ];
+    int index = 0;
+    for ( FieldPhraseList fplToMerge : toMerge ) {
+      allInfos[ index++ ] = fplToMerge.phraseList.iterator();
+    }
+    MergedIterator< WeightedPhraseInfo > itr = new MergedIterator< WeightedPhraseInfo >( false, allInfos );
+    // Step 2.  Walk the sorted list merging infos that overlap
+    phraseList = new LinkedList< WeightedPhraseInfo >();
+    if ( !itr.hasNext() ) {
       return;
     }
-    ArrayList<TermInfo> samePositionTerms = new ArrayList<TermInfo>();
-    do {
-      samePositionTerms.add(terms.pop());
-    } while (!terms.isEmpty() && terms.get(0).getPosition() == samePositionTerms.get(0).getPosition());
-
-    // try all next terms at the same position
-    for (TermInfo nextTerm : samePositionTerms) {
-      QueryPhraseMap nextMap = currMap.getTermMap(nextTerm.getText());
-      if (nextMap != null) {
-        phraseCandidate.add(nextTerm);
-        int l = longest;
-        if(nextMap.isValidTermOrPhrase( phraseCandidate ) ){
-          l = phraseCandidate.size();
-        }
-        extractPhrases(terms, nextMap, phraseCandidate, l);
-        phraseCandidate.removeLast();
+    List< WeightedPhraseInfo > work = new ArrayList< WeightedPhraseInfo >();
+    WeightedPhraseInfo first = itr.next();
+    work.add( first );
+    int workEndOffset = first.getEndOffset();
+    while ( itr.hasNext() ) {
+      WeightedPhraseInfo current = itr.next();
+      if ( current.getStartOffset() <= workEndOffset ) {
+        workEndOffset = Math.max( workEndOffset, current.getEndOffset() );
+        work.add( current );
+      } else {
+        if ( work.size() == 1 ) {
+          phraseList.add( work.get( 0 ) );
+          work.set( 0, current );
+        } else {
+          phraseList.add( new WeightedPhraseInfo( work ) );
+          work.clear();
+          work.add( current );
+        }
+        workEndOffset = current.getEndOffset();
       }
     }
-
-    // ignore the next term
-    extractPhrases(terms, currMap, phraseCandidate, longest);
-
-    // add terms back
-    for (TermInfo nextTerm : samePositionTerms) {
-      terms.push(nextTerm);
+    if ( work.size() == 1 ) {
+      phraseList.add( work.get( 0 ) );
+    } else {
+      phraseList.add( new WeightedPhraseInfo( work ) );
+      work.clear();
     }
   }
 
@@ -118,9 +175,7 @@ public class FieldPhraseList {
   /**
    * Represents the list of term offsets and boost for some text
    */
-  public static class WeightedPhraseInfo {
-
-    private String text;  // unnecessary member, just exists for debugging purpose
+  public static class WeightedPhraseInfo implements Comparable< WeightedPhraseInfo > {
     private List<Toffs> termsOffsets;   // usually termsOffsets.size() == 1,
                             // but if position-gap > 1 and slop > 0 then size() could be greater than 1
     private float boost;  // query boost
@@ -129,10 +184,15 @@ public class FieldPhraseList {
     private ArrayList<TermInfo> termsInfos;
     
     /**
+     * Text of the match, calculated on the fly.  Use for debugging only.
      * @return the text
      */
     public String getText() {
-      return text;
+      StringBuilder text = new StringBuilder();
+      for ( TermInfo ti: termsInfos ) {
+        text.append( ti.getText() );
+      }
+      return text.toString();
     }
 
     /**
@@ -156,11 +216,11 @@ public class FieldPhraseList {
       return termsInfos;
     }
 
-    public WeightedPhraseInfo( List<TermInfo> terms, float boost ){
+    public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
       this( terms, boost, 0 );
     }
     
-    public WeightedPhraseInfo( List<TermInfo> terms, float boost, int seqnum ){
+    public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
       this.boost = boost;
       this.seqnum = seqnum;
       
@@ -171,15 +231,11 @@ public class FieldPhraseList {
       TermInfo ti = terms.get( 0 );
       termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
       if( terms.size() == 1 ){
-        text = ti.getText();
         return;
       }
-      StringBuilder sb = new StringBuilder();
-      sb.append( ti.getText() );
       int pos = ti.getPosition();
       for( int i = 1; i < terms.size(); i++ ){
         ti = terms.get( i );
-        sb.append( ti.getText() );
         if( ti.getPosition() - pos == 1 ){
           Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
           to.setEndOffset( ti.getEndOffset() );
@@ -189,7 +245,50 @@ public class FieldPhraseList {
         }
         pos = ti.getPosition();
       }
-      text = sb.toString();
+    }
+
+    /**
+     * Merging constructor.  Note that this just grabs seqnum from the first info.
+     */
+    public WeightedPhraseInfo( Collection< WeightedPhraseInfo > toMerge ) {
+      // Pretty much the same idea as merging FieldPhraseLists:
+      // Step 1.  Sort by startOffset, endOffset
+      //          While we are here merge the boosts and termInfos
+      Iterator< WeightedPhraseInfo > toMergeItr = toMerge.iterator();
+      if ( !toMergeItr.hasNext() ) {
+        throw new IllegalArgumentException( "toMerge must contain at least one WeightedPhraseInfo." );
+      }
+      WeightedPhraseInfo first = toMergeItr.next();
+      @SuppressWarnings( { "rawtypes", "unchecked" } )
+      Iterator< Toffs >[] allToffs = new Iterator[ toMerge.size() ];
+      termsInfos = new ArrayList< TermInfo >();
+      seqnum = first.seqnum;
+      boost = first.boost;
+      allToffs[ 0 ] = first.termsOffsets.iterator();
+      int index = 1;
+      while ( toMergeItr.hasNext() ) {
+        WeightedPhraseInfo info = toMergeItr.next();
+        boost += info.boost;
+        termsInfos.addAll( info.termsInfos );
+        allToffs[ index++ ] = info.termsOffsets.iterator();
+      }
+      // Step 2.  Walk the sorted list merging overlaps
+      MergedIterator< Toffs > itr = new MergedIterator< Toffs >( false, allToffs );
+      termsOffsets = new ArrayList< Toffs >();
+      if ( !itr.hasNext() ) {
+        return;
+      }
+      Toffs work = itr.next();
+      while ( itr.hasNext() ) {
+        Toffs current = itr.next();
+        if ( current.startOffset <= work.endOffset ) {
+          work.endOffset = Math.max( work.endOffset, current.endOffset );
+        } else {
+          termsOffsets.add( work );
+          work = current;
+        }
+      }
+      termsOffsets.add( work );
     }
     
     public int getStartOffset(){
@@ -199,7 +298,7 @@ public class FieldPhraseList {
     public int getEndOffset(){
       return termsOffsets.get( termsOffsets.size() - 1 ).endOffset;
     }
-    
+
     public boolean isOffsetOverlap( WeightedPhraseInfo other ){
       int so = getStartOffset();
       int eo = getEndOffset();
@@ -215,7 +314,7 @@ public class FieldPhraseList {
     @Override
     public String toString(){
       StringBuilder sb = new StringBuilder();
-      sb.append( text ).append( '(' ).append( boost ).append( ")(" );
+      sb.append( getText() ).append( '(' ).append( boost ).append( ")(" );
       for( Toffs to : termsOffsets ){
         sb.append( to );
       }
@@ -230,10 +329,58 @@ public class FieldPhraseList {
       return seqnum;
     }
 
+    @Override
+    public int compareTo( WeightedPhraseInfo other ) {
+      int diff = getStartOffset() - other.getStartOffset();
+      if ( diff != 0 ) {
+        return diff;
+      }
+      diff = getEndOffset() - other.getEndOffset();
+      if ( diff != 0 ) {
+        return diff;
+      }
+      return (int) Math.signum( getBoost() - other.getBoost() );
+    }
+
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + getStartOffset();
+      result = prime * result + getEndOffset();
+      long b = Double.doubleToLongBits( getBoost() );
+      result = prime * result + ( int )( b ^ ( b >>> 32 ) );
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (obj == null) {
+        return false;
+      }
+      if (getClass() != obj.getClass()) {
+        return false;
+      }
+      WeightedPhraseInfo other = (WeightedPhraseInfo) obj;
+      if (getStartOffset() != other.getStartOffset()) {
+        return false;
+      }
+      if (getEndOffset() != other.getEndOffset()) {
+        return false;
+      }
+      if (getBoost() != other.getBoost()) {
+        return false;
+      }
+      return true;
+    }
+
     /**
      * Term offsets (start + end)
      */
-    public static class Toffs {
+    public static class Toffs implements Comparable< Toffs > {
       private int startOffset;
       private int endOffset;
       public Toffs( int startOffset, int endOffset ){
@@ -250,6 +397,42 @@ public class FieldPhraseList {
         return endOffset;
       }
       @Override
+      public int compareTo( Toffs other ) {
+        int diff = getStartOffset() - other.getStartOffset();
+        if ( diff != 0 ) {
+          return diff;
+        }
+        return getEndOffset() - other.getEndOffset();
+      }
+      @Override
+      public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + getStartOffset();
+        result = prime * result + getEndOffset();
+        return result;
+      }
+      @Override
+      public boolean equals(Object obj) {
+        if (this == obj) {
+          return true;
+        }
+        if (obj == null) {
+          return false;
+        }
+        if (getClass() != obj.getClass()) {
+          return false;
+        }
+        Toffs other = (Toffs) obj;
+        if (getStartOffset() != other.getStartOffset()) {
+          return false;
+        }
+        if (getEndOffset() != other.getEndOffset()) {
+          return false;
+        }
+        return true;
+      }
+      @Override
       public String toString(){
         StringBuilder sb = new StringBuilder();
         sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
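
The merging constructor above relies on each input FieldPhraseList already being sorted by offset (each is built from a single field's term stack), walks them in one MergedIterator pass, and coalesces overlapping WeightedPhraseInfos, summing their boosts. A sketch of how per-field lists would be built and merged (method name illustrative):

    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.vectorhighlight.FieldPhraseList;
    import org.apache.lucene.search.vectorhighlight.FieldQuery;
    import org.apache.lucene.search.vectorhighlight.FieldTermStack;

    class MergeExample {
      static FieldPhraseList mergedPhraseList(IndexReader reader, int docId,
          FieldQuery fieldQuery, int phraseLimit, String... matchedFields)
          throws IOException {
        FieldPhraseList[] toMerge = new FieldPhraseList[matchedFields.length];
        for (int i = 0; i < matchedFields.length; i++) {
          FieldTermStack stack = new FieldTermStack(reader, docId, matchedFields[i], fieldQuery);
          toMerge[i] = new FieldPhraseList(stack, fieldQuery, phraseLimit);
        }
        // Overlapping infos across fields are coalesced; their boosts add up.
        return new FieldPhraseList(toMerge);
      }
    }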

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Mon Oct 21 18:58:24 2013
@@ -17,8 +17,6 @@ package org.apache.lucene.search.vectorh
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -41,7 +39,6 @@ import org.apache.lucene.search.PhraseQu
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
-import org.apache.lucene.util.InPlaceMergeSorter;
 
 /**
  * FieldQuery breaks down query object into terms/phrases and keeps
@@ -333,8 +330,7 @@ public class FieldQuery {
     return root.searchPhrase( phraseCandidate );
   }
   
-  /** Get the root map for the given field name. */
-  public QueryPhraseMap getRootMap( String fieldName ){
+  private QueryPhraseMap getRootMap( String fieldName ){
     return rootMaps.get( fieldMatch ? fieldName : null );
   }
   
@@ -351,7 +347,6 @@ public class FieldQuery {
     boolean terminal;
     int slop;   // valid if terminal == true and phraseHighlight == true
     float boost;  // valid if terminal == true
-    int[] positions; // valid if terminal == true
     int termOrPhraseNumber;   // valid if terminal == true
     FieldQuery fieldQuery;
     Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
@@ -374,107 +369,38 @@ public class FieldQuery {
       return map;
     }
 
-    void add( Query query, IndexReader reader ) {
+    void add( Query query, IndexReader reader ) {
       if( query instanceof TermQuery ){
         addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
       }
       else if( query instanceof PhraseQuery ){
         PhraseQuery pq = (PhraseQuery)query;
-        final Term[] terms = pq.getTerms();
-        final int[] positions = pq.getPositions();
-        new InPlaceMergeSorter() {
-
-          @Override
-          protected void swap(int i, int j) {
-            Term tmpTerm = terms[i];
-            terms[i] = terms[j];
-            terms[j] = tmpTerm;
-
-            int tmpPos = positions[i];
-            positions[i] = positions[j];
-            positions[j] = tmpPos;
-          }
-
-          @Override
-          protected int compare(int i, int j) {
-            return positions[i] - positions[j];
-          }
-        }.sort(0, terms.length);
-
-        addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
+        Term[] terms = pq.getTerms();
+        Map<String, QueryPhraseMap> map = subMap;
+        QueryPhraseMap qpm = null;
+        for( Term term : terms ){
+          qpm = getOrNewMap( map, term.text() );
+          map = qpm.subMap;
+        }
+        qpm.markTerminal( pq.getSlop(), pq.getBoost() );
       }
       else
         throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
     }
-
-    private int numTermsAtSamePosition(int[] positions, int i) {
-      int numTermsAtSamePosition = 1;
-      for (int j = i + 1; j < positions.length; ++j) {
-        if (positions[j] == positions[i]) {
-          ++numTermsAtSamePosition;
-        }
-      }
-      return numTermsAtSamePosition;
-    }
-
-    private void addToMap(PhraseQuery pq, Term[] terms, int[] positions, int i, Map<String, QueryPhraseMap> map, int slop) {
-      int numTermsAtSamePosition = numTermsAtSamePosition(positions, i);
-      for (int j = 0; j < numTermsAtSamePosition; ++j) {
-        QueryPhraseMap qpm = getOrNewMap(map, terms[i + j].text());
-        if (i + numTermsAtSamePosition == terms.length) {
-          qpm.markTerminal(pq.getSlop(), pq.getBoost(), uniquePositions(positions));
-        } else {
-          addToMap(pq, terms, positions, i + numTermsAtSamePosition, qpm.subMap, slop);
-        }
-      }
-      if (slop > 2 && i + numTermsAtSamePosition < terms.length) {
-        Term[] otherTerms = Arrays.copyOf(terms, terms.length);
-        int[] otherPositions = Arrays.copyOf(positions, positions.length);
-        final int nextTermAtSamePosition = numTermsAtSamePosition(positions, i + numTermsAtSamePosition);
-        System.arraycopy(terms, i + numTermsAtSamePosition, otherTerms, i, nextTermAtSamePosition);
-        System.arraycopy(positions, i + numTermsAtSamePosition, otherPositions, i, nextTermAtSamePosition);
-        System.arraycopy(terms, i, otherTerms, i + nextTermAtSamePosition, numTermsAtSamePosition);
-        System.arraycopy(positions, i, otherPositions, i + nextTermAtSamePosition, numTermsAtSamePosition);
-        addToMap(pq, otherTerms, otherPositions, i, map, slop - 2);
-      }
-    }
-
-    private int[] uniquePositions(int[] positions) {
-      int uniqueCount = 1;
-      for (int i = 1; i < positions.length; ++i) {
-        if (positions[i] != positions[i - 1]) {
-          ++uniqueCount;
-        }
-      }
-      if (uniqueCount == positions.length) {
-        return positions;
-      }
-      int[] result = new int[uniqueCount];
-      result[0] = positions[0];
-      for (int i = 1, j = 1; i < positions.length; ++i) {
-        if (positions[i] != positions[i - 1]) {
-          result[j++] = positions[i];
-        }
-      }
-      return result;
-    }
-
+    
     public QueryPhraseMap getTermMap( String term ){
       return subMap.get( term );
     }
     
     private void markTerminal( float boost ){
-      markTerminal( 0, boost, null );
+      markTerminal( 0, boost );
     }
     
-    private void markTerminal( int slop, float boost, int[] positions ){
-      if (slop > this.slop || (slop == this.slop && boost > this.boost)) {
-        this.terminal = true;
-        this.slop = slop;
-        this.boost = boost;
-        this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
-        this.positions = positions;
-      }
+    private void markTerminal( int slop, float boost ){
+      this.terminal = true;
+      this.slop = slop;
+      this.boost = boost;
+      this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
     }
     
     public boolean isTerminal(){
@@ -509,20 +435,15 @@ public class FieldQuery {
       // if the candidate is a term, it is valid
       if( phraseCandidate.size() == 1 ) return true;
 
-      
-      assert phraseCandidate.size() == positions.length;
       // else check whether the candidate is valid phrase
       // compare position-gaps between terms to slop
       int pos = phraseCandidate.get( 0 ).getPosition();
-      int totalDistance = 0;
       for( int i = 1; i < phraseCandidate.size(); i++ ){
         int nextPos = phraseCandidate.get( i ).getPosition();
-        final int expectedDelta = positions[i] - positions[i - 1];
-        final int actualDelta = nextPos - pos;
-        totalDistance += Math.abs(expectedDelta - actualDelta);
+        if( Math.abs( nextPos - pos - 1 ) > slop ) return false;
         pos = nextPos;
       }
-      return totalDistance <= slop;
+      return true;
     }
   }
 }
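
With the positions array gone, validity falls back to an adjacent-gap test: each consecutive pair of candidate terms may deviate from being exactly adjacent by at most slop positions. For example, terms at positions 3 and 5 leave a gap of 1, so they pass with slop >= 1 and fail with slop 0. The check, restated standalone:

    class SlopCheckExample {
      // Standalone restatement of isValidTermOrPhrase's adjacent-gap test:
      // consecutive term positions may deviate from a gap of exactly 1
      // by at most `slop`.
      static boolean withinSlop(int[] positions, int slop) {
        for (int i = 1; i < positions.length; i++) {
          if (Math.abs(positions[i] - positions[i - 1] - 1) > slop) {
            return false;
          }
        }
        return true; // e.g. positions {3, 5} pass with slop >= 1, fail with slop 0
      }
    }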

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java Mon Oct 21 18:58:24 2013
@@ -145,13 +145,6 @@ public class FieldTermStack {
   }
 
   /**
-   * Return the top TermInfo object of the stack without removing it.
-   */
-  public TermInfo peek() {
-    return termList.peek();
-  }
-
-  /**
    * @param termInfo the TermInfo object to be put on the top of the stack
    */
   public void push( TermInfo termInfo ){
@@ -168,7 +161,8 @@ public class FieldTermStack {
   }
   
   /**
-   * Single term with its position/offsets in the document and IDF weight
+   * Single term with its position/offsets in the document and IDF weight.
+   * It is Comparable; its ordering, equals and hashCode all consider only position.
    */
   public static class TermInfo implements Comparable<TermInfo>{
 
@@ -205,5 +199,30 @@ public class FieldTermStack {
     public int compareTo( TermInfo o ){
       return ( this.position - o.position );
     }
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + position;
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (obj == null) {
+        return false;
+      }
+      if (getClass() != obj.getClass()) {
+        return false;
+      }
+      TermInfo other = (TermInfo) obj;
+      if (position != other.position) {
+        return false;
+      }
+      return true;
+    }
   }
 }
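
The equals/hashCode pair added above makes TermInfo's equality consistent with its
compareTo: both look only at position, so two instances for different terms at the
same position compare equal and collide in hash-based collections. A stripped-down
mirror of that contract (hypothetical class; the real TermInfo also carries text,
offsets and weight):

    class PositionOnly implements Comparable<PositionOnly> {
      final String text;
      final int position;

      PositionOnly(String text, int position) {
        this.text = text;
        this.position = position;
      }

      @Override
      public int compareTo(PositionOnly o) {
        return this.position - o.position; // order by position only
      }

      @Override
      public int hashCode() {
        return 31 + position; // position is the sole component
      }

      @Override
      public boolean equals(Object obj) {
        if (this == obj) return true;
        if (obj == null || getClass() != obj.getClass()) return false;
        return position == ((PositionOnly) obj).position; // text is ignored
      }
    }

Under this contract new PositionOnly("cat", 3) and new PositionOnly("dog", 3) are
equal, so a HashSet would keep only one of them; callers of the new equals should
expect the same from TermInfo.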

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java Mon Oct 21 18:58:24 2013
@@ -49,7 +49,7 @@ public class OffsetLimitTokenFilterTest 
     assertTokenStreamContents(filter, new String[] {"short", "toolong",
         "evenmuchlongertext"});
     
-    checkOneTermReuse(new Analyzer() {
+    checkOneTerm(new Analyzer() {
       
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Mon Oct 21 18:58:24 2013
@@ -21,6 +21,7 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.text.BreakIterator;
+import java.util.Arrays;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -47,8 +48,8 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
 
 @SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
 public class TestPostingsHighlighter extends LuceneTestCase {
@@ -86,12 +87,42 @@ public class TestPostingsHighlighter ext
     ir.close();
     dir.close();
   }
+
+  public void testFormatWithMatchExceedingContentLength2() throws Exception {
+    
+    String bodyText = "123 TEST 01234 TEST";
+
+    String[] snippets = formatWithMatchExceedingContentLength(bodyText);
+    
+    assertEquals(1, snippets.length);
+    assertEquals("123 <b>TEST</b> 01234 TE", snippets[0]);
+  }
+
+  public void testFormatWithMatchExceedingContentLength3() throws Exception {
+    
+    String bodyText = "123 5678 01234 TEST TEST";
+    
+    String[] snippets = formatWithMatchExceedingContentLength(bodyText);
+    
+    assertEquals(1, snippets.length);
+    assertEquals("123 5678 01234 TE", snippets[0]);
+  }
   
   public void testFormatWithMatchExceedingContentLength() throws Exception {
-          
-    int maxLength = 17;
+    
     String bodyText = "123 5678 01234 TEST";
     
+    String[] snippets = formatWithMatchExceedingContentLength(bodyText);
+    
+    assertEquals(1, snippets.length);
+    // LUCENE-5166: no snippet
+    assertEquals("123 5678 01234 TE", snippets[0]);
+  }
+
+  private String[] formatWithMatchExceedingContentLength(String bodyText) throws IOException {
+    
+    int maxLength = 17;
+    
     final Analyzer analyzer = new MockAnalyzer(random());
     
     Directory dir = newDirectory();
@@ -122,12 +153,9 @@ public class TestPostingsHighlighter ext
     String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
     
     
-    assertEquals(1, snippets.length);
-    // LUCENE-5166: no snippet
-    assertEquals("123 5678 01234 TE", snippets[0]);
-    
     ir.close();
     dir.close();
+    return snippets;
   }
   
   // simple test highlighting last word.
@@ -1041,4 +1069,54 @@ public class TestPostingsHighlighter ext
     ir.close();
     dir.close();
   }
+
+  // LUCENE-4906
+  public void testObjectFormatter() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    
+    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter() {
+      @Override
+      protected PassageFormatter getFormatter(String field) {
+        return new PassageFormatter() {
+          PassageFormatter defaultFormatter = new DefaultPassageFormatter();
+
+          @Override
+          public String[] format(Passage passages[], String content) {
+            // Just turns the String snippet into a length 2
+            // array of String
+            return new String[] {"blah blah", defaultFormatter.format(passages, content).toString()};
+          }
+        };
+      }
+    };
+
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    int[] docIDs = new int[1];
+    docIDs[0] = topDocs.scoreDocs[0].doc;
+    Map<String,Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[]{"body"}, query, searcher, docIDs, new int[] {1});
+    Object[] bodySnippets = snippets.get("body");
+    assertEquals(1, bodySnippets.length);
+    assertTrue(Arrays.equals(new String[] {"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0]));
+    
+    ir.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Mon Oct 21 18:58:24 2013
@@ -170,20 +170,20 @@ public abstract class AbstractTestCase e
   protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
     List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
 
-    TokenStream tokenStream = analyzer.tokenStream(field, text);
-    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+    try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
+      TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
 
-    BytesRef bytesRef = termAttribute.getBytesRef();
+      BytesRef bytesRef = termAttribute.getBytesRef();
 
-    tokenStream.reset();
+      tokenStream.reset();
     
-    while (tokenStream.incrementToken()) {
-      termAttribute.fillBytesRef();
-      bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
-    }
+      while (tokenStream.incrementToken()) {
+        termAttribute.fillBytesRef();
+        bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
+      }
 
-    tokenStream.end();
-    tokenStream.close();
+      tokenStream.end();
+    }
 
     return bytesRefs;
   }
@@ -264,7 +264,8 @@ public abstract class AbstractTestCase e
     }
     
     @Override
-    public final void end(){
+    public final void end() throws IOException {
+      super.end();
       offsetAtt.setOffset(getFinalOffset(),getFinalOffset());
     }
     
@@ -318,7 +319,8 @@ public abstract class AbstractTestCase e
     }
     
     @Override
-    public void reset() {
+    public void reset() throws IOException {
+      super.reset();
       startTerm = 0;
       nextStartOffset = 0;
       snippet = null;
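
The try-with-resources rewrite above follows the standard TokenStream consumption
lifecycle. A minimal sketch of the same workflow, assuming the Lucene 4.x APIs used
on this branch (hypothetical helper, collecting terms as Strings rather than
BytesRefs):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    class AnalyzeSketch {
      static List<String> analyze(Analyzer analyzer, String field, String text) throws IOException {
        List<String> terms = new ArrayList<String>();
        try (TokenStream ts = analyzer.tokenStream(field, text)) {
          CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
          ts.reset();                       // required before the first incrementToken()
          while (ts.incrementToken()) {
            terms.add(termAtt.toString()); // copy out; the attribute is reused per token
          }
          ts.end();                         // records end-of-stream offset state
        }                                   // close() runs here, even on exceptions
        return terms;
      }
    }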

Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Mon Oct 21 18:58:24 2013
@@ -16,18 +16,18 @@ package org.apache.lucene.search.vectorh
  * limitations under the License.
  */
 import java.io.IOException;
-import java.io.Reader;
-import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.Token;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -35,7 +35,6 @@ import org.apache.lucene.document.TextFi
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -45,13 +44,16 @@ import org.apache.lucene.search.PhraseQu
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 
 
 public class FastVectorHighlighterTest extends LuceneTestCase {
-
-  private static final String FIELD = "text";
+  
   
   public void testSimpleHighlightTest() throws IOException {
     Directory dir = newDirectory();
@@ -298,128 +300,222 @@ public class FastVectorHighlighterTest e
     writer.close();
     dir.close();
   }
+  
+  public void testMatchedFields() throws IOException {
+    // Searching just on the stored field doesn't highlight a stopword
+    matchedFieldsTestCase( false, true, "a match", "a <b>match</b>",
+      clause( "field", "a" ), clause( "field", "match" ) );
+
+    // Even if you add an unqueried matched field that would match it
+    matchedFieldsTestCase( "a match", "a <b>match</b>",
+      clause( "field", "a" ), clause( "field", "match" ) );
+
+    // Nor if you query the field but don't add it as a matched field to the highlighter
+    matchedFieldsTestCase( false, false, "a match", "a <b>match</b>",
+      clause( "field_exact", "a" ), clause( "field", "match" ) );
+
+    // But if you query the field and add it as a matched field to the highlighter then it is highlighted
+    matchedFieldsTestCase( "a match", "<b>a</b> <b>match</b>",
+      clause( "field_exact", "a" ), clause( "field", "match" ) );
+
+    // It is also ok to match just the matched field but get highlighting from the stored field
+    matchedFieldsTestCase( "a match", "<b>a</b> <b>match</b>",
+      clause( "field_exact", "a" ), clause( "field_exact", "match" ) );
+
+    // Boosted matched fields work too
+    matchedFieldsTestCase( "a match", "<b>a</b> <b>match</b>",
+      clause( "field_exact", 5, "a" ), clause( "field", "match" ) );
+
+    // It is also ok if both the stored and the matched field match the term
+    matchedFieldsTestCase( "a match", "a <b>match</b>",
+      clause( "field_exact", "match" ), clause( "field", "match" ) );
+
+    // And the highlighter respects the boosts on matched fields when sorting fragments
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "junk junk <b>a cat</b> junk junk",
+      clause( "field", "cat" ), clause( "field_exact", 5, "a", "cat" ) );
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>cat</b> <b>cat</b> junk junk junk junk",
+      clause( "field", "cat" ), clause( "field_exact", "a", "cat" ) );
+
+    // The same thing works across three fields as well
+    matchedFieldsTestCase( "cat cat CAT junk junk junk junk junk junk junk a cat junk junk",
+      "junk junk <b>a cat</b> junk junk",
+      clause( "field", "cat" ), clause( "field_exact", 200, "a", "cat" ), clause( "field_super_exact", 5, "CAT" ) );
+    matchedFieldsTestCase( "a cat cat junk junk junk junk junk junk junk a CAT junk junk",
+      "junk junk <b>a CAT</b> junk junk",
+      clause( "field", "cat" ), clause( "field_exact", 5, "a", "cat" ), clause( "field_super_exact", 200, "a", "CAT" ) );
+
+    // And across fields with different tokenizers!
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "junk junk <b>a cat</b> junk junk",
+      clause( "field_exact", 5, "a", "cat" ), clause( "field_characters", "c" ) );
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>c</b>at <b>c</b>at junk junk junk junk",
+      clause( "field_exact", "a", "cat" ), clause( "field_characters", "c" ) );
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "ca<b>t</b> ca<b>t</b> junk junk junk junk",
+      clause( "field_exact", "a", "cat" ), clause( "field_characters", "t" ) );
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>cat</b> <b>cat</b> junk junk junk junk", // See how the phrases are joined?
+      clause( "field", "cat" ), clause( "field_characters", 5, "c" ) );
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "junk junk <b>a cat</b> junk junk",
+      clause( "field", "cat" ), clause( "field_characters", 5, "a", " ", "c", "a", "t" ) );
+
+    // Phrases and tokens inside one another are joined
+    matchedFieldsTestCase( "cats wow", "<b>cats w</b>ow",
+      clause( "field", "cats" ), clause( "field_tripples", "s w" ) );
+
+    // Everything works pretty well even if you don't require a field match
+    matchedFieldsTestCase( true, false, "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "junk junk <b>a cat</b> junk junk",
+      clause( "field", "cat" ), clause( "field_characters", 10, "a", " ", "c", "a", "t" ) );
+
+    // Even boosts keep themselves pretty much intact
+    matchedFieldsTestCase( true, false, "a cat cat junk junk junk junk junk junk junk a CAT junk junk",
+      "junk junk <b>a CAT</b> junk junk",
+      clause( "field", "cat" ), clause( "field_exact", 5, "a", "cat" ), clause( "field_super_exact", 200, "a", "CAT" ) );
+    matchedFieldsTestCase( true, false, "cat cat CAT junk junk junk junk junk junk junk a cat junk junk",
+      "junk junk <b>a cat</b> junk junk",
+      clause( "field", "cat" ), clause( "field_exact", 200, "a", "cat" ), clause( "field_super_exact", 5, "CAT" ) );
+
+    // Except that all the matched field matches apply even if they aren't mentioned in the query
+    // which can make for some confusing scoring.  This isn't too big a deal, just something you
+    // need to think about when you don't force a field match.
+    matchedFieldsTestCase( true, false, "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>cat</b> <b>cat</b> junk junk junk junk",
+      clause( "field", "cat" ), clause( "field_characters", 4, "a", " ", "c", "a", "t" ) );
+
+    // It is also cool to match fields that don't have _exactly_ the same text so long as you are careful.
+    // In this case field_sliced is a prefix of field.
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>cat</b> <b>cat</b> junk junk junk junk", clause( "field_sliced", "cat" ) );
+
+    // Multiple matches add to the score of the segment
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>cat</b> <b>cat</b> junk junk junk junk",
+      clause( "field", "cat" ), clause( "field_sliced", "cat" ), clause( "field_exact", 2, "a", "cat" ) );
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "junk junk <b>a cat</b> junk junk",
+      clause( "field", "cat" ), clause( "field_sliced", "cat" ), clause( "field_exact", 4, "a", "cat" ) );
+
+    // Even fields with tokens on top of one another are ok
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>cat</b> cat junk junk junk junk",
+      clause( "field_der_red", 2, "der" ), clause( "field_exact", "a", "cat" ) );
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>cat</b> cat junk junk junk junk",
+      clause( "field_der_red", 2, "red" ), clause( "field_exact", "a", "cat" ) );
+    matchedFieldsTestCase( "cat cat junk junk junk junk junk junk junk a cat junk junk",
+      "<b>cat</b> cat junk junk junk junk",
+      clause( "field_der_red", "red" ), clause( "field_der_red", "der" ), clause( "field_exact", "a", "cat" ) );
+  }
 
-  public void testOverlappingPhrases() throws IOException {
-    final Analyzer analyzer = new Analyzer() {
-
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        final Tokenizer source = new MockTokenizer(reader);
-        TokenStream sink = source;
-        sink = new SynonymFilter(sink);
-        return new TokenStreamComponents(source, sink);
-      }
-
-    };
-    final Directory directory = newDirectory();
-    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer);
-    Document doc = new Document();
-    FieldType withVectors = new FieldType(TextField.TYPE_STORED);
-    withVectors.setStoreTermVectors(true);
-    withVectors.setStoreTermVectorPositions(true);
-    withVectors.setStoreTermVectorOffsets(true);
-    doc.add(new Field(FIELD, "a b c", withVectors));
-    iw.addDocument(doc);
-    DirectoryReader ir = iw.getReader();
-
-    // Disjunction of two overlapping phrase queries
-    final PhraseQuery pq1 = new PhraseQuery();
-    pq1.add(new Term(FIELD, "a"), 0);
-    pq1.add(new Term(FIELD, "b"), 1);
-    pq1.add(new Term(FIELD, "c"), 2);
-
-    final PhraseQuery pq2 = new PhraseQuery();
-    pq2.add(new Term(FIELD, "a"), 0);
-    pq2.add(new Term(FIELD, "B"), 1);
-    pq2.add(new Term(FIELD, "c"), 2);
-
-    final BooleanQuery bq = new BooleanQuery();
-    bq.add(pq1, Occur.SHOULD);
-    bq.add(pq2, Occur.SHOULD);
-
-    // Single phrase query with two terms at the same position
-    final PhraseQuery pq = new PhraseQuery();
-    pq.add(new Term(FIELD, "a"), 0);
-    pq.add(new Term(FIELD, "b"), 1);
-    pq.add(new Term(FIELD, "B"), 1);
-    pq.add(new Term(FIELD, "c"), 2);
-
-    for (Query query : Arrays.asList(pq1, pq2, bq, pq)) {
-      assertEquals(1, new IndexSearcher(ir).search(bq, 1).totalHits);
-
-      FastVectorHighlighter highlighter = new FastVectorHighlighter();
-      FieldQuery fieldQuery  = highlighter.getFieldQuery(query, ir);
-      String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1);
-      assertEquals("<b>a b c</b>", bestFragments[0]);
-    }
-
-    ir.close();
-    iw.close();
-    directory.close();
+  private void matchedFieldsTestCase( String fieldValue, String expected, Query... queryClauses ) throws IOException {
+    matchedFieldsTestCase( true, true, fieldValue, expected, queryClauses );
   }
 
-  public void testPhraseWithGap() throws IOException {
-    final Directory directory = newDirectory();
-    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
+  private void matchedFieldsTestCase( boolean useMatchedFields, boolean fieldMatch, String fieldValue, String expected, Query... queryClauses ) throws IOException {
     Document doc = new Document();
-    FieldType withVectors = new FieldType(TextField.TYPE_STORED);
-    withVectors.setStoreTermVectors(true);
-    withVectors.setStoreTermVectorPositions(true);
-    withVectors.setStoreTermVectorOffsets(true);
-    doc.add(new Field(FIELD, "a b c", withVectors));
-    iw.addDocument(doc);
-    DirectoryReader ir = iw.getReader();
-
-    final PhraseQuery pq = new PhraseQuery();
-    pq.add(new Term(FIELD, "c"), 2);
-    pq.add(new Term(FIELD, "a"), 0);
+    FieldType stored = new FieldType( TextField.TYPE_STORED );
+    stored.setStoreTermVectorOffsets( true );
+    stored.setStoreTermVectorPositions( true );
+    stored.setStoreTermVectors( true );
+    stored.freeze();
+    FieldType matched = new FieldType( TextField.TYPE_NOT_STORED );
+    matched.setStoreTermVectorOffsets( true );
+    matched.setStoreTermVectorPositions( true );
+    matched.setStoreTermVectors( true );
+    matched.freeze();
+    doc.add( new Field( "field", fieldValue, stored ) );               // Whitespace tokenized with English stop words
+    doc.add( new Field( "field_exact", fieldValue, matched ) );        // Whitespace tokenized without stop words
+    doc.add( new Field( "field_super_exact", fieldValue, matched ) );  // Whitespace tokenized without toLower
+    doc.add( new Field( "field_characters", fieldValue, matched ) );   // Each letter is a token
+    doc.add( new Field( "field_tripples", fieldValue, matched ) );     // Every three letters is a token
+    doc.add( new Field( "field_sliced", fieldValue.substring( 0,       // Sliced at 10 chars then analyzed just like field
+      Math.min( fieldValue.length() - 1 , 10 ) ), matched ) );
+    doc.add( new Field( "field_der_red", new CannedTokenStream(        // Hacky field containing "der" and "red" at pos = 0
+          token( "der", 1, 0, 3 ),
+          token( "red", 0, 0, 3 )
+        ), matched ) );
+
+    final Map<String, Analyzer> fieldAnalyzers = new TreeMap<String, Analyzer>();
+    fieldAnalyzers.put( "field", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET ) );
+    fieldAnalyzers.put( "field_exact", new MockAnalyzer( random() ) );
+    fieldAnalyzers.put( "field_super_exact", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, false ) );
+    fieldAnalyzers.put( "field_characters", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp(".").toAutomaton() ), true ) );
+    fieldAnalyzers.put( "field_tripples", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp("...").toAutomaton() ), true ) );
+    fieldAnalyzers.put( "field_sliced", fieldAnalyzers.get( "field" ) );
+    fieldAnalyzers.put( "field_der_red", fieldAnalyzers.get( "field" ) );  // This is required even though we provide a token stream
+    Analyzer analyzer = new AnalyzerWrapper() {
+      public Analyzer getWrappedAnalyzer(String fieldName) {
+        return fieldAnalyzers.get( fieldName );
+      }
+    };
 
-    assertEquals(1, new IndexSearcher(ir).search(pq, 1).totalHits);
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer ) );
+    writer.addDocument( doc );
 
     FastVectorHighlighter highlighter = new FastVectorHighlighter();
-    FieldQuery fieldQuery  = highlighter.getFieldQuery(pq, ir);
-    String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1);
-    assertEquals("<b>a</b> b <b>c</b>", bestFragments[0]);
-
-    ir.close();
-    iw.close();
-    directory.close();
-  }
-
-  // Simple token filter that adds 'B' as a synonym of 'b'
-  private static class SynonymFilter extends TokenFilter {
+    FragListBuilder fragListBuilder = new SimpleFragListBuilder();
+    FragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
+    IndexReader reader = DirectoryReader.open( writer, true );
+    String[] preTags = new String[] { "<b>" };
+    String[] postTags = new String[] { "</b>" };
+    Encoder encoder = new DefaultEncoder();
+    int docId = 0;
+    BooleanQuery query = new BooleanQuery();
+    for ( Query clause : queryClauses ) {
+      query.add( clause, Occur.MUST );
+    }
+    FieldQuery fieldQuery = new FieldQuery( query, reader, true, fieldMatch );
+    String[] bestFragments;
+    if ( useMatchedFields ) {
+      Set< String > matchedFields = new HashSet< String >();
+      matchedFields.add( "field" );
+      matchedFields.add( "field_exact" );
+      matchedFields.add( "field_super_exact" );
+      matchedFields.add( "field_characters" );
+      matchedFields.add( "field_tripples" );
+      matchedFields.add( "field_sliced" );
+      matchedFields.add( "field_der_red" );
+      bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", matchedFields, 25, 1,
+        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
+    } else {
+      bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 25, 1,
+        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
+    }
+    assertEquals( expected, bestFragments[ 0 ] );
 
-    final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-    final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+    reader.close();
+    writer.close();
+    dir.close();
+  }
 
-    State pending;
+  private Query clause( String field, String... terms ) {
+    return clause( field, 1, terms );
+  }
 
-    protected SynonymFilter(TokenStream input) {
-      super(input);
-    }
-
-    @Override
-    public boolean incrementToken() throws IOException {
-      if (pending != null) {
-        restoreState(pending);
-        termAtt.setEmpty().append('B');
-        posIncAtt.setPositionIncrement(0);
-        pending = null;
-        return true;
-      }
-      if (!input.incrementToken()) {
-        return false;
-      }
-      if (termAtt.toString().equals("b")) {
-        pending = captureState();
+  private Query clause( String field, float boost, String... terms ) {
+    Query q;
+    if ( terms.length == 1 ) {
+      q = new TermQuery( new Term( field, terms[ 0 ] ) );
+    } else {
+      PhraseQuery pq = new PhraseQuery();
+      for ( String term: terms ) {
+        pq.add( new Term( field, term ) );
       }
-      return true;
+      q = pq;
     }
+    q.setBoost( boost );
+    return q;
+  }
 
-    @Override
-    public void reset() throws IOException {
-      super.reset();
-      pending = null;
-    }
+  private static Token token( String term, int posInc, int startOffset, int endOffset ) {
+    Token t = new Token( term, startOffset, endOffset );
+    t.setPositionIncrement( posInc );
+    return t;
   }
 }
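
Distilled from matchedFieldsTestCase above, the heart of the matched-fields API is
the getBestFragments overload that takes a Set of field names: hits are gathered
from every matched field's term vectors, while fragments are built from the stored
field's text. A sketch of that call in isolation (assumes query, reader and docId
are in scope; field names follow the test):

    FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true);
    FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
    Set<String> matchedFields = new HashSet<String>(
        Arrays.asList("field", "field_exact", "field_characters"));
    String[] fragments = highlighter.getBestFragments(
        fieldQuery, reader, docId, "field", matchedFields, 25, 1,
        new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder(),
        new String[] { "<b>" }, new String[] { "</b>" }, new DefaultEncoder());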