Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC

svn commit: r1487777 [28/50] - in /lucene/dev/branches/security: ./ dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/ma...

Modified: lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Thu May 30 07:53:18 2013
@@ -17,6 +17,10 @@ package org.apache.lucene.search.posting
  * limitations under the License.
  */
 
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.BreakIterator;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -31,12 +35,14 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
@@ -340,4 +346,543 @@ public class TestPostingsHighlighter ext
     ir.close();
     dir.close();
   }
+
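+  /** Loads a large, single-line UTF-8 test resource and makes sure a
+   *  document of that size can be highlighted without error. */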
+  public void testCambridgeMA() throws Exception {
+    BufferedReader r = new BufferedReader(new InputStreamReader(
+                     this.getClass().getResourceAsStream("CambridgeMA.utf8"), "UTF-8"));
+    String text = r.readLine();
+    r.close();
+    Directory dir = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
+    FieldType positionsType = new FieldType(TextField.TYPE_STORED);
+    positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", text, positionsType);
+    Document document = new Document();
+    document.add(body);
+    iw.addDocument(document);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher searcher = newSearcher(ir);
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD);
+    query.add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD);
+    query.add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD);
+    TopDocs topDocs = searcher.search(query, 10);
+    assertEquals(1, topDocs.totalHits);
+    PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    assertTrue(snippets[0].contains("<b>Square</b>"));
+    assertTrue(snippets[0].contains("<b>Porter</b>"));
+    ir.close();
+    dir.close();
+  }
+  
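+  /** The two best-scoring passages should be selected and joined with an
+   *  ellipsis, rather than simply the first two sentences. */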
+  public void testPassageRanking() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    
+    body.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    Query query = new TermQuery(new Term("body", "test"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    assertEquals("This is a <b>test</b>.  ... Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+    
+    ir.close();
+    dir.close();
+  }
+
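+  /** Terms from MUST_NOT clauses must not be highlighted. */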
+  public void testBooleanMustNot() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
+    FieldType positionsType = new FieldType(TextField.TYPE_STORED);
+    positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "This sentence has both terms.  This sentence has only terms.", positionsType);
+    Document document = new Document();
+    document.add(body);
+    iw.addDocument(document);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher searcher = newSearcher(ir);
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD);
+    BooleanQuery query2 = new BooleanQuery();
+    query.add(query2, BooleanClause.Occur.SHOULD);
+    query2.add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT);
+    TopDocs topDocs = searcher.search(query, 10);
+    assertEquals(1, topDocs.totalHits);
+    PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    assertFalse(snippets[0].contains("<b>both</b>"));
+    ir.close();
+    dir.close();
+  }
+
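+  /** With a WholeBreakIterator the entire field value is one passage, so
+   *  every match in the text is highlighted. */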
+  public void testHighlightAllText() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    
+    body.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+      @Override
+      protected BreakIterator getBreakIterator(String field) {
+        return new WholeBreakIterator();
+      }
+    };
+    Query query = new TermQuery(new Term("body", "test"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+    
+    ir.close();
+    dir.close();
+  }
+
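+  /** Highlights explicitly supplied doc IDs through highlightFields()
+   *  instead of passing TopDocs. */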
+  public void testSpecificDocIDs() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    
+    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    iw.addDocument(doc);
+    body.setStringValue("Highlighting the first term. Hope it works.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(2, topDocs.totalHits);
+    ScoreDoc[] hits = topDocs.scoreDocs;
+    int[] docIDs = new int[2];
+    docIDs[0] = hits[0].doc;
+    docIDs[1] = hits[1].doc;
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 1 }).get("body");
+    assertEquals(2, snippets.length);
+    assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
+    assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
+    
+    ir.close();
+    dir.close();
+  }
+
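+  /** Overrides loadFieldValues() so an unstored field's text can be
+   *  supplied from an external source at highlight time. */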
+  public void testCustomFieldValueSource() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    Document doc = new Document();
+
+    FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
+    Field body = new Field("body", text, offsetsType);
+    doc.add(body);
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+        @Override
+        protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
+          assert fields.length == 1;
+          assert docids.length == 1;
+          String[][] contents = new String[1][1];
+          contents[0][0] = text;
+          return contents;
+        }
+
+        @Override
+        protected BreakIterator getBreakIterator(String field) {
+          return new WholeBreakIterator();
+        }
+      };
+
+    Query query = new TermQuery(new Term("body", "test"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+    
+    ir.close();
+    dir.close();
+  }
+
+  /** Make sure highlighter returns the first N sentences if
+   *  there were no hits. */
+  public void testEmptyHighlights() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Document doc = new Document();
+
+    Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", offsetsType);
+    doc.add(body);
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    int[] docIDs = new int[] {0};
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
+    assertEquals(1, snippets.length);
+    assertEquals("test this is.  another sentence this test has.  ", snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  /** Make sure we can customize how an empty
+   *  highlight is returned. */
+  public void testCustomEmptyHighlights() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Document doc = new Document();
+
+    Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", offsetsType);
+    doc.add(body);
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter() {
+        @Override
+        public Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
+          return new Passage[0];
+        }
+      };
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    int[] docIDs = new int[] {0};
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
+    assertEquals(1, snippets.length);
+    assertNull(snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  /** Make sure highlighter returns the whole text when there
+   *  are no hits and a WholeBreakIterator is used. */
+  public void testEmptyHighlightsWhole() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Document doc = new Document();
+
+    Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", offsetsType);
+    doc.add(body);
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+      @Override
+      protected BreakIterator getBreakIterator(String field) {
+        return new WholeBreakIterator();
+      }
+    };
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    int[] docIDs = new int[] {0};
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
+    assertEquals(1, snippets.length);
+    assertEquals("test this is.  another sentence this test has.  far away is that planet.", snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  /** Make sure highlighter is OK with an entirely missing
+   *  field. */
+  public void testFieldIsMissing() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Document doc = new Document();
+
+    Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", offsetsType);
+    doc.add(body);
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    Query query = new TermQuery(new Term("bogus", "highlighting"));
+    int[] docIDs = new int[] {0};
+    String snippets[] = highlighter.highlightFields(new String[] {"bogus"}, query, searcher, docIDs, new int[] { 2 }).get("bogus");
+    assertEquals(1, snippets.length);
+    assertNull(snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
+
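+  /** A field containing only whitespace yields that whitespace as its
+   *  snippet rather than null. */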
+  public void testFieldIsJustSpace() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+
+    Document doc = new Document();
+    doc.add(new Field("body", "   ", offsetsType));
+    doc.add(new Field("id", "id", offsetsType));
+    iw.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("body", "something", offsetsType));
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    int[] docIDs = new int[1];
+    docIDs[0] = docID;
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
+    assertEquals(1, snippets.length);
+    assertEquals("   ", snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
+
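+  /** A field containing the empty string yields a null snippet. */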
+  public void testFieldIsEmptyString() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+
+    Document doc = new Document();
+    doc.add(new Field("body", "", offsetsType));
+    doc.add(new Field("id", "id", offsetsType));
+    iw.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("body", "something", offsetsType));
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    int[] docIDs = new int[1];
+    docIDs[0] = docID;
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
+    assertEquals(1, snippets.length);
+    assertNull(snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
+
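+  /** Highlights many documents, spread across segments by the random
+   *  commits, and checks each snippet against its stored id. */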
+  public void testMultipleDocs() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+
+    int numDocs = atLeast(100);
+    for(int i=0;i<numDocs;i++) {
+      Document doc = new Document();
+      String content = "the answer is " + i;
+      if ((i & 1) == 0) {
+        content += " some more terms";
+      }
+      doc.add(new Field("body", content, offsetsType));
+      doc.add(newStringField("id", ""+i, Field.Store.YES));
+      iw.addDocument(doc);
+
+      if (random().nextInt(10) == 2) {
+        iw.commit();
+      }
+    }
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    Query query = new TermQuery(new Term("body", "answer"));
+    TopDocs hits = searcher.search(query, numDocs);
+    assertEquals(numDocs, hits.totalHits);
+
+    String snippets[] = highlighter.highlight("body", query, searcher, hits);
+    assertEquals(numDocs, snippets.length);
+    for(int hit=0;hit<numDocs;hit++) {
+      StoredDocument doc = searcher.doc(hits.scoreDocs[hit].doc);
+      int id = Integer.parseInt(doc.get("id"));
+      String expected = "the <b>answer</b> is " + id;
+      if ((id  & 1) == 0) {
+        expected += " some more terms";
+      }
+      assertEquals(expected, snippets[hit]);
+    }
+
+    ir.close();
+    dir.close();
+  }
+  
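+  /** Requests a different number of passages per field in a single
+   *  highlightFields() call. */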
+  public void testMultipleSnippetSizes() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Field title = new Field("title", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    doc.add(title);
+    
+    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    title.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD);
+    query.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD);
+    Map<String,String[]> snippets = highlighter.highlightFields(new String[] { "title", "body" }, query, searcher, new int[] { 0 }, new int[] { 1, 2 });
+    String titleHighlight = snippets.get("title")[0];
+    String bodyHighlight = snippets.get("body")[0];
+    assertEquals("This is a <b>test</b>. ", titleHighlight);
+    assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight);
+    ir.close();
+    dir.close();
+  }
+  
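+  /** The final DefaultPassageFormatter argument enables escaping, so
+   *  passage text is HTML-escaped around the highlighted terms. */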
+  public void testEncode() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    
+    body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter() {
+      @Override
+      protected PassageFormatter getFormatter(String field) {
+        return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
+      }
+    };
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
+    assertEquals(1, snippets.length);
+    assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
+    
+    ir.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Thu May 30 07:53:18 2013
@@ -18,9 +18,7 @@ package org.apache.lucene.search.posting
  */
 
 import java.io.IOException;
-import java.text.BreakIterator;
 import java.util.HashSet;
-import java.util.Locale;
 import java.util.Random;
 
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -30,21 +28,23 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util._TestUtil;
 
 @SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
 public class TestPostingsHighlighterRanking extends LuceneTestCase {
@@ -73,7 +73,7 @@ public class TestPostingsHighlighterRank
     document.add(id);
     document.add(body);
     
-    for (int i = 0; i < numDocs; i++) {;
+    for (int i = 0; i < numDocs; i++) {
       StringBuilder bodyText = new StringBuilder();
       int numSentences = _TestUtil.nextInt(random(), 1, maxNumSentences);
       for (int j = 0; j < numSentences; j++) {
@@ -110,16 +110,24 @@ public class TestPostingsHighlighterRank
   
   private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
     for (int n = 1; n < maxTopN; n++) {
-      FakePassageFormatter f1 = new FakePassageFormatter();
-      PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1, 
-                                                       BreakIterator.getSentenceInstance(Locale.ROOT), 
-                                                       new PassageScorer(),
-                                                       f1);
-      FakePassageFormatter f2 = new FakePassageFormatter();
-      PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1, 
-                                                       BreakIterator.getSentenceInstance(Locale.ROOT), 
-                                                       new PassageScorer(),
-                                                       f2);
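+      // each highlighter supplies its own FakePassageFormatter by
+      // overriding getFormatter() for the field under test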
+      final FakePassageFormatter f1 = new FakePassageFormatter();
+      PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
+          @Override
+          protected PassageFormatter getFormatter(String field) {
+            assertEquals("body", field);
+            return f1;
+          }
+        };
+
+      final FakePassageFormatter f2 = new FakePassageFormatter();
+      PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
+          @Override
+          protected PassageFormatter getFormatter(String field) {
+            assertEquals("body", field);
+            return f2;
+          }
+        };
+
       BooleanQuery bq = new BooleanQuery(false);
       bq.add(query, BooleanClause.Occur.MUST);
       bq.add(new TermQuery(new Term("id", Integer.toString(doc))), BooleanClause.Occur.MUST);
@@ -168,8 +176,7 @@ public class TestPostingsHighlighterRank
         // we use a very simple analyzer. so we can assert the matches are correct
         int lastMatchStart = -1;
         for (int i = 0; i < p.getNumMatches(); i++) {
-          Term term = p.getMatchTerms()[i];
-          assertEquals("body", term.field());
+          BytesRef term = p.getMatchTerms()[i];
           int matchStart = p.getMatchStarts()[i];
           assertTrue(matchStart >= 0);
           // must at least start within the passage
@@ -182,9 +189,8 @@ public class TestPostingsHighlighterRank
           // single character terms
           assertEquals(matchStart+1, matchEnd);
           // and the offsets must be correct...
-          BytesRef bytes = term.bytes();
-          assertEquals(1, bytes.length);
-          assertEquals((char)bytes.bytes[bytes.offset], Character.toLowerCase(content.charAt(matchStart)));
+          assertEquals(1, term.length);
+          assertEquals((char)term.bytes[term.offset], Character.toLowerCase(content.charAt(matchStart)));
         }
         // record just the start/end offset for simplicity
         seen.add(new Pair(p.getStartOffset(), p.getEndOffset()));
@@ -237,4 +243,83 @@ public class TestPostingsHighlighterRank
       return "Pair [start=" + start + ", end=" + end + "]";
     }
   }
+  
+  /** sets b=0 to disable passage length normalization */
+  public void testCustomB() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    
+    body.setStringValue("This is a test.  This test is a better test but the sentence is excruiatingly long, " + 
+                        "you have no idea how painful it was for me to type this long sentence into my IDE.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+        @Override
+        protected PassageScorer getScorer(String field) {
+          return new PassageScorer(1.2f, 0, 87);
+        }
+      };
+    Query query = new TermQuery(new Term("body", "test"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 1);
+    assertEquals(1, snippets.length);
+    assertTrue(snippets[0].startsWith("This <b>test</b> is a better <b>test</b>"));
+    
+    ir.close();
+    dir.close();
+  }
+  
+  /** sets k1=0 for simple coordinate-level match (# of query terms present) */
+  public void testCustomK1() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    
+    body.setStringValue("This has only foo foo. " + 
+                        "On the other hand this sentence contains both foo and bar. " + 
+                        "This has only bar bar bar bar bar bar bar bar bar bar bar bar.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+        @Override
+        protected PassageScorer getScorer(String field) {
+          return new PassageScorer(0, 0.75f, 87);
+        }
+      };
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term("body", "foo")), BooleanClause.Occur.SHOULD);
+    query.add(new TermQuery(new Term("body", "bar")), BooleanClause.Occur.SHOULD);
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 1);
+    assertEquals(1, snippets.length);
+    assertTrue(snippets[0].startsWith("On the other hand"));
+    
+    ir.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Thu May 30 07:53:18 2013
@@ -29,18 +29,19 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
-import org.apache.lucene.search.highlight.TokenSources;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
 
 public class FastVectorHighlighterTest extends LuceneTestCase {
   
+  
   public void testSimpleHighlightTest() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
@@ -71,9 +72,182 @@ public class FastVectorHighlighterTest e
     dir.close();
   }
   
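+  /** Highlights a BooleanQuery and an equivalent PhraseQuery against a
+   *  long document; the returned fragments are centered on the matches. */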
+  public void testPhraseHighlightLongTextTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    Document doc = new Document();
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true);
+    type.freeze();
+    Field text = new Field("text", 
+        "Netscape was the general name for a series of web browsers originally produced by Netscape Communications Corporation, now a subsidiary of AOL The original browser was once the dominant browser in terms of usage share, but as a result of the first browser war it lost virtually all of its share to Internet Explorer Netscape was discontinued and support for all Netscape browsers and client products was terminated on March 1, 2008 Netscape Navigator was the name of Netscape\u0027s web browser from versions 1.0 through 4.8 The first beta release versions of the browser were released in 1994 and known as Mosaic and then Mosaic Netscape until a legal challenge from the National Center for Supercomputing Applications (makers of NCSA Mosaic, which many of Netscape\u0027s founders used to develop), led to the name change to Netscape Navigator The company\u0027s name also changed from Mosaic Communications Corporation to Netscape Communications Corporation The browser was eas
 ily the most advanced...", type);
+    doc.add(text);
+    writer.addDocument(doc);
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    int docId = 0;
+    String field = "text";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field, "internet")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "explorer")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 128, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("first browser war it lost virtually all of its share to <b>Internet</b> <b>Explorer</b> Netscape was discontinued and support for all Netscape browsers", bestFragments[0]);
+    }
+    
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "internet"));
+      query.add(new Term(field, "explorer"));
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 128, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("first browser war it lost virtually all of its share to <b>Internet Explorer</b> Netscape was discontinued and support for all Netscape browsers", bestFragments[0]);
+    }
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+  
+  // see LUCENE-4899
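+  // phrase matches must honor the configured slop, and no fragment is
+  // returned when the fragment window cannot contain the whole match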
+  public void testPhraseHighlightTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    Document doc = new Document();
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true);
+    type.freeze();
+    Field longTermField = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type);
+    Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);
+
+    doc.add(longTermField);
+    doc.add(noLongTermField);
+    writer.addDocument(doc);
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    int docId = 0;
+    String field = "no_long_term";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field, "test")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+    }
+    {
+      BooleanQuery query = new BooleanQuery();
+      PhraseQuery pq = new PhraseQuery();
+      pq.add(new Term(field, "test"));
+      pq.add(new Term(field, "foo"));
+      pq.add(new Term(field, "highlighed"));
+      pq.setSlop(5);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(pq, Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(0, bestFragments.length);
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+      
+    }
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "test"));
+      query.add(new Term(field, "foo"));
+      query.add(new Term(field, "highlighed"));
+      query.setSlop(3);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(0, bestFragments.length);
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+      
+    }
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "test"));
+      query.add(new Term(field, "foo"));
+      query.add(new Term(field, "highlighted"));
+      query.setSlop(30);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      assertEquals(0, bestFragments.length);
+    }
+    {
+      BooleanQuery query = new BooleanQuery();
+      PhraseQuery pq = new PhraseQuery();
+      pq.add(new Term(field, "test"));
+      pq.add(new Term(field, "foo"));
+      pq.add(new Term(field, "highlighed"));
+      pq.setSlop(5);
+      BooleanQuery inner = new BooleanQuery();
+      inner.add(pq, Occur.MUST);
+      inner.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(inner, Occur.MUST);
+      query.add(pq, Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      assertEquals(0, bestFragments.length);
+      
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+    }
+    
+    field = "long_term";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field,
+          "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("<b>thisisaverylongwordandmakessurethisfails</b>",
+          bestFragments[0]);
+    }
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+  
   public void testCommonTermsQueryHighlightTest() throws IOException {
     Directory dir = newDirectory();
-    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,  new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,  new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
     FieldType type = new FieldType(TextField.TYPE_STORED);
     type.setStoreTermVectorOffsets(true);
     type.setStoreTermVectorPositions(true);
@@ -98,7 +272,7 @@ public class FastVectorHighlighterTest e
    
     FastVectorHighlighter highlighter = new FastVectorHighlighter();
     IndexReader reader = DirectoryReader.open(writer, true);
-    IndexSearcher searcher = new IndexSearcher(reader);
+    IndexSearcher searcher = newSearcher(reader);
     TopDocs hits = searcher.search(query, 10);
     assertEquals(2, hits.totalHits);
     FieldQuery fieldQuery  = highlighter.getFieldQuery(query, reader);

Modified: lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (original)
+++ lucene/dev/branches/security/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java Thu May 30 07:53:18 2013
@@ -42,7 +42,7 @@ public class SimpleFragListBuilderTest e
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
     FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), sflb.minFragCharSize );
     assertEquals( 1, ffl.getFragInfos().size() );
-    assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,18)", ffl.getFragInfos().get( 0 ).toString() );
+    assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
   }
   
   public void testSmallerFragSizeThanPhraseQuery() throws Exception {
@@ -55,7 +55,7 @@ public class SimpleFragListBuilderTest e
     FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh   jklmnopqrs" ), sflb.minFragCharSize );
     assertEquals( 1, ffl.getFragInfos().size() );
     if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
-    assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(1,19)", ffl.getFragInfos().get( 0 ).toString() );
+    assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
   }
   
   public void test1TermIndex() throws Exception {

Modified: lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java (original)
+++ lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java Thu May 30 07:53:18 2013
@@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.ComplexExplanation;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
@@ -160,18 +161,21 @@ class TermsIncludingScoreQuery extends Q
         if (terms == null) {
           return null;
         }
+        
+        // rough cost estimate: worst case, every term matches every doc
+        final long cost = context.reader().maxDoc() * terms.size();
 
         segmentTermsEnum = terms.iterator(segmentTermsEnum);
         if (scoreDocsInOrder) {
           if (multipleValuesPerDocument) {
-            return new MVInOrderScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
+            return new MVInOrderScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc(), cost);
           } else {
-            return new SVInOrderScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
+            return new SVInOrderScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc(), cost);
           }
         } else if (multipleValuesPerDocument) {
-          return new MVInnerScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
+          return new MVInnerScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc(), cost);
         } else {
-          return new SVInnerScorer(this, acceptDocs, segmentTermsEnum);
+          return new SVInnerScorer(this, acceptDocs, segmentTermsEnum, cost);
         }
       }
     };
@@ -183,16 +187,28 @@ class TermsIncludingScoreQuery extends Q
     final BytesRef spare = new BytesRef();
     final Bits acceptDocs;
     final TermsEnum termsEnum;
+    final long cost;
 
     int upto;
     DocsEnum docsEnum;
     DocsEnum reuse;
     int scoreUpto;
+    int doc;
 
-    SVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum) {
+    SVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, long cost) {
       super(weight);
       this.acceptDocs = acceptDocs;
       this.termsEnum = termsEnum;
+      this.cost = cost;
+      this.doc = -1;
+    }
+
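+    // emits hits straight to the collector: nextDocOutOfOrder() produces
+    // doc ids term by term, so they may arrive out of order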
+    @Override
+    public void score(Collector collector) throws IOException {
+      collector.setScorer(this);
+      for (int doc = nextDocOutOfOrder(); doc != NO_MORE_DOCS; doc = nextDocOutOfOrder()) {
+        collector.collect(doc);
+      }
     }
 
     @Override
@@ -206,23 +222,22 @@ class TermsIncludingScoreQuery extends Q
 
     @Override
     public int docID() {
-      return docsEnum != null ? docsEnum.docID() : DocIdSetIterator.NO_MORE_DOCS;
+      return doc;
     }
 
-    @Override
-    public int nextDoc() throws IOException {
+    int nextDocOutOfOrder() throws IOException {
       if (docsEnum != null) {
         int docId = docsEnum.nextDoc();
         if (docId == DocIdSetIterator.NO_MORE_DOCS) {
           docsEnum = null;
         } else {
-          return docId;
+          return doc = docId;
         }
       }
 
       do {
         if (upto == terms.size()) {
-          return DocIdSetIterator.NO_MORE_DOCS;
+          return doc = DocIdSetIterator.NO_MORE_DOCS;
         }
 
         scoreUpto = upto;
@@ -231,7 +246,12 @@ class TermsIncludingScoreQuery extends Q
         }
       } while (docsEnum == null);
 
-      return docsEnum.nextDoc();
+      return doc = docsEnum.nextDoc();
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      throw new UnsupportedOperationException("nextDoc() isn't supported because doc ids are emitted out of order");
     }
 
     @Override
@@ -242,7 +262,7 @@ class TermsIncludingScoreQuery extends Q
     private int advanceForExplainOnly(int target) throws IOException {
       int docId;
       do {
-        docId = nextDoc();
+        docId = nextDocOutOfOrder();
         if (docId < target) {
           int tempDocId = docsEnum.advance(target);
           if (tempDocId == target) {
@@ -261,6 +281,11 @@ class TermsIncludingScoreQuery extends Q
     public int freq() {
       return 1;
     }
+
+    @Override
+    public long cost() {
+      return cost;
+    }
   }
 
  // This impl tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted
@@ -270,13 +295,13 @@ class TermsIncludingScoreQuery extends Q
 
     final FixedBitSet alreadyEmittedDocs;
 
-    MVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc) {
-      super(weight, acceptDocs, termsEnum);
+    MVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc, long cost) {
+      super(weight, acceptDocs, termsEnum, cost);
       alreadyEmittedDocs = new FixedBitSet(maxDoc);
     }
 
     @Override
-    public int nextDoc() throws IOException {
+    int nextDocOutOfOrder() throws IOException {
       if (docsEnum != null) {
         int docId;
         do {
@@ -326,15 +351,17 @@ class TermsIncludingScoreQuery extends Q
 
     final DocIdSetIterator matchingDocsIterator;
     final float[] scores;
+    final long cost;
 
     int currentDoc = -1;
 
-    SVInOrderScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc) throws IOException {
+    SVInOrderScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc, long cost) throws IOException {
       super(weight);
       FixedBitSet matchingDocs = new FixedBitSet(maxDoc);
       this.scores = new float[maxDoc];
       fillDocsAndScores(matchingDocs, acceptDocs, termsEnum);
       this.matchingDocsIterator = matchingDocs.iterator();
+      this.cost = cost;
     }
 
     protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException {
@@ -378,13 +405,18 @@ class TermsIncludingScoreQuery extends Q
     public int advance(int target) throws IOException {
       return currentDoc = matchingDocsIterator.advance(target);
     }
+
+    @Override
+    public long cost() {
+      return cost;
+    }
   }
 
   // This scorer deals with the fact that a document can have more than one score from multiple related documents.
   class MVInOrderScorer extends SVInOrderScorer {
 
-    MVInOrderScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc) throws IOException {
-      super(weight, acceptDocs, termsEnum, maxDoc);
+    MVInOrderScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc, long cost) throws IOException {
+      super(weight, acceptDocs, termsEnum, maxDoc, cost);
     }
 
     @Override

Modified: lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Thu May 30 07:53:18 2013
@@ -301,6 +301,11 @@ public class ToChildBlockJoinQuery exten
       }
       return childDoc;
     }
+
+    @Override
+    public long cost() {
+      return parentScorer.cost();
+    }
   }
 
   @Override
@@ -334,7 +339,8 @@ public class ToChildBlockJoinQuery exten
       final ToChildBlockJoinQuery other = (ToChildBlockJoinQuery) _other;
       return origParentQuery.equals(other.origParentQuery) &&
         parentsFilter.equals(other.parentsFilter) &&
-        doScores == other.doScores;
+        doScores == other.doScores &&
+        super.equals(other);
     } else {
       return false;
     }
@@ -343,7 +349,7 @@ public class ToChildBlockJoinQuery exten
   @Override
   public int hashCode() {
     final int prime = 31;
-    int hash = 1;
+    int hash = super.hashCode();
     hash = prime * hash + origParentQuery.hashCode();
     hash = prime * hash + new Boolean(doScores).hashCode();
     hash = prime * hash + parentsFilter.hashCode();

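The equals()/hashCode() changes above fold in Query's own implementations, which account for the query boost, so two joins that differ only in boost no longer compare equal. A small sketch of the resulting behavior, with a placeholder parent query and filter that are not from this commit:

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.QueryWrapperFilter;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.join.ToChildBlockJoinQuery;

    public class BoostEqualsSketch {
      public static void main(String[] args) {
        Query parentQuery = new TermQuery(new Term("country", "canada"));
        Filter parentsFilter = new QueryWrapperFilter(
            new TermQuery(new Term("docType", "parent")));
        ToChildBlockJoinQuery a = new ToChildBlockJoinQuery(parentQuery, parentsFilter, false);
        ToChildBlockJoinQuery b = new ToChildBlockJoinQuery(parentQuery, parentsFilter, false);
        b.setBoost(2.0f);
        // Before this change the boost was ignored and a.equals(b) was true;
        // with super.equals()/super.hashCode() folded in, it is false and the
        // hash codes reflect the boost as well.
        System.out.println(a.equals(b)); // false
      }
    }
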
Modified: lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (original)
+++ lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java Thu May 30 07:53:18 2013
@@ -80,7 +80,7 @@ public class ToParentBlockJoinCollector 
 
   // Maps each BlockJoinQuery instance to its "slot" in
   // joinScorers and in OneGroup's cached doc/scores/count:
-  private final Map<Query,Integer> joinQueryID = new HashMap<Query,Integer>();
+  private final Map<Query,Integer> joinQueryID = new HashMap<>();
   private final int numParentHits;
   private final FieldValueHitQueue<OneGroup> queue;
   private final FieldComparator[] comparators;
@@ -111,6 +111,7 @@ public class ToParentBlockJoinCollector 
     if (trackMaxScore) {
       maxScore = Float.MIN_VALUE;
     }
+    //System.out.println("numParentHits=" + numParentHits);
     this.trackScores = trackScores;
     this.numParentHits = numParentHits;
     queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
@@ -122,6 +123,7 @@ public class ToParentBlockJoinCollector 
   private static final class OneGroup extends FieldValueHitQueue.Entry {
     public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
       super(comparatorSlot, parentDoc, parentScore);
+      //System.out.println("make OneGroup parentDoc=" + parentDoc);
       docs = new int[numJoins][];
       for(int joinID=0;joinID<numJoins;joinID++) {
         docs[joinID] = new int[5];
@@ -138,11 +140,11 @@ public class ToParentBlockJoinCollector 
     int[][] docs;
     float[][] scores;
     int[] counts;
-  };
+  }
 
   @Override
   public void collect(int parentDoc) throws IOException {
-    //System.out.println("C parentDoc=" + parentDoc);
+    //System.out.println("\nC parentDoc=" + parentDoc);
     totalHitCount++;
 
     float score = Float.NaN;
@@ -203,8 +205,7 @@ public class ToParentBlockJoinCollector 
       for (int i = 0; i < comparators.length; i++) {
         comparators[i].copy(comparatorSlot, parentDoc);
       }
-      //System.out.println("  startup: new OG doc=" +
-      //(docBase+parentDoc));
+      //System.out.println("  startup: new OG doc=" + (docBase+parentDoc));
       if (!trackMaxScore && trackScores) {
         score = scorer.score();
       }
@@ -241,22 +242,28 @@ public class ToParentBlockJoinCollector 
       og.scores = ArrayUtil.grow(og.scores);
     }
 
-    //System.out.println("copyGroups parentDoc=" + og.doc);
+    //System.out.println("\ncopyGroups parentDoc=" + og.doc);
     for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
       final ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
       //System.out.println("  scorer=" + joinScorer);
-      if (joinScorer != null) {
+      if (joinScorer != null && docBase + joinScorer.getParentDoc() == og.doc) {
         og.counts[scorerIDX] = joinScorer.getChildCount();
         //System.out.println("    count=" + og.counts[scorerIDX]);
         og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
+        assert og.docs[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.docs[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
+        //System.out.println("    len=" + og.docs[scorerIDX].length);
         /*
-        for(int idx=0;idx<og.counts[scorerIDX];idx++) {
+          for(int idx=0;idx<og.counts[scorerIDX];idx++) {
           System.out.println("    docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
-        }
+          }
         */
         if (trackScores) {
+          //System.out.println("    copy scores");
           og.scores[scorerIDX] = joinScorer.swapChildScores(og.scores[scorerIDX]);
+          assert og.scores[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
         }
+      } else {
+        og.counts[scorerIDX] = 0;
       }
     }
   }
@@ -302,13 +309,16 @@ public class ToParentBlockJoinCollector 
     Arrays.fill(joinScorers, null);
 
     Queue<Scorer> queue = new LinkedList<Scorer>();
+    //System.out.println("\nqueue: add top scorer=" + scorer);
     queue.add(scorer);
     while ((scorer = queue.poll()) != null) {
+      //System.out.println("  poll: " + scorer + "; " + scorer.getWeight().getQuery());
       if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
         enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
       }
 
       for (ChildScorer sub : scorer.getChildren()) {
+        //System.out.println("  add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
         queue.add(sub.child);
       }
     }
@@ -347,6 +357,11 @@ public class ToParentBlockJoinCollector 
     public int nextDoc() {
       throw new UnsupportedOperationException();
     }
+
+    @Override
+    public long cost() {
+      return 1;
+    }
   }
 
   private OneGroup[] sortedGroups;
@@ -358,30 +373,31 @@ public class ToParentBlockJoinCollector 
     }
   }
 
-  /** Return the TopGroups for the specified
-   *  BlockJoinQuery.  The groupValue of each GroupDocs will
-   *  be the parent docID for that group.  Note that the
-   *  {@link GroupDocs#totalHits}, which would be the
-   *  total number of child documents matching that parent,
-   *  is not computed (will always be 0).  Returns null if
-   *  no groups matched. */
-  @SuppressWarnings("unchecked")
-  public TopGroups<Integer> getTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, boolean fillSortFields) 
-
+  /** Returns the TopGroups for the specified
+   *  BlockJoinQuery. The groupValue of each GroupDocs will
+   *  be the parent docID for that group.
+   *  The number of documents within each group is the minimum of <code>maxDocsPerGroup</code>
+   *  and the number of matched child documents for that group.
+   *  Returns null if no groups matched.
+   *
+   * @param query Search query
+   * @param withinGroupSort Sort criteria within groups
+   * @param offset Parent docs offset
+   * @param maxDocsPerGroup Upper bound on the number of documents per group
+   * @param withinGroupOffset Offset within each group of child docs
+   * @param fillSortFields Specifies whether to add sort fields or not
+   * @return TopGroups for the specified query
+   * @throws IOException if there is a low-level I/O error
+   */
+  public TopGroups<Integer> getTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset,
+                                         int maxDocsPerGroup, int withinGroupOffset, boolean fillSortFields)
     throws IOException {
 
     final Integer _slot = joinQueryID.get(query);
-    if (_slot == null) {
-      if (totalHitCount == 0) {
-        return null;
-      } else {
-        throw new IllegalArgumentException("the Query did not contain the provided BlockJoinQuery");
-      }
+    if (_slot == null && totalHitCount == 0) {
+      return null;
     }
 
-    // unbox once
-    final int slot = _slot;
-
     if (sortedGroups == null) {
       if (offset >= queue.size()) {
         return null;
@@ -391,34 +407,62 @@ public class ToParentBlockJoinCollector 
       return null;
     }
 
-    int totalGroupedHitCount = 0;
+    return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
+  }
 
+  /**
+   *  Accumulates groups for the BlockJoinQuery specified by its slot.
+   *
+   * @param slot Search query's slot
+   * @param offset Parent docs offset
+   * @param maxDocsPerGroup Upper bound on the number of documents per group
+   * @param withinGroupOffset Offset within each group of child docs
+   * @param withinGroupSort Sort criteria within groups
+   * @param fillSortFields Specifies whether to add sort fields or not
+   * @return TopGroups for the query specified by slot
+   * @throws IOException if there is a low-level I/O error
+   */
+  @SuppressWarnings({"unchecked","rawtypes"})
+  private TopGroups<Integer> accumulateGroups(int slot, int offset, int maxDocsPerGroup,
+                                              int withinGroupOffset, Sort withinGroupSort, boolean fillSortFields) throws IOException {
+    final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
     final FakeScorer fakeScorer = new FakeScorer();
 
-    @SuppressWarnings({"unchecked","rawtypes"})
-    final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
+    int totalGroupedHitCount = 0;
+    //System.out.println("slot=" + slot);
 
     for(int groupIDX=offset;groupIDX<sortedGroups.length;groupIDX++) {
       final OneGroup og = sortedGroups[groupIDX];
+      final int numChildDocs;
+      if (slot == -1 || slot >= og.counts.length) {
+        numChildDocs = 0;
+      } else {
+        numChildDocs = og.counts[slot];
+      }
+
+      // Bound the number of documents in the group to avoid allocating more memory than needed
+      final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
+      //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
 
       // At this point we hold all docs w/ in each group,
       // unsorted; we now sort them:
       final TopDocsCollector<?> collector;
       if (withinGroupSort == null) {
+        //System.out.println("sort by score");
         // Sort by score
         if (!trackScores) {
           throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
         }
-        collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
+        collector = TopScoreDocCollector.create(numDocsInGroup, true);
       } else {
         // Sort by fields
-        collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, trackScores, trackMaxScore, true);
+        collector = TopFieldCollector.create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
       }
 
       collector.setScorer(fakeScorer);
       collector.setNextReader(og.readerContext);
-      final int numChildDocs = og.counts[slot];
       for(int docIDX=0;docIDX<numChildDocs;docIDX++) {
+        //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
         final int doc = og.docs[slot][docIDX];
         fakeScorer.doc = doc;
         if (trackScores) {
@@ -439,21 +483,42 @@ public class ToParentBlockJoinCollector 
         groupSortValues = null;
       }
 
-      final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);
+      final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);
 
-      groups[groupIDX-offset] = new GroupDocs<Integer>(og.score,
+      groups[groupIDX-offset] = new GroupDocs<>(og.score,
                                                        topDocs.getMaxScore(),
-                                                       og.counts[slot],
+                                                       numChildDocs,
                                                        topDocs.scoreDocs,
                                                        og.doc,
                                                        groupSortValues);
     }
 
-    return new TopGroups<Integer>(new TopGroups<Integer>(sort.getSort(),
-                                                         withinGroupSort == null ? null : withinGroupSort.getSort(),
-                                                         0, totalGroupedHitCount, groups, maxScore),
+    return new TopGroups<>(new TopGroups<>(sort.getSort(),
+                                                       withinGroupSort == null ? null : withinGroupSort.getSort(),
+                                                       0, totalGroupedHitCount, groups, maxScore),
                                   totalHitCount);
   }
+
+  /** Returns the TopGroups for the specified BlockJoinQuery.
+   *  The groupValue of each GroupDocs will be the parent docID for that group.
+   *  The number of documents within each group
+   *  equals the total number of matched child documents for that group.
+   *  Returns null if no groups matched.
+   *
+   * @param query Search query
+   * @param withinGroupSort Sort criteria within groups
+   * @param offset Parent docs offset
+   * @param withinGroupOffset Offset within each group of child docs
+   * @param fillSortFields Specifies whether to add sort fields or not
+   * @return TopGroups for the specified query
+   * @throws IOException if there is a low-level I/O error
+   */
+  public TopGroups<Integer> getTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset,
+                                                         int withinGroupOffset, boolean fillSortFields)
+    throws IOException {
+
+    return getTopGroups(query, withinGroupSort, offset, Integer.MAX_VALUE, withinGroupOffset, fillSortFields);
+  }
   
   /**
    * Returns the highest score across all collected parent hits, as long as

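For reference, a minimal usage sketch of the collector and the getTopGroups() signature documented above. The searcher, field names, and queries are illustrative; the parents filter must identify exactly the parent document of each block (in this era of the API, typically a CachingWrapperFilter around a QueryWrapperFilter so that it produces a FixedBitSet):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.CachingWrapperFilter;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.QueryWrapperFilter;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.grouping.TopGroups;
    import org.apache.lucene.search.join.ScoreMode;
    import org.apache.lucene.search.join.ToParentBlockJoinCollector;
    import org.apache.lucene.search.join.ToParentBlockJoinQuery;

    public class BlockJoinGroupsSketch {
      // searcher is assumed to be over an index built with
      // IndexWriter.addDocuments(...): children first, parent last per block.
      static TopGroups<Integer> topGroups(IndexSearcher searcher) throws Exception {
        Filter parentsFilter = new CachingWrapperFilter(
            new QueryWrapperFilter(new TermQuery(new Term("docType", "parent"))));
        ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(
            new TermQuery(new Term("skill", "java")), parentsFilter, ScoreMode.Avg);

        // trackScores=true so a null withinGroupSort (sort children by score) is legal:
        ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(
            Sort.RELEVANCE, 10, true, false);
        searcher.search(childJoinQuery, c);

        // offset=0, at most 5 child docs per group, withinGroupOffset=0:
        return c.getTopGroups(childJoinQuery, null, 0, 5, 0, true);
      }
    }

Passing Integer.MAX_VALUE as maxDocsPerGroup reproduces getTopGroupsWithAllChildDocs(); the per-group collectors are now sized to numDocsInGroup rather than maxDocsPerGroup, which is what makes that call affordable.
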
Modified: lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/branches/security/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Thu May 30 07:53:18 2013
@@ -181,8 +181,7 @@ public class ToParentBlockJoinQuery exte
       // acceptDocs when we score:
       final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
 
-      if (parents == null
-          || parents.iterator().docID() == DocIdSetIterator.NO_MORE_DOCS) { // <-- means DocIdSet#EMPTY_DOCIDSET
+      if (parents == null) {
         // No matches
         return null;
       }
@@ -196,10 +195,8 @@ public class ToParentBlockJoinQuery exte
     @Override
     public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
       BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs());
-      if (scorer != null) {
-        if (scorer.advance(doc) == doc) {
-          return scorer.explain(context.docBase);
-        }
+      if (scorer != null && scorer.advance(doc) == doc) {
+        return scorer.explain(context.docBase);
       }
       return new ComplexExplanation(false, 0.0f, "Not a match");
     }
@@ -247,6 +244,10 @@ public class ToParentBlockJoinQuery exte
       return childDocUpto;
     }
 
+    int getParentDoc() {
+      return parentDoc;
+    }
+
     int[] swapChildDocs(int[] other) {
       final int[] ret = pendingChildDocs;
       if (other == null) {
@@ -273,7 +274,6 @@ public class ToParentBlockJoinQuery exte
     @Override
     public int nextDoc() throws IOException {
       //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
-
       // Loop until we hit a parentDoc that's accepted
       while (true) {
         if (nextChildDoc == NO_MORE_DOCS) {
@@ -286,6 +286,12 @@ public class ToParentBlockJoinQuery exte
 
         parentDoc = parentBits.nextSetBit(nextChildDoc);
 
+        // Parent & child docs are supposed to be
+        // orthogonal:
+        if (nextChildDoc == parentDoc) {
+          throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+        }
+
         //System.out.println("  parentDoc=" + parentDoc);
         assert parentDoc != -1;
 
@@ -296,6 +302,13 @@ public class ToParentBlockJoinQuery exte
           do {
             nextChildDoc = childScorer.nextDoc();
           } while (nextChildDoc < parentDoc);
+
+          // Parent & child docs are supposed to be
+          // orthogonal:
+          if (nextChildDoc == parentDoc) {
+            throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+          }
+
           continue;
         }
 
@@ -327,8 +340,11 @@ public class ToParentBlockJoinQuery exte
           nextChildDoc = childScorer.nextDoc();
         } while (nextChildDoc < parentDoc);
 
-        // Parent & child docs are supposed to be orthogonal:
-        assert nextChildDoc != parentDoc;
+        // Parent & child docs are supposed to be
+        // orthogonal:
+        if (nextChildDoc == parentDoc) {
+          throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+        }
 
         switch(scoreMode) {
         case Avg:
@@ -344,7 +360,7 @@ public class ToParentBlockJoinQuery exte
           break;
         }
 
-        //System.out.println("  return parentDoc=" + parentDoc);
+        //System.out.println("  return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
         return parentDoc;
       }
     }
@@ -394,7 +410,9 @@ public class ToParentBlockJoinQuery exte
       }
 
       // Parent & child docs are supposed to be orthogonal:
-      assert nextChildDoc != prevParentDoc;
+      if (nextChildDoc == prevParentDoc) {
+        throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+      }
 
       final int nd = nextDoc();
       //System.out.println("  return nextParentDoc=" + nd);
@@ -409,6 +427,10 @@ public class ToParentBlockJoinQuery exte
       );
     }
 
+    @Override
+    public long cost() {
+      return childScorer.cost();
+    }
   }
 
   @Override
@@ -442,7 +464,8 @@ public class ToParentBlockJoinQuery exte
       final ToParentBlockJoinQuery other = (ToParentBlockJoinQuery) _other;
       return origChildQuery.equals(other.origChildQuery) &&
         parentsFilter.equals(other.parentsFilter) &&
-        scoreMode == other.scoreMode;
+        scoreMode == other.scoreMode && 
+        super.equals(other);
     } else {
       return false;
     }
@@ -451,17 +474,10 @@ public class ToParentBlockJoinQuery exte
   @Override
   public int hashCode() {
     final int prime = 31;
-    int hash = 1;
+    int hash = super.hashCode();
     hash = prime * hash + origChildQuery.hashCode();
     hash = prime * hash + scoreMode.hashCode();
     hash = prime * hash + parentsFilter.hashCode();
     return hash;
   }
-
-  @Override
-  public ToParentBlockJoinQuery clone() {
-    return new ToParentBlockJoinQuery(origChildQuery.clone(),
-                              parentsFilter,
-                              scoreMode);
-  }
 }
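The assertions replaced above are now hard IllegalStateExceptions, so a child query that matches a parent document fails loudly even without -ea. Keeping the two disjoint is an indexing-time concern; a minimal block-indexing sketch follows (field names are illustrative, not from this commit):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.IndexWriter;

    public class BlockIndexingSketch {
      // Children first, parent last; addDocuments keeps the block contiguous.
      static void addBlock(IndexWriter writer) throws Exception {
        List<Document> block = new ArrayList<Document>();

        Document child = new Document();
        child.add(new StringField("skill", "java", Field.Store.NO)); // only children carry "skill"
        block.add(child);

        Document parent = new Document();
        parent.add(new StringField("docType", "parent", Field.Store.NO)); // only parents carry the marker
        block.add(parent);

        writer.addDocuments(block);
        // Because no field is shared between the child query and the parent
        // marker, a child query can never match a parent doc, and the
        // orthogonality check above never trips.
      }
    }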