You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/04/30 13:35:36 UTC

svn commit: r1477570 - in /lucene/dev/trunk: ./ lucene/ lucene/join/ lucene/join/src/java/org/apache/lucene/search/join/ lucene/join/src/test/org/apache/lucene/search/join/

Author: mikemccand
Date: Tue Apr 30 11:35:36 2013
New Revision: 1477570

URL: http://svn.apache.org/r1477570
Log:
LUCENE-4968: fix several block join issues

Modified:
    lucene/dev/trunk/   (props changed)
    lucene/dev/trunk/lucene/   (props changed)
    lucene/dev/trunk/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/trunk/lucene/join/   (props changed)
    lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
    lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
    lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1477570&r1=1477569&r2=1477570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Apr 30 11:35:36 2013
@@ -68,6 +68,11 @@ Bug Fixes
 * LUCENE-4959: Fix incorrect return value in
   SimpleNaiveBayesClassifier.assignClass. (Alexey Kutin via Adrien Grand)
 
+* LUCENE-4968: Fixed ToParentBlockJoinQuery/Collector: correctly handle parent
+  hits that had no child matches, don't throw IllegalArgumentEx when
+  the child query has no hits, more aggressively catch cases where childQuery
+  incorrectly matches parent documents (Mike McCandless)
+
 Optimizations
 
 * LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher

Modified: lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java?rev=1477570&r1=1477569&r2=1477570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (original)
+++ lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java Tue Apr 30 11:35:36 2013
@@ -111,6 +111,7 @@ public class ToParentBlockJoinCollector 
     if (trackMaxScore) {
       maxScore = Float.MIN_VALUE;
     }
+    //System.out.println("numParentHits=" + numParentHits);
     this.trackScores = trackScores;
     this.numParentHits = numParentHits;
     queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
@@ -122,6 +123,7 @@ public class ToParentBlockJoinCollector 
   private static final class OneGroup extends FieldValueHitQueue.Entry {
     public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
       super(comparatorSlot, parentDoc, parentScore);
+      //System.out.println("make OneGroup parentDoc=" + parentDoc);
       docs = new int[numJoins][];
       for(int joinID=0;joinID<numJoins;joinID++) {
         docs[joinID] = new int[5];
@@ -142,7 +144,7 @@ public class ToParentBlockJoinCollector 
 
   @Override
   public void collect(int parentDoc) throws IOException {
-    //System.out.println("C parentDoc=" + parentDoc);
+    //System.out.println("\nC parentDoc=" + parentDoc);
     totalHitCount++;
 
     float score = Float.NaN;
@@ -203,8 +205,7 @@ public class ToParentBlockJoinCollector 
       for (int i = 0; i < comparators.length; i++) {
         comparators[i].copy(comparatorSlot, parentDoc);
       }
-      //System.out.println("  startup: new OG doc=" +
-      //(docBase+parentDoc));
+      //System.out.println("  startup: new OG doc=" + (docBase+parentDoc));
       if (!trackMaxScore && trackScores) {
         score = scorer.score();
       }
@@ -241,22 +242,28 @@ public class ToParentBlockJoinCollector 
       og.scores = ArrayUtil.grow(og.scores);
     }
 
-    //System.out.println("copyGroups parentDoc=" + og.doc);
+    //System.out.println("\ncopyGroups parentDoc=" + og.doc);
     for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
       final ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
       //System.out.println("  scorer=" + joinScorer);
-      if (joinScorer != null) {
+      if (joinScorer != null && docBase + joinScorer.getParentDoc() == og.doc) {
         og.counts[scorerIDX] = joinScorer.getChildCount();
         //System.out.println("    count=" + og.counts[scorerIDX]);
         og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
+        assert og.docs[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.docs[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
+        //System.out.println("    len=" + og.docs[scorerIDX].length);
         /*
-        for(int idx=0;idx<og.counts[scorerIDX];idx++) {
+          for(int idx=0;idx<og.counts[scorerIDX];idx++) {
           System.out.println("    docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
-        }
+          }
         */
         if (trackScores) {
+          //System.out.println("    copy scores");
           og.scores[scorerIDX] = joinScorer.swapChildScores(og.scores[scorerIDX]);
+          assert og.scores[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
         }
+      } else {
+        og.counts[scorerIDX] = 0;
       }
     }
   }
@@ -302,13 +309,16 @@ public class ToParentBlockJoinCollector 
     Arrays.fill(joinScorers, null);
 
     Queue<Scorer> queue = new LinkedList<Scorer>();
+    //System.out.println("\nqueue: add top scorer=" + scorer);
     queue.add(scorer);
     while ((scorer = queue.poll()) != null) {
+      //System.out.println("  poll: " + scorer + "; " + scorer.getWeight().getQuery());
       if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
         enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
       }
 
       for (ChildScorer sub : scorer.getChildren()) {
+        //System.out.println("  add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
         queue.add(sub.child);
       }
     }
@@ -384,12 +394,8 @@ public class ToParentBlockJoinCollector 
     throws IOException {
 
     final Integer _slot = joinQueryID.get(query);
-    if (_slot == null) {
-      if (totalHitCount == 0) {
-        return null;
-      } else {
-        throw new IllegalArgumentException("the Query did not contain the provided BlockJoinQuery");
-      }
+    if (_slot == null && totalHitCount == 0) {
+      return null;
     }
 
     if (sortedGroups == null) {
@@ -401,7 +407,7 @@ public class ToParentBlockJoinCollector 
       return null;
     }
 
-    return accumulateGroups(_slot, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
+    return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
   }
 
   /**
@@ -423,18 +429,26 @@ public class ToParentBlockJoinCollector 
     final FakeScorer fakeScorer = new FakeScorer();
 
     int totalGroupedHitCount = 0;
+    //System.out.println("slot=" + slot);
 
     for(int groupIDX=offset;groupIDX<sortedGroups.length;groupIDX++) {
       final OneGroup og = sortedGroups[groupIDX];
-      final int numChildDocs = og.counts[slot];
+      final int numChildDocs;
+      if (slot == -1 || slot >= og.counts.length) {
+        numChildDocs = 0;
+      } else {
+        numChildDocs = og.counts[slot];
+      }
 
       // Number of documents in group should be bounded to prevent redundant memory allocation
-      final int numDocsInGroup = Math.min(numChildDocs, maxDocsPerGroup);
+      final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
+      //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
 
       // At this point we hold all docs w/ in each group,
       // unsorted; we now sort them:
       final TopDocsCollector<?> collector;
       if (withinGroupSort == null) {
+        //System.out.println("sort by score");
         // Sort by score
         if (!trackScores) {
           throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
@@ -448,6 +462,7 @@ public class ToParentBlockJoinCollector 
       collector.setScorer(fakeScorer);
       collector.setNextReader(og.readerContext);
       for(int docIDX=0;docIDX<numChildDocs;docIDX++) {
+        //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
         final int doc = og.docs[slot][docIDX];
         fakeScorer.doc = doc;
         if (trackScores) {

Modified: lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1477570&r1=1477569&r2=1477570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Tue Apr 30 11:35:36 2013
@@ -195,10 +195,8 @@ public class ToParentBlockJoinQuery exte
     @Override
     public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
       BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs());
-      if (scorer != null) {
-        if (scorer.advance(doc) == doc) {
-          return scorer.explain(context.docBase);
-        }
+      if (scorer != null && scorer.advance(doc) == doc) {
+        return scorer.explain(context.docBase);
       }
       return new ComplexExplanation(false, 0.0f, "Not a match");
     }
@@ -246,6 +244,10 @@ public class ToParentBlockJoinQuery exte
       return childDocUpto;
     }
 
+    int getParentDoc() {
+      return parentDoc;
+    }
+
     int[] swapChildDocs(int[] other) {
       final int[] ret = pendingChildDocs;
       if (other == null) {
@@ -272,7 +274,6 @@ public class ToParentBlockJoinQuery exte
     @Override
     public int nextDoc() throws IOException {
       //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
-
       // Loop until we hit a parentDoc that's accepted
       while (true) {
         if (nextChildDoc == NO_MORE_DOCS) {
@@ -285,6 +286,12 @@ public class ToParentBlockJoinQuery exte
 
         parentDoc = parentBits.nextSetBit(nextChildDoc);
 
+        // Parent & child docs are supposed to be
+        // orthogonal:
+        if (nextChildDoc == parentDoc) {
+          throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+        }
+
         //System.out.println("  parentDoc=" + parentDoc);
         assert parentDoc != -1;
 
@@ -295,6 +302,13 @@ public class ToParentBlockJoinQuery exte
           do {
             nextChildDoc = childScorer.nextDoc();
           } while (nextChildDoc < parentDoc);
+
+          // Parent & child docs are supposed to be
+          // orthogonal:
+          if (nextChildDoc == parentDoc) {
+            throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+          }
+
           continue;
         }
 
@@ -326,8 +340,11 @@ public class ToParentBlockJoinQuery exte
           nextChildDoc = childScorer.nextDoc();
         } while (nextChildDoc < parentDoc);
 
-        // Parent & child docs are supposed to be orthogonal:
-        assert nextChildDoc != parentDoc;
+        // Parent & child docs are supposed to be
+        // orthogonal:
+        if (nextChildDoc == parentDoc) {
+          throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+        }
 
         switch(scoreMode) {
         case Avg:
@@ -343,7 +360,7 @@ public class ToParentBlockJoinQuery exte
           break;
         }
 
-        //System.out.println("  return parentDoc=" + parentDoc);
+        //System.out.println("  return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
         return parentDoc;
       }
     }
@@ -393,7 +410,9 @@ public class ToParentBlockJoinQuery exte
       }
 
       // Parent & child docs are supposed to be orthogonal:
-      assert nextChildDoc != prevParentDoc;
+      if (nextChildDoc == prevParentDoc) {
+        throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+      }
 
       final int nd = nextDoc();
       //System.out.println("  return nextParentDoc=" + nd);

Modified: lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java?rev=1477570&r1=1477569&r2=1477570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (original)
+++ lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java Tue Apr 30 11:35:36 2013
@@ -1206,4 +1206,191 @@ public class TestBlockJoin extends Lucen
     r.close();
     dir.close();
   }
+
+  // LUCENE-4968
+  public void testSometimesParentOnlyMatches() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document parent = new Document();
+    parent.add(new StoredField("parentID", "0"));
+    parent.add(newTextField("parentText", "text", Field.Store.NO));
+    parent.add(newStringField("isParent", "yes", Field.Store.NO));
+
+    List<Document> docs = new ArrayList<Document>();
+
+    Document child = new Document();
+    docs.add(child);
+    child.add(new StoredField("childID", "0"));
+    child.add(newTextField("childText", "text", Field.Store.NO));
+
+    // parent last:
+    docs.add(parent);
+    w.addDocuments(docs);
+
+    docs.clear();
+
+    parent = new Document();
+    parent.add(newTextField("parentText", "text", Field.Store.NO));
+    parent.add(newStringField("isParent", "yes", Field.Store.NO));
+    parent.add(new StoredField("parentID", "1"));
+
+    // parent last:
+    docs.add(parent);
+    w.addDocuments(docs);
+    
+    IndexReader r = w.getReader();
+    w.close();
+
+    Query childQuery = new TermQuery(new Term("childText", "text"));
+    Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+    BooleanQuery parentQuery = new BooleanQuery();
+    parentQuery.add(childJoinQuery, Occur.SHOULD);
+    parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
+
+    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
+                                                                  10, true, true);
+    newSearcher(r).search(parentQuery, c);
+    TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
+
+    // Two parents:
+    assertEquals(2, groups.totalGroupCount.intValue());
+
+    // One child docs:
+    assertEquals(1, groups.totalGroupedHitCount);
+
+    GroupDocs<Integer> group = groups.groups[0];
+    StoredDocument doc = r.document(group.groupValue.intValue());
+    assertEquals("0", doc.get("parentID"));
+    System.out.println("group: " + group);
+
+    group = groups.groups[1];
+    doc = r.document(group.groupValue.intValue());
+    assertEquals("1", doc.get("parentID"));
+
+    r.close();
+    d.close();
+  }
+
+  // LUCENE-4968
+  public void testChildQueryNeverMatches() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document parent = new Document();
+    parent.add(new StoredField("parentID", "0"));
+    parent.add(newTextField("parentText", "text", Field.Store.NO));
+    parent.add(newStringField("isParent", "yes", Field.Store.NO));
+
+    List<Document> docs = new ArrayList<Document>();
+
+    Document child = new Document();
+    docs.add(child);
+    child.add(new StoredField("childID", "0"));
+    child.add(newTextField("childText", "text", Field.Store.NO));
+
+    // parent last:
+    docs.add(parent);
+    w.addDocuments(docs);
+
+    docs.clear();
+
+    parent = new Document();
+    parent.add(newTextField("parentText", "text", Field.Store.NO));
+    parent.add(newStringField("isParent", "yes", Field.Store.NO));
+    parent.add(new StoredField("parentID", "1"));
+
+    // parent last:
+    docs.add(parent);
+    w.addDocuments(docs);
+    
+    IndexReader r = w.getReader();
+    w.close();
+
+    // never matches:
+    Query childQuery = new TermQuery(new Term("childText", "bogus"));
+    Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+    BooleanQuery parentQuery = new BooleanQuery();
+    parentQuery.add(childJoinQuery, Occur.SHOULD);
+    parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
+
+    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
+                                                                  10, true, true);
+    newSearcher(r).search(parentQuery, c);
+    TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
+
+    // Two parents:
+    assertEquals(2, groups.totalGroupCount.intValue());
+
+    // One child docs:
+    assertEquals(0, groups.totalGroupedHitCount);
+
+    GroupDocs<Integer> group = groups.groups[0];
+    StoredDocument doc = r.document(group.groupValue.intValue());
+    assertEquals("0", doc.get("parentID"));
+    System.out.println("group: " + group);
+
+    group = groups.groups[1];
+    doc = r.document(group.groupValue.intValue());
+    assertEquals("1", doc.get("parentID"));
+
+    r.close();
+    d.close();
+  }
+
+  // LUCENE-4968
+  public void testChildQueryMatchesParent() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document parent = new Document();
+    parent.add(new StoredField("parentID", "0"));
+    parent.add(newTextField("parentText", "text", Field.Store.NO));
+    parent.add(newStringField("isParent", "yes", Field.Store.NO));
+
+    List<Document> docs = new ArrayList<Document>();
+
+    Document child = new Document();
+    docs.add(child);
+    child.add(new StoredField("childID", "0"));
+    child.add(newTextField("childText", "text", Field.Store.NO));
+
+    // parent last:
+    docs.add(parent);
+    w.addDocuments(docs);
+
+    docs.clear();
+
+    parent = new Document();
+    parent.add(newTextField("parentText", "text", Field.Store.NO));
+    parent.add(newStringField("isParent", "yes", Field.Store.NO));
+    parent.add(new StoredField("parentID", "1"));
+
+    // parent last:
+    docs.add(parent);
+    w.addDocuments(docs);
+    
+    IndexReader r = w.getReader();
+    w.close();
+
+    // illegally matches parent:
+    Query childQuery = new TermQuery(new Term("parentText", "text"));
+    Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+    BooleanQuery parentQuery = new BooleanQuery();
+    parentQuery.add(childJoinQuery, Occur.SHOULD);
+    parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
+
+    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
+                                                                  10, true, true);
+
+    try {
+      newSearcher(r).search(parentQuery, c);
+      fail("should have hit exception");
+    } catch (IllegalStateException ise) {
+      // expected
+    }
+
+    r.close();
+    d.close();
+  }
 }