You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/02/10 22:28:52 UTC

svn commit: r1242934 - in /lucene/dev/trunk: lucene/ modules/join/src/java/org/apache/lucene/search/join/ modules/join/src/test/org/apache/lucene/search/join/

Author: mikemccand
Date: Fri Feb 10 21:28:52 2012
New Revision: 1242934

URL: http://svn.apache.org/viewvc?rev=1242934&view=rev
Log:
SOLR-3076: fix BJQ to handle incoming liveDocs/filter correctly

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
    lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
    lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1242934&r1=1242933&r2=1242934&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Feb 10 21:28:52 2012
@@ -698,7 +698,10 @@ Bug fixes
 
 * LUCENE-3589: BytesRef copy(short) didnt set length.
   (Peter Chang via Robert Muir)
-  
+
+* SOLR-3076: ToParent/ChildBlockJoinQuery was not handling an incoming
+  filter nor deleted docs correctly (Mikhail Khludnev via Mike
+  McCandless).
   
 ======================= Lucene 3.6.0 =======================
 

Modified: lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1242934&r1=1242933&r2=1242934&view=diff
==============================================================================
--- lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Fri Feb 10 21:28:52 2012
@@ -109,20 +109,27 @@ public class ToChildBlockJoinQuery exten
       parentWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
     }
 
+    // NOTE: acceptDocs applies (and is checked) only in the
+    // child document space
     @Override
     public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
         boolean topScorer, Bits acceptDocs) throws IOException {
+
       // Pass scoreDocsInOrder true, topScorer false to our sub:
-      final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, acceptDocs);
+      final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, null);
 
       if (parentScorer == null) {
         // No matches
         return null;
       }
 
-      final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader().getLiveDocs());
-      // TODO: once we do random-access filters we can
-      // generalize this:
+      // NOTE: we cannot pass acceptDocs here because this
+      // will (most likely, justifiably) cause the filter to
+      // not return a FixedBitSet but rather a
+      // BitsFilteredDocIdSet.  Instead, we filter by
+      // acceptDocs when we score:
+      final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
+
       if (parents == null) {
         // No matches
         return null;
@@ -131,7 +138,7 @@ public class ToChildBlockJoinQuery exten
         throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
       }
 
-      return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores);
+      return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores, acceptDocs);
     }
 
     @Override
@@ -151,16 +158,19 @@ public class ToChildBlockJoinQuery exten
     private final Scorer parentScorer;
     private final FixedBitSet parentBits;
     private final boolean doScores;
+    private final Bits acceptDocs;
+
     private float parentScore;
 
     private int childDoc = -1;
     private int parentDoc;
 
-    public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores) {
+    public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores, Bits acceptDocs) {
       super(weight);
       this.doScores = doScores;
       this.parentBits = parentBits;
       this.parentScorer = parentScorer;
+      this.acceptDocs = acceptDocs;
     }
 
     @Override
@@ -172,45 +182,56 @@ public class ToChildBlockJoinQuery exten
     public int nextDoc() throws IOException {
       //System.out.println("Q.nextDoc() parentDoc=" + parentDoc + " childDoc=" + childDoc);
 
-      if (childDoc+1 == parentDoc) {
-        // OK, we are done iterating through all children
-        // matching this one parent doc, so we now nextDoc()
-        // the parent.  Use a while loop because we may have
-        // to skip over some number of parents w/ no
-        // children:
-        while (true) {
-          parentDoc = parentScorer.nextDoc();
-          if (parentDoc == 0) {
-            // Degenerate but allowed: parent has no children
-            // TODO: would be nice to pull initial parent
-            // into ctor so we can skip this if... but it's
-            // tricky because scorer must return -1 for
-            // .doc() on init...
+      // Loop until we hit a childDoc that's accepted
+      while (true) {
+        if (childDoc+1 == parentDoc) {
+          // OK, we are done iterating through all children
+          // matching this one parent doc, so we now nextDoc()
+          // the parent.  Use a while loop because we may have
+          // to skip over some number of parents w/ no
+          // children:
+          while (true) {
             parentDoc = parentScorer.nextDoc();
-          }
+            if (parentDoc == 0) {
+              // Degenerate but allowed: parent has no children
+              // TODO: would be nice to pull initial parent
+              // into ctor so we can skip this if... but it's
+              // tricky because scorer must return -1 for
+              // .doc() on init...
+              parentDoc = parentScorer.nextDoc();
+            }
 
-          if (parentDoc == NO_MORE_DOCS) {
-            childDoc = NO_MORE_DOCS;
-            //System.out.println("  END");
-            return childDoc;
-          }
+            if (parentDoc == NO_MORE_DOCS) {
+              childDoc = NO_MORE_DOCS;
+              //System.out.println("  END");
+              return childDoc;
+            }
+
+            childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
 
-          childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
-          if (childDoc < parentDoc) {
-            if (doScores) {
-              parentScore = parentScorer.score();
+            if (acceptDocs != null && !acceptDocs.get(childDoc)) {
+              continue;
             }
-            //System.out.println("  " + childDoc);
-            return childDoc;
-          } else {
-            // Degenerate but allowed: parent has no children
+
+            if (childDoc < parentDoc) {
+              if (doScores) {
+                parentScore = parentScorer.score();
+              }
+              //System.out.println("  " + childDoc);
+              return childDoc;
+            } else {
+              // Degenerate but allowed: parent has no children
+            }
+          }
+        } else {
+          assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
+          childDoc++;
+          if (acceptDocs != null && !acceptDocs.get(childDoc)) {
+            continue;
           }
+          //System.out.println("  " + childDoc);
+          return childDoc;
         }
-      } else {
-        assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
-        childDoc++;
-        //System.out.println("  " + childDoc);
-        return childDoc;
       }
     }
 

Modified: lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1242934&r1=1242933&r2=1242934&view=diff
==============================================================================
--- lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Fri Feb 10 21:28:52 2012
@@ -169,11 +169,14 @@ public class ToParentBlockJoinQuery exte
       childWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
     }
 
+    // NOTE: acceptDocs applies (and is checked) only in the
+    // parent document space
     @Override
     public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
         boolean topScorer, Bits acceptDocs) throws IOException {
+
       // Pass scoreDocsInOrder true, topScorer false to our sub:
-      final Scorer childScorer = childWeight.scorer(readerContext, true, false, acceptDocs);
+      final Scorer childScorer = childWeight.scorer(readerContext, true, false, null);
 
       if (childScorer == null) {
         // No matches
@@ -186,9 +189,13 @@ public class ToParentBlockJoinQuery exte
         return null;
       }
 
-      final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader().getLiveDocs());
-      // TODO: once we do random-access filters we can
-      // generalize this:
+      // NOTE: we cannot pass acceptDocs here because this
+      // will (most likely, justifiably) cause the filter to
+      // not return a FixedBitSet but rather a
+      // BitsFilteredDocIdSet.  Instead, we filter by
+      // acceptDocs when we score:
+      final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
+
       if (parents == null) {
         // No matches
         return null;
@@ -197,7 +204,7 @@ public class ToParentBlockJoinQuery exte
         throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
       }
 
-      return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode);
+      return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, acceptDocs);
     }
 
     @Override
@@ -217,6 +224,7 @@ public class ToParentBlockJoinQuery exte
     private final Scorer childScorer;
     private final FixedBitSet parentBits;
     private final ScoreMode scoreMode;
+    private final Bits acceptDocs;
     private int parentDoc = -1;
     private float parentScore;
     private int nextChildDoc;
@@ -225,12 +233,13 @@ public class ToParentBlockJoinQuery exte
     private float[] pendingChildScores;
     private int childDocUpto;
 
-    public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
+    public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) {
       super(weight);
       //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
       this.parentBits = parentBits;
       this.childScorer = childScorer;
       this.scoreMode = scoreMode;
+      this.acceptDocs = acceptDocs;
       if (scoreMode != ScoreMode.None) {
         pendingChildScores = new float[5];
       }
@@ -273,60 +282,75 @@ public class ToParentBlockJoinQuery exte
     public int nextDoc() throws IOException {
       //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
 
-      if (nextChildDoc == NO_MORE_DOCS) {
-        //System.out.println("  end");
-        return parentDoc = NO_MORE_DOCS;
-      }
-
-      // Gather all children sharing the same parent as nextChildDoc
-      parentDoc = parentBits.nextSetBit(nextChildDoc);
-      //System.out.println("  parentDoc=" + parentDoc);
-      assert parentDoc != -1;
-
-      float totalScore = 0;
-      float maxScore = Float.NEGATIVE_INFINITY;
-
-      childDocUpto = 0;
-      do {
-        //System.out.println("  c=" + nextChildDoc);
-        if (pendingChildDocs.length == childDocUpto) {
-          pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
-        }
-        if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
-          pendingChildScores = ArrayUtil.grow(pendingChildScores);
+      // Loop until we hit a parentDoc that's accepted
+      while (true) {
+        if (nextChildDoc == NO_MORE_DOCS) {
+          //System.out.println("  end");
+          return parentDoc = NO_MORE_DOCS;
         }
-        pendingChildDocs[childDocUpto] = nextChildDoc;
-        if (scoreMode != ScoreMode.None) {
-          // TODO: specialize this into dedicated classes per-scoreMode
-          final float childScore = childScorer.score();
-          pendingChildScores[childDocUpto] = childScore;
-          maxScore = Math.max(childScore, maxScore);
-          totalScore += childScore;
+
+        // Gather all children sharing the same parent as
+        // nextChildDoc
+
+        parentDoc = parentBits.nextSetBit(nextChildDoc);
+
+        //System.out.println("  parentDoc=" + parentDoc);
+        assert parentDoc != -1;
+
+        //System.out.println("  nextChildDoc=" + nextChildDoc);
+        if (acceptDocs != null && !acceptDocs.get(parentDoc)) {
+          // Parent doc not accepted; skip child docs until
+          // we hit a new parent doc:
+          do {
+            nextChildDoc = childScorer.nextDoc();
+          } while (nextChildDoc < parentDoc);
+          continue;
         }
-        childDocUpto++;
-        nextChildDoc = childScorer.nextDoc();
-      } while (nextChildDoc < parentDoc);
-      //System.out.println("  nextChildDoc=" + nextChildDoc);
 
-      // Parent & child docs are supposed to be orthogonal:
-      assert nextChildDoc != parentDoc;
+        float totalScore = 0;
+        float maxScore = Float.NEGATIVE_INFINITY;
 
-      switch(scoreMode) {
-      case Avg:
-        parentScore = totalScore / childDocUpto;
-        break;
-      case Max:
-        parentScore = maxScore;
-        break;
-      case Total:
-        parentScore = totalScore;
-        break;
-      case None:
-        break;
-      }
+        childDocUpto = 0;
+        do {
+          //System.out.println("  c=" + nextChildDoc);
+          if (pendingChildDocs.length == childDocUpto) {
+            pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
+          }
+          if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
+            pendingChildScores = ArrayUtil.grow(pendingChildScores);
+          }
+          pendingChildDocs[childDocUpto] = nextChildDoc;
+          if (scoreMode != ScoreMode.None) {
+            // TODO: specialize this into dedicated classes per-scoreMode
+            final float childScore = childScorer.score();
+            pendingChildScores[childDocUpto] = childScore;
+            maxScore = Math.max(childScore, maxScore);
+            totalScore += childScore;
+          }
+          childDocUpto++;
+          nextChildDoc = childScorer.nextDoc();
+        } while (nextChildDoc < parentDoc);
+
+        // Parent & child docs are supposed to be orthogonal:
+        assert nextChildDoc != parentDoc;
+
+        switch(scoreMode) {
+        case Avg:
+          parentScore = totalScore / childDocUpto;
+          break;
+        case Max:
+          parentScore = maxScore;
+          break;
+        case Total:
+          parentScore = totalScore;
+          break;
+        case None:
+          break;
+        }
 
-      //System.out.println("  return parentDoc=" + parentDoc);
-      return parentDoc;
+        //System.out.println("  return parentDoc=" + parentDoc);
+        return parentDoc;
+      }
     }
 
     @Override

Modified: lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java?rev=1242934&r1=1242933&r2=1242934&view=diff
==============================================================================
--- lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (original)
+++ lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java Fri Feb 10 21:28:52 2012
@@ -151,10 +151,75 @@ public class TestBlockJoin extends Lucen
     assertEquals("java", childDoc.get("skill"));
     assertEquals(2007, ((StoredField) childDoc.getField("year")).numericValue());
     assertEquals("Lisa", getParentDoc(r, parentsFilter, hits.scoreDocs[0].doc).get("name"));
+
+    // Test with filter on child docs:
+    assertEquals(0, s.search(fullChildQuery,
+                             new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))),
+                             1).totalHits);
+    
     r.close();
     dir.close();
   }
 
+  public void testSimpleFilter() throws Exception {
+
+    final Directory dir = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(random, dir);
+
+    final List<Document> docs = new ArrayList<Document>();
+
+    docs.add(makeJob("java", 2007));
+    docs.add(makeJob("python", 2010));
+    docs.add(makeResume("Lisa", "United Kingdom"));
+    w.addDocuments(docs);
+
+    docs.clear();
+    docs.add(makeJob("ruby", 2005));
+    docs.add(makeJob("java", 2006));
+    docs.add(makeResume("Frank", "United States"));
+    w.addDocuments(docs);
+
+    IndexReader r = w.getReader();
+    w.close();
+    IndexSearcher s = newSearcher(r);
+
+    // Create a filter that defines "parent" documents in the index - in this case resumes
+    Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+    // Define child document criteria (finds an example of relevant work experience)
+    BooleanQuery childQuery = new BooleanQuery();
+    childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
+    childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));
+
+    // Define parent document criteria (find a resident in the UK)
+    Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+      
+    // Wrap the child document query to 'join' any matches
+    // up to corresponding parent:
+    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
+      
+    assertEquals("no filter - both passed", 2, s.search(childJoinQuery, 10).totalHits);
+
+    assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, parentsFilter, 10).totalHits);
+    assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).totalHits);
+      
+    // not found test
+    assertEquals("noone live there", 0, s.search(childJoinQuery, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
+    assertEquals("noone live there", 0, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).totalHits);
+      
+    // apply the UK filter by the searcher
+    TopDocs ukOnly = s.search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
+    assertEquals("has filter - single passed", 1, ukOnly.totalHits);
+    assertEquals( "Lisa", r.document(ukOnly.scoreDocs[0].doc).get("name"));
+
+    // looking for US candidates
+    TopDocs usThen = s.search(childJoinQuery , new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
+    assertEquals("has filter - single passed", 1, usThen.totalHits);
+    assertEquals("Frank", r.document(usThen.scoreDocs[0].doc).get("name"));
+    r.close();
+    dir.close();
+  }
+  
   private Document getParentDoc(IndexReader reader, Filter parents, int childDocID) throws IOException {
     final AtomicReaderContext[] leaves = reader.getTopReaderContext().leaves();
     final int subIndex = ReaderUtil.subIndex(childDocID, leaves);
@@ -241,6 +306,9 @@ public class TestBlockJoin extends Lucen
     // Values for child fields:
     final String[][] childFields = getRandomFields(numParentDocs);
 
+    final boolean doDeletes = random.nextBoolean();
+    final List<Integer> toDelete = new ArrayList<Integer>();
+
     // TODO: parallel star join, nested join cases too!
     final RandomIndexWriter w = new RandomIndexWriter(random, dir);
     final RandomIndexWriter joinW = new RandomIndexWriter(random, joinDir);
@@ -261,6 +329,11 @@ public class TestBlockJoin extends Lucen
         }
       }
 
+      if (doDeletes) {
+        parentDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
+        parentJoinDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
+      }
+
       final List<Document> joinDocs = new ArrayList<Document>();
 
       if (VERBOSE) {
@@ -308,12 +381,28 @@ public class TestBlockJoin extends Lucen
           System.out.println("    " + sb.toString());
         }
 
+        if (doDeletes) {
+          joinChildDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
+        }
+
         w.addDocument(childDoc);
       }
 
       // Parent last:
       joinDocs.add(parentJoinDoc);
       joinW.addDocuments(joinDocs);
+
+      if (doDeletes && random.nextInt(30) == 7) {
+        toDelete.add(parentDocID);
+      }
+    }
+
+    for(int deleteID : toDelete) {
+      if (VERBOSE) {
+        System.out.println("DELETE parentID=" + deleteID);
+      }
+      w.deleteDocuments(new Term("blockID", ""+deleteID));
+      joinW.deleteDocuments(new Term("blockID", ""+deleteID));
     }
 
     final IndexReader r = w.getReader();