You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/02/10 22:28:52 UTC
svn commit: r1242934 - in /lucene/dev/trunk: lucene/
modules/join/src/java/org/apache/lucene/search/join/
modules/join/src/test/org/apache/lucene/search/join/
Author: mikemccand
Date: Fri Feb 10 21:28:52 2012
New Revision: 1242934
URL: http://svn.apache.org/viewvc?rev=1242934&view=rev
Log:
SOLR-3076: fix BJQ to handle incoming liveDocs/filter correctly
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1242934&r1=1242933&r2=1242934&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Feb 10 21:28:52 2012
@@ -698,7 +698,10 @@ Bug fixes
* LUCENE-3589: BytesRef copy(short) didnt set length.
(Peter Chang via Robert Muir)
-
+
+* SOLR-3076: ToParent/ChildBlockJoinQuery was not handling an incoming
+ filter nor deleted docs correctly (Mikhail Khludnev via Mike
+ McCandless).
======================= Lucene 3.6.0 =======================
Modified: lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1242934&r1=1242933&r2=1242934&view=diff
==============================================================================
--- lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Fri Feb 10 21:28:52 2012
@@ -109,20 +109,27 @@ public class ToChildBlockJoinQuery exten
parentWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
}
+ // NOTE: acceptDocs applies (and is checked) only in the
+ // child document space
@Override
public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
+
// Pass scoreDocsInOrder true, topScorer false to our sub:
- final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, acceptDocs);
+ final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, null);
if (parentScorer == null) {
// No matches
return null;
}
- final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader().getLiveDocs());
- // TODO: once we do random-access filters we can
- // generalize this:
+ // NOTE: we cannot pass acceptDocs here because this
+ // will (most likely, justifiably) cause the filter to
+ // not return a FixedBitSet but rather a
+ // BitsFilteredDocIdSet. Instead, we filter by
+ // acceptDocs when we score:
+ final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
+
if (parents == null) {
// No matches
return null;
@@ -131,7 +138,7 @@ public class ToChildBlockJoinQuery exten
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
- return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores);
+ return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores, acceptDocs);
}
@Override
@@ -151,16 +158,19 @@ public class ToChildBlockJoinQuery exten
private final Scorer parentScorer;
private final FixedBitSet parentBits;
private final boolean doScores;
+ private final Bits acceptDocs;
+
private float parentScore;
private int childDoc = -1;
private int parentDoc;
- public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores) {
+ public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores, Bits acceptDocs) {
super(weight);
this.doScores = doScores;
this.parentBits = parentBits;
this.parentScorer = parentScorer;
+ this.acceptDocs = acceptDocs;
}
@Override
@@ -172,45 +182,56 @@ public class ToChildBlockJoinQuery exten
public int nextDoc() throws IOException {
//System.out.println("Q.nextDoc() parentDoc=" + parentDoc + " childDoc=" + childDoc);
- if (childDoc+1 == parentDoc) {
- // OK, we are done iterating through all children
- // matching this one parent doc, so we now nextDoc()
- // the parent. Use a while loop because we may have
- // to skip over some number of parents w/ no
- // children:
- while (true) {
- parentDoc = parentScorer.nextDoc();
- if (parentDoc == 0) {
- // Degenerate but allowed: parent has no children
- // TODO: would be nice to pull initial parent
- // into ctor so we can skip this if... but it's
- // tricky because scorer must return -1 for
- // .doc() on init...
+ // Loop until we hit a childDoc that's accepted
+ while (true) {
+ if (childDoc+1 == parentDoc) {
+ // OK, we are done iterating through all children
+ // matching this one parent doc, so we now nextDoc()
+ // the parent. Use a while loop because we may have
+ // to skip over some number of parents w/ no
+ // children:
+ while (true) {
parentDoc = parentScorer.nextDoc();
- }
+ if (parentDoc == 0) {
+ // Degenerate but allowed: parent has no children
+ // TODO: would be nice to pull initial parent
+ // into ctor so we can skip this if... but it's
+ // tricky because scorer must return -1 for
+ // .doc() on init...
+ parentDoc = parentScorer.nextDoc();
+ }
- if (parentDoc == NO_MORE_DOCS) {
- childDoc = NO_MORE_DOCS;
- //System.out.println(" END");
- return childDoc;
- }
+ if (parentDoc == NO_MORE_DOCS) {
+ childDoc = NO_MORE_DOCS;
+ //System.out.println(" END");
+ return childDoc;
+ }
+
+ childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
- childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
- if (childDoc < parentDoc) {
- if (doScores) {
- parentScore = parentScorer.score();
+ if (acceptDocs != null && !acceptDocs.get(childDoc)) {
+ continue;
}
- //System.out.println(" " + childDoc);
- return childDoc;
- } else {
- // Degenerate but allowed: parent has no children
+
+ if (childDoc < parentDoc) {
+ if (doScores) {
+ parentScore = parentScorer.score();
+ }
+ //System.out.println(" " + childDoc);
+ return childDoc;
+ } else {
+ // Degenerate but allowed: parent has no children
+ }
+ }
+ } else {
+ assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
+ childDoc++;
+ if (acceptDocs != null && !acceptDocs.get(childDoc)) {
+ continue;
}
+ //System.out.println(" " + childDoc);
+ return childDoc;
}
- } else {
- assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
- childDoc++;
- //System.out.println(" " + childDoc);
- return childDoc;
}
}
Modified: lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1242934&r1=1242933&r2=1242934&view=diff
==============================================================================
--- lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/trunk/modules/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Fri Feb 10 21:28:52 2012
@@ -169,11 +169,14 @@ public class ToParentBlockJoinQuery exte
childWeight.normalize(norm, topLevelBoost * joinQuery.getBoost());
}
+ // NOTE: acceptDocs applies (and is checked) only in the
+ // parent document space
@Override
public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
+
// Pass scoreDocsInOrder true, topScorer false to our sub:
- final Scorer childScorer = childWeight.scorer(readerContext, true, false, acceptDocs);
+ final Scorer childScorer = childWeight.scorer(readerContext, true, false, null);
if (childScorer == null) {
// No matches
@@ -186,9 +189,13 @@ public class ToParentBlockJoinQuery exte
return null;
}
- final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader().getLiveDocs());
- // TODO: once we do random-access filters we can
- // generalize this:
+ // NOTE: we cannot pass acceptDocs here because this
+ // will (most likely, justifiably) cause the filter to
+ // not return a FixedBitSet but rather a
+ // BitsFilteredDocIdSet. Instead, we filter by
+ // acceptDocs when we score:
+ final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
+
if (parents == null) {
// No matches
return null;
@@ -197,7 +204,7 @@ public class ToParentBlockJoinQuery exte
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
- return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode);
+ return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, acceptDocs);
}
@Override
@@ -217,6 +224,7 @@ public class ToParentBlockJoinQuery exte
private final Scorer childScorer;
private final FixedBitSet parentBits;
private final ScoreMode scoreMode;
+ private final Bits acceptDocs;
private int parentDoc = -1;
private float parentScore;
private int nextChildDoc;
@@ -225,12 +233,13 @@ public class ToParentBlockJoinQuery exte
private float[] pendingChildScores;
private int childDocUpto;
- public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
+ public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) {
super(weight);
//System.out.println("Q.init firstChildDoc=" + firstChildDoc);
this.parentBits = parentBits;
this.childScorer = childScorer;
this.scoreMode = scoreMode;
+ this.acceptDocs = acceptDocs;
if (scoreMode != ScoreMode.None) {
pendingChildScores = new float[5];
}
@@ -273,60 +282,75 @@ public class ToParentBlockJoinQuery exte
public int nextDoc() throws IOException {
//System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
- if (nextChildDoc == NO_MORE_DOCS) {
- //System.out.println(" end");
- return parentDoc = NO_MORE_DOCS;
- }
-
- // Gather all children sharing the same parent as nextChildDoc
- parentDoc = parentBits.nextSetBit(nextChildDoc);
- //System.out.println(" parentDoc=" + parentDoc);
- assert parentDoc != -1;
-
- float totalScore = 0;
- float maxScore = Float.NEGATIVE_INFINITY;
-
- childDocUpto = 0;
- do {
- //System.out.println(" c=" + nextChildDoc);
- if (pendingChildDocs.length == childDocUpto) {
- pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
- }
- if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
- pendingChildScores = ArrayUtil.grow(pendingChildScores);
+ // Loop until we hit a parentDoc that's accepted
+ while (true) {
+ if (nextChildDoc == NO_MORE_DOCS) {
+ //System.out.println(" end");
+ return parentDoc = NO_MORE_DOCS;
}
- pendingChildDocs[childDocUpto] = nextChildDoc;
- if (scoreMode != ScoreMode.None) {
- // TODO: specialize this into dedicated classes per-scoreMode
- final float childScore = childScorer.score();
- pendingChildScores[childDocUpto] = childScore;
- maxScore = Math.max(childScore, maxScore);
- totalScore += childScore;
+
+ // Gather all children sharing the same parent as
+ // nextChildDoc
+
+ parentDoc = parentBits.nextSetBit(nextChildDoc);
+
+ //System.out.println(" parentDoc=" + parentDoc);
+ assert parentDoc != -1;
+
+ //System.out.println(" nextChildDoc=" + nextChildDoc);
+ if (acceptDocs != null && !acceptDocs.get(parentDoc)) {
+ // Parent doc not accepted; skip child docs until
+ // we hit a new parent doc:
+ do {
+ nextChildDoc = childScorer.nextDoc();
+ } while (nextChildDoc < parentDoc);
+ continue;
}
- childDocUpto++;
- nextChildDoc = childScorer.nextDoc();
- } while (nextChildDoc < parentDoc);
- //System.out.println(" nextChildDoc=" + nextChildDoc);
- // Parent & child docs are supposed to be orthogonal:
- assert nextChildDoc != parentDoc;
+ float totalScore = 0;
+ float maxScore = Float.NEGATIVE_INFINITY;
- switch(scoreMode) {
- case Avg:
- parentScore = totalScore / childDocUpto;
- break;
- case Max:
- parentScore = maxScore;
- break;
- case Total:
- parentScore = totalScore;
- break;
- case None:
- break;
- }
+ childDocUpto = 0;
+ do {
+ //System.out.println(" c=" + nextChildDoc);
+ if (pendingChildDocs.length == childDocUpto) {
+ pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
+ }
+ if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
+ pendingChildScores = ArrayUtil.grow(pendingChildScores);
+ }
+ pendingChildDocs[childDocUpto] = nextChildDoc;
+ if (scoreMode != ScoreMode.None) {
+ // TODO: specialize this into dedicated classes per-scoreMode
+ final float childScore = childScorer.score();
+ pendingChildScores[childDocUpto] = childScore;
+ maxScore = Math.max(childScore, maxScore);
+ totalScore += childScore;
+ }
+ childDocUpto++;
+ nextChildDoc = childScorer.nextDoc();
+ } while (nextChildDoc < parentDoc);
+
+ // Parent & child docs are supposed to be orthogonal:
+ assert nextChildDoc != parentDoc;
+
+ switch(scoreMode) {
+ case Avg:
+ parentScore = totalScore / childDocUpto;
+ break;
+ case Max:
+ parentScore = maxScore;
+ break;
+ case Total:
+ parentScore = totalScore;
+ break;
+ case None:
+ break;
+ }
- //System.out.println(" return parentDoc=" + parentDoc);
- return parentDoc;
+ //System.out.println(" return parentDoc=" + parentDoc);
+ return parentDoc;
+ }
}
@Override
Modified: lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java?rev=1242934&r1=1242933&r2=1242934&view=diff
==============================================================================
--- lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (original)
+++ lucene/dev/trunk/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java Fri Feb 10 21:28:52 2012
@@ -151,10 +151,75 @@ public class TestBlockJoin extends Lucen
assertEquals("java", childDoc.get("skill"));
assertEquals(2007, ((StoredField) childDoc.getField("year")).numericValue());
assertEquals("Lisa", getParentDoc(r, parentsFilter, hits.scoreDocs[0].doc).get("name"));
+
+ // Test with filter on child docs:
+ assertEquals(0, s.search(fullChildQuery,
+ new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))),
+ 1).totalHits);
+
r.close();
dir.close();
}
+ public void testSimpleFilter() throws Exception {
+
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random, dir);
+
+ final List<Document> docs = new ArrayList<Document>();
+
+ docs.add(makeJob("java", 2007));
+ docs.add(makeJob("python", 2010));
+ docs.add(makeResume("Lisa", "United Kingdom"));
+ w.addDocuments(docs);
+
+ docs.clear();
+ docs.add(makeJob("ruby", 2005));
+ docs.add(makeJob("java", 2006));
+ docs.add(makeResume("Frank", "United States"));
+ w.addDocuments(docs);
+
+ IndexReader r = w.getReader();
+ w.close();
+ IndexSearcher s = newSearcher(r);
+
+ // Create a filter that defines "parent" documents in the index - in this case resumes
+ Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ // Define child document criteria (finds an example of relevant work experience)
+ BooleanQuery childQuery = new BooleanQuery();
+ childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
+ childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));
+
+ // Define parent document criteria (find a resident in the UK)
+ Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+ // Wrap the child document query to 'join' any matches
+ // up to corresponding parent:
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
+
+ assertEquals("no filter - both passed", 2, s.search(childJoinQuery, 10).totalHits);
+
+ assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, parentsFilter, 10).totalHits);
+ assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).totalHits);
+
+ // not found test
+ assertEquals("noone live there", 0, s.search(childJoinQuery, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
+ assertEquals("noone live there", 0, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).totalHits);
+
+ // apply the UK filter by the searcher
+ TopDocs ukOnly = s.search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
+ assertEquals("has filter - single passed", 1, ukOnly.totalHits);
+ assertEquals( "Lisa", r.document(ukOnly.scoreDocs[0].doc).get("name"));
+
+ // looking for US candidates
+ TopDocs usThen = s.search(childJoinQuery , new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
+ assertEquals("has filter - single passed", 1, usThen.totalHits);
+ assertEquals("Frank", r.document(usThen.scoreDocs[0].doc).get("name"));
+ r.close();
+ dir.close();
+ }
+
private Document getParentDoc(IndexReader reader, Filter parents, int childDocID) throws IOException {
final AtomicReaderContext[] leaves = reader.getTopReaderContext().leaves();
final int subIndex = ReaderUtil.subIndex(childDocID, leaves);
@@ -241,6 +306,9 @@ public class TestBlockJoin extends Lucen
// Values for child fields:
final String[][] childFields = getRandomFields(numParentDocs);
+ final boolean doDeletes = random.nextBoolean();
+ final List<Integer> toDelete = new ArrayList<Integer>();
+
// TODO: parallel star join, nested join cases too!
final RandomIndexWriter w = new RandomIndexWriter(random, dir);
final RandomIndexWriter joinW = new RandomIndexWriter(random, joinDir);
@@ -261,6 +329,11 @@ public class TestBlockJoin extends Lucen
}
}
+ if (doDeletes) {
+ parentDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
+ parentJoinDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
+ }
+
final List<Document> joinDocs = new ArrayList<Document>();
if (VERBOSE) {
@@ -308,12 +381,28 @@ public class TestBlockJoin extends Lucen
System.out.println(" " + sb.toString());
}
+ if (doDeletes) {
+ joinChildDoc.add(newField("blockID", ""+parentDocID, StringField.TYPE_UNSTORED));
+ }
+
w.addDocument(childDoc);
}
// Parent last:
joinDocs.add(parentJoinDoc);
joinW.addDocuments(joinDocs);
+
+ if (doDeletes && random.nextInt(30) == 7) {
+ toDelete.add(parentDocID);
+ }
+ }
+
+ for(int deleteID : toDelete) {
+ if (VERBOSE) {
+ System.out.println("DELETE parentID=" + deleteID);
+ }
+ w.deleteDocuments(new Term("blockID", ""+deleteID));
+ joinW.deleteDocuments(new Term("blockID", ""+deleteID));
}
final IndexReader r = w.getReader();