You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/04/30 13:35:36 UTC
svn commit: r1477570 - in /lucene/dev/trunk: ./ lucene/ lucene/join/
lucene/join/src/java/org/apache/lucene/search/join/
lucene/join/src/test/org/apache/lucene/search/join/
Author: mikemccand
Date: Tue Apr 30 11:35:36 2013
New Revision: 1477570
URL: http://svn.apache.org/r1477570
Log:
LUCENE-4968: fix several block join issues
Modified:
lucene/dev/trunk/ (props changed)
lucene/dev/trunk/lucene/ (props changed)
lucene/dev/trunk/lucene/CHANGES.txt (contents, props changed)
lucene/dev/trunk/lucene/join/ (props changed)
lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1477570&r1=1477569&r2=1477570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Apr 30 11:35:36 2013
@@ -68,6 +68,11 @@ Bug Fixes
* LUCENE-4959: Fix incorrect return value in
SimpleNaiveBayesClassifier.assignClass. (Alexey Kutin via Adrien Grand)
+* LUCENE-4968: Fixed ToParentBlockJoinQuery/Collector: correctly handle parent
+ hits that had no child matches, don't throw IllegalArgumentEx when
+ the child query has no hits, more aggressively catch cases where childQuery
+ incorrectly matches parent documents (Mike McCandless)
+
Optimizations
* LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher
Modified: lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java?rev=1477570&r1=1477569&r2=1477570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (original)
+++ lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java Tue Apr 30 11:35:36 2013
@@ -111,6 +111,7 @@ public class ToParentBlockJoinCollector
if (trackMaxScore) {
maxScore = Float.MIN_VALUE;
}
+ //System.out.println("numParentHits=" + numParentHits);
this.trackScores = trackScores;
this.numParentHits = numParentHits;
queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
@@ -122,6 +123,7 @@ public class ToParentBlockJoinCollector
private static final class OneGroup extends FieldValueHitQueue.Entry {
public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
super(comparatorSlot, parentDoc, parentScore);
+ //System.out.println("make OneGroup parentDoc=" + parentDoc);
docs = new int[numJoins][];
for(int joinID=0;joinID<numJoins;joinID++) {
docs[joinID] = new int[5];
@@ -142,7 +144,7 @@ public class ToParentBlockJoinCollector
@Override
public void collect(int parentDoc) throws IOException {
- //System.out.println("C parentDoc=" + parentDoc);
+ //System.out.println("\nC parentDoc=" + parentDoc);
totalHitCount++;
float score = Float.NaN;
@@ -203,8 +205,7 @@ public class ToParentBlockJoinCollector
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(comparatorSlot, parentDoc);
}
- //System.out.println(" startup: new OG doc=" +
- //(docBase+parentDoc));
+ //System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
if (!trackMaxScore && trackScores) {
score = scorer.score();
}
@@ -241,22 +242,28 @@ public class ToParentBlockJoinCollector
og.scores = ArrayUtil.grow(og.scores);
}
- //System.out.println("copyGroups parentDoc=" + og.doc);
+ //System.out.println("\ncopyGroups parentDoc=" + og.doc);
for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
final ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
//System.out.println(" scorer=" + joinScorer);
- if (joinScorer != null) {
+ if (joinScorer != null && docBase + joinScorer.getParentDoc() == og.doc) {
og.counts[scorerIDX] = joinScorer.getChildCount();
//System.out.println(" count=" + og.counts[scorerIDX]);
og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
+ assert og.docs[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.docs[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
+ //System.out.println(" len=" + og.docs[scorerIDX].length);
/*
- for(int idx=0;idx<og.counts[scorerIDX];idx++) {
+ for(int idx=0;idx<og.counts[scorerIDX];idx++) {
System.out.println(" docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
- }
+ }
*/
if (trackScores) {
+ //System.out.println(" copy scores");
og.scores[scorerIDX] = joinScorer.swapChildScores(og.scores[scorerIDX]);
+ assert og.scores[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
}
+ } else {
+ og.counts[scorerIDX] = 0;
}
}
}
@@ -302,13 +309,16 @@ public class ToParentBlockJoinCollector
Arrays.fill(joinScorers, null);
Queue<Scorer> queue = new LinkedList<Scorer>();
+ //System.out.println("\nqueue: add top scorer=" + scorer);
queue.add(scorer);
while ((scorer = queue.poll()) != null) {
+ //System.out.println(" poll: " + scorer + "; " + scorer.getWeight().getQuery());
if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
}
for (ChildScorer sub : scorer.getChildren()) {
+ //System.out.println(" add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
queue.add(sub.child);
}
}
@@ -384,12 +394,8 @@ public class ToParentBlockJoinCollector
throws IOException {
final Integer _slot = joinQueryID.get(query);
- if (_slot == null) {
- if (totalHitCount == 0) {
- return null;
- } else {
- throw new IllegalArgumentException("the Query did not contain the provided BlockJoinQuery");
- }
+ if (_slot == null && totalHitCount == 0) {
+ return null;
}
if (sortedGroups == null) {
@@ -401,7 +407,7 @@ public class ToParentBlockJoinCollector
return null;
}
- return accumulateGroups(_slot, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
+ return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
}
/**
@@ -423,18 +429,26 @@ public class ToParentBlockJoinCollector
final FakeScorer fakeScorer = new FakeScorer();
int totalGroupedHitCount = 0;
+ //System.out.println("slot=" + slot);
for(int groupIDX=offset;groupIDX<sortedGroups.length;groupIDX++) {
final OneGroup og = sortedGroups[groupIDX];
- final int numChildDocs = og.counts[slot];
+ final int numChildDocs;
+ if (slot == -1 || slot >= og.counts.length) {
+ numChildDocs = 0;
+ } else {
+ numChildDocs = og.counts[slot];
+ }
// Number of documents in group should be bounded to prevent redundant memory allocation
- final int numDocsInGroup = Math.min(numChildDocs, maxDocsPerGroup);
+ final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
+ //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
// At this point we hold all docs w/ in each group,
// unsorted; we now sort them:
final TopDocsCollector<?> collector;
if (withinGroupSort == null) {
+ //System.out.println("sort by score");
// Sort by score
if (!trackScores) {
throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
@@ -448,6 +462,7 @@ public class ToParentBlockJoinCollector
collector.setScorer(fakeScorer);
collector.setNextReader(og.readerContext);
for(int docIDX=0;docIDX<numChildDocs;docIDX++) {
+ //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
final int doc = og.docs[slot][docIDX];
fakeScorer.doc = doc;
if (trackScores) {
Modified: lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1477570&r1=1477569&r2=1477570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Tue Apr 30 11:35:36 2013
@@ -195,10 +195,8 @@ public class ToParentBlockJoinQuery exte
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs());
- if (scorer != null) {
- if (scorer.advance(doc) == doc) {
- return scorer.explain(context.docBase);
- }
+ if (scorer != null && scorer.advance(doc) == doc) {
+ return scorer.explain(context.docBase);
}
return new ComplexExplanation(false, 0.0f, "Not a match");
}
@@ -246,6 +244,10 @@ public class ToParentBlockJoinQuery exte
return childDocUpto;
}
+ int getParentDoc() {
+ return parentDoc;
+ }
+
int[] swapChildDocs(int[] other) {
final int[] ret = pendingChildDocs;
if (other == null) {
@@ -272,7 +274,6 @@ public class ToParentBlockJoinQuery exte
@Override
public int nextDoc() throws IOException {
//System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
-
// Loop until we hit a parentDoc that's accepted
while (true) {
if (nextChildDoc == NO_MORE_DOCS) {
@@ -285,6 +286,12 @@ public class ToParentBlockJoinQuery exte
parentDoc = parentBits.nextSetBit(nextChildDoc);
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (nextChildDoc == parentDoc) {
+ throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+ }
+
//System.out.println(" parentDoc=" + parentDoc);
assert parentDoc != -1;
@@ -295,6 +302,13 @@ public class ToParentBlockJoinQuery exte
do {
nextChildDoc = childScorer.nextDoc();
} while (nextChildDoc < parentDoc);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (nextChildDoc == parentDoc) {
+ throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+ }
+
continue;
}
@@ -326,8 +340,11 @@ public class ToParentBlockJoinQuery exte
nextChildDoc = childScorer.nextDoc();
} while (nextChildDoc < parentDoc);
- // Parent & child docs are supposed to be orthogonal:
- assert nextChildDoc != parentDoc;
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (nextChildDoc == parentDoc) {
+ throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+ }
switch(scoreMode) {
case Avg:
@@ -343,7 +360,7 @@ public class ToParentBlockJoinQuery exte
break;
}
- //System.out.println(" return parentDoc=" + parentDoc);
+ //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
return parentDoc;
}
}
@@ -393,7 +410,9 @@ public class ToParentBlockJoinQuery exte
}
// Parent & child docs are supposed to be orthogonal:
- assert nextChildDoc != prevParentDoc;
+ if (nextChildDoc == prevParentDoc) {
+ throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+ }
final int nd = nextDoc();
//System.out.println(" return nextParentDoc=" + nd);
Modified: lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java?rev=1477570&r1=1477569&r2=1477570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (original)
+++ lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java Tue Apr 30 11:35:36 2013
@@ -1206,4 +1206,191 @@ public class TestBlockJoin extends Lucen
r.close();
dir.close();
}
+
+ // LUCENE-4968
+ public void testSometimesParentOnlyMatches() throws Exception {
+ Directory d = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), d);
+ Document parent = new Document();
+ parent.add(new StoredField("parentID", "0"));
+ parent.add(newTextField("parentText", "text", Field.Store.NO));
+ parent.add(newStringField("isParent", "yes", Field.Store.NO));
+
+ List<Document> docs = new ArrayList<Document>();
+
+ Document child = new Document();
+ docs.add(child);
+ child.add(new StoredField("childID", "0"));
+ child.add(newTextField("childText", "text", Field.Store.NO));
+
+ // parent last:
+ docs.add(parent);
+ w.addDocuments(docs);
+
+ docs.clear();
+
+ parent = new Document();
+ parent.add(newTextField("parentText", "text", Field.Store.NO));
+ parent.add(newStringField("isParent", "yes", Field.Store.NO));
+ parent.add(new StoredField("parentID", "1"));
+
+ // parent last:
+ docs.add(parent);
+ w.addDocuments(docs);
+
+ IndexReader r = w.getReader();
+ w.close();
+
+ Query childQuery = new TermQuery(new Term("childText", "text"));
+ Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+ BooleanQuery parentQuery = new BooleanQuery();
+ parentQuery.add(childJoinQuery, Occur.SHOULD);
+ parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
+
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
+ 10, true, true);
+ newSearcher(r).search(parentQuery, c);
+ TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
+
+ // Two parents:
+ assertEquals(2, groups.totalGroupCount.intValue());
+
+ // One child docs:
+ assertEquals(1, groups.totalGroupedHitCount);
+
+ GroupDocs<Integer> group = groups.groups[0];
+ StoredDocument doc = r.document(group.groupValue.intValue());
+ assertEquals("0", doc.get("parentID"));
+ System.out.println("group: " + group);
+
+ group = groups.groups[1];
+ doc = r.document(group.groupValue.intValue());
+ assertEquals("1", doc.get("parentID"));
+
+ r.close();
+ d.close();
+ }
+
+ // LUCENE-4968
+ public void testChildQueryNeverMatches() throws Exception {
+ Directory d = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), d);
+ Document parent = new Document();
+ parent.add(new StoredField("parentID", "0"));
+ parent.add(newTextField("parentText", "text", Field.Store.NO));
+ parent.add(newStringField("isParent", "yes", Field.Store.NO));
+
+ List<Document> docs = new ArrayList<Document>();
+
+ Document child = new Document();
+ docs.add(child);
+ child.add(new StoredField("childID", "0"));
+ child.add(newTextField("childText", "text", Field.Store.NO));
+
+ // parent last:
+ docs.add(parent);
+ w.addDocuments(docs);
+
+ docs.clear();
+
+ parent = new Document();
+ parent.add(newTextField("parentText", "text", Field.Store.NO));
+ parent.add(newStringField("isParent", "yes", Field.Store.NO));
+ parent.add(new StoredField("parentID", "1"));
+
+ // parent last:
+ docs.add(parent);
+ w.addDocuments(docs);
+
+ IndexReader r = w.getReader();
+ w.close();
+
+ // never matches:
+ Query childQuery = new TermQuery(new Term("childText", "bogus"));
+ Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+ BooleanQuery parentQuery = new BooleanQuery();
+ parentQuery.add(childJoinQuery, Occur.SHOULD);
+ parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
+
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
+ 10, true, true);
+ newSearcher(r).search(parentQuery, c);
+ TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
+
+ // Two parents:
+ assertEquals(2, groups.totalGroupCount.intValue());
+
+ // One child docs:
+ assertEquals(0, groups.totalGroupedHitCount);
+
+ GroupDocs<Integer> group = groups.groups[0];
+ StoredDocument doc = r.document(group.groupValue.intValue());
+ assertEquals("0", doc.get("parentID"));
+ System.out.println("group: " + group);
+
+ group = groups.groups[1];
+ doc = r.document(group.groupValue.intValue());
+ assertEquals("1", doc.get("parentID"));
+
+ r.close();
+ d.close();
+ }
+
+ // LUCENE-4968
+ public void testChildQueryMatchesParent() throws Exception {
+ Directory d = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), d);
+ Document parent = new Document();
+ parent.add(new StoredField("parentID", "0"));
+ parent.add(newTextField("parentText", "text", Field.Store.NO));
+ parent.add(newStringField("isParent", "yes", Field.Store.NO));
+
+ List<Document> docs = new ArrayList<Document>();
+
+ Document child = new Document();
+ docs.add(child);
+ child.add(new StoredField("childID", "0"));
+ child.add(newTextField("childText", "text", Field.Store.NO));
+
+ // parent last:
+ docs.add(parent);
+ w.addDocuments(docs);
+
+ docs.clear();
+
+ parent = new Document();
+ parent.add(newTextField("parentText", "text", Field.Store.NO));
+ parent.add(newStringField("isParent", "yes", Field.Store.NO));
+ parent.add(new StoredField("parentID", "1"));
+
+ // parent last:
+ docs.add(parent);
+ w.addDocuments(docs);
+
+ IndexReader r = w.getReader();
+ w.close();
+
+ // illegally matches parent:
+ Query childQuery = new TermQuery(new Term("parentText", "text"));
+ Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+ BooleanQuery parentQuery = new BooleanQuery();
+ parentQuery.add(childJoinQuery, Occur.SHOULD);
+ parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
+
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
+ 10, true, true);
+
+ try {
+ newSearcher(r).search(parentQuery, c);
+ fail("should have hit exception");
+ } catch (IllegalStateException ise) {
+ // expected
+ }
+
+ r.close();
+ d.close();
+ }
}