You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by is...@apache.org on 2017/02/12 13:18:40 UTC
[16/18] lucene-solr:jira/solr-5944: Updating branch by merging latest
changes from master
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4fc5a9f0/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
deleted file mode 100644
index f81b943..0000000
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.join;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.Map;
-import java.util.Queue;
-
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.FieldComparator;
-import org.apache.lucene.search.FieldValueHitQueue;
-import org.apache.lucene.search.LeafCollector;
-import org.apache.lucene.search.LeafFieldComparator;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreCachingWrappingScorer;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Scorer.ChildScorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.TopDocsCollector;
-import org.apache.lucene.search.TopFieldCollector;
-import org.apache.lucene.search.TopScoreDocCollector;
-import org.apache.lucene.search.grouping.GroupDocs;
-import org.apache.lucene.search.grouping.TopGroups;
-import org.apache.lucene.util.ArrayUtil;
-
-
-/** Collects parent document hits for a Query containing one or more
- * BlockJoinQuery clauses, sorted by the
- * specified parent Sort. Note that this cannot perform
- * arbitrary joins; rather, it requires that all joined
- * documents are indexed as a doc block (using {@link
- * IndexWriter#addDocuments} or {@link
- * IndexWriter#updateDocuments}). Ie, the join is computed
- * at index time.
- *
- * <p>This collector MUST be used with {@link ToParentBlockJoinIndexSearcher},
- * in order to work correctly.
- *
- * <p>The parent Sort must only use
- * fields from the parent documents; sorting by field in
- * the child documents is not supported.</p>
- *
- * <p>You should only use this
- * collector if one or more of the clauses in the query is
- * a {@link ToParentBlockJoinQuery}. This collector will find those query
- * clauses and record the matching child documents for the
- * top scoring parent documents.</p>
- *
- * <p>Multiple joins (star join) and nested joins and a mix
- * of the two are allowed, as long as in all cases the
- * documents corresponding to a single row of each joined
- * parent table were indexed as a doc block.</p>
- *
- * <p>For the simple star join you can retrieve the
- * {@link TopGroups} instance containing each {@link ToParentBlockJoinQuery}'s
- * matching child documents for the top parent groups,
- * using {@link #getTopGroups}. Ie,
- * a single query, which will contain two or more
- * {@link ToParentBlockJoinQuery}'s as clauses representing the star join,
- * can then retrieve two or more {@link TopGroups} instances.</p>
- *
- * <p>For nested joins, the query will run correctly (ie,
- * match the right parent and child documents), however,
- * because TopGroups is currently unable to support nesting
- * (each group is not able to hold another TopGroups), you
- * are only able to retrieve the TopGroups of the first
- * join. The TopGroups of the nested joins will not be
- * correct.
- *
- * See {@link org.apache.lucene.search.join} for a code
- * sample.
- *
- * @lucene.experimental
- */
-public class ToParentBlockJoinCollector implements Collector {
-
- private final Sort sort;
-
- // Maps each BlockJoinQuery instance to its "slot" in
- // joinScorers and in OneGroup's cached doc/scores/count:
- private final Map<Query,Integer> joinQueryID = new HashMap<>();
- private final int numParentHits;
- private final FieldValueHitQueue<OneGroup> queue;
- private final FieldComparator<?>[] comparators;
- private final boolean trackMaxScore;
- private final boolean trackScores;
-
- private ToParentBlockJoinQuery.BlockJoinScorer[] joinScorers = new ToParentBlockJoinQuery.BlockJoinScorer[0];
- private boolean queueFull;
-
- private OneGroup bottom;
- private int totalHitCount;
- private float maxScore = Float.NaN;
-
- /** Creates a ToParentBlockJoinCollector. The provided sort must
- * not be null. If you pass trackScores=true, no
- * ToParentBlockJoinQuery instance may use
- * ScoreMode.None. */
- public ToParentBlockJoinCollector(Sort sort, int numParentHits, boolean trackScores, boolean trackMaxScore) {
- // TODO: allow null sort to be specialized to relevance
- // only collector
- this.sort = sort;
- this.trackMaxScore = trackMaxScore;
- if (trackMaxScore) {
- maxScore = Float.MIN_VALUE;
- }
- //System.out.println("numParentHits=" + numParentHits);
- this.trackScores = trackScores;
- this.numParentHits = numParentHits;
- queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
- comparators = queue.getComparators();
- }
-
- private static final class OneGroup extends FieldValueHitQueue.Entry {
- public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
- super(comparatorSlot, parentDoc, parentScore);
- //System.out.println("make OneGroup parentDoc=" + parentDoc);
- docs = new int[numJoins][];
- for(int joinID=0;joinID<numJoins;joinID++) {
- docs[joinID] = new int[5];
- }
- if (doScores) {
- scores = new float[numJoins][];
- for(int joinID=0;joinID<numJoins;joinID++) {
- scores[joinID] = new float[5];
- }
- }
- counts = new int[numJoins];
- }
- LeafReaderContext readerContext;
- int[][] docs;
- float[][] scores;
- int[] counts;
- }
-
- @Override
- public LeafCollector getLeafCollector(final LeafReaderContext context)
- throws IOException {
- final LeafFieldComparator[] comparators = queue.getComparators(context);
- final int[] reverseMul = queue.getReverseMul();
- final int docBase = context.docBase;
- return new LeafCollector() {
-
- private Scorer scorer;
-
- @Override
- public void setScorer(Scorer scorer) throws IOException {
- //System.out.println("C.setScorer scorer=" + scorer);
- // Since we invoke .score(), and the comparators likely
- // do as well, cache it so it's only "really" computed
- // once:
- if (scorer instanceof ScoreCachingWrappingScorer == false) {
- scorer = new ScoreCachingWrappingScorer(scorer);
- }
- this.scorer = scorer;
- for (LeafFieldComparator comparator : comparators) {
- comparator.setScorer(scorer);
- }
- Arrays.fill(joinScorers, null);
-
- Queue<Scorer> queue = new LinkedList<>();
- //System.out.println("\nqueue: add top scorer=" + scorer);
- queue.add(scorer);
- while ((scorer = queue.poll()) != null) {
- //System.out.println(" poll: " + scorer + "; " + scorer.getWeight().getQuery());
- if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
- enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
- }
-
- for (ChildScorer sub : scorer.getChildren()) {
- //System.out.println(" add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
- queue.add(sub.child);
- }
- }
- }
-
- @Override
- public void collect(int parentDoc) throws IOException {
- //System.out.println("\nC parentDoc=" + parentDoc);
- totalHitCount++;
-
- float score = Float.NaN;
-
- if (trackMaxScore) {
- score = scorer.score();
- maxScore = Math.max(maxScore, score);
- }
-
- // TODO: we could sweep all joinScorers here and
- // aggregate total child hit count, so we can fill this
- // in getTopGroups (we wire it to 0 now)
-
- if (queueFull) {
- //System.out.println(" queueFull");
- // Fastmatch: return if this hit is not competitive
- int c = 0;
- for (int i = 0; i < comparators.length; ++i) {
- c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
- if (c != 0) {
- break;
- }
- }
- if (c <= 0) { // in case of equality, this hit is not competitive as docs are visited in order
- // Definitely not competitive.
- //System.out.println(" skip");
- return;
- }
-
- //System.out.println(" competes! doc=" + (docBase + parentDoc));
-
- // This hit is competitive - replace bottom element in queue & adjustTop
- for (LeafFieldComparator comparator : comparators) {
- comparator.copy(bottom.slot, parentDoc);
- }
- if (!trackMaxScore && trackScores) {
- score = scorer.score();
- }
- bottom.doc = docBase + parentDoc;
- bottom.readerContext = context;
- bottom.score = score;
- copyGroups(bottom);
- bottom = queue.updateTop();
-
- for (LeafFieldComparator comparator : comparators) {
- comparator.setBottom(bottom.slot);
- }
- } else {
- // Startup transient: queue is not yet full:
- final int comparatorSlot = totalHitCount - 1;
-
- // Copy hit into queue
- for (LeafFieldComparator comparator : comparators) {
- comparator.copy(comparatorSlot, parentDoc);
- }
- //System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
- if (!trackMaxScore && trackScores) {
- score = scorer.score();
- }
- final OneGroup og = new OneGroup(comparatorSlot, docBase+parentDoc, score, joinScorers.length, trackScores);
- og.readerContext = context;
- copyGroups(og);
- bottom = queue.add(og);
- queueFull = totalHitCount == numParentHits;
- if (queueFull) {
- // End of startup transient: queue just filled up:
- for (LeafFieldComparator comparator : comparators) {
- comparator.setBottom(bottom.slot);
- }
- }
- }
- }
-
- // Pulls out child doc and scores for all join queries:
- private void copyGroups(OneGroup og) {
- // While rare, it's possible top arrays could be too
- // short if join query had null scorer on first
- // segment(s) but then became non-null on later segments
- final int numSubScorers = joinScorers.length;
- if (og.docs.length < numSubScorers) {
- // While rare, this could happen if join query had
- // null scorer on first segment(s) but then became
- // non-null on later segments
- og.docs = ArrayUtil.grow(og.docs, numSubScorers);
- }
- if (og.counts.length < numSubScorers) {
- og.counts = ArrayUtil.grow(og.counts);
- }
- if (trackScores && og.scores.length < numSubScorers) {
- og.scores = ArrayUtil.grow(og.scores, numSubScorers);
- }
-
- //System.out.println("\ncopyGroups parentDoc=" + og.doc);
- for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
- final ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
- //System.out.println(" scorer=" + joinScorer);
- if (joinScorer != null && docBase + joinScorer.getParentDoc() == og.doc) {
- og.counts[scorerIDX] = joinScorer.getChildCount();
- //System.out.println(" count=" + og.counts[scorerIDX]);
- og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
- assert og.docs[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.docs[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
- //System.out.println(" len=" + og.docs[scorerIDX].length);
- /*
- for(int idx=0;idx<og.counts[scorerIDX];idx++) {
- System.out.println(" docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
- }
- */
- if (trackScores) {
- //System.out.println(" copy scores");
- og.scores[scorerIDX] = joinScorer.swapChildScores(og.scores[scorerIDX]);
- assert og.scores[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
- }
- } else {
- og.counts[scorerIDX] = 0;
- }
- }
- }
- };
- }
-
- private void enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer) {
- scorer.trackPendingChildHits();
- final Integer slot = joinQueryID.get(query);
- if (slot == null) {
- joinQueryID.put(query, joinScorers.length);
- //System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
- final ToParentBlockJoinQuery.BlockJoinScorer[] newArray = new ToParentBlockJoinQuery.BlockJoinScorer[1+joinScorers.length];
- System.arraycopy(joinScorers, 0, newArray, 0, joinScorers.length);
- joinScorers = newArray;
- joinScorers[joinScorers.length-1] = scorer;
- } else {
- joinScorers[slot] = scorer;
- }
- }
-
- private OneGroup[] sortedGroups;
-
- private void sortQueue() {
- sortedGroups = new OneGroup[queue.size()];
- for(int downTo=queue.size()-1;downTo>=0;downTo--) {
- sortedGroups[downTo] = queue.pop();
- }
- }
-
- /** Returns the TopGroups for the specified
- * BlockJoinQuery. The groupValue of each GroupDocs will
- * be the parent docID for that group.
- * The number of documents within each group is calculated as the minimum of <code>maxDocsPerGroup</code>
- * and the number of matched child documents for that group.
- * Returns null if no groups matched.
- *
- * @param query Search query
- * @param withinGroupSort Sort criteria within groups
- * @param offset Parent docs offset
- * @param maxDocsPerGroup Upper bound on the number of documents per group
- * @param withinGroupOffset Offset within each group of child docs
- * @param fillSortFields Specifies whether to add sort fields or not
- * @return TopGroups for specified query
- * @throws IOException if there is a low-level I/O error
- */
- public TopGroups<Integer> getTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset,
- int maxDocsPerGroup, int withinGroupOffset, boolean fillSortFields)
- throws IOException {
-
- final Integer _slot = joinQueryID.get(query);
- if (_slot == null && totalHitCount == 0) {
- return null;
- }
-
- if (sortedGroups == null) {
- if (offset >= queue.size()) {
- return null;
- }
- sortQueue();
- } else if (offset > sortedGroups.length) {
- return null;
- }
-
- return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
- }
-
- /**
- * Accumulates groups for the BlockJoinQuery specified by its slot.
- *
- * @param slot Search query's slot
- * @param offset Parent docs offset
- * @param maxDocsPerGroup Upper bound on the number of documents per group
- * @param withinGroupOffset Offset within each group of child docs
- * @param withinGroupSort Sort criteria within groups
- * @param fillSortFields Specifies whether to add sort fields or not
- * @return TopGroups for the query specified by slot
- * @throws IOException if there is a low-level I/O error
- */
- @SuppressWarnings({"unchecked","rawtypes"})
- private TopGroups<Integer> accumulateGroups(int slot, int offset, int maxDocsPerGroup,
- int withinGroupOffset, Sort withinGroupSort, boolean fillSortFields) throws IOException {
- final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
- final FakeScorer fakeScorer = new FakeScorer();
-
- int totalGroupedHitCount = 0;
- //System.out.println("slot=" + slot);
-
- for(int groupIDX=offset;groupIDX<sortedGroups.length;groupIDX++) {
- final OneGroup og = sortedGroups[groupIDX];
- final int numChildDocs;
- if (slot == -1 || slot >= og.counts.length) {
- numChildDocs = 0;
- } else {
- numChildDocs = og.counts[slot];
- }
-
- // Number of documents in group should be bounded to prevent redundant memory allocation
- final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
- //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
-
- // At this point we hold all docs w/ in each group,
- // unsorted; we now sort them:
- final TopDocsCollector<?> collector;
- if (withinGroupSort == null) {
- //System.out.println("sort by score");
- // Sort by score
- if (!trackScores) {
- throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
- }
- collector = TopScoreDocCollector.create(numDocsInGroup);
- } else {
- // Sort by fields
- collector = TopFieldCollector.create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore);
- }
-
- LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
- leafCollector.setScorer(fakeScorer);
- for(int docIDX=0;docIDX<numChildDocs;docIDX++) {
- //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
- final int doc = og.docs[slot][docIDX];
- fakeScorer.doc = doc;
- if (trackScores) {
- fakeScorer.score = og.scores[slot][docIDX];
- }
- leafCollector.collect(doc);
- }
- totalGroupedHitCount += numChildDocs;
-
- final Object[] groupSortValues;
-
- if (fillSortFields) {
- groupSortValues = new Object[comparators.length];
- for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
- groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
- }
- } else {
- groupSortValues = null;
- }
-
- final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);
-
- groups[groupIDX-offset] = new GroupDocs<>(og.score,
- topDocs.getMaxScore(),
- numChildDocs,
- topDocs.scoreDocs,
- og.doc,
- groupSortValues);
- }
-
- return new TopGroups<>(new TopGroups<>(sort.getSort(),
- withinGroupSort == null ? null : withinGroupSort.getSort(),
- 0, totalGroupedHitCount, groups, maxScore),
- totalHitCount);
- }
-
- /** Returns the TopGroups for the specified BlockJoinQuery.
- * The groupValue of each GroupDocs will be the parent docID for that group.
- * The number of documents within each group
- * is equal to the total number of matched child documents for that group.
- * Returns null if no groups matched.
- *
- * @param query Search query
- * @param withinGroupSort Sort criteria within groups
- * @param offset Parent docs offset
- * @param withinGroupOffset Offset within each group of child docs
- * @param fillSortFields Specifies whether to add sort fields or not
- * @return TopGroups for specified query
- * @throws IOException if there is a low-level I/O error
- */
- public TopGroups<Integer> getTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset,
- int withinGroupOffset, boolean fillSortFields)
- throws IOException {
-
- return getTopGroups(query, withinGroupSort, offset, Integer.MAX_VALUE, withinGroupOffset, fillSortFields);
- }
-
- /**
- * Returns the highest score across all collected parent hits, as long as
- * <code>trackMaxScore=true</code> was passed
- * {@link #ToParentBlockJoinCollector(Sort, int, boolean, boolean) on
- * construction}. Else, this returns <code>Float.NaN</code>
- */
- public float getMaxScore() {
- return maxScore;
- }
-
- @Override
- public boolean needsScores() {
- // needed so that eg. BooleanQuery does not rewrite its MUST clauses to
- // FILTER since the filter scorers are hidden in Scorer.getChildren().
- return true;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4fc5a9f0/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinIndexSearcher.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinIndexSearcher.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinIndexSearcher.java
deleted file mode 100644
index 84a02a3..0000000
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinIndexSearcher.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.join;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.LeafCollector;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.util.Bits;
-
-/**
- * An {@link IndexSearcher} to use in conjunction with
- * {@link ToParentBlockJoinCollector}.
- */
-public class ToParentBlockJoinIndexSearcher extends IndexSearcher {
-
- /** Creates a searcher searching the provided index. Search on individual
- * segments will be run in the provided {@link ExecutorService}.
- * @see IndexSearcher#IndexSearcher(IndexReader, ExecutorService) */
- public ToParentBlockJoinIndexSearcher(IndexReader r, ExecutorService executor) {
- super(r, executor);
- }
-
- /** Creates a searcher searching the provided index.
- * @see IndexSearcher#IndexSearcher(IndexReader) */
- public ToParentBlockJoinIndexSearcher(IndexReader r) {
- super(r);
- }
-
- @Override
- protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
- for (LeafReaderContext ctx : leaves) { // search each subreader
- // we force the use of Scorer (not BulkScorer) to make sure
- // that the scorer passed to LeafCollector.setScorer supports
- // Scorer.getChildren
- Scorer scorer = weight.scorer(ctx);
- if (scorer != null) {
- final LeafCollector leafCollector = collector.getLeafCollector(ctx);
- leafCollector.setScorer(scorer);
- final Bits liveDocs = ctx.reader().getLiveDocs();
- final DocIdSetIterator it = scorer.iterator();
- for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
- if (liveDocs == null || liveDocs.get(doc)) {
- leafCollector.collect(doc);
- }
- }
- }
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4fc5a9f0/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index 432ebcc..2837423 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -20,18 +20,19 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Locale;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
+import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.grouping.TopGroups;
-import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSet;
/**
@@ -57,20 +58,6 @@ import org.apache.lucene.util.BitSet;
* documents: the wrapped child query must never
* return a parent document.</p>
*
- * If you'd like to retrieve {@link TopGroups} for the
- * resulting query, use the {@link ToParentBlockJoinCollector}.
- * Note that this is not necessary, ie, if you simply want
- * to collect the parent documents and don't need to see
- * which child documents matched under that parent, then
- * you can use any collector.
- *
- * <p><b>NOTE</b>: If the overall query contains parent-only
- * matches, for example you OR a parent-only query with a
- * joined child-only query, then the resulting collected documents
- * will be correct, however the {@link TopGroups} you get
- * from {@link ToParentBlockJoinCollector} will not contain every
- * child for parents that had matched.
- *
* <p>See {@link org.apache.lucene.search.join} for an
* overview. </p>
*
@@ -90,7 +77,7 @@ public class ToParentBlockJoinQuery extends Query {
private final ScoreMode scoreMode;
/** Create a ToParentBlockJoinQuery.
- *
+ *
* @param childQuery Query matching child documents.
* @param parentsFilter Filter identifying the parent documents.
* @param scoreMode How to aggregate multiple child scores
@@ -116,7 +103,7 @@ public class ToParentBlockJoinQuery extends Query {
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores, boost), parentsFilter, needsScores ? scoreMode : ScoreMode.None);
}
-
+
/** Return our child query. */
public Query getChildQuery() {
return childQuery;
@@ -132,33 +119,44 @@ public class ToParentBlockJoinQuery extends Query {
this.scoreMode = scoreMode;
}
- // NOTE: acceptDocs applies (and is checked) only in the
- // parent document space
@Override
- public Scorer scorer(LeafReaderContext readerContext) throws IOException {
-
- final Scorer childScorer = in.scorer(readerContext);
- if (childScorer == null) {
- // No matches
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ final ScorerSupplier scorerSupplier = scorerSupplier(context);
+ if (scorerSupplier == null) {
return null;
}
+ return scorerSupplier.get(false);
+ }
- final int firstChildDoc = childScorer.iterator().nextDoc();
- if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
- // No matches
+ // NOTE: acceptDocs applies (and is checked) only in the
+ // parent document space
+ @Override
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+ final ScorerSupplier childScorerSupplier = in.scorerSupplier(context);
+ if (childScorerSupplier == null) {
return null;
}
// NOTE: this does not take accept docs into account, the responsibility
// to not match deleted docs is on the scorer
- final BitSet parents = parentsFilter.getBitSet(readerContext);
-
+ final BitSet parents = parentsFilter.getBitSet(context);
if (parents == null) {
// No matches
return null;
}
- return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
+ return new ScorerSupplier() {
+
+ @Override
+ public Scorer get(boolean randomAccess) throws IOException {
+ return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(randomAccess), parents, scoreMode);
+ }
+
+ @Override
+ public long cost() {
+ return childScorerSupplier.cost();
+ }
+ };
}
@Override
@@ -170,259 +168,191 @@ public class ToParentBlockJoinQuery extends Query {
return Explanation.noMatch("Not a match");
}
}
-
- /**
- * Base class for {@link ToParentBlockJoinQuery}'s scorer.
- * @lucene.experimental it might be removed at <b>6.0</b>
- * */
- public static abstract class ChildrenMatchesScorer extends Scorer{
-
- /** inherited constructor */
- protected ChildrenMatchesScorer(Weight weight) {
- super(weight);
- }
-
- /**
- * enables children matches recording
- * */
- public abstract void trackPendingChildHits() ;
-
- /**
- * reports matched children
- * @return number of recorded matched children docs
- * */
- public abstract int getChildCount() ;
-
- /**
- * reports matched children
- * @param other array for recording matching children docs of next parent,
- * it might be null (that's slower) or the same array which was returned
- * from the previous call
- * @return array with {@link #getChildCount()} matched children docnums
- * */
- public abstract int[] swapChildDocs(int[] other);
- }
-
- static class BlockJoinScorer extends ChildrenMatchesScorer {
- private final Scorer childScorer;
+
+ private static class ParentApproximation extends DocIdSetIterator {
+
+ private final DocIdSetIterator childApproximation;
private final BitSet parentBits;
- private final ScoreMode scoreMode;
- private int parentDoc = -1;
- private int prevParentDoc;
- private float parentScore;
- private int parentFreq;
- private int nextChildDoc;
- private int[] pendingChildDocs;
- private float[] pendingChildScores;
- private int childDocUpto;
-
- public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
- super(weight);
- //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ private int doc = -1;
+
+ ParentApproximation(DocIdSetIterator childApproximation, BitSet parentBits) {
+ this.childApproximation = childApproximation;
this.parentBits = parentBits;
- this.childScorer = childScorer;
- this.scoreMode = scoreMode;
- nextChildDoc = firstChildDoc;
}
@Override
- public Collection<ChildScorer> getChildren() {
- return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ public int docID() {
+ return doc;
}
@Override
- public int getChildCount() {
- return childDocUpto;
- }
-
- int getParentDoc() {
- return parentDoc;
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
}
@Override
- public int[] swapChildDocs(int[] other) {
- final int[] ret = pendingChildDocs;
- if (other == null) {
- pendingChildDocs = new int[5];
- } else {
- pendingChildDocs = other;
+ public int advance(int target) throws IOException {
+ if (target >= parentBits.length()) {
+ return doc = NO_MORE_DOCS;
}
- return ret;
- }
-
- float[] swapChildScores(float[] other) {
- if (scoreMode == ScoreMode.None) {
- throw new IllegalStateException("ScoreMode is None; you must pass trackScores=false to ToParentBlockJoinCollector");
+ final int firstChildTarget = target == 0 ? 0 : parentBits.prevSetBit(target - 1) + 1;
+ int childDoc = childApproximation.docID();
+ if (childDoc < firstChildTarget) {
+ childDoc = childApproximation.advance(firstChildTarget);
}
- final float[] ret = pendingChildScores;
- if (other == null) {
- pendingChildScores = new float[5];
- } else {
- pendingChildScores = other;
+ if (childDoc >= parentBits.length() - 1) {
+ return doc = NO_MORE_DOCS;
}
- return ret;
+ return doc = parentBits.nextSetBit(childDoc + 1);
}
@Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
- final DocIdSetIterator childIt = childScorer.iterator();
-
- @Override
- public int nextDoc() throws IOException {
- //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
- if (nextChildDoc == NO_MORE_DOCS) {
- //System.out.println(" end");
- return parentDoc = NO_MORE_DOCS;
- }
-
- // Gather all children sharing the same parent as
- // nextChildDoc
-
- parentDoc = parentBits.nextSetBit(nextChildDoc);
-
- // Parent & child docs are supposed to be
- // orthogonal:
- checkOrthogonal(nextChildDoc, parentDoc);
-
- //System.out.println(" parentDoc=" + parentDoc);
- assert parentDoc != DocIdSetIterator.NO_MORE_DOCS;
-
- float totalScore = 0;
- float maxScore = Float.NEGATIVE_INFINITY;
- float minScore = Float.POSITIVE_INFINITY;
-
- childDocUpto = 0;
- parentFreq = 0;
- do {
-
- //System.out.println(" c=" + nextChildDoc);
- if (pendingChildDocs != null && pendingChildDocs.length == childDocUpto) {
- pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
- }
- if (pendingChildScores != null && scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
- pendingChildScores = ArrayUtil.grow(pendingChildScores);
- }
- if (pendingChildDocs != null) {
- pendingChildDocs[childDocUpto] = nextChildDoc;
- }
- if (scoreMode != ScoreMode.None) {
- // TODO: specialize this into dedicated classes per-scoreMode
- final float childScore = childScorer.score();
- final int childFreq = childScorer.freq();
- if (pendingChildScores != null) {
- pendingChildScores[childDocUpto] = childScore;
- }
- maxScore = Math.max(childScore, maxScore);
- minScore = Math.min(childScore, minScore);
- totalScore += childScore;
- parentFreq += childFreq;
- }
- childDocUpto++;
- nextChildDoc = childIt.nextDoc();
- } while (nextChildDoc < parentDoc);
-
- // Parent & child docs are supposed to be
- // orthogonal:
- checkOrthogonal(nextChildDoc, parentDoc);
-
- switch(scoreMode) {
- case Avg:
- parentScore = totalScore / childDocUpto;
- break;
- case Max:
- parentScore = maxScore;
- break;
- case Min:
- parentScore = minScore;
- break;
- case Total:
- parentScore = totalScore;
- break;
- case None:
- break;
- }
-
- //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
- return parentDoc;
- }
-
- @Override
- public int advance(int parentTarget) throws IOException {
-
- //System.out.println("Q.advance parentTarget=" + parentTarget);
- if (parentTarget == NO_MORE_DOCS) {
- return parentDoc = NO_MORE_DOCS;
- }
-
- if (parentTarget == 0) {
- // Callers should only be passing in a docID from
- // the parent space, so this means this parent
- // has no children (it got docID 0), so it cannot
- // possibly match. We must handle this case
- // separately otherwise we pass invalid -1 to
- // prevSetBit below:
- return nextDoc();
- }
+ public long cost() {
+ return childApproximation.cost();
+ }
+ }
- prevParentDoc = parentBits.prevSetBit(parentTarget-1);
+ private static class ParentTwoPhase extends TwoPhaseIterator {
- //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
- assert prevParentDoc >= parentDoc;
- if (prevParentDoc > nextChildDoc) {
- nextChildDoc = childIt.advance(prevParentDoc);
- // System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
- //} else {
- //System.out.println(" skip childScorer advance");
- }
+ private final ParentApproximation parentApproximation;
+ private final DocIdSetIterator childApproximation;
+ private final TwoPhaseIterator childTwoPhase;
- // Parent & child docs are supposed to be orthogonal:
- checkOrthogonal(nextChildDoc, prevParentDoc);
+ ParentTwoPhase(ParentApproximation parentApproximation, TwoPhaseIterator childTwoPhase) {
+ super(parentApproximation);
+ this.parentApproximation = parentApproximation;
+ this.childApproximation = childTwoPhase.approximation();
+ this.childTwoPhase = childTwoPhase;
+ }
- final int nd = nextDoc();
- //System.out.println(" return nextParentDoc=" + nd);
- return nd;
+ /**
+ * Confirms the current parent candidate by looking for at least one child,
+ * located before the parent, that the child two-phase iterator accepts.
+ *
+ * @return {@code true} as soon as one child is confirmed; {@code false} once the
+ * child approximation has reached or passed the parent without a confirmed child
+ */
+ @Override
+ public boolean matches() throws IOException {
+ // The child approximation must sit on a child that precedes the parent candidate.
+ assert childApproximation.docID() < parentApproximation.docID();
+ do {
+ if (childTwoPhase.matches()) {
+ // Stop on the first confirmed child; the child iterator is left positioned on it.
+ return true;
}
+ } while (childApproximation.nextDoc() < parentApproximation.docID());
+ return false;
+ }
- @Override
- public int docID() {
- return parentDoc;
- }
+ @Override
+ public float matchCost() {
+ // TODO: how could we compute a match cost?
+ return childTwoPhase.matchCost() + 10;
+ }
+ }
- @Override
- public long cost() {
- return childIt.cost();
- }
- };
+ static class BlockJoinScorer extends Scorer {
+ private final Scorer childScorer;
+ private final BitSet parentBits;
+ private final ScoreMode scoreMode;
+ private final DocIdSetIterator childApproximation;
+ private final TwoPhaseIterator childTwoPhase;
+ private final ParentApproximation parentApproximation;
+ private final ParentTwoPhase parentTwoPhase;
+ private float score;
+ private int freq;
+
+ /**
+ * Builds a parent-level scorer on top of {@code childScorer}.
+ *
+ * @param weight the weight handed to the {@code Scorer} super constructor
+ * @param childScorer scorer over the child documents
+ * @param parentBits bit set marking the parent documents
+ * @param scoreMode how child scores are combined into the parent score
+ */
+ public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) {
+ super(weight);
+ this.parentBits = parentBits;
+ this.childScorer = childScorer;
+ this.scoreMode = scoreMode;
+ childTwoPhase = childScorer.twoPhaseIterator();
+ if (childTwoPhase == null) {
+ // No two-phase support on the child: iterate it directly, no confirmation step needed.
+ childApproximation = childScorer.iterator();
+ parentApproximation = new ParentApproximation(childApproximation, parentBits);
+ parentTwoPhase = null;
+ } else {
+ // Child exposes an approximation: mirror it on the parent side and defer
+ // confirmation to ParentTwoPhase.
+ childApproximation = childTwoPhase.approximation();
+ parentApproximation = new ParentApproximation(childTwoPhase.approximation(), parentBits);
+ parentTwoPhase = new ParentTwoPhase(parentApproximation, childTwoPhase);
+ }
}
- private void checkOrthogonal(int childDoc, int parentDoc) {
- if (childDoc==parentDoc) {
- throw new IllegalStateException("Child query must not match same docs with parent filter. "
- + "Combine them as must clauses (+) to find a problem doc. "
- + "docId=" + nextChildDoc + ", " + childScorer.getClass());
-
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ }
+
+ /**
+ * Returns the iterator over matching parent documents: the parent approximation
+ * itself when the child has no two-phase iterator, otherwise the two-phase
+ * iterator wrapped as a plain {@code DocIdSetIterator}.
+ */
+ @Override
+ public DocIdSetIterator iterator() {
+ if (parentTwoPhase == null) {
+ // the approximation is exact
+ return parentApproximation;
+ } else {
+ return TwoPhaseIterator.asDocIdSetIterator(parentTwoPhase);
}
}
@Override
+ public TwoPhaseIterator twoPhaseIterator() {
+ return parentTwoPhase;
+ }
+
+ @Override
public int docID() {
- return parentDoc;
+ return parentApproximation.docID();
}
@Override
public float score() throws IOException {
- return parentScore;
+ setScoreAndFreq();
+ return score;
}
@Override
- public int freq() {
- return parentFreq;
+ public int freq() throws IOException {
+ setScoreAndFreq();
+ return freq;
+ }
+
+ /**
+ * Walks the remaining children of the current parent and combines their scores
+ * according to {@code scoreMode}, storing the result in the {@code score} and
+ * {@code freq} fields.
+ *
+ * <p>Returns immediately when the child iterator is already at or past the
+ * current parent, leaving the previously stored values untouched.
+ *
+ * @throws IOException if the child iterator or scorer fails
+ * @throws IllegalStateException if the child query also matches the parent document
+ */
+ private void setScoreAndFreq() throws IOException {
+ if (childApproximation.docID() >= parentApproximation.docID()) {
+ return;
+ }
+ // Seed the aggregate with the child the iterator currently sits on.
+ double score = scoreMode == ScoreMode.None ? 0 : childScorer.score();
+ int freq = 1;
+ while (childApproximation.nextDoc() < parentApproximation.docID()) {
+ if (childTwoPhase == null || childTwoPhase.matches()) {
+ final float childScore = childScorer.score();
+ freq += 1;
+ switch (scoreMode) {
+ case Total:
+ case Avg:
+ score += childScore;
+ break;
+ case Min:
+ score = Math.min(score, childScore);
+ break;
+ case Max:
+ // Max must keep the largest child score (this branch previously called Math.min).
+ score = Math.max(score, childScore);
+ break;
+ case None:
+ break;
+ default:
+ throw new AssertionError();
+ }
+ }
+ }
+ // Parent and child documents must be disjoint: a child hit on the parent itself is an error.
+ if (childApproximation.docID() == parentApproximation.docID() && (childTwoPhase == null || childTwoPhase.matches())) {
+ throw new IllegalStateException("Child query must not match same docs with parent filter. "
+ + "Combine them as must clauses (+) to find a problem doc. "
+ + "docId=" + parentApproximation.docID() + ", " + childScorer.getClass());
+ }
+ if (scoreMode == ScoreMode.Avg) {
+ score /= freq;
+ }
+ this.score = (float) score;
+ this.freq = freq;
}
public Explanation explain(LeafReaderContext context, Weight childWeight) throws IOException {
+ int prevParentDoc = parentBits.prevSetBit(parentApproximation.docID() - 1);
int start = context.docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
- int end = context.docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+ int end = context.docBase + parentApproximation.docID() - 1; // -1 b/c parentDoc is parent doc
Explanation bestChild = null;
int matches = 0;
@@ -436,21 +366,11 @@ public class ToParentBlockJoinQuery extends Query {
}
}
+ assert freq() == matches;
return Explanation.match(score(), String.format(Locale.ROOT,
"Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
);
}
-
- /**
- * Instructs this scorer to keep track of the child docIds and score ids for retrieval purposes.
- */
- @Override
- public void trackPendingChildHits() {
- pendingChildDocs = new int[5];
- if (scoreMode != ScoreMode.None) {
- pendingChildScores = new float[5];
- }
- }
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4fc5a9f0/lucene/join/src/java/org/apache/lucene/search/join/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/package-info.java b/lucene/join/src/java/org/apache/lucene/search/join/package-info.java
index 6133f99..7c7ff67 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/package-info.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/package-info.java
@@ -41,14 +41,25 @@
* resulting query can then be used as a clause in any query that
* matches parent.</p>
*
- * <p>If you only care about the parent documents matching the query, you
- * can use any collector to collect the parent hits, but if you'd also
- * like to see which child documents match for each parent document,
- * use the {@link org.apache.lucene.search.join.ToParentBlockJoinCollector} to collect the hits. Once the
- * search is done, you retrieve a {@link
- * org.apache.lucene.search.grouping.TopGroups} instance from the
- * {@link org.apache.lucene.search.join.ToParentBlockJoinCollector#getTopGroups ToParentBlockJoinCollector.getTopGroups()} method.</p>
- *
+ * <p>If you care about which child documents matched for each parent document,
+ * then use the {@link org.apache.lucene.search.join.ParentChildrenBlockJoinQuery} query to
+ * retrieve, per matched parent document, the child documents that caused the
+ * parent document to match in the first place. This query should be used after your main query
+ * has been executed. For each hit, execute the
+ * {@link org.apache.lucene.search.join.ParentChildrenBlockJoinQuery} query:</p>
+ * <pre class="prettyprint">
+ * TopDocs results = searcher.search(mainQuery, 10);
+ * for (int i = 0; i < results.scoreDocs.length; i++) {
+ * ScoreDoc scoreDoc = results.scoreDocs[i];
+ *
+ * // Run ParentChildrenBlockJoinQuery to figure out the top matching child docs:
+ * ParentChildrenBlockJoinQuery parentChildrenBlockJoinQuery =
+ * new ParentChildrenBlockJoinQuery(parentFilter, childQuery, scoreDoc.doc);
+ * TopDocs topChildResults = searcher.search(parentChildrenBlockJoinQuery, 3);
+ * // Process top child hits...
+ * }
+ * </pre>
+ *
* <p>To map/join in the opposite direction, use {@link
* org.apache.lucene.search.join.ToChildBlockJoinQuery}. This wraps
* any query matching parent documents, creating the joined query
@@ -80,9 +91,9 @@
* </p>
* <pre class="prettyprint">
* String fromField = "from"; // Name of the from field
- * boolean multipleValuesPerDocument = false; // Set only yo true in the case when your fromField has multiple values per document in your index
+ * boolean multipleValuesPerDocument = false; // Set only to true in the case when your fromField has multiple values per document in your index
* String toField = "to"; // Name of the to field
- * ScoreMode scoreMode = ScoreMode.Max // Defines how the scores are translated into the other side of the join.
+ * ScoreMode scoreMode = ScoreMode.Max; // Defines how the scores are translated into the other side of the join.
* Query fromQuery = new TermQuery(new Term("content", searchTerm)); // Query executed to collect from values to join to the to values
*
* Query joinQuery = JoinUtil.createJoinQuery(fromField, multipleValuesPerDocument, toField, fromQuery, fromSearcher, scoreMode);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4fc5a9f0/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index cf21fa4..a13e66f 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -22,15 +22,18 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
@@ -47,30 +50,8 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.BoostQuery;
-import org.apache.lucene.search.CheckHits;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.FieldDoc;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.MatchNoDocsQuery;
-import org.apache.lucene.search.PrefixQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.QueryUtils;
-import org.apache.lucene.search.RandomApproximationQuery;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.grouping.GroupDocs;
-import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
@@ -157,7 +138,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r = DirectoryReader.open(w);
w.close();
- IndexSearcher s = new IndexSearcher(r);
+ IndexSearcher s = newSearcher(r);
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
CheckJoinIndex.check(r, parentsFilter);
@@ -170,18 +151,21 @@ public class TestBlockJoin extends LuceneTestCase {
BooleanQuery.Builder fullQuery = new BooleanQuery.Builder();
fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
fullQuery.add(new BooleanClause(new MatchAllDocsQuery(), Occur.MUST));
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
- s.search(fullQuery.build(), c);
- TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
- assertFalse(Float.isNaN(results.maxScore));
- assertEquals(1, results.totalGroupedHitCount);
- assertEquals(1, results.groups.length);
- final GroupDocs<Integer> group = results.groups[0];
- Document childDoc = s.doc(group.scoreDocs[0].doc);
- assertEquals("java", childDoc.get("skill"));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
- assertEquals("Lisa", parentDoc.get("name"));
+ TopDocs topDocs = s.search(fullQuery.build(), 2);
+ assertEquals(2, topDocs.totalHits);
+ assertEquals(asSet("Lisa", "Frank"),
+ asSet(s.doc(topDocs.scoreDocs[0].doc).get("name"), s.doc(topDocs.scoreDocs[1].doc).get("name")));
+
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[0].doc);
+ TopDocs matchingChildren = s.search(childrenQuery, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
+
+ childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[1].doc);
+ matchingChildren = s.search(childrenQuery, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
r.close();
dir.close();
@@ -207,8 +191,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r = w.getReader();
w.close();
- IndexSearcher s = new ToParentBlockJoinIndexSearcher(r);
- //IndexSearcher s = newSearcher(r, false);
+ IndexSearcher s = newSearcher(r, false);
//IndexSearcher s = new IndexSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
@@ -232,23 +215,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(parentQuery, Occur.SHOULD));
fullQuery.add(new BooleanClause(childJoinQuery, Occur.SHOULD));
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
- s.search(fullQuery.build(), c);
- TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
- assertEquals(1, results.totalGroupedHitCount);
- assertEquals(1, results.groups.length);
-
- final GroupDocs<Integer> group = results.groups[0];
- assertEquals(1, group.totalHits);
- assertFalse(Float.isNaN(group.score));
-
- Document childDoc = s.doc(group.scoreDocs[0].doc);
- //System.out.println(" doc=" + group.scoreDocs[0].doc);
- assertEquals("java", childDoc.get("skill"));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
- assertEquals("Lisa", parentDoc.get("name"));
-
+ final TopDocs topDocs = s.search(fullQuery.build(), 2);
+ assertEquals(2, topDocs.totalHits);
+ assertEquals(asSet("Lisa", "Frank"),
+ asSet(s.doc(topDocs.scoreDocs[0].doc).get("name"), s.doc(topDocs.scoreDocs[1].doc).get("name")));
+
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[0].doc);
+ TopDocs matchingChildren = s.search(childrenQuery, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
+
+ childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[1].doc);
+ matchingChildren = s.search(childrenQuery, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
r.close();
dir.close();
@@ -297,30 +278,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(parentQuery, Occur.MUST));
fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
-
CheckHits.checkHitCollector(random(), fullQuery.build(), "country", s, new int[] {2});
- s.search(fullQuery.build(), c);
-
- TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
- assertFalse(Float.isNaN(results.maxScore));
+ TopDocs topDocs = s.search(fullQuery.build(), 1);
//assertEquals(1, results.totalHitCount);
- assertEquals(1, results.totalGroupedHitCount);
- assertEquals(1, results.groups.length);
-
- final GroupDocs<Integer> group = results.groups[0];
- assertEquals(1, group.totalHits);
- assertFalse(Float.isNaN(group.score));
-
- Document childDoc = s.doc(group.scoreDocs[0].doc);
- //System.out.println(" doc=" + group.scoreDocs[0].doc);
- assertEquals("java", childDoc.get("skill"));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
+ assertEquals(1, topDocs.totalHits);
+ Document parentDoc = s.doc(topDocs.scoreDocs[0].doc);
assertEquals("Lisa", parentDoc.get("name"));
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[0].doc);
+ TopDocs matchingChildren = s.search(childrenQuery, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
+
//System.out.println("TEST: now test up");
@@ -333,7 +305,7 @@ public class TestBlockJoin extends LuceneTestCase {
//System.out.println("FULL: " + fullChildQuery);
TopDocs hits = s.search(fullChildQuery.build(), 10);
assertEquals(1, hits.totalHits);
- childDoc = s.doc(hits.scoreDocs[0].doc);
+ Document childDoc = s.doc(hits.scoreDocs[0].doc);
//System.out.println("CHILD = " + childDoc + " docID=" + hits.scoreDocs[0].doc);
assertEquals("java", childDoc.get("skill"));
assertEquals(2007, childDoc.getField("year").numericValue());
@@ -347,72 +319,6 @@ public class TestBlockJoin extends LuceneTestCase {
dir.close();
}
- public void testBugCausedByRewritingTwice() throws IOException {
- final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
-
- final List<Document> docs = new ArrayList<>();
-
- for (int i=0;i<10;i++) {
- docs.clear();
- docs.add(makeJob("ruby", i));
- docs.add(makeJob("java", 2007));
- docs.add(makeResume("Frank", "United States"));
- w.addDocuments(docs);
- }
-
- IndexReader r = w.getReader();
- w.close();
- IndexSearcher s = newSearcher(r, false);
-
- // Hacky: this causes the query to need 2 rewrite
- // iterations:
- BooleanQuery.Builder builder = new BooleanQuery.Builder();
- builder.add(IntPoint.newExactQuery("year", 2007), BooleanClause.Occur.MUST);
- Query qc = new Query() {
- @Override
- public Query rewrite(IndexReader reader) throws IOException {
- return builder.build();
- }
-
- @Override
- public String toString(String field) {
- return "hack!";
- }
-
- @Override
- public boolean equals(Object o) {
- return o == this;
- }
-
- @Override
- public int hashCode() {
- return System.identityHashCode(this);
- }
- };
-
- BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
- CheckJoinIndex.check(r, parentsFilter);
-
- Query qw1 = qc.rewrite(r);
- Query qw2 = qw1.rewrite(r);
-
- assertNotSame(qc, qw1);
- assertNotSame(qw1, qw2);
-
- ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);
-
- s.search(qp, c);
- TopGroups<Integer> groups = c.getTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
- for (GroupDocs<Integer> group : groups.groups) {
- assertEquals(1, group.totalHits);
- }
-
- r.close();
- dir.close();
- }
-
protected Query skill(String skill) {
return new TermQuery(new Term("skill", skill));
}
@@ -612,6 +518,7 @@ public class TestBlockJoin extends LuceneTestCase {
final Directory dir = newDirectory();
final Directory joinDir = newDirectory();
+ final int maxNumChildrenPerParent = 20;
final int numParentDocs = TestUtil.nextInt(random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
//final int numParentDocs = 30;
@@ -669,7 +576,7 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println(" " + sb.toString());
}
- final int numChildDocs = TestUtil.nextInt(random(), 1, 20);
+ final int numChildDocs = TestUtil.nextInt(random(), 1, maxNumChildrenPerParent);
for(int childDocID=0;childDocID<numChildDocs;childDocID++) {
// Denormalize: copy all parent fields into child doc:
Document childDoc = TestUtil.cloneDocument(parentDoc);
@@ -752,7 +659,7 @@ public class TestBlockJoin extends LuceneTestCase {
final IndexSearcher s = newSearcher(r, false);
- final IndexSearcher joinS = new IndexSearcher(joinR);
+ final IndexSearcher joinS = newSearcher(joinR);
final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "x")));
CheckJoinIndex.check(joinS.getIndexReader(), parentsFilter);
@@ -764,7 +671,7 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
}
- final Query childQuery;
+ Query childQuery;
if (random().nextInt(3) == 2) {
final int childFieldID = random().nextInt(childFields.length);
childQuery = new TermQuery(new Term("child" + childFieldID,
@@ -799,6 +706,9 @@ public class TestBlockJoin extends LuceneTestCase {
random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
childQuery = bq.build();
}
+ if (random().nextBoolean()) {
+ childQuery = new RandomApproximationQuery(childQuery, random());
+ }
final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
@@ -880,55 +790,35 @@ public class TestBlockJoin extends LuceneTestCase {
}
}
- final boolean trackScores;
- final boolean trackMaxScore;
- if (agg == ScoreMode.None) {
- trackScores = false;
- trackMaxScore = false;
- } else {
- trackScores = random().nextBoolean();
- trackMaxScore = random().nextBoolean();
+ TopDocs joinedResults = joinS.search(parentJoinQuery, numParentDocs);
+ SortedMap<Integer, TopDocs> joinResults = new TreeMap<>();
+ for (ScoreDoc parentHit : joinedResults.scoreDocs) {
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter, childQuery, parentHit.doc);
+ TopDocs childTopDocs = joinS.search(childrenQuery, maxNumChildrenPerParent, childSort);
+ final Document parentDoc = joinS.doc(parentHit.doc);
+ joinResults.put(Integer.valueOf(parentDoc.get("parentID")), childTopDocs);
}
- final ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);
-
- joinS.search(parentJoinQuery, c);
final int hitsPerGroup = TestUtil.nextInt(random(), 1, 20);
//final int hitsPerGroup = 100;
- final TopGroups<Integer> joinResults = c.getTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);
if (VERBOSE) {
- System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.groups.length) + " groups; hitsPerGroup=" + hitsPerGroup);
+ System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.size()) + " groups; hitsPerGroup=" + hitsPerGroup);
if (joinResults != null) {
- final GroupDocs<Integer>[] groups = joinResults.groups;
- for(int groupIDX=0;groupIDX<groups.length;groupIDX++) {
- final GroupDocs<Integer> group = groups[groupIDX];
- if (group.groupSortValues != null) {
- System.out.print(" ");
- for(Object o : group.groupSortValues) {
- if (o instanceof BytesRef) {
- System.out.print(((BytesRef) o).utf8ToString() + " ");
- } else {
- System.out.print(o + " ");
- }
- }
- System.out.println();
- }
-
- assertNotNull(group.groupValue);
- final Document parentDoc = joinS.doc(group.groupValue);
- System.out.println(" group parentID=" + parentDoc.get("parentID") + " (docID=" + group.groupValue + ")");
- for(int hitIDX=0;hitIDX<group.scoreDocs.length;hitIDX++) {
- final Document doc = joinS.doc(group.scoreDocs[hitIDX].doc);
- //System.out.println(" score=" + group.scoreDocs[hitIDX].score + " childID=" + doc.get("childID") + " (docID=" + group.scoreDocs[hitIDX].doc + ")");
- System.out.println(" childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + group.scoreDocs[hitIDX].doc + ")");
+ for (Map.Entry<Integer, TopDocs> entry : joinResults.entrySet()) {
+ System.out.println(" group parentID=" + entry.getKey() + " (docID=" + entry.getKey() + ")");
+ for(ScoreDoc childHit : entry.getValue().scoreDocs) {
+ final Document doc = joinS.doc(childHit.doc);
+// System.out.println(" score=" + childHit.score + " childID=" + doc.get("childID") + " (docID=" + childHit.doc + ")");
+ System.out.println(" childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + childHit.doc + ")");
}
}
}
}
if (results.totalHits == 0) {
- assertNull(joinResults);
+ assertEquals(0, joinResults.size());
} else {
compareHits(r, joinR, results, joinResults);
TopDocs b = joinS.search(childJoinQuery, 10);
@@ -1115,43 +1005,24 @@ public class TestBlockJoin extends LuceneTestCase {
}
}
- private void compareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<Integer> joinResults) throws Exception {
- // results is 'complete'; joinResults is a subset
- int resultUpto = 0;
- int joinGroupUpto = 0;
-
- final ScoreDoc[] hits = results.scoreDocs;
- final GroupDocs<Integer>[] groupDocs = joinResults.groups;
-
- while(joinGroupUpto < groupDocs.length) {
- final GroupDocs<Integer> group = groupDocs[joinGroupUpto++];
- final ScoreDoc[] groupHits = group.scoreDocs;
- assertNotNull(group.groupValue);
- final Document parentDoc = joinR.document(group.groupValue);
- final String parentID = parentDoc.get("parentID");
- //System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc);
- assertNotNull(parentID);
- assertTrue(groupHits.length > 0);
- for(int hitIDX=0;hitIDX<groupHits.length;hitIDX++) {
- final Document nonJoinHit = r.document(hits[resultUpto++].doc);
- final Document joinHit = joinR.document(groupHits[hitIDX].doc);
- assertEquals(parentID,
- nonJoinHit.get("parentID"));
- assertEquals(joinHit.get("childID"),
- nonJoinHit.get("childID"));
+ private void compareHits(IndexReader r, IndexReader joinR, TopDocs controlHits, Map<Integer, TopDocs> joinResults) throws Exception {
+ int currentParentID = -1;
+ int childHitSlot = 0;
+ TopDocs childHits = new TopDocs(0, new ScoreDoc[0], 0f);
+ for (ScoreDoc controlHit : controlHits.scoreDocs) {
+ Document controlDoc = r.document(controlHit.doc);
+ int parentID = Integer.valueOf(controlDoc.get("parentID"));
+ if (parentID != currentParentID) {
+ assertEquals(childHitSlot, childHits.scoreDocs.length);
+ currentParentID = parentID;
+ childHitSlot = 0;
+ childHits = joinResults.get(parentID);
}
- if (joinGroupUpto < groupDocs.length) {
- // Advance non-join hit to the next parentID:
- //System.out.println(" next joingroupUpto=" + joinGroupUpto + " gd.length=" + groupDocs.length + " parentID=" + parentID);
- while(true) {
- assertTrue(resultUpto < hits.length);
- if (!parentID.equals(r.document(hits[resultUpto].doc).get("parentID"))) {
- break;
- }
- resultUpto++;
- }
- }
+ String controlChildID = controlDoc.get("childID");
+ Document childDoc = joinR.document(childHits.scoreDocs[childHitSlot++].doc);
+ String childID = childDoc.get("childID");
+ assertEquals(controlChildID, childID);
}
}
@@ -1200,43 +1071,21 @@ public class TestBlockJoin extends LuceneTestCase {
fullQuery.add(new BooleanClause(childJobJoinQuery, Occur.MUST));
fullQuery.add(new BooleanClause(childQualificationJoinQuery, Occur.MUST));
- // Collects all job and qualification child docs for
- // each resume hit in the top N (sorted by score):
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
-
- s.search(fullQuery.build(), c);
-
- // Examine "Job" children
- TopGroups<Integer> jobResults = c.getTopGroups(childJobJoinQuery, null, 0, 10, 0, true);
-
- //assertEquals(1, results.totalHitCount);
- assertEquals(1, jobResults.totalGroupedHitCount);
- assertEquals(1, jobResults.groups.length);
-
- final GroupDocs<Integer> group = jobResults.groups[0];
- assertEquals(1, group.totalHits);
-
- Document childJobDoc = s.doc(group.scoreDocs[0].doc);
- //System.out.println(" doc=" + group.scoreDocs[0].doc);
- assertEquals("java", childJobDoc.get("skill"));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
+ final TopDocs topDocs = s.search(fullQuery.build(), 10);
+ assertEquals(1, topDocs.totalHits);
+ Document parentDoc = s.doc(topDocs.scoreDocs[0].doc);
assertEquals("Lisa", parentDoc.get("name"));
- // Now Examine qualification children
- TopGroups<Integer> qualificationResults = c.getTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);
-
- assertEquals(1, qualificationResults.totalGroupedHitCount);
- assertEquals(1, qualificationResults.groups.length);
-
- final GroupDocs<Integer> qGroup = qualificationResults.groups[0];
- assertEquals(1, qGroup.totalHits);
+ ParentChildrenBlockJoinQuery childrenQuery =
+ new ParentChildrenBlockJoinQuery(parentsFilter, childJobQuery.build(), topDocs.scoreDocs[0].doc);
+ TopDocs matchingChildren = s.search(childrenQuery, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));
- Document childQualificationDoc = s.doc(qGroup.scoreDocs[0].doc);
- assertEquals("maths", childQualificationDoc.get("qualification"));
- assertNotNull(qGroup.groupValue);
- parentDoc = s.doc(qGroup.groupValue);
- assertEquals("Lisa", parentDoc.get("name"));
+ childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQualificationQuery.build(), topDocs.scoreDocs[0].doc);
+ matchingChildren = s.search(childrenQuery, 1);
+ assertEquals(1, matchingChildren.totalHits);
+ assertEquals("maths", s.doc(matchingChildren.scoreDocs[0].doc).get("qualification"));
r.close();
dir.close();
@@ -1300,165 +1149,6 @@ public class TestBlockJoin extends LuceneTestCase {
dir.close();
}
- public void testGetTopGroups() throws Exception {
-
- final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
-
- final List<Document> docs = new ArrayList<>();
- docs.add(makeJob("ruby", 2005));
- docs.add(makeJob("java", 2006));
- docs.add(makeJob("java", 2010));
- docs.add(makeJob("java", 2012));
- Collections.shuffle(docs, random());
- docs.add(makeResume("Frank", "United States"));
-
- addSkillless(w);
- w.addDocuments(docs);
- addSkillless(w);
-
- IndexReader r = w.getReader();
- w.close();
- IndexSearcher s = new IndexSearcher(r);
-
- // Create a filter that defines "parent" documents in the index - in this case resumes
- BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
- CheckJoinIndex.check(s.getIndexReader(), parentsFilter);
-
- // Define child document criteria (finds an example of relevant work experience)
- BooleanQuery.Builder childQuery = new BooleanQuery.Builder();
- childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
- childQuery.add(new BooleanClause(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST));
-
- // Wrap the child document query to 'join' any matches
- // up to corresponding parent:
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery.build(), parentsFilter, ScoreMode.Avg);
-
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
- s.search(childJoinQuery, c);
-
- //Get all child documents within groups
- @SuppressWarnings({"unchecked","rawtypes"})
- TopGroups<Integer>[] getTopGroupsResults = new TopGroups[2];
- getTopGroupsResults[0] = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
- getTopGroupsResults[1] = c.getTopGroupsWithAllChildDocs(childJoinQuery, null, 0, 0, true);
-
- for (TopGroups<Integer> results : getTopGroupsResults) {
- assertFalse(Float.isNaN(results.maxScore));
- assertEquals(2, results.totalGroupedHitCount);
- assertEquals(1, results.groups.length);
-
- final GroupDocs<Integer> group = results.groups[0];
- assertEquals(2, group.totalHits);
- assertFalse(Float.isNaN(group.score));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
- assertEquals("Frank", parentDoc.get("name"));
-
- assertEquals(2, group.scoreDocs.length); //all matched child documents collected
-
- for (ScoreDoc scoreDoc : group.scoreDocs) {
- Document childDoc = s.doc(scoreDoc.doc);
- assertEquals("java", childDoc.get("skill"));
- int year = Integer.parseInt(childDoc.get("year"));
- assertTrue(year >= 2006 && year <= 2011);
- }
- }
-
- //Get part of child documents
- TopGroups<Integer> boundedResults = c.getTopGroups(childJoinQuery, null, 0, 1, 0, true);
- assertFalse(Float.isNaN(boundedResults.maxScore));
- assertEquals(2, boundedResults.totalGroupedHitCount);
- assertEquals(1, boundedResults.groups.length);
-
- final GroupDocs<Integer> group = boundedResults.groups[0];
- assertEquals(2, group.totalHits);
- assertFalse(Float.isNaN(group.score));
- assertNotNull(group.groupValue);
- Document parentDoc = s.doc(group.groupValue);
- assertEquals("Frank", parentDoc.get("name"));
-
- assertEquals(1, group.scoreDocs.length); //not all matched child documents collected
-
- for (ScoreDoc scoreDoc : group.scoreDocs) {
- Document childDoc = s.doc(scoreDoc.doc);
- assertEquals("java", childDoc.get("skill"));
- int year = Integer.parseInt(childDoc.get("year"));
- assertTrue(year >= 2006 && year <= 2011);
- }
-
- r.close();
- dir.close();
- }
-
- // LUCENE-4968
- public void testSometimesParentOnlyMatches() throws Exception {
- Directory d = newDirectory();
- RandomIndexWriter w = new RandomIndexWriter(random(), d);
- Document parent = new Document();
- parent.add(new StoredField("parentID", "0"));
- parent.add(new SortedDocValuesField("parentID", new BytesRef("0")));
- parent.add(newTextField("parentText", "text", Field.Store.NO));
- parent.add(newStringField("isParent", "yes", Field.Store.NO));
-
- List<Document> docs = new ArrayList<>();
-
- Document child = new Document();
- docs.add(child);
- child.add(new StoredField("childID", "0"));
- child.add(newTextField("childText", "text", Field.Store.NO));
-
- // parent last:
- docs.add(parent);
- w.addDocuments(docs);
-
- docs.clear();
-
- parent = new Document();
- parent.add(newTextField("parentText", "text", Field.Store.NO));
- parent.add(newStringField("isParent", "yes", Field.Store.NO));
- parent.add(new StoredField("parentID", "1"));
- parent.add(new SortedDocValuesField("parentID", new BytesRef("1")));
-
- // parent last:
- docs.add(parent);
- w.addDocuments(docs);
-
- IndexReader r = w.getReader();
- w.close();
-
- IndexSearcher searcher = new ToParentBlockJoinIndexSearcher(r);
- Query childQuery = new TermQuery(new Term("childText", "text"));
- BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
- CheckJoinIndex.check(r, parentsFilter);
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
- parentQuery.add(childJoinQuery, Occur.SHOULD);
- parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
-
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
- 10, true, true);
- searcher.search(parentQuery.build(), c);
- TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
-
- // Two parents:
- assertEquals(2, groups.totalGroupCount.intValue());
-
- // One child docs:
- assertEquals(1, groups.totalGroupedHitCount);
-
- GroupDocs<Integer> group = groups.groups[0];
- Document doc = r.document(group.groupValue.intValue());
- assertEquals("0", doc.get("parentID"));
-
- group = groups.groups[1];
- doc = r.document(group.groupValue.intValue());
- assertEquals("1", doc.get("parentID"));
-
- r.close();
- d.close();
- }
-
// LUCENE-4968
public void testChildQueryNeverMatches() throws Exception {
Directory d = newDirectory();
@@ -1496,90 +1186,25 @@ public class TestBlockJoin extends LuceneTestCase {
IndexReader r = w.getReader();
w.close();
- IndexSearcher searcher = new ToParentBlockJoinIndexSearcher(r);
+ IndexSearcher searcher = newSearcher(r);
// never matches:
Query childQuery = new TermQuery(new Term("childText", "bogus"));
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
CheckJoinIndex.check(r, parentsFilter);
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
- parentQuery.add(childJoinQuery, Occur.SHOULD);
- parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
-
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
- 10, true, true);
- searcher.search(parentQuery.build(), c);
- TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false);
-
- // Two parents:
- assertEquals(2, groups.totalGroupCount.intValue());
-
- // One child docs:
- assertEquals(0, groups.totalGroupedHitCount);
-
- GroupDocs<Integer> group = groups.groups[0];
- Document doc = r.document(group.groupValue.intValue());
- assertEquals("0", doc.get("parentID"));
-
- group = groups.groups[1];
- doc = r.document(group.groupValue.intValue());
- assertEquals("1", doc.get("parentID"));
-
- r.close();
- d.close();
- }
-
- // LUCENE-4968
- public void testChildQueryMatchesParent() throws Exception {
- Directory d = newDirectory();
- RandomIndexWriter w = new RandomIndexWriter(random(), d);
- Document parent = new Document();
- parent.add(new StoredField("parentID", "0"));
- parent.add(newTextField("parentText", "text", Field.Store.NO));
- parent.add(newStringField("isParent", "yes", Field.Store.NO));
-
- List<Document> docs = new ArrayList<>();
-
- Document child = new Document();
- docs.add(child);
- child.add(new StoredField("childID", "0"));
- child.add(newTextField("childText", "text", Field.Store.NO));
-
- // parent last:
- docs.add(parent);
- w.addDocuments(docs);
-
- docs.clear();
-
- parent = new Document();
- parent.add(newTextField("parentText", "text", Field.Store.NO));
- parent.add(newStringField("isParent", "yes", Field.Store.NO));
- parent.add(new StoredField("parentID", "1"));
-
- // parent last:
- docs.add(parent);
- w.addDocuments(docs);
-
- IndexReader r = w.getReader();
- w.close();
-
- // illegally matches parent:
- Query childQuery = new TermQuery(new Term("parentText", "text"));
- BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
- CheckJoinIndex.check(r, parentsFilter);
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery.Builder parentQuery = new BooleanQuery.Builder();
- parentQuery.add(childJoinQuery, Occur.SHOULD);
- parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD);
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)),
- 10, true, true);
+ Weight weight = searcher.createNormalizedWeight(childJoinQuery, random().nextBoolean());
+ Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
+ assertNull(scorer);
- expectThrows(IllegalStateException.class, () -> {
- newSearcher(r).search(parentQuery.build(), c);
- });
+ // never matches and produces a null scorer
+ childQuery = new TermQuery(new Term("bogus", "bogus"));
+ childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+ weight = searcher.createNormalizedWeight(childJoinQuery, random().nextBoolean());
+ scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
+ assertNull(scorer);
r.close();
d.close();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4fc5a9f0/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
index aa68d09..cb3762c 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
@@ -87,25 +87,6 @@ public class TestBlockJoinValidation extends LuceneTestCase {
assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
}
- public void testAdvanceValidationForToParentBjq() throws Exception {
- int randomChildNumber = getRandomChildNumber(0);
- // we need to make advance method meet wrong document, so random child number
- // in BJQ must be greater than child number in Boolean clause
- int nextRandomChildNumber = getRandomChildNumber(randomChildNumber);
- Query parentQueryWithRandomChild = createChildrenQueryWithOneParent(nextRandomChildNumber);
- ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, parentsFilter, ScoreMode.None);
- // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
- BooleanQuery.Builder conjunctionQuery = new BooleanQuery.Builder();
- WildcardQuery childQuery = new WildcardQuery(new Term("child", createFieldValue(randomChildNumber)));
- conjunctionQuery.add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
- conjunctionQuery.add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
-
- IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
- indexSearcher.search(conjunctionQuery.build(), 1);
- });
- assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
- }
-
public void testNextDocValidationForToChildBjq() throws Exception {
Query parentQueryWithRandomChild = createParentsQueryWithOneChild(getRandomChildNumber(0));