You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/08/24 00:34:10 UTC
[11/17] lucenenet git commit: Lucene.Net.Join tests now passing
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToParentBlockJoinCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToParentBlockJoinCollector.cs b/src/Lucene.Net.Join/ToParentBlockJoinCollector.cs
new file mode 100644
index 0000000..85d8ee8
--- /dev/null
+++ b/src/Lucene.Net.Join/ToParentBlockJoinCollector.cs
@@ -0,0 +1,578 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Grouping;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Collects parent document hits for a Query containing one or more
+ /// BlockJoinQuery clauses, sorted by the
+ /// specified parent Sort. Note that this cannot perform
+ /// arbitrary joins; rather, it requires that all joined
+ /// documents are indexed as a doc block (using {@link
+ /// IndexWriter#addDocuments} or {@link
+ /// IndexWriter#updateDocuments}). Ie, the join is computed
+ /// at index time.
+ ///
+ /// <p>The parent Sort must only use
+ /// fields from the parent documents; sorting by field in
+ /// the child documents is not supported.</p>
+ ///
+ /// <p>You should only use this
+ /// collector if one or more of the clauses in the query is
+ /// a <seealso cref="ToParentBlockJoinQuery"/>. This collector will find those query
+ /// clauses and record the matching child documents for the
+ /// top scoring parent documents.</p>
+ ///
+ /// <p>Multiple joins (star join) and nested joins and a mix
+ /// of the two are allowed, as long as in all cases the
+ /// documents corresponding to a single row of each joined
+ /// parent table were indexed as a doc block.</p>
+ ///
+ /// <p>For the simple star join you can retrieve the
+ /// <seealso cref="TopGroups"/> instance containing each <seealso cref="ToParentBlockJoinQuery"/>'s
+ /// matching child documents for the top parent groups,
+ /// using <seealso cref="#getTopGroups"/>. Ie,
+ /// a single query, which will contain two or more
+ /// <seealso cref="ToParentBlockJoinQuery"/>'s as clauses representing the star join,
+ /// can then retrieve two or more <seealso cref="TopGroups"/> instances.</p>
+ ///
+ /// <p>For nested joins, the query will run correctly (ie,
+ /// match the right parent and child documents), however,
+ /// because TopGroups is currently unable to support nesting
+ /// (each group is not able to hold another TopGroups), you
+ /// are only able to retrieve the TopGroups of the first
+ /// join. The TopGroups of the nested joins will not be
+ /// correct.
+ ///
+ /// See <seealso cref="org.apache.lucene.search.join"/> for a code
+ /// sample.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinCollector : Collector
+ {
+ // Parent-level sort passed to the constructor; also handed to the TopGroups
+ // built in AccumulateGroups.
+ private readonly Sort sort;
+
+ // Maps each BlockJoinQuery instance to its "slot" in
+ // joinScorers and in OneGroup's cached doc/scores/count:
+ private readonly IDictionary<Query, int?> joinQueryID = new Dictionary<Query, int?>();
+ // Number of parent hits to keep; used as the queue size and to detect when
+ // the queue has filled up.
+ private readonly int numParentHits;
+ // Queue of the parent hits collected so far, created from the parent sort.
+ private readonly FieldValueHitQueue<OneGroup> queue;
+ // Comparators and per-comparator multipliers taken from the queue.
+ private readonly FieldComparator[] comparators;
+ private readonly int[] reverseMul;
+ // Index of the last comparator (comparators.Length - 1).
+ private readonly int compEnd;
+ private readonly bool trackMaxScore;
+ private readonly bool trackScores;
+
+ // DocBase of the reader context most recently passed to NextReader.
+ private int docBase;
+ // One entry per enrolled join query slot; cleared (filled with null) every
+ // time a new Scorer is set and repopulated by Enroll.
+ private ToParentBlockJoinQuery.BlockJoinScorer[] joinScorers = new ToParentBlockJoinQuery.BlockJoinScorer[0];
+ private AtomicReaderContext currentReaderContext;
+ // Score-caching wrapper around the scorer most recently passed to Scorer.
+ private Scorer scorer;
+ // Becomes true once totalHitCount reaches numParentHits.
+ private bool queueFull;
+
+ // Entry returned by the latest queue.Add / queue.UpdateTop call
+ // (presumably the least competitive entry in the queue - confirm against FieldValueHitQueue).
+ private OneGroup bottom;
+ // Number of times Collect has been called.
+ private int totalHitCount;
+ // NaN unless trackMaxScore was requested, in which case the constructor
+ // resets it to float.MinValue before collection starts.
+ private float maxScore = float.NaN;
+
+ /// <summary>
+ /// Creates a ToParentBlockJoinCollector. The provided sort must
+ /// not be null. If you pass true trackScores, all
+ /// ToParentBlockJoinQuery instances must not use
+ /// ScoreMode.None.
+ /// </summary>
+ /// <param name="sort"> Sort applied to the parent documents; must not be null </param>
+ /// <param name="numParentHits"> Number of top parent hits to retain </param>
+ /// <param name="trackScores"> Whether parent and child scores are recorded </param>
+ /// <param name="trackMaxScore"> Whether the maximum parent score is tracked </param>
+ /// <exception cref="ArgumentNullException"> if <paramref name="sort"/> is null </exception>
+ public ToParentBlockJoinCollector(Sort sort, int numParentHits, bool trackScores, bool trackMaxScore)
+ {
+     // TODO: allow null sort to be specialized to relevance
+     // only collector
+     if (sort == null)
+     {
+         // Fail with a descriptive exception instead of a NullReferenceException
+         // further down when the queue is created from sort.GetSort().
+         throw new ArgumentNullException("sort");
+     }
+
+     this.sort = sort;
+     this.trackMaxScore = trackMaxScore;
+     if (trackMaxScore)
+     {
+         // Replace the NaN default so Math.Max in Collect yields real scores.
+         maxScore = float.MinValue;
+     }
+     this.trackScores = trackScores;
+     this.numParentHits = numParentHits;
+     queue = FieldValueHitQueue.Create<OneGroup>(sort.GetSort(), numParentHits);
+     comparators = queue.Comparators;
+     reverseMul = queue.ReverseMul;
+     compEnd = comparators.Length - 1;
+ }
+
+ /// <summary>
+ /// Queue entry for a single parent hit, carrying the child docs, child
+ /// scores and child counts captured for each join slot.
+ /// </summary>
+ private sealed class OneGroup : FieldValueHitQueue.Entry
+ {
+     internal AtomicReaderContext readerContext;
+     internal int[][] docs;
+     internal float[][] scores;
+     internal int[] counts;
+
+     /// <summary>
+     /// Allocates per-join buffers (initial length 5 each); the score buffers
+     /// are only allocated when <paramref name="doScores"/> is true.
+     /// </summary>
+     public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, bool doScores)
+         : base(comparatorSlot, parentDoc, parentScore)
+     {
+         docs = new int[numJoins][];
+         if (doScores)
+         {
+             scores = new float[numJoins][];
+         }
+         counts = new int[numJoins];
+
+         for (int join = 0; join < numJoins; join++)
+         {
+             docs[join] = new int[5];
+             if (doScores)
+             {
+                 scores[join] = new float[5];
+             }
+         }
+     }
+ }
+
+ /// <summary>
+ /// Collects one parent hit. While the queue is still filling, every hit is
+ /// added; once it is full, a hit is only kept if the comparators rank it
+ /// strictly better than the current bottom entry, which it then replaces.
+ /// </summary>
+ /// <param name="parentDoc"> Parent doc id; docBase is added to it when the hit is stored </param>
+ public override void Collect(int parentDoc)
+ {
+ totalHitCount++;
+
+ // Stays NaN unless score tracking is enabled.
+ float score = float.NaN;
+
+ if (trackMaxScore)
+ {
+ score = scorer.Score();
+ maxScore = Math.Max(maxScore, score);
+ }
+
+ // TODO: we could sweep all joinScorers here and
+ // aggregate total child hit count, so we can fill this
+ // in getTopGroups (we wire it to 0 now)
+
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(parentDoc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return;
+ }
+ if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ if (i == compEnd)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return;
+ }
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.Slot, parentDoc);
+ }
+ // The score was already fetched above when trackMaxScore is set.
+ if (!trackMaxScore && trackScores)
+ {
+ score = scorer.Score();
+ }
+ bottom.Doc = docBase + parentDoc;
+ bottom.readerContext = currentReaderContext;
+ bottom.Score = score;
+ CopyGroups(bottom);
+ bottom = queue.UpdateTop();
+
+ // Tell every comparator which slot now holds the bottom entry.
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Bottom = bottom.Slot;
+ }
+ }
+ else
+ {
+ // Startup transient: queue is not yet full:
+ int comparatorSlot = totalHitCount - 1;
+
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(comparatorSlot, parentDoc);
+ }
+ // The score was already fetched above when trackMaxScore is set.
+ if (!trackMaxScore && trackScores)
+ {
+ score = scorer.Score();
+ }
+ OneGroup og = new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.Length, trackScores);
+ og.readerContext = currentReaderContext;
+ CopyGroups(og);
+ bottom = queue.Add(og);
+ queueFull = totalHitCount == numParentHits;
+ if (queueFull)
+ {
+ // End of startup transient: queue just filled up:
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Bottom = bottom.Slot;
+ }
+ }
+ }
+ }
+
+ // Captures, for every enrolled join scorer, the child docs (and scores when
+ // tracked) that belong to the parent hit stored in og.
+ private void CopyGroups(OneGroup og)
+ {
+     int scorerCount = joinScorers.Length;
+
+     // While rare, the per-group arrays can be too short if a join query had
+     // a null scorer on the first segment(s) but became non-null later on.
+     if (scorerCount > og.docs.Length)
+     {
+         og.docs = ArrayUtil.Grow(og.docs);
+     }
+     if (scorerCount > og.counts.Length)
+     {
+         og.counts = ArrayUtil.Grow(og.counts);
+     }
+     if (trackScores && scorerCount > og.scores.Length)
+     {
+         og.scores = ArrayUtil.Grow(og.scores);
+     }
+
+     for (int slot = 0; slot < scorerCount; slot++)
+     {
+         ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[slot];
+
+         // Only a scorer positioned on this very parent contributes children.
+         bool onThisParent = joinScorer != null && docBase + joinScorer.ParentDoc == og.Doc;
+         if (!onThisParent)
+         {
+             og.counts[slot] = 0;
+             continue;
+         }
+
+         og.counts[slot] = joinScorer.ChildCount;
+
+         // Exchange buffers with the scorer: it receives our old array and
+         // hands back the one holding the pending child docs.
+         og.docs[slot] = joinScorer.SwapChildDocs(og.docs[slot]);
+         Debug.Assert(og.docs[slot].Length >= og.counts[slot], "length=" + og.docs[slot].Length + " vs count=" + og.counts[slot]);
+
+         if (trackScores)
+         {
+             og.scores[slot] = joinScorer.SwapChildScores(og.scores[slot]);
+             Debug.Assert(og.scores[slot].Length >= og.counts[slot], "length=" + og.scores[slot].Length + " vs count=" + og.counts[slot]);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Switches the collector to a new segment: remembers the reader context and
+ /// its doc base, and re-binds every comparator to that segment.
+ /// </summary>
+ public override AtomicReaderContext NextReader
+ {
+     set
+     {
+         currentReaderContext = value;
+         docBase = value.DocBase;
+
+         int slot = 0;
+         while (slot < comparators.Length)
+         {
+             FieldComparator perSegment = comparators[slot].SetNextReader(value);
+             queue.SetComparator(slot, perSegment);
+             slot++;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Always returns false: Collect's competitiveness check relies on docs
+ /// being visited in doc id order.
+ /// </summary>
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+
+ // Registers a join scorer under the slot of its query, assigning a new slot
+ // (and growing joinScorers by one) the first time the query is seen.
+ private void Enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer)
+ {
+     scorer.TrackPendingChildHits();
+
+     int? knownSlot;
+     if (joinQueryID.TryGetValue(query, out knownSlot))
+     {
+         joinScorers[knownSlot.Value] = scorer;
+         return;
+     }
+
+     // First sighting of this query: its slot is the next free index.
+     int newSlot = joinScorers.Length;
+     joinQueryID[query] = newSlot;
+     Array.Resize(ref joinScorers, newSlot + 1);
+     joinScorers[newSlot] = scorer;
+ }
+
+ /// <summary>
+ /// Sets the scorer for the current segment. The scorer is wrapped so its
+ /// score is cached, handed to every comparator, and its scorer tree is
+ /// walked breadth-first to enroll every BlockJoinScorer found in it.
+ /// </summary>
+ public override Scorer Scorer
+ {
+     set
+     {
+         // Since we invoke .score(), and the comparators likely
+         // do as well, cache it so it's only "really" computed
+         // once:
+         scorer = new ScoreCachingWrappingScorer(value);
+         for (int compIdx = 0; compIdx < comparators.Length; compIdx++)
+         {
+             comparators[compIdx].Scorer = scorer;
+         }
+
+         // Slots are kept, but scorers from the previous segment are dropped.
+         Arrays.Fill(joinScorers, null);
+
+         // The traversal is local and single-threaded, so a plain FIFO queue
+         // is sufficient (no concurrent collection needed).
+         var pending = new Queue<Scorer>();
+         pending.Enqueue(value);
+         while (pending.Count > 0)
+         {
+             Scorer current = pending.Dequeue();
+
+             var joinScorer = current as ToParentBlockJoinQuery.BlockJoinScorer;
+             if (joinScorer != null)
+             {
+                 Enroll((ToParentBlockJoinQuery)current.Weight.Query, joinScorer);
+             }
+
+             foreach (Scorer.ChildScorer sub in current.Children)
+             {
+                 pending.Enqueue(sub.Child);
+             }
+         }
+     }
+ }
+
+ // Parent groups drained from the queue; null until sortQueue() has run.
+ private OneGroup[] sortedGroups;
+
+ // Empties the hit queue into sortedGroups, filling the array from the last
+ // index down to the first in the order the queue pops its entries.
+ private void sortQueue()
+ {
+     int remaining = queue.Size();
+     sortedGroups = new OneGroup[remaining];
+     while (remaining > 0)
+     {
+         remaining--;
+         sortedGroups[remaining] = queue.Pop();
+     }
+ }
+
+ /// <summary>
+ /// Returns the TopGroups for the specified
+ /// BlockJoinQuery. The groupValue of each GroupDocs will
+ /// be the parent docID for that group.
+ /// The number of documents within each group is calculated as minimum of <code>maxDocsPerGroup</code>
+ /// and number of matched child documents for that group.
+ /// Returns null if no groups matched, or if <paramref name="offset"/> is at
+ /// or beyond the number of collected parent groups.
+ /// </summary>
+ /// <param name="query"> Search query </param>
+ /// <param name="withinGroupSort"> Sort criteria within groups </param>
+ /// <param name="offset"> Parent docs offset </param>
+ /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
+ /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
+ /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
+ /// <returns> TopGroups for specified query </returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ public virtual TopGroups<int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)
+ {
+     int? slot;
+     bool queryEnrolled = joinQueryID.TryGetValue(query, out slot);
+     if (!queryEnrolled && totalHitCount == 0)
+     {
+         return null;
+     }
+
+     if (sortedGroups == null)
+     {
+         if (offset >= queue.Size())
+         {
+             return null;
+         }
+         sortQueue();
+     }
+     else if (offset >= sortedGroups.Length)
+     {
+         // Same bound as on the first call, so repeated calls with the same
+         // offset give the same answer (previously offset == Length returned
+         // an empty TopGroups here but null on the first call).
+         return null;
+     }
+
+     // An unknown query (never enrolled) is reported with slot -1, which
+     // AccumulateGroups treats as "no child docs".
+     int slotIndex = slot.HasValue ? slot.Value : -1;
+     return AccumulateGroups(slotIndex, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
+ }
+
+ /// <summary>
+ /// Accumulates groups for the BlockJoinQuery specified by its slot.
+ /// For every parent group from <paramref name="offset"/> onwards, the cached
+ /// child docs of that slot are replayed into a fresh per-group collector and
+ /// the resulting top docs are wrapped in a GroupDocs.
+ /// </summary>
+ /// <param name="slot"> Search query's slot, or -1 if the query was never enrolled </param>
+ /// <param name="offset"> Parent docs offset </param>
+ /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
+ /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
+ /// <param name="withinGroupSort"> Sort criteria within groups; null sorts by score </param>
+ /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
+ /// <returns> TopGroups for the query specified by slot </returns>
+ /// <exception cref="ArgumentException"> if withinGroupSort is null but scores were not tracked </exception>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ private TopGroups<int> AccumulateGroups(int slot, int offset, int maxDocsPerGroup, int withinGroupOffset, Sort withinGroupSort, bool fillSortFields)
+ {
+ var groups = new GroupDocs<int>[sortedGroups.Length - offset];
+ // Stand-in scorer that reports the cached child doc/score to the collector.
+ var fakeScorer = new FakeScorer();
+
+ int totalGroupedHitCount = 0;
+
+ for (int groupIdx = offset; groupIdx < sortedGroups.Length; groupIdx++)
+ {
+ OneGroup og = sortedGroups[groupIdx];
+ int numChildDocs;
+ // Unknown slot, or a group created before this slot existed: no children.
+ if (slot == -1 || slot >= og.counts.Length)
+ {
+ numChildDocs = 0;
+ }
+ else
+ {
+ numChildDocs = og.counts[slot];
+ }
+
+ // Number of documents in group should be bounded to prevent redundant memory allocation
+ // (and is kept at 1 or more even when the group has no children).
+ int numDocsInGroup = Math.Max(1, Math.Min(numChildDocs, maxDocsPerGroup));
+
+ // At this point we hold all docs w/ in each group, unsorted; we now sort them:
+ Collector collector;
+ if (withinGroupSort == null)
+ {
+ // Sort by score
+ if (!trackScores)
+ {
+ throw new ArgumentException("cannot sort by relevance within group: trackScores=false");
+ }
+ collector = TopScoreDocCollector.Create(numDocsInGroup, true);
+ }
+ else
+ {
+ // Sort by fields
+ collector = TopFieldCollector.Create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
+ }
+
+ collector.Scorer = fakeScorer;
+ collector.NextReader = og.readerContext;
+ // Replay the cached child hits of this group into the collector.
+ for (int docIdx = 0; docIdx < numChildDocs; docIdx++)
+ {
+ int doc = og.docs[slot][docIdx];
+ fakeScorer.doc = doc;
+ if (trackScores)
+ {
+ fakeScorer._score = og.scores[slot][docIdx];
+ }
+ collector.Collect(doc);
+ }
+ totalGroupedHitCount += numChildDocs;
+
+ object[] groupSortValues;
+
+ if (fillSortFields)
+ {
+ // Parent-level sort values, read from the slot this group occupies.
+ groupSortValues = new object[comparators.Length];
+ for (int sortFieldIdx = 0; sortFieldIdx < comparators.Length; sortFieldIdx++)
+ {
+ groupSortValues[sortFieldIdx] = comparators[sortFieldIdx].Value(og.Slot);
+ }
+ }
+ else
+ {
+ groupSortValues = null;
+ }
+
+ // The collector's concrete type follows from the branch taken above.
+ TopDocs topDocs;
+ if (withinGroupSort == null)
+ {
+ var tempCollector = (TopScoreDocCollector) collector;
+ topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
+ }
+ else
+ {
+ var tempCollector = (TopFieldCollector) collector;
+ topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
+ }
+
+ groups[groupIdx - offset] = new GroupDocs<int>(og.Score, topDocs.MaxScore, numChildDocs, topDocs.ScoreDocs, og.Doc, groupSortValues);
+ }
+
+ // NOTE(review): the outer TopGroups constructor presumably copies the inner
+ // instance and attaches totalHitCount as the total group count - confirm
+ // against TopGroups.
+ return new TopGroups<int>(new TopGroups<int>(sort.GetSort(), withinGroupSort == null ? null : withinGroupSort.GetSort(), 0, totalGroupedHitCount, groups, maxScore), totalHitCount);
+ }
+
+ /// <summary>
+ /// Convenience overload of GetTopGroups that places no practical limit on
+ /// the number of child documents per group (it passes <code>int.MaxValue</code>
+ /// as the per-group bound). The groupValue of each GroupDocs is the parent
+ /// docID for that group. Returns null if no groups matched.
+ /// </summary>
+ /// <param name="query">Search query</param>
+ /// <param name="withinGroupSort">Sort criteria within groups</param>
+ /// <param name="offset">Parent docs offset</param>
+ /// <param name="withinGroupOffset">Offset within each group of child docs</param>
+ /// <param name="fillSortFields">Specifies whether to add sort fields or not</param>
+ /// <returns>TopGroups for specified query</returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ public virtual TopGroups<int> GetTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int withinGroupOffset, bool fillSortFields)
+ {
+     return GetTopGroups(
+         query,
+         withinGroupSort,
+         offset,
+         maxDocsPerGroup: int.MaxValue,
+         withinGroupOffset: withinGroupOffset,
+         fillSortFields: fillSortFields);
+ }
+
+ /// <summary>
+ /// Returns the highest score across all collected parent hits, as long as
+ /// <code>trackMaxScore=true</code> was passed to the constructor.
+ /// Otherwise this returns <code>float.NaN</code>. With tracking enabled the
+ /// value starts at <code>float.MinValue</code> and is raised by each collected hit.
+ /// </summary>
+ public virtual float MaxScore
+ {
+ get { return maxScore; }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs b/src/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
new file mode 100644
index 0000000..c41fd50
--- /dev/null
+++ b/src/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
@@ -0,0 +1,393 @@
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A field comparator that allows parent documents to be sorted by fields
+ /// from the nested / child documents.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public abstract class ToParentBlockJoinFieldComparator : FieldComparator<object>
+ {
+ // Filters used in SetNextReader to obtain the per-segment parent and child doc sets.
+ private readonly Filter _parentFilter;
+ private readonly Filter _childFilter;
+ // Scratch slot in the wrapped comparator, used by Copy to hold a candidate
+ // child value while comparing it with the value already stored in the target slot.
+ private readonly int _spareSlot;
+
+ // Child-level comparator all operations delegate to; replaced on every SetNextReader call.
+ private FieldComparator _wrappedComparator;
+ // Parent / child doc bits for the current segment; null when the
+ // corresponding filter yielded no doc id set or no iterator.
+ private FixedBitSet _parentDocuments;
+ private FixedBitSet _childDocuments;
+
+ // Private: only the nested Lowest / Highest implementations can construct
+ // this comparator. Simply stores the collaborators.
+ private ToParentBlockJoinFieldComparator(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ {
+     this._parentFilter = parentFilter;
+     this._childFilter = childFilter;
+     this._spareSlot = spareSlot;
+     this._wrappedComparator = wrappedComparator;
+ }
+
+ /// <summary>
+ /// Compares two slots by delegating to the wrapped comparator.
+ /// </summary>
+ public override int Compare(int slot1, int slot2)
+ {
+ return _wrappedComparator.Compare(slot1, slot2);
+ }
+
+ /// <summary>
+ /// Forwards the bottom slot to the wrapped comparator.
+ /// </summary>
+ public override int Bottom
+ {
+ set
+ {
+ _wrappedComparator.Bottom = value;
+ }
+ }
+
+ /// <summary>
+ /// Forwards the top value to the wrapped comparator.
+ /// </summary>
+ public override object TopValue
+ {
+ set
+ {
+ _wrappedComparator.TopValue = value;
+ }
+ }
+
+ /// <summary>
+ /// Prepares the comparator for a new segment: resolves the child and parent
+ /// doc bits for that segment (child first, then parent) and re-binds the
+ /// wrapped comparator. Always returns this instance.
+ /// </summary>
+ public override FieldComparator SetNextReader(AtomicReaderContext context)
+ {
+     _childDocuments = ResolveBits(_childFilter, context);
+     _parentDocuments = ResolveBits(_parentFilter, context);
+
+     _wrappedComparator = _wrappedComparator.SetNextReader(context);
+     return this;
+ }
+
+ // Evaluates the filter on the segment and returns its docs as a FixedBitSet:
+ // the set itself when it already is one, otherwise a copy built from its
+ // iterator. Returns null when the filter gives no set or no iterator.
+ private static FixedBitSet ResolveBits(Filter filter, AtomicReaderContext context)
+ {
+     DocIdSet docIdSet = filter.GetDocIdSet(context, null);
+     if (IsEmpty(docIdSet))
+     {
+         return null;
+     }
+
+     var existingBits = docIdSet as FixedBitSet;
+     if (existingBits != null)
+     {
+         return existingBits;
+     }
+
+     DocIdSetIterator iterator = docIdSet.GetIterator();
+     if (iterator == null)
+     {
+         return null;
+     }
+     return ToFixedBitSet(iterator, context.AtomicReader.MaxDoc);
+ }
+
+ // A doc id set counts as empty only when it is null; a non-null set with
+ // no documents is not detected here.
+ private static bool IsEmpty(DocIdSet set)
+ {
+ return set == null;
+ }
+
+ // Drains the iterator into a new FixedBitSet of numBits bits, setting one
+ // bit per doc id the iterator returns.
+ private static FixedBitSet ToFixedBitSet(DocIdSetIterator iterator, int numBits)
+ {
+     var bits = new FixedBitSet(numBits);
+     for (int docId = iterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.NextDoc())
+     {
+         bits.Set(docId);
+     }
+     return bits;
+ }
+
+ /// <summary>
+ /// Returns the value stored in the given slot of the wrapped comparator.
+ /// </summary>
+ public override IComparable Value(int slot)
+ {
+ return _wrappedComparator.Value(slot);
+ }
+
+ /// <summary>
+ /// Concrete implementation of <see cref="ToParentBlockJoinSortField"/> that sorts the parent docs with the lowest values
+ /// in the child / nested docs first.
+ /// </summary>
+ public sealed class Lowest : ToParentBlockJoinFieldComparator
+ {
+     // Marker returned by the child-iteration helpers when no (further) child exists.
+     private const int NoChild = -1;
+
+     /// <summary>
+     /// Create ToParentBlockJoinFieldComparator.Lowest
+     /// </summary>
+     /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
+     /// <param name="parentFilter">Filter (must produce FixedBitSet per-segment) that identifies the parent documents. </param>
+     /// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
+     /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
+     /// inside the parent document scope is most competitive. </param>
+     public Lowest(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+         : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+     {
+     }
+
+     /// <summary>
+     /// Compares the bottom slot against the children of <paramref name="parentDoc"/>.
+     /// Returns the first positive child comparison; otherwise 0 if any child
+     /// compared equal, otherwise the first child's (negative) result.
+     /// Returns 0 when the parent has no matching children.
+     /// </summary>
+     public override int CompareBottom(int parentDoc)
+     {
+         int childDoc = FirstChildDoc(parentDoc);
+         if (childDoc == NoChild)
+         {
+             return 0;
+         }
+
+         // We only need to emit a single cmp value for any matching child doc
+         int cmp = _wrappedComparator.CompareBottom(childDoc);
+         if (cmp > 0)
+         {
+             return cmp;
+         }
+
+         while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChild)
+         {
+             int cmp1 = _wrappedComparator.CompareBottom(childDoc);
+             if (cmp1 > 0)
+             {
+                 return cmp1;
+             }
+             if (cmp1 == 0)
+             {
+                 cmp = 0;
+             }
+         }
+         return cmp;
+     }
+
+     /// <summary>
+     /// Copies the lowest value among the children of <paramref name="parentDoc"/>
+     /// into <paramref name="slot"/>. Leaves the slot untouched when the parent
+     /// has no matching children.
+     /// </summary>
+     public override void Copy(int slot, int parentDoc)
+     {
+         int childDoc = FirstChildDoc(parentDoc);
+         if (childDoc == NoChild)
+         {
+             return;
+         }
+
+         _wrappedComparator.Copy(_spareSlot, childDoc);
+         _wrappedComparator.Copy(slot, childDoc);
+
+         while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChild)
+         {
+             // Stage the candidate in the spare slot; keep it if it is lower.
+             _wrappedComparator.Copy(_spareSlot, childDoc);
+             if (_wrappedComparator.Compare(_spareSlot, slot) < 0)
+             {
+                 _wrappedComparator.Copy(slot, childDoc);
+             }
+         }
+     }
+
+     /// <summary>
+     /// Compares the top value against the children of <paramref name="parentDoc"/>,
+     /// with the same result rules as <see cref="CompareBottom"/>.
+     /// </summary>
+     public override int CompareTop(int parentDoc)
+     {
+         int childDoc = FirstChildDoc(parentDoc);
+         if (childDoc == NoChild)
+         {
+             return 0;
+         }
+
+         // We only need to emit a single cmp value for any matching child doc.
+         // The first child must be compared against the top value as well
+         // (this previously called CompareBottom by mistake).
+         int cmp = _wrappedComparator.CompareTop(childDoc);
+         if (cmp > 0)
+         {
+             return cmp;
+         }
+
+         while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChild)
+         {
+             int cmp1 = _wrappedComparator.CompareTop(childDoc);
+             if (cmp1 > 0)
+             {
+                 return cmp1;
+             }
+             if (cmp1 == 0)
+             {
+                 cmp = 0;
+             }
+         }
+         return cmp;
+     }
+
+     // Returns the first matching child located after the previous parent and
+     // before parentDoc, or NoChild if there is none or no bits are available.
+     private int FirstChildDoc(int parentDoc)
+     {
+         if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+         {
+             return NoChild;
+         }
+
+         int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+         return NextChildDoc(prevParentDoc, parentDoc);
+     }
+
+     // Returns the next matching child after afterDoc that still precedes
+     // parentDoc, or NoChild once the block is exhausted.
+     private int NextChildDoc(int afterDoc, int parentDoc)
+     {
+         int childDoc = _childDocuments.NextSetBit(afterDoc + 1);
+         if (childDoc >= parentDoc || childDoc == -1)
+         {
+             return NoChild;
+         }
+         return childDoc;
+     }
+ }
+
+ /// <summary>
+ /// Concrete implementation of <see cref="ToParentBlockJoinSortField"/> that sorts the parent docs with the highest values
+ /// in the child / nested docs first.
+ /// </summary>
+ public sealed class Highest : ToParentBlockJoinFieldComparator
+ {
+     // Marker returned by the child-iteration helpers when no (further) child exists.
+     private const int NoChild = -1;
+
+     /// <summary>
+     /// Create ToParentBlockJoinFieldComparator.Highest
+     /// </summary>
+     /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
+     /// <param name="parentFilter">Filter (must produce FixedBitSet per-segment) that identifies the parent documents. </param>
+     /// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
+     /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
+     /// inside the parent document scope is most competitive. </param>
+     public Highest(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+         : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+     {
+     }
+
+     /// <summary>
+     /// Compares the bottom slot against the children of <paramref name="parentDoc"/>.
+     /// Returns the first negative child comparison; otherwise 0 if any child
+     /// compared equal, otherwise the first child's (positive) result.
+     /// Returns 0 when the parent has no matching children.
+     /// </summary>
+     public override int CompareBottom(int parentDoc)
+     {
+         int childDoc = FirstChildDoc(parentDoc);
+         if (childDoc == NoChild)
+         {
+             return 0;
+         }
+
+         int cmp = _wrappedComparator.CompareBottom(childDoc);
+         if (cmp < 0)
+         {
+             return cmp;
+         }
+
+         while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChild)
+         {
+             int cmp1 = _wrappedComparator.CompareBottom(childDoc);
+             if (cmp1 < 0)
+             {
+                 return cmp1;
+             }
+             if (cmp1 == 0)
+             {
+                 cmp = 0;
+             }
+         }
+         return cmp;
+     }
+
+     /// <summary>
+     /// Copies the highest value among the children of <paramref name="parentDoc"/>
+     /// into <paramref name="slot"/>. Leaves the slot untouched when the parent
+     /// has no matching children.
+     /// </summary>
+     public override void Copy(int slot, int parentDoc)
+     {
+         int childDoc = FirstChildDoc(parentDoc);
+         if (childDoc == NoChild)
+         {
+             return;
+         }
+
+         _wrappedComparator.Copy(_spareSlot, childDoc);
+         _wrappedComparator.Copy(slot, childDoc);
+
+         while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChild)
+         {
+             // Stage the candidate in the spare slot; keep it if it is higher.
+             _wrappedComparator.Copy(_spareSlot, childDoc);
+             if (_wrappedComparator.Compare(_spareSlot, slot) > 0)
+             {
+                 _wrappedComparator.Copy(slot, childDoc);
+             }
+         }
+     }
+
+     /// <summary>
+     /// Compares the top value against the children of <paramref name="parentDoc"/>,
+     /// with the same result rules as <see cref="CompareBottom"/>.
+     /// </summary>
+     public override int CompareTop(int parentDoc)
+     {
+         int childDoc = FirstChildDoc(parentDoc);
+         if (childDoc == NoChild)
+         {
+             return 0;
+         }
+
+         // The first child must be compared against the top value as well
+         // (this previously called CompareBottom by mistake).
+         int cmp = _wrappedComparator.CompareTop(childDoc);
+         if (cmp < 0)
+         {
+             return cmp;
+         }
+
+         while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChild)
+         {
+             int cmp1 = _wrappedComparator.CompareTop(childDoc);
+             if (cmp1 < 0)
+             {
+                 return cmp1;
+             }
+             if (cmp1 == 0)
+             {
+                 cmp = 0;
+             }
+         }
+         return cmp;
+     }
+
+     // Returns the first matching child located after the previous parent and
+     // before parentDoc, or NoChild if there is none or no bits are available.
+     private int FirstChildDoc(int parentDoc)
+     {
+         if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+         {
+             return NoChild;
+         }
+
+         int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+         return NextChildDoc(prevParentDoc, parentDoc);
+     }
+
+     // Returns the next matching child after afterDoc that still precedes
+     // parentDoc, or NoChild once the block is exhausted.
+     private int NextChildDoc(int afterDoc, int parentDoc)
+     {
+         int childDoc = _childDocuments.NextSetBit(afterDoc + 1);
+         if (childDoc >= parentDoc || childDoc == -1)
+         {
+             return NoChild;
+         }
+         return childDoc;
+     }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToParentBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToParentBlockJoinQuery.cs b/src/Lucene.Net.Join/ToParentBlockJoinQuery.cs
new file mode 100644
index 0000000..810f30e
--- /dev/null
+++ b/src/Lucene.Net.Join/ToParentBlockJoinQuery.cs
@@ -0,0 +1,516 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// This query requires that you index
+ /// children and parent docs as a single block, using the
+ /// <c>IndexWriter.AddDocuments()</c> or
+ /// <c>IndexWriter.UpdateDocuments()</c> API. In each block, the
+ /// child documents must appear first, ending with the parent
+ /// document. At search time you provide a Filter
+ /// identifying the parents, however this Filter must provide
+ /// an <see cref="FixedBitSet"/> per sub-reader.
+ ///
+ /// <p>Once the block index is built, use this query to wrap
+ /// any sub-query matching only child docs and join matches in that
+ /// child document space up to the parent document space.
+ /// You can then use this Query as a clause with
+ /// other queries in the parent document space.</p>
+ ///
+ /// <p>See <see cref="ToChildBlockJoinQuery"/> if you need to join
+ /// in the reverse order.</p>
+ ///
+ /// <p>The child documents must be orthogonal to the parent
+ /// documents: the wrapped child query must never
+ /// return a parent document.</p>
+ ///
+ /// If you'd like to retrieve <see cref="TopGroups"/> for the
+ /// resulting query, use the <see cref="ToParentBlockJoinCollector"/>.
+ /// Note that this is not necessary, ie, if you simply want
+ /// to collect the parent documents and don't need to see
+ /// which child documents matched under that parent, then
+ /// you can use any collector.
+ ///
+ /// <p><b>NOTE</b>: If the overall query contains parent-only
+ /// matches, for example you OR a parent-only query with a
+ /// joined child-only query, then the resulting collected documents
+ /// will be correct, however the <see cref="TopGroups"/> you get
+ /// from <see cref="ToParentBlockJoinCollector"/> will not contain every
+ /// child for parents that had matched.</p>
+ ///
+ /// <p>See the <c>Lucene.Net.Join</c> namespace for an
+ /// overview. </p>
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinQuery : Query
+ {
+
+ private readonly Filter _parentsFilter;
+ private readonly Query _childQuery;
+
+ // If we are rewritten, this is the original childQuery we
+ // were passed; we use this for .equals() and
+ // .hashCode(). This makes rewritten query equal the
+ // original, so that user does not have to .rewrite() their
+ // query before searching:
+ private readonly Query _origChildQuery;
+ private readonly ScoreMode _scoreMode;
+
+ /// <summary>
+ /// Creates a join query that maps matches of a child-level query onto their parent documents.
+ /// The supplied child query is also remembered as the "original" child query, which is what
+ /// equality and hashing are based on.
+ /// </summary>
+ /// <param name="childQuery"> Query matching child documents. </param>
+ /// <param name="parentsFilter"> Filter identifying the parent documents; it must produce a
+ /// FixedBitSet per segment (for example <see cref="FixedBitSetCachingWrapperFilter"/>). </param>
+ /// <param name="scoreMode"> How the scores of multiple children are combined
+ /// into a single parent score. </param>
+ public ToParentBlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode)
+     : this(childQuery, childQuery, parentsFilter, scoreMode)
+ {
+ }
+
+ /// <summary>
+ /// Creates a query whose active child query may differ from the original one.
+ /// Rewrite uses this to substitute a rewritten child query while keeping
+ /// <paramref name="origChildQuery"/> for equality and hashing.
+ /// </summary>
+ /// <param name="origChildQuery"> Child query as originally supplied by the caller. </param>
+ /// <param name="childQuery"> Child query that is actually executed. </param>
+ /// <param name="parentsFilter"> Filter identifying the parent documents. </param>
+ /// <param name="scoreMode"> How child scores are aggregated into the parent score. </param>
+ private ToParentBlockJoinQuery(Query origChildQuery, Query childQuery, Filter parentsFilter, ScoreMode scoreMode) : base()
+ {
+ _origChildQuery = origChildQuery;
+ _childQuery = childQuery;
+ _parentsFilter = parentsFilter;
+ _scoreMode = scoreMode;
+ }
+
+ /// <summary>
+ /// Builds the weight for this query by wrapping the weight of the child query.
+ /// </summary>
+ /// <param name="searcher"> Searcher used to create the child query's weight. </param>
+ /// <returns> A weight that joins child matches up to their parent documents. </returns>
+ public override Weight CreateWeight(IndexSearcher searcher)
+ {
+     Weight childWeight = _childQuery.CreateWeight(searcher);
+     return new BlockJoinWeight(this, childWeight, _parentsFilter, _scoreMode);
+ }
+
+ /// <summary>
+ /// Weight for the join query: delegates normalization to the child weight
+ /// (scaled by the join query's boost) and produces a BlockJoinScorer per segment.
+ /// </summary>
+ private class BlockJoinWeight : Weight
+ {
+     internal readonly Query JoinQuery;
+     internal readonly Weight ChildWeight;
+     internal readonly Filter ParentsFilter;
+     internal readonly ScoreMode ScoreMode;
+
+     public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode) : base()
+     {
+         ScoreMode = scoreMode;
+         ParentsFilter = parentsFilter;
+         ChildWeight = childWeight;
+         JoinQuery = joinQuery;
+     }
+
+     /// <summary> The join query this weight was created for. </summary>
+     public override Query Query
+     {
+         get { return JoinQuery; }
+     }
+
+     /// <summary> The child weight's normalization value multiplied by the square of the join query's boost. </summary>
+     public override float ValueForNormalization
+     {
+         get { return ChildWeight.ValueForNormalization*JoinQuery.Boost*JoinQuery.Boost; }
+     }
+
+     /// <summary> Forwards normalization to the child weight, folding the join query's boost into the top-level boost. </summary>
+     public override void Normalize(float norm, float topLevelBoost)
+     {
+         float boostedTopLevel = topLevelBoost * JoinQuery.Boost;
+         ChildWeight.Normalize(norm, boostedTopLevel);
+     }
+
+     /// <summary>
+     /// Creates the per-segment scorer, or returns null when the segment cannot match.
+     /// <paramref name="acceptDocs"/> is applied only to parent documents, by the returned scorer;
+     /// the child scorer is restricted by the segment's live docs.
+     /// </summary>
+     public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
+     {
+         Scorer childScorer = ChildWeight.Scorer(readerContext, readerContext.AtomicReader.LiveDocs);
+         if (childScorer == null)
+         {
+             // The child query matches nothing in this segment.
+             return null;
+         }
+
+         int firstChildDoc = childScorer.NextDoc();
+         if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS)
+         {
+             // The child scorer is already exhausted.
+             return null;
+         }
+
+         // acceptDocs is deliberately not handed to the filter: doing so would most likely
+         // make it return a BitsFilteredDocIdSet rather than a FixedBitSet. The scorer
+         // applies acceptDocs itself while scoring.
+         DocIdSet parents = ParentsFilter.GetDocIdSet(readerContext, null);
+         if (parents == null)
+         {
+             // No parent documents in this segment.
+             return null;
+         }
+
+         var parentBits = parents as FixedBitSet;
+         if (parentBits == null)
+         {
+             throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
+         }
+
+         return new BlockJoinScorer(this, childScorer, parentBits, firstChildDoc, ScoreMode, acceptDocs);
+     }
+
+     /// <summary>
+     /// Explains the score of <paramref name="doc"/>, or reports a non-match when the
+     /// scorer is unavailable or does not land on that document.
+     /// </summary>
+     public override Explanation Explain(AtomicReaderContext context, int doc)
+     {
+         BlockJoinScorer joinScorer = (BlockJoinScorer)Scorer(context, context.AtomicReader.LiveDocs);
+         if (joinScorer == null || joinScorer.Advance(doc) != doc)
+         {
+             return new ComplexExplanation(false, 0.0f, "Not a match");
+         }
+         return joinScorer.Explain(context.DocBase);
+     }
+
+     /// <summary> Always false for this weight. </summary>
+     public override bool ScoresDocsOutOfOrder()
+     {
+         return false;
+     }
+ }
+
+ internal class BlockJoinScorer : Scorer
+ {
+ private readonly Scorer _childScorer;
+ private readonly FixedBitSet _parentBits;
+ private readonly ScoreMode _scoreMode;
+ private readonly Bits _acceptDocs;
+ private int _parentDocRenamed = -1;
+ private int _prevParentDoc;
+ private float _parentScore;
+ private int _parentFreq;
+ private int _nextChildDoc;
+ private int[] _pendingChildDocs;
+ private float[] _pendingChildScores;
+ private int _childDocUpto;
+
+ /// <summary>
+ /// Creates a scorer that walks parent documents by consuming <paramref name="childScorer"/>.
+ /// </summary>
+ /// <param name="weight"> Weight passed to the base scorer. </param>
+ /// <param name="childScorer"> Scorer over child documents; already positioned on <paramref name="firstChildDoc"/>. </param>
+ /// <param name="parentBits"> Bit set marking parent documents. </param>
+ /// <param name="firstChildDoc"> First child doc id, used as the initial pending child. </param>
+ /// <param name="scoreMode"> How child scores are combined into the parent score. </param>
+ /// <param name="acceptDocs"> Optional filter applied to parent documents; may be null. </param>
+ public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) : base(weight)
+ {
+ //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ _parentBits = parentBits;
+ _childScorer = childScorer;
+ _scoreMode = scoreMode;
+ _acceptDocs = acceptDocs;
+ _nextChildDoc = firstChildDoc;
+ }
+
+ /// <summary>
+ /// A single-element collection wrapping the child scorer, labelled "BLOCK_JOIN".
+ /// A new collection is created on every access.
+ /// </summary>
+ public override ICollection<ChildScorer> Children
+ {
+ get { return Collections.Singleton(new ChildScorer(_childScorer, "BLOCK_JOIN")); }
+ }
+
+ /// <summary>
+ /// Number of child documents gathered for the current parent by the most recent NextDoc call.
+ /// </summary>
+ internal virtual int ChildCount
+ {
+ get { return _childDocUpto; }
+ }
+
+ /// <summary>
+ /// Doc id of the current parent document; -1 before the scorer has been advanced.
+ /// </summary>
+ internal virtual int ParentDoc
+ {
+ get { return _parentDocRenamed; }
+ }
+
+ /// <summary>
+ /// Hands the current pending-child-docs buffer to the caller and installs a replacement.
+ /// </summary>
+ /// <param name="other"> Buffer to install; when null a fresh 5-element array is allocated. </param>
+ /// <returns> The buffer that was in use before the swap (may be null). </returns>
+ internal virtual int[] SwapChildDocs(int[] other)
+ {
+     int[] previous = _pendingChildDocs;
+     _pendingChildDocs = other ?? new int[5];
+     return previous;
+ }
+
+ /// <summary>
+ /// Hands the current pending-child-scores buffer to the caller and installs a replacement.
+ /// </summary>
+ /// <param name="other"> Buffer to install; when null a fresh 5-element array is allocated. </param>
+ /// <returns> The buffer that was in use before the swap (may be null). </returns>
+ /// <exception cref="InvalidOperationException"> The score mode is None, so no child scores are tracked. </exception>
+ internal virtual float[] SwapChildScores(float[] other)
+ {
+     bool scoresUntracked = _scoreMode == ScoreMode.None;
+     if (scoresUntracked)
+     {
+         throw new InvalidOperationException("ScoreMode is None; you must pass trackScores=false to ToParentBlockJoinCollector");
+     }
+
+     float[] previous = _pendingChildScores;
+     _pendingChildScores = other ?? new float[5];
+     return previous;
+ }
+
+ /// <summary>
+ /// Moves to the next parent document that has at least one matching child and passes
+ /// the accept-docs filter. All children of that parent are consumed from the child
+ /// scorer; their scores and freqs are aggregated according to the score mode, and
+ /// their doc ids/scores are recorded when pending-hit tracking is enabled.
+ /// </summary>
+ /// <returns> The parent doc id, or NO_MORE_DOCS once the child scorer is exhausted. </returns>
+ /// <exception cref="InvalidOperationException"> The child scorer matched a parent document. </exception>
+ public override int NextDoc()
+ {
+ //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
+ // Loop until we hit a parentDoc that's accepted
+ while (true)
+ {
+ if (_nextChildDoc == NO_MORE_DOCS)
+ {
+ //System.out.println(" end");
+ return _parentDocRenamed = NO_MORE_DOCS;
+ }
+
+ // Gather all children sharing the same parent as
+ // nextChildDoc
+
+ _parentDocRenamed = _parentBits.NextSetBit(_nextChildDoc);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ //System.out.println(" parentDoc=" + parentDoc);
+ // A pending child without a following parent would mean the block structure is broken.
+ Debug.Assert(_parentDocRenamed != -1);
+
+ //System.out.println(" nextChildDoc=" + nextChildDoc);
+ if (_acceptDocs != null && !_acceptDocs.Get(_parentDocRenamed))
+ {
+ // Parent doc not accepted; skip child docs until
+ // we hit a new parent doc:
+ do
+ {
+ _nextChildDoc = _childScorer.NextDoc();
+ } while (_nextChildDoc < _parentDocRenamed);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ continue;
+ }
+
+ float totalScore = 0;
+ float maxScore = float.NegativeInfinity;
+
+ // Reset the per-parent accumulators before gathering this block's children.
+ _childDocUpto = 0;
+ _parentFreq = 0;
+ do
+ {
+ //System.out.println(" c=" + nextChildDoc);
+ // Grow the tracking buffers when they are full (only if tracking is enabled).
+ if (_pendingChildDocs != null && _pendingChildDocs.Length == _childDocUpto)
+ {
+ _pendingChildDocs = ArrayUtil.Grow(_pendingChildDocs);
+ }
+ if (_pendingChildScores != null && _scoreMode != ScoreMode.None && _pendingChildScores.Length == _childDocUpto)
+ {
+ _pendingChildScores = ArrayUtil.Grow(_pendingChildScores);
+ }
+ if (_pendingChildDocs != null)
+ {
+ _pendingChildDocs[_childDocUpto] = _nextChildDoc;
+ }
+ if (_scoreMode != ScoreMode.None)
+ {
+ // TODO: specialize this into dedicated classes per-scoreMode
+ float childScore = _childScorer.Score();
+ int childFreq = _childScorer.Freq();
+ if (_pendingChildScores != null)
+ {
+ _pendingChildScores[_childDocUpto] = childScore;
+ }
+ maxScore = Math.Max(childScore, maxScore);
+ totalScore += childScore;
+ _parentFreq += childFreq;
+ }
+ _childDocUpto++;
+ _nextChildDoc = _childScorer.NextDoc();
+ } while (_nextChildDoc < _parentDocRenamed);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ // Derive the parent score from the aggregated child scores; None leaves it untouched.
+ switch (_scoreMode)
+ {
+ case ScoreMode.Avg:
+ _parentScore = totalScore / _childDocUpto;
+ break;
+ case ScoreMode.Max:
+ _parentScore = maxScore;
+ break;
+ case ScoreMode.Total:
+ _parentScore = totalScore;
+ break;
+ case ScoreMode.None:
+ break;
+ }
+
+ //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
+ return _parentDocRenamed;
+ }
+ }
+
+ /// <summary>
+ /// Returns the current parent doc id (the value last stored by NextDoc or Advance).
+ /// </summary>
+ public override int DocID()
+ {
+ return _parentDocRenamed;
+ }
+
+ /// <summary>
+ /// Returns the score computed for the current parent by the most recent NextDoc call.
+ /// With score mode None the value is never assigned by NextDoc.
+ /// </summary>
+ public override float Score()
+ {
+ return _parentScore;
+ }
+
+ /// <summary>
+ /// Returns the sum of the child freqs gathered for the current parent
+ /// (0 when the score mode is None, since child freqs are not read in that mode).
+ /// </summary>
+ public override int Freq()
+ {
+ return _parentFreq;
+ }
+
+ /// <summary>
+ /// Advances to the first matching parent document at or after <paramref name="parentTarget"/>.
+ /// The child scorer is first moved past the parent preceding the target (if it is not
+ /// already beyond it), then NextDoc gathers the children and selects the parent.
+ /// </summary>
+ /// <param name="parentTarget"> Target doc id; expected to be in the parent document space. </param>
+ /// <returns> The parent doc id reached, or NO_MORE_DOCS. </returns>
+ /// <exception cref="InvalidOperationException"> The child scorer matched a parent document. </exception>
+ public override int Advance(int parentTarget)
+ {
+
+ //System.out.println("Q.advance parentTarget=" + parentTarget);
+ if (parentTarget == NO_MORE_DOCS)
+ {
+ return _parentDocRenamed = NO_MORE_DOCS;
+ }
+
+ if (parentTarget == 0)
+ {
+ // Callers should only be passing in a docID from
+ // the parent space, so this means this parent
+ // has no children (it got docID 0), so it cannot
+ // possibly match. We must handle this case
+ // separately otherwise we pass invalid -1 to
+ // prevSetBit below:
+ return NextDoc();
+ }
+
+ // Children of the target parent start right after the previous parent document.
+ _prevParentDoc = _parentBits.PrevSetBit(parentTarget - 1);
+
+ //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
+ Debug.Assert(_prevParentDoc >= _parentDocRenamed);
+ // Only advance the child scorer when it is still behind the previous parent.
+ if (_prevParentDoc > _nextChildDoc)
+ {
+ _nextChildDoc = _childScorer.Advance(_prevParentDoc);
+ // System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
+ //} else {
+ //System.out.println(" skip childScorer advance");
+ }
+
+ // Parent & child docs are supposed to be orthogonal:
+ if (_nextChildDoc == _prevParentDoc)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ int nd = NextDoc();
+ //System.out.println(" return nextParentDoc=" + nd);
+ return nd;
+ }
+
+ /// <summary>
+ /// Builds a matching explanation that reports the current parent score and the range of
+ /// child doc ids (offset by <paramref name="docBase"/>) lying between the previous parent
+ /// and the current parent. The previous parent is only recorded by Advance, so the range
+ /// is meaningful after an Advance call.
+ /// </summary>
+ /// <param name="docBase"> Offset added to the segment-local doc ids. </param>
+ public virtual Explanation Explain(int docBase)
+ {
+ int start = docBase + _prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
+ int end = docBase + _parentDocRenamed - 1; // -1 b/c parentDoc is parent doc
+ return new ComplexExplanation(true, Score(), string.Format("Score based on child doc range from {0} to {1}", start, end));
+ }
+
+ /// <summary>
+ /// Returns the cost reported by the wrapped child scorer.
+ /// </summary>
+ public override long Cost()
+ {
+ return _childScorer.Cost();
+ }
+
+ /// <summary>
+ /// Instructs this scorer to record, for each parent, the doc ids of its matching children
+ /// and (unless the score mode is None) their scores, so they can be retrieved later.
+ /// Both buffers start with room for 5 entries; NextDoc grows them as needed.
+ /// </summary>
+ public virtual void TrackPendingChildHits()
+ {
+ _pendingChildDocs = new int[5];
+ if (_scoreMode != ScoreMode.None)
+ {
+ _pendingChildScores = new float[5];
+ }
+ }
+ }
+
+ /// <summary>
+ /// Adds the terms of the child query to <paramref name="terms"/>.
+ /// </summary>
+ public override void ExtractTerms(ISet<Term> terms)
+ {
+ _childQuery.ExtractTerms(terms);
+ }
+
+ /// <summary>
+ /// Rewrites the child query. When the child rewrites to a different query, a new join
+ /// query is returned that wraps the rewritten child, keeps the original child query for
+ /// equality purposes and carries over this query's boost; otherwise this instance is returned.
+ /// </summary>
+ public override Query Rewrite(IndexReader reader)
+ {
+     Query rewrittenChild = _childQuery.Rewrite(reader);
+     if (rewrittenChild == _childQuery)
+     {
+         // Nothing changed, so this query can be used as-is.
+         return this;
+     }
+
+     return new ToParentBlockJoinQuery(_origChildQuery, rewrittenChild, _parentsFilter, _scoreMode)
+     {
+         Boost = Boost
+     };
+ }
+
+ /// <summary>
+ /// Returns "ToParentBlockJoinQuery (" followed by the child query's string form and ")".
+ /// The <paramref name="field"/> argument is not used.
+ /// </summary>
+ public override string ToString(string field)
+ {
+ return "ToParentBlockJoinQuery (" + _childQuery + ")";
+ }
+
+ /// <summary>
+ /// Typed equality: the base query state, the parents filter, the score mode and the
+ /// original (pre-rewrite) child query must all be equal. The currently active child
+ /// query is not compared, so a rewritten query equals its source.
+ /// </summary>
+ protected bool Equals(ToParentBlockJoinQuery other)
+ {
+ return base.Equals(other) &&
+ Equals(_parentsFilter, other._parentsFilter) &&
+ _scoreMode == other._scoreMode &&
+ Equals(_origChildQuery, other._origChildQuery);
+ }
+
+ /// <summary>
+ /// Object equality: true for the same reference, false for null or for an object of a
+ /// different runtime type, otherwise decided by the typed comparison.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+     if (ReferenceEquals(this, obj))
+     {
+         return true;
+     }
+
+     if (ReferenceEquals(null, obj) || obj.GetType() != GetType())
+     {
+         return false;
+     }
+
+     return Equals((ToParentBlockJoinQuery) obj);
+ }
+
+ /// <summary>
+ /// Hash code combining the base hash, the parents filter, the score mode and the
+ /// original child query - the same members the typed equality compares.
+ /// </summary>
+ public override int GetHashCode()
+ {
+     unchecked
+     {
+         int result = base.GetHashCode();
+         int filterHash = _parentsFilter == null ? 0 : _parentsFilter.GetHashCode();
+         int origChildHash = _origChildQuery == null ? 0 : _origChildQuery.GetHashCode();
+
+         result = (result*397) ^ filterHash;
+         result = (result*397) ^ (int) _scoreMode;
+         return (result*397) ^ origChildHash;
+     }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToParentBlockJoinSortField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToParentBlockJoinSortField.cs b/src/Lucene.Net.Join/ToParentBlockJoinSortField.cs
new file mode 100644
index 0000000..aa2a3b6
--- /dev/null
+++ b/src/Lucene.Net.Join/ToParentBlockJoinSortField.cs
@@ -0,0 +1,78 @@
+using System;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Sort field that orders parent documents by a field of their nested / child documents.
+ /// Depending on the parent-level order flag, the comparator it creates is either the
+ /// "Highest" or the "Lowest" variant of <see cref="ToParentBlockJoinFieldComparator"/>.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinSortField : SortField
+ {
+     private readonly bool _order;
+     private readonly Filter _parentFilter;
+     private readonly Filter _childFilter;
+
+     /// <summary>
+     /// Create ToParentBlockJoinSortField whose parent-level order follows the child-level
+     /// <paramref name="reverse"/> flag.
+     /// </summary>
+     /// <param name="field"> The sort field on the nested / child level. </param>
+     /// <param name="type"> The sort type on the nested / child level. </param>
+     /// <param name="reverse"> Whether natural order should be reversed on the nested / child level. </param>
+     /// <param name="parentFilter"> Filter that identifies the parent documents. </param>
+     /// <param name="childFilter"> Filter that defines which child documents participate in sorting. </param>
+     public ToParentBlockJoinSortField(string field, Type_e type, bool reverse, Filter parentFilter, Filter childFilter)
+         : this(field, type, reverse, reverse, parentFilter, childFilter)
+     {
+     }
+
+     /// <summary>
+     /// Create ToParentBlockJoinSortField with independent child-level and parent-level order flags.
+     /// </summary>
+     /// <param name="field"> The sort field on the nested / child level. </param>
+     /// <param name="type"> The sort type on the nested / child level. </param>
+     /// <param name="reverse"> Whether natural order should be reversed on the nested / child document level. </param>
+     /// <param name="order"> Whether natural order should be reversed on the parent level. </param>
+     /// <param name="parentFilter"> Filter that identifies the parent documents. </param>
+     /// <param name="childFilter"> Filter that defines which child documents participate in sorting. </param>
+     public ToParentBlockJoinSortField(string field, Type_e type, bool reverse, bool order, Filter parentFilter, Filter childFilter)
+         : base(field, type, reverse)
+     {
+         _childFilter = childFilter;
+         _parentFilter = parentFilter;
+         _order = order;
+     }
+
+     /// <summary>
+     /// Creates the block-join comparator. The wrapped comparator from the base class is
+     /// requested with one slot more than <paramref name="numHits"/>; <paramref name="numHits"/>
+     /// itself is passed on to the block-join comparator as its last argument.
+     /// </summary>
+     public override FieldComparator GetComparator(int numHits, int sortPos)
+     {
+         var wrapped = base.GetComparator(numHits + 1, sortPos);
+         if (!_order)
+         {
+             return new ToParentBlockJoinFieldComparator.Lowest(wrapped, _parentFilter, _childFilter, numHits);
+         }
+
+         return new ToParentBlockJoinFieldComparator.Highest(wrapped, _parentFilter, _childFilter, numHits);
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs b/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
index 9e2879c..ea68c2f 100644
--- a/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
+++ b/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
@@ -607,7 +607,7 @@ namespace Lucene.Net.Util
/// </summary>
public static Random Random()
{
- return _random ?? (_random = new Random( /* LUCENENET TODO seed */));
+ return _random ?? (_random = new Random(1));
//return RandomizedContext.Current.Random;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj b/src/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
new file mode 100644
index 0000000..d6cd6d1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{4C1B794F-8158-45E6-85B3-2C46569BEBC2}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Tests.Join</RootNamespace>
+ <AssemblyName>Lucene.Net.Tests.Join</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <!-- Binary and framework assembly references for the Join test project. -->
+ <ItemGroup>
+ <!-- NOTE(review): Apache.NMS looks unrelated to the join tests compiled by this project; confirm it is needed and remove the reference if unused. -->
+ <Reference Include="Apache.NMS, Version=1.6.0.3083, Culture=neutral, PublicKeyToken=82756feee3957618, processorArchitecture=MSIL">
+ <HintPath>..\packages\Apache.NMS.1.6.0.3083\lib\net40\Apache.NMS.dll</HintPath>
+ <Private>True</Private>
+ </Reference>
+ <Reference Include="nunit.framework, Version=2.6.3.13283, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77, processorArchitecture=MSIL">
+ <HintPath>..\packages\NUnit.2.6.3\lib\nunit.framework.dll</HintPath>
+ <Private>True</Private>
+ </Reference>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TestBlockJoin.cs" />
+ <Compile Include="TestBlockJoinSorting.cs" />
+ <Compile Include="TestBlockJoinValidation.cs" />
+ <Compile Include="TestJoinUtil.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
+ <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
+ <Name>Lucene.Net.Grouping</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Join\Lucene.Net.Join.csproj">
+ <Project>{e8a339c7-fcf6-4a72-8586-56d8961d7b99}</Project>
+ <Name>Lucene.Net.Join</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+ <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
+ <Name>Lucene.Net.TestFramework</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="packages.config" />
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs b/src/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..f94805a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Tests.Join")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Tests.Join")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("4c1b794f-8158-45e6-85b3-2c46569bebc2")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]