You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2016/11/10 11:33:42 UTC
[31/58] [abbrv] lucenenet git commit: WIP on Grouping
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermAllGroupHeadsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermAllGroupHeadsCollector.cs b/src/Lucene.Net.Grouping/Term/TermAllGroupHeadsCollector.cs
new file mode 100644
index 0000000..fbbec34
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermAllGroupHeadsCollector.cs
@@ -0,0 +1,807 @@
+\ufeffusing Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+ /// <summary>
+ /// A base implementation of <see cref="AbstractAllGroupHeadsCollector{GH}"/> for retrieving the most relevant groups when grouping
+ /// on a string based group field. More specifically, all concrete implementations of this base implementation
+ /// use <see cref="Index.SortedDocValues"/>.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ /// <typeparam name="GH"></typeparam>
+ public abstract class TermAllGroupHeadsCollector<GH> : AbstractAllGroupHeadsCollector<GH> where GH : AbstractGroupHead /*AbstractAllGroupHeadsCollector<GH>.GroupHead*/
+ {
+ internal readonly string groupField;
+ internal readonly BytesRef scratchBytesRef = new BytesRef();
+
+ internal SortedDocValues groupIndex;
+ internal AtomicReaderContext readerContext;
+
+ protected TermAllGroupHeadsCollector(string groupField, int numberOfSorts)
+ : base(numberOfSorts)
+ {
+ this.groupField = groupField;
+ }
+ }
+
+ public class TermAllGroupHeadsCollector
+ {
+ private static readonly int DEFAULT_INITIAL_SIZE = 128;
+
+ // Disallow creation
+ private TermAllGroupHeadsCollector() { }
+
+ /**
+ * Creates an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments.
+ * This factory method decides which implementation is best suited.
+ *
+ * Delegates to {@link #create(String, org.apache.lucene.search.Sort, int)} with an initialSize of 128.
+ *
+ * @param groupField The field to group by
+ * @param sortWithinGroup The sort within each group
+ * @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
+ */
+ public static AbstractAllGroupHeadsCollector Create(string groupField, Sort sortWithinGroup)
+ {
+ return Create(groupField, sortWithinGroup, DEFAULT_INITIAL_SIZE);
+ }
+
+ /**
+ * Creates an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments.
+ * This factory method decides which implementation is best suited.
+ *
+ * @param groupField The field to group by
+ * @param sortWithinGroup The sort within each group
+ * @param initialSize The initial allocation size of the internal int set and group list which should roughly match
+ * the total number of expected unique groups. Be aware that the heap usage is
+ * 4 bytes * initialSize.
+ * @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
+ */
+ public static AbstractAllGroupHeadsCollector Create(string groupField, Sort sortWithinGroup, int initialSize)
+ {
+ bool sortAllScore = true;
+ bool sortAllFieldValue = true;
+
+ foreach (SortField sortField in sortWithinGroup.GetSort())
+ {
+ if (sortField.Type == SortField.Type_e.SCORE)
+ {
+ sortAllFieldValue = false;
+ }
+ else if (NeedGeneralImpl(sortField))
+ {
+ return new GeneralAllGroupHeadsCollector(groupField, sortWithinGroup);
+ }
+ else
+ {
+ sortAllScore = false;
+ }
+ }
+
+ if (sortAllScore)
+ {
+ return new ScoreAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
+ }
+ else if (sortAllFieldValue)
+ {
+ return new OrdAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
+ }
+ else
+ {
+ return new OrdScoreAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
+ }
+ }
+
+ // Returns whether a sort field needs the general impl.
+ private static bool NeedGeneralImpl(SortField sortField)
+ {
+ SortField.Type_e sortType = sortField.Type;
+ // Note (MvG): We can also make an optimized impl when sorting is SortField.DOC
+ return sortType != SortField.Type_e.STRING_VAL && sortType != SortField.Type_e.STRING && sortType != SortField.Type_e.SCORE;
+ }
+ }
+
+ // A general impl that works for any group sort.
+ internal class GeneralAllGroupHeadsCollector : TermAllGroupHeadsCollector<GeneralAllGroupHeadsCollector.GroupHead>
+ {
+
+ private readonly Sort sortWithinGroup;
+ private readonly IDictionary<BytesRef, GroupHead> groups;
+
+ internal Scorer scorer;
+
+ internal GeneralAllGroupHeadsCollector(string groupField, Sort sortWithinGroup)
+ : base(groupField, sortWithinGroup.GetSort().Length)
+ {
+ this.sortWithinGroup = sortWithinGroup;
+ groups = new HashMap<BytesRef, GroupHead>();
+
+ SortField[] sortFields = sortWithinGroup.GetSort();
+ for (int i = 0; i < sortFields.Length; i++)
+ {
+ reversed[i] = sortFields[i].Reverse ? -1 : 1;
+ }
+ }
+
+ protected override void RetrieveGroupHeadAndAddIfNotExist(int doc)
+ {
+ int ord = groupIndex.GetOrd(doc);
+ BytesRef groupValue;
+ if (ord == -1)
+ {
+ groupValue = null;
+ }
+ else
+ {
+ groupIndex.LookupOrd(ord, scratchBytesRef);
+ groupValue = scratchBytesRef;
+ }
+ GroupHead groupHead;
+ if (!groups.TryGetValue(groupValue, out groupHead))
+ {
+ groupHead = new GroupHead(this, groupValue, sortWithinGroup, doc);
+ groups[groupValue == null ? null : BytesRef.DeepCopyOf(groupValue)] = groupHead;
+ temporalResult.stop = true;
+ }
+ else
+ {
+ temporalResult.stop = false;
+ }
+ temporalResult.groupHead = groupHead;
+ }
+
+ protected override ICollection<GroupHead> GetCollectedGroupHeads()
+ {
+ return groups.Values;
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ this.readerContext = value;
+ groupIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+
+ foreach (GroupHead groupHead in groups.Values)
+ {
+ for (int i = 0; i < groupHead.comparators.Length; i++)
+ {
+ groupHead.comparators[i] = groupHead.comparators[i].SetNextReader(value);
+ }
+ }
+ }
+ }
+
+ public override Scorer Scorer
+ {
+ set
+ {
+ this.scorer = value;
+ foreach (GroupHead groupHead in groups.Values)
+ {
+ foreach (FieldComparator comparator in groupHead.comparators)
+ {
+ comparator.Scorer = value;
+ }
+ }
+ }
+ }
+
+ internal class GroupHead : AbstractGroupHead /*AbstractAllGroupHeadsCollector.GroupHead<BytesRef>*/
+ {
+ private readonly GeneralAllGroupHeadsCollector outerInstance;
+ public readonly BytesRef groupValue;
+
+ internal readonly FieldComparator[] comparators;
+
+ internal GroupHead(GeneralAllGroupHeadsCollector outerInstance, BytesRef groupValue, Sort sort, int doc)
+ : base(doc + outerInstance.readerContext.DocBase)
+ {
+ this.outerInstance = outerInstance;
+ SortField[] sortFields = sort.GetSort();
+ comparators = new FieldComparator[sortFields.Length];
+ for (int i = 0; i < sortFields.Length; i++)
+ {
+ comparators[i] = sortFields[i].GetComparator(1, i).SetNextReader(outerInstance.readerContext);
+ comparators[i].Scorer = outerInstance.scorer;
+ comparators[i].Copy(0, doc);
+ comparators[i].Bottom = 0;
+ }
+ }
+
+ public override int Compare(int compIDX, int doc)
+ {
+ return comparators[compIDX].CompareBottom(doc);
+ }
+
+ public override void UpdateDocHead(int doc)
+ {
+ foreach (FieldComparator comparator in comparators)
+ {
+ comparator.Copy(0, doc);
+ comparator.Bottom = 0;
+ }
+ this.Doc = doc + outerInstance.readerContext.DocBase;
+ }
+ }
+ }
+
+
+ // AbstractAllGroupHeadsCollector optimized for ord fields and scores.
+ internal class OrdScoreAllGroupHeadsCollector : TermAllGroupHeadsCollector<OrdScoreAllGroupHeadsCollector.GroupHead>
+ {
+ //private readonly TermAllGroupHeadsCollector<GH> outerInstance;
+ private readonly SentinelIntSet ordSet;
+ private readonly IList<GroupHead> collectedGroups;
+ private readonly SortField[] fields;
+
+ private SortedDocValues[] sortsIndex;
+ private Scorer scorer;
+ private GroupHead[] segmentGroupHeads;
+
+ internal OrdScoreAllGroupHeadsCollector(/*TermAllGroupHeadsCollector<GH> outerInstance,*/ string groupField, Sort sortWithinGroup, int initialSize)
+ : base(groupField, sortWithinGroup.GetSort().Length)
+ {
+ //this.outerInstance = outerInstance;
+ ordSet = new SentinelIntSet(initialSize, -2);
+ collectedGroups = new List<GroupHead>(initialSize);
+
+ SortField[] sortFields = sortWithinGroup.GetSort();
+ fields = new SortField[sortFields.Length];
+ sortsIndex = new SortedDocValues[sortFields.Length];
+ for (int i = 0; i < sortFields.Length; i++)
+ {
+ reversed[i] = sortFields[i].Reverse ? -1 : 1;
+ fields[i] = sortFields[i];
+ }
+ }
+
+ protected override ICollection<GroupHead> GetCollectedGroupHeads()
+ {
+ return collectedGroups;
+ }
+
+ public override Scorer Scorer
+ {
+ set
+ {
+ this.scorer = value;
+ }
+ }
+
+
+ protected override void RetrieveGroupHeadAndAddIfNotExist(int doc)
+ {
+ int key = groupIndex.GetOrd(doc);
+ GroupHead groupHead;
+ if (!ordSet.Exists(key))
+ {
+ ordSet.Put(key);
+ BytesRef term;
+ if (key == -1)
+ {
+ term = null;
+ }
+ else
+ {
+ term = new BytesRef();
+ groupIndex.LookupOrd(key, term);
+ }
+ groupHead = new GroupHead(this, doc, term);
+ collectedGroups.Add(groupHead);
+ segmentGroupHeads[key + 1] = groupHead;
+ temporalResult.stop = true;
+ }
+ else
+ {
+ temporalResult.stop = false;
+ groupHead = segmentGroupHeads[key + 1];
+ }
+ temporalResult.groupHead = groupHead;
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ this.readerContext = value;
+ groupIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+ for (int i = 0; i < fields.Length; i++)
+ {
+ if (fields[i].Type == SortField.Type_e.SCORE)
+ {
+ continue;
+ }
+
+ sortsIndex[i] = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, fields[i].Field);
+ }
+
+ // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+ ordSet.Clear();
+ segmentGroupHeads = new GroupHead[groupIndex.ValueCount + 1];
+ foreach (GroupHead collectedGroup in collectedGroups)
+ {
+ int ord;
+ if (collectedGroup.groupValue == null)
+ {
+ ord = -1;
+ }
+ else
+ {
+ ord = groupIndex.LookupTerm(collectedGroup.groupValue);
+ }
+ if (collectedGroup.groupValue == null || ord >= 0)
+ {
+ ordSet.Put(ord);
+ segmentGroupHeads[ord + 1] = collectedGroup;
+
+ for (int i = 0; i < sortsIndex.Length; i++)
+ {
+ if (fields[i].Type == SortField.Type_e.SCORE)
+ {
+ continue;
+ }
+ int sortOrd;
+ if (collectedGroup.sortValues[i] == null)
+ {
+ sortOrd = -1;
+ }
+ else
+ {
+ sortOrd = sortsIndex[i].LookupTerm(collectedGroup.sortValues[i]);
+ }
+ collectedGroup.sortOrds[i] = sortOrd;
+ }
+ }
+ }
+ }
+
+ }
+
+ internal class GroupHead : AbstractGroupHead /*AbstractAllGroupHeadsCollector.GroupHead<BytesRef>*/
+ {
+ private readonly OrdScoreAllGroupHeadsCollector outerInstance;
+ public readonly BytesRef groupValue;
+
+ internal BytesRef[] sortValues;
+ internal int[] sortOrds;
+ internal float[] scores;
+
+ internal GroupHead(OrdScoreAllGroupHeadsCollector outerInstance, int doc, BytesRef groupValue)
+ : base(doc + outerInstance.readerContext.DocBase)
+ {
+ this.outerInstance = outerInstance;
+ this.groupValue = groupValue;
+
+ sortValues = new BytesRef[outerInstance.sortsIndex.Length];
+ sortOrds = new int[outerInstance.sortsIndex.Length];
+ scores = new float[outerInstance.sortsIndex.Length];
+ for (int i = 0; i < outerInstance.sortsIndex.Length; i++)
+ {
+ if (outerInstance.fields[i].Type == SortField.Type_e.SCORE)
+ {
+ scores[i] = outerInstance.scorer.Score();
+ }
+ else
+ {
+ sortOrds[i] = outerInstance.sortsIndex[i].GetOrd(doc);
+ sortValues[i] = new BytesRef();
+ if (sortOrds[i] != -1)
+ {
+ outerInstance.sortsIndex[i].Get(doc, sortValues[i]);
+ }
+ }
+ }
+ }
+
+ public override int Compare(int compIDX, int doc)
+ {
+ if (outerInstance.fields[compIDX].Type == SortField.Type_e.SCORE)
+ {
+ float score = outerInstance.scorer.Score();
+ if (scores[compIDX] < score)
+ {
+ return 1;
+ }
+ else if (scores[compIDX] > score)
+ {
+ return -1;
+ }
+ return 0;
+ }
+ else
+ {
+ if (sortOrds[compIDX] < 0)
+ {
+ // The current segment doesn't contain the sort value we encountered before. Therefore the ord is negative.
+ if (outerInstance.sortsIndex[compIDX].GetOrd(doc) == -1)
+ {
+ outerInstance.scratchBytesRef.Length = 0;
+ }
+ else
+ {
+ outerInstance.sortsIndex[compIDX].Get(doc, outerInstance.scratchBytesRef);
+ }
+ return sortValues[compIDX].CompareTo(outerInstance.scratchBytesRef);
+ }
+ else
+ {
+ return sortOrds[compIDX] - outerInstance.sortsIndex[compIDX].GetOrd(doc);
+ }
+ }
+ }
+
+ public override void UpdateDocHead(int doc)
+ {
+ for (int i = 0; i < outerInstance.sortsIndex.Length; i++)
+ {
+ if (outerInstance.fields[i].Type == Search.SortField.Type_e.SCORE)
+ {
+ scores[i] = outerInstance.scorer.Score();
+ }
+ else
+ {
+ sortOrds[i] = outerInstance.sortsIndex[i].GetOrd(doc);
+ if (sortOrds[i] == -1)
+ {
+ sortValues[i].Length = 0;
+ }
+ else
+ {
+ outerInstance.sortsIndex[i].Get(doc, sortValues[i]);
+ }
+ }
+ }
+ this.Doc = doc + outerInstance.readerContext.DocBase;
+ }
+ }
+ }
+
+
+ // AbstractAllGroupHeadsCollector optimized for ord fields.
+ internal class OrdAllGroupHeadsCollector : TermAllGroupHeadsCollector<OrdAllGroupHeadsCollector.GroupHead>
+ {
+ //private readonly TermAllGroupHeadsCollector<GH> outerInstance;
+ private readonly SentinelIntSet ordSet;
+ private readonly IList<GroupHead> collectedGroups;
+ private readonly SortField[] fields;
+
+ private SortedDocValues[] sortsIndex;
+ private GroupHead[] segmentGroupHeads;
+
+ internal OrdAllGroupHeadsCollector(/*TermAllGroupHeadsCollector<GH> outerInstance,*/ string groupField, Sort sortWithinGroup, int initialSize)
+ : base(groupField, sortWithinGroup.GetSort().Length)
+ {
+ //this.outerInstance = outerInstance;
+ ordSet = new SentinelIntSet(initialSize, -2);
+ collectedGroups = new List<GroupHead>(initialSize);
+
+ SortField[] sortFields = sortWithinGroup.GetSort();
+ fields = new SortField[sortFields.Length];
+ sortsIndex = new SortedDocValues[sortFields.Length];
+ for (int i = 0; i < sortFields.Length; i++)
+ {
+ reversed[i] = sortFields[i].Reverse ? -1 : 1;
+ fields[i] = sortFields[i];
+ }
+ }
+
+ protected override ICollection<GroupHead> GetCollectedGroupHeads()
+ {
+ return collectedGroups;
+ }
+
+ public override Scorer Scorer
+ {
+ set
+ {
+ }
+ }
+
+
+ protected override void RetrieveGroupHeadAndAddIfNotExist(int doc)
+ {
+ int key = groupIndex.GetOrd(doc);
+ GroupHead groupHead;
+ if (!ordSet.Exists(key))
+ {
+ ordSet.Put(key);
+ BytesRef term;
+ if (key == -1)
+ {
+ term = null;
+ }
+ else
+ {
+ term = new BytesRef();
+ groupIndex.LookupOrd(key, term);
+ }
+ groupHead = new GroupHead(this, doc, term);
+ collectedGroups.Add(groupHead);
+ segmentGroupHeads[key + 1] = groupHead;
+ temporalResult.stop = true;
+ }
+ else
+ {
+ temporalResult.stop = false;
+ groupHead = segmentGroupHeads[key + 1];
+ }
+ temporalResult.groupHead = groupHead;
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ this.readerContext = value;
+ groupIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+ for (int i = 0; i < fields.Length; i++)
+ {
+ sortsIndex[i] = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, fields[i].Field);
+ }
+
+ // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+ ordSet.Clear();
+ segmentGroupHeads = new GroupHead[groupIndex.ValueCount + 1];
+ foreach (GroupHead collectedGroup in collectedGroups)
+ {
+ int groupOrd;
+ if (collectedGroup.groupValue == null)
+ {
+ groupOrd = -1;
+ }
+ else
+ {
+ groupOrd = groupIndex.LookupTerm(collectedGroup.groupValue);
+ }
+ if (collectedGroup.groupValue == null || groupOrd >= 0)
+ {
+ ordSet.Put(groupOrd);
+ segmentGroupHeads[groupOrd + 1] = collectedGroup;
+
+ for (int i = 0; i < sortsIndex.Length; i++)
+ {
+ int sortOrd;
+ if (collectedGroup.sortOrds[i] == -1)
+ {
+ sortOrd = -1;
+ }
+ else
+ {
+ sortOrd = sortsIndex[i].LookupTerm(collectedGroup.sortValues[i]);
+ }
+ collectedGroup.sortOrds[i] = sortOrd;
+ }
+ }
+ }
+ }
+ }
+
+ internal class GroupHead : AbstractGroupHead /* AbstractAllGroupHeadsCollector.GroupHead<BytesRef>*/
+ {
+ private readonly OrdAllGroupHeadsCollector outerInstance;
+ public readonly BytesRef groupValue;
+ internal BytesRef[] sortValues;
+ internal int[] sortOrds;
+
+ internal GroupHead(OrdAllGroupHeadsCollector outerInstance, int doc, BytesRef groupValue)
+ : base(doc + outerInstance.readerContext.DocBase)
+ {
+ this.outerInstance = outerInstance;
+ this.groupValue = groupValue;
+
+ sortValues = new BytesRef[outerInstance.sortsIndex.Length];
+ sortOrds = new int[outerInstance.sortsIndex.Length];
+ for (int i = 0; i < outerInstance.sortsIndex.Length; i++)
+ {
+ sortOrds[i] = outerInstance.sortsIndex[i].GetOrd(doc);
+ sortValues[i] = new BytesRef();
+ if (sortOrds[i] != -1)
+ {
+ outerInstance.sortsIndex[i].Get(doc, sortValues[i]);
+ }
+ }
+ }
+
+ public override int Compare(int compIDX, int doc)
+ {
+ if (sortOrds[compIDX] < 0)
+ {
+ // The current segment doesn't contain the sort value we encountered before. Therefore the ord is negative.
+ if (outerInstance.sortsIndex[compIDX].GetOrd(doc) == -1)
+ {
+ outerInstance.scratchBytesRef.Length = 0;
+ }
+ else
+ {
+ outerInstance.sortsIndex[compIDX].Get(doc, outerInstance.scratchBytesRef);
+ }
+ return sortValues[compIDX].CompareTo(outerInstance.scratchBytesRef);
+ }
+ else
+ {
+ return sortOrds[compIDX] - outerInstance.sortsIndex[compIDX].GetOrd(doc);
+ }
+ }
+
+ public override void UpdateDocHead(int doc)
+ {
+ for (int i = 0; i < outerInstance.sortsIndex.Length; i++)
+ {
+ sortOrds[i] = outerInstance.sortsIndex[i].GetOrd(doc);
+ if (sortOrds[i] == -1)
+ {
+ sortValues[i].Length = 0;
+ }
+ else
+ {
+ outerInstance.sortsIndex[i].LookupOrd(sortOrds[i], sortValues[i]);
+ }
+ }
+ this.Doc = doc + outerInstance.readerContext.DocBase;
+ }
+
+ }
+
+ }
+
+
+ // AbstractAllGroupHeadsCollector optimized for scores.
+ internal class ScoreAllGroupHeadsCollector : TermAllGroupHeadsCollector<ScoreAllGroupHeadsCollector.GroupHead>
+ {
+ //private readonly TermAllGroupHeadsCollector<GH> outerInstance;
+ private readonly SentinelIntSet ordSet;
+ private readonly IList<GroupHead> collectedGroups;
+ private readonly SortField[] fields;
+
+ private Scorer scorer;
+ private GroupHead[] segmentGroupHeads;
+
+ internal ScoreAllGroupHeadsCollector(/*TermAllGroupHeadsCollector<GH> outerInstance,*/ string groupField, Sort sortWithinGroup, int initialSize)
+ : base(groupField, sortWithinGroup.GetSort().Length)
+ {
+ //this.outerInstance = outerInstance;
+ ordSet = new SentinelIntSet(initialSize, -2);
+ collectedGroups = new List<GroupHead>(initialSize);
+
+ SortField[] sortFields = sortWithinGroup.GetSort();
+ fields = new SortField[sortFields.Length];
+ for (int i = 0; i < sortFields.Length; i++)
+ {
+ reversed[i] = sortFields[i].Reverse ? -1 : 1;
+ fields[i] = sortFields[i];
+ }
+ }
+
+ protected override ICollection<GroupHead> GetCollectedGroupHeads()
+ {
+ return collectedGroups;
+ }
+
+ public override Scorer Scorer
+ {
+ set
+ {
+ this.scorer = value;
+ }
+ }
+
+ protected override void RetrieveGroupHeadAndAddIfNotExist(int doc)
+ {
+ int key = groupIndex.GetOrd(doc);
+ GroupHead groupHead;
+ if (!ordSet.Exists(key))
+ {
+ ordSet.Put(key);
+ BytesRef term;
+ if (key == -1)
+ {
+ term = null;
+ }
+ else
+ {
+ term = new BytesRef();
+ groupIndex.LookupOrd(key, term);
+ }
+ groupHead = new GroupHead(this, doc, term);
+ collectedGroups.Add(groupHead);
+ segmentGroupHeads[key + 1] = groupHead;
+ temporalResult.stop = true;
+ }
+ else
+ {
+ temporalResult.stop = false;
+ groupHead = segmentGroupHeads[key + 1];
+ }
+ temporalResult.groupHead = groupHead;
+ }
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ this.readerContext = value;
+ groupIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+
+ // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+ ordSet.Clear();
+ segmentGroupHeads = new GroupHead[groupIndex.ValueCount + 1];
+ foreach (GroupHead collectedGroup in collectedGroups)
+ {
+ int ord;
+ if (collectedGroup.groupValue == null)
+ {
+ ord = -1;
+ }
+ else
+ {
+ ord = groupIndex.LookupTerm(collectedGroup.groupValue);
+ }
+ if (collectedGroup.groupValue == null || ord >= 0)
+ {
+ ordSet.Put(ord);
+ segmentGroupHeads[ord + 1] = collectedGroup;
+ }
+ }
+ }
+ }
+
+ internal class GroupHead : AbstractGroupHead /*AbstractAllGroupHeadsCollector.GroupHead<BytesRef>*/
+ {
+ private readonly ScoreAllGroupHeadsCollector outerInstance;
+ public readonly BytesRef groupValue;
+ internal float[] scores;
+
+ internal GroupHead(ScoreAllGroupHeadsCollector outerInstance, int doc, BytesRef groupValue)
+ : base(doc + outerInstance.readerContext.DocBase)
+ {
+ this.outerInstance = outerInstance;
+ this.groupValue = groupValue;
+
+ scores = new float[outerInstance.fields.Length];
+ float score = outerInstance.scorer.Score();
+ for (int i = 0; i < scores.Length; i++)
+ {
+ scores[i] = score;
+ }
+ }
+
+ public override int Compare(int compIDX, int doc)
+ {
+ float score = outerInstance.scorer.Score();
+ if (scores[compIDX] < score)
+ {
+ return 1;
+ }
+ else if (scores[compIDX] > score)
+ {
+ return -1;
+ }
+ return 0;
+ }
+
+ public override void UpdateDocHead(int doc)
+ {
+ float score = outerInstance.scorer.Score();
+ for (int i = 0; i < scores.Length; i++)
+ {
+ scores[i] = score;
+ }
+ this.Doc = doc + outerInstance.readerContext.DocBase;
+ }
+
+ }
+
+
+
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermAllGroupsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermAllGroupsCollector.cs b/src/Lucene.Net.Grouping/Term/TermAllGroupsCollector.cs
new file mode 100644
index 0000000..7693d93
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermAllGroupsCollector.cs
@@ -0,0 +1,120 @@
+\ufeffusing Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+ /// <summary>
+ /// A collector that collects all groups that match the
+ /// query. Only the group value is collected, and the order
+ /// is undefined. This collector does not determine
+ /// the most relevant document of a group.
+ ///
+ /// <para>
+ /// Implementation detail: an int hash set (SentinelIntSet)
+ /// is used to detect if a group is already added to the
+ /// total count. For each segment the int set is cleared and filled
+ /// with previous counted groups that occur in the new
+ /// segment.
+ /// </para>
+ /// @lucene.experimental
+ /// </summary>
+ public class TermAllGroupsCollector : AbstractAllGroupsCollector<BytesRef>
+ {
+ private static readonly int DEFAULT_INITIAL_SIZE = 128;
+
+ private readonly String groupField;
+ private readonly SentinelIntSet ordSet;
+ private readonly IList<BytesRef> groups;
+
+ private SortedDocValues index;
+
+ /// <summary>
+ /// Expert: Constructs an <see cref="AbstractAllGroupsCollector{BytesRef}"/>
+ /// </summary>
+ /// <param name="groupField">The field to group by</param>
+ /// <param name="initialSize">
+ /// The initial allocation size of the
+ /// internal int set and group list
+ /// which should roughly match the total
+ /// number of expected unique groups. Be aware that the
+ /// heap usage is 4 bytes * initialSize.
+ /// </param>
+ public TermAllGroupsCollector(string groupField, int initialSize)
+ {
+ ordSet = new SentinelIntSet(initialSize, -2);
+ groups = new List<BytesRef>(initialSize);
+ this.groupField = groupField;
+ }
+
+ /// <summary>
+ /// Constructs an <see cref="AbstractAllGroupsCollector{BytesRef}"/>. This sets the
+ /// initial allocation size for the internal int set and group
+ /// list to 128.
+ /// </summary>
+ /// <param name="groupField">The field to group by</param>
+ public TermAllGroupsCollector(string groupField)
+ : this(groupField, DEFAULT_INITIAL_SIZE)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ int key = index.GetOrd(doc);
+ if (!ordSet.Exists(key))
+ {
+ ordSet.Put(key);
+ BytesRef term;
+ if (key == -1)
+ {
+ term = null;
+ }
+ else
+ {
+ term = new BytesRef();
+ index.LookupOrd(key, term);
+ }
+ groups.Add(term);
+ }
+ }
+
+ public override ICollection<BytesRef> Groups
+ {
+ get
+ {
+ return groups;
+ }
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ index = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+
+ // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+ ordSet.Clear();
+ foreach (BytesRef countedGroup in groups)
+ {
+ if (countedGroup == null)
+ {
+ ordSet.Put(-1);
+ }
+ else
+ {
+ int ord = index.LookupTerm(countedGroup);
+ if (ord >= 0)
+ {
+ ordSet.Put(ord);
+ }
+ }
+ }
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs b/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
new file mode 100644
index 0000000..d6f6bab
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
@@ -0,0 +1,144 @@
+\ufeffusing Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+ /// <summary>
+ /// A term based implementation of <see cref="AbstractDistinctValuesCollector{TermDistinctValuesCollector.GroupCount}"/> that relies
+ /// on <see cref="SortedDocValues"/> to count the distinct values per group.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class TermDistinctValuesCollector : AbstractDistinctValuesCollector<TermDistinctValuesCollector.GroupCount>
+ {
+ private readonly string groupField;
+ private readonly string countField;
+ private readonly List<GroupCount> groups;
+ private readonly SentinelIntSet ordSet;
+ private readonly GroupCount[] groupCounts;
+
+ private SortedDocValues groupFieldTermIndex;
+ private SortedDocValues countFieldTermIndex;
+
+ /**
+ * Constructs {@link TermDistinctValuesCollector} instance.
+ *
+ * @param groupField The field to group by
+ * @param countField The field to count distinct values for
+ * @param groups The top N groups, collected during the first phase search
+ */
+ public TermDistinctValuesCollector(string groupField, string countField, ICollection<SearchGroup<BytesRef>> groups)
+ {
+ this.groupField = groupField;
+ this.countField = countField;
+ this.groups = new List<GroupCount>(groups.Count);
+ foreach (SearchGroup<BytesRef> group in groups)
+ {
+ this.groups.Add(new GroupCount(group.groupValue));
+ }
+ ordSet = new SentinelIntSet(groups.Count, -2);
+ groupCounts = new GroupCount[ordSet.Keys.Length];
+ }
+
+ public override void Collect(int doc)
+ {
+ int slot = ordSet.Find(groupFieldTermIndex.GetOrd(doc));
+ if (slot < 0)
+ {
+ return;
+ }
+
+ GroupCount gc = groupCounts[slot];
+ int countOrd = countFieldTermIndex.GetOrd(doc);
+ if (DoesNotContainOrd(countOrd, gc.ords))
+ {
+ if (countOrd == -1)
+ {
+ gc.uniqueValues.Add(null);
+ }
+ else
+ {
+ BytesRef br = new BytesRef();
+ countFieldTermIndex.LookupOrd(countOrd, br);
+ gc.uniqueValues.Add(br);
+ }
+
+ gc.ords = Arrays.CopyOf(gc.ords, gc.ords.Length + 1);
+ gc.ords[gc.ords.Length - 1] = countOrd;
+ if (gc.ords.Length > 1)
+ {
+ Array.Sort(gc.ords);
+ }
+ }
+ }
+
+ private bool DoesNotContainOrd(int ord, int[] ords)
+ {
+ if (ords.Length == 0)
+ {
+ return true;
+ }
+ else if (ords.Length == 1)
+ {
+ return ord != ords[0];
+ }
+ return Array.BinarySearch(ords, ord) < 0;
+ }
+
+ public override List<GroupCount> GetGroups()
+ {
+ return groups;
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ groupFieldTermIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+ countFieldTermIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, countField);
+ ordSet.Clear();
+ foreach (GroupCount group in groups)
+ {
+ int groupOrd = group.groupValue == null ? -1 : groupFieldTermIndex.LookupTerm(group.groupValue);
+ if (group.groupValue != null && groupOrd < 0)
+ {
+ continue;
+ }
+
+ groupCounts[ordSet.Put(groupOrd)] = group;
+ group.ords = new int[group.uniqueValues.Count];
+ Arrays.Fill(group.ords, -2);
+ int i = 0;
+ foreach (BytesRef value2 in group.uniqueValues)
+ {
+ int countOrd = value2 == null ? -1 : countFieldTermIndex.LookupTerm(value2);
+ if (value2 == null || countOrd >= 0)
+ {
+ group.ords[i++] = countOrd;
+ }
+ }
+ }
+ }
+ }
+
+ /** Holds distinct values for a single group.
+ *
+ * @lucene.experimental */
+ public class GroupCount : AbstractGroupCount<BytesRef> /*AbstractDistinctValuesCollector.GroupCount<BytesRef>*/
+ {
+ internal int[] ords;
+
+ internal GroupCount(BytesRef groupValue)
+ : base(groupValue)
+ {
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermFirstPassGroupingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermFirstPassGroupingCollector.cs b/src/Lucene.Net.Grouping/Term/TermFirstPassGroupingCollector.cs
new file mode 100644
index 0000000..17003ba
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermFirstPassGroupingCollector.cs
@@ -0,0 +1,88 @@
+\ufeffusing Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+ /// <summary>
+ /// Concrete implementation of <see cref="AbstractFirstPassGroupingCollector{BytesRef}"/> that groups based on
+ /// field values and more specifically uses <see cref="SortedDocValues"/>
+ /// to collect groups.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class TermFirstPassGroupingCollector : AbstractFirstPassGroupingCollector<BytesRef>
+ {
+ private readonly BytesRef scratchBytesRef = new BytesRef();
+ private SortedDocValues index;
+
+ private string groupField;
+
+ /// <summary>
+ /// Create the first pass collector.
+ /// </summary>
+ /// <param name="groupField">
+ /// The field used to group
+ /// documents. This field must be single-valued and
+ /// indexed (<see cref="FieldCache"/> is used to access its value
+ /// per-document).
+ /// </param>
+ /// <param name="groupSort">
+ /// The <see cref="Sort"/> used to sort the
+ /// groups. The top sorted document within each group
+ /// according to groupSort, determines how that group
+ /// sorts against other groups. This must be non-null,
+ /// ie, if you want to groupSort by relevance use
+ /// <see cref="Sort.RELEVANCE"/>.
+ /// </param>
+ /// <param name="topNGroups">
+ /// How many top groups to keep.
+ /// </param>
+ /// <exception cref="IOException">When I/O related errors occur</exception>
+ public TermFirstPassGroupingCollector(string groupField, Sort groupSort, int topNGroups)
+ : base(groupSort, topNGroups)
+ {
+ this.groupField = groupField;
+ }
+
+ protected override BytesRef GetDocGroupValue(int doc)
+ {
+ int ord = index.GetOrd(doc);
+ if (ord == -1)
+ {
+ return null;
+ }
+ else
+ {
+ index.LookupOrd(ord, scratchBytesRef);
+ return scratchBytesRef;
+ }
+ }
+
+ protected override BytesRef CopyDocGroupValue(BytesRef groupValue, BytesRef reuse)
+ {
+ if (groupValue == null)
+ {
+ return null;
+ }
+ else if (reuse != null)
+ {
+ reuse.CopyBytes(groupValue);
+ return reuse;
+ }
+ else
+ {
+ return BytesRef.DeepCopyOf(groupValue);
+ }
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ base.NextReader = value;
+ index = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermGroupFacetCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermGroupFacetCollector.cs b/src/Lucene.Net.Grouping/Term/TermGroupFacetCollector.cs
new file mode 100644
index 0000000..08fbb70
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermGroupFacetCollector.cs
@@ -0,0 +1,444 @@
+\ufeffusing Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+ /// <summary>
+ /// An implementation of <see cref="AbstractGroupFacetCollector"/> that computes grouped facets based on the indexed terms
+ /// from the <see cref="FieldCache"/>.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public abstract class TermGroupFacetCollector : AbstractGroupFacetCollector
+ {
+ // Facet hits gathered so far, keyed by materialized terms so they can be
+ // re-resolved against each new segment's ordinal space.
+ internal readonly List<GroupedFacetHit> groupedFacetHits;
+ // Encoded (groupOrd, facetOrd) pairs already counted in the current segment.
+ internal readonly SentinelIntSet segmentGroupedFacetHits;
+
+ // Per-segment terms index for the group field.
+ internal SortedDocValues groupFieldTermsIndex;
+
+ /// <summary>
+ /// Factory method for creating the right implementation based on the fact whether the facet field contains
+ /// multiple tokens per documents.
+ /// </summary>
+ /// <param name="groupField">The group field</param>
+ /// <param name="facetField">The facet field</param>
+ /// <param name="facetFieldMultivalued">Whether the facet field has multiple tokens per document</param>
+ /// <param name="facetPrefix">The facet prefix a facet entry should start with to be included.</param>
+ /// <param name="initialSize">
+ /// The initial allocation size of the internal int set and group facet list which should roughly
+ /// match the total number of expected unique groups. Be aware that the heap usage is
+ /// 4 bytes * initialSize.
+ /// </param>
+ /// <returns><see cref="TermGroupFacetCollector"/> implementation</returns>
+ public static TermGroupFacetCollector CreateTermGroupFacetCollector(string groupField,
+ string facetField,
+ bool facetFieldMultivalued,
+ BytesRef facetPrefix,
+ int initialSize)
+ {
+ if (facetFieldMultivalued)
+ {
+ return new MV(groupField, facetField, facetPrefix, initialSize);
+ }
+ else
+ {
+ return new SV(groupField, facetField, facetPrefix, initialSize);
+ }
+ }
+
+ internal TermGroupFacetCollector(string groupField, string facetField, BytesRef facetPrefix, int initialSize)
+ : base(groupField, facetField, facetPrefix)
+ {
+ groupedFacetHits = new List<GroupedFacetHit>(initialSize);
+ // int.MinValue is a safe sentinel: encoded (group, facet) keys are
+ // products/sums of non-negative ord counts and small offsets.
+ segmentGroupedFacetHits = new SentinelIntSet(initialSize, int.MinValue);
+ }
+
+ // Implementation for single valued facet fields.
+ internal class SV : TermGroupFacetCollector
+ {
+
+ // Per-segment terms index for the (single-valued) facet field.
+ private SortedDocValues facetFieldTermsIndex;
+
+ internal SV(string groupField, string facetField, BytesRef facetPrefix, int initialSize)
+ : base(groupField, facetField, facetPrefix, initialSize)
+ {
+ }
+
+ // Counts the doc's facet value at most once per (group, facet) pair per segment.
+ public override void Collect(int doc)
+ {
+ int facetOrd = facetFieldTermsIndex.GetOrd(doc);
+ // Skip facet ords outside the [startFacetOrd, endFacetOrd) window
+ // established for the current facet prefix (if any).
+ if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd)
+ {
+ return;
+ }
+
+ int groupOrd = groupFieldTermsIndex.GetOrd(doc);
+ // Encode (groupOrd, facetOrd) as a single int key; the +1 leaves room
+ // for the -1 "no facet value" ordinal.
+ int segmentGroupedFacetsIndex = groupOrd * (facetFieldTermsIndex.ValueCount + 1) + facetOrd;
+ if (segmentGroupedFacetHits.Exists(segmentGroupedFacetsIndex))
+ {
+ return;
+ }
+
+ segmentTotalCount++;
+ // Shift by one so facetOrd == -1 (doc without facet value) maps to slot 0.
+ segmentFacetCounts[facetOrd + 1]++;
+
+ segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+
+ // Materialize the group/facet terms so the hit can be re-resolved
+ // against the next segment's (different) ordinal space.
+ BytesRef groupKey;
+ if (groupOrd == -1)
+ {
+ groupKey = null;
+ }
+ else
+ {
+ groupKey = new BytesRef();
+ groupFieldTermsIndex.LookupOrd(groupOrd, groupKey);
+ }
+
+ BytesRef facetKey;
+ if (facetOrd == -1)
+ {
+ facetKey = null;
+ }
+ else
+ {
+ facetKey = new BytesRef();
+ facetFieldTermsIndex.LookupOrd(facetOrd, facetKey);
+ }
+
+ groupedFacetHits.Add(new GroupedFacetHit(groupKey, facetKey));
+ }
+
+ // Setter is invoked once per segment: flushes the previous segment's counts
+ // into segmentResults, then re-resolves all previously seen hits against
+ // the new segment's ordinals.
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ if (segmentFacetCounts != null)
+ {
+ segmentResults.Add(CreateSegmentResult());
+ }
+
+ groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+ facetFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, facetField);
+
+ // 1+ to allow for the -1 "not set":
+ segmentFacetCounts = new int[facetFieldTermsIndex.ValueCount + 1];
+ segmentTotalCount = 0;
+
+ segmentGroupedFacetHits.Clear();
+ foreach (GroupedFacetHit groupedFacetHit in groupedFacetHits)
+ {
+ int facetOrd = groupedFacetHit.facetValue == null ? -1 : facetFieldTermsIndex.LookupTerm(groupedFacetHit.facetValue);
+ if (groupedFacetHit.facetValue != null && facetOrd < 0)
+ {
+ // Facet term does not occur in this segment; it cannot produce hits here.
+ continue;
+ }
+
+ int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.LookupTerm(groupedFacetHit.groupValue);
+ if (groupedFacetHit.groupValue != null && groupOrd < 0)
+ {
+ continue;
+ }
+
+ int segmentGroupedFacetsIndex = groupOrd * (facetFieldTermsIndex.ValueCount + 1) + facetOrd;
+ segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+ }
+
+ if (facetPrefix != null)
+ {
+ startFacetOrd = facetFieldTermsIndex.LookupTerm(facetPrefix);
+ if (startFacetOrd < 0)
+ {
+ // Points to the ord one higher than facetPrefix
+ startFacetOrd = -startFacetOrd - 1;
+ }
+ BytesRef facetEndPrefix = BytesRef.DeepCopyOf(facetPrefix);
+ facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
+ endFacetOrd = facetFieldTermsIndex.LookupTerm(facetEndPrefix);
+ Debug.Assert(endFacetOrd < 0);
+ endFacetOrd = -endFacetOrd - 1; // Points to the ord one higher than facetEndPrefix
+ }
+ else
+ {
+ // No prefix: include the -1 "missing" ord through the last real ord.
+ startFacetOrd = -1;
+ endFacetOrd = facetFieldTermsIndex.ValueCount;
+ }
+ }
+ }
+
+
+ protected override AbstractSegmentResult CreateSegmentResult()
+ {
+ return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.TermsEnum(), startFacetOrd, endFacetOrd);
+ }
+
+ // Per-segment facet counts plus a TermsEnum used to merge this segment's
+ // results back into global facet terms.
+ internal class SegmentResult : AbstractGroupFacetCollector.AbstractSegmentResult
+ {
+
+ internal readonly TermsEnum tenum;
+
+ internal SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd, int endFacetOrd)
+ : base(counts, total - counts[0], counts[0], endFacetOrd + 1)
+ {
+ // counts[0] is the slot for docs without a facet value (the -1 ord).
+ this.tenum = tenum;
+ this.mergePos = startFacetOrd == -1 ? 1 : startFacetOrd + 1;
+ if (mergePos < maxTermPos)
+ {
+ Debug.Assert(tenum != null);
+ tenum.SeekExact(startFacetOrd == -1 ? 0 : startFacetOrd);
+ mergeTerm = tenum.Term();
+ }
+ }
+
+ protected internal override void NextTerm()
+ {
+ mergeTerm = tenum.Next();
+ }
+ }
+ }
+
+ // Implementation for multi valued facet fields.
+ internal class MV : TermGroupFacetCollector
+ {
+
+ // Per-segment multi-valued ordinals for the facet field.
+ private SortedSetDocValues facetFieldDocTermOrds;
+ // Enum over the facet field's terms; null when the segment has no facet terms.
+ private TermsEnum facetOrdTermsEnum;
+ private int facetFieldNumTerms;
+ // Reused buffer for facet term lookups; deep-copied before being retained.
+ private readonly BytesRef scratch = new BytesRef();
+
+ internal MV(string groupField, string facetField, BytesRef facetPrefix, int initialSize)
+ : base(groupField, facetField, facetPrefix, initialSize)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ int groupOrd = groupFieldTermsIndex.GetOrd(doc);
+ if (facetFieldNumTerms == 0)
+ {
+ // Segment has no facet terms at all: every doc counts in the reserved
+ // "missing facet" slot, unless a facet prefix filters everything out.
+ int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1);
+ if (facetPrefix != null || segmentGroupedFacetHits.Exists(segmentGroupedFacetsIndex))
+ {
+ return;
+ }
+
+ segmentTotalCount++;
+ segmentFacetCounts[facetFieldNumTerms]++;
+
+ segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+ BytesRef groupKey;
+ if (groupOrd == -1)
+ {
+ groupKey = null;
+ }
+ else
+ {
+ groupKey = new BytesRef();
+ groupFieldTermsIndex.LookupOrd(groupOrd, groupKey);
+ }
+ groupedFacetHits.Add(new GroupedFacetHit(groupKey, null));
+ return;
+ }
+
+ // Iterate all facet ords this document carries.
+ facetFieldDocTermOrds.Document = doc;
+ long ord;
+ bool empty = true;
+ while ((ord = facetFieldDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+ {
+ Process(groupOrd, (int)ord);
+ empty = false;
+ }
+
+ if (empty)
+ {
+ Process(groupOrd, facetFieldNumTerms); // this facet ord is reserved for docs not containing facet field.
+ }
+ }
+
+ // Counts one (group, facet) pair, at most once per segment.
+ private void Process(int groupOrd, int facetOrd)
+ {
+ if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd)
+ {
+ return;
+ }
+
+ // Encode (groupOrd, facetOrd) as a single int key; +1 leaves room for
+ // the reserved "missing facet" ordinal (== facetFieldNumTerms).
+ int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
+ if (segmentGroupedFacetHits.Exists(segmentGroupedFacetsIndex))
+ {
+ return;
+ }
+
+ segmentTotalCount++;
+ segmentFacetCounts[facetOrd]++;
+
+ segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+
+ // Materialize terms so the hit can be re-resolved per segment.
+ BytesRef groupKey;
+ if (groupOrd == -1)
+ {
+ groupKey = null;
+ }
+ else
+ {
+ groupKey = new BytesRef();
+ groupFieldTermsIndex.LookupOrd(groupOrd, groupKey);
+ }
+
+ BytesRef facetValue;
+ if (facetOrd == facetFieldNumTerms)
+ {
+ facetValue = null;
+ }
+ else
+ {
+ facetFieldDocTermOrds.LookupOrd(facetOrd, scratch);
+ facetValue = BytesRef.DeepCopyOf(scratch); // must we?
+ }
+ groupedFacetHits.Add(new GroupedFacetHit(groupKey, facetValue));
+ }
+
+ // Setter is invoked once per segment: flushes the previous segment's counts,
+ // then re-resolves all previously seen hits against the new segment's ords.
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ if (segmentFacetCounts != null)
+ {
+ segmentResults.Add(CreateSegmentResult());
+ }
+
+ groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+ facetFieldDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, facetField);
+ facetFieldNumTerms = (int)facetFieldDocTermOrds.ValueCount;
+ if (facetFieldNumTerms == 0)
+ {
+ facetOrdTermsEnum = null;
+ }
+ else
+ {
+ facetOrdTermsEnum = facetFieldDocTermOrds.TermsEnum();
+ }
+ // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
+ segmentFacetCounts = new int[facetFieldNumTerms + 1];
+ segmentTotalCount = 0;
+
+ segmentGroupedFacetHits.Clear();
+ foreach (GroupedFacetHit groupedFacetHit in groupedFacetHits)
+ {
+ int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.LookupTerm(groupedFacetHit.groupValue);
+ if (groupedFacetHit.groupValue != null && groupOrd < 0)
+ {
+ // Group term does not occur in this segment.
+ continue;
+ }
+
+ int facetOrd;
+ if (groupedFacetHit.facetValue != null)
+ {
+ if (facetOrdTermsEnum == null || !facetOrdTermsEnum.SeekExact(groupedFacetHit.facetValue))
+ {
+ continue;
+ }
+ facetOrd = (int)facetOrdTermsEnum.Ord();
+ }
+ else
+ {
+ facetOrd = facetFieldNumTerms;
+ }
+
+ // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
+ int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
+ segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+ }
+
+ if (facetPrefix != null)
+ {
+ TermsEnum.SeekStatus seekStatus;
+ if (facetOrdTermsEnum != null)
+ {
+ seekStatus = facetOrdTermsEnum.SeekCeil(facetPrefix);
+ }
+ else
+ {
+ seekStatus = TermsEnum.SeekStatus.END;
+ }
+
+ if (seekStatus != TermsEnum.SeekStatus.END)
+ {
+ startFacetOrd = (int)facetOrdTermsEnum.Ord();
+ }
+ else
+ {
+ // No facet term at or after the prefix: collect nothing this segment.
+ startFacetOrd = 0;
+ endFacetOrd = 0;
+ return;
+ }
+
+ BytesRef facetEndPrefix = BytesRef.DeepCopyOf(facetPrefix);
+ facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
+ seekStatus = facetOrdTermsEnum.SeekCeil(facetEndPrefix);
+ if (seekStatus != TermsEnum.SeekStatus.END)
+ {
+ endFacetOrd = (int)facetOrdTermsEnum.Ord();
+ }
+ else
+ {
+ endFacetOrd = facetFieldNumTerms; // Don't include null...
+ }
+ }
+ else
+ {
+ // No prefix: include every facet ord plus the "missing facet" slot.
+ startFacetOrd = 0;
+ endFacetOrd = facetFieldNumTerms + 1;
+ }
+ }
+ }
+
+ protected override AbstractSegmentResult CreateSegmentResult()
+ {
+ return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldNumTerms, facetOrdTermsEnum, startFacetOrd, endFacetOrd);
+ }
+
+ // Per-segment facet counts plus a TermsEnum used for merging; the slot at
+ // missingCountIndex is reserved for docs without a facet value.
+ internal class SegmentResult : AbstractGroupFacetCollector.AbstractSegmentResult
+ {
+
+ internal readonly TermsEnum tenum;
+
+ internal SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum tenum, int startFacetOrd, int endFacetOrd)
+ : base(counts, total - counts[missingCountIndex], counts[missingCountIndex],
+ endFacetOrd == missingCountIndex + 1 ? missingCountIndex : endFacetOrd)
+ {
+ this.tenum = tenum;
+ this.mergePos = startFacetOrd;
+ if (tenum != null)
+ {
+ tenum.SeekExact(mergePos);
+ mergeTerm = tenum.Term();
+ }
+ }
+
+ protected internal override void NextTerm()
+ {
+ mergeTerm = tenum.Next();
+ }
+ }
+ }
+ }
+
+
+ // Immutable pairing of a group term with a facet term, recorded once per
+ // unique (group, facet) combination encountered while collecting.
+ internal class GroupedFacetHit
+ {
+ // Group term this hit belongs to; null when the doc had no group value.
+ internal readonly BytesRef groupValue;
+ // Facet term counted for the group; null when the doc had no facet value.
+ internal readonly BytesRef facetValue;
+
+ internal GroupedFacetHit(BytesRef groupValue, BytesRef facetValue)
+ {
+ this.facetValue = facetValue;
+ this.groupValue = groupValue;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermSecondPassGroupingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermSecondPassGroupingCollector.cs b/src/Lucene.Net.Grouping/Term/TermSecondPassGroupingCollector.cs
new file mode 100644
index 0000000..da70372
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermSecondPassGroupingCollector.cs
@@ -0,0 +1,65 @@
+\ufeffusing Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+ /// <summary>
+ /// Concrete implementation of <see cref="AbstractSecondPassGroupingCollector{BytesRef}"/> that groups based on
+ /// field values and more specifically uses <see cref="SortedDocValues"/>
+ /// to collect grouped docs.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class TermSecondPassGroupingCollector : AbstractSecondPassGroupingCollector<BytesRef>
+ {
+ // Maps per-segment group ords to slots in groupDocs. Sentinel is -2 because
+ // both -1 (doc without a group value) and ords >= 0 are legal keys.
+ private readonly SentinelIntSet ordSet;
+ // Per-segment terms index for the group field.
+ private SortedDocValues index;
+ private readonly string groupField;
+
+ public TermSecondPassGroupingCollector(string groupField, ICollection<SearchGroup<BytesRef>> groups, Sort groupSort, Sort withinGroupSort,
+ int maxDocsPerGroup, bool getScores, bool getMaxScores, bool fillSortFields)
+ : base(groups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields)
+ {
+ ordSet = new SentinelIntSet(groupMap.Count, -2);
+ this.groupField = groupField;
+ // ordSet.Keys.Length is the set's capacity, which bounds the slot
+ // indexes that Put() can hand out below.
+ groupDocs = /*(SearchGroupDocs<BytesRef>[])*/ new AbstractSecondPassGroupingCollector.SearchGroupDocs<BytesRef>[ordSet.Keys.Length];
+ }
+
+ // Setter is invoked once per segment: reloads the terms index and re-maps
+ // each requested group's term to this segment's ordinal.
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ base.NextReader = value;
+ index = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+
+ // Rebuild ordSet
+ ordSet.Clear();
+ foreach (AbstractSecondPassGroupingCollector.SearchGroupDocs<BytesRef> group in groupMap.Values)
+ {
+ // System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ int ord = group.groupValue == null ? -1 : index.LookupTerm(group.groupValue);
+ // Skip groups whose term does not occur in this segment (ord < 0);
+ // the null group is always registered under ord -1.
+ if (group.groupValue == null || ord >= 0)
+ {
+ groupDocs[ordSet.Put(ord)] = group;
+ }
+ }
+ }
+ }
+
+ // Returns the group bucket for this doc, or null when the doc's group
+ // was not among the requested top groups for this segment.
+ protected override AbstractSecondPassGroupingCollector.SearchGroupDocs<BytesRef> RetrieveGroup(int doc)
+ {
+ int slot = ordSet.Find(index.GetOrd(doc));
+ if (slot >= 0)
+ {
+ return groupDocs[slot];
+ }
+ return null;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/TopGroups.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/TopGroups.cs b/src/Lucene.Net.Grouping/TopGroups.cs
index 017c975..091103d 100644
--- a/src/Lucene.Net.Grouping/TopGroups.cs
+++ b/src/Lucene.Net.Grouping/TopGroups.cs
@@ -1,7 +1,6 @@
\ufeffusing System;
-using Lucene.Net.Search;
-namespace Lucene.Net.Grouping
+namespace Lucene.Net.Search.Grouping
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,7 +24,7 @@ namespace Lucene.Net.Grouping
///
/// @lucene.experimental
/// </summary>
- public class TopGroups<TGroupValueType>
+ public class TopGroups<TGroupValue>
{
/// <summary>
/// Number of documents matching the search </summary>
@@ -41,7 +40,7 @@ namespace Lucene.Net.Grouping
/// <summary>
/// Group results in groupSort order </summary>
- public readonly GroupDocs<TGroupValueType>[] Groups;
+ public readonly GroupDocs<TGroupValue>[] Groups;
/// <summary>
/// How groups are sorted against each other </summary>
@@ -57,7 +56,7 @@ namespace Lucene.Net.Grouping
/// </summary>
public readonly float MaxScore;
- public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValueType>[] groups, float maxScore)
+ public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValue>[] groups, float maxScore)
{
GroupSort = groupSort;
WithinGroupSort = withinGroupSort;
@@ -68,7 +67,7 @@ namespace Lucene.Net.Grouping
MaxScore = maxScore;
}
- public TopGroups(TopGroups<TGroupValueType> oldTopGroups, int? totalGroupCount)
+ public TopGroups(TopGroups<TGroupValue> oldTopGroups, int? totalGroupCount)
{
GroupSort = oldTopGroups.GroupSort;
WithinGroupSort = oldTopGroups.WithinGroupSort;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/AbstractGroupingTestCase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/AbstractGroupingTestCase.cs b/src/Lucene.Net.Tests.Grouping/AbstractGroupingTestCase.cs
new file mode 100644
index 0000000..4cc29e3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/AbstractGroupingTestCase.cs
@@ -0,0 +1,30 @@
+\ufeffusing Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping
+{
+ /// <summary>
+ /// Base class for grouping related tests.
+ /// </summary>
+ // TODO (MvG) : The grouping tests contain a lot of code duplication. Try to move the common code to this class..
+ public abstract class AbstractGroupingTestCase : LuceneTestCase
+ {
+ /// <summary>
+ /// Produces a random realistic unicode string that is guaranteed to be
+ /// non-empty, since the DocValues based implementations cannot distinguish
+ /// an empty string from a missing (null) group value.
+ /// </summary>
+ protected string GenerateRandomNonEmptyString()
+ {
+ string randomValue;
+ do
+ {
+ // B/c of DV based impl we can't see the difference between an empty string and a null value.
+ // For that reason we don't generate empty string
+ // groups.
+ randomValue = TestUtil.RandomRealisticUnicodeString(Random());
+ //randomValue = TestUtil.randomSimpleString(random());
+ } while (randomValue.Length == 0); // BUGFIX: was Java-style "".equals(randomValue), which does not compile in C#
+ return randomValue;
+ }
+ }
+}