You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2016/11/10 11:33:44 UTC
[33/58] [abbrv] lucenenet git commit: WIP on Grouping
WIP on Grouping
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9d72bcb3
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9d72bcb3
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9d72bcb3
Branch: refs/heads/grouping
Commit: 9d72bcb3469dedd6bac66c3ee82bfc38e80e0eba
Parents: c3abdc7
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Oct 27 14:07:36 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Nov 8 02:24:53 2016 +0700
----------------------------------------------------------------------
Lucene.Net.sln | 14 +
src/Lucene.Net.Core/Search/TopDocsCollector.cs | 22 +-
.../AbstractAllGroupHeadsCollector.cs | 245 +++
.../AbstractAllGroupsCollector.cs | 85 +
.../AbstractDistinctValuesCollector.cs | 89 +
.../AbstractFirstPassGroupingCollector.cs | 427 +++++
.../AbstractGroupFacetCollector.cs | 375 ++++
.../AbstractSecondPassGroupingCollector.cs | 170 ++
.../BlockGroupingCollector.cs | 601 +++++++
src/Lucene.Net.Grouping/CollectedSearchGroup.cs | 31 +
.../Function/FunctionAllGroupHeadsCollector.cs | 155 ++
.../Function/FunctionAllGroupsCollector.cs | 69 +
.../Function/FunctionDistinctValuesCollector.cs | 85 +
.../FunctionFirstPassGroupingCollector.cs | 71 +
.../FunctionSecondPassGroupingCollector.cs | 64 +
src/Lucene.Net.Grouping/GroupDocs.cs | 8 +-
src/Lucene.Net.Grouping/GroupingSearch.cs | 483 +++++
.../Lucene.Net.Grouping.csproj | 26 +
src/Lucene.Net.Grouping/SearchGroup.cs | 388 ++++
.../Term/TermAllGroupHeadsCollector.cs | 807 +++++++++
.../Term/TermAllGroupsCollector.cs | 120 ++
.../Term/TermDistinctValuesCollector.cs | 144 ++
.../Term/TermFirstPassGroupingCollector.cs | 88 +
.../Term/TermGroupFacetCollector.cs | 444 +++++
.../Term/TermSecondPassGroupingCollector.cs | 65 +
src/Lucene.Net.Grouping/TopGroups.cs | 11 +-
.../AbstractGroupingTestCase.cs | 30 +
.../AllGroupHeadsCollectorTest.cs | 718 ++++++++
.../AllGroupsCollectorTest.cs | 138 ++
.../DistinctValuesCollectorTest.cs | 648 +++++++
.../GroupFacetCollectorTest.cs | 943 ++++++++++
.../GroupingSearchTest.cs | 245 +++
.../Lucene.Net.Tests.Grouping.csproj | 85 +
.../Properties/AssemblyInfo.cs | 36 +
src/Lucene.Net.Tests.Grouping/TestGrouping.cs | 1692 ++++++++++++++++++
src/Lucene.Net.Tests.Grouping/packages.config | 4 +
36 files changed, 9615 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index c87a7be..7591ffc 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -68,6 +68,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analysis.Stempel
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Analysis.Stempel", "src\Lucene.Net.Tests.Analysis.Stempel\Lucene.Net.Tests.Analysis.Stempel.csproj", "{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Grouping", "src\Lucene.Net.Tests.Grouping\Lucene.Net.Tests.Grouping.csproj", "{C2349F0D-FB66-4544-9C33-4D87F73C6004}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -383,6 +385,18 @@ Global
{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|Mixed Platforms.Build.0 = Release|Any CPU
{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|x86.ActiveCfg = Release|Any CPU
{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}.Release|x86.Build.0 = Release|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Debug|x86.Build.0 = Debug|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Release|x86.ActiveCfg = Release|Any CPU
+ {C2349F0D-FB66-4544-9C33-4D87F73C6004}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Core/Search/TopDocsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Search/TopDocsCollector.cs b/src/Lucene.Net.Core/Search/TopDocsCollector.cs
index 3b28e63..ccfa2a6 100644
--- a/src/Lucene.Net.Core/Search/TopDocsCollector.cs
+++ b/src/Lucene.Net.Core/Search/TopDocsCollector.cs
@@ -1,4 +1,5 @@
using System;
+using Lucene.Net.Index;
namespace Lucene.Net.Search
{
@@ -32,7 +33,7 @@ namespace Lucene.Net.Search
/// however, you might want to consider overriding all methods, in order to avoid
/// a NullPointerException.
/// </summary>
- public abstract class TopDocsCollector<T> : Collector where T : ScoreDoc
+ public abstract class TopDocsCollector<T> : Collector, ITopDocsCollector where T : ScoreDoc
{
/// <summary>
/// this is used in case topDocs() is called with illegal parameters, or there
@@ -185,4 +186,23 @@ namespace Lucene.Net.Search
return NewTopDocs(results, start);
}
}
+
+ /// <summary>
+ /// LUCENENET specific interface used to reference <see cref="TopDocsCollector{T}"/>
+ /// without referencing its generic type.
+ /// </summary>
+ public interface ITopDocsCollector
+ {
+ // From TopDocsCollector<T>
+ int TotalHits { get; set; }
+ TopDocs TopDocs();
+ TopDocs TopDocs(int start);
+ TopDocs TopDocs(int start, int howMany);
+
+ // From Collector
+ Scorer Scorer { set; }
+ void Collect(int doc);
+ AtomicReaderContext NextReader { set; }
+ bool AcceptsDocsOutOfOrder();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/AbstractAllGroupHeadsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/AbstractAllGroupHeadsCollector.cs b/src/Lucene.Net.Grouping/AbstractAllGroupHeadsCollector.cs
new file mode 100644
index 0000000..a3ae1bf
--- /dev/null
+++ b/src/Lucene.Net.Grouping/AbstractAllGroupHeadsCollector.cs
@@ -0,0 +1,245 @@
+\ufeffusing Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping
+{
+ public abstract class AbstractAllGroupHeadsCollector<GH> : AbstractAllGroupHeadsCollector
+ where GH : AbstractGroupHead /*AbstractAllGroupHeadsCollector<GH>.GroupHead*/
+ {
+ protected readonly int[] reversed;
+ protected readonly int compIDXEnd;
+ protected readonly TemporalResult temporalResult;
+
+ protected AbstractAllGroupHeadsCollector(int numberOfSorts)
+ {
+ this.reversed = new int[numberOfSorts];
+ this.compIDXEnd = numberOfSorts - 1;
+ temporalResult = new TemporalResult();
+ }
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="maxDoc">The maxDoc of the top level <see cref="Index.IndexReader"/></param>
+ /// <returns>a <see cref="FixedBitSet"/> containing all group heads.</returns>
+ public override FixedBitSet RetrieveGroupHeads(int maxDoc)
+ {
+ FixedBitSet bitSet = new FixedBitSet(maxDoc);
+
+ ICollection<GH> groupHeads = GetCollectedGroupHeads();
+ foreach (GH groupHead in groupHeads)
+ {
+ bitSet.Set(groupHead.Doc);
+ }
+
+ return bitSet;
+ }
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <returns>an int array containing all group heads. The size of the array is equal to number of collected unique groups.</returns>
+ public override int[] RetrieveGroupHeads()
+ {
+ ICollection<GH> groupHeads = GetCollectedGroupHeads();
+ int[] docHeads = new int[groupHeads.Count];
+
+ int i = 0;
+ foreach (GH groupHead in groupHeads)
+ {
+ docHeads[i++] = groupHead.Doc;
+ }
+
+ return docHeads;
+ }
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <returns>the number of group heads found for a query.</returns>
+ public override int GroupHeadsSize
+ {
+ get
+ {
+ return GetCollectedGroupHeads().Count;
+ }
+ }
+
+ /// <summary>
+ /// Returns the group head and puts it into <see cref="TemporalResult"/>.
+ /// If the group head wasn't encountered before then it will be added to the collected group heads.
+ /// <para>
+ /// The <see cref="TemporalResult.stop"/> property will be <c>true</c> if the group head wasn't encountered before
+ /// otherwise <c>false</c>.
+ /// </para>
+ /// </summary>
+ /// <param name="doc">The document to retrieve the group head for.</param>
+ /// <exception cref="IOException">If I/O related errors occur</exception>
+ protected override abstract void RetrieveGroupHeadAndAddIfNotExist(int doc);
+
+ /// <summary>
+ /// Returns the collected group heads.
+ /// Subsequent calls should return the same group heads.
+ /// </summary>
+ /// <returns>the collected group heads</returns>
+ protected abstract ICollection<GH> GetCollectedGroupHeads();
+
+ public override void Collect(int doc)
+ {
+ RetrieveGroupHeadAndAddIfNotExist(doc);
+ if (temporalResult.stop)
+ {
+ return;
+ }
+ GH groupHead = temporalResult.groupHead;
+
+ // Ok now we need to check if the current doc is more relevant than the current group head for this group
+ for (int compIDX = 0; ; compIDX++)
+ {
+ int c = reversed[compIDX] * groupHead.Compare(compIDX, doc);
+ if (c < 0)
+ {
+ // Definitely not competitive. So don't even bother to continue
+ return;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ else if (compIDX == compIDXEnd)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return;
+ }
+ }
+ groupHead.UpdateDocHead(doc);
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+
+ /// <summary>
+ /// Contains the result of group head retrieval.
+ /// To prevent new object creations of this class for every collect.
+ /// </summary>
+ public class TemporalResult
+ {
+
+ public GH groupHead;
+ public bool stop;
+
+ }
+ }
+
+ /// <summary>
+ /// Represents a group head. A group head is the most relevant document for a particular group.
+ /// The relevancy is usually based on the sort.
+ /// <para>
+ /// The group head contains a group value with its associated most relevant document id.
+ /// </para>
+ /// </summary>
+ /// <remarks>
+ /// LUCENENET: moved this class outside of the AbstractAllGroupHeadsCollector,
+ /// made non-generic, and renamed from GroupHead to AbstractGroupHead.
+ /// </remarks>
+ public abstract class AbstractGroupHead /*<TGroupValue>*/
+ {
+
+ //public readonly TGroupValue groupValue;
+ public int Doc { get; protected set; }
+
+ protected AbstractGroupHead(/*TGroupValue groupValue,*/ int doc)
+ {
+ //this.groupValue = groupValue;
+ this.Doc = doc;
+ }
+
+ /// <summary>
+ /// Compares the specified document for a specified comparator against the current most relevant document.
+ /// </summary>
+ /// <param name="compIDX">The comparator index of the specified comparator.</param>
+ /// <param name="doc">The specified document.</param>
+ /// <returns>
+ /// -1 if the specified document wasn't competitive against the current most relevant document, 1 if the
+ /// specified document was competitive against the current most relevant document. Otherwise 0.
+ /// </returns>
+ /// <exception cref="IOException">If I/O related errors occur</exception>
+ public abstract int Compare(int compIDX, int doc);
+
+ /// <summary>
+ /// Updates the current most relevant document with the specified document.
+ /// </summary>
+ /// <param name="doc">The specified document</param>
+ /// <exception cref="IOException">If I/O related errors occur</exception>
+ public abstract void UpdateDocHead(int doc);
+ }
+
+ /// <summary>
+ /// LUCENENET specific class used to reference an
+ /// <see cref="AbstractAllGroupHeadsCollector{GH}"/> subclass
+ /// without referring to its generic closing type.
+ /// </summary>
+ public abstract class AbstractAllGroupHeadsCollector : Collector
+ {
+ /// <summary>
+ ///
+ /// </summary>
+ /// <param name="maxDoc">The maxDoc of the top level <see cref="Index.IndexReader"/></param>
+ /// <returns>a <see cref="FixedBitSet"/> containing all group heads.</returns>
+ public abstract FixedBitSet RetrieveGroupHeads(int maxDoc);
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <returns>an int array containing all group heads. The size of the array is equal to number of collected unique groups.</returns>
+ public abstract int[] RetrieveGroupHeads();
+
+ /// <summary>
+ ///
+ /// </summary>
+ /// <returns>the number of group heads found for a query.</returns>
+ public abstract int GroupHeadsSize { get; }
+
+ /// <summary>
+ /// Returns the group head and puts it into <see cref="TemporalResult"/>.
+ /// If the group head wasn't encountered before then it will be added to the collected group heads.
+ /// <para>
+ /// The <see cref="TemporalResult.stop"/> property will be <c>true</c> if the group head wasn't encountered before
+ /// otherwise <c>false</c>.
+ /// </para>
+ /// </summary>
+ /// <param name="doc">The document to retrieve the group head for.</param>
+ /// <exception cref="IOException">If I/O related errors occur</exception>
+ protected abstract void RetrieveGroupHeadAndAddIfNotExist(int doc);
+ }
+
+ /////// <summary>
+ /////// LUCENENET specific interface used to reference an
+ /////// <see cref="AbstractAllGroupHeadsCollector{GH}"/> subclass
+ /////// without referring to its generic closing type.
+ /////// </summary>
+ ////public interface IAllGroupHeadsCollector
+ ////{
+ //// // From AbstractAllGroupHeadsCollector{GH}
+ //// FixedBitSet RetrieveGroupHeads(int maxDoc);
+ //// int[] RetrieveGroupHeads();
+ //// int GroupHeadsSize { get; }
+ //// void Collect(int doc);
+ //// bool AcceptsDocsOutOfOrder();
+
+ //// // From Collector
+ //// Scorer Scorer { set; }
+ //// AtomicReaderContext NextReader { set; }
+ ////}
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/AbstractAllGroupsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/AbstractAllGroupsCollector.cs b/src/Lucene.Net.Grouping/AbstractAllGroupsCollector.cs
new file mode 100644
index 0000000..0c8bc14
--- /dev/null
+++ b/src/Lucene.Net.Grouping/AbstractAllGroupsCollector.cs
@@ -0,0 +1,85 @@
+\ufeffusing System.Collections.Generic;
+
+namespace Lucene.Net.Search.Grouping
+{
+ /// <summary>
+ /// A collector that collects all groups that match the
+ /// query. Only the group value is collected, and the order
+ /// is undefined. This collector does not determine
+ /// the most relevant document of a group.
+ ///
+ /// <para>
+ /// This is an abstract version. Concrete implementations define
+ /// what a group actually is and how it is internally collected.
+ /// </para>
+ /// @lucene.experimental
+ /// </summary>
+ /// <typeparam name="TGroupValue"></typeparam>
+ public abstract class AbstractAllGroupsCollector<TGroupValue> : AbstractAllGroupsCollector
+ {
+ /// <summary>
+ /// Returns the total number of groups for the executed search.
+ /// This is a convenience method. The following code snippet has the same effect: <code>Groups.Count</code>
+ /// </summary>
+ /// <returns>The total number of groups for the executed search</returns>
+ public override int GroupCount
+ {
+ get
+ {
+ return Groups.Count;
+ }
+ }
+
+ /// <summary>
+ /// Returns the group values
+ /// <para>
+ /// This is an unordered collections of group values. For each group that matched the query there is a <see cref="BytesRef"/>
+ /// representing a group value.
+ /// </para>
+ /// </summary>
+ /// <returns>the group values</returns>
+ public abstract ICollection<TGroupValue> Groups { get; }
+
+
+ // Empty not necessary
+ public override Scorer Scorer
+ {
+ set
+ {
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return true;
+ }
+ }
+
+ /// <summary>
+ /// LUCENENET specific class used to reference <see cref="AbstractAllGroupsCollector{TGroupValue}"/>
+ /// without referring to its generic closing type.
+ /// </summary>
+ public abstract class AbstractAllGroupsCollector : Collector
+ {
+ /// <summary>
+ /// Returns the total number of groups for the executed search.
+ /// This is a convenience method. The following code snippet has the same effect: <code>GetGroups().Count</code>
+ /// </summary>
+ /// <returns>The total number of groups for the executed search</returns>
+ public abstract int GroupCount { get; }
+
+
+ // Empty not necessary
+ public override Scorer Scorer
+ {
+ set
+ {
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return true;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs b/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs
new file mode 100644
index 0000000..e9df8d8
--- /dev/null
+++ b/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs
@@ -0,0 +1,89 @@
+\ufeffusing Lucene.Net.Search;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping
+{
+ /// <summary>
+ /// A second pass grouping collector that keeps track of distinct values for a specified field for the top N group.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ /// <typeparam name="GC"></typeparam>
+ public abstract class AbstractDistinctValuesCollector<GC> : Collector where GC : IGroupCount /* AbstractDistinctValuesCollector<GC>.GroupCount */
+ {
+ /// <summary>
+ /// Returns all unique values for each top N group.
+ /// </summary>
+ /// <returns>all unique values for each top N group</returns>
+ public abstract List<GC> GetGroups();
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return true;
+ }
+
+ public override Scorer Scorer
+ {
+ set
+ {
+ }
+ }
+ }
+
+ //public abstract class AbstractDistinctValuesCollector : Collector
+ //{
+ // /// <summary>
+ // /// Returns all unique values for each top N group.
+ // /// </summary>
+ // /// <returns>all unique values for each top N group</returns>
+ // public abstract List<GC> GetGroups();
+
+ // public override bool AcceptsDocsOutOfOrder()
+ // {
+ // return true;
+ // }
+
+ // public override Scorer Scorer
+ // {
+ // set
+ // {
+ // }
+ // }
+ //}
+
+
+ /// <summary>
+ /// Returned by <see cref="AbstractDistinctValuesCollector.GetGroups()"/>,
+ /// representing the value and set of distinct values for the group.
+ /// </summary>
+ /// <typeparam name="TGroupValue"></typeparam>
+ /// <remarks>
+ /// LUCENENET - removed this class from being a nested class of
+ /// <see cref="AbstractDistinctValuesCollector{GC}"/> and renamed
+ /// from GroupCount to AbstractGroupCount
+ /// </remarks>
+ public abstract class AbstractGroupCount<TGroupValue> : IGroupCount
+ //where TGroupValue : IComparable
+ {
+ public readonly TGroupValue groupValue;
+ public readonly ISet<TGroupValue> uniqueValues;
+
+ public AbstractGroupCount(TGroupValue groupValue)
+ {
+ this.groupValue = groupValue;
+ this.uniqueValues = new HashSet<TGroupValue>();
+ }
+ }
+
+ /// <summary>
+ /// LUCENENET specific interface to allow usage of <see cref="AbstractGroupCount{TGroupValue}"/>
+ /// as a generic closing type without having to specify TGroupValue.
+ /// </summary>
+ public interface IGroupCount
+ {
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/AbstractFirstPassGroupingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/AbstractFirstPassGroupingCollector.cs b/src/Lucene.Net.Grouping/AbstractFirstPassGroupingCollector.cs
new file mode 100644
index 0000000..8efafd2
--- /dev/null
+++ b/src/Lucene.Net.Grouping/AbstractFirstPassGroupingCollector.cs
@@ -0,0 +1,427 @@
+\ufeffusing Lucene.Net.Search;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search.Grouping
+{
+ /// <summary>
+ /// FirstPassGroupingCollector is the first of two passes necessary
+ /// to collect grouped hits. This pass gathers the top N sorted
+ /// groups. Concrete subclasses define what a group is and how it
+ /// is internally collected.
+ ///
+ /// <para>
+ /// See {@link org.apache.lucene.search.grouping} for more
+ /// details including a full code example.
+ /// </para>
+ /// @lucene.experimental
+ /// </summary>
+ /// <typeparam name="TGroupValue"></typeparam>
+ public abstract class AbstractFirstPassGroupingCollector<TGroupValue> : Collector
+ {
+ private readonly Sort groupSort;
+ private readonly FieldComparator[] comparators;
+ private readonly int[] reversed;
+ private readonly int topNGroups;
+ private readonly IDictionary<TGroupValue, CollectedSearchGroup<TGroupValue>> groupMap;
+ private readonly int compIDXEnd;
+
+ // Set once we reach topNGroups unique groups:
+ // @lucene.internal
+ protected SortedSet<CollectedSearchGroup<TGroupValue>> orderedGroups;
+ private int docBase;
+ private int spareSlot;
+
+ /// <summary>
+ /// Create the first pass collector.
+ /// </summary>
+ /// <param name="groupSort">
+ /// The {@link Sort} used to sort the
+ /// groups. The top sorted document within each group
+ /// according to groupSort, determines how that group
+ /// sorts against other groups. This must be non-null,
+ /// ie, if you want to groupSort by relevance use
+ /// Sort.RELEVANCE.
+ /// </param>
+ /// <param name="topNGroups">How many top groups to keep.</param>
+ /// <exception cref="IOException">If I/O related errors occur</exception>
+ public AbstractFirstPassGroupingCollector(Sort groupSort, int topNGroups)
+ {
+ if (topNGroups < 1)
+ {
+ throw new ArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
+ }
+
+ // TODO: allow null groupSort to mean "by relevance",
+ // and specialize it?
+ this.groupSort = groupSort;
+
+ this.topNGroups = topNGroups;
+
+ SortField[] sortFields = groupSort.GetSort();
+ comparators = new FieldComparator[sortFields.Length];
+ compIDXEnd = comparators.Length - 1;
+ reversed = new int[sortFields.Length];
+ for (int i = 0; i < sortFields.Length; i++)
+ {
+ SortField sortField = sortFields[i];
+
+ // use topNGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot):
+ comparators[i] = sortField.GetComparator(topNGroups + 1, i);
+ reversed[i] = sortField.Reverse ? -1 : 1;
+ }
+
+ spareSlot = topNGroups;
+ groupMap = new Dictionary<TGroupValue, CollectedSearchGroup<TGroupValue>>(topNGroups);
+ }
+
+ /// <summary>
+ /// Returns top groups, starting from offset. This may
+ /// return null, if no groups were collected, or if the
+ /// number of unique groups collected is <= offset.
+ /// </summary>
+ /// <param name="groupOffset">The offset in the collected groups</param>
+ /// <param name="fillFields">Whether to fill to <see cref="SearchGroup.sortValues"/></param>
+ /// <returns>top groups, starting from offset</returns>
+ public ICollection<SearchGroup<TGroupValue>> GetTopGroups(int groupOffset, bool fillFields)
+ {
+
+ //System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size());
+
+ if (groupOffset < 0)
+ {
+ throw new ArgumentException("groupOffset must be >= 0 (got " + groupOffset + ")");
+ }
+
+ if (groupMap.Count <= groupOffset)
+ {
+ return null;
+ }
+
+ if (orderedGroups == null)
+ {
+ BuildSortedSet();
+ }
+
+ ICollection<SearchGroup<TGroupValue>> result = new List<SearchGroup<TGroupValue>>();
+ int upto = 0;
+ int sortFieldCount = groupSort.GetSort().Length;
+ foreach (CollectedSearchGroup<TGroupValue> group in orderedGroups)
+ {
+ if (upto++ < groupOffset)
+ {
+ continue;
+ }
+ //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ SearchGroup<TGroupValue> searchGroup = new SearchGroup<TGroupValue>();
+ searchGroup.groupValue = group.groupValue;
+ if (fillFields)
+ {
+ searchGroup.sortValues = new object[sortFieldCount];
+ for (int sortFieldIDX = 0; sortFieldIDX < sortFieldCount; sortFieldIDX++)
+ {
+ searchGroup.sortValues[sortFieldIDX] = comparators[sortFieldIDX].Value(group.ComparatorSlot);
+ }
+ }
+ result.Add(searchGroup);
+ }
+ //System.out.println(" return " + result.size() + " groups");
+ return result;
+ }
+
+ public override Scorer Scorer
+ {
+ set
+ {
+ foreach (FieldComparator comparator in comparators)
+ {
+ comparator.Scorer = value;
+ }
+ }
+ }
+
+ public override void Collect(int doc)
+ {
+ //System.out.println("FP.collect doc=" + doc);
+
+ // If orderedGroups != null we already have collected N groups and
+ // can short circuit by comparing this document to the bottom group,
+ // without having to find what group this document belongs to.
+
+ // Even if this document belongs to a group in the top N, we'll know that
+ // we don't have to update that group.
+
+ // Downside: if the number of unique groups is very low, this is
+ // wasted effort as we will most likely be updating an existing group.
+ if (orderedGroups != null)
+ {
+ for (int compIDX = 0; ; compIDX++)
+ {
+ int c = reversed[compIDX] * comparators[compIDX].CompareBottom(doc);
+ if (c < 0)
+ {
+ // Definitely not competitive. So don't even bother to continue
+ return;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ else if (compIDX == compIDXEnd)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return;
+ }
+ }
+ }
+
+ // TODO: should we add option to mean "ignore docs that
+ // don't have the group field" (instead of stuffing them
+ // under null group)?
+ TGroupValue groupValue = GetDocGroupValue(doc);
+
+ CollectedSearchGroup<TGroupValue> group = groupMap[groupValue];
+
+ if (group == null)
+ {
+
+ // First time we are seeing this group, or, we've seen
+ // it before but it fell out of the top N and is now
+ // coming back
+
+ if (groupMap.Count < topNGroups)
+ {
+
+ // Still in startup transient: we have not
+ // seen enough unique groups to start pruning them;
+ // just keep collecting them
+
+ // Add a new CollectedSearchGroup:
+ CollectedSearchGroup<TGroupValue> sg = new CollectedSearchGroup<TGroupValue>();
+ sg.groupValue = CopyDocGroupValue(groupValue, default(TGroupValue));
+ sg.ComparatorSlot = groupMap.Count;
+ sg.TopDoc = docBase + doc;
+ foreach (FieldComparator fc in comparators)
+ {
+ fc.Copy(sg.ComparatorSlot, doc);
+ }
+ groupMap[sg.groupValue] = sg;
+
+ if (groupMap.Count == topNGroups)
+ {
+ // End of startup transient: we now have max
+ // number of groups; from here on we will drop
+ // bottom group when we insert new one:
+ BuildSortedSet();
+ }
+
+ return;
+ }
+
+ // We already tested that the document is competitive, so replace
+ // the bottom group with this new group.
+ //CollectedSearchGroup<TGroupValue> bottomGroup = orderedGroups.PollLast();
+ CollectedSearchGroup<TGroupValue> bottomGroup;
+ lock (orderedGroups)
+ {
+ bottomGroup = orderedGroups.Last();
+ orderedGroups.Remove(bottomGroup);
+ }
+ Debug.Assert(orderedGroups.Count == topNGroups - 1);
+
+ groupMap.Remove(bottomGroup.groupValue);
+
+ // reuse the removed CollectedSearchGroup
+ bottomGroup.groupValue = CopyDocGroupValue(groupValue, bottomGroup.groupValue);
+ bottomGroup.TopDoc = docBase + doc;
+
+ foreach (FieldComparator fc in comparators)
+ {
+ fc.Copy(bottomGroup.ComparatorSlot, doc);
+ }
+
+ groupMap[bottomGroup.groupValue] = bottomGroup;
+ orderedGroups.Add(bottomGroup);
+ Debug.Assert(orderedGroups.Count == topNGroups);
+
+ int lastComparatorSlot = orderedGroups.Last().ComparatorSlot;
+ foreach (FieldComparator fc in comparators)
+ {
+ fc.Bottom = lastComparatorSlot;
+ }
+
+ return;
+ }
+
+ // Update existing group:
+ for (int compIDX = 0; ; compIDX++)
+ {
+ FieldComparator fc = comparators[compIDX];
+ fc.Copy(spareSlot, doc);
+
+ int c = reversed[compIDX] * fc.Compare(group.ComparatorSlot, spareSlot);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return;
+ }
+ else if (c > 0)
+ {
+ // Definitely competitive; set remaining comparators:
+ for (int compIDX2 = compIDX + 1; compIDX2 < comparators.Length; compIDX2++)
+ {
+ comparators[compIDX2].Copy(spareSlot, doc);
+ }
+ break;
+ }
+ else if (compIDX == compIDXEnd)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return;
+ }
+ }
+
+ // Remove before updating the group since lookup is done via comparators
+ // TODO: optimize this
+
+ CollectedSearchGroup<TGroupValue> prevLast;
+ if (orderedGroups != null)
+ {
+ lock (orderedGroups)
+ {
+ prevLast = orderedGroups.Last();
+ orderedGroups.Remove(group);
+ }
+ Debug.Assert(orderedGroups.Count == topNGroups - 1);
+ }
+ else
+ {
+ prevLast = null;
+ }
+
+ group.TopDoc = docBase + doc;
+
+ // Swap slots
+ int tmp = spareSlot;
+ spareSlot = group.ComparatorSlot;
+ group.ComparatorSlot = tmp;
+
+ // Re-add the changed group
+ if (orderedGroups != null)
+ {
+ orderedGroups.Add(group);
+ Debug.Assert(orderedGroups.Count == topNGroups);
+ var newLast = orderedGroups.Last();
+ // If we changed the value of the last group, or changed which group was last, then update bottom:
+ if (group == newLast || prevLast != newLast)
+ {
+ foreach (FieldComparator fc in comparators)
+ {
+ fc.Bottom = newLast.ComparatorSlot;
+ }
+ }
+ }
+ }
+
+ private class BuildSortedSetComparer : IComparer<ICollectedSearchGroup>
+ {
+ private readonly AbstractFirstPassGroupingCollector<TGroupValue> outerInstance;
+ public BuildSortedSetComparer(AbstractFirstPassGroupingCollector<TGroupValue> outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+ public int Compare(ICollectedSearchGroup o1, ICollectedSearchGroup o2)
+ {
+ for (int compIDX = 0; ; compIDX++)
+ {
+ FieldComparator fc = outerInstance.comparators[compIDX];
+ int c = outerInstance.reversed[compIDX] * fc.Compare(o1.ComparatorSlot, o2.ComparatorSlot);
+ if (c != 0)
+ {
+ return c;
+ }
+ else if (compIDX == outerInstance.compIDXEnd)
+ {
+ return o1.TopDoc - o2.TopDoc;
+ }
+ }
+ }
+ }
+ private void BuildSortedSet()
+ {
+ var comparator = new BuildSortedSetComparer(this);
+ // var comparator = new Comparator<CollectedSearchGroup<?>>() {
+ // @Override
+ // public int compare(CollectedSearchGroup<?> o1, CollectedSearchGroup<?> o2)
+ //{
+ // for (int compIDX = 0; ; compIDX++)
+ // {
+ // FieldComparator <?> fc = comparators[compIDX];
+ // final int c = reversed[compIDX] * fc.compare(o1.comparatorSlot, o2.comparatorSlot);
+ // if (c != 0)
+ // {
+ // return c;
+ // }
+ // else if (compIDX == compIDXEnd)
+ // {
+ // return o1.topDoc - o2.topDoc;
+ // }
+ // }
+ //}
+ // };
+
+ orderedGroups = new SortedSet<CollectedSearchGroup<TGroupValue>>(comparator);
+ orderedGroups.UnionWith(groupMap.Values);
+ Debug.Assert(orderedGroups.Count > 0);
+
+ foreach (FieldComparator fc in comparators)
+ {
+ fc.Bottom = orderedGroups.Last().ComparatorSlot;
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ docBase = value.DocBase;
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i] = comparators[i].SetNextReader(value);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Returns the group value for the specified doc.
+ /// </summary>
+ /// <param name="doc">The specified doc</param>
+ /// <returns>the group value for the specified doc</returns>
+ protected abstract TGroupValue GetDocGroupValue(int doc);
+
+ /// <summary>
+ /// Returns a copy of the specified group value by creating a new instance and copying the value from the specified
+ /// groupValue in the new instance. Or optionally the reuse argument can be used to copy the group value in.
+ /// </summary>
+ /// <param name="groupValue">The group value to copy</param>
+ /// <param name="reuse">Optionally a reuse instance to prevent a new instance creation</param>
+ /// <returns>a copy of the specified group value</returns>
+ protected abstract TGroupValue CopyDocGroupValue(TGroupValue groupValue, TGroupValue reuse);
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/AbstractGroupFacetCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/AbstractGroupFacetCollector.cs b/src/Lucene.Net.Grouping/AbstractGroupFacetCollector.cs
new file mode 100644
index 0000000..1cc4574
--- /dev/null
+++ b/src/Lucene.Net.Grouping/AbstractGroupFacetCollector.cs
@@ -0,0 +1,375 @@
+\ufeffusing Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping
+{
+ /// <summary>
+ /// Base class for computing grouped facets.
+ /// @lucene.experimental
+ /// </summary>
+ public abstract class AbstractGroupFacetCollector : Collector
+ {
+ protected readonly string groupField;
+ protected readonly string facetField;
+ protected readonly BytesRef facetPrefix;
+ protected readonly IList<AbstractSegmentResult> segmentResults;
+
+ protected int[] segmentFacetCounts;
+ protected int segmentTotalCount;
+ protected int startFacetOrd;
+ protected int endFacetOrd;
+
/// <summary>
/// Initializes the state shared by all grouped facet collectors.
/// </summary>
/// <param name="groupField">The field to group documents by</param>
/// <param name="facetField">The field to count facets for</param>
/// <param name="facetPrefix">Prefix filter for facet values (semantics defined by subclasses)</param>
protected AbstractGroupFacetCollector(string groupField, string facetField, BytesRef facetPrefix)
{
    this.groupField = groupField;
    this.facetField = facetField;
    this.facetPrefix = facetPrefix;
    segmentResults = new List<AbstractSegmentResult>();
}
+
/// <summary>
/// Returns grouped facet results that were computed over zero or more segments.
/// Grouped facet counts are merged from zero or more segment results.
/// </summary>
/// <param name="size">The total number of facets to include. This is typically offset + limit</param>
/// <param name="minCount">The minimum count a facet entry should have to be included in the grouped facet result</param>
/// <param name="orderByCount">Whether to sort the facet entries by facet entry count. If <c>false</c> then the facets
/// are sorted lexicographically in ascending order.</param>
/// <returns>grouped facet results</returns>
/// <exception cref="System.IO.IOException">If I/O related errors occur during merging segment grouped facet counts.</exception>
public GroupedFacetResult MergeSegmentResults(int size, int minCount, bool orderByCount)
{
    if (segmentFacetCounts != null)
    {
        // Flush the counts of the last collected segment before merging.
        segmentResults.Add(CreateSegmentResult());
        segmentFacetCounts = null; // reset
    }

    int totalCount = 0;
    int missingCount = 0;
    // Order the per-segment results by their current merge term so equal
    // facet values across segments can be summed in one pass.
    SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(segmentResults.Count);
    foreach (AbstractSegmentResult segmentResult in segmentResults)
    {
        missingCount += segmentResult.missing;
        if (segmentResult.mergePos >= segmentResult.maxTermPos)
        {
            // Segment has no terms to merge.
            continue;
        }
        totalCount += segmentResult.total;
        segments.Add(segmentResult);
    }

    GroupedFacetResult facetResult = new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount);
    while (segments.Size() > 0)
    {
        AtomicReaderContext // NOTE(review): placeholder comment removed; see below
        AbstractSegmentResult segmentResult = segments.Top();
        BytesRef currentFacetValue = BytesRef.DeepCopyOf(segmentResult.mergeTerm);
        int count = 0;

        // Sum this facet value's counts across all segments currently at it.
        do
        {
            count += segmentResult.counts[segmentResult.mergePos++];
            if (segmentResult.mergePos < segmentResult.maxTermPos)
            {
                segmentResult.NextTerm();
                segmentResult = segments.UpdateTop();
            }
            else
            {
                // Segment exhausted; drop it and look at the next-smallest term.
                segments.Pop();
                segmentResult = segments.Top();
                if (segmentResult == null)
                {
                    break;
                }
            }
        } while (currentFacetValue.Equals(segmentResult.mergeTerm));
        facetResult.AddFacetCount(currentFacetValue, count);
    }
    return facetResult;
}
+
/// <summary>
/// Creates the <see cref="AbstractSegmentResult"/> holding the grouped facet
/// counts collected for the current segment.
/// </summary>
protected abstract AbstractSegmentResult CreateSegmentResult();
+
/// <summary>
/// Intentionally a no-op: this collector does not read scores.
/// </summary>
public override Scorer Scorer
{
    set
    {
    }
}
+
/// <summary>
/// Returns <c>true</c>: this collector can accept documents out of doc id order.
/// </summary>
public override bool AcceptsDocsOutOfOrder()
{
    return true;
}
+
/// <summary>
/// Orders facet entries by highest count first; ties are broken by facet
/// value in ascending lexicographic order.
/// </summary>
private class OrderByCountAndValueComparer : IComparer<FacetEntry>
{
    public int Compare(FacetEntry a, FacetEntry b)
    {
        // Use CompareTo rather than "b.Count - a.Count": integer subtraction
        // can overflow for extreme values and produce an inconsistent ordering.
        int cmp = b.Count.CompareTo(a.Count); // Highest count first!
        if (cmp != 0)
        {
            return cmp;
        }
        return a.Value.CompareTo(b.Value);
    }
}
+
/// <summary>
/// Orders facet entries by facet value only, ascending lexicographically.
/// </summary>
private class OrderByValueComparer : IComparer<FacetEntry>
{
    public int Compare(FacetEntry x, FacetEntry y)
    {
        return x.Value.CompareTo(y.Value);
    }
}
+
/// <summary>
/// The grouped facet result. Containing grouped facet entries, total count and total missing count.
/// </summary>
public class GroupedFacetResult
{
    private readonly static IComparer<FacetEntry> orderByCountAndValue = new OrderByCountAndValueComparer();
    private readonly static IComparer<FacetEntry> orderByValue = new OrderByValueComparer();

    private readonly int maxSize;
    private readonly SortedSet<FacetEntry> facetEntries;
    private readonly int totalMissingCount;
    private readonly int totalCount;

    // The minimum count a candidate entry must have to be competitive. Once
    // the set is full this is raised to the count of the worst kept entry.
    private int currentMin;

    /// <summary>
    /// Creates a grouped facet result.
    /// </summary>
    /// <param name="size">Maximum number of facet entries to keep (typically offset + limit)</param>
    /// <param name="minCount">Initial minimum count an entry must have to be kept</param>
    /// <param name="orderByCount">If <c>true</c>, order entries by count (descending) then value; otherwise by value only</param>
    /// <param name="totalCount">The sum of all facet entry counts</param>
    /// <param name="totalMissingCount">The number of groups that didn't have a facet value</param>
    public GroupedFacetResult(int size, int minCount, bool orderByCount, int totalCount, int totalMissingCount)
    {
        this.facetEntries = new SortedSet<FacetEntry>(orderByCount ? orderByCountAndValue : orderByValue);
        this.totalMissingCount = totalMissingCount;
        this.totalCount = totalCount;
        maxSize = size;
        currentMin = minCount;
    }

    /// <summary>
    /// Adds a facet value with its count, keeping at most <c>maxSize</c>
    /// competitive entries.
    /// </summary>
    /// <param name="facetValue">The facet value</param>
    /// <param name="count">The count for this facet value</param>
    public virtual void AddFacetCount(BytesRef facetValue, int count)
    {
        if (count < currentMin)
        {
            return;
        }

        FacetEntry facetEntry = new FacetEntry(facetValue, count);
        if (facetEntries.Count == maxSize)
        {
            // LUCENENET: equivalent of Java's TreeSet.higher(facetEntry) == null.
            // If the candidate sorts at or after the current worst kept entry it
            // cannot compete, so bail out without disturbing the set. The
            // original port skipped this check (marked as a TODO), which could
            // evict a better entry in favor of a worse one.
            if (facetEntries.Comparer.Compare(facetEntry, facetEntries.Max) >= 0)
            {
                return;
            }
            // Evict the worst kept entry to make room. SortedSet.Max is the
            // last element under the configured comparer (O(log n), vs the
            // previous O(n) LINQ Last()).
            facetEntries.Remove(facetEntries.Max);
        }
        facetEntries.Add(facetEntry);

        if (facetEntries.Count == maxSize)
        {
            currentMin = facetEntries.Max.Count;
        }
    }

    /// <summary>
    /// Returns a list of facet entries to be rendered based on the specified offset and limit.
    /// The facet entries are retrieved from the facet entries collected during merging.
    /// </summary>
    /// <param name="offset">The offset in the collected facet entries during merging</param>
    /// <param name="limit">The number of facets to return starting from the offset.</param>
    /// <returns>a list of facet entries to be rendered based on the specified offset and limit</returns>
    public List<FacetEntry> GetFacetEntries(int offset, int limit)
    {
        List<FacetEntry> entries = new List<FacetEntry>();

        int skipped = 0;
        int included = 0;
        foreach (FacetEntry facetEntry in facetEntries)
        {
            if (skipped < offset)
            {
                skipped++;
                continue;
            }
            if (included++ >= limit)
            {
                break;
            }
            entries.Add(facetEntry);
        }
        return entries;
    }

    /// <summary>
    /// Returns the sum of all facet entries counts.
    /// </summary>
    public int TotalCount
    {
        get
        {
            return totalCount;
        }
    }

    /// <summary>
    /// Returns the number of groups that didn't have a facet value.
    /// </summary>
    public int TotalMissingCount
    {
        get
        {
            return totalMissingCount;
        }
    }
}
+
/// <summary>
/// Represents a facet entry with a value and a count.
/// </summary>
public class FacetEntry
{
    private readonly BytesRef value;
    private readonly int count;

    /// <summary>
    /// Creates a facet entry for the given value and count.
    /// </summary>
    public FacetEntry(BytesRef value, int count)
    {
        this.value = value;
        this.count = count;
    }

    public override bool Equals(object o)
    {
        if (ReferenceEquals(this, o))
        {
            return true;
        }
        FacetEntry other = o as FacetEntry;
        if (other == null || other.GetType() != GetType())
        {
            return false;
        }
        return count == other.count && value.Equals(other.value);
    }

    public override int GetHashCode()
    {
        return 31 * value.GetHashCode() + count;
    }

    public override string ToString()
    {
        return "FacetEntry{value=" + value.Utf8ToString() + ", count=" + count + "}";
    }

    /// <summary>
    /// The value of this facet entry
    /// </summary>
    public BytesRef Value
    {
        get
        {
            return value;
        }
    }

    /// <summary>
    /// The count (number of groups) of this facet entry.
    /// </summary>
    public int Count
    {
        get
        {
            return count;
        }
    }
}
+
/// <summary>
/// Contains the local grouped segment counts for a particular segment.
/// Each <c>SegmentResult</c> must be added together.
/// </summary>
protected internal abstract class AbstractSegmentResult
{
    protected internal readonly int[] counts;   // facet count per term position; indexed by mergePos during merging
    protected internal readonly int total;      // summed into the merged total count
    protected internal readonly int missing;    // summed into the merged missing count
    protected internal readonly int maxTermPos; // exclusive upper bound for mergePos

    protected internal BytesRef mergeTerm;      // the term currently being merged for this segment
    protected internal int mergePos;            // current merge position into counts

    protected AbstractSegmentResult(int[] counts, int total, int missing, int maxTermPos)
    {
        this.counts = counts;
        this.total = total;
        this.missing = missing;
        this.maxTermPos = maxTermPos;
    }

    /// <summary>
    /// Go to next term in this <c>SegmentResult</c> in order to retrieve the grouped facet counts.
    /// </summary>
    /// <exception cref="System.IO.IOException">If I/O related errors occur</exception>
    protected internal abstract void NextTerm();

}
+
/// <summary>
/// Priority queue that orders <see cref="AbstractSegmentResult"/> instances
/// by their current merge term, smallest term on top.
/// </summary>
private class SegmentResultPriorityQueue : PriorityQueue<AbstractSegmentResult>
{
    internal SegmentResultPriorityQueue(int maxSize)
        : base(maxSize)
    {
    }

    public override bool LessThan(AbstractSegmentResult a, AbstractSegmentResult b)
    {
        return a.mergeTerm.CompareTo(b.mergeTerm) < 0;
    }
}
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/AbstractSecondPassGroupingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/AbstractSecondPassGroupingCollector.cs b/src/Lucene.Net.Grouping/AbstractSecondPassGroupingCollector.cs
new file mode 100644
index 0000000..580617c
--- /dev/null
+++ b/src/Lucene.Net.Grouping/AbstractSecondPassGroupingCollector.cs
@@ -0,0 +1,170 @@
+\ufeffusing Lucene.Net.Search;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search.Grouping
+{
+ /// <summary>
+ /// SecondPassGroupingCollector is the second of two passes
+ /// necessary to collect grouped docs. This pass gathers the
+ /// top N documents per top group computed from the
+ /// first pass. Concrete subclasses define what a group is and how it
+ /// is internally collected.
+ /// <para>
+ /// See {@link org.apache.lucene.search.grouping} for more
+ /// details including a full code example.
+ /// </para>
+ /// @lucene.experimental
+ /// </summary>
+ /// <typeparam name="TGroupValue"></typeparam>
+ public abstract class AbstractSecondPassGroupingCollector<TGroupValue> : Collector
+ {
+ protected readonly IDictionary<TGroupValue, AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue>> groupMap;
+ private readonly int maxDocsPerGroup;
+ protected AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue>[] groupDocs;
+ private readonly ICollection<SearchGroup<TGroupValue>> groups;
+ private readonly Sort withinGroupSort;
+ private readonly Sort groupSort;
+
+ private int totalHitCount;
+ private int totalGroupedHitCount;
+
/// <summary>
/// Creates the second pass collector for the given top groups from the first pass.
/// </summary>
/// <param name="groups">The groups (from the first pass) to collect top documents for; must be non-empty</param>
/// <param name="groupSort">The sort used between groups</param>
/// <param name="withinGroupSort">The sort used within each group, or null to sort by score</param>
/// <param name="maxDocsPerGroup">How many top documents to keep per group</param>
/// <param name="getScores">Whether to compute scores (field sort only)</param>
/// <param name="getMaxScores">Whether to compute max scores (field sort only)</param>
/// <param name="fillSortFields">Whether to fill the sort values (field sort only)</param>
/// <exception cref="ArgumentException">If <paramref name="groups"/> is empty</exception>
public AbstractSecondPassGroupingCollector(ICollection<SearchGroup<TGroupValue>> groups, Sort groupSort, Sort withinGroupSort,
                                           int maxDocsPerGroup, bool getScores, bool getMaxScores, bool fillSortFields)
{

    //System.out.println("SP init");
    if (groups.Count == 0)
    {
        throw new ArgumentException("no groups to collect (groups.size() is 0)");
    }

    this.groupSort = groupSort;
    this.withinGroupSort = withinGroupSort;
    this.groups = groups;
    this.maxDocsPerGroup = maxDocsPerGroup;
    groupMap = new Dictionary<TGroupValue, AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue>>(groups.Count);

    // Register one per-group TopDocs collector per requested group.
    foreach (SearchGroup<TGroupValue> group in groups)
    {
        //System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
        //TopDocsCollector collector;
        ITopDocsCollector collector;
        if (withinGroupSort == null)
        {
            // Sort by score
            collector = TopScoreDocCollector.Create(maxDocsPerGroup, true);
        }
        else
        {
            // Sort by fields
            collector = TopFieldCollector.Create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
        }
        groupMap[group.groupValue] = new AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue>(group.groupValue, collector);
    }
}
+
/// <summary>
/// Propagates the scorer to the collector of every group.
/// </summary>
public override Scorer Scorer
{
    set
    {
        foreach (AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue> group in groupMap.Values)
        {
            group.collector.Scorer = value;
        }
    }
}
+
/// <summary>
/// Counts the doc as a hit and, if it belongs to one of the requested
/// groups, forwards it to that group's collector.
/// </summary>
public override void Collect(int doc)
{
    totalHitCount++;
    AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue> group = RetrieveGroup(doc);
    if (group != null)
    {
        totalGroupedHitCount++;
        group.collector.Collect(doc);
    }
}
+
/// <summary>
/// Returns the group the specified doc belongs to or <c>null</c> if no group could be retrieved.
/// </summary>
/// <param name="doc">The specified doc</param>
/// <returns>the group the specified doc belongs to or <c>null</c> if no group could be retrieved</returns>
/// <exception cref="System.IO.IOException">If an I/O related error occurred</exception>
protected abstract AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue> RetrieveGroup(int doc);
+
/// <summary>
/// Advances every group's collector to the next segment.
/// </summary>
public override AtomicReaderContext NextReader
{
    set
    {
        //System.out.println("SP.setNextReader");
        foreach (AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue> group in groupMap.Values)
        {
            group.collector.NextReader = value;
        }
    }
}
+
/// <summary>
/// Returns <c>false</c>: documents must be collected in doc id order.
/// </summary>
public override bool AcceptsDocsOutOfOrder()
{
    return false;
}
+
/// <summary>
/// Returns the top groups with their top documents, as gathered during this
/// second pass.
/// </summary>
/// <param name="withinGroupOffset">The offset of the first document to return within each group</param>
/// <returns>the top groups and their top documents</returns>
public TopGroups<TGroupValue> GetTopGroups(int withinGroupOffset)
{
    GroupDocs<TGroupValue>[] groupDocsResult = new GroupDocs<TGroupValue>[groups.Count];

    int groupIDX = 0;
    float maxScore = float.MinValue;
    foreach (var group in groups)
    {
        // Every requested group was registered in groupMap by the constructor,
        // so this lookup is expected to succeed. TryGetValue performs a single
        // lookup instead of the previous ContainsKey + indexer double lookup
        // (which also dereferenced a potentially-null result unconditionally).
        AbstractSecondPassGroupingCollector.SearchGroupDocs<TGroupValue> groupDocs;
        groupMap.TryGetValue(group.groupValue, out groupDocs);
        TopDocs topDocs = groupDocs.collector.TopDocs(withinGroupOffset, maxDocsPerGroup);
        // The group-level score is unknown here; pass NaN per the Java original.
        groupDocsResult[groupIDX++] = new GroupDocs<TGroupValue>(float.NaN,
                                                                 topDocs.MaxScore,
                                                                 topDocs.TotalHits,
                                                                 topDocs.ScoreDocs,
                                                                 groupDocs.groupValue,
                                                                 group.sortValues);
        maxScore = Math.Max(maxScore, topDocs.MaxScore);
    }

    return new TopGroups<TGroupValue>(groupSort.GetSort(),
                                      withinGroupSort == null ? null : withinGroupSort.GetSort(),
                                      totalHitCount, totalGroupedHitCount, groupDocsResult,
                                      maxScore);
}
+
+
+
+ }
+
/// <summary>
/// LUCENENET specific: non-generic holder for nested types, so they can be
/// referenced without specifying the generic type parameter of
/// <c>AbstractSecondPassGroupingCollector&lt;TGroupValue&gt;</c>.
/// </summary>
public class AbstractSecondPassGroupingCollector
{
    /// <summary>
    /// Don't allow creation
    /// </summary>
    private AbstractSecondPassGroupingCollector() { }

    // TODO: merge with SearchGroup or not?
    // ad: don't need to build a new hashmap
    // disad: blows up the size of SearchGroup if we need many of them, and couples implementations
    /// <summary>
    /// Pairs a group value with the <see cref="ITopDocsCollector"/> that
    /// gathers the top documents for that group.
    /// </summary>
    public class SearchGroupDocs<TGroupValue>
    {
        // The value shared by all documents in this group.
        public readonly TGroupValue groupValue;
        //public readonly TopDocsCollector<?> collector;
        // Collects the top documents for this group.
        public readonly ITopDocsCollector collector;
        public SearchGroupDocs(TGroupValue groupValue, ITopDocsCollector /*TopDocsCollector<?>*/ collector)
        {
            this.groupValue = groupValue;
            this.collector = collector;
        }
    }
}
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/BlockGroupingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/BlockGroupingCollector.cs b/src/Lucene.Net.Grouping/BlockGroupingCollector.cs
new file mode 100644
index 0000000..2a82993
--- /dev/null
+++ b/src/Lucene.Net.Grouping/BlockGroupingCollector.cs
@@ -0,0 +1,601 @@
+\ufeffusing Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping
+{
+ // TODO: this sentence is too long for the class summary.
+ /// <summary>
+ /// BlockGroupingCollector performs grouping with a
+ /// single pass collector, as long as you are grouping by a
+ /// doc block field, ie all documents sharing a given group
+ /// value were indexed as a doc block using the atomic
+ /// <see cref="IndexWriter.AddDocuments(IEnumerable{IEnumerable{IndexableField}}, Analysis.Analyzer)"/> or
+ /// <see cref="IndexWriter.UpdateDocuments(Term, IEnumerable{IEnumerable{IndexableField}}, Analysis.Analyzer)"/>
+ /// API.
+ ///
+ /// <para>
+ /// This results in faster performance (~25% faster QPS)
+ /// than the two-pass grouping collectors, with the tradeoff
+ /// being that the documents in each group must always be
+ /// indexed as a block. This collector also fills in
+ /// TopGroups.totalGroupCount without requiring the separate
+ /// <see cref="Terms.TermAllGroupsCollector"/>. However, this collector does
+ /// not fill in the groupValue of each group; this field
+ /// will always be null.
+ /// </para>
+ /// <para>
+ /// <c>NOTE</c>: this collector makes no effort to verify
+ /// the docs were in fact indexed as a block, so it's up to
+ /// you to ensure this was the case.
+ /// </para>
+ /// <para>
+ /// See {@link org.apache.lucene.search.grouping} for more
+ /// details including a full code example.
+ /// </para>
+ /// @lucene.experimental
+ /// </summary>
+ public class BlockGroupingCollector : Collector
+ {
+ private int[] pendingSubDocs;
+ private float[] pendingSubScores;
+ private int subDocUpto;
+
+ private readonly Sort groupSort;
+ private readonly int topNGroups;
+ private readonly Filter lastDocPerGroup;
+
+ // TODO: specialize into 2 classes, static "create" method:
+ private readonly bool needsScores;
+
+ private readonly FieldComparator[] comparators;
+ private readonly int[] reversed;
+ private readonly int compIDXEnd;
+ private int bottomSlot;
+ private bool queueFull;
+ private AtomicReaderContext currentReaderContext;
+
+ private int topGroupDoc;
+ private int totalHitCount;
+ private int totalGroupCount;
+ private int docBase;
+ private int groupEndDocID;
+ private DocIdSetIterator lastDocPerGroupBits;
+ private Scorer scorer;
+ private readonly GroupQueue groupQueue;
+ private bool groupCompetes;
+
/// <summary>
/// Minimal <see cref="Scorer"/> whose doc and score are assigned directly by
/// the caller. Used by <c>GetTopGroups</c> to replay cached doc/score pairs
/// into the per-group collectors; iteration members are unsupported.
/// </summary>
private sealed class FakeScorer : Scorer
{
    internal float score; // score to report for the current doc
    internal int doc;     // doc id to report

    public FakeScorer()
        : base(null)
    {
    }

    public override float Score()
    {
        return score;
    }

    public override int Freq()
    {
        throw new InvalidOperationException(); // not supported: replay-only scorer
    }

    public override int DocID()
    {
        return doc;
    }

    public override int Advance(int target)
    {
        throw new InvalidOperationException(); // not supported: replay-only scorer
    }

    public override int NextDoc()
    {
        throw new InvalidOperationException(); // not supported: replay-only scorer
    }

    public override long Cost()
    {
        return 1;
    }

    public override Weight Weight
    {
        get
        {
            throw new InvalidOperationException(); // not supported: replay-only scorer
        }
    }


    public override ICollection<ChildScorer> Children
    {
        get
        {
            throw new InvalidOperationException(); // not supported: replay-only scorer
        }
    }
}
+
/// <summary>
/// Mutable holder for one group's buffered docs/scores while the group sits
/// in the queue of competitive groups.
/// </summary>
private sealed class OneGroup
{
    internal AtomicReaderContext readerContext; // segment the buffered docs belong to
    //internal int groupOrd;
    internal int topGroupDoc;                   // global doc id (docBase added) of the group's top document
    internal int[] docs;                        // buffered segment-relative doc ids
    internal float[] scores;                    // buffered scores (only when needsScores)
    internal int count;                         // number of buffered docs
    internal int comparatorSlot;                // comparator slot holding this group's sort values
}
+
/// <summary>
/// Sorts by groupSort. Not static -- uses the outer instance's comparators
/// and reversed multipliers. The "worst" group is kept on top so it can be
/// replaced when a more competitive group arrives.
/// </summary>
private sealed class GroupQueue : PriorityQueue<OneGroup>
{

    private readonly BlockGroupingCollector outerInstance;
    public GroupQueue(BlockGroupingCollector outerInstance, int size)
        : base(size)
    {
        this.outerInstance = outerInstance;
    }

    public override bool LessThan(OneGroup group1, OneGroup group2)
    {

        //System.out.println(" ltcheck");
        Debug.Assert(group1 != group2);
        Debug.Assert(group1.comparatorSlot != group2.comparatorSlot);

        int numComparators = outerInstance.comparators.Length;
        for (int compIDX = 0; compIDX < numComparators; compIDX++)
        {
            int c = outerInstance.reversed[compIDX] * outerInstance.comparators[compIDX].Compare(group1.comparatorSlot, group2.comparatorSlot);
            if (c != 0)
            {
                // Short circuit
                return c > 0;
            }
        }

        // Break ties by docID; lower docID is always sorted first
        return group1.topGroupDoc > group2.topGroupDoc;
    }
}
+
/// <summary>
/// Called when we transition to another group; if the group is competitive
/// we insert into the group queue. Either appends a new <see cref="OneGroup"/>
/// (while the queue is filling) or recycles the queue's current bottom entry,
/// then resets the pending-doc buffer for the next group.
/// </summary>
private void ProcessGroup()
{
    totalGroupCount++;
    //System.out.println(" processGroup ord=" + lastGroupOrd + " competes=" + groupCompetes + " count=" + subDocUpto + " groupDoc=" + topGroupDoc);
    if (groupCompetes)
    {
        if (!queueFull)
        {
            // Startup transient: always add a new OneGroup
            OneGroup og = new OneGroup();
            og.count = subDocUpto;
            og.topGroupDoc = docBase + topGroupDoc;
            // Hand the pending buffers to the group and allocate fresh ones.
            og.docs = pendingSubDocs;
            pendingSubDocs = new int[10];
            if (needsScores)
            {
                og.scores = pendingSubScores;
                pendingSubScores = new float[10];
            }
            og.readerContext = currentReaderContext;
            //og.groupOrd = lastGroupOrd;
            og.comparatorSlot = bottomSlot;
            OneGroup bottomGroup = groupQueue.Add(og);
            //System.out.println(" ADD group=" + getGroupString(lastGroupOrd) + " newBottom=" + getGroupString(bottomGroup.groupOrd));
            queueFull = groupQueue.Size() == topNGroups;
            if (queueFull)
            {
                // Queue just became full; now set the real bottom
                // in the comparators:
                bottomSlot = bottomGroup.comparatorSlot;
                //System.out.println(" set bottom=" + bottomSlot);
                for (int i = 0; i < comparators.Length; i++)
                {
                    comparators[i].Bottom = bottomSlot;
                }
                //System.out.println(" QUEUE FULL");
            }
            else
            {
                // Queue not full yet -- just advance bottomSlot:
                bottomSlot = groupQueue.Size();
            }
        }
        else
        {
            // Replace bottom element in PQ and then updateTop
            OneGroup og = groupQueue.Top();
            Debug.Assert(og != null);
            og.count = subDocUpto;
            og.topGroupDoc = docBase + topGroupDoc;
            // Swap pending docs
            int[] savDocs = og.docs;
            og.docs = pendingSubDocs;
            pendingSubDocs = savDocs;
            if (needsScores)
            {
                // Swap pending scores
                float[] savScores = og.scores;
                og.scores = pendingSubScores;
                pendingSubScores = savScores;
            }
            og.readerContext = currentReaderContext;
            //og.groupOrd = lastGroupOrd;
            bottomSlot = groupQueue.UpdateTop().comparatorSlot;

            //System.out.println(" set bottom=" + bottomSlot);
            for (int i = 0; i < comparators.Length; i++)
            {
                comparators[i].Bottom = bottomSlot;
            }
        }
    }
    subDocUpto = 0;
}
+
/// <summary>
/// Create the single pass collector.
/// </summary>
/// <param name="groupSort">The <see cref="Sort"/> used to sort the
/// groups. The top sorted document within each group
/// according to groupSort, determines how that group
/// sorts against other groups. This must be non-null,
/// ie, if you want to groupSort by relevance use
/// <c>Sort.RELEVANCE</c>.</param>
/// <param name="topNGroups">How many top groups to keep.</param>
/// <param name="needsScores">true if the collected documents
/// require scores, either because relevance is included
/// in the withinGroupSort or because you plan to pass true
/// for either getScores or getMaxScores to <c>GetTopGroups</c></param>
/// <param name="lastDocPerGroup">a <see cref="Filter"/> that marks the
/// last document in each group.</param>
/// <exception cref="ArgumentException">If <paramref name="topNGroups"/> is less than 1</exception>
public BlockGroupingCollector(Sort groupSort, int topNGroups, bool needsScores, Filter lastDocPerGroup)
{

    if (topNGroups < 1)
    {
        throw new ArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
    }

    groupQueue = new GroupQueue(this, topNGroups);
    pendingSubDocs = new int[10];
    if (needsScores)
    {
        pendingSubScores = new float[10];
    }

    this.needsScores = needsScores;
    this.lastDocPerGroup = lastDocPerGroup;
    // TODO: allow null groupSort to mean "by relevance",
    // and specialize it?
    this.groupSort = groupSort;

    this.topNGroups = topNGroups;

    // One comparator per group-sort field; reversed[] holds +1/-1 multipliers.
    SortField[] sortFields = groupSort.GetSort();
    comparators = new FieldComparator[sortFields.Length];
    compIDXEnd = comparators.Length - 1;
    reversed = new int[sortFields.Length];
    for (int i = 0; i < sortFields.Length; i++)
    {
        SortField sortField = sortFields[i];
        comparators[i] = sortField.GetComparator(topNGroups, i);
        reversed[i] = sortField.Reverse ? -1 : 1;
    }
}
+
// TODO: maybe allow no sort on retrieving groups? app
// may want to simply process docs in the group itself?
// typically they will be presented as a "single" result
// in the UI?

/// <summary>
/// Returns the grouped results. Returns null if the
/// number of groups collected is &lt;= groupOffset.
/// <para>
/// <b>NOTE</b>: This collector is unable to compute
/// the groupValue per group so it will always be null.
/// This is normally not a problem, as you can obtain the
/// value just like you obtain other values for each
/// matching document (eg, via stored fields, via
/// FieldCache, etc.)
/// </para>
/// </summary>
/// <param name="withinGroupSort">The <see cref="Sort"/> used to sort
/// documents within each group. Passing null is
/// allowed, to sort by relevance.</param>
/// <param name="groupOffset">Which group to start from</param>
/// <param name="withinGroupOffset">Which document to start from
/// within each group</param>
/// <param name="maxDocsPerGroup">How many top documents to keep
/// within each group.</param>
/// <param name="fillSortFields">If true then the Comparable
/// values for the sort fields will be set</param>
public TopGroups<T> GetTopGroups<T>(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup, bool fillSortFields)
{

    //if (queueFull) {
    //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
    //}
    if (subDocUpto != 0)
    {
        // Flush the in-progress group before reading results.
        ProcessGroup();
    }
    if (groupOffset >= groupQueue.Size())
    {
        return null;
    }
    int totalGroupedHitCount = 0;

    FakeScorer fakeScorer = new FakeScorer();

    float maxScore = float.MinValue;

    GroupDocs<T>[] groups = new GroupDocs<T>[groupQueue.Size() - groupOffset];
    // Pop groups worst-first and fill the result array back-to-front.
    for (int downTo = groupQueue.Size() - groupOffset - 1; downTo >= 0; downTo--)
    {
        OneGroup og = groupQueue.Pop();

        // At this point we hold all docs w/ in each group,
        // unsorted; we now sort them:
        ITopDocsCollector collector;
        if (withinGroupSort == null)
        {
            // Sort by score
            if (!needsScores)
            {
                throw new ArgumentException("cannot sort by relevance within group: needsScores=false");
            }
            collector = TopScoreDocCollector.Create(maxDocsPerGroup, true);
        }
        else
        {
            // Sort by fields
            collector = TopFieldCollector.Create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores, true);
        }

        // Replay the buffered doc/score pairs through the collector.
        collector.Scorer = fakeScorer;
        collector.NextReader = og.readerContext;
        for (int docIDX = 0; docIDX < og.count; docIDX++)
        {
            int doc = og.docs[docIDX];
            fakeScorer.doc = doc;
            if (needsScores)
            {
                fakeScorer.score = og.scores[docIDX];
            }
            collector.Collect(doc);
        }
        totalGroupedHitCount += og.count;

        object[] groupSortValues;

        if (fillSortFields)
        {
            groupSortValues = new IComparable[comparators.Length];
            for (int sortFieldIDX = 0; sortFieldIDX < comparators.Length; sortFieldIDX++)
            {
                groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].Value(og.comparatorSlot);
            }
        }
        else
        {
            groupSortValues = null;
        }

        TopDocs topDocs = collector.TopDocs(withinGroupOffset, maxDocsPerGroup);

        // TODO: we could aggregate scores across children
        // by Sum/Avg instead of passing NaN:
        groups[downTo] = new GroupDocs<T>(float.NaN,
                                          topDocs.MaxScore,
                                          og.count,
                                          topDocs.ScoreDocs,
                                          default(T),
                                          groupSortValues);
        maxScore = Math.Max(maxScore, topDocs.MaxScore);
    }

    /*
    while (groupQueue.size() != 0) {
      final OneGroup og = groupQueue.pop();
      //System.out.println("  leftover: og ord=" + og.groupOrd + " count=" + og.count);
      totalGroupedHitCount += og.count;
    }
    */

    return new TopGroups<T>(new TopGroups<T>(groupSort.GetSort(),
                                             withinGroupSort == null ? null : withinGroupSort.GetSort(),
                                             totalHitCount, totalGroupedHitCount, groups, maxScore),
                            totalGroupCount);
}
+
/// <summary>
/// Stores the scorer for reading scores during collection and propagates it
/// to every field comparator.
/// </summary>
public override Scorer Scorer
{
    set
    {
        this.scorer = value;
        foreach (FieldComparator comparator in comparators)
        {
            comparator.Scorer = value;
        }
    }
}
+
/// <summary>
/// Collects one doc: detects group boundaries via the last-doc-per-group
/// iterator, buffers the doc (and score, when needed), and tracks whether
/// the current group is competitive against the queue's bottom.
/// </summary>
public override void Collect(int doc)
{

    // System.out.println("C " + doc);

    if (doc > groupEndDocID)
    {
        // Group changed
        if (subDocUpto != 0)
        {
            ProcessGroup();
        }
        groupEndDocID = lastDocPerGroupBits.Advance(doc);
        //System.out.println("  adv " + groupEndDocID + " " + lastDocPerGroupBits);
        subDocUpto = 0;
        groupCompetes = !queueFull;
    }

    totalHitCount++;

    // Always cache doc/score within this group:
    if (subDocUpto == pendingSubDocs.Length)
    {
        pendingSubDocs = ArrayUtil.Grow(pendingSubDocs);
    }
    pendingSubDocs[subDocUpto] = doc;
    if (needsScores)
    {
        if (subDocUpto == pendingSubScores.Length)
        {
            pendingSubScores = ArrayUtil.Grow(pendingSubScores);
        }
        pendingSubScores[subDocUpto] = scorer.Score();
    }
    subDocUpto++;

    if (groupCompetes)
    {
        if (subDocUpto == 1)
        {
            Debug.Assert(!queueFull);

            //System.out.println("    init copy to bottomSlot=" + bottomSlot);
            foreach (FieldComparator fc in comparators)
            {
                fc.Copy(bottomSlot, doc);
                fc.Bottom = bottomSlot;
            }
            topGroupDoc = doc;
        }
        else
        {
            // Compare to bottomSlot
            for (int compIDX = 0; ; compIDX++)
            {
                int c = reversed[compIDX] * comparators[compIDX].CompareBottom(doc);
                if (c < 0)
                {
                    // Definitely not competitive -- done
                    return;
                }
                else if (c > 0)
                {
                    // Definitely competitive.
                    break;
                }
                else if (compIDX == compIDXEnd)
                {
                    // Ties with bottom, except we know this docID is
                    // > docID in the queue (docs are visited in
                    // order), so not competitive:
                    return;
                }
            }

            //System.out.println("       best w/in group!");

            foreach (FieldComparator fc in comparators)
            {
                fc.Copy(bottomSlot, doc);
                // Necessary because some comparators cache
                // details of bottom slot; this forces them to
                // re-cache:
                fc.Bottom = bottomSlot;
            }
            topGroupDoc = doc;
        }
    }
    else
    {
        // We're not sure this group will make it into the
        // queue yet
        for (int compIDX = 0; ; compIDX++)
        {
            int c = reversed[compIDX] * comparators[compIDX].CompareBottom(doc);
            if (c < 0)
            {
                // Definitely not competitive -- done
                //System.out.println("doc doesn't compete w/ top groups");
                return;
            }
            else if (c > 0)
            {
                // Definitely competitive.
                break;
            }
            else if (compIDX == compIDXEnd)
            {
                // Ties with bottom, except we know this docID is
                // > docID in the queue (docs are visited in
                // order), so not competitive:
                //System.out.println("doc doesn't compete w/ top groups");
                return;
            }
        }
        groupCompetes = true;
        foreach (FieldComparator fc in comparators)
        {
            fc.Copy(bottomSlot, doc);
            // Necessary because some comparators cache
            // details of bottom slot; this forces them to
            // re-cache:
            fc.Bottom = bottomSlot;
        }
        topGroupDoc = doc;
        //System.out.println("  doc competes w/ top groups");
    }
}
+
/// <summary>
/// Returns <c>false</c>: block grouping relies on visiting docs in
/// increasing doc id order to detect group boundaries.
/// </summary>
public override bool AcceptsDocsOutOfOrder()
{
    return false;
}
+
/// <summary>
/// Advances to the next segment: flushes any pending group, records the new
/// doc base, obtains the last-doc-per-group iterator for the segment, and
/// re-binds each field comparator to the new reader context.
/// </summary>
public override AtomicReaderContext NextReader
{
    set
    {
        if (subDocUpto != 0)
        {
            ProcessGroup();
        }
        subDocUpto = 0;
        docBase = value.DocBase;
        //System.out.println("setNextReader base=" + docBase + " r=" + readerContext.reader);
        lastDocPerGroupBits = lastDocPerGroup.GetDocIdSet(value, value.AtomicReader.LiveDocs).GetIterator();
        groupEndDocID = -1;

        currentReaderContext = value;
        for (int i = 0; i < comparators.Length; i++)
        {
            comparators[i] = comparators[i].SetNextReader(value);
        }

    }
}
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/CollectedSearchGroup.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/CollectedSearchGroup.cs b/src/Lucene.Net.Grouping/CollectedSearchGroup.cs
new file mode 100644
index 0000000..9ed6cb8
--- /dev/null
+++ b/src/Lucene.Net.Grouping/CollectedSearchGroup.cs
@@ -0,0 +1,31 @@
+\ufeffusing System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping
+{
/// <summary>
/// Expert: representation of a group in <c>AbstractFirstPassGroupingCollector</c>,
/// tracking the top doc and <c>FieldComparator</c> slot.
/// @lucene.internal
/// </summary>
/// <typeparam name="TGroupValue">The type of the group value</typeparam>
public class CollectedSearchGroup<TGroupValue> : SearchGroup<TGroupValue>, ICollectedSearchGroup
{
    /// <summary>The doc id of the current top document for this group.</summary>
    public int TopDoc { get; internal set; }
    /// <summary>The comparator slot holding this group's sort values.</summary>
    public int ComparatorSlot { get; internal set; }
}
+
+
/// <summary>
/// LUCENENET specific interface for passing/comparing the CollectedSearchGroup
/// without referencing its generic type
/// </summary>
public interface ICollectedSearchGroup
{
    /// <summary>The doc id of the current top document for this group.</summary>
    int TopDoc { get; }
    /// <summary>The comparator slot holding this group's sort values.</summary>
    int ComparatorSlot { get; }
}
+}