You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/08/24 00:34:03 UTC
[04/17] lucenenet git commit: Completed the implementation port of
the Join project
Completed the implementation port of the Join project
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/47f20b9a
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/47f20b9a
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/47f20b9a
Branch: refs/heads/master
Commit: 47f20b9ac6a5890936fb48887cfef20830d9c4d9
Parents: bd772f0
Author: Josh Sullivan <ja...@gmail.com>
Authored: Mon Aug 17 11:30:55 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Mon Aug 17 11:30:55 2015 -0400
----------------------------------------------------------------------
Lucene.Net.Grouping/GroupDocs.cs | 71 +++
Lucene.Net.Grouping/Lucene.Net.Grouping.csproj | 61 ++
Lucene.Net.Grouping/Properties/AssemblyInfo.cs | 36 ++
Lucene.Net.Grouping/TopGroups.cs | 249 +++++++++
Lucene.Net.Join/Lucene.Net.Join.csproj | 9 +
Lucene.Net.Join/ToChildBlockJoinQuery.cs | 396 +++++++++++++
Lucene.Net.Join/ToParentBlockJoinCollector.cs | 560 +++++++++++++++++++
.../ToParentBlockJoinFieldComparator.cs | 391 +++++++++++++
Lucene.Net.Join/ToParentBlockJoinQuery.cs | 516 +++++++++++++++++
.../Lucene.Net.Tests.Join.csproj | 64 +++
.../Properties/AssemblyInfo.cs | 36 ++
Lucene.Net.Tests.Join/TestBlockJoin.cs | 7 +
Lucene.Net.sln | 14 +
.../Search/FieldValueHitQueue.cs | 4 +-
14 files changed, 2412 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Grouping/GroupDocs.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/GroupDocs.cs b/Lucene.Net.Grouping/GroupDocs.cs
new file mode 100644
index 0000000..00cdf83
--- /dev/null
+++ b/Lucene.Net.Grouping/GroupDocs.cs
@@ -0,0 +1,71 @@
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Grouping
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Represents one group in the results.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class GroupDocs<TGroupValueType>
+ {
+ /// <summary>
+ /// The groupField value for all docs in this group; this
+ /// may be null if hits did not have the groupField.
+ /// </summary>
+ public readonly TGroupValueType GroupValue;
+
+ /// <summary>
+ /// Max score in this group
+ /// </summary>
+ public readonly float MaxScore;
+
+ /// <summary>
+ /// Overall aggregated score of this group (currently only set by join queries).
+ /// </summary>
+ public readonly float Score;
+
+ /// <summary>
+ /// Hits; this may be {@link org.apache.lucene.search.FieldDoc} instances if the
+ /// withinGroupSort sorted by fields.
+ /// </summary>
+ public readonly ScoreDoc[] ScoreDocs;
+
+ /// <summary>
+ /// Total hits within this group
+ /// </summary>
+ public readonly int TotalHits;
+
+ /// <summary>
+ /// Matches the groupSort passed to {@link AbstractFirstPassGroupingCollector}.
+ /// </summary>
+ public readonly object[] GroupSortValues;
+
+ public GroupDocs(float score, float maxScore, int totalHits, ScoreDoc[] scoreDocs, TGroupValueType groupValue, object[] groupSortValues)
+ {
+ Score = score;
+ MaxScore = maxScore;
+ TotalHits = totalHits;
+ ScoreDocs = scoreDocs;
+ GroupValue = groupValue;
+ GroupSortValues = groupSortValues;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj b/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
new file mode 100644
index 0000000..540b438
--- /dev/null
+++ b/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{02BAB603-067D-48B1-AEDD-316849652568}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Grouping</RootNamespace>
+ <AssemblyName>Lucene.Net.Grouping</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="GroupDocs.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TopGroups.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/Properties/AssemblyInfo.cs b/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..9e6c1ce
--- /dev/null
+++ b/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Grouping")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Grouping")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("02bab603-067d-48b1-aedd-316849652568")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Grouping/TopGroups.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/TopGroups.cs b/Lucene.Net.Grouping/TopGroups.cs
new file mode 100644
index 0000000..017c975
--- /dev/null
+++ b/Lucene.Net.Grouping/TopGroups.cs
@@ -0,0 +1,249 @@
+using System;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Grouping
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Represents result returned by a grouping search.
+ ///
+ /// @lucene.experimental
+ /// </summary>
    /// <summary>
    /// Represents result returned by a grouping search.
    ///
    /// @lucene.experimental
    /// </summary>
    public class TopGroups<TGroupValueType>
    {
        /// <summary>
        /// Number of documents matching the search </summary>
        public readonly int TotalHitCount;

        /// <summary>
        /// Number of documents grouped into the topN groups </summary>
        public readonly int TotalGroupedHitCount;

        /// <summary>
        /// The total number of unique groups. If <code>null</code> this value is not computed. </summary>
        public readonly int? TotalGroupCount;

        /// <summary>
        /// Group results in groupSort order </summary>
        public readonly GroupDocs<TGroupValueType>[] Groups;

        /// <summary>
        /// How groups are sorted against each other </summary>
        public readonly SortField[] GroupSort;

        /// <summary>
        /// How docs are sorted within each group </summary>
        public readonly SortField[] WithinGroupSort;

        /// <summary>
        /// Highest score across all hits, or
        /// <code>Float.NaN</code> if scores were not computed.
        /// </summary>
        public readonly float MaxScore;

        /// <summary>
        /// Creates a result with no total group count (<see cref="TotalGroupCount"/> is null).
        /// </summary>
        public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValueType>[] groups, float maxScore)
        {
            GroupSort = groupSort;
            WithinGroupSort = withinGroupSort;
            TotalHitCount = totalHitCount;
            TotalGroupedHitCount = totalGroupedHitCount;
            Groups = groups;
            TotalGroupCount = null;
            MaxScore = maxScore;
        }

        /// <summary>
        /// Copies an existing result, attaching the given total group count.
        /// </summary>
        public TopGroups(TopGroups<TGroupValueType> oldTopGroups, int? totalGroupCount)
        {
            GroupSort = oldTopGroups.GroupSort;
            WithinGroupSort = oldTopGroups.WithinGroupSort;
            TotalHitCount = oldTopGroups.TotalHitCount;
            TotalGroupedHitCount = oldTopGroups.TotalGroupedHitCount;
            Groups = oldTopGroups.Groups;
            MaxScore = oldTopGroups.MaxScore;
            TotalGroupCount = totalGroupCount;
        }

        /// <summary>
        /// How the GroupDocs score (if any) should be merged. </summary>
        public enum ScoreMergeMode
        {
            /// <summary>
            /// Set score to Float.NaN
            /// </summary>
            None,

            /// <summary>
            /// Sum score across all shards for this group.
            /// </summary>
            Total,

            /// <summary>
            /// Avg score across all shards for this group.
            /// </summary>
            Avg,
        }

        /// <summary>
        /// Merges an array of TopGroups, for example obtained from the second-pass
        /// collector across multiple shards. Each TopGroups must have been sorted by the
        /// same groupSort and docSort, and the top groups passed to all second-pass
        /// collectors must be the same.
        ///
        /// <b>NOTE</b>: We can't always compute an exact totalGroupCount.
        /// Documents belonging to a group may occur on more than
        /// one shard and thus the merged totalGroupCount can be
        /// higher than the actual totalGroupCount. In this case the
        /// totalGroupCount represents an upper bound. If the documents
        /// of one group do only reside in one shard then the
        /// totalGroupCount is exact.
        ///
        /// <b>NOTE</b>: the topDocs in each GroupDocs is actually
        /// an instance of TopDocsAndShards
        /// </summary>
        public static TopGroups<T> Merge<T>(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN, ScoreMergeMode scoreMergeMode)
        {
            //System.out.println("TopGroups.merge");

            if (shardGroups.Length == 0)
            {
                return null;
            }

            int totalHitCount = 0;
            int totalGroupedHitCount = 0;
            // Optionally merge the totalGroupCount.
            int? totalGroupCount = null;

            // Every shard must report the same groups (same top groups passed to all
            // second-pass collectors), so the count from shard 0 is authoritative:
            int numGroups = shardGroups[0].Groups.Length;
            foreach (var shard in shardGroups)
            {
                if (numGroups != shard.Groups.Length)
                {
                    throw new ArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
                }
                totalHitCount += shard.TotalHitCount;
                totalGroupedHitCount += shard.TotalGroupedHitCount;
                // Only merge group counts when a shard actually computed one;
                // a single non-null shard switches the merged count on:
                if (shard.TotalGroupCount != null)
                {
                    if (totalGroupCount == null)
                    {
                        totalGroupCount = 0;
                    }

                    totalGroupCount += shard.TotalGroupCount;
                }
            }

            var mergedGroupDocs = new GroupDocs<T>[numGroups];

            // Scratch array, re-filled for each group with that group's per-shard TopDocs:
            TopDocs[] shardTopDocs = new TopDocs[shardGroups.Length];
            float totalMaxScore = float.MinValue;

            for (int groupIDX = 0; groupIDX < numGroups; groupIDX++)
            {
                T groupValue = shardGroups[0].Groups[groupIDX].GroupValue;
                //System.out.println("  merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
                float maxScore = float.MinValue;
                int totalHits = 0;
                double scoreSum = 0.0;
                for (int shardIdx = 0; shardIdx < shardGroups.Length; shardIdx++)
                {
                    //System.out.println("    shard=" + shardIDX);
                    TopGroups<T> shard = shardGroups[shardIdx];
                    var shardGroupDocs = shard.Groups[groupIDX];
                    // Sanity check: group groupIDX must carry the same group value on every shard.
                    if (groupValue == null)
                    {
                        if (shardGroupDocs.GroupValue != null)
                        {
                            throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
                        }
                    }
                    else if (!groupValue.Equals(shardGroupDocs.GroupValue))
                    {
                        throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
                    }

                    /*
                    for(ScoreDoc sd : shardGroupDocs.scoreDocs) {
                      System.out.println("      doc=" + sd.doc);
                    }
                    */

                    shardTopDocs[shardIdx] = new TopDocs(shardGroupDocs.TotalHits, shardGroupDocs.ScoreDocs, shardGroupDocs.MaxScore);
                    maxScore = Math.Max(maxScore, shardGroupDocs.MaxScore);
                    totalHits += shardGroupDocs.TotalHits;
                    scoreSum += shardGroupDocs.Score;
                }

                // Merge docOffset+docTopN hits, then slice off the first docOffset below:
                TopDocs mergedTopDocs = TopDocs.Merge(docSort, docOffset + docTopN, shardTopDocs);

                // Slice;
                ScoreDoc[] mergedScoreDocs;
                if (docOffset == 0)
                {
                    mergedScoreDocs = mergedTopDocs.ScoreDocs;
                }
                else if (docOffset >= mergedTopDocs.ScoreDocs.Length)
                {
                    // Offset is past all merged hits for this group; empty page.
                    mergedScoreDocs = new ScoreDoc[0];
                }
                else
                {
                    mergedScoreDocs = new ScoreDoc[mergedTopDocs.ScoreDocs.Length - docOffset];
                    Array.Copy(mergedTopDocs.ScoreDocs, docOffset, mergedScoreDocs, 0, mergedTopDocs.ScoreDocs.Length - docOffset);
                }

                float groupScore;
                switch (scoreMergeMode)
                {
                    case ScoreMergeMode.None:
                        groupScore = float.NaN;
                        break;
                    case ScoreMergeMode.Avg:
                        if (totalHits > 0)
                        {
                            groupScore = (float)(scoreSum / totalHits);
                        }
                        else
                        {
                            groupScore = float.NaN;
                        }
                        break;
                    case ScoreMergeMode.Total:
                        groupScore = (float)scoreSum;
                        break;
                    default:
                        throw new ArgumentException("can't handle ScoreMergeMode " + scoreMergeMode);
                }

                //System.out.println("SHARDS=" + Arrays.toString(mergedTopDocs.shardIndex));
                // Group sort values are taken from shard 0 (all shards carry the same groups):
                mergedGroupDocs[groupIDX] = new GroupDocs<T>(groupScore, maxScore, totalHits, mergedScoreDocs, groupValue, shardGroups[0].Groups[groupIDX].GroupSortValues);
                totalMaxScore = Math.Max(totalMaxScore, maxScore);
            }

            if (totalGroupCount != null)
            {
                var result = new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
                return new TopGroups<T>(result, totalGroupCount);
            }

            return new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
        }
    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/Lucene.Net.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Lucene.Net.Join.csproj b/Lucene.Net.Join/Lucene.Net.Join.csproj
index b38303e..30e64c6 100644
--- a/Lucene.Net.Join/Lucene.Net.Join.csproj
+++ b/Lucene.Net.Join/Lucene.Net.Join.csproj
@@ -48,8 +48,17 @@
<Compile Include="TermsCollector.cs" />
<Compile Include="TermsIncludingScoreQuery.cs" />
<Compile Include="TermsQuery.cs" />
+ <Compile Include="TermsWithScoreCollector.cs" />
+ <Compile Include="ToChildBlockJoinQuery.cs" />
+ <Compile Include="ToParentBlockJoinCollector.cs" />
+ <Compile Include="ToParentBlockJoinFieldComparator.cs" />
+ <Compile Include="ToParentBlockJoinQuery.cs" />
</ItemGroup>
<ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
+ <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
+ <Name>Lucene.Net.Grouping</Name>
+ </ProjectReference>
<ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
<Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
<Name>Lucene.Net</Name>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/ToChildBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToChildBlockJoinQuery.cs b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
new file mode 100644
index 0000000..035301e
--- /dev/null
+++ b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
@@ -0,0 +1,396 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Just like <see cref="ToParentBlockJoinQuery"/>, except this
+ /// query joins in reverse: you provide a Query matching
+ /// parent documents and it joins down to child
+ /// documents.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToChildBlockJoinQuery : Query
+ {
+ /// <summary>
+ /// Message thrown from <see cref="ToChildBlockJoinScorer.ValidateParentDoc"/>
+ /// on mis-use, when the parent query incorrectly returns child docs.
+ /// </summary>
+ internal const string InvalidQueryMessage = "Parent query yields document which is not matched by parents filter, docID=";
+
+ private readonly Filter _parentsFilter;
+ private readonly Query _parentQuery;
+
+ // If we are rewritten, this is the original parentQuery we
+ // were passed; we use this for .equals() and
+ // .hashCode(). This makes rewritten query equal the
+ // original, so that user does not have to .rewrite() their
+ // query before searching:
+ private readonly Query _origParentQuery;
+ private readonly bool _doScores;
+
+ /// <summary>
+ /// Create a ToChildBlockJoinQuery.
+ /// </summary>
+ /// <param name="parentQuery">Query that matches parent documents</param>
+ /// <param name="parentsFilter">Filter (must produce FixedBitSet per-segment, like <see cref="FixedBitSetCachingWrapperFilter"/>)
+ /// identifying the parent documents.</param>
+ /// <param name="doScores">True if parent scores should be calculated.</param>
+ public ToChildBlockJoinQuery(Query parentQuery, Filter parentsFilter, bool doScores)
+ {
+ _origParentQuery = parentQuery;
+ _parentQuery = parentQuery;
+ _parentsFilter = parentsFilter;
+ _doScores = doScores;
+ }
+
+ private ToChildBlockJoinQuery(Query origParentQuery, Query parentQuery, Filter parentsFilter, bool doScores) : base()
+ {
+ _origParentQuery = origParentQuery;
+ _parentQuery = parentQuery;
+ _parentsFilter = parentsFilter;
+ _doScores = doScores;
+ }
+
+ public override Weight CreateWeight(IndexSearcher searcher)
+ {
+ return new ToChildBlockJoinWeight(this, _parentQuery.CreateWeight(searcher), _parentsFilter, _doScores);
+ }
+
+ private class ToChildBlockJoinWeight : Weight
+ {
+ private readonly Query _joinQuery;
+ private readonly Weight _parentWeight;
+ private readonly Filter _parentsFilter;
+ private readonly bool _doScores;
+
+ public ToChildBlockJoinWeight(Query joinQuery, Weight parentWeight, Filter parentsFilter, bool doScores) : base()
+ {
+ _joinQuery = joinQuery;
+ _parentWeight = parentWeight;
+ _parentsFilter = parentsFilter;
+ _doScores = doScores;
+ }
+
+ public override Query Query
+ {
+ get { return _joinQuery; }
+ }
+
+ public override float ValueForNormalization
+ {
+ get { return _parentWeight.ValueForNormalization*_joinQuery.Boost*_joinQuery.Boost; }
+ }
+
+ public override void Normalize(float norm, float topLevelBoost)
+ {
+ _parentWeight.Normalize(norm, topLevelBoost * _joinQuery.Boost);
+ }
+
+ // NOTE: acceptDocs applies (and is checked) only in the child document space
+ public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
+ {
+ Scorer parentScorer = _parentWeight.Scorer(readerContext, null);
+
+ if (parentScorer == null)
+ {
+ // No matches
+ return null;
+ }
+
+ // NOTE: we cannot pass acceptDocs here because this
+ // will (most likely, justifiably) cause the filter to
+ // not return a FixedBitSet but rather a
+ // BitsFilteredDocIdSet. Instead, we filter by
+ // acceptDocs when we score:
+ DocIdSet parents = _parentsFilter.GetDocIdSet(readerContext, null);
+
+ if (parents == null)
+ {
+ // No matches
+ return null;
+ }
+ if (!(parents is FixedBitSet))
+ {
+ throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
+ }
+
+ return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet)parents, _doScores, acceptDocs);
+ }
+
+ public override Explanation Explain(AtomicReaderContext reader, int doc)
+ {
+ // TODO
+ throw new NotSupportedException(GetType().Name + " cannot explain match on parent document");
+ }
+
+ public override bool ScoresDocsOutOfOrder()
+ {
+ return false;
+ }
+ }
+
+ private sealed class ToChildBlockJoinScorer : Scorer
+ {
+ private readonly Scorer _parentScorer;
+ private readonly FixedBitSet _parentBits;
+ private readonly bool _doScores;
+ private readonly Bits _acceptDocs;
+
+ private float _parentScore;
+ private int _parentFreq = 1;
+
+ private int _childDoc = -1;
+ private int _parentDoc;
+
+ public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, bool doScores, Bits acceptDocs) : base(weight)
+ {
+ _doScores = doScores;
+ _parentBits = parentBits;
+ _parentScorer = parentScorer;
+ _acceptDocs = acceptDocs;
+ }
+
+ public override ICollection<ChildScorer> Children
+ {
+ get { return Collections.Singleton(new ChildScorer(_parentScorer, "BLOCK_JOIN")); }
+ }
+
+ public override int NextDoc()
+ {
+ //System.out.println("Q.nextDoc() parentDoc=" + parentDoc + " childDoc=" + childDoc);
+
+ // Loop until we hit a childDoc that's accepted
+ while (true)
+ {
+ if (_childDoc + 1 == _parentDoc)
+ {
+ // OK, we are done iterating through all children
+ // matching this one parent doc, so we now nextDoc()
+ // the parent. Use a while loop because we may have
+ // to skip over some number of parents w/ no
+ // children:
+ while (true)
+ {
+ _parentDoc = _parentScorer.NextDoc();
+ ValidateParentDoc();
+
+ if (_parentDoc == 0)
+ {
+ // Degenerate but allowed: first parent doc has no children
+ // TODO: would be nice to pull initial parent
+ // into ctor so we can skip this if... but it's
+ // tricky because scorer must return -1 for
+ // .doc() on init...
+ _parentDoc = _parentScorer.NextDoc();
+ ValidateParentDoc();
+ }
+
+ if (_parentDoc == NO_MORE_DOCS)
+ {
+ _childDoc = NO_MORE_DOCS;
+ //System.out.println(" END");
+ return _childDoc;
+ }
+
+ // Go to first child for this next parentDoc:
+ _childDoc = 1 + _parentBits.PrevSetBit(_parentDoc - 1);
+
+ if (_childDoc == _parentDoc)
+ {
+ // This parent has no children; continue
+ // parent loop so we move to next parent
+ continue;
+ }
+
+ if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+ {
+ goto nextChildDocContinue;
+ }
+
+ if (_childDoc < _parentDoc)
+ {
+ if (_doScores)
+ {
+ _parentScore = _parentScorer.Score();
+ _parentFreq = _parentScorer.Freq();
+ }
+ //System.out.println(" " + childDoc);
+ return _childDoc;
+ }
+ else
+ {
+ // Degenerate but allowed: parent has no children
+ }
+ }
+ }
+
+ Debug.Assert(_childDoc < _parentDoc, "childDoc=" + _childDoc + " parentDoc=" + _parentDoc);
+ _childDoc++;
+ if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+ {
+ continue;
+ }
+ //System.out.println(" " + childDoc);
+ return _childDoc;
+ nextChildDocContinue:;
+ }
+ }
+
+ /// <summary>
+ /// Detect mis-use, where provided parent query in fact sometimes returns child documents.
+ /// </summary>
+ private void ValidateParentDoc()
+ {
+ if (_parentDoc != NO_MORE_DOCS && !_parentBits.Get(_parentDoc))
+ {
+ throw new InvalidOperationException(InvalidQueryMessage + _parentDoc);
+ }
+ }
+
+ public override int DocID()
+ {
+ return _childDoc;
+ }
+
+ public override float Score()
+ {
+ return _parentScore;
+ }
+
+ public override int Freq()
+ {
+ return _parentFreq;
+ }
+
+ public override int Advance(int childTarget)
+ {
+ Debug.Assert(childTarget >= _parentBits.Length() || !_parentBits.Get(childTarget));
+
+ //System.out.println("Q.advance childTarget=" + childTarget);
+ if (childTarget == NO_MORE_DOCS)
+ {
+ //System.out.println(" END");
+ return _childDoc = _parentDoc = NO_MORE_DOCS;
+ }
+
+ Debug.Assert(_childDoc == -1 || childTarget != _parentDoc, "childTarget=" + childTarget);
+ if (_childDoc == -1 || childTarget > _parentDoc)
+ {
+ // Advance to new parent:
+ _parentDoc = _parentScorer.Advance(childTarget);
+ ValidateParentDoc();
+ //System.out.println(" advance to parentDoc=" + parentDoc);
+ Debug.Assert(_parentDoc > childTarget);
+ if (_parentDoc == NO_MORE_DOCS)
+ {
+ //System.out.println(" END");
+ return _childDoc = NO_MORE_DOCS;
+ }
+ if (_doScores)
+ {
+ _parentScore = _parentScorer.Score();
+ _parentFreq = _parentScorer.Freq();
+ }
+ int firstChild = _parentBits.PrevSetBit(_parentDoc - 1);
+ //System.out.println(" firstChild=" + firstChild);
+ childTarget = Math.Max(childTarget, firstChild);
+ }
+
+ Debug.Assert(childTarget < _parentDoc);
+
+ // Advance within children of current parent:
+ _childDoc = childTarget;
+ //System.out.println(" " + childDoc);
+ if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+ {
+ NextDoc();
+ }
+ return _childDoc;
+ }
+
+ public override long Cost()
+ {
+ return _parentScorer.Cost();
+ }
+ }
+
+ public override void ExtractTerms(ISet<Term> terms)
+ {
+ _parentQuery.ExtractTerms(terms);
+ }
+
+ public override Query Rewrite(IndexReader reader)
+ {
+ Query parentRewrite = _parentQuery.Rewrite(reader);
+ if (parentRewrite != _parentQuery)
+ {
+ Query rewritten = new ToChildBlockJoinQuery(_parentQuery, parentRewrite, _parentsFilter, _doScores);
+ rewritten.Boost = Boost;
+ return rewritten;
+ }
+
+ return this;
+ }
+
+ public override string ToString(string field)
+ {
+ return "ToChildBlockJoinQuery (" + _parentQuery + ")";
+ }
+
+ protected bool Equals(ToChildBlockJoinQuery other)
+ {
+ return base.Equals(other) &&
+ Equals(_origParentQuery, other._origParentQuery) &&
+ _doScores == other._doScores &&
+ Equals(_parentsFilter, other._parentsFilter);
+ }
+
+ public override bool Equals(object obj)
+ {
+ if (ReferenceEquals(null, obj)) return false;
+ if (ReferenceEquals(this, obj)) return true;
+ if (obj.GetType() != GetType()) return false;
+ return Equals((ToChildBlockJoinQuery) obj);
+ }
+
+ public override int GetHashCode()
+ {
+ unchecked
+ {
+ int hashCode = base.GetHashCode();
+ hashCode = (hashCode*397) ^ (_origParentQuery != null ? _origParentQuery.GetHashCode() : 0);
+ hashCode = (hashCode*397) ^ _doScores.GetHashCode();
+ hashCode = (hashCode*397) ^ (_parentsFilter != null ? _parentsFilter.GetHashCode() : 0);
+ return hashCode;
+ }
+ }
+
+ public override object Clone()
+ {
+ return new ToChildBlockJoinQuery((ToChildBlockJoinQuery) _origParentQuery.Clone(), _parentsFilter, _doScores);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/ToParentBlockJoinCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinCollector.cs b/Lucene.Net.Join/ToParentBlockJoinCollector.cs
new file mode 100644
index 0000000..22fa53e
--- /dev/null
+++ b/Lucene.Net.Join/ToParentBlockJoinCollector.cs
@@ -0,0 +1,560 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Grouping;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Collects parent document hits for a Query containing one or more
+ /// BlockJoinQuery clauses, sorted by the
+ /// specified parent Sort. Note that this cannot perform
+ /// arbitrary joins; rather, it requires that all joined
+ /// documents are indexed as a doc block (using {@link
+ /// IndexWriter#addDocuments} or {@link
+ /// IndexWriter#updateDocuments}). Ie, the join is computed
+ /// at index time.
+ ///
+ /// <p>The parent Sort must only use
+ /// fields from the parent documents; sorting by field in
+ /// the child documents is not supported.</p>
+ ///
+ /// <p>You should only use this
+ /// collector if one or more of the clauses in the query is
+ /// a <seealso cref="ToParentBlockJoinQuery"/>. This collector will find those query
+ /// clauses and record the matching child documents for the
+ /// top scoring parent documents.</p>
+ ///
+ /// <p>Multiple joins (star join) and nested joins and a mix
+ /// of the two are allowed, as long as in all cases the
+ /// documents corresponding to a single row of each joined
+ /// parent table were indexed as a doc block.</p>
+ ///
+ /// <p>For the simple star join you can retrieve the
+ /// <seealso cref="TopGroups"/> instance containing each <seealso cref="ToParentBlockJoinQuery"/>'s
+ /// matching child documents for the top parent groups,
+ /// using <seealso cref="#getTopGroups"/>. Ie,
+ /// a single query, which will contain two or more
+ /// <seealso cref="ToParentBlockJoinQuery"/>'s as clauses representing the star join,
+ /// can then retrieve two or more <seealso cref="TopGroups"/> instances.</p>
+ ///
+ /// <p>For nested joins, the query will run correctly (ie,
+ /// match the right parent and child documents), however,
+ /// because TopGroups is currently unable to support nesting
+ /// (each group is not able to hold another TopGroups), you
+ /// are only able to retrieve the TopGroups of the first
+ /// join. The TopGroups of the nested joins will not be
+ /// correct.
+ ///
+ /// See <seealso cref="org.apache.lucene.search.join"/> for a code
+ /// sample.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinCollector : Collector
+ {
+ private readonly Sort sort;
+
+ // Maps each BlockJoinQuery instance to its "slot" in
+ // joinScorers and in OneGroup's cached doc/scores/count:
+ private readonly IDictionary<Query, int?> joinQueryID = new Dictionary<Query, int?>();
+ private readonly int numParentHits;
+ private readonly FieldValueHitQueue<OneGroup> queue;
+ private readonly FieldComparator[] comparators;
+ private readonly int[] reverseMul;
+ private readonly int compEnd;
+ private readonly bool trackMaxScore;
+ private readonly bool trackScores;
+
+ private int docBase;
+ private ToParentBlockJoinQuery.BlockJoinScorer[] joinScorers = new ToParentBlockJoinQuery.BlockJoinScorer[0];
+ private AtomicReaderContext currentReaderContext;
+ private Scorer scorer;
+ private bool queueFull;
+
+ private OneGroup bottom;
+ private int totalHitCount;
+ private float maxScore = float.NaN;
+
+ /// <summary>
+ /// Creates a ToParentBlockJoinCollector. The provided sort must
+ /// not be null. If you pass true trackScores, all
+ /// ToParentBlockJoinQuery instances must not use
+ /// ScoreMode.None.
+ /// </summary>
+ /// <param name="sort">Sort order for the parent hits; must not be null.</param>
+ /// <param name="numParentHits">How many top parent hits to keep in the queue.</param>
+ /// <param name="trackScores">Whether to record a score per parent hit.</param>
+ /// <param name="trackMaxScore">Whether to track the maximum score across all parent hits.</param>
+ public ToParentBlockJoinCollector(Sort sort, int numParentHits, bool trackScores, bool trackMaxScore)
+ {
+ // TODO: allow null sort to be specialized to relevance
+ // only collector
+ this.sort = sort;
+ this.trackMaxScore = trackMaxScore;
+ if (trackMaxScore)
+ {
+ // Seed so the first Math.Max in Collect always wins.
+ maxScore = float.MinValue;
+ }
+ //System.out.println("numParentHits=" + numParentHits);
+ this.trackScores = trackScores;
+ this.numParentHits = numParentHits;
+ queue = FieldValueHitQueue.Create<OneGroup>(sort.GetSort(), numParentHits);
+ comparators = queue.Comparators;
+ reverseMul = queue.ReverseMul;
+ // Index of the last comparator; used by Collect's tie-break loop.
+ compEnd = comparators.Length - 1;
+ }
+
+ /// <summary>
+ /// One parent hit held in the queue, together with the cached child doc ids,
+ /// (optionally) child scores, and child counts for each enrolled join query.
+ /// The per-join arrays start at length 5 and are grown on demand in CopyGroups.
+ /// </summary>
+ private sealed class OneGroup : FieldValueHitQueue.Entry
+ {
+ public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, bool doScores)
+ : base(comparatorSlot, parentDoc, parentScore)
+ {
+ //System.out.println("make OneGroup parentDoc=" + parentDoc);
+ docs = new int[numJoins][];
+ for (int joinId = 0; joinId < numJoins; joinId++)
+ {
+ docs[joinId] = new int[5];
+ }
+ if (doScores)
+ {
+ scores = new float[numJoins][];
+ for (int joinId = 0; joinId < numJoins; joinId++)
+ {
+ scores[joinId] = new float[5];
+ }
+ }
+ counts = new int[numJoins];
+ }
+ // Segment context the parent doc came from (needed later to re-collect children).
+ internal AtomicReaderContext readerContext;
+ // docs[joinSlot][i] = i-th child doc id for that join query.
+ internal int[][] docs;
+ // scores[joinSlot][i] = matching child score; null when doScores == false.
+ internal float[][] scores;
+ // counts[joinSlot] = number of valid entries in docs/scores for that slot.
+ internal int[] counts;
+ }
+
+ /// <summary>
+ /// Collects one parent-doc hit: updates the max score if requested, checks
+ /// whether the hit is competitive against the queue bottom (relying on docs
+ /// arriving in doc-id order), and snapshots the matching child docs/scores
+ /// from every enrolled join scorer via CopyGroups.
+ /// </summary>
+ public override void Collect(int parentDoc)
+ {
+ //System.out.println("\nC parentDoc=" + parentDoc);
+ totalHitCount++;
+
+ float score = float.NaN;
+
+ if (trackMaxScore)
+ {
+ score = scorer.Score();
+ maxScore = Math.Max(maxScore, score);
+ }
+
+ // TODO: we could sweep all joinScorers here and
+ // aggregate total child hit count, so we can fill this
+ // in getTopGroups (we wire it to 0 now)
+
+ if (queueFull)
+ {
+ //System.out.println(" queueFull");
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(parentDoc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ //System.out.println(" skip");
+ return;
+ }
+ if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ if (i == compEnd)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ //System.out.println(" skip");
+ return;
+ }
+ }
+
+ //System.out.println(" competes! doc=" + (docBase + parentDoc));
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.Slot, parentDoc);
+ }
+ if (!trackMaxScore && trackScores)
+ {
+ // Score not computed yet in this path; compute it once here.
+ score = scorer.Score();
+ }
+ bottom.Doc = docBase + parentDoc;
+ bottom.readerContext = currentReaderContext;
+ bottom.Score = score;
+ CopyGroups(bottom);
+ bottom = queue.UpdateTop();
+
+ // Re-point every comparator at the new bottom slot.
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Bottom = bottom.Slot;
+ }
+ }
+ else
+ {
+ // Startup transient: queue is not yet full:
+ int comparatorSlot = totalHitCount - 1;
+
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(comparatorSlot, parentDoc);
+ }
+ //System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
+ if (!trackMaxScore && trackScores)
+ {
+ score = scorer.Score();
+ }
+ OneGroup og = new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.Length, trackScores);
+ og.readerContext = currentReaderContext;
+ CopyGroups(og);
+ bottom = queue.Add(og);
+ queueFull = totalHitCount == numParentHits;
+ if (queueFull)
+ {
+ // End of startup transient: queue just filled up:
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Bottom = bottom.Slot;
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Pulls out the child doc ids (and scores, when tracked) for all join
+ /// queries into the given OneGroup, growing its per-join arrays if new join
+ /// scorers appeared after the group was created.
+ /// </summary>
+ private void CopyGroups(OneGroup og)
+ {
+ // While rare, it's possible top arrays could be too
+ // short if join query had null scorer on first
+ // segment(s) but then became non-null on later segments
+ int numSubScorers = joinScorers.Length;
+ if (og.docs.Length < numSubScorers)
+ {
+ // While rare, this could happen if join query had
+ // null scorer on first segment(s) but then became
+ // non-null on later segments
+ og.docs = ArrayUtil.Grow(og.docs);
+ }
+ if (og.counts.Length < numSubScorers)
+ {
+ og.counts = ArrayUtil.Grow(og.counts);
+ }
+ if (trackScores && og.scores.Length < numSubScorers)
+ {
+ og.scores = ArrayUtil.Grow(og.scores);
+ }
+
+ //System.out.println("\ncopyGroups parentDoc=" + og.doc);
+ for (int scorerIDX = 0; scorerIDX < numSubScorers; scorerIDX++)
+ {
+ ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
+ //System.out.println(" scorer=" + joinScorer);
+ // Only copy when this scorer is currently positioned on og's parent doc;
+ // otherwise this join contributed no children for this parent.
+ if (joinScorer != null && docBase + joinScorer.ParentDoc == og.Doc)
+ {
+ og.counts[scorerIDX] = joinScorer.ChildCount;
+ //System.out.println(" count=" + og.counts[scorerIDX]);
+ og.docs[scorerIDX] = joinScorer.SwapChildDocs(og.docs[scorerIDX]);
+ Debug.Assert(og.docs[scorerIDX].Length >= og.counts[scorerIDX], "length=" + og.docs[scorerIDX].Length + " vs count=" + og.counts[scorerIDX]);
+ //System.out.println(" len=" + og.docs[scorerIDX].length);
+ /*
+ for(int idx=0;idx<og.counts[scorerIDX];idx++) {
+ System.out.println(" docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
+ }
+ */
+ if (trackScores)
+ {
+ //System.out.println(" copy scores");
+ og.scores[scorerIDX] = joinScorer.SwapChildScores(og.scores[scorerIDX]);
+ Debug.Assert(og.scores[scorerIDX].Length >= og.counts[scorerIDX], "length=" + og.scores[scorerIDX].Length + " vs count=" + og.counts[scorerIDX]);
+ }
+ }
+ else
+ {
+ og.counts[scorerIDX] = 0;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Advances the collector to a new segment: records the context and doc base,
+ /// and rebinds every queue comparator to the new reader.
+ /// </summary>
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ currentReaderContext = value;
+ docBase = value.DocBase;
+ for (int compIDX = 0; compIDX < comparators.Length; compIDX++)
+ {
+ queue.SetComparator(compIDX, comparators[compIDX].SetNextReader(value));
+ }
+ }
+ }
+
+ // Always false: Collect's competitiveness tie-break explicitly relies on
+ // parent docs being visited in doc-id order.
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+
+ /// <summary>
+ /// Registers a BlockJoinScorer with this collector: assigns its query a new
+ /// slot the first time the query is seen, or rebinds the existing slot to
+ /// the new per-segment scorer on later calls.
+ /// </summary>
+ private void enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer)
+ {
+ scorer.TrackPendingChildHits();
+ // The original port indexed the dictionary directly, which throws
+ // KeyNotFoundException for a query that has not been enrolled yet (the
+ // common first-time path). Use TryGetValue to mirror Java's
+ // Map.get() null-on-missing semantics.
+ int? slot;
+ if (!joinQueryID.TryGetValue(query, out slot) || slot == null)
+ {
+ joinQueryID[query] = joinScorers.Length;
+ //System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
+ ToParentBlockJoinQuery.BlockJoinScorer[] newArray = new ToParentBlockJoinQuery.BlockJoinScorer[1 + joinScorers.Length];
+ Array.Copy(joinScorers, 0, newArray, 0, joinScorers.Length);
+ joinScorers = newArray;
+ joinScorers[joinScorers.Length - 1] = scorer;
+ }
+ else
+ {
+ joinScorers[(int) slot] = scorer;
+ }
+ }
+
+ /// <summary>
+ /// Installs the scorer for the current segment: wraps it in a score cache,
+ /// hands it to every comparator, then walks the whole scorer tree
+ /// breadth-first and enrolls every BlockJoinScorer it finds.
+ /// </summary>
+ public override Scorer Scorer
+ {
+ set
+ {
+ //System.out.println("C.setScorer scorer=" + value);
+ // Since we invoke .Score(), and the comparators likely
+ // do as well, cache it so it's only "really" computed
+ // once:
+ scorer = new ScoreCachingWrappingScorer(value);
+ for (int compIdx = 0; compIdx < comparators.Length; compIdx++)
+ {
+ comparators[compIdx].Scorer = scorer;
+ }
+ // Forget scorers from the previous segment; enroll() rebinds them.
+ Arrays.Fill(joinScorers, null);
+
+ // Fix over the original port: the old loop dequeued into a discard and
+ // kept inspecting 'value' (the root scorer) on every iteration, so
+ // sub-scorers were never examined. Process the dequeued scorer instead.
+ var queue = new Queue<Scorer>();
+ queue.Enqueue(value);
+ while (queue.Count > 0)
+ {
+ Scorer current = queue.Dequeue();
+ if (current == null)
+ {
+ continue;
+ }
+ if (current is ToParentBlockJoinQuery.BlockJoinScorer)
+ {
+ enroll((ToParentBlockJoinQuery)current.Weight.Query, (ToParentBlockJoinQuery.BlockJoinScorer)current);
+ }
+
+ foreach (Scorer.ChildScorer sub in current.Children)
+ {
+ queue.Enqueue(sub.Child);
+ }
+ }
+ }
+ }
+
+ // Queue contents in final sorted order; populated lazily by sortQueue().
+ private OneGroup[] sortedGroups;
+
+ /// <summary>
+ /// Drains the priority queue into sortedGroups, best group first (the queue
+ /// pops worst-first, so we fill the array from the end).
+ /// </summary>
+ private void sortQueue()
+ {
+ sortedGroups = new OneGroup[queue.Size()];
+ for (int downTo = queue.Size() - 1; downTo >= 0; downTo--)
+ {
+ sortedGroups[downTo] = queue.Pop();
+ }
+ }
+
+ /// <summary>
+ /// Returns the TopGroups for the specified
+ /// BlockJoinQuery. The groupValue of each GroupDocs will
+ /// be the parent docID for that group.
+ /// The number of documents within each group is calculated as minimum of <code>maxDocsPerGroup</code>
+ /// and number of matched child documents for that group.
+ /// Returns null if no groups matched.
+ /// </summary>
+ /// <param name="query"> Search query </param>
+ /// <param name="withinGroupSort"> Sort criteria within groups </param>
+ /// <param name="offset"> Parent docs offset </param>
+ /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
+ /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
+ /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
+ /// <returns> TopGroups for specified query </returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ public virtual TopGroups<int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)
+ {
+ // Fix over the original port: indexing the dictionary throws
+ // KeyNotFoundException for a query that was never enrolled; use
+ // TryGetValue to get Java's Map.get() null-on-missing behavior.
+ int? slot;
+ joinQueryID.TryGetValue(query, out slot);
+ if (slot == null && totalHitCount == 0)
+ {
+ return null;
+ }
+
+ if (sortedGroups == null)
+ {
+ // First call: drain the queue into sorted order.
+ if (offset >= queue.Size())
+ {
+ return null;
+ }
+ sortQueue();
+ }
+ else if (offset > sortedGroups.Length)
+ {
+ return null;
+ }
+
+ // slot == -1 means the query contributed no child hits anywhere.
+ return AccumulateGroups(slot == null ? -1 : (int)slot, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
+ }
+
+ /// <summary>
+ /// Accumulates groups for the BlockJoinQuery specified by its slot:
+ /// for every sorted parent group past the offset, re-sorts its cached child
+ /// docs (by score or by withinGroupSort) through a fresh sub-collector and
+ /// wraps the results in GroupDocs/TopGroups.
+ /// </summary>
+ /// <param name="slot"> Search query's slot; -1 means no child hits for this query </param>
+ /// <param name="offset"> Parent docs offset </param>
+ /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
+ /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
+ /// <param name="withinGroupSort"> Sort criteria within groups; null means sort by score </param>
+ /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
+ /// <returns> TopGroups for the query specified by slot </returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ private TopGroups<int> AccumulateGroups(int slot, int offset, int maxDocsPerGroup, int withinGroupOffset, Sort withinGroupSort, bool fillSortFields)
+ {
+ var groups = new GroupDocs<int>[sortedGroups.Length - offset];
+ // FakeScorer replays the cached child scores to the sub-collector.
+ var fakeScorer = new FakeScorer();
+
+ int totalGroupedHitCount = 0;
+ //System.out.println("slot=" + slot);
+
+ for (int groupIdx = offset; groupIdx < sortedGroups.Length; groupIdx++)
+ {
+ OneGroup og = sortedGroups[groupIdx];
+ int numChildDocs;
+ if (slot == -1 || slot >= og.counts.Length)
+ {
+ numChildDocs = 0;
+ }
+ else
+ {
+ numChildDocs = og.counts[slot];
+ }
+
+ // Number of documents in group should be bounded to prevent redundant memory allocation
+ int numDocsInGroup = Math.Max(1, Math.Min(numChildDocs, maxDocsPerGroup));
+ //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
+
+ // At this point we hold all docs w/ in each group, unsorted; we now sort them:
+ Collector collector;
+ if (withinGroupSort == null)
+ {
+ //System.out.println("sort by score");
+ // Sort by score
+ if (!trackScores)
+ {
+ throw new ArgumentException("cannot sort by relevance within group: trackScores=false");
+ }
+ collector = TopScoreDocCollector.Create(numDocsInGroup, true);
+ }
+ else
+ {
+ // Sort by fields
+ collector = TopFieldCollector.Create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
+ }
+
+ collector.Scorer = fakeScorer;
+ collector.NextReader = og.readerContext;
+ for (int docIdx = 0; docIdx < numChildDocs; docIdx++)
+ {
+ //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
+ int doc = og.docs[slot][docIdx];
+ fakeScorer.doc = doc;
+ if (trackScores)
+ {
+ fakeScorer._score = og.scores[slot][docIdx];
+ }
+ collector.Collect(doc);
+ }
+ totalGroupedHitCount += numChildDocs;
+
+ object[] groupSortValues;
+
+ if (fillSortFields)
+ {
+ // Pull the parent-level sort values out of the queue comparators.
+ groupSortValues = new object[comparators.Length];
+ for (int sortFieldIdx = 0; sortFieldIdx < comparators.Length; sortFieldIdx++)
+ {
+ groupSortValues[sortFieldIdx] = comparators[sortFieldIdx].Value(og.Slot);
+ }
+ }
+ else
+ {
+ groupSortValues = null;
+ }
+
+ TopDocs topDocs;
+ if (withinGroupSort == null)
+ {
+ var tempCollector = (TopScoreDocCollector) collector;
+ topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
+ }
+ else
+ {
+ var tempCollector = (TopFieldCollector) collector;
+ topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
+ }
+
+ groups[groupIdx - offset] = new GroupDocs<int>(og.Score, topDocs.MaxScore, numChildDocs, topDocs.ScoreDocs, og.Doc, groupSortValues);
+ }
+
+ // Outer TopGroups carries the overall parent totalHitCount; total group
+ // count is wired to 0 (see the TODO in Collect).
+ return new TopGroups<int>(new TopGroups<int>(sort.GetSort(), withinGroupSort == null ? null : withinGroupSort.GetSort(), 0, totalGroupedHitCount, groups, maxScore), totalHitCount);
+ }
+
+ /// <summary>
+ /// Returns the TopGroups for the specified BlockJoinQuery. The groupValue of each
+ /// GroupDocs will be the parent docID for that group. The number of documents within
+ /// each group equals to the total number of matched child documents for that group.
+ /// Returns null if no groups matched.
+ /// Convenience overload of <see cref="GetTopGroups"/> with maxDocsPerGroup = int.MaxValue.
+ /// </summary>
+ /// <param name="query">Search query</param>
+ /// <param name="withinGroupSort">Sort criteria within groups</param>
+ /// <param name="offset">Parent docs offset</param>
+ /// <param name="withinGroupOffset">Offset within each group of child docs</param>
+ /// <param name="fillSortFields">Specifies whether to add sort fields or not</param>
+ /// <returns>TopGroups for specified query</returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ public virtual TopGroups<int> GetTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int withinGroupOffset, bool fillSortFields)
+ {
+ return GetTopGroups(query, withinGroupSort, offset, int.MaxValue, withinGroupOffset, fillSortFields);
+ }
+
+ /// <summary>
+ /// Returns the highest score across all collected parent hits, as long as
+ /// <code>trackMaxScore=true</code> was passed on construction
+ /// (<see cref="ToParentBlockJoinCollector(Sort, int, bool, bool)"/>).
+ /// Else, this returns <code>float.NaN</code>.
+ /// </summary>
+ public virtual float MaxScore
+ {
+ get { return maxScore; }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs b/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
new file mode 100644
index 0000000..4386e39
--- /dev/null
+++ b/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
@@ -0,0 +1,391 @@
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A field comparator that allows parent documents to be sorted by fields
+ /// from the nested / child documents.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public abstract class ToParentBlockJoinFieldComparator : FieldComparator<object>
+ {
+ private readonly Filter _parentFilter;
+ private readonly Filter _childFilter;
+ private readonly int _spareSlot;
+
+ private FieldComparator<object> _wrappedComparator;
+ private FixedBitSet _parentDocuments;
+ private FixedBitSet _childDocuments;
+
+ /// <summary>
+ /// Wraps a child-level comparator with the parent/child filters and the spare
+ /// slot used by subclasses to find the most competitive nested doc.
+ /// </summary>
+ internal ToParentBlockJoinFieldComparator(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ {
+ _wrappedComparator = wrappedComparator;
+ _parentFilter = parentFilter;
+ _childFilter = childFilter;
+ _spareSlot = spareSlot;
+ }
+
+ // Slot-vs-slot comparison is delegated entirely to the wrapped comparator.
+ public override int Compare(int slot1, int slot2)
+ {
+ return _wrappedComparator.Compare(slot1, slot2);
+ }
+
+ // Forwards the queue-bottom slot to the wrapped comparator.
+ public override int Bottom
+ {
+ set
+ {
+ _wrappedComparator.Bottom = value;
+ }
+ }
+
+ // Forwards the searchAfter top value to the wrapped comparator.
+ public override object TopValue
+ {
+ set
+ {
+ _wrappedComparator.TopValue = value;
+ }
+ }
+
+ /// <summary>
+ /// Advances to a new segment: resolves the child and parent filters to
+ /// FixedBitSets for this segment (materializing via an iterator when the
+ /// filter produces some other DocIdSet), then rebinds the wrapped comparator.
+ /// </summary>
+ public override FieldComparator SetNextReader(AtomicReaderContext context)
+ {
+ DocIdSet innerDocuments = _childFilter.GetDocIdSet(context, null);
+ if (IsEmpty(innerDocuments))
+ {
+ _childDocuments = null;
+ }
+ else if (innerDocuments is FixedBitSet)
+ {
+ _childDocuments = (FixedBitSet)innerDocuments;
+ }
+ else
+ {
+ DocIdSetIterator iterator = innerDocuments.GetIterator();
+ _childDocuments = iterator != null ? ToFixedBitSet(iterator, context.AtomicReader.MaxDoc) : null;
+ }
+ DocIdSet rootDocuments = _parentFilter.GetDocIdSet(context, null);
+ if (IsEmpty(rootDocuments))
+ {
+ _parentDocuments = null;
+ }
+ else if (rootDocuments is FixedBitSet)
+ {
+ _parentDocuments = (FixedBitSet)rootDocuments;
+ }
+ else
+ {
+ DocIdSetIterator iterator = rootDocuments.GetIterator();
+ _parentDocuments = iterator != null ? ToFixedBitSet(iterator, context.AtomicReader.MaxDoc) : null;
+ }
+
+ // NOTE(review): the 'as' cast silently yields null if the wrapped
+ // comparator's SetNextReader returns a comparator that is not a
+ // FieldComparator<object> -- confirm this cannot happen for the
+ // comparators used with this sort field.
+ _wrappedComparator = _wrappedComparator.SetNextReader(context) as FieldComparator<object>;
+ return this;
+ }
+
+ // A null DocIdSet means "no documents"; mirrors Java's null convention.
+ private static bool IsEmpty(DocIdSet set)
+ {
+ return set == null;
+ }
+
+ /// <summary>
+ /// Materializes the given doc-id iterator into a FixedBitSet of
+ /// <paramref name="numBits"/> bits, setting one bit per produced document id.
+ /// </summary>
+ private static FixedBitSet ToFixedBitSet(DocIdSetIterator iterator, int numBits)
+ {
+ var result = new FixedBitSet(numBits);
+ for (int docId = iterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.NextDoc())
+ {
+ result.Set(docId);
+ }
+ return result;
+ }
+
+ // Sort value for a slot comes straight from the wrapped comparator.
+ public override IComparable Value(int slot)
+ {
+ return _wrappedComparator.Value(slot);
+ }
+
+ /// <summary>
+ /// Concrete implementation of <see cref="ToParentBlockJoinSortField"/> that sorts the parent docs with the lowest values
+ /// in the child / nested docs first.
+ /// </summary>
+ public sealed class Lowest : ToParentBlockJoinFieldComparator
+ {
+ /// <summary>
+ /// Create ToParentBlockJoinFieldComparator.Lowest
+ /// </summary>
+ /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
+ /// <param name="parentFilter">Filter (must produce FixedBitSet per-segment) that identifies the parent documents. </param>
+ /// <param name="childFilter">Filter that defines which child / nested documents participate in sorting. </param>
+ /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
+ /// inside the parent document scope is most competitive. </param>
+ public Lowest(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+ {
+ }
+
+ /// <summary>
+ /// Compares the lowest child value within parentDoc's block against the
+ /// current queue bottom; 0 when the parent has no children in this segment.
+ /// </summary>
+ public override int CompareBottom(int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return 0;
+ }
+
+ // Locate the first child of this parent: children sit between the
+ // previous parent and parentDoc.
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return 0;
+ }
+
+ // We only need to emit a single cmp value for any matching child doc
+ int cmp = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp > 0)
+ {
+ return cmp;
+ }
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return cmp;
+ }
+ int cmp1 = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp1 > 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Copies the lowest child value within parentDoc's block into slot, using
+ /// the spare slot to compare candidate children.
+ /// </summary>
+ public override void Copy(int slot, int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return;
+ }
+
+ // We need to copy the lowest value from all child docs into slot.
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return;
+ }
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ _wrappedComparator.Copy(slot, childDoc);
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return;
+ }
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ if (_wrappedComparator.Compare(_spareSlot, slot) < 0)
+ {
+ _wrappedComparator.Copy(slot, childDoc);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Like CompareBottom, but compares the lowest child value against the
+ /// searchAfter top value.
+ /// </summary>
+ public override int CompareTop(int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return 0;
+ }
+
+ // We need to copy the lowest value from all nested docs into slot.
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return 0;
+ }
+
+ // Fix over the original port: the first comparison must use CompareTop
+ // (the Java source calls wrappedComparator.compareTop here); the port
+ // called CompareBottom, comparing against the queue bottom instead of
+ // the paging top value.
+ int cmp = _wrappedComparator.CompareTop(childDoc);
+ if (cmp > 0)
+ {
+ return cmp;
+ }
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return cmp;
+ }
+ int cmp1 = _wrappedComparator.CompareTop(childDoc);
+ if (cmp1 > 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ }
+
+ }
+
+ /// <summary>
+ /// Concrete implementation of <see cref="ToParentBlockJoinSortField"/> that sorts the parent docs with the highest values
+ /// in the child / nested docs first.
+ /// </summary>
+ public sealed class Highest : ToParentBlockJoinFieldComparator
+ {
+ /// <summary>
+ /// Create ToParentBlockJoinFieldComparator.Highest
+ /// </summary>
+ /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
+ /// <param name="parentFilter">Filter (must produce FixedBitSet per-segment) that identifies the parent documents. </param>
+ /// <param name="childFilter">Filter that defines which child / nested documents participate in sorting. </param>
+ /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
+ /// inside the parent document scope is most competitive. </param>
+ public Highest(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+ {
+ }
+
+ /// <summary>
+ /// Compares the highest child value within parentDoc's block against the
+ /// current queue bottom; 0 when the parent has no children in this segment.
+ /// </summary>
+ public override int CompareBottom(int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return 0;
+ }
+
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return 0;
+ }
+
+ int cmp = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp < 0)
+ {
+ return cmp;
+ }
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return cmp;
+ }
+ int cmp1 = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp1 < 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Copies the highest child value within parentDoc's block into slot, using
+ /// the spare slot to compare candidate children.
+ /// </summary>
+ public override void Copy(int slot, int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return;
+ }
+
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return;
+ }
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ _wrappedComparator.Copy(slot, childDoc);
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return;
+ }
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ if (_wrappedComparator.Compare(_spareSlot, slot) > 0)
+ {
+ _wrappedComparator.Copy(slot, childDoc);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Like CompareBottom, but compares the highest child value against the
+ /// searchAfter top value.
+ /// </summary>
+ public override int CompareTop(int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return 0;
+ }
+
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return 0;
+ }
+
+ // Fix over the original port: the first comparison must use CompareTop
+ // (the Java source calls wrappedComparator.compareTop here); the port
+ // called CompareBottom, comparing against the queue bottom instead of
+ // the paging top value.
+ int cmp = _wrappedComparator.CompareTop(childDoc);
+ if (cmp < 0)
+ {
+ return cmp;
+ }
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return cmp;
+ }
+ int cmp1 = _wrappedComparator.CompareTop(childDoc);
+ if (cmp1 < 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file