You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/08/24 00:34:11 UTC
[12/17] lucenenet git commit: Lucene.Net.Join tests now passing
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Grouping/TopGroups.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/TopGroups.cs b/src/Lucene.Net.Grouping/TopGroups.cs
new file mode 100644
index 0000000..017c975
--- /dev/null
+++ b/src/Lucene.Net.Grouping/TopGroups.cs
@@ -0,0 +1,249 @@
+using System;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Grouping
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Represents result returned by a grouping search.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class TopGroups<TGroupValueType>
+ {
+ /// <summary>
+ /// Number of documents matching the search </summary>
+ public readonly int TotalHitCount;
+
+ /// <summary>
+ /// Number of documents grouped into the topN groups </summary>
+ public readonly int TotalGroupedHitCount;
+
+ /// <summary>
+ /// The total number of unique groups. If <code>null</code> this value is not computed. </summary>
+ public readonly int? TotalGroupCount;
+
+ /// <summary>
+ /// Group results in groupSort order </summary>
+ public readonly GroupDocs<TGroupValueType>[] Groups;
+
+ /// <summary>
+ /// How groups are sorted against each other </summary>
+ public readonly SortField[] GroupSort;
+
+ /// <summary>
+ /// How docs are sorted within each group </summary>
+ public readonly SortField[] WithinGroupSort;
+
+ /// <summary>
+ /// Highest score across all hits, or
+ /// <see cref="float.NaN"/> if scores were not computed.
+ /// </summary>
+ public readonly float MaxScore;
+
+ /// <summary>
+ /// Creates a result whose <see cref="TotalGroupCount"/> is <c>null</c> (not computed).
+ /// </summary>
+ /// <param name="groupSort"> How groups are sorted against each other </param>
+ /// <param name="withinGroupSort"> How docs are sorted within each group </param>
+ /// <param name="totalHitCount"> Number of documents matching the search </param>
+ /// <param name="totalGroupedHitCount"> Number of documents grouped into the topN groups </param>
+ /// <param name="groups"> Group results in groupSort order </param>
+ /// <param name="maxScore"> Highest score across all hits </param>
+ public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValueType>[] groups, float maxScore)
+ {
+     // The unique-group count is only available through the copy constructor.
+     TotalGroupCount = null;
+
+     // Hit statistics.
+     TotalHitCount = totalHitCount;
+     TotalGroupedHitCount = totalGroupedHitCount;
+     MaxScore = maxScore;
+
+     // Sort criteria and the groups themselves.
+     GroupSort = groupSort;
+     WithinGroupSort = withinGroupSort;
+     Groups = groups;
+ }
+
+ /// <summary>
+ /// Creates a copy of <paramref name="oldTopGroups"/> that additionally carries a
+ /// total group count. The arrays are shared with the source instance, not cloned.
+ /// </summary>
+ /// <param name="oldTopGroups"> The instance whose fields are copied </param>
+ /// <param name="totalGroupCount"> The value stored in <see cref="TotalGroupCount"/> </param>
+ public TopGroups(TopGroups<TGroupValueType> oldTopGroups, int? totalGroupCount)
+ {
+     TotalGroupCount = totalGroupCount;
+
+     // Everything else is taken over unchanged from the source instance.
+     TotalHitCount = oldTopGroups.TotalHitCount;
+     TotalGroupedHitCount = oldTopGroups.TotalGroupedHitCount;
+     MaxScore = oldTopGroups.MaxScore;
+     GroupSort = oldTopGroups.GroupSort;
+     WithinGroupSort = oldTopGroups.WithinGroupSort;
+     Groups = oldTopGroups.Groups;
+ }
+
+ /// <summary>
+ /// How the GroupDocs score (if any) should be merged. </summary>
+ public enum ScoreMergeMode
+ {
+ /// <summary>
+ /// Set score to <see cref="float.NaN"/>.
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Sum score across all shards for this group.
+ /// </summary>
+ Total,
+
+ /// <summary>
+ /// Average score across all shards for this group: the summed score divided by
+ /// the group's total hit count, or <see cref="float.NaN"/> when the group has no hits.
+ /// </summary>
+ Avg,
+ }
+
+ /// <summary>
+ /// Merges an array of TopGroups, for example obtained from the second-pass
+ /// collector across multiple shards. Each TopGroups must have been sorted by the
+ /// same groupSort and docSort, and the top groups passed to all second-pass
+ /// collectors must be the same.
+ ///
+ /// <b>NOTE</b>: We can't always compute an exact totalGroupCount.
+ /// Documents belonging to a group may occur on more than
+ /// one shard and thus the merged totalGroupCount can be
+ /// higher than the actual totalGroupCount. In this case the
+ /// totalGroupCount represents an upper bound. If the documents
+ /// of one group reside in only one shard then the
+ /// totalGroupCount is exact.
+ ///
+ /// <b>NOTE</b>: the topDocs in each GroupDocs is actually
+ /// an instance of TopDocsAndShards
+ /// (NOTE(review): no TopDocsAndShards type is referenced by the code below;
+ /// confirm whether this note still applies.)
+ /// </summary>
+ /// <param name="shardGroups"> Per-shard results to merge; an empty array yields <c>null</c> </param>
+ /// <param name="groupSort"> Group sort; its sort fields become the result's GroupSort. Dereferenced without a null check </param>
+ /// <param name="docSort"> Within-group sort handed to TopDocs.Merge; when <c>null</c> the result's WithinGroupSort is <c>null</c> </param>
+ /// <param name="docOffset"> Number of leading merged documents dropped from each group </param>
+ /// <param name="docTopN"> Together with <paramref name="docOffset"/>, the number of documents requested from TopDocs.Merge per group </param>
+ /// <param name="scoreMergeMode"> How the per-shard group scores are combined </param>
+ /// <returns> The merged groups, or <c>null</c> if <paramref name="shardGroups"/> is empty </returns>
+ /// <exception cref="ArgumentException"> If the shards disagree on the number of groups or on a group value,
+ /// or if <paramref name="scoreMergeMode"/> is not a known mode </exception>
+ public static TopGroups<T> Merge<T>(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN, ScoreMergeMode scoreMergeMode)
+ {
+ // Nothing to merge.
+
+ if (shardGroups.Length == 0)
+ {
+ return null;
+ }
+
+ int totalHitCount = 0;
+ int totalGroupedHitCount = 0;
+ // Optionally merge the totalGroupCount.
+ // It stays null unless at least one shard reports a count.
+ int? totalGroupCount = null;
+
+ // The first shard defines the expected number of groups; every shard must match it.
+ int numGroups = shardGroups[0].Groups.Length;
+ foreach (var shard in shardGroups)
+ {
+ if (numGroups != shard.Groups.Length)
+ {
+ throw new ArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+ totalHitCount += shard.TotalHitCount;
+ totalGroupedHitCount += shard.TotalGroupedHitCount;
+ if (shard.TotalGroupCount != null)
+ {
+ if (totalGroupCount == null)
+ {
+ totalGroupCount = 0;
+ }
+
+ totalGroupCount += shard.TotalGroupCount;
+ }
+ }
+
+ var mergedGroupDocs = new GroupDocs<T>[numGroups];
+
+ // Scratch array reused for every group: one TopDocs per shard.
+ TopDocs[] shardTopDocs = new TopDocs[shardGroups.Length];
+ // NOTE(review): float.MinValue is the most negative finite float. Java's Float.MIN_VALUE
+ // (smallest positive value) corresponds to float.Epsilon in .NET; confirm which seed is
+ // intended here and for maxScore below.
+ float totalMaxScore = float.MinValue;
+
+ for (int groupIDX = 0; groupIDX < numGroups; groupIDX++)
+ {
+ // The first shard's value is the reference every other shard is compared against.
+ T groupValue = shardGroups[0].Groups[groupIDX].GroupValue;
+ float maxScore = float.MinValue;
+ int totalHits = 0;
+ double scoreSum = 0.0;
+ for (int shardIdx = 0; shardIdx < shardGroups.Length; shardIdx++)
+ {
+ TopGroups<T> shard = shardGroups[shardIdx];
+ var shardGroupDocs = shard.Groups[groupIDX];
+ // Null-safe equality check of the group value at this position.
+ if (groupValue == null)
+ {
+ if (shardGroupDocs.GroupValue != null)
+ {
+ throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+ }
+ else if (!groupValue.Equals(shardGroupDocs.GroupValue))
+ {
+ throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+
+ // Wrap this shard's documents for the group so TopDocs.Merge can combine them,
+ // and accumulate the per-group statistics.
+ shardTopDocs[shardIdx] = new TopDocs(shardGroupDocs.TotalHits, shardGroupDocs.ScoreDocs, shardGroupDocs.MaxScore);
+ maxScore = Math.Max(maxScore, shardGroupDocs.MaxScore);
+ totalHits += shardGroupDocs.TotalHits;
+ scoreSum += shardGroupDocs.Score;
+ }
+
+ // Request docOffset + docTopN documents so that docTopN remain after the offset is sliced off.
+ TopDocs mergedTopDocs = TopDocs.Merge(docSort, docOffset + docTopN, shardTopDocs);
+
+ // Slice: drop the first docOffset merged documents.
+ ScoreDoc[] mergedScoreDocs;
+ if (docOffset == 0)
+ {
+ mergedScoreDocs = mergedTopDocs.ScoreDocs;
+ }
+ else if (docOffset >= mergedTopDocs.ScoreDocs.Length)
+ {
+ mergedScoreDocs = new ScoreDoc[0];
+ }
+ else
+ {
+ mergedScoreDocs = new ScoreDoc[mergedTopDocs.ScoreDocs.Length - docOffset];
+ Array.Copy(mergedTopDocs.ScoreDocs, docOffset, mergedScoreDocs, 0, mergedTopDocs.ScoreDocs.Length - docOffset);
+ }
+
+ float groupScore;
+ switch (scoreMergeMode)
+ {
+ case ScoreMergeMode.None:
+ groupScore = float.NaN;
+ break;
+ case ScoreMergeMode.Avg:
+ // Average over hits, not over shards; undefined (NaN) for a group without hits.
+ if (totalHits > 0)
+ {
+ groupScore = (float)(scoreSum / totalHits);
+ }
+ else
+ {
+ groupScore = float.NaN;
+ }
+ break;
+ case ScoreMergeMode.Total:
+ groupScore = (float)scoreSum;
+ break;
+ default:
+ throw new ArgumentException("can't handle ScoreMergeMode " + scoreMergeMode);
+ }
+
+ // Group sort values are taken from the first shard.
+ mergedGroupDocs[groupIDX] = new GroupDocs<T>(groupScore, maxScore, totalHits, mergedScoreDocs, groupValue, shardGroups[0].Groups[groupIDX].GroupSortValues);
+ totalMaxScore = Math.Max(totalMaxScore, maxScore);
+ }
+
+ // TotalGroupCount can only be set through the copy constructor, so the merged result
+ // is wrapped once more when a count was accumulated.
+ if (totalGroupCount != null)
+ {
+ var result = new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
+ return new TopGroups<T>(result, totalGroupCount);
+ }
+
+ return new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/FakeScorer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/FakeScorer.cs b/src/Lucene.Net.Join/FakeScorer.cs
new file mode 100644
index 0000000..42bf91b
--- /dev/null
+++ b/src/Lucene.Net.Join/FakeScorer.cs
@@ -0,0 +1,76 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Passed to <see cref="Collector.Scorer"/> during join collection.
+ /// It only reports the values currently stored in <see cref="doc"/> and <see cref="_score"/>;
+ /// iteration, term frequency, weight and child scorers are not supported.
+ /// </summary>
+ internal sealed class FakeScorer : Scorer
+ {
+     /// <summary>Value returned by <see cref="Score"/>.</summary>
+     internal float _score;
+
+     /// <summary>Value returned by <see cref="DocID"/>; starts out as -1.</summary>
+     internal int doc = -1;
+
+     /// <summary>Creates the scorer, passing <c>null</c> to the base constructor.</summary>
+     public FakeScorer()
+         : base(null)
+     {
+     }
+
+     // ---- members that return a value ----
+
+     /// <summary>Returns the current value of <see cref="doc"/>.</summary>
+     public override int DocID()
+     {
+         return doc;
+     }
+
+     /// <summary>Returns the current value of <see cref="_score"/>.</summary>
+     public override float Score()
+     {
+         return _score;
+     }
+
+     /// <summary>Always returns 1.</summary>
+     public override long Cost()
+     {
+         return 1;
+     }
+
+     // ---- members that are not supported ----
+
+     /// <exception cref="NotSupportedException">Always.</exception>
+     public override int NextDoc()
+     {
+         throw new NotSupportedException("FakeScorer doesn't support NextDoc()");
+     }
+
+     /// <exception cref="NotSupportedException">Always.</exception>
+     public override int Advance(int target)
+     {
+         throw new NotSupportedException("FakeScorer doesn't support Advance(int)");
+     }
+
+     /// <exception cref="NotSupportedException">Always.</exception>
+     public override int Freq()
+     {
+         throw new NotSupportedException("FakeScorer doesn't support Freq()");
+     }
+
+     /// <exception cref="NotSupportedException">Always.</exception>
+     public override Weight Weight
+     {
+         get
+         {
+             throw new NotSupportedException();
+         }
+     }
+
+     /// <exception cref="NotSupportedException">Always.</exception>
+     public override ICollection<ChildScorer> Children
+     {
+         get
+         {
+             throw new NotSupportedException();
+         }
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs b/src/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
new file mode 100644
index 0000000..da8b0b8
--- /dev/null
+++ b/src/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
@@ -0,0 +1,62 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A <see cref="CachingWrapperFilter"/> that caches sets using a <see cref="FixedBitSet"/>,
+ /// as required for joins.
+ /// </summary>
+ public sealed class FixedBitSetCachingWrapperFilter : CachingWrapperFilter
+ {
+     /// <summary>
+     /// Sole constructor, see <see cref="CachingWrapperFilter"/>.
+     /// </summary>
+     /// <param name="filter">The filter handed to the base class.</param>
+     public FixedBitSetCachingWrapperFilter(Filter filter)
+         : base(filter)
+     {
+     }
+
+     /// <summary>
+     /// Chooses the set to cache for <paramref name="docIdSet"/>: the empty set for a
+     /// <c>null</c> set or a <c>null</c> iterator, the set itself when it already is a
+     /// <see cref="FixedBitSet"/>, otherwise a new <see cref="FixedBitSet"/> filled from its iterator.
+     /// </summary>
+     /// <param name="docIdSet">The set produced by the wrapped filter; may be <c>null</c>.</param>
+     /// <param name="reader">Reader whose <c>MaxDoc</c> sizes the copy.</param>
+     protected override DocIdSet DocIdSetToCache(DocIdSet docIdSet, AtomicReader reader)
+     {
+         if (docIdSet == null)
+         {
+             return EMPTY_DOCIDSET;
+         }
+
+         if (!(docIdSet is FixedBitSet))
+         {
+             // Whatever other kind of set this is, it gets copied into a FixedBitSet,
+             // because all cached filters are required to be FixedBitSets.
+             DocIdSetIterator iterator = docIdSet.GetIterator();
+             if (iterator == null)
+             {
+                 return EMPTY_DOCIDSET;
+             }
+
+             FixedBitSet bits = new FixedBitSet(reader.MaxDoc);
+             bits.Or(iterator);
+             return bits;
+         }
+
+         // Already a FixedBitSet: cache it as-is.
+         return docIdSet;
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/JoinUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/JoinUtil.cs b/src/Lucene.Net.Join/JoinUtil.cs
new file mode 100644
index 0000000..726731e
--- /dev/null
+++ b/src/Lucene.Net.Join/JoinUtil.cs
@@ -0,0 +1,80 @@
+using System.IO;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Utility for query time joining using TermsQuery and TermsCollector.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public sealed class JoinUtil
+ {
+     // Utility holder: the private constructor prevents instantiation.
+     private JoinUtil()
+     {
+     }
+
+     /// <summary>
+     /// Method for query time joining.
+     /// <para>
+     /// Execute the returned query with a <see cref="IndexSearcher"/> to retrieve all documents that have the same terms in the
+     /// to field that match with documents matching the specified fromQuery and have the same terms in the from field.
+     /// </para>
+     /// <para>
+     /// In the case a single document relates to more than one document the <code>multipleValuesPerDocument</code> option
+     /// should be set to true. When <code>multipleValuesPerDocument</code> is set to <code>true</code> only
+     /// the score from the first encountered join value originating from the 'from' side is mapped into the 'to' side,
+     /// even in the case when a second join value related to a specific document yields a higher score. Obviously this
+     /// doesn't apply in the case that <see cref="ScoreMode.None"/> is used, since no scores are computed at all.
+     /// </para>
+     /// <para>
+     /// Memory considerations: During joining all unique join values are kept in memory. On top of that when the scoreMode
+     /// isn't set to <see cref="ScoreMode.None"/> a float value per unique join value is kept in memory for computing scores.
+     /// When scoreMode is set to <see cref="ScoreMode.Avg"/> also an additional integer value is kept in memory per unique
+     /// join value.
+     /// </para>
+     /// </summary>
+     /// <param name="fromField"> The from field to join from </param>
+     /// <param name="multipleValuesPerDocument"> Whether the from field has multiple terms per document </param>
+     /// <param name="toField"> The to field to join to </param>
+     /// <param name="fromQuery"> The query to match documents on the from side </param>
+     /// <param name="fromSearcher"> The searcher that executed the specified fromQuery </param>
+     /// <param name="scoreMode"> Instructs how scores from the fromQuery are mapped to the returned query </param>
+     /// <returns>A <see cref="Query"/> instance that can be used to join documents based on the terms in the from and to field</returns>
+     /// <exception cref="IOException"> If I/O related errors occur </exception>
+     /// <exception cref="System.ArgumentException"> If <paramref name="scoreMode"/> is not one of the known modes </exception>
+     public static Query CreateJoinQuery(string fromField, bool multipleValuesPerDocument, string toField, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode)
+     {
+         // Unscored join: only the matching terms need to be collected.
+         if (scoreMode == ScoreMode.None)
+         {
+             TermsCollector plainCollector = TermsCollector.Create(fromField, multipleValuesPerDocument);
+             fromSearcher.Search(fromQuery, plainCollector);
+             return new TermsQuery(toField, fromQuery, plainCollector.CollectorTerms);
+         }
+
+         bool isScoringMode = scoreMode == ScoreMode.Total
+                              || scoreMode == ScoreMode.Max
+                              || scoreMode == ScoreMode.Avg;
+         if (!isScoringMode)
+         {
+             throw new System.ArgumentException(string.Format("Score mode {0} isn't supported.", scoreMode));
+         }
+
+         // Scored join: collect the terms together with their per-term scores.
+         TermsWithScoreCollector scoringCollector = TermsWithScoreCollector.Create(fromField, multipleValuesPerDocument, scoreMode);
+         fromSearcher.Search(fromQuery, scoringCollector);
+         return new TermsIncludingScoreQuery(toField, multipleValuesPerDocument, scoringCollector.CollectedTerms, scoringCollector.ScoresPerTerm, fromQuery);
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/Lucene.Net.Join.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/Lucene.Net.Join.csproj b/src/Lucene.Net.Join/Lucene.Net.Join.csproj
new file mode 100644
index 0000000..72bda4a
--- /dev/null
+++ b/src/Lucene.Net.Join/Lucene.Net.Join.csproj
@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{E8A339C7-FCF6-4A72-8586-56D8961D7B99}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Join</RootNamespace>
+ <AssemblyName>Lucene.Net.Join</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="FakeScorer.cs" />
+ <Compile Include="FixedBitSetCachingWrapperFilter.cs" />
+ <Compile Include="JoinUtil.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="ScoreMode.cs" />
+ <Compile Include="TermsCollector.cs" />
+ <Compile Include="TermsIncludingScoreQuery.cs" />
+ <Compile Include="TermsQuery.cs" />
+ <Compile Include="TermsWithScoreCollector.cs" />
+ <Compile Include="ToChildBlockJoinQuery.cs" />
+ <Compile Include="ToParentBlockJoinCollector.cs" />
+ <Compile Include="ToParentBlockJoinFieldComparator.cs" />
+ <Compile Include="ToParentBlockJoinQuery.cs" />
+ <Compile Include="ToParentBlockJoinSortField.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
+ <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
+ <Name>Lucene.Net.Grouping</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/Properties/AssemblyInfo.cs b/src/Lucene.Net.Join/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..2c17c13
--- /dev/null
+++ b/src/Lucene.Net.Join/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Join")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Join")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("e8a339c7-fcf6-4a72-8586-56d8961d7b99")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ScoreMode.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ScoreMode.cs b/src/Lucene.Net.Join/ScoreMode.cs
new file mode 100644
index 0000000..a5b91be
--- /dev/null
+++ b/src/Lucene.Net.Join/ScoreMode.cs
@@ -0,0 +1,45 @@
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// How to aggregate multiple child hit scores into a single parent score.
+ /// Also accepted by <c>JoinUtil.CreateJoinQuery</c> to select how scores are mapped to the returned query.
+ /// </summary>
+ public enum ScoreMode
+ {
+ /// <summary>
+ /// Do no scoring.
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Parent hit's score is the average of all child scores.
+ /// </summary>
+ Avg,
+
+ /// <summary>
+ /// Parent hit's score is the max of all child scores.
+ /// </summary>
+ Max,
+
+ /// <summary>
+ /// Parent hit's score is the sum of all child scores.
+ /// </summary>
+ Total
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/TermsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsCollector.cs b/src/Lucene.Net.Join/TermsCollector.cs
new file mode 100644
index 0000000..2ccf1ed
--- /dev/null
+++ b/src/Lucene.Net.Join/TermsCollector.cs
@@ -0,0 +1,127 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A collector that collects all terms from a specified field matching the query.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ internal abstract class TermsCollector : Collector
+ {
+     private readonly string _field;
+     private readonly BytesRefHash _collectorTerms = new BytesRefHash();
+
+     /// <summary>
+     /// Initializes the collector for the given field.
+     /// </summary>
+     /// <param name="field">The field to collect terms for.</param>
+     internal TermsCollector(string field)
+     {
+         _field = field;
+     }
+
+     /// <summary>
+     /// The terms added by <c>Collect</c> so far.
+     /// </summary>
+     public BytesRefHash CollectorTerms
+     {
+         get
+         {
+             return _collectorTerms;
+         }
+     }
+
+     /// <summary>
+     /// The assigned scorer is ignored; this collector only gathers terms.
+     /// </summary>
+     public override Scorer Scorer
+     {
+         set {}
+     }
+
+     /// <summary>
+     /// Always <c>true</c>. Both nested implementations inherit this answer.
+     /// </summary>
+     public override bool AcceptsDocsOutOfOrder()
+     {
+         return true;
+     }
+
+     /// <summary>
+     /// Chooses the right <see cref="TermsCollector"/> implementation.
+     /// </summary>
+     /// <param name="field">The field to collect terms for.</param>
+     /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
+     /// <returns>A <see cref="TermsCollector"/> instance.</returns>
+     internal static TermsCollector Create(string field, bool multipleValuesPerDocument)
+     {
+         return multipleValuesPerDocument ? (TermsCollector) new MV(field) : new SV(field);
+     }
+
+     // impl that works with multiple values per document
+     private sealed class MV : TermsCollector
+     {
+         private readonly BytesRef _scratch = new BytesRef();
+         private SortedSetDocValues _docTermOrds;
+
+         internal MV(string field) : base(field)
+         {
+         }
+
+         /// <summary>
+         /// Adds every term of the document to the collected terms.
+         /// </summary>
+         public override void Collect(int doc)
+         {
+             _docTermOrds.Document = doc;
+             long ord;
+             while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+             {
+                 _docTermOrds.LookupOrd(ord, _scratch);
+                 _collectorTerms.Add(_scratch);
+             }
+         }
+
+         /// <summary>
+         /// Loads the term ordinals of the field for the new segment from the field cache.
+         /// </summary>
+         public override AtomicReaderContext NextReader
+         {
+             set { _docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field); }
+         }
+
+         // AcceptsDocsOutOfOrder() is deliberately not overridden: the previous override threw
+         // NotImplementedException, so any caller asking about document ordering would fail.
+     }
+
+     // impl that works with single value per document
+     private sealed class SV : TermsCollector
+     {
+         private readonly BytesRef _spare = new BytesRef();
+         private BinaryDocValues _fromDocTerms;
+
+         internal SV(string field) : base(field)
+         {
+         }
+
+         /// <summary>
+         /// Adds the document's single term to the collected terms.
+         /// </summary>
+         public override void Collect(int doc)
+         {
+             _fromDocTerms.Get(doc, _spare);
+             _collectorTerms.Add(_spare);
+         }
+
+         /// <summary>
+         /// Loads the terms of the field for the new segment from the field cache.
+         /// </summary>
+         public override AtomicReaderContext NextReader
+         {
+             set { _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); }
+         }
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs b/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
new file mode 100644
index 0000000..9f3befc
--- /dev/null
+++ b/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
@@ -0,0 +1,472 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ internal class TermsIncludingScoreQuery : Query
+ {
+ private readonly string _field;
+ private readonly bool _multipleValuesPerDocument;
+ private readonly BytesRefHash _terms;
+ private readonly float[] _scores;
+ private readonly int[] _ords;
+ private readonly Query _originalQuery;
+ private readonly Query _unwrittenOriginalQuery;
+
+        /// <summary>
+        /// Creates the query from the collected join terms and their scores. The terms are
+        /// sorted here and the resulting ord order is kept for the scorers.
+        /// </summary>
+        /// <param name="field">Field the terms are looked up in.</param>
+        /// <param name="multipleValuesPerDocument">Selects the multi-valued scorer implementations.</param>
+        /// <param name="terms">The join terms.</param>
+        /// <param name="scores">Scores, indexed by the term ids of <paramref name="terms"/>.</param>
+        /// <param name="originalQuery">Query the terms were collected from.</param>
+        internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
+            float[] scores, Query originalQuery)
+        {
+            _field = field;
+            _multipleValuesPerDocument = multipleValuesPerDocument;
+
+            _terms = terms;
+            _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
+            _scores = scores;
+
+            // Not rewritten yet, so both references point at the same query.
+            _originalQuery = originalQuery;
+            _unwrittenOriginalQuery = originalQuery;
+        }
+
+        /// <summary>
+        /// Copy constructor used by <see cref="Rewrite"/>: takes the already sorted ords and keeps
+        /// the un-rewritten query next to the rewritten one.
+        /// </summary>
+        private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
+            float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
+        {
+            _field = field;
+            _multipleValuesPerDocument = multipleValuesPerDocument;
+
+            _terms = terms;
+            _ords = ords;
+            _scores = scores;
+
+            _originalQuery = originalQuery;
+            _unwrittenOriginalQuery = unwrittenOriginalQuery;
+        }
+
+        /// <summary>
+        /// Describes the query by its field and the un-rewritten original query.
+        /// </summary>
+        public override string ToString(string @string)
+        {
+            const string format = "TermsIncludingScoreQuery{{field={0};originalQuery={1}}}";
+            return string.Format(format, _field, _unwrittenOriginalQuery);
+        }
+
+        /// <summary>
+        /// Delegates term extraction to the original query.
+        /// </summary>
+        public override void ExtractTerms(ISet<Term> terms)
+        {
+            _originalQuery.ExtractTerms(terms);
+        }
+
+        /// <summary>
+        /// Rewrites the original query. If that yields a different query, a copy of this query
+        /// wrapping the rewritten one (with the same boost) is returned; otherwise this instance.
+        /// </summary>
+        public override Query Rewrite(IndexReader reader)
+        {
+            Query rewrittenOriginal = _originalQuery.Rewrite(reader);
+            if (rewrittenOriginal == _originalQuery)
+            {
+                // Nothing changed, no copy needed.
+                return this;
+            }
+
+            // Remember the pre-rewrite query as the "unwritten" one of the copy.
+            Query copy = new TermsIncludingScoreQuery(_field, _multipleValuesPerDocument, _terms, _scores,
+                _ords, rewrittenOriginal, _originalQuery);
+            copy.Boost = Boost;
+            return copy;
+        }
+
+        /// <summary>
+        /// Typed equality: the base query state, the field and the un-rewritten original query must match.
+        /// </summary>
+        protected bool Equals(TermsIncludingScoreQuery other)
+        {
+            if (!base.Equals(other))
+            {
+                return false;
+            }
+
+            if (!string.Equals(_field, other._field))
+            {
+                return false;
+            }
+
+            return Equals(_unwrittenOriginalQuery, other._unwrittenOriginalQuery);
+        }
+
+        /// <summary>
+        /// Equal only to an instance of exactly the same type that passes the typed comparison.
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(this, obj))
+            {
+                return true;
+            }
+
+            if (ReferenceEquals(null, obj) || obj.GetType() != GetType())
+            {
+                return false;
+            }
+
+            return Equals((TermsIncludingScoreQuery) obj);
+        }
+
+        /// <summary>
+        /// Hash over the base query state, the field and the un-rewritten original query
+        /// (the same members the equality check uses).
+        /// </summary>
+        public override int GetHashCode()
+        {
+            unchecked
+            {
+                int result = base.GetHashCode();
+
+                int fieldHash = _field == null ? 0 : _field.GetHashCode();
+                result = (result * 397) ^ fieldHash;
+
+                int queryHash = _unwrittenOriginalQuery == null ? 0 : _unwrittenOriginalQuery.GetHashCode();
+                return (result * 397) ^ queryHash;
+            }
+        }
+
+        /// <summary>
+        /// Wraps the weight of the original query in this query's own weight.
+        /// </summary>
+        public override Weight CreateWeight(IndexSearcher searcher)
+        {
+            return new WeightAnonymousInnerClassHelper(this, _originalQuery.CreateWeight(searcher));
+        }
+
+        /// <summary>
+        /// Weight of <see cref="TermsIncludingScoreQuery"/>. Normalization is forwarded to the weight of
+        /// the original query; scoring is done by the scorers declared in the enclosing query.
+        /// </summary>
+        private class WeightAnonymousInnerClassHelper : Weight
+        {
+            private readonly TermsIncludingScoreQuery outerInstance;
+
+            private Weight originalWeight;
+
+            // Handed back to Terms.Iterator every time a scorer is requested.
+            private TermsEnum segmentTermsEnum;
+
+            public WeightAnonymousInnerClassHelper(TermsIncludingScoreQuery outerInstance, Weight originalWeight)
+            {
+                this.outerInstance = outerInstance;
+                this.originalWeight = originalWeight;
+            }
+
+            /// <summary>
+            /// Explains <paramref name="doc"/> via the out-of-order bulk scorer, or reports
+            /// "Not a match" when the segment yields no scorer.
+            /// </summary>
+            public override Explanation Explain(AtomicReaderContext context, int doc)
+            {
+                SVInnerScorer innerScorer = (SVInnerScorer) BulkScorer(context, false, null);
+                if (innerScorer == null)
+                {
+                    return new ComplexExplanation(false, 0.0f, "Not a match");
+                }
+
+                return innerScorer.Explain(doc);
+            }
+
+            public override bool ScoresDocsOutOfOrder()
+            {
+                // We have optimized impls below if we are allowed
+                // to score out-of-order:
+                return true;
+            }
+
+            public override Query Query
+            {
+                get { return outerInstance; }
+            }
+
+            public override float ValueForNormalization
+            {
+                get
+                {
+                    float boost = outerInstance.Boost;
+                    return originalWeight.ValueForNormalization * boost * boost;
+                }
+            }
+
+            public override void Normalize(float norm, float topLevelBoost)
+            {
+                originalWeight.Normalize(norm, topLevelBoost * outerInstance.Boost);
+            }
+
+            /// <summary>
+            /// In-order scorer for the segment, or <c>null</c> when the segment has no terms for the field.
+            /// </summary>
+            public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
+            {
+                Terms fieldTerms = context.AtomicReader.Terms(outerInstance._field);
+                if (fieldTerms == null)
+                {
+                    return null;
+                }
+
+                int maxDoc = context.AtomicReader.MaxDoc;
+
+                // what is the runtime...seems ok?
+                long estimatedCost = maxDoc * fieldTerms.Size();
+
+                segmentTermsEnum = fieldTerms.Iterator(segmentTermsEnum);
+                if (!outerInstance._multipleValuesPerDocument)
+                {
+                    return new SVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, maxDoc, estimatedCost);
+                }
+
+                return new MVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, maxDoc, estimatedCost);
+            }
+
+            /// <summary>
+            /// Bulk scorer for the segment. In-order requests go to the base implementation; otherwise one of
+            /// the out-of-order scorers is returned, or <c>null</c> when the segment has no terms for the field.
+            /// </summary>
+            public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs)
+            {
+                if (scoreDocsInOrder)
+                {
+                    return base.BulkScorer(context, scoreDocsInOrder, acceptDocs);
+                }
+
+                Terms fieldTerms = context.AtomicReader.Terms(outerInstance._field);
+                if (fieldTerms == null)
+                {
+                    return null;
+                }
+
+                // what is the runtime...seems ok?
+                long estimatedCost = context.AtomicReader.MaxDoc * fieldTerms.Size();
+
+                segmentTermsEnum = fieldTerms.Iterator(segmentTermsEnum);
+
+                // Optimized impls that take advantage of docs
+                // being allowed to be out of order:
+                if (!outerInstance._multipleValuesPerDocument)
+                {
+                    return new SVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, estimatedCost);
+                }
+
+                return new MVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, estimatedCost);
+            }
+        }
+
+ // This impl assumes that the 'join' values are used uniquely per doc per field. Used for one to many relations.
+ // It walks the query's terms in sorted-ord order and, for every term found in the segment, emits the
+ // documents of that term's postings one after the other.
+ internal class SVInnerScorer : BulkScorer
+ {
+ private readonly TermsIncludingScoreQuery outerInstance;
+
+ // Reused buffer the current query term is read into.
+ private readonly BytesRef _spare = new BytesRef();
+ private readonly Bits _acceptDocs;
+ private readonly TermsEnum _termsEnum;
+ private readonly long _cost;
+
+ // Index into the sorted ords of the next term to seek.
+ private int _upto;
+ // Postings of the term currently being iterated; null when the next term has to be sought.
+ internal DocsEnum DocsEnum;
+ private DocsEnum _reuse;
+ // Index into the sorted ords of the term whose score applies to the current document.
+ private int _scoreUpto;
+ // Last document returned by NextDocOutOfOrder; -1 before the first call.
+ private int _doc;
+
+ // NOTE: the weight parameter is accepted but not used by this constructor.
+ internal SVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, TermsEnum termsEnum, long cost)
+ {
+ this.outerInstance = outerInstance;
+ _acceptDocs = acceptDocs;
+ _termsEnum = termsEnum;
+ _cost = cost;
+ _doc = -1;
+ }
+
+ // Feeds documents to the collector for as long as the current document id is below max, exposing each
+ // document's join score through a FakeScorer. Returns true when more documents remain.
+ public override bool Score(Collector collector, int max)
+ {
+ FakeScorer fakeScorer = new FakeScorer();
+ collector.Scorer = fakeScorer;
+ if (_doc == -1)
+ {
+ _doc = NextDocOutOfOrder();
+ }
+ while (_doc < max)
+ {
+ fakeScorer.doc = _doc;
+ fakeScorer._score = outerInstance._scores[outerInstance._ords[_scoreUpto]];
+ collector.Collect(_doc);
+ _doc = NextDocOutOfOrder();
+ }
+
+ return _doc != DocIdSetIterator.NO_MORE_DOCS;
+ }
+
+ // Returns the next document of the current term's postings; when those are exhausted it moves on to the
+ // next query term that exists in the segment. Returns NO_MORE_DOCS once all terms have been visited.
+ private int NextDocOutOfOrder()
+ {
+ while (true)
+ {
+ if (DocsEnum != null)
+ {
+ int docId = DocsEnumNextDoc();
+ if (docId == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ // Postings of this term are exhausted: fall through and seek the next term.
+ DocsEnum = null;
+ }
+ else
+ {
+ return _doc = docId;
+ }
+ }
+
+ if (_upto == outerInstance._terms.Size())
+ {
+ return _doc = DocIdSetIterator.NO_MORE_DOCS;
+ }
+
+ // Remember which term the upcoming documents belong to before advancing _upto.
+ _scoreUpto = _upto;
+ if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare)))
+ {
+ DocsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsEnum.FLAG_NONE);
+ }
+ }
+ }
+
+ // Hook for subclasses that need to filter the documents of the current postings.
+ protected virtual int DocsEnumNextDoc()
+ {
+ return DocsEnum.NextDoc();
+ }
+
+ // Searches term by term for a posting that contains target and builds the explanation from the score of
+ // the term the search stopped at.
+ // NOTE(review): a "match" explanation is returned even when the loop ends with NO_MORE_DOCS, i.e. when
+ // target was not found - confirm that callers only ask about matching documents.
+ internal Explanation Explain(int target)
+ {
+ int docId;
+ do
+ {
+ docId = NextDocOutOfOrder();
+ if (docId < target)
+ {
+ int tempDocId = DocsEnum.Advance(target);
+ if (tempDocId == target)
+ {
+ docId = tempDocId;
+ break;
+ }
+ }
+ else if (docId == target)
+ {
+ break;
+ }
+ DocsEnum = null; // goto the next ord.
+ } while (docId != DocIdSetIterator.NO_MORE_DOCS);
+
+ return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]],
+ "Score based on join value " + _termsEnum.Term().Utf8ToString());
+ }
+ }
+
+        // This impl tracks whether a docid has already been emitted. The check makes sure that docs aren't emitted
+        // twice for different join values. This means that the first encountered join value determines the score
+        // of a document, even if other join values yield a higher score.
+        internal class MVInnerScorer : SVInnerScorer
+        {
+            private readonly TermsIncludingScoreQuery outerInstance;
+
+            // One bit per document of the segment; set once the document has been returned.
+            internal readonly FixedBitSet alreadyEmittedDocs;
+
+            internal MVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost)
+                : base(outerInstance, weight, acceptDocs, termsEnum, cost)
+            {
+                this.outerInstance = outerInstance;
+                alreadyEmittedDocs = new FixedBitSet(maxDoc);
+            }
+
+            /// <summary>
+            /// Next document of the current postings that has not been returned before,
+            /// or NO_MORE_DOCS when the postings are exhausted.
+            /// </summary>
+            protected override int DocsEnumNextDoc()
+            {
+                int candidate;
+                do
+                {
+                    candidate = DocsEnum.NextDoc();
+                    // Keep going only while the candidate is a real doc that was already emitted;
+                    // GetAndSet marks a fresh candidate as emitted in the same step.
+                } while (candidate != DocIdSetIterator.NO_MORE_DOCS && alreadyEmittedDocs.GetAndSet(candidate));
+
+                return candidate;
+            }
+        }
+
+        /// <summary>
+        /// In-order scorer for fields with a single value per document. All matching documents and their
+        /// scores are computed up front in the constructor; iteration then walks the resulting bit set.
+        /// </summary>
+        internal class SVInOrderScorer : Scorer
+        {
+            // Visible to subclasses on purpose: FillDocsAndScores is invoked from this constructor, i.e.
+            // before the body of any subclass constructor has run. An override therefore has to reach the
+            // query through this field; a field assigned in the subclass constructor is still null then.
+            protected readonly TermsIncludingScoreQuery outerInstance;
+
+            internal readonly DocIdSetIterator matchingDocsIterator;
+            // Score per document id of the segment.
+            internal readonly float[] scores;
+            internal readonly long cost_Renamed;
+
+            internal int currentDoc = -1;
+
+            internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost) : base(weight)
+            {
+                // outerInstance and scores must be assigned before the virtual call below.
+                this.outerInstance = outerInstance;
+                FixedBitSet matchingDocs = new FixedBitSet(maxDoc);
+                scores = new float[maxDoc];
+                FillDocsAndScores(matchingDocs, acceptDocs, termsEnum);
+                matchingDocsIterator = matchingDocs.GetIterator();
+                cost_Renamed = cost;
+            }
+
+            /// <summary>
+            /// Marks every document that contains one of the query's terms in <paramref name="matchingDocs"/>
+            /// and records the term's score for it. Called from the constructor, so overrides may only rely
+            /// on <see cref="outerInstance"/> and <see cref="scores"/>.
+            /// </summary>
+            protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
+                TermsEnum termsEnum)
+            {
+                BytesRef spare = new BytesRef();
+                DocsEnum docsEnum = null;
+                for (int i = 0; i < outerInstance._terms.Size(); i++)
+                {
+                    if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
+                    {
+                        docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
+                        float score = outerInstance._scores[outerInstance._ords[i]];
+                        for (int doc = docsEnum.NextDoc();
+                            doc != NO_MORE_DOCS;
+                            doc = docsEnum.NextDoc())
+                        {
+                            matchingDocs.Set(doc);
+                            // In case the same doc is also related to another doc, a score might be overwritten.
+                            // I think this can only happen in a many-to-many relation
+                            scores[doc] = score;
+                        }
+                    }
+                }
+            }
+
+            public override float Score()
+            {
+                return scores[currentDoc];
+            }
+
+            public override int Freq()
+            {
+                return 1;
+            }
+
+            public override int DocID()
+            {
+                return currentDoc;
+            }
+
+            public override int NextDoc()
+            {
+                return currentDoc = matchingDocsIterator.NextDoc();
+            }
+
+            public override int Advance(int target)
+            {
+                return currentDoc = matchingDocsIterator.Advance(target);
+            }
+
+            public override long Cost()
+            {
+                return cost_Renamed;
+            }
+        }
+
+        // This scorer deals with the fact that a document can have more than one score from multiple related documents.
+        internal class MVInOrderScorer : SVInOrderScorer
+        {
+            // No outerInstance field of its own: the previous shadowing field was assigned only after the base
+            // constructor had already called FillDocsAndScores, so the override below dereferenced null.
+            internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost)
+                : base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost)
+            {
+            }
+
+            /// <summary>
+            /// Like the base implementation, but the first term that reaches a document decides its score;
+            /// later terms leave an already matched document untouched.
+            /// </summary>
+            protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
+                TermsEnum termsEnum)
+            {
+                BytesRef spare = new BytesRef();
+                DocsEnum docsEnum = null;
+                for (int i = 0; i < outerInstance._terms.Size(); i++)
+                {
+                    if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
+                    {
+                        docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
+                        float score = outerInstance._scores[outerInstance._ords[i]];
+                        for (int doc = docsEnum.NextDoc();
+                            doc != NO_MORE_DOCS;
+                            doc = docsEnum.NextDoc())
+                        {
+                            // I prefer this:
+                            /*if (scores[doc] < score) {
+                              scores[doc] = score;
+                              matchingDocs.set(doc);
+                            }*/
+                            // But this behaves the same as MVInnerScorer and only then the tests will pass:
+                            if (!matchingDocs.Get(doc))
+                            {
+                                scores[doc] = score;
+                                matchingDocs.Set(doc);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/TermsQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsQuery.cs b/src/Lucene.Net.Join/TermsQuery.cs
new file mode 100644
index 0000000..2d5ccf8
--- /dev/null
+++ b/src/Lucene.Net.Join/TermsQuery.cs
@@ -0,0 +1,147 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// A query that has an array of terms from a specific field. This query will match documents that have one
+    /// or more terms in the specified field that match the terms specified in the array.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    internal class TermsQuery : MultiTermQuery
+    {
+        private readonly BytesRefHash _terms;
+        private readonly int[] _ords;
+        private readonly Query _fromQuery; // Only used by Equals/GetHashCode
+
+        /// <summary>
+        /// Creates the query. The terms are sorted here; the resulting ord order drives the term enumeration.
+        /// </summary>
+        /// <param name="field">The field that should contain the given terms.</param>
+        /// <param name="fromQuery">The query the terms were collected from; only used for equality.</param>
+        /// <param name="terms">The terms that matching documents should have.</param>
+        internal TermsQuery(string field, Query fromQuery, BytesRefHash terms) : base(field)
+        {
+            _fromQuery = fromQuery;
+            _terms = terms;
+            _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
+        }
+
+        /// <summary>
+        /// Returns an enum over the segment terms that are also contained in this query's term set,
+        /// or the empty enum when the query has no terms at all.
+        /// </summary>
+        public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
+        {
+            if (_terms.Size() == 0)
+            {
+                return TermsEnum.EMPTY;
+            }
+
+            return new SeekingTermSetTermsEnum(terms.Iterator(null), _terms, _ords);
+        }
+
+        public override string ToString(string field)
+        {
+            // Report the field this query runs against. The parameter is only the caller's default
+            // field and used to shadow it, so the output showed the wrong (often empty) name.
+            return string.Format("TermsQuery{{field={0}}}", Field);
+        }
+
+        /// <summary>
+        /// Two terms queries are equal when the base query state matches and they were built from
+        /// equal from-queries.
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(this, obj))
+            {
+                return true;
+            }
+            if (ReferenceEquals(null, obj) || !base.Equals(obj))
+            {
+                return false;
+            }
+            if (obj.GetType() != GetType())
+            {
+                return false;
+            }
+
+            TermsQuery other = (TermsQuery) obj;
+            return object.Equals(_fromQuery, other._fromQuery);
+        }
+
+        public override int GetHashCode()
+        {
+            unchecked
+            {
+                const int prime = 31;
+                int result = base.GetHashCode();
+                result += prime * (_fromQuery != null ? _fromQuery.GetHashCode() : 0);
+                return result;
+            }
+        }
+
+        /// <summary>
+        /// Walks the segment's terms dictionary and the sorted query terms in lock step, seeking the
+        /// dictionary to the next query term after every decision.
+        /// </summary>
+        private class SeekingTermSetTermsEnum : FilteredTermsEnum
+        {
+            private readonly BytesRefHash _terms;
+            private readonly int[] _ords;
+            private readonly int _lastElement;
+
+            private readonly BytesRef _lastTerm;
+            private readonly BytesRef _spare = new BytesRef();
+            private readonly IComparer<BytesRef> _comparator;
+
+            // Term to seek to next; handed out (and cleared) by NextSeekTerm.
+            private BytesRef _seekTerm;
+            // Position of the current query term within the sorted ords.
+            private int _upto;
+
+            // Callers must pass a non-empty term set: the last and the first term are read right away.
+            internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) : base(tenum)
+            {
+                _terms = terms;
+                _ords = ords;
+                _comparator = BytesRef.UTF8SortedAsUnicodeComparer;
+                _lastElement = terms.Size() - 1;
+                _lastTerm = terms.Get(ords[_lastElement], new BytesRef());
+                _seekTerm = terms.Get(ords[_upto], _spare);
+            }
+
+            protected override BytesRef NextSeekTerm(BytesRef currentTerm)
+            {
+                BytesRef temp = _seekTerm;
+                _seekTerm = null;
+                return temp;
+            }
+
+            protected override AcceptStatus Accept(BytesRef term)
+            {
+                // Past the largest query term: nothing further can match.
+                if (_comparator.Compare(term, _lastTerm) > 0)
+                {
+                    return AcceptStatus.END;
+                }
+
+                BytesRef currentTerm = _terms.Get(_ords[_upto], _spare);
+                if (_comparator.Compare(term, currentTerm) == 0)
+                {
+                    if (_upto == _lastElement)
+                    {
+                        return AcceptStatus.YES;
+                    }
+
+                    _seekTerm = _terms.Get(_ords[++_upto], _spare);
+                    return AcceptStatus.YES_AND_SEEK;
+                }
+
+                // Our current term doesn't match the given term.
+                if (_upto == _lastElement)
+                {
+                    return AcceptStatus.NO;
+                }
+
+                int cmp;
+                do // We may be behind the given term by more than one step. Keep incrementing till we're the same or higher.
+                {
+                    if (_upto == _lastElement)
+                    {
+                        return AcceptStatus.NO;
+                    }
+                    // typically the terms dict is a superset of query's terms so it's unusual that we have to skip many of
+                    // our terms so we don't do a binary search here
+                    _seekTerm = _terms.Get(_ords[++_upto], _spare);
+                } while ((cmp = _comparator.Compare(_seekTerm, term)) < 0);
+
+                if (cmp == 0)
+                {
+                    if (_upto == _lastElement)
+                    {
+                        return AcceptStatus.YES;
+                    }
+                    _seekTerm = _terms.Get(_ords[++_upto], _spare);
+                    return AcceptStatus.YES_AND_SEEK;
+                }
+
+                return AcceptStatus.NO_AND_SEEK;
+            }
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/TermsWithScoreCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsWithScoreCollector.cs b/src/Lucene.Net.Join/TermsWithScoreCollector.cs
new file mode 100644
index 0000000..e823293
--- /dev/null
+++ b/src/Lucene.Net.Join/TermsWithScoreCollector.cs
@@ -0,0 +1,333 @@
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ internal abstract class TermsWithScoreCollector : Collector
+ {
+ private const int InitialArraySize = 256;
+
+ private readonly string _field;
+ private readonly BytesRefHash _collectedTerms = new BytesRefHash();
+ private readonly ScoreMode _scoreMode;
+
+ private Scorer _scorer;
+ private float[] _scoreSums = new float[InitialArraySize];
+
+        /// <summary>
+        /// Stores the field to collect terms from and the mode used to combine scores per term.
+        /// </summary>
+        internal TermsWithScoreCollector(string field, ScoreMode scoreMode)
+        {
+            _field = field;
+            _scoreMode = scoreMode;
+        }
+
+        /// <summary>
+        /// The terms collected so far.
+        /// </summary>
+        public BytesRefHash CollectedTerms
+        {
+            get { return _collectedTerms; }
+        }
+
+        /// <summary>
+        /// The score array, indexed by the ids of <see cref="CollectedTerms"/>.
+        /// </summary>
+        public virtual float[] ScoresPerTerm
+        {
+            get { return _scoreSums; }
+        }
+
+        /// <summary>
+        /// Receives the scorer whose score is read for every collected document.
+        /// </summary>
+        public override Scorer Scorer
+        {
+            set { _scorer = value; }
+        }
+
+        /// <summary>
+        /// Always <c>true</c> for this collector.
+        /// </summary>
+        public override bool AcceptsDocsOutOfOrder()
+        {
+            return true;
+        }
+
+        /// <summary>
+        /// Chooses the right <see cref="TermsWithScoreCollector"/> implementation.
+        /// </summary>
+        /// <param name="field">The field to collect terms for.</param>
+        /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
+        /// <param name="scoreMode">How scores are combined per term; <see cref="ScoreMode.Avg"/> selects the averaging variants.</param>
+        /// <returns>A <see cref="TermsWithScoreCollector"/> instance</returns>
+        internal static TermsWithScoreCollector Create(string field, bool multipleValuesPerDocument, ScoreMode scoreMode)
+        {
+            bool averaging = scoreMode == ScoreMode.Avg;
+
+            if (multipleValuesPerDocument)
+            {
+                if (averaging)
+                {
+                    return new Mv.Avg(field);
+                }
+                return new Mv(field, scoreMode);
+            }
+
+            if (averaging)
+            {
+                return new Sv.Avg(field);
+            }
+            return new Sv(field, scoreMode);
+        }
+
+        // Collector implementation for fields that hold a single value per document.
+        internal class Sv : TermsWithScoreCollector
+        {
+            // Reused buffer the document's term bytes are read into.
+            private readonly BytesRef _spare = new BytesRef();
+            private BinaryDocValues _fromDocTerms;
+
+            internal Sv(string field, ScoreMode scoreMode)
+                : base(field, scoreMode)
+            {
+            }
+
+            /// <summary>
+            /// Registers the term of <paramref name="doc"/> and folds the current score into the term's slot.
+            /// </summary>
+            public override void Collect(int doc)
+            {
+                _fromDocTerms.Get(doc, _spare);
+                int termId = _collectedTerms.Add(_spare);
+                if (termId < 0)
+                {
+                    // Negative result: decode the id of the already present term.
+                    termId = -termId - 1;
+                }
+                else if (termId >= _scoreSums.Length)
+                {
+                    _scoreSums = ArrayUtil.Grow(_scoreSums);
+                }
+
+                float score = _scorer.Score();
+                float previous = _scoreSums[termId];
+                if (previous.CompareTo(0.0f) == 0)
+                {
+                    // Empty slot: the first score is taken as is.
+                    _scoreSums[termId] = score;
+                    return;
+                }
+
+                if (_scoreMode == ScoreMode.Total)
+                {
+                    _scoreSums[termId] = previous + score;
+                }
+                else if (_scoreMode == ScoreMode.Max && score > previous)
+                {
+                    _scoreSums[termId] = score;
+                }
+            }
+
+            /// <summary>
+            /// Switches to the terms of the next segment.
+            /// </summary>
+            public override AtomicReaderContext NextReader
+            {
+                set { _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); }
+            }
+
+            // Same answer as the base collector.
+            public override bool AcceptsDocsOutOfOrder()
+            {
+                return base.AcceptsDocsOutOfOrder();
+            }
+
+            // Variant that keeps a hit count next to every score sum so the sums can be averaged.
+            internal class Avg : Sv
+            {
+                // Set to null once the averages have been computed.
+                private int[] _scoreCounts = new int[InitialArraySize];
+
+                internal Avg(string field)
+                    : base(field, ScoreMode.Avg)
+                {
+                }
+
+                /// <summary>
+                /// Registers the term of <paramref name="doc"/>, adds the current score to its sum and counts the hit.
+                /// </summary>
+                public override void Collect(int doc)
+                {
+                    _fromDocTerms.Get(doc, _spare);
+                    int termId = _collectedTerms.Add(_spare);
+                    if (termId < 0)
+                    {
+                        termId = -termId - 1;
+                    }
+                    else if (termId >= _scoreSums.Length)
+                    {
+                        // Sums and counts grow together.
+                        _scoreSums = ArrayUtil.Grow(_scoreSums);
+                        _scoreCounts = ArrayUtil.Grow(_scoreCounts);
+                    }
+
+                    float score = _scorer.Score();
+                    float previous = _scoreSums[termId];
+                    if (previous.CompareTo(0.0f) == 0)
+                    {
+                        _scoreSums[termId] = score;
+                        _scoreCounts[termId] = 1;
+                    }
+                    else
+                    {
+                        _scoreSums[termId] = previous + score;
+                        _scoreCounts[termId]++;
+                    }
+                }
+
+                /// <summary>
+                /// On first access divides every sum by its count (in place) and drops the counts;
+                /// later accesses return the same array unchanged.
+                /// </summary>
+                public override float[] ScoresPerTerm
+                {
+                    get
+                    {
+                        if (_scoreCounts == null)
+                        {
+                            return _scoreSums;
+                        }
+
+                        for (int slot = 0; slot < _scoreCounts.Length; slot++)
+                        {
+                            _scoreSums[slot] = _scoreSums[slot] / _scoreCounts[slot];
+                        }
+                        _scoreCounts = null;
+                        return _scoreSums;
+                    }
+                }
+            }
+        }
+
+        // Collector implementation for fields that can hold several values per document.
+        internal class Mv : TermsWithScoreCollector
+        {
+            private SortedSetDocValues _fromDocTermOrds;
+            // Reused buffer the looked-up term bytes are written into.
+            private readonly BytesRef _scratch = new BytesRef();
+
+            internal Mv(string field, ScoreMode scoreMode) : base(field, scoreMode)
+            {
+            }
+
+            /// <summary>
+            /// Registers every term of <paramref name="doc"/> and folds the current score into each
+            /// term's slot according to the score mode.
+            /// </summary>
+            public override void Collect(int doc)
+            {
+                _fromDocTermOrds.Document = doc;
+                long ord;
+                while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                {
+                    _fromDocTermOrds.LookupOrd(ord, _scratch);
+
+                    int termId = _collectedTerms.Add(_scratch);
+                    if (termId < 0)
+                    {
+                        // Negative result: decode the id of the already present term.
+                        termId = -termId - 1;
+                    }
+                    else
+                    {
+                        if (termId >= _scoreSums.Length)
+                        {
+                            _scoreSums = ArrayUtil.Grow(_scoreSums);
+                        }
+                    }
+
+                    switch (_scoreMode)
+                    {
+                        case ScoreMode.Total:
+                            _scoreSums[termId] += _scorer.Score();
+                            break;
+                        case ScoreMode.Max:
+                            _scoreSums[termId] = Math.Max(_scoreSums[termId], _scorer.Score());
+                            break;
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Switches to the doc-term-ords of the next segment.
+            /// </summary>
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    _fromDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field);
+                }
+            }
+
+            public override bool AcceptsDocsOutOfOrder()
+            {
+                // This used to throw NotImplementedException, which made the multi-valued collector
+                // unusable for any caller that asks about ordering. Defer to the base class, exactly
+                // as the single-valued sibling collector does.
+                return base.AcceptsDocsOutOfOrder();
+            }
+
+            // Variant that keeps a hit count next to every score sum so the sums can be averaged.
+            internal class Avg : Mv
+            {
+                // Set to null once the averages have been computed.
+                private int[] _scoreCounts = new int[InitialArraySize];
+
+                internal Avg(string field) : base(field, ScoreMode.Avg)
+                {
+                }
+
+                /// <summary>
+                /// Registers every term of <paramref name="doc"/>, adds the current score to each
+                /// term's sum and counts the hit.
+                /// </summary>
+                public override void Collect(int doc)
+                {
+                    _fromDocTermOrds.Document = doc;
+                    long ord;
+                    while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                    {
+                        _fromDocTermOrds.LookupOrd(ord, _scratch);
+
+                        int termId = _collectedTerms.Add(_scratch);
+                        if (termId < 0)
+                        {
+                            termId = -termId - 1;
+                        }
+                        else
+                        {
+                            if (termId >= _scoreSums.Length)
+                            {
+                                // Sums and counts grow together.
+                                _scoreSums = ArrayUtil.Grow(_scoreSums);
+                                _scoreCounts = ArrayUtil.Grow(_scoreCounts);
+                            }
+                        }
+
+                        _scoreSums[termId] += _scorer.Score();
+                        _scoreCounts[termId]++;
+                    }
+                }
+
+                /// <summary>
+                /// On first access divides every sum by its count (in place) and drops the counts;
+                /// later accesses return the same array unchanged.
+                /// </summary>
+                public override float[] ScoresPerTerm
+                {
+                    get
+                    {
+                        if (_scoreCounts != null)
+                        {
+                            for (int i = 0; i < _scoreCounts.Length; i++)
+                            {
+                                _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
+                            }
+                            _scoreCounts = null;
+                        }
+                        return _scoreSums;
+                    }
+                }
+            }
+        }
+
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToChildBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToChildBlockJoinQuery.cs b/src/Lucene.Net.Join/ToChildBlockJoinQuery.cs
new file mode 100644
index 0000000..3d4f2d5
--- /dev/null
+++ b/src/Lucene.Net.Join/ToChildBlockJoinQuery.cs
@@ -0,0 +1,396 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Just like <see cref="ToParentBlockJoinQuery"/>, except this
+ /// query joins in reverse: you provide a Query matching
+ /// parent documents and it joins down to child
+ /// documents.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToChildBlockJoinQuery : Query
+ {
+ /// <summary>
+ /// Message thrown from <see cref="ToChildBlockJoinScorer.ValidateParentDoc"/>
+ /// on mis-use, when the parent query incorrectly returns child docs.
+ /// </summary>
+ public const string InvalidQueryMessage = "Parent query yields document which is not matched by parents filter, docID=";
+
+ private readonly Filter _parentsFilter;
+ private readonly Query _parentQuery;
+
+ // If we are rewritten, this is the original parentQuery we
+ // were passed; we use this for .equals() and
+ // .hashCode(). This makes rewritten query equal the
+ // original, so that user does not have to .rewrite() their
+ // query before searching:
+ private readonly Query _origParentQuery;
+ private readonly bool _doScores;
+
+ /// <summary>
+ /// Creates a query that joins from matching parent documents down to their children.
+ /// </summary>
+ /// <param name="parentQuery">Query that matches parent documents; it is also kept as the original query.</param>
+ /// <param name="parentsFilter">Filter (must produce FixedBitSet per-segment, like <see cref="FixedBitSetCachingWrapperFilter"/>)
+ /// identifying the parent documents.</param>
+ /// <param name="doScores">True if parent scores should be calculated.</param>
+ public ToChildBlockJoinQuery(Query parentQuery, Filter parentsFilter, bool doScores)
+     : this(parentQuery, parentQuery, parentsFilter, doScores)
+ {
+ }
+
+ // Used when the original and the currently effective parent query may differ
+ // (for example after a rewrite): stores both, plus the filter and scoring flag.
+ private ToChildBlockJoinQuery(Query origParentQuery, Query parentQuery, Filter parentsFilter, bool doScores)
+ {
+     _parentsFilter = parentsFilter;
+     _doScores = doScores;
+     _parentQuery = parentQuery;
+     _origParentQuery = origParentQuery;
+ }
+
+ /// <summary>
+ /// Builds the weight for this query by wrapping the weight of the parent query.
+ /// </summary>
+ public override Weight CreateWeight(IndexSearcher searcher)
+ {
+     Weight parentWeight = _parentQuery.CreateWeight(searcher);
+     return new ToChildBlockJoinWeight(this, parentWeight, _parentsFilter, _doScores);
+ }
+
+ /// <summary>
+ /// Weight for the join query: delegates normalization to the parent query's
+ /// weight (applying the join query's boost) and creates the child scorer.
+ /// </summary>
+ private class ToChildBlockJoinWeight : Weight
+ {
+     private readonly Query _joinQuery;
+     private readonly Weight _parentWeight;
+     private readonly Filter _parentsFilter;
+     private readonly bool _doScores;
+
+     public ToChildBlockJoinWeight(Query joinQuery, Weight parentWeight, Filter parentsFilter, bool doScores)
+     {
+         _doScores = doScores;
+         _parentsFilter = parentsFilter;
+         _parentWeight = parentWeight;
+         _joinQuery = joinQuery;
+     }
+
+     /// <summary>The join query this weight was created for.</summary>
+     public override Query Query
+     {
+         get
+         {
+             return _joinQuery;
+         }
+     }
+
+     /// <summary>The parent weight's value multiplied by the square of the join query's boost.</summary>
+     public override float ValueForNormalization
+     {
+         get
+         {
+             float parentValue = _parentWeight.ValueForNormalization;
+             float boost = _joinQuery.Boost;
+             return parentValue * boost * boost;
+         }
+     }
+
+     /// <summary>Forwards normalization to the parent weight with the join query's boost folded in.</summary>
+     public override void Normalize(float norm, float topLevelBoost)
+     {
+         float boostedTopLevel = topLevelBoost * _joinQuery.Boost;
+         _parentWeight.Normalize(norm, boostedTopLevel);
+     }
+
+     /// <summary>
+     /// Creates the child-space scorer for one reader context, or returns null when
+     /// either the parent query or the parents filter yields nothing for it.
+     /// </summary>
+     /// <exception cref="InvalidOperationException">The parents filter did not return a FixedBitSet.</exception>
+     // NOTE: acceptDocs applies (and is checked) only in the child document space
+     public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
+     {
+         Scorer parentScorer = _parentWeight.Scorer(readerContext, null);
+         if (parentScorer == null)
+         {
+             // Parent query has no matches here.
+             return null;
+         }
+
+         // acceptDocs is deliberately NOT handed to the filter: doing so would
+         // (most likely, justifiably) make it return a BitsFilteredDocIdSet
+         // instead of a FixedBitSet. acceptDocs is applied while scoring instead.
+         DocIdSet parents = _parentsFilter.GetDocIdSet(readerContext, null);
+         if (parents == null)
+         {
+             // Filter has no matches here.
+             return null;
+         }
+
+         FixedBitSet parentBits = parents as FixedBitSet;
+         if (parentBits == null)
+         {
+             throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
+         }
+
+         return new ToChildBlockJoinScorer(this, parentScorer, parentBits, _doScores, acceptDocs);
+     }
+
+     /// <summary>Explanations are not implemented for this weight; always throws.</summary>
+     public override Explanation Explain(AtomicReaderContext reader, int doc)
+     {
+         // TODO
+         string typeName = GetType().Name;
+         throw new NotSupportedException(typeName + " cannot explain match on parent document");
+     }
+
+     /// <summary>The scorer produced by this weight never scores documents out of order.</summary>
+     public override bool ScoresDocsOutOfOrder()
+     {
+         return false;
+     }
+ }
+
+ /// <summary>
+ /// Scorer that iterates the child documents of every parent document matched
+ /// by the wrapped parent scorer. The children of a parent are the documents
+ /// between the previous set bit in the parent bit set (exclusive) and the
+ /// parent itself (exclusive). Children rejected by acceptDocs are skipped.
+ /// </summary>
+ private sealed class ToChildBlockJoinScorer : Scorer
+ {
+     private readonly Scorer _parentScorer;
+     private readonly FixedBitSet _parentBits;
+     private readonly bool _doScores;
+     private readonly Bits _acceptDocs;
+
+     // Score and freq of the current parent; only refreshed when _doScores is true.
+     private float _parentScore;
+     private int _parentFreq = 1;
+
+     // Current child doc (-1 before iteration starts) and the parent doc closing its block.
+     private int _childDoc = -1;
+     private int _parentDoc;
+
+     public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, bool doScores, Bits acceptDocs) : base(weight)
+     {
+         _doScores = doScores;
+         _parentBits = parentBits;
+         _parentScorer = parentScorer;
+         _acceptDocs = acceptDocs;
+     }
+
+     /// <summary>Exposes the wrapped parent scorer as the single child scorer, tagged "BLOCK_JOIN".</summary>
+     public override ICollection<ChildScorer> Children
+     {
+         get { return Collections.Singleton(new ChildScorer(_parentScorer, "BLOCK_JOIN")); }
+     }
+
+     /// <summary>
+     /// Moves to the next accepted child document.
+     /// </summary>
+     /// <returns>The new child doc id, or NO_MORE_DOCS when the parent scorer is exhausted.</returns>
+     /// <exception cref="InvalidOperationException">The parent scorer returned a document that is not a parent.</exception>
+     public override int NextDoc()
+     {
+         // Loop until we hit a childDoc that's accepted
+         while (true)
+         {
+             if (_childDoc + 1 == _parentDoc)
+             {
+                 // All children of the current parent have been visited (this is
+                 // also the initial state: -1 + 1 == 0), so move the parent on.
+                 _parentDoc = _parentScorer.NextDoc();
+                 ValidateParentDoc();
+                 if (!PositionOnFirstChild())
+                 {
+                     return _childDoc;
+                 }
+             }
+             else
+             {
+                 Debug.Assert(_childDoc < _parentDoc, "childDoc=" + _childDoc + " parentDoc=" + _parentDoc);
+                 _childDoc++;
+             }
+
+             if (_acceptDocs == null || _acceptDocs.Get(_childDoc))
+             {
+                 return _childDoc;
+             }
+         }
+     }
+
+     /// <summary>
+     /// Starting at the current (already validated) parent doc, skips matching
+     /// parents that have no children, then positions <c>_childDoc</c> on the
+     /// first child of the parent found and captures that parent's score/freq.
+     /// </summary>
+     /// <returns>
+     /// True when positioned on a child; false when the parent scorer is
+     /// exhausted, in which case <c>_childDoc</c> is NO_MORE_DOCS.
+     /// </returns>
+     private bool PositionOnFirstChild()
+     {
+         while (_parentDoc != NO_MORE_DOCS)
+         {
+             // Degenerate but allowed: a parent at doc 0 cannot have children.
+             // Handling it here also avoids calling PrevSetBit with -1.
+             int firstChild = _parentDoc == 0 ? 0 : 1 + _parentBits.PrevSetBit(_parentDoc - 1);
+
+             if (firstChild < _parentDoc)
+             {
+                 _childDoc = firstChild;
+                 // Capture score/freq as soon as the block is entered, i.e. before
+                 // any acceptDocs check. Otherwise, when the first child is filtered
+                 // out, the remaining children would report the previous parent's values.
+                 if (_doScores)
+                 {
+                     _parentScore = _parentScorer.Score();
+                     _parentFreq = _parentScorer.Freq();
+                 }
+                 return true;
+             }
+
+             // Degenerate but allowed: this parent has no children; try the next one.
+             _parentDoc = _parentScorer.NextDoc();
+             ValidateParentDoc();
+         }
+
+         _childDoc = NO_MORE_DOCS;
+         return false;
+     }
+
+     /// <summary>
+     /// Detect mis-use, where provided parent query in fact sometimes returns child documents.
+     /// </summary>
+     /// <exception cref="InvalidOperationException">The current parent doc is not set in the parent bit set.</exception>
+     private void ValidateParentDoc()
+     {
+         if (_parentDoc != NO_MORE_DOCS && !_parentBits.Get(_parentDoc))
+         {
+             throw new InvalidOperationException(InvalidQueryMessage + _parentDoc);
+         }
+     }
+
+     /// <summary>Current child doc id (-1 before the first positioning call).</summary>
+     public override int DocID()
+     {
+         return _childDoc;
+     }
+
+     /// <summary>Score captured from the current parent (only updated when scoring is enabled).</summary>
+     public override float Score()
+     {
+         return _parentScore;
+     }
+
+     /// <summary>Freq captured from the current parent (only updated when scoring is enabled).</summary>
+     public override int Freq()
+     {
+         return _parentFreq;
+     }
+
+     /// <summary>
+     /// Advances to the first accepted child document at or after <paramref name="childTarget"/>.
+     /// </summary>
+     /// <param name="childTarget">Target doc id in child space; must not be a parent document.</param>
+     /// <returns>The new child doc id, or NO_MORE_DOCS when there is none.</returns>
+     /// <exception cref="InvalidOperationException">The parent scorer returned a document that is not a parent.</exception>
+     public override int Advance(int childTarget)
+     {
+         Debug.Assert(childTarget >= _parentBits.Length() || !_parentBits.Get(childTarget));
+
+         if (childTarget == NO_MORE_DOCS)
+         {
+             return _childDoc = _parentDoc = NO_MORE_DOCS;
+         }
+
+         Debug.Assert(_childDoc == -1 || childTarget != _parentDoc, "childTarget=" + childTarget);
+         if (_childDoc == -1 || childTarget > _parentDoc)
+         {
+             // Advance to new parent:
+             _parentDoc = _parentScorer.Advance(childTarget);
+             ValidateParentDoc();
+             Debug.Assert(_parentDoc > childTarget);
+
+             if (!PositionOnFirstChild())
+             {
+                 return _childDoc;
+             }
+
+             // _childDoc is now the first real child of the matched parent
+             // (previous parent bit + 1). Clamping to it keeps the result inside
+             // this parent's block even when the target lies in an earlier block
+             // whose parent did not match; previously the clamp used the previous
+             // parent bit itself and could return a parent document.
+             childTarget = Math.Max(childTarget, _childDoc);
+         }
+
+         Debug.Assert(childTarget < _parentDoc);
+
+         // Advance within children of current parent:
+         _childDoc = childTarget;
+         if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+         {
+             NextDoc();
+         }
+         return _childDoc;
+     }
+
+     /// <summary>Cost estimate, taken unchanged from the parent scorer.</summary>
+     public override long Cost()
+     {
+         return _parentScorer.Cost();
+     }
+ }
+
+ /// <summary>
+ /// Collects terms by delegating to the current parent query.
+ /// </summary>
+ public override void ExtractTerms(ISet<Term> terms)
+ {
+     Query termSource = _parentQuery;
+     termSource.ExtractTerms(terms);
+ }
+
+ /// <summary>
+ /// Rewrites the parent query. When the parent query rewrites to something
+ /// else, returns a new join query around the rewritten parent that keeps the
+ /// same boost, filter and scoring flag; otherwise returns this instance.
+ /// </summary>
+ public override Query Rewrite(IndexReader reader)
+ {
+     Query parentRewrite = _parentQuery.Rewrite(reader);
+     if (parentRewrite == _parentQuery)
+     {
+         return this;
+     }
+
+     // Carry over _origParentQuery rather than _parentQuery: Equals/GetHashCode
+     // are based on the original query, and on a second rewrite pass
+     // _parentQuery is already a rewritten form. Passing it as the "original"
+     // would make the result unequal to the query the user created.
+     Query rewritten = new ToChildBlockJoinQuery(_origParentQuery, parentRewrite, _parentsFilter, _doScores);
+     rewritten.Boost = Boost;
+     return rewritten;
+ }
+
+ /// <summary>
+ /// Returns the textual form of this query: the current parent query wrapped
+ /// in a "ToChildBlockJoinQuery (...)" label. The field argument is not used.
+ /// </summary>
+ public override string ToString(string field)
+ {
+     return string.Concat("ToChildBlockJoinQuery (", _parentQuery, ")");
+ }
+
+ /// <summary>
+ /// Typed equality: the base comparison must succeed and the original parent
+ /// query, the scoring flag and the parents filter must all match.
+ /// </summary>
+ protected bool Equals(ToChildBlockJoinQuery other)
+ {
+     if (!base.Equals(other))
+     {
+         return false;
+     }
+     if (!Equals(_origParentQuery, other._origParentQuery))
+     {
+         return false;
+     }
+     if (_doScores != other._doScores)
+     {
+         return false;
+     }
+     return Equals(_parentsFilter, other._parentsFilter);
+ }
+
+ /// <summary>
+ /// Equal when <paramref name="obj"/> is this same instance, or is a non-null
+ /// object of exactly this runtime type that passes the typed comparison.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+     if (ReferenceEquals(this, obj))
+     {
+         return true;
+     }
+
+     bool sameRuntimeType = !ReferenceEquals(obj, null) && obj.GetType() == GetType();
+     return sameRuntimeType && Equals((ToChildBlockJoinQuery) obj);
+ }
+
+ /// <summary>
+ /// Combines the base hash with the hashes of the original parent query, the
+ /// scoring flag and the parents filter (a null member contributes 0).
+ /// </summary>
+ public override int GetHashCode()
+ {
+     unchecked
+     {
+         int origQueryHash = _origParentQuery == null ? 0 : _origParentQuery.GetHashCode();
+         int filterHash = _parentsFilter == null ? 0 : _parentsFilter.GetHashCode();
+
+         int hashCode = base.GetHashCode();
+         hashCode = (hashCode * 397) ^ origQueryHash;
+         hashCode = (hashCode * 397) ^ _doScores.GetHashCode();
+         hashCode = (hashCode * 397) ^ filterHash;
+         return hashCode;
+     }
+ }
+
+ /// <summary>
+ /// Creates a copy of this query built around a clone of the original parent
+ /// query, with the same parents filter, scoring flag and boost.
+ /// </summary>
+ public override object Clone()
+ {
+     var clone = new ToChildBlockJoinQuery((Query) _origParentQuery.Clone(), _parentsFilter, _doScores);
+     // The constructor does not carry the boost over; copy it explicitly
+     // (as Rewrite does) so the clone does not silently lose it.
+     clone.Boost = Boost;
+     return clone;
+ }
+ }
+}
\ No newline at end of file