You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/08/24 00:34:01 UTC
[02/17] lucenenet git commit: Ported over the first batch of items
from the Join project
Ported over the first batch of items from the Join project
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bd772f02
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bd772f02
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bd772f02
Branch: refs/heads/master
Commit: bd772f024f87a8e5b1115d5ce5ce07cd944fcdef
Parents: ecd2860
Author: Josh Sullivan <ja...@gmail.com>
Authored: Fri Aug 14 02:40:45 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Fri Aug 14 02:40:45 2015 -0400
----------------------------------------------------------------------
Lucene.Net.Join/FakeScorer.cs | 76 +++
.../FixedBitSetCachingWrapperFilter.cs | 62 +++
Lucene.Net.Join/JoinUtil.cs | 80 ++++
Lucene.Net.Join/Lucene.Net.Join.csproj | 66 +++
Lucene.Net.Join/Properties/AssemblyInfo.cs | 36 ++
Lucene.Net.Join/ScoreMode.cs | 45 ++
Lucene.Net.Join/TermsCollector.cs | 127 +++++
Lucene.Net.Join/TermsIncludingScoreQuery.cs | 472 +++++++++++++++++++
Lucene.Net.Join/TermsQuery.cs | 147 ++++++
Lucene.Net.Join/TermsWithScoreCollector.cs | 333 +++++++++++++
Lucene.Net.sln | 18 +-
11 files changed, 1460 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/FakeScorer.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/FakeScorer.cs b/Lucene.Net.Join/FakeScorer.cs
new file mode 100644
index 0000000..42bf91b
--- /dev/null
+++ b/Lucene.Net.Join/FakeScorer.cs
@@ -0,0 +1,76 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Placeholder <see cref="Scorer"/> that is handed to <see cref="Collector.Scorer"/> during join collection.
+    /// It only reports the values stored in its <c>doc</c> and <c>_score</c> fields; iteration,
+    /// frequency, weight and child lookups all throw <see cref="NotSupportedException"/>.
+    /// </summary>
+    internal sealed class FakeScorer : Scorer
+    {
+        // Document id returned by DocID(); stays at -1 until a caller assigns one.
+        internal int doc = -1;
+
+        // Score returned by Score(); assigned directly by the code that drives collection.
+        internal float _score;
+
+        /// <summary>
+        /// Creates a scorer that is not bound to any weight (the base class receives <c>null</c>).
+        /// </summary>
+        public FakeScorer()
+            : base(null)
+        {
+        }
+
+        // ---- members that report the stored state ----
+
+        /// <summary>Returns the value currently held in <c>_score</c>.</summary>
+        public override float Score()
+        {
+            return _score;
+        }
+
+        /// <summary>Returns the value currently held in <c>doc</c>.</summary>
+        public override int DocID()
+        {
+            return doc;
+        }
+
+        /// <summary>Always returns 1.</summary>
+        public override long Cost()
+        {
+            return 1;
+        }
+
+        // ---- members that are deliberately unsupported ----
+
+        /// <exception cref="NotSupportedException">Always thrown.</exception>
+        public override int Freq()
+        {
+            throw new NotSupportedException("FakeScorer doesn't support Freq()");
+        }
+
+        /// <exception cref="NotSupportedException">Always thrown.</exception>
+        public override int Advance(int target)
+        {
+            throw new NotSupportedException("FakeScorer doesn't support Advance(int)");
+        }
+
+        /// <exception cref="NotSupportedException">Always thrown.</exception>
+        public override int NextDoc()
+        {
+            throw new NotSupportedException("FakeScorer doesn't support NextDoc()");
+        }
+
+        /// <exception cref="NotSupportedException">Always thrown.</exception>
+        public override ICollection<ChildScorer> Children
+        {
+            get { throw new NotSupportedException(); }
+        }
+
+        /// <exception cref="NotSupportedException">Always thrown.</exception>
+        public override Weight Weight
+        {
+            get { throw new NotSupportedException(); }
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs b/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
new file mode 100644
index 0000000..da8b0b8
--- /dev/null
+++ b/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
@@ -0,0 +1,62 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// A <see cref="CachingWrapperFilter"/> whose cached sets are always <see cref="FixedBitSet"/>
+    /// instances (or the shared empty set), as required for joins.
+    /// </summary>
+    public sealed class FixedBitSetCachingWrapperFilter : CachingWrapperFilter
+    {
+        /// <summary>
+        /// Sole constructor, see <see cref="CachingWrapperFilter"/>.
+        /// </summary>
+        /// <param name="filter">The filter whose results are cached.</param>
+        public FixedBitSetCachingWrapperFilter(Filter filter)
+            : base(filter)
+        {
+        }
+
+        /// <summary>
+        /// Converts the set produced by the wrapped filter into the form that gets cached.
+        /// </summary>
+        /// <param name="docIdSet">The set produced for the segment; may be <c>null</c>.</param>
+        /// <param name="reader">The segment reader; its <c>MaxDoc</c> sizes the copied bit set.</param>
+        /// <returns>
+        /// <paramref name="docIdSet"/> itself when it already is a <see cref="FixedBitSet"/>,
+        /// <c>EMPTY_DOCIDSET</c> when the set or its iterator is <c>null</c>, otherwise a new
+        /// <see cref="FixedBitSet"/> filled from the set's iterator.
+        /// </returns>
+        protected override DocIdSet DocIdSetToCache(DocIdSet docIdSet, AtomicReader reader)
+        {
+            // Unlike CachingWrapperFilter, a set is never cached as-is just because it is
+            // cacheable: everything that is not already a FixedBitSet gets converted, since
+            // all cached filters are required to be FixedBitSets.
+            if (docIdSet is FixedBitSet)
+            {
+                return docIdSet;
+            }
+
+            // A missing set and a missing iterator both mean "no documents".
+            DocIdSetIterator iterator = docIdSet == null ? null : docIdSet.GetIterator();
+            if (iterator == null)
+            {
+                return EMPTY_DOCIDSET;
+            }
+
+            FixedBitSet bits = new FixedBitSet(reader.MaxDoc);
+            bits.Or(iterator);
+            return bits;
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/JoinUtil.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/JoinUtil.cs b/Lucene.Net.Join/JoinUtil.cs
new file mode 100644
index 0000000..726731e
--- /dev/null
+++ b/Lucene.Net.Join/JoinUtil.cs
@@ -0,0 +1,80 @@
+using System.IO;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+    /// <summary>
+    /// Utility for query time joining using TermsQuery and TermsCollector.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    public sealed class JoinUtil
+    {
+        // Only static members are exposed; the private constructor prevents instantiation.
+        private JoinUtil()
+        {
+        }
+
+        /// <summary>
+        /// Method for query time joining.
+        /// <para>
+        /// Execute the returned query with an <see cref="IndexSearcher"/> to retrieve all documents that have the
+        /// same terms in the to field as the documents matching the specified fromQuery have in the from field.
+        /// </para>
+        /// <para>
+        /// If a single document relates to more than one document, <c>multipleValuesPerDocument</c> should be set
+        /// to <c>true</c>. In that case only the score from the first encountered join value originating from the
+        /// 'from' side is mapped into the 'to' side, even when a second join value related to a specific document
+        /// yields a higher score. This does not apply when <see cref="ScoreMode.None"/> is used, since no scores
+        /// are computed at all.
+        /// </para>
+        /// <para>
+        /// Memory considerations: during joining all unique join values are kept in memory. When the scoreMode
+        /// isn't <see cref="ScoreMode.None"/>, a float value per unique join value is also kept in memory for
+        /// computing scores. When scoreMode is <see cref="ScoreMode.Avg"/>, an additional integer value is kept in
+        /// memory per unique join value.
+        /// </para>
+        /// </summary>
+        /// <param name="fromField"> The from field to join from </param>
+        /// <param name="multipleValuesPerDocument"> Whether the from field has multiple terms per document </param>
+        /// <param name="toField"> The to field to join to </param>
+        /// <param name="fromQuery"> The query to match documents on the from side </param>
+        /// <param name="fromSearcher"> The searcher that executed the specified fromQuery </param>
+        /// <param name="scoreMode"> Instructs how scores from the fromQuery are mapped to the returned query </param>
+        /// <returns>A <see cref="Query"/> instance that can be used to join documents based on the terms in the from and to field</returns>
+        /// <exception cref="IOException"> If I/O related errors occur </exception>
+        /// <exception cref="System.ArgumentException"> If <paramref name="scoreMode"/> is not one of the known modes </exception>
+        public static Query CreateJoinQuery(string fromField, bool multipleValuesPerDocument, string toField, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode)
+        {
+            // Unscored join: only the join terms themselves need to be collected.
+            if (scoreMode == ScoreMode.None)
+            {
+                TermsCollector plainCollector = TermsCollector.Create(fromField, multipleValuesPerDocument);
+                fromSearcher.Search(fromQuery, plainCollector);
+                return new TermsQuery(toField, fromQuery, plainCollector.CollectorTerms);
+            }
+
+            bool isScoringMode = scoreMode == ScoreMode.Total
+                                 || scoreMode == ScoreMode.Max
+                                 || scoreMode == ScoreMode.Avg;
+            if (!isScoringMode)
+            {
+                throw new System.ArgumentException(string.Format("Score mode {0} isn't supported.", scoreMode));
+            }
+
+            // Scored join: collect the terms together with a score per term.
+            TermsWithScoreCollector scoringCollector = TermsWithScoreCollector.Create(fromField, multipleValuesPerDocument, scoreMode);
+            fromSearcher.Search(fromQuery, scoringCollector);
+            return new TermsIncludingScoreQuery(
+                toField,
+                multipleValuesPerDocument,
+                scoringCollector.CollectedTerms,
+                scoringCollector.ScoresPerTerm,
+                fromQuery);
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/Lucene.Net.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Lucene.Net.Join.csproj b/Lucene.Net.Join/Lucene.Net.Join.csproj
new file mode 100644
index 0000000..b38303e
--- /dev/null
+++ b/Lucene.Net.Join/Lucene.Net.Join.csproj
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{E8A339C7-FCF6-4A72-8586-56D8961D7B99}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Join</RootNamespace>
+ <AssemblyName>Lucene.Net.Join</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+  <ItemGroup>
+    <!-- Every source file added by this commit must be listed here; JoinUtil.cs depends on TermsWithScoreCollector.cs. -->
+    <Compile Include="FakeScorer.cs" />
+    <Compile Include="FixedBitSetCachingWrapperFilter.cs" />
+    <Compile Include="JoinUtil.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="ScoreMode.cs" />
+    <Compile Include="TermsCollector.cs" />
+    <Compile Include="TermsIncludingScoreQuery.cs" />
+    <Compile Include="TermsQuery.cs" />
+    <Compile Include="TermsWithScoreCollector.cs" />
+  </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Properties/AssemblyInfo.cs b/Lucene.Net.Join/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..2c17c13
--- /dev/null
+++ b/Lucene.Net.Join/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Join")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Join")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("e8a339c7-fcf6-4a72-8586-56d8961d7b99")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/ScoreMode.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ScoreMode.cs b/Lucene.Net.Join/ScoreMode.cs
new file mode 100644
index 0000000..a5b91be
--- /dev/null
+++ b/Lucene.Net.Join/ScoreMode.cs
@@ -0,0 +1,45 @@
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Strategy for combining the scores of multiple child hits into one parent score.
+    /// </summary>
+    public enum ScoreMode
+    {
+        /// <summary>
+        /// No score is computed.
+        /// </summary>
+        None = 0,
+
+        /// <summary>
+        /// The parent hit is scored with the average of all child scores.
+        /// </summary>
+        Avg = 1,
+
+        /// <summary>
+        /// The parent hit is scored with the highest of all child scores.
+        /// </summary>
+        Max = 2,
+
+        /// <summary>
+        /// The parent hit is scored with the sum of all child scores.
+        /// </summary>
+        Total = 3
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/TermsCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsCollector.cs b/Lucene.Net.Join/TermsCollector.cs
new file mode 100644
index 0000000..8f8e4f5
--- /dev/null
+++ b/Lucene.Net.Join/TermsCollector.cs
@@ -0,0 +1,127 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// A collector that collects all terms from a specified field matching the query.
+    /// The terms of every collected document are added to a single <see cref="BytesRefHash"/>
+    /// exposed through <see cref="CollectorTerms"/>.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    internal abstract class TermsCollector : Collector
+    {
+        private readonly string _field;
+        private readonly BytesRefHash _collectorTerms = new BytesRefHash();
+
+        /// <param name="field">The field whose terms are collected.</param>
+        internal TermsCollector(string field)
+        {
+            _field = field;
+        }
+
+        /// <summary>
+        /// The terms gathered so far from all collected documents.
+        /// </summary>
+        public BytesRefHash CollectorTerms
+        {
+            get
+            {
+                return _collectorTerms;
+            }
+        }
+
+        /// <summary>
+        /// The scorer is ignored: this collector only records terms, never scores.
+        /// </summary>
+        public override Scorer Scorer
+        {
+            set {}
+        }
+
+        /// <summary>
+        /// Always <c>true</c>. The nested implementations intentionally do NOT override this:
+        /// they previously threw <see cref="System.NotImplementedException"/>, which broke every
+        /// caller that asked the collector whether out-of-order collection is acceptable.
+        /// </summary>
+        public override bool AcceptsDocsOutOfOrder()
+        {
+            return true;
+        }
+
+        /// <summary>
+        /// Chooses the right <see cref="TermsCollector"/> implementation.
+        /// </summary>
+        /// <param name="field">The field to collect terms for.</param>
+        /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
+        /// <returns>A <see cref="TermsCollector"/> instance.</returns>
+        internal static TermsCollector Create(string field, bool multipleValuesPerDocument)
+        {
+            return multipleValuesPerDocument ? (TermsCollector) new MV(field) : new SV(field);
+        }
+
+        // impl that works with multiple values per document
+        private sealed class MV : TermsCollector
+        {
+            // Reused buffer that receives each looked-up term before it is added to the hash.
+            private readonly BytesRef _scratch = new BytesRef();
+            // Per-segment term ordinals, replaced whenever NextReader is set.
+            private SortedSetDocValues _docTermOrds;
+
+            internal MV(string field) : base(field)
+            {
+            }
+
+            /// <summary>
+            /// Adds every term of <paramref name="doc"/> in the collected field to the term hash.
+            /// </summary>
+            public override void Collect(int doc)
+            {
+                _docTermOrds.Document = doc;
+                long ord;
+                while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                {
+                    _docTermOrds.LookupOrd(ord, _scratch);
+                    _collectorTerms.Add(_scratch);
+                }
+            }
+
+            /// <summary>
+            /// Loads the term ordinals of the collected field for the new segment from the field cache.
+            /// </summary>
+            public override AtomicReaderContext NextReader
+            {
+                set { _docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field); }
+            }
+        }
+
+        // impl that works with single value per document
+        private sealed class SV : TermsCollector
+        {
+            // Reused buffer that receives the document's term before it is added to the hash.
+            private readonly BytesRef _spare = new BytesRef();
+            // Per-segment terms, replaced whenever NextReader is set.
+            private BinaryDocValues _fromDocTerms;
+
+            internal SV(string field) : base(field)
+            {
+            }
+
+            /// <summary>
+            /// Adds the value of <paramref name="doc"/> in the collected field to the term hash.
+            /// </summary>
+            public override void Collect(int doc)
+            {
+                _fromDocTerms.Get(doc, _spare);
+                _collectorTerms.Add(_spare);
+            }
+
+            /// <summary>
+            /// Loads the terms of the collected field for the new segment from the field cache.
+            /// </summary>
+            public override AtomicReaderContext NextReader
+            {
+                set { _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); }
+            }
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/TermsIncludingScoreQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsIncludingScoreQuery.cs b/Lucene.Net.Join/TermsIncludingScoreQuery.cs
new file mode 100644
index 0000000..9f3befc
--- /dev/null
+++ b/Lucene.Net.Join/TermsIncludingScoreQuery.cs
@@ -0,0 +1,472 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Query that matches documents containing, in a given field, one of a set of join terms and
+    /// scores each match with the score stored for the matching term.
+    /// </summary>
+    internal class TermsIncludingScoreQuery : Query
+    {
+        private readonly string _field;
+        private readonly bool _multipleValuesPerDocument;
+        private readonly BytesRefHash _terms;
+        // Scores indexed by term id, i.e. by the values stored in _ords.
+        private readonly float[] _scores;
+        // Term ids in sorted term order, as returned by BytesRefHash.Sort.
+        private readonly int[] _ords;
+        private readonly Query _originalQuery;
+        // The query as it was before Rewrite; used for ToString, Equals and GetHashCode.
+        private readonly Query _unwrittenOriginalQuery;
+
+        /// <summary>
+        /// Creates the query and sorts <paramref name="terms"/> to obtain the term order used while scoring.
+        /// </summary>
+        /// <param name="field">The field that matching documents must contain a join term in.</param>
+        /// <param name="multipleValuesPerDocument">Whether a document may hold several join values in <paramref name="field"/>.</param>
+        /// <param name="terms">The join terms.</param>
+        /// <param name="scores">The score of each join term, indexed by term id.</param>
+        /// <param name="originalQuery">The query the join terms were collected with.</param>
+        internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
+            float[] scores, Query originalQuery)
+        {
+            _field = field;
+            _multipleValuesPerDocument = multipleValuesPerDocument;
+            _terms = terms;
+            _scores = scores;
+            _originalQuery = originalQuery;
+            _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
+            _unwrittenOriginalQuery = originalQuery;
+        }
+
+        // Used by Rewrite: reuses the already computed ords instead of sorting the terms again.
+        private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
+            float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
+        {
+            _field = field;
+            _multipleValuesPerDocument = multipleValuesPerDocument;
+            _terms = terms;
+            _scores = scores;
+            _originalQuery = originalQuery;
+            _ords = ords;
+            _unwrittenOriginalQuery = unwrittenOriginalQuery;
+        }
+
+        /// <summary>
+        /// Describes the query by its field and its un-rewritten original query; the argument is not used.
+        /// </summary>
+        public override string ToString(string @string)
+        {
+            return string.Format("TermsIncludingScoreQuery{{field={0};originalQuery={1}}}", _field,
+                _unwrittenOriginalQuery);
+        }
+
+        /// <summary>
+        /// Delegates to the original query.
+        /// </summary>
+        public override void ExtractTerms(ISet<Term> terms)
+        {
+            _originalQuery.ExtractTerms(terms);
+        }
+
+        /// <summary>
+        /// Rewrites the original query. When that yields a different instance, a copy of this query
+        /// wrapping the rewritten query (with the same boost) is returned; otherwise this instance.
+        /// </summary>
+        public override Query Rewrite(IndexReader reader)
+        {
+            Query originalQueryRewrite = _originalQuery.Rewrite(reader);
+            if (originalQueryRewrite != _originalQuery)
+            {
+                Query rewritten = new TermsIncludingScoreQuery(_field, _multipleValuesPerDocument, _terms, _scores,
+                    _ords, originalQueryRewrite, _originalQuery);
+                rewritten.Boost = Boost;
+                return rewritten;
+            }
+
+            return this;
+        }
+
+        /// <summary>
+        /// Two instances are equal when the base query state, the field and the un-rewritten original query are equal.
+        /// </summary>
+        protected bool Equals(TermsIncludingScoreQuery other)
+        {
+            return base.Equals(other) && string.Equals(_field, other._field) &&
+                   Equals(_unwrittenOriginalQuery, other._unwrittenOriginalQuery);
+        }
+
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(null, obj)) return false;
+            if (ReferenceEquals(this, obj)) return true;
+            if (obj.GetType() != GetType()) return false;
+            return Equals((TermsIncludingScoreQuery) obj);
+        }
+
+        /// <summary>
+        /// Combines the base hash with the hashes of the field and the un-rewritten original query,
+        /// matching the members compared by <see cref="Equals(TermsIncludingScoreQuery)"/>.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            unchecked
+            {
+                int hashCode = base.GetHashCode();
+                hashCode = (hashCode*397) ^ (_field != null ? _field.GetHashCode() : 0);
+                hashCode = (hashCode*397) ^
+                           (_unwrittenOriginalQuery != null ? _unwrittenOriginalQuery.GetHashCode() : 0);
+                return hashCode;
+            }
+        }
+
+        /// <summary>
+        /// Creates a weight that wraps the weight of the original query.
+        /// </summary>
+        public override Weight CreateWeight(IndexSearcher searcher)
+        {
+            Weight originalWeight = _originalQuery.CreateWeight(searcher);
+            return new WeightAnonymousInnerClassHelper(this, originalWeight);
+        }
+
+        // Weight of the enclosing query; normalization is delegated to the original query's weight.
+        private class WeightAnonymousInnerClassHelper : Weight
+        {
+            private readonly TermsIncludingScoreQuery outerInstance;
+
+            private readonly Weight originalWeight;
+
+            public WeightAnonymousInnerClassHelper(TermsIncludingScoreQuery outerInstance, Weight originalWeight)
+            {
+                this.outerInstance = outerInstance;
+                this.originalWeight = originalWeight;
+            }
+
+            // Handed back to Terms.Iterator on every call so the enum can be reused.
+            private TermsEnum segmentTermsEnum;
+
+            /// <summary>
+            /// Explains a hit with the out-of-order scorer; reports "Not a match" when the segment has no scorer.
+            /// </summary>
+            public override Explanation Explain(AtomicReaderContext context, int doc)
+            {
+                // With scoreDocsInOrder == false BulkScorer returns null, an SVInnerScorer or its subclass MVInnerScorer.
+                SVInnerScorer scorer = (SVInnerScorer) BulkScorer(context, false, null);
+                if (scorer != null)
+                {
+                    return scorer.Explain(doc);
+                }
+                return new ComplexExplanation(false, 0.0f, "Not a match");
+            }
+
+            public override bool ScoresDocsOutOfOrder()
+            {
+                // We have optimized impls below if we are allowed
+                // to score out-of-order:
+                return true;
+            }
+
+            public override Query Query
+            {
+                get { return outerInstance; }
+            }
+
+            public override float ValueForNormalization
+            {
+                get { return originalWeight.ValueForNormalization*outerInstance.Boost*outerInstance.Boost; }
+            }
+
+            public override void Normalize(float norm, float topLevelBoost)
+            {
+                originalWeight.Normalize(norm, topLevelBoost*outerInstance.Boost);
+            }
+
+            /// <summary>
+            /// Returns an in-order scorer for the segment, or <c>null</c> when the segment has no terms for the field.
+            /// </summary>
+            public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
+            {
+                Terms terms = context.AtomicReader.Terms(outerInstance._field);
+                if (terms == null)
+                {
+                    return null;
+                }
+
+                // what is the runtime...seems ok?
+                long cost = context.AtomicReader.MaxDoc * terms.Size();
+
+                segmentTermsEnum = terms.Iterator(segmentTermsEnum);
+                if (outerInstance._multipleValuesPerDocument)
+                {
+                    return new MVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost);
+                }
+
+                return new SVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost);
+            }
+
+            /// <summary>
+            /// Returns the base implementation when documents must be scored in order; otherwise one of the
+            /// out-of-order scorers, or <c>null</c> when the segment has no terms for the field.
+            /// </summary>
+            public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs)
+            {
+                if (scoreDocsInOrder)
+                {
+                    return base.BulkScorer(context, scoreDocsInOrder, acceptDocs);
+                }
+
+                Terms terms = context.AtomicReader.Terms(outerInstance._field);
+                if (terms == null)
+                {
+                    return null;
+                }
+                // what is the runtime...seems ok?
+                long cost = context.AtomicReader.MaxDoc * terms.Size();
+
+                segmentTermsEnum = terms.Iterator(segmentTermsEnum);
+                // Optimized impls that take advantage of docs
+                // being allowed to be out of order:
+                if (outerInstance._multipleValuesPerDocument)
+                {
+                    return new MVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost);
+                }
+
+                return new SVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, cost);
+            }
+        }
+
+        // This impl assumes that the 'join' values are used uniquely per doc per field. Used for one to many relations.
+        internal class SVInnerScorer : BulkScorer
+        {
+            private readonly TermsIncludingScoreQuery outerInstance;
+
+            private readonly BytesRef _spare = new BytesRef();
+            private readonly Bits _acceptDocs;
+            private readonly TermsEnum _termsEnum;
+            private readonly long _cost;
+
+            // Index into outerInstance._ords of the next join term to seek.
+            private int _upto;
+            // Postings of the join term currently being walked; null when the next term must be sought.
+            internal DocsEnum DocsEnum;
+            private DocsEnum _reuse;
+            // Index into outerInstance._ords of the term that DocsEnum belongs to (selects the score).
+            private int _scoreUpto;
+            private int _doc;
+
+            internal SVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, TermsEnum termsEnum, long cost)
+            {
+                this.outerInstance = outerInstance;
+                _acceptDocs = acceptDocs;
+                _termsEnum = termsEnum;
+                _cost = cost;
+                _doc = -1;
+            }
+
+            /// <summary>
+            /// Collects documents, term by term, for as long as the next document id is below <paramref name="max"/>.
+            /// </summary>
+            /// <returns><c>true</c> when documents remain after this call.</returns>
+            public override bool Score(Collector collector, int max)
+            {
+                FakeScorer fakeScorer = new FakeScorer();
+                collector.Scorer = fakeScorer;
+                if (_doc == -1)
+                {
+                    _doc = NextDocOutOfOrder();
+                }
+                while (_doc < max)
+                {
+                    // Publish doc id and score through the fake scorer before handing the hit to the collector.
+                    fakeScorer.doc = _doc;
+                    fakeScorer._score = outerInstance._scores[outerInstance._ords[_scoreUpto]];
+                    collector.Collect(_doc);
+                    _doc = NextDocOutOfOrder();
+                }
+
+                return _doc != DocIdSetIterator.NO_MORE_DOCS;
+            }
+
+            // Returns the next document of the current term's postings, moving on to the next
+            // join term present in the segment whenever the current postings are exhausted.
+            private int NextDocOutOfOrder()
+            {
+                while (true)
+                {
+                    if (DocsEnum != null)
+                    {
+                        int docId = DocsEnumNextDoc();
+                        if (docId == DocIdSetIterator.NO_MORE_DOCS)
+                        {
+                            DocsEnum = null;
+                        }
+                        else
+                        {
+                            return _doc = docId;
+                        }
+                    }
+
+                    if (_upto == outerInstance._terms.Size())
+                    {
+                        return _doc = DocIdSetIterator.NO_MORE_DOCS;
+                    }
+
+                    // Remember which term the upcoming postings belong to before advancing _upto.
+                    _scoreUpto = _upto;
+                    if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare)))
+                    {
+                        DocsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsEnum.FLAG_NONE);
+                    }
+                }
+            }
+
+            // Hook for subclasses to filter the documents produced by the current postings.
+            protected virtual int DocsEnumNextDoc()
+            {
+                return DocsEnum.NextDoc();
+            }
+
+            /// <summary>
+            /// Walks the join terms looking for <paramref name="target"/> in their postings and builds an
+            /// explanation from the score of the term the search stopped at.
+            /// </summary>
+            internal Explanation Explain(int target)
+            {
+                int docId;
+                do
+                {
+                    docId = NextDocOutOfOrder();
+                    if (docId < target)
+                    {
+                        int tempDocId = DocsEnum.Advance(target);
+                        if (tempDocId == target)
+                        {
+                            docId = tempDocId;
+                            break;
+                        }
+                    }
+                    else if (docId == target)
+                    {
+                        break;
+                    }
+                    DocsEnum = null; // goto the next ord.
+                } while (docId != DocIdSetIterator.NO_MORE_DOCS);
+
+                return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]],
+                    "Score based on join value " + _termsEnum.Term().Utf8ToString());
+            }
+        }
+
+        // This impl that tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted
+        // twice for different join values. This means that the first encountered join value determines the score of a document
+        // even if other join values yield a higher score.
+        internal class MVInnerScorer : SVInnerScorer
+        {
+            internal readonly FixedBitSet alreadyEmittedDocs;
+
+            internal MVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost) : base(outerInstance, weight, acceptDocs, termsEnum, cost)
+            {
+                alreadyEmittedDocs = new FixedBitSet(maxDoc);
+            }
+
+            // Skips documents that were already returned for an earlier join term.
+            protected override int DocsEnumNextDoc()
+            {
+                while (true)
+                {
+                    int docId = DocsEnum.NextDoc();
+                    if (docId == DocIdSetIterator.NO_MORE_DOCS)
+                    {
+                        return docId;
+                    }
+                    if (!alreadyEmittedDocs.GetAndSet(docId))
+                    {
+                        return docId; //if it wasn't previously set, return it
+                    }
+                }
+            }
+        }
+
+        // In-order scorer: the matching documents and their scores are computed up front in the constructor.
+        internal class SVInOrderScorer : Scorer
+        {
+            // Protected so that subclasses overriding FillDocsAndScores read the SAME field that is
+            // assigned before the constructor makes the virtual call. A private copy in a subclass
+            // would still be null at that point.
+            protected readonly TermsIncludingScoreQuery outerInstance;
+
+
+            internal readonly DocIdSetIterator matchingDocsIterator;
+            // Score per document id; only meaningful for documents present in the matching set.
+            internal readonly float[] scores;
+            internal readonly long cost_Renamed;
+
+            internal int currentDoc = -1;
+
+            internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost) : base(weight)
+            {
+                // Must be assigned before FillDocsAndScores runs: that method is virtual and is
+                // dispatched to the most derived override while this constructor is still executing.
+                this.outerInstance = outerInstance;
+                FixedBitSet matchingDocs = new FixedBitSet(maxDoc);
+                scores = new float[maxDoc];
+                FillDocsAndScores(matchingDocs, acceptDocs, termsEnum);
+                matchingDocsIterator = matchingDocs.GetIterator();
+                cost_Renamed = cost;
+            }
+
+            /// <summary>
+            /// Marks every document containing one of the join terms in <paramref name="matchingDocs"/> and
+            /// stores the term's score for it. Called from the constructor, so overrides may only rely on
+            /// <c>outerInstance</c> and <c>scores</c>, which are assigned before the call.
+            /// </summary>
+            protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
+                TermsEnum termsEnum)
+            {
+                BytesRef spare = new BytesRef();
+                DocsEnum docsEnum = null;
+                for (int i = 0; i < outerInstance._terms.Size(); i++)
+                {
+                    if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
+                    {
+                        docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
+                        float score = outerInstance._scores[outerInstance._ords[i]];
+                        for (int doc = docsEnum.NextDoc();
+                            doc != NO_MORE_DOCS;
+                            doc = docsEnum.NextDoc())
+                        {
+                            matchingDocs.Set(doc);
+                            // In the case the same doc is also related to a another doc, a score might be overwritten. I think this
+                            // can only happen in a many-to-many relation
+                            scores[doc] = score;
+                        }
+                    }
+                }
+            }
+
+            public override float Score()
+            {
+                return scores[currentDoc];
+            }
+
+            public override int Freq()
+            {
+                return 1;
+            }
+
+            public override int DocID()
+            {
+                return currentDoc;
+            }
+
+            public override int NextDoc()
+            {
+                return currentDoc = matchingDocsIterator.NextDoc();
+            }
+
+            public override int Advance(int target)
+            {
+                return currentDoc = matchingDocsIterator.Advance(target);
+            }
+
+            public override long Cost()
+            {
+                return cost_Renamed;
+            }
+        }
+
+        // This scorer deals with the fact that a document can have more than one score from multiple related documents.
+        internal class MVInOrderScorer : SVInOrderScorer
+        {
+            // No outerInstance field of its own: the override below runs during the base constructor
+            // and therefore has to use the field the base class has already assigned.
+            internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost)
+                : base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost)
+            {
+            }
+
+            /// <summary>
+            /// Same as the base implementation, except that a document keeps the score of the first
+            /// join term it was found for.
+            /// </summary>
+            protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
+                TermsEnum termsEnum)
+            {
+                BytesRef spare = new BytesRef();
+                DocsEnum docsEnum = null;
+                for (int i = 0; i < outerInstance._terms.Size(); i++)
+                {
+                    if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
+                    {
+                        docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
+                        float score = outerInstance._scores[outerInstance._ords[i]];
+                        for (int doc = docsEnum.NextDoc();
+                            doc != NO_MORE_DOCS;
+                            doc = docsEnum.NextDoc())
+                        {
+                            // I prefer this:
+                            /*if (scores[doc] < score) {
+                              scores[doc] = score;
+                              matchingDocs.set(doc);
+                            }*/
+                            // But this behaves the same as MVInnerScorer and only then the tests will pass:
+                            if (!matchingDocs.Get(doc))
+                            {
+                                scores[doc] = score;
+                                matchingDocs.Set(doc);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/TermsQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsQuery.cs b/Lucene.Net.Join/TermsQuery.cs
new file mode 100644
index 0000000..2d5ccf8
--- /dev/null
+++ b/Lucene.Net.Join/TermsQuery.cs
@@ -0,0 +1,147 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// A query that has an array of terms from a specific field. This query will match documents that have one or
+    /// more terms in the specified field that match the terms specified in the array.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    internal class TermsQuery : MultiTermQuery
+    {
+        private readonly BytesRefHash _terms;
+        private readonly int[] _ords;
+        private readonly Query _fromQuery; // Used for Equals() and GetHashCode() only
+
+        /// <summary>
+        /// Creates a query that matches documents whose <paramref name="field"/> contains at least one of
+        /// <paramref name="terms"/>.
+        /// </summary>
+        /// <param name="field">The field that should contain the terms given in <paramref name="terms"/>.</param>
+        /// <param name="fromQuery">The query the terms were gathered with; it only takes part in equality and hashing.</param>
+        /// <param name="terms">The terms that matching documents should have. The constructor sorts them with
+        /// <c>BytesRef.UTF8SortedAsUnicodeComparer</c> and keeps the resulting ord array.</param>
+        internal TermsQuery(string field, Query fromQuery, BytesRefHash terms) : base(field)
+        {
+            _fromQuery = fromQuery;
+            _terms = terms;
+            _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
+        }
+
+        /// <summary>
+        /// Returns an enum over <paramref name="terms"/> restricted to this query's term set, or
+        /// <c>TermsEnum.EMPTY</c> when the set is empty.
+        /// </summary>
+        public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
+        {
+            // The seeking enum reads the first and the last query term in its constructor,
+            // so it must never be created for an empty term set.
+            if (_terms.Size() == 0)
+            {
+                return TermsEnum.EMPTY;
+            }
+
+            return new SeekingTermSetTermsEnum(terms.Iterator(null), _terms, _ords);
+        }
+
+        /// <summary>
+        /// Describes this query as <c>TermsQuery{field=...}</c> using the field the query targets.
+        /// </summary>
+        /// <param name="field">The caller's default field; it is not part of the output.</param>
+        public override string ToString(string field)
+        {
+            // Print the query's own field, not the parameter (which is merely the default field of the caller).
+            return string.Format("TermsQuery{{field={0}}}", Field);
+        }
+
+        /// <summary>
+        /// Two instances are equal when the base class considers them equal and they were built from equal
+        /// from-queries. Without this, queries created for different from-queries on the same field would be
+        /// indistinguishable.
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(this, obj))
+            {
+                return true;
+            }
+
+            if (!base.Equals(obj))
+            {
+                return false;
+            }
+
+            TermsQuery other = obj as TermsQuery;
+            return other != null && object.Equals(_fromQuery, other._fromQuery);
+        }
+
+        /// <summary>
+        /// Combines the base hash code with the hash code of the from-query, consistent with <see cref="Equals(object)"/>.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            const int prime = 31;
+            unchecked // wrap-around is fine for hash codes, even in a checked build
+            {
+                int fromQueryHash = _fromQuery == null ? 0 : _fromQuery.GetHashCode();
+                return base.GetHashCode() + prime * fromQueryHash;
+            }
+        }
+
+        /// <summary>
+        /// Filtered terms enum that walks the sorted query terms in step with the underlying terms dictionary,
+        /// seeking from one query term to the next instead of visiting every term in the dictionary.
+        /// </summary>
+        private class SeekingTermSetTermsEnum : FilteredTermsEnum
+        {
+            private readonly BytesRefHash _terms;
+            private readonly int[] _ords;
+            private readonly int _lastElement;
+
+            private readonly BytesRef _lastTerm;
+            private readonly BytesRef _spare = new BytesRef();
+            private readonly IComparer<BytesRef> _comparator;
+
+            private BytesRef _seekTerm;
+            private int _upto;
+
+            /// <param name="tenum">The terms enum of the index field being filtered.</param>
+            /// <param name="terms">The query's term set; must not be empty.</param>
+            /// <param name="ords">Ids of <paramref name="terms"/> in sorted order.</param>
+            internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) : base(tenum)
+            {
+                _terms = terms;
+                _ords = ords;
+                _comparator = BytesRef.UTF8SortedAsUnicodeComparer;
+                _lastElement = terms.Size() - 1;
+                // The last term gets its own BytesRef because _spare is overwritten on every lookup.
+                _lastTerm = terms.Get(ords[_lastElement], new BytesRef());
+                _seekTerm = terms.Get(ords[_upto], _spare);
+            }
+
+            /// <summary>
+            /// Hands out the pending seek target exactly once; returns <c>null</c> when none is pending.
+            /// </summary>
+            protected override BytesRef NextSeekTerm(BytesRef currentTerm)
+            {
+                BytesRef temp = _seekTerm;
+                _seekTerm = null;
+                return temp;
+            }
+
+            /// <summary>
+            /// Decides whether <paramref name="term"/> from the index belongs to the query's term set and,
+            /// where possible, schedules a seek to the next query term.
+            /// </summary>
+            protected override AcceptStatus Accept(BytesRef term)
+            {
+                // Past the largest query term: nothing further can match.
+                if (_comparator.Compare(term, _lastTerm) > 0)
+                {
+                    return AcceptStatus.END;
+                }
+
+                BytesRef currentTerm = _terms.Get(_ords[_upto], _spare);
+                if (_comparator.Compare(term, currentTerm) == 0)
+                {
+                    return AcceptAndAdvance();
+                }
+
+                // Our current term doesn't match the given term. We may be behind it by more than one step,
+                // so keep incrementing until we are at the same term or a higher one.
+                int cmp;
+                do
+                {
+                    if (_upto == _lastElement)
+                    {
+                        return AcceptStatus.NO;
+                    }
+                    // Typically the terms dict is a superset of the query's terms, so it is unusual to skip
+                    // many of our terms; that is why no binary search is done here.
+                    _seekTerm = _terms.Get(_ords[++_upto], _spare);
+                } while ((cmp = _comparator.Compare(_seekTerm, term)) < 0);
+
+                return cmp == 0 ? AcceptAndAdvance() : AcceptStatus.NO_AND_SEEK;
+            }
+
+            /// <summary>
+            /// Accepts the term at <c>_upto</c>; unless it is the last query term, also moves on to the next
+            /// query term and requests a seek to it.
+            /// </summary>
+            private AcceptStatus AcceptAndAdvance()
+            {
+                if (_upto == _lastElement)
+                {
+                    return AcceptStatus.YES;
+                }
+
+                _seekTerm = _terms.Get(_ords[++_upto], _spare);
+                return AcceptStatus.YES_AND_SEEK;
+            }
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/TermsWithScoreCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsWithScoreCollector.cs b/Lucene.Net.Join/TermsWithScoreCollector.cs
new file mode 100644
index 0000000..c4dc97d
--- /dev/null
+++ b/Lucene.Net.Join/TermsWithScoreCollector.cs
@@ -0,0 +1,333 @@
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+    /// <summary>
+    /// Collector that gathers the distinct terms of one field from every collected document, together with a
+    /// per-term score that is aggregated according to a <see cref="ScoreMode"/>.
+    /// </summary>
+    internal abstract class TermsWithScoreCollector : Collector
+    {
+        private const int InitialArraySize = 256;
+
+        private readonly string _field;
+        private readonly BytesRefHash _collectedTerms = new BytesRefHash();
+        private readonly ScoreMode _scoreMode;
+
+        private Scorer _scorer;
+        private float[] _scoreSums = new float[InitialArraySize];
+
+        /// <param name="field">The field to collect terms for.</param>
+        /// <param name="scoreMode">How the scores of several documents sharing a term are combined.</param>
+        internal TermsWithScoreCollector(string field, ScoreMode scoreMode)
+        {
+            _field = field;
+            _scoreMode = scoreMode;
+        }
+
+        /// <summary>
+        /// The terms collected so far.
+        /// </summary>
+        public BytesRefHash CollectedTerms
+        {
+            get { return _collectedTerms; }
+        }
+
+        /// <summary>
+        /// The aggregated scores, indexed by the ids <see cref="CollectedTerms"/> assigned to the terms.
+        /// The array can be longer than the number of collected terms.
+        /// </summary>
+        public virtual float[] ScoresPerTerm
+        {
+            get { return _scoreSums; }
+        }
+
+        /// <summary>
+        /// Receives the scorer whose <c>Score()</c> is read for every collected document.
+        /// </summary>
+        public override Scorer Scorer
+        {
+            set { _scorer = value; }
+        }
+
+        /// <summary>
+        /// Always <c>true</c>: scores are accumulated per term, so nothing here depends on the order in which
+        /// documents are collected. The nested implementations deliberately do not override this.
+        /// </summary>
+        public override bool AcceptsDocsOutOfOrder()
+        {
+            return true;
+        }
+
+        /// <summary>
+        /// Chooses the right <see cref="TermsWithScoreCollector"/> implementation.
+        /// </summary>
+        /// <param name="field">The field to collect terms for.</param>
+        /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
+        /// <param name="scoreMode">How scores are aggregated per term; <c>ScoreMode.Avg</c> selects the averaging variants.</param>
+        /// <returns>A <see cref="TermsWithScoreCollector"/> instance</returns>
+        internal static TermsWithScoreCollector Create(string field, bool multipleValuesPerDocument, ScoreMode scoreMode)
+        {
+            if (multipleValuesPerDocument)
+            {
+                switch (scoreMode)
+                {
+                    case ScoreMode.Avg:
+                        return new Mv.Avg(field);
+                    default:
+                        return new Mv(field, scoreMode);
+                }
+            }
+
+            switch (scoreMode)
+            {
+                case ScoreMode.Avg:
+                    return new Sv.Avg(field);
+                default:
+                    return new Sv(field, scoreMode);
+            }
+        }
+
+        /// <summary>
+        /// Implementation that works with a single value per document.
+        /// </summary>
+        internal class Sv : TermsWithScoreCollector
+        {
+            private readonly BytesRef _spare = new BytesRef();
+            private BinaryDocValues _fromDocTerms;
+
+            internal Sv(string field, ScoreMode scoreMode) : base(field, scoreMode)
+            {
+            }
+
+            /// <summary>
+            /// Adds the document's term to the collected set and folds the current score into that term's slot.
+            /// </summary>
+            public override void Collect(int doc)
+            {
+                _fromDocTerms.Get(doc, _spare);
+                int ord = _collectedTerms.Add(_spare);
+                if (ord < 0)
+                {
+                    // Negative result: the term was already present; decode its id from -(id + 1).
+                    ord = -ord - 1;
+                }
+                else if (ord >= _scoreSums.Length)
+                {
+                    _scoreSums = ArrayUtil.Grow(_scoreSums);
+                }
+
+                float current = _scorer.Score();
+                float existing = _scoreSums[ord];
+                if (existing.CompareTo(0.0f) == 0)
+                {
+                    // Slot still holds zero: simply take the current score.
+                    _scoreSums[ord] = current;
+                }
+                else
+                {
+                    switch (_scoreMode)
+                    {
+                        case ScoreMode.Total:
+                            _scoreSums[ord] = _scoreSums[ord] + current;
+                            break;
+                        case ScoreMode.Max:
+                            if (current > existing)
+                            {
+                                _scoreSums[ord] = current;
+                            }
+                            break;
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Switches to the next segment by loading that reader's terms for the field from the field cache.
+            /// </summary>
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false);
+                }
+            }
+
+            /// <summary>
+            /// Single-value variant that reports the average score per term.
+            /// </summary>
+            internal class Avg : Sv
+            {
+                private int[] _scoreCounts = new int[InitialArraySize];
+
+                internal Avg(string field) : base(field, ScoreMode.Avg)
+                {
+                }
+
+                /// <summary>
+                /// Adds the document's term to the collected set, adds the current score to the term's sum and
+                /// keeps count of how many scores went into it.
+                /// </summary>
+                public override void Collect(int doc)
+                {
+                    _fromDocTerms.Get(doc, _spare);
+                    int ord = _collectedTerms.Add(_spare);
+                    if (ord < 0)
+                    {
+                        // Negative result: the term was already present; decode its id from -(id + 1).
+                        ord = -ord - 1;
+                    }
+                    else if (ord >= _scoreSums.Length)
+                    {
+                        // Sums and counts are always grown together so they stay index-compatible.
+                        _scoreSums = ArrayUtil.Grow(_scoreSums);
+                        _scoreCounts = ArrayUtil.Grow(_scoreCounts);
+                    }
+
+                    float current = _scorer.Score();
+                    float existing = _scoreSums[ord];
+                    if (existing.CompareTo(0.0f) == 0)
+                    {
+                        _scoreSums[ord] = current;
+                        _scoreCounts[ord] = 1;
+                    }
+                    else
+                    {
+                        _scoreSums[ord] = _scoreSums[ord] + current;
+                        _scoreCounts[ord]++;
+                    }
+                }
+
+                /// <summary>
+                /// The average score per term. The sums are divided by their counts in place on first access;
+                /// the counts are then released so that later calls return the same array without dividing again.
+                /// </summary>
+                public override float[] ScoresPerTerm
+                {
+                    get
+                    {
+                        if (_scoreCounts != null)
+                        {
+                            for (int i = 0; i < _scoreCounts.Length; i++)
+                            {
+                                _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
+                            }
+                            _scoreCounts = null;
+                        }
+                        return _scoreSums;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Implementation that works with multiple values per document.
+        /// </summary>
+        internal class Mv : TermsWithScoreCollector
+        {
+            private SortedSetDocValues _fromDocTermOrds;
+            private readonly BytesRef _scratch = new BytesRef();
+
+            internal Mv(string field, ScoreMode scoreMode) : base(field, scoreMode)
+            {
+            }
+
+            /// <summary>
+            /// Adds every term of the document to the collected set and folds the current score into each
+            /// term's slot.
+            /// </summary>
+            public override void Collect(int doc)
+            {
+                _fromDocTermOrds.Document = doc;
+                long ord;
+                while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                {
+                    _fromDocTermOrds.LookupOrd(ord, _scratch);
+
+                    int termId = _collectedTerms.Add(_scratch);
+                    if (termId < 0)
+                    {
+                        // Negative result: the term was already present; decode its id from -(id + 1).
+                        termId = -termId - 1;
+                    }
+                    else if (termId >= _scoreSums.Length)
+                    {
+                        _scoreSums = ArrayUtil.Grow(_scoreSums);
+                    }
+
+                    switch (_scoreMode)
+                    {
+                        case ScoreMode.Total:
+                            _scoreSums[termId] += _scorer.Score();
+                            break;
+                        case ScoreMode.Max:
+                            _scoreSums[termId] = Math.Max(_scoreSums[termId], _scorer.Score());
+                            break;
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Switches to the next segment by loading that reader's term ords for the field from the field cache.
+            /// </summary>
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    _fromDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field);
+                }
+            }
+
+            /// <summary>
+            /// Multi-value variant that reports the average score per term.
+            /// </summary>
+            internal class Avg : Mv
+            {
+                private int[] _scoreCounts = new int[InitialArraySize];
+
+                internal Avg(string field) : base(field, ScoreMode.Avg)
+                {
+                }
+
+                /// <summary>
+                /// Adds every term of the document to the collected set, adds the current score to each term's
+                /// sum and increments each term's count.
+                /// </summary>
+                public override void Collect(int doc)
+                {
+                    _fromDocTermOrds.Document = doc;
+                    long ord;
+                    while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                    {
+                        _fromDocTermOrds.LookupOrd(ord, _scratch);
+
+                        int termId = _collectedTerms.Add(_scratch);
+                        if (termId < 0)
+                        {
+                            // Negative result: the term was already present; decode its id from -(id + 1).
+                            termId = -termId - 1;
+                        }
+                        else if (termId >= _scoreSums.Length)
+                        {
+                            // Sums and counts are always grown together so they stay index-compatible.
+                            _scoreSums = ArrayUtil.Grow(_scoreSums);
+                            _scoreCounts = ArrayUtil.Grow(_scoreCounts);
+                        }
+
+                        _scoreSums[termId] += _scorer.Score();
+                        _scoreCounts[termId]++;
+                    }
+                }
+
+                /// <summary>
+                /// The average score per term. The sums are divided by their counts in place on first access;
+                /// the counts are then released so that later calls return the same array without dividing again.
+                /// </summary>
+                public override float[] ScoresPerTerm
+                {
+                    get
+                    {
+                        if (_scoreCounts != null)
+                        {
+                            for (int i = 0; i < _scoreCounts.Length; i++)
+                            {
+                                _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
+                            }
+                            _scoreCounts = null;
+                        }
+                        return _scoreSums;
+                    }
+                }
+            }
+        }
+
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index 18f70a3..34c4804 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 2013
-VisualStudioVersion = 12.0.30501.0
+# Visual Studio 14
+VisualStudioVersion = 14.0.23107.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net", "src\Lucene.Net.Core\Lucene.Net.csproj", "{5D4AD9BE-1FFB-41AB-9943-25737971BF57}"
EndProject
@@ -38,6 +38,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs", "src\Lu
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs.Tests", "src\Lucene.Net.Tests.Codecs\Lucene.Net.Codecs.Tests.csproj", "{351B75B1-BBD5-4E32-8036-7BED4E0135A6}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Join", "Lucene.Net.Join\Lucene.Net.Join.csproj", "{E8A339C7-FCF6-4A72-8586-56D8961D7B99}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -179,6 +181,18 @@ Global
{351B75B1-BBD5-4E32-8036-7BED4E0135A6}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
{351B75B1-BBD5-4E32-8036-7BED4E0135A6}.Release|Mixed Platforms.Build.0 = Release|Any CPU
{351B75B1-BBD5-4E32-8036-7BED4E0135A6}.Release|x86.ActiveCfg = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|x86.Build.0 = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Any CPU.Build.0 = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|x86.ActiveCfg = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE