You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/08/24 00:34:00 UTC
[01/17] lucenenet git commit: Merge pull request #1 from jpsullivan/queries-test-coverage
Repository: lucenenet
Updated Branches:
refs/heads/master 2808ec740 -> 812e1c541
Merge pull request #1 from jpsullivan/queries-test-coverage
Add tests to Lucene.Net.Queries
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ecd28605
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ecd28605
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ecd28605
Branch: refs/heads/master
Commit: ecd2860520fc211ccdcc9784ca23b36d0d949a9c
Parents: 70ba377 2808ec7
Author: Josh Sullivan <ja...@gmail.com>
Authored: Tue Aug 11 01:39:20 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Tue Aug 11 01:39:20 2015 -0400
----------------------------------------------------------------------
src/Lucene.Net.Queries/CustomScoreQuery.cs | 8 +-
src/Lucene.Net.Queries/TermsFilter.cs | 10 +
.../BoostingQueryTest.cs | 26 ++
.../ChainedFilterTest.cs | 188 +++++++++
.../Function/FunctionTestSetup.cs | 172 ++++++++
.../Function/TestBoostedQuery.cs | 79 ++++
.../Function/TestDocValuesFieldSources.cs | 149 +++++++
.../Function/TestFieldScoreQuery.cs | 162 ++++++++
.../Function/TestFunctionQuerySort.cs | 80 ++++
.../Function/TestLongNormValueSource.cs | 230 +++++++++++
.../Function/TestOrdValues.cs | 157 ++++++++
.../Function/TestValueSources.cs | 339 ++++++++++++++++
.../Lucene.Net.Tests.Queries.csproj | 18 +-
.../Mlt/TestMoreLikeThis.cs | 134 +++++++
src/Lucene.Net.Tests.Queries/TermsFilterTest.cs | 326 +++++++++++++++
.../TestCustomScoreQuery.cs | 397 +++++++++++++++++++
16 files changed, 2467 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
[13/17] lucenenet git commit: Lucene.Net.Join tests now passing
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs b/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
deleted file mode 100644
index c2df0df..0000000
--- a/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
+++ /dev/null
@@ -1,277 +0,0 @@
-using System.Collections.Generic;
-using Lucene.Net.Analysis;
-using Lucene.Net.Documents;
-using Lucene.Net.Index;
-using Lucene.Net.Join;
-using Lucene.Net.Randomized.Generators;
-using Lucene.Net.Search;
-using Lucene.Net.Store;
-using Lucene.Net.Util;
-using NUnit.Framework;
-
-namespace Lucene.Net.Tests.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- public class TestBlockJoinSorting : LuceneTestCase
- {
- [Test]
- public void TestNestedSorting()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
-
- IList<Document> docs = new List<Document>();
- Document document = new Document();
- document.Add(new StringField("field2", "a", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "b", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "c", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("__type", "parent", Field.Store.NO));
- document.Add(new StringField("field1", "a", Field.Store.NO));
- docs.Add(document);
- w.AddDocuments(docs);
- w.Commit();
-
- docs.Clear();
- document = new Document();
- document.Add(new StringField("field2", "c", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "d", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "e", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("__type", "parent", Field.Store.NO));
- document.Add(new StringField("field1", "b", Field.Store.NO));
- docs.Add(document);
- w.AddDocuments(docs);
-
- docs.Clear();
- document = new Document();
- document.Add(new StringField("field2", "e", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "f", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "g", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("__type", "parent", Field.Store.NO));
- document.Add(new StringField("field1", "c", Field.Store.NO));
- docs.Add(document);
- w.AddDocuments(docs);
-
- docs.Clear();
- document = new Document();
- document.Add(new StringField("field2", "g", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "h", Field.Store.NO));
- document.Add(new StringField("filter_1", "F", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "i", Field.Store.NO));
- document.Add(new StringField("filter_1", "F", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("__type", "parent", Field.Store.NO));
- document.Add(new StringField("field1", "d", Field.Store.NO));
- docs.Add(document);
- w.AddDocuments(docs);
- w.Commit();
-
- docs.Clear();
- document = new Document();
- document.Add(new StringField("field2", "i", Field.Store.NO));
- document.Add(new StringField("filter_1", "F", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "j", Field.Store.NO));
- document.Add(new StringField("filter_1", "F", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "k", Field.Store.NO));
- document.Add(new StringField("filter_1", "F", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("__type", "parent", Field.Store.NO));
- document.Add(new StringField("field1", "f", Field.Store.NO));
- docs.Add(document);
- w.AddDocuments(docs);
-
- docs.Clear();
- document = new Document();
- document.Add(new StringField("field2", "k", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "l", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "m", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("__type", "parent", Field.Store.NO));
- document.Add(new StringField("field1", "g", Field.Store.NO));
- docs.Add(document);
- w.AddDocuments(docs);
-
- // This doc will not be included, because it doesn't have nested docs
- document = new Document();
- document.Add(new StringField("__type", "parent", Field.Store.NO));
- document.Add(new StringField("field1", "h", Field.Store.NO));
- w.AddDocument(document);
-
- docs.Clear();
- document = new Document();
- document.Add(new StringField("field2", "m", Field.Store.NO));
- document.Add(new StringField("filter_1", "T", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "n", Field.Store.NO));
- document.Add(new StringField("filter_1", "F", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("field2", "o", Field.Store.NO));
- document.Add(new StringField("filter_1", "F", Field.Store.NO));
- docs.Add(document);
- document = new Document();
- document.Add(new StringField("__type", "parent", Field.Store.NO));
- document.Add(new StringField("field1", "i", Field.Store.NO));
- docs.Add(document);
- w.AddDocuments(docs);
- w.Commit();
-
- // Some garbage docs, just to check if the NestedFieldComparator can deal with this.
- document = new Document();
- document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
- w.AddDocument(document);
- document = new Document();
- document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
- w.AddDocument(document);
- document = new Document();
- document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
- w.AddDocument(document);
-
- IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.w, false));
- w.Dispose();
- Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
- Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
- ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
-
- // Sort by field ascending, order first
- ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, Wrap(parentFilter), Wrap(childFilter));
- Sort sort = new Sort(sortField);
- TopFieldDocs topDocs = searcher.Search(query, 5, sort);
- assertEquals(7, topDocs.TotalHits);
- assertEquals(5, topDocs.ScoreDocs.Length);
- assertEquals(3, topDocs.ScoreDocs[0].Doc);
- assertEquals("a", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
- assertEquals(7, topDocs.ScoreDocs[1].Doc);
- assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
- assertEquals(11, topDocs.ScoreDocs[2].Doc);
- assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
- assertEquals(15, topDocs.ScoreDocs[3].Doc);
- assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
- assertEquals(19, topDocs.ScoreDocs[4].Doc);
- assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());
-
- // Sort by field ascending, order last
- sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
- sort = new Sort(sortField);
- topDocs = searcher.Search(query, 5, sort);
- assertEquals(7, topDocs.TotalHits);
- assertEquals(5, topDocs.ScoreDocs.Length);
- assertEquals(3, topDocs.ScoreDocs[0].Doc);
- assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
- assertEquals(7, topDocs.ScoreDocs[1].Doc);
- assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
- assertEquals(11, topDocs.ScoreDocs[2].Doc);
- assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
- assertEquals(15, topDocs.ScoreDocs[3].Doc);
- assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
- assertEquals(19, topDocs.ScoreDocs[4].Doc);
- assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());
-
- // Sort by field descending, order last
- sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
- sort = new Sort(sortField);
- topDocs = searcher.Search(query, 5, sort);
- assertEquals(topDocs.TotalHits, 7);
- assertEquals(5, topDocs.ScoreDocs.Length);
- assertEquals(28, topDocs.ScoreDocs[0].Doc);
- assertEquals("o", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
- assertEquals(23, topDocs.ScoreDocs[1].Doc);
- assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
- assertEquals(19, topDocs.ScoreDocs[2].Doc);
- assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
- assertEquals(15, topDocs.ScoreDocs[3].Doc);
- assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
- assertEquals(11, topDocs.ScoreDocs[4].Doc);
- assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());
-
- // Sort by field descending, order last, sort filter (filter_1:T)
- childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T"))));
- query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
- sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
- sort = new Sort(sortField);
- topDocs = searcher.Search(query, 5, sort);
- assertEquals(6, topDocs.TotalHits);
- assertEquals(5, topDocs.ScoreDocs.Length);
- assertEquals(23, topDocs.ScoreDocs[0].Doc);
- assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
- assertEquals(28, topDocs.ScoreDocs[1].Doc);
- assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
- assertEquals(11, topDocs.ScoreDocs[2].Doc);
- assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
- assertEquals(15, topDocs.ScoreDocs[3].Doc);
- assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
- assertEquals(7, topDocs.ScoreDocs[4].Doc);
- assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());
-
- searcher.IndexReader.Dispose();
- dir.Dispose();
- }
-
- private Filter Wrap(Filter filter)
- {
- return Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(filter) : filter;
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs b/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
deleted file mode 100644
index 5fdd35f..0000000
--- a/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
+++ /dev/null
@@ -1,227 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Text;
-using Apache.NMS;
-using Lucene.Net.Analysis;
-using Lucene.Net.Documents;
-using Lucene.Net.Index;
-using Lucene.Net.Join;
-using Lucene.Net.Randomized.Generators;
-using Lucene.Net.Search;
-using Lucene.Net.Store;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-
-namespace Lucene.Net.Tests.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- public class TestBlockJoinValidation : LuceneTestCase
- {
-
- public const int AMOUNT_OF_SEGMENTS = 5;
- public const int AMOUNT_OF_PARENT_DOCS = 10;
- public const int AMOUNT_OF_CHILD_DOCS = 5;
- public static readonly int AMOUNT_OF_DOCS_IN_SEGMENT = AMOUNT_OF_PARENT_DOCS + AMOUNT_OF_PARENT_DOCS * AMOUNT_OF_CHILD_DOCS;
-
- private Directory Directory;
- private IndexReader IndexReader;
- private IndexSearcher IndexSearcher;
- private Filter ParentsFilter;
-
- [SetUp]
- public override void SetUp()
- {
- Directory = NewDirectory();
- IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
- IndexWriter indexWriter = new IndexWriter(Directory, config);
- for (int i = 0; i < AMOUNT_OF_SEGMENTS; i++)
- {
- IList<Document> segmentDocs = CreateDocsForSegment(i);
- indexWriter.AddDocuments(segmentDocs);
- indexWriter.Commit();
- }
- IndexReader = DirectoryReader.Open(indexWriter, Random().NextBoolean());
- indexWriter.Dispose();
- IndexSearcher = new IndexSearcher(IndexReader);
- ParentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new WildcardQuery(new Term("parent", "*"))));
- }
-
- [TearDown]
- public override void TearDown()
- {
- IndexReader.Dispose();
- Directory.Dispose();
- }
-
- [Test]
- public void TestNextDocValidationForToParentBjq()
- {
- Query parentQueryWithRandomChild = CreateChildrenQueryWithOneParent(GetRandomChildNumber(0));
- var blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, ScoreMode.None);
-
- var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(blockJoinQuery, 1));
- StringAssert.Contains("child query must only match non-parent docs", ex.Message);
-
- }
-
- [Test]
- public void TestAdvanceValidationForToParentBjq()
- {
- int randomChildNumber = GetRandomChildNumber(0);
- // we need to make advance method meet wrong document, so random child number
- // in BJQ must be greater than child number in Boolean clause
- int nextRandomChildNumber = GetRandomChildNumber(randomChildNumber);
- Query parentQueryWithRandomChild = CreateChildrenQueryWithOneParent(nextRandomChildNumber);
- ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, ScoreMode.None);
- // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
- BooleanQuery conjunctionQuery = new BooleanQuery();
- WildcardQuery childQuery = new WildcardQuery(new Term("child", CreateFieldValue(randomChildNumber)));
- conjunctionQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
- conjunctionQuery.Add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
-
- var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(conjunctionQuery, 1));
- StringAssert.Contains("child query must only match non-parent docs", ex.Message);
- }
-
- [Test]
- public void TestNextDocValidationForToChildBjq()
- {
- Query parentQueryWithRandomChild = CreateParentsQueryWithOneChild(GetRandomChildNumber(0));
- var blockJoinQuery = new ToChildBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, false);
-
- var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(blockJoinQuery, 1));
- StringAssert.Contains(ToChildBlockJoinQuery.InvalidQueryMessage, ex.Message);
- }
-
- [Test]
- public void TestAdvanceValidationForToChildBjq()
- {
- int randomChildNumber = GetRandomChildNumber(0);
- // we need to make advance method meet wrong document, so random child number
- // in BJQ must be greater than child number in Boolean clause
- int nextRandomChildNumber = GetRandomChildNumber(randomChildNumber);
- Query parentQueryWithRandomChild = CreateParentsQueryWithOneChild(nextRandomChildNumber);
- var blockJoinQuery = new ToChildBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, false);
- // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
- var conjunctionQuery = new BooleanQuery();
- var childQuery = new WildcardQuery(new Term("child", CreateFieldValue(randomChildNumber)));
- conjunctionQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
- conjunctionQuery.Add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
-
- var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(conjunctionQuery, 1));
- StringAssert.Contains(ToChildBlockJoinQuery.InvalidQueryMessage, ex.Message);
- }
-
- private static IList<Document> CreateDocsForSegment(int segmentNumber)
- {
- IList<IList<Document>> blocks = new List<IList<Document>>(AMOUNT_OF_PARENT_DOCS);
- for (int i = 0; i < AMOUNT_OF_PARENT_DOCS; i++)
- {
- blocks.Add(CreateParentDocWithChildren(segmentNumber, i));
- }
- IList<Document> result = new List<Document>(AMOUNT_OF_DOCS_IN_SEGMENT);
- foreach (IList<Document> block in blocks)
- {
- result.AddRange(block);
- }
- return result;
- }
-
- private static IList<Document> CreateParentDocWithChildren(int segmentNumber, int parentNumber)
- {
- IList<Document> result = new List<Document>(AMOUNT_OF_CHILD_DOCS + 1);
- for (int i = 0; i < AMOUNT_OF_CHILD_DOCS; i++)
- {
- result.Add(CreateChildDoc(segmentNumber, parentNumber, i));
- }
- result.Add(CreateParentDoc(segmentNumber, parentNumber));
- return result;
- }
-
- private static Document CreateParentDoc(int segmentNumber, int parentNumber)
- {
- Document result = new Document();
- result.Add(NewStringField("id", CreateFieldValue(segmentNumber * AMOUNT_OF_PARENT_DOCS + parentNumber), Field.Store.YES));
- result.Add(NewStringField("parent", CreateFieldValue(parentNumber), Field.Store.NO));
- return result;
- }
-
- private static Document CreateChildDoc(int segmentNumber, int parentNumber, int childNumber)
- {
- Document result = new Document();
- result.Add(NewStringField("id", CreateFieldValue(segmentNumber * AMOUNT_OF_PARENT_DOCS + parentNumber, childNumber), Field.Store.YES));
- result.Add(NewStringField("child", CreateFieldValue(childNumber), Field.Store.NO));
- return result;
- }
-
- private static string CreateFieldValue(params int[] documentNumbers)
- {
- StringBuilder stringBuilder = new StringBuilder();
- foreach (int documentNumber in documentNumbers)
- {
- if (stringBuilder.Length > 0)
- {
- stringBuilder.Append("_");
- }
- stringBuilder.Append(documentNumber);
- }
- return stringBuilder.ToString();
- }
-
- private static Query CreateChildrenQueryWithOneParent(int childNumber)
- {
- TermQuery childQuery = new TermQuery(new Term("child", CreateFieldValue(childNumber)));
- Query randomParentQuery = new TermQuery(new Term("id", CreateFieldValue(RandomParentId)));
- BooleanQuery childrenQueryWithRandomParent = new BooleanQuery();
- childrenQueryWithRandomParent.Add(new BooleanClause(childQuery, BooleanClause.Occur.SHOULD));
- childrenQueryWithRandomParent.Add(new BooleanClause(randomParentQuery, BooleanClause.Occur.SHOULD));
- return childrenQueryWithRandomParent;
- }
-
- private static Query CreateParentsQueryWithOneChild(int randomChildNumber)
- {
- BooleanQuery childQueryWithRandomParent = new BooleanQuery();
- Query parentsQuery = new TermQuery(new Term("parent", CreateFieldValue(RandomParentNumber)));
- childQueryWithRandomParent.Add(new BooleanClause(parentsQuery, BooleanClause.Occur.SHOULD));
- childQueryWithRandomParent.Add(new BooleanClause(RandomChildQuery(randomChildNumber), BooleanClause.Occur.SHOULD));
- return childQueryWithRandomParent;
- }
-
- private static int RandomParentId
- {
- get { return Random().Next(AMOUNT_OF_PARENT_DOCS*AMOUNT_OF_SEGMENTS); }
- }
-
- private static int RandomParentNumber
- {
- get { return Random().Next(AMOUNT_OF_PARENT_DOCS); }
- }
-
- private static Query RandomChildQuery(int randomChildNumber)
- {
- return new TermQuery(new Term("id", CreateFieldValue(RandomParentId, randomChildNumber)));
- }
-
- private static int GetRandomChildNumber(int notLessThan)
- {
- return notLessThan + Random().Next(AMOUNT_OF_CHILD_DOCS - notLessThan);
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Tests.Join/TestJoinUtil.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestJoinUtil.cs b/Lucene.Net.Tests.Join/TestJoinUtil.cs
deleted file mode 100644
index 81513c7..0000000
--- a/Lucene.Net.Tests.Join/TestJoinUtil.cs
+++ /dev/null
@@ -1,1165 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using Lucene.Net.Analysis;
-using Lucene.Net.Documents;
-using Lucene.Net.Index;
-using Lucene.Net.Join;
-using Lucene.Net.Randomized.Generators;
-using Lucene.Net.Search;
-using Lucene.Net.Store;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-
-namespace Lucene.Net.Tests.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- public class TestJoinUtil : LuceneTestCase
- {
- [Test]
- public void TestSimple()
- {
- const string idField = "id";
- const string toField = "productId";
-
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
- NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
- .SetMergePolicy(NewLogMergePolicy()));
-
- // 0
- Document doc = new Document();
- doc.Add(new TextField("description", "random text", Field.Store.NO));
- doc.Add(new TextField("name", "name1", Field.Store.NO));
- doc.Add(new TextField(idField, "1", Field.Store.NO));
- w.AddDocument(doc);
-
- // 1
- doc = new Document();
- doc.Add(new TextField("price", "10.0", Field.Store.NO));
- doc.Add(new TextField(idField, "2", Field.Store.NO));
- doc.Add(new TextField(toField, "1", Field.Store.NO));
- w.AddDocument(doc);
-
- // 2
- doc = new Document();
- doc.Add(new TextField("price", "20.0", Field.Store.NO));
- doc.Add(new TextField(idField, "3", Field.Store.NO));
- doc.Add(new TextField(toField, "1", Field.Store.NO));
- w.AddDocument(doc);
-
- // 3
- doc = new Document();
- doc.Add(new TextField("description", "more random text", Field.Store.NO));
- doc.Add(new TextField("name", "name2", Field.Store.NO));
- doc.Add(new TextField(idField, "4", Field.Store.NO));
- w.AddDocument(doc);
- w.Commit();
-
- // 4
- doc = new Document();
- doc.Add(new TextField("price", "10.0", Field.Store.NO));
- doc.Add(new TextField(idField, "5", Field.Store.NO));
- doc.Add(new TextField(toField, "4", Field.Store.NO));
- w.AddDocument(doc);
-
- // 5
- doc = new Document();
- doc.Add(new TextField("price", "20.0", Field.Store.NO));
- doc.Add(new TextField(idField, "6", Field.Store.NO));
- doc.Add(new TextField(toField, "4", Field.Store.NO));
- w.AddDocument(doc);
-
- IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
- w.Dispose();
-
- // Search for product
- Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")),
- indexSearcher, ScoreMode.None);
-
- TopDocs result = indexSearcher.Search(joinQuery, 10);
- assertEquals(2, result.TotalHits);
- assertEquals(4, result.ScoreDocs[0].Doc);
- assertEquals(5, result.ScoreDocs[1].Doc);
-
- joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")),
- indexSearcher, ScoreMode.None);
- result = indexSearcher.Search(joinQuery, 10);
- assertEquals(2, result.TotalHits);
- assertEquals(1, result.ScoreDocs[0].Doc);
- assertEquals(2, result.ScoreDocs[1].Doc);
-
- // Search for offer
- joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")),
- indexSearcher, ScoreMode.None);
- result = indexSearcher.Search(joinQuery, 10);
- assertEquals(1, result.TotalHits);
- assertEquals(3, result.ScoreDocs[0].Doc);
-
- indexSearcher.IndexReader.Dispose();
- dir.Dispose();
- }
-
- // TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
- [Test]
- public void TestOverflowTermsWithScoreCollector()
- {
- Test300spartans(true, ScoreMode.Avg);
- }
-
- [Test]
- public void TestOverflowTermsWithScoreCollectorRandom()
- {
- var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
- Test300spartans(Random().NextBoolean(), (ScoreMode) Random().Next(scoreModeLength));
- }
-
- protected virtual void Test300spartans(bool multipleValues, ScoreMode scoreMode)
- {
- const string idField = "id";
- const string toField = "productId";
-
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
- NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
- .SetMergePolicy(NewLogMergePolicy()));
-
- // 0
- Document doc = new Document();
- doc.Add(new TextField("description", "random text", Field.Store.NO));
- doc.Add(new TextField("name", "name1", Field.Store.NO));
- doc.Add(new TextField(idField, "0", Field.Store.NO));
- w.AddDocument(doc);
-
- doc = new Document();
- doc.Add(new TextField("price", "10.0", Field.Store.NO));
- for (int i = 0; i < 300; i++)
- {
- doc.Add(new TextField(toField, "" + i, Field.Store.NO));
- if (!multipleValues)
- {
- w.AddDocument(doc);
- doc.RemoveFields(toField);
- }
- }
- w.AddDocument(doc);
-
- IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
- w.Dispose();
-
- // Search for product
- Query joinQuery = JoinUtil.CreateJoinQuery(toField, multipleValues, idField,
- new TermQuery(new Term("price", "10.0")), indexSearcher, scoreMode);
-
- TopDocs result = indexSearcher.Search(joinQuery, 10);
- assertEquals(1, result.TotalHits);
- assertEquals(0, result.ScoreDocs[0].Doc);
-
-
- indexSearcher.IndexReader.Dispose();
- dir.Dispose();
- }
-
- /// <summary>
- /// LUCENE-5487: verify a join query inside a SHOULD BQ
- /// will still use the join query's optimized BulkScorers
- /// </summary>
- [Test]
- public void TestInsideBooleanQuery()
- {
- const string idField = "id";
- const string toField = "productId";
-
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
- NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
- .SetMergePolicy(NewLogMergePolicy()));
-
- // 0
- Document doc = new Document();
- doc.Add(new TextField("description", "random text", Field.Store.NO));
- doc.Add(new TextField("name", "name1", Field.Store.NO));
- doc.Add(new TextField(idField, "7", Field.Store.NO));
- w.AddDocument(doc);
-
- // 1
- doc = new Document();
- doc.Add(new TextField("price", "10.0", Field.Store.NO));
- doc.Add(new TextField(idField, "2", Field.Store.NO));
- doc.Add(new TextField(toField, "7", Field.Store.NO));
- w.AddDocument(doc);
-
- // 2
- doc = new Document();
- doc.Add(new TextField("price", "20.0", Field.Store.NO));
- doc.Add(new TextField(idField, "3", Field.Store.NO));
- doc.Add(new TextField(toField, "7", Field.Store.NO));
- w.AddDocument(doc);
-
- // 3
- doc = new Document();
- doc.Add(new TextField("description", "more random text", Field.Store.NO));
- doc.Add(new TextField("name", "name2", Field.Store.NO));
- doc.Add(new TextField(idField, "0", Field.Store.NO));
- w.AddDocument(doc);
- w.Commit();
-
- // 4
- doc = new Document();
- doc.Add(new TextField("price", "10.0", Field.Store.NO));
- doc.Add(new TextField(idField, "5", Field.Store.NO));
- doc.Add(new TextField(toField, "0", Field.Store.NO));
- w.AddDocument(doc);
-
- // 5
- doc = new Document();
- doc.Add(new TextField("price", "20.0", Field.Store.NO));
- doc.Add(new TextField(idField, "6", Field.Store.NO));
- doc.Add(new TextField(toField, "0", Field.Store.NO));
- w.AddDocument(doc);
-
- w.ForceMerge(1);
-
- IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
- w.Dispose();
-
- // Search for product
- Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField,
- new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);
-
- BooleanQuery bq = new BooleanQuery();
- bq.Add(joinQuery, BooleanClause.Occur.SHOULD);
- bq.Add(new TermQuery(new Term("id", "3")), BooleanClause.Occur.SHOULD);
-
- indexSearcher.Search(bq, new CollectorAnonymousInnerClassHelper(this));
-
- indexSearcher.IndexReader.Dispose();
- dir.Dispose();
- }
-
- private class CollectorAnonymousInnerClassHelper : Collector
- {
- private readonly TestJoinUtil OuterInstance;
-
- public CollectorAnonymousInnerClassHelper(TestJoinUtil outerInstance)
- {
- OuterInstance = outerInstance;
- }
-
- internal bool sawFive;
-
- public override AtomicReaderContext NextReader
- {
- set { }
- }
-
- public override void Collect(int docID)
- {
- // Hairy / evil (depends on how BooleanScorer
- // stores temporarily collected docIDs by
- // appending to head of linked list):
- if (docID == 5)
- {
- sawFive = true;
- }
- else if (docID == 1)
- {
- assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive);
- }
- }
-
- public override Scorer Scorer
- {
- set { }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return true;
- }
- }
-
- [Test]
- public void TestSimpleWithScoring()
- {
- const string idField = "id";
- const string toField = "movieId";
-
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
- NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
- .SetMergePolicy(NewLogMergePolicy()));
-
- // 0
- Document doc = new Document();
- doc.Add(new TextField("description", "A random movie", Field.Store.NO));
- doc.Add(new TextField("name", "Movie 1", Field.Store.NO));
- doc.Add(new TextField(idField, "1", Field.Store.NO));
- w.AddDocument(doc);
-
- // 1
- doc = new Document();
- doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
- doc.Add(new TextField(idField, "2", Field.Store.NO));
- doc.Add(new TextField(toField, "1", Field.Store.NO));
- w.AddDocument(doc);
-
- // 2
- doc = new Document();
- doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
- doc.Add(new TextField(idField, "3", Field.Store.NO));
- doc.Add(new TextField(toField, "1", Field.Store.NO));
- w.AddDocument(doc);
-
- // 3
- doc = new Document();
- doc.Add(new TextField("description", "A second random movie", Field.Store.NO));
- doc.Add(new TextField("name", "Movie 2", Field.Store.NO));
- doc.Add(new TextField(idField, "4", Field.Store.NO));
- w.AddDocument(doc);
- w.Commit();
-
- // 4
- doc = new Document();
- doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
- doc.Add(new TextField(idField, "5", Field.Store.NO));
- doc.Add(new TextField(toField, "4", Field.Store.NO));
- w.AddDocument(doc);
-
- // 5
- doc = new Document();
- doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
- doc.Add(new TextField(idField, "6", Field.Store.NO));
- doc.Add(new TextField(toField, "4", Field.Store.NO));
- w.AddDocument(doc);
-
- IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
- w.Dispose();
-
- // Search for movie via subtitle
- Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField,
- new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
- TopDocs result = indexSearcher.Search(joinQuery, 10);
- assertEquals(2, result.TotalHits);
- assertEquals(0, result.ScoreDocs[0].Doc);
- assertEquals(3, result.ScoreDocs[1].Doc);
-
- // Score mode max.
- joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
- indexSearcher, ScoreMode.Max);
- result = indexSearcher.Search(joinQuery, 10);
- assertEquals(2, result.TotalHits);
- assertEquals(3, result.ScoreDocs[0].Doc);
- assertEquals(0, result.ScoreDocs[1].Doc);
-
- // Score mode total
- joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
- indexSearcher, ScoreMode.Total);
- result = indexSearcher.Search(joinQuery, 10);
- assertEquals(2, result.TotalHits);
- assertEquals(0, result.ScoreDocs[0].Doc);
- assertEquals(3, result.ScoreDocs[1].Doc);
-
- //Score mode avg
- joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
- indexSearcher, ScoreMode.Avg);
- result = indexSearcher.Search(joinQuery, 10);
- assertEquals(2, result.TotalHits);
- assertEquals(3, result.ScoreDocs[0].Doc);
- assertEquals(0, result.ScoreDocs[1].Doc);
-
- indexSearcher.IndexReader.Dispose();
- dir.Dispose();
- }
-
- [Test]
- public void TestSingleValueRandomJoin()
- {
- int maxIndexIter = TestUtil.NextInt(Random(), 6, 12);
- int maxSearchIter = TestUtil.NextInt(Random(), 13, 26);
- ExecuteRandomJoin(false, maxIndexIter, maxSearchIter, TestUtil.NextInt(Random(), 87, 764));
- }
-
- [Test]
- public void TestMultiValueRandomJoin()
- // this test really takes more time, that is why the number of iterations are smaller.
- {
- int maxIndexIter = TestUtil.NextInt(Random(), 3, 6);
- int maxSearchIter = TestUtil.NextInt(Random(), 6, 12);
- ExecuteRandomJoin(true, maxIndexIter, maxSearchIter, TestUtil.NextInt(Random(), 11, 57));
- }
-
- private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
- int numberOfDocumentsToIndex)
- {
- for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
- {
- if (VERBOSE)
- {
- Console.WriteLine("indexIter=" + indexIter);
- }
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
- NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false))
- .SetMergePolicy(NewLogMergePolicy()));
- bool scoreDocsInOrder = TestJoinUtil.Random().NextBoolean();
- IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
- scoreDocsInOrder);
-
- IndexReader topLevelReader = w.Reader;
- w.Dispose();
- for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
- {
- if (VERBOSE)
- {
- Console.WriteLine("searchIter=" + searchIter);
- }
- IndexSearcher indexSearcher = NewSearcher(topLevelReader);
-
- int r = Random().Next(context.RandomUniqueValues.Length);
- bool from = context.RandomFrom[r];
- string randomValue = context.RandomUniqueValues[r];
- FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
- context);
-
- Query actualQuery = new TermQuery(new Term("value", randomValue));
- if (VERBOSE)
- {
- Console.WriteLine("actualQuery=" + actualQuery);
- }
-
- var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
- ScoreMode scoreMode = (ScoreMode) Random().Next(scoreModeLength);
- if (VERBOSE)
- {
- Console.WriteLine("scoreMode=" + scoreMode);
- }
-
- Query joinQuery;
- if (from)
- {
- joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
- indexSearcher, scoreMode);
- }
- else
- {
- joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
- indexSearcher, scoreMode);
- }
- if (VERBOSE)
- {
- Console.WriteLine("joinQuery=" + joinQuery);
- }
-
- // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
- FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
- TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
- indexSearcher.Search(joinQuery,
- new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult,
- topScoreDocCollector));
- // Asserting bit set...
- if (VERBOSE)
- {
- Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
- DocIdSetIterator iterator = expectedResult.GetIterator();
- for (int doc = iterator.NextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = iterator.NextDoc())
- {
- Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
- }
- Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
- iterator = actualResult.GetIterator();
- for (int doc = iterator.NextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = iterator.NextDoc())
- {
- Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
- }
- }
- assertEquals(expectedResult, actualResult);
-
- // Asserting TopDocs...
- TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
- TopDocs actualTopDocs = topScoreDocCollector.TopDocs();
- assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
- assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
- if (scoreMode == ScoreMode.None)
- {
- continue;
- }
-
- assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
- for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
- {
- if (VERBOSE)
- {
- string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
- string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score);
- }
- assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
- assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
- Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
- assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
- }
- }
- topLevelReader.Dispose();
- dir.Dispose();
- }
- }
-
- private class CollectorAnonymousInnerClassHelper2 : Collector
- {
- private readonly TestJoinUtil OuterInstance;
-
- private bool ScoreDocsInOrder;
- private IndexIterationContext Context;
- private FixedBitSet ActualResult;
- private TopScoreDocCollector TopScoreDocCollector;
-
- public CollectorAnonymousInnerClassHelper2(TestJoinUtil outerInstance, bool scoreDocsInOrder,
- IndexIterationContext context, FixedBitSet actualResult,
- TopScoreDocCollector topScoreDocCollector)
- {
- OuterInstance = outerInstance;
- ScoreDocsInOrder = scoreDocsInOrder;
- Context = context;
- ActualResult = actualResult;
- TopScoreDocCollector = topScoreDocCollector;
- }
-
-
- private int _docBase;
-
- public override void Collect(int doc)
- {
- ActualResult.Set(doc + _docBase);
- TopScoreDocCollector.Collect(doc);
- }
-
- public override AtomicReaderContext NextReader
- {
- set
- {
- _docBase = value.DocBase;
- TopScoreDocCollector.NextReader = value;
- }
- }
-
- public override Scorer Scorer
- {
- set { TopScoreDocCollector.Scorer = value; }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return ScoreDocsInOrder;
- }
- }
-
- private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter writer, bool multipleValuesPerDocument,
- bool scoreDocsInOrder)
- {
- return CreateContext(nDocs, writer, writer, multipleValuesPerDocument, scoreDocsInOrder);
- }
-
- private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
- bool multipleValuesPerDocument, bool scoreDocsInOrder)
- {
- IndexIterationContext context = new IndexIterationContext();
- int numRandomValues = nDocs/2;
- context.RandomUniqueValues = new string[numRandomValues];
- ISet<string> trackSet = new HashSet<string>();
- context.RandomFrom = new bool[numRandomValues];
- for (int i = 0; i < numRandomValues; i++)
- {
- string uniqueRandomValue;
- do
- {
- uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random());
- // uniqueRandomValue = TestUtil.randomSimpleString(random);
- } while ("".Equals(uniqueRandomValue) || trackSet.Contains(uniqueRandomValue));
- // Generate unique values and empty strings aren't allowed.
- trackSet.Add(uniqueRandomValue);
- context.RandomFrom[i] = Random().NextBoolean();
- context.RandomUniqueValues[i] = uniqueRandomValue;
- }
-
- RandomDoc[] docs = new RandomDoc[nDocs];
- for (int i = 0; i < nDocs; i++)
- {
- string id = Convert.ToString(i);
- int randomI = Random().Next(context.RandomUniqueValues.Length);
- string value = context.RandomUniqueValues[randomI];
- Document document = new Document();
- document.Add(NewTextField(Random(), "id", id, Field.Store.NO));
- document.Add(NewTextField(Random(), "value", value, Field.Store.NO));
-
- bool from = context.RandomFrom[randomI];
- int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random().Next(10) : 1;
- docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
- for (int j = 0; j < numberOfLinkValues; j++)
- {
- string linkValue = context.RandomUniqueValues[Random().Next(context.RandomUniqueValues.Length)];
- docs[i].LinkValues.Add(linkValue);
- if (from)
- {
- if (!context.FromDocuments.ContainsKey(linkValue))
- {
- context.FromDocuments[linkValue] = new List<RandomDoc>();
- }
- if (!context.RandomValueFromDocs.ContainsKey(value))
- {
- context.RandomValueFromDocs[value] = new List<RandomDoc>();
- }
-
- context.FromDocuments[linkValue].Add(docs[i]);
- context.RandomValueFromDocs[value].Add(docs[i]);
- document.Add(NewTextField(Random(), "from", linkValue, Field.Store.NO));
- }
- else
- {
- if (!context.ToDocuments.ContainsKey(linkValue))
- {
- context.ToDocuments[linkValue] = new List<RandomDoc>();
- }
- if (!context.RandomValueToDocs.ContainsKey(value))
- {
- context.RandomValueToDocs[value] = new List<RandomDoc>();
- }
-
- context.ToDocuments[linkValue].Add(docs[i]);
- context.RandomValueToDocs[value].Add(docs[i]);
- document.Add(NewTextField(Random(), "to", linkValue, Field.Store.NO));
- }
- }
-
- RandomIndexWriter w;
- if (from)
- {
- w = fromWriter;
- }
- else
- {
- w = toWriter;
- }
-
- w.AddDocument(document);
- if (Random().Next(10) == 4)
- {
- w.Commit();
- }
- if (VERBOSE)
- {
- Console.WriteLine("Added document[" + docs[i].Id + "]: " + document);
- }
- }
-
- // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
- // any ScoreMode.
- IndexSearcher fromSearcher = NewSearcher(fromWriter.Reader);
- IndexSearcher toSearcher = NewSearcher(toWriter.Reader);
- for (int i = 0; i < context.RandomUniqueValues.Length; i++)
- {
- string uniqueRandomValue = context.RandomUniqueValues[i];
- string fromField;
- string toField;
- IDictionary<string, IDictionary<int, JoinScore>> queryVals;
- if (context.RandomFrom[i])
- {
- fromField = "from";
- toField = "to";
- queryVals = context.FromHitsToJoinScore;
- }
- else
- {
- fromField = "to";
- toField = "from";
- queryVals = context.ToHitsToJoinScore;
- }
- IDictionary<BytesRef, JoinScore> joinValueToJoinScores = new Dictionary<BytesRef, JoinScore>();
- if (multipleValuesPerDocument)
- {
- fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
- new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores));
- }
- else
- {
- fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
- new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores));
- }
-
- IDictionary<int, JoinScore> docToJoinScore = new Dictionary<int, JoinScore>();
- if (multipleValuesPerDocument)
- {
- if (scoreDocsInOrder)
- {
- AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
- Terms terms = slowCompositeReader.Terms(toField);
- if (terms != null)
- {
- DocsEnum docsEnum = null;
- TermsEnum termsEnum = null;
- SortedSet<BytesRef> joinValues =
- new SortedSet<BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
- joinValues.AddAll(joinValueToJoinScores.Keys);
- foreach (BytesRef joinValue in joinValues)
- {
- termsEnum = terms.Iterator(termsEnum);
- if (termsEnum.SeekExact(joinValue))
- {
- docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsEnum.FLAG_NONE);
- JoinScore joinScore = joinValueToJoinScores[joinValue];
-
- for (int doc = docsEnum.NextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = docsEnum.NextDoc())
- {
- // First encountered join value determines the score.
- // Something to keep in mind for many-to-many relations.
- if (!docToJoinScore.ContainsKey(doc))
- {
- docToJoinScore[doc] = joinScore;
- }
- }
- }
- }
- }
- }
- else
- {
- toSearcher.Search(new MatchAllDocsQuery(),
- new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores,
- docToJoinScore));
- }
- }
- else
- {
- toSearcher.Search(new MatchAllDocsQuery(),
- new CollectorAnonymousInnerClassHelper6(this, context, toField, joinValueToJoinScores,
- docToJoinScore));
- }
- queryVals[uniqueRandomValue] = docToJoinScore;
- }
-
- fromSearcher.IndexReader.Dispose();
- toSearcher.IndexReader.Dispose();
-
- return context;
- }
-
- private class CollectorAnonymousInnerClassHelper3 : Collector
- {
- private readonly TestJoinUtil OuterInstance;
-
- private IndexIterationContext Context;
- private string FromField;
- private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
-
- public CollectorAnonymousInnerClassHelper3(TestJoinUtil outerInstance,
- IndexIterationContext context, string fromField,
- IDictionary<BytesRef, JoinScore> joinValueToJoinScores)
- {
- OuterInstance = outerInstance;
- Context = context;
- FromField = fromField;
- JoinValueToJoinScores = joinValueToJoinScores;
- joinValue = new BytesRef();
- }
-
-
- private Scorer scorer;
- private SortedSetDocValues docTermOrds;
- internal readonly BytesRef joinValue;
-
- public override void Collect(int doc)
- {
- docTermOrds.Document = doc;
- long ord;
- while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
- {
- docTermOrds.LookupOrd(ord, joinValue);
- var joinScore = JoinValueToJoinScores[joinValue];
- if (joinScore == null)
- {
- JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
- }
- joinScore.AddScore(scorer.Score());
- }
- }
-
- public override AtomicReaderContext NextReader
- {
- set { docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, FromField); }
- }
-
- public override Scorer Scorer
- {
- set { scorer = value; }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return false;
- }
- }
-
- private class CollectorAnonymousInnerClassHelper4 : Collector
- {
- private readonly TestJoinUtil OuterInstance;
-
- private IndexIterationContext Context;
- private string FromField;
- private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
-
- public CollectorAnonymousInnerClassHelper4(TestJoinUtil outerInstance,
- IndexIterationContext context, string fromField,
- IDictionary<BytesRef, JoinScore> joinValueToJoinScores)
- {
- OuterInstance = outerInstance;
- Context = context;
- FromField = fromField;
- JoinValueToJoinScores = joinValueToJoinScores;
- spare = new BytesRef();
- }
-
-
- private Scorer scorer;
- private BinaryDocValues terms;
- private Bits docsWithField;
- private readonly BytesRef spare;
-
- public override void Collect(int doc)
- {
- terms.Get(doc, spare);
- BytesRef joinValue = spare;
- if (joinValue.Length == 0 && !docsWithField.Get(doc))
- {
- return;
- }
-
- var joinScore = JoinValueToJoinScores[joinValue];
- if (joinScore == null)
- {
- JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
- }
- joinScore.AddScore(scorer.Score());
- }
-
- public override AtomicReaderContext NextReader
- {
- set
- {
- terms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, FromField, true);
- docsWithField = FieldCache.DEFAULT.GetDocsWithField(value.AtomicReader, FromField);
- }
- }
-
- public override Scorer Scorer
- {
- set { scorer = value; }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return false;
- }
- }
-
- private class CollectorAnonymousInnerClassHelper5 : Collector
- {
- private readonly TestJoinUtil OuterInstance;
-
- private string _toField;
- private readonly IDictionary<BytesRef, JoinScore> _joinValueToJoinScores;
- private readonly IDictionary<int, JoinScore> _docToJoinScore;
-
- private SortedSetDocValues docTermOrds;
- private readonly BytesRef scratch;
- private int docBase;
-
- public CollectorAnonymousInnerClassHelper5(TestJoinUtil testJoinUtil, IndexIterationContext context,
- string toField, IDictionary<BytesRef, JoinScore> joinValueToJoinScores,
- IDictionary<int, JoinScore> docToJoinScore)
- {
- OuterInstance = testJoinUtil;
- _toField = toField;
- _joinValueToJoinScores = joinValueToJoinScores;
- _docToJoinScore = docToJoinScore;
- }
-
- public override void Collect(int doc)
- {
- docTermOrds.Document = doc;
- long ord;
- while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
- {
- docTermOrds.LookupOrd(ord, scratch);
- JoinScore joinScore = _joinValueToJoinScores[scratch];
- if (joinScore == null)
- {
- continue;
- }
- int basedDoc = docBase + doc;
- // First encountered join value determines the score.
- // Something to keep in mind for many-to-many relations.
- if (!_docToJoinScore.ContainsKey(basedDoc))
- {
- _docToJoinScore[basedDoc] = joinScore;
- }
- }
- }
-
- public override AtomicReaderContext NextReader
- {
- set
- {
- docBase = value.DocBase;
- docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _toField);
- }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return false;
- }
-
- public override Scorer Scorer
- {
- set { }
- }
- }
-
- private class CollectorAnonymousInnerClassHelper6 : Collector
- {
- private readonly TestJoinUtil OuterInstance;
-
- private IndexIterationContext Context;
- private string ToField;
- private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
- private IDictionary<int, JoinScore> DocToJoinScore;
-
- private BinaryDocValues terms;
- private int docBase;
- private readonly BytesRef spare;
-
- public CollectorAnonymousInnerClassHelper6(TestJoinUtil testJoinUtil,
- IndexIterationContext context, string toField,
- IDictionary<BytesRef, JoinScore> joinValueToJoinScores,
- IDictionary<int, JoinScore> docToJoinScore)
- {
- OuterInstance = testJoinUtil;
- ToField = toField;
- JoinValueToJoinScores = joinValueToJoinScores;
- DocToJoinScore = docToJoinScore;
- }
-
- public override void Collect(int doc)
- {
- terms.Get(doc, spare);
- JoinScore joinScore = JoinValueToJoinScores[spare];
- if (joinScore == null)
- {
- return;
- }
- DocToJoinScore[docBase + doc] = joinScore;
- }
-
- public override AtomicReaderContext NextReader
- {
- set
- {
- terms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, ToField, false);
- docBase = value.DocBase;
- }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return false;
- }
-
- public override Scorer Scorer
- {
- set { }
- }
- }
-
- private TopDocs CreateExpectedTopDocs(string queryValue, bool from, ScoreMode scoreMode,
- IndexIterationContext context)
- {
- var hitsToJoinScores = @from
- ? context.FromHitsToJoinScore[queryValue]
- : context.ToHitsToJoinScore[queryValue];
-
- var hits = new List<KeyValuePair<int, JoinScore>>(hitsToJoinScores.EntrySet());
- hits.Sort(new ComparatorAnonymousInnerClassHelper(this, scoreMode));
- ScoreDoc[] scoreDocs = new ScoreDoc[Math.Min(10, hits.Count)];
- for (int i = 0; i < scoreDocs.Length; i++)
- {
- KeyValuePair<int, JoinScore> hit = hits[i];
- scoreDocs[i] = new ScoreDoc(hit.Key, hit.Value.Score(scoreMode));
- }
- return new TopDocs(hits.Count, scoreDocs, hits.Count == 0 ? float.NaN : hits[0].Value.Score(scoreMode));
- }
-
- private class ComparatorAnonymousInnerClassHelper : IComparer<KeyValuePair<int, JoinScore>>
- {
- private readonly TestJoinUtil OuterInstance;
-
- private ScoreMode ScoreMode;
-
- public ComparatorAnonymousInnerClassHelper(TestJoinUtil outerInstance, ScoreMode scoreMode)
- {
- OuterInstance = outerInstance;
- ScoreMode = scoreMode;
- }
-
- public virtual int Compare(KeyValuePair<int, JoinScore> hit1, KeyValuePair<int, JoinScore> hit2)
- {
- float score1 = hit1.Value.Score(ScoreMode);
- float score2 = hit2.Value.Score(ScoreMode);
-
- int cmp = score2.CompareTo(score1);
- if (cmp != 0)
- {
- return cmp;
- }
- return hit1.Key - hit2.Key;
- }
- }
-
- private FixedBitSet CreateExpectedResult(string queryValue, bool from, IndexReader topLevelReader,
- IndexIterationContext context)
- {
- IDictionary<string, IList<RandomDoc>> randomValueDocs;
- IDictionary<string, IList<RandomDoc>> linkValueDocuments;
- if (from)
- {
- randomValueDocs = context.RandomValueFromDocs;
- linkValueDocuments = context.ToDocuments;
- }
- else
- {
- randomValueDocs = context.RandomValueToDocs;
- linkValueDocuments = context.FromDocuments;
- }
-
- FixedBitSet expectedResult = new FixedBitSet(topLevelReader.MaxDoc);
- IList<RandomDoc> matchingDocs = randomValueDocs[queryValue];
- if (matchingDocs == null)
- {
- return new FixedBitSet(topLevelReader.MaxDoc);
- }
-
- foreach (RandomDoc matchingDoc in matchingDocs)
- {
- foreach (string linkValue in matchingDoc.LinkValues)
- {
- IList<RandomDoc> otherMatchingDocs = linkValueDocuments[linkValue];
- if (otherMatchingDocs == null)
- {
- continue;
- }
-
- foreach (RandomDoc otherSideDoc in otherMatchingDocs)
- {
- DocsEnum docsEnum = MultiFields.GetTermDocsEnum(topLevelReader,
- MultiFields.GetLiveDocs(topLevelReader), "id", new BytesRef(otherSideDoc.Id), 0);
- Debug.Assert(docsEnum != null);
- int doc = docsEnum.NextDoc();
- expectedResult.Set(doc);
- }
- }
- }
- return expectedResult;
- }
-
- private class IndexIterationContext
- {
-
- internal string[] RandomUniqueValues;
- internal bool[] RandomFrom;
- internal IDictionary<string, IList<RandomDoc>> FromDocuments = new Dictionary<string, IList<RandomDoc>>();
- internal IDictionary<string, IList<RandomDoc>> ToDocuments = new Dictionary<string, IList<RandomDoc>>();
-
- internal IDictionary<string, IList<RandomDoc>> RandomValueFromDocs =
- new Dictionary<string, IList<RandomDoc>>();
-
- internal IDictionary<string, IList<RandomDoc>> RandomValueToDocs =
- new Dictionary<string, IList<RandomDoc>>();
-
- internal IDictionary<string, IDictionary<int, JoinScore>> FromHitsToJoinScore =
- new Dictionary<string, IDictionary<int, JoinScore>>();
-
- internal IDictionary<string, IDictionary<int, JoinScore>> ToHitsToJoinScore =
- new Dictionary<string, IDictionary<int, JoinScore>>();
- }
-
- private class RandomDoc
- {
- internal readonly string Id;
- internal readonly IList<string> LinkValues;
- internal readonly string Value;
- internal readonly bool From;
-
- internal RandomDoc(string id, int numberOfLinkValues, string value, bool from)
- {
- Id = id;
- From = from;
- LinkValues = new List<string>(numberOfLinkValues);
- Value = value;
- }
- }
-
- private class JoinScore
- {
- internal float MaxScore;
- internal float Total;
- internal int Count;
-
- internal virtual void AddScore(float score)
- {
- Total += score;
- if (score > MaxScore)
- {
- MaxScore = score;
- }
- Count++;
- }
-
- internal virtual float Score(ScoreMode mode)
- {
- switch (mode)
- {
- case ScoreMode.None:
- return 1.0f;
- case ScoreMode.Total:
- return Total;
- case ScoreMode.Avg:
- return Total/Count;
- case ScoreMode.Max:
- return MaxScore;
- }
- throw new ArgumentException("Unsupported ScoreMode: " + mode);
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Tests.Join/packages.config
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/packages.config b/Lucene.Net.Tests.Join/packages.config
deleted file mode 100644
index f0ed309..0000000
--- a/Lucene.Net.Tests.Join/packages.config
+++ /dev/null
@@ -1,5 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<packages>
- <package id="Apache.NMS" version="1.6.0.3083" targetFramework="net451" />
- <package id="NUnit" version="2.6.3" targetFramework="net451" />
-</packages>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index 2051e77..debbc08 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -38,11 +38,11 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs", "src\Lu
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs.Tests", "src\Lucene.Net.Tests.Codecs\Lucene.Net.Codecs.Tests.csproj", "{351B75B1-BBD5-4E32-8036-7BED4E0135A6}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Join", "Lucene.Net.Join\Lucene.Net.Join.csproj", "{E8A339C7-FCF6-4A72-8586-56D8961D7B99}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Join", "src\Lucene.Net.Join\Lucene.Net.Join.csproj", "{E8A339C7-FCF6-4A72-8586-56D8961D7B99}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Grouping", "Lucene.Net.Grouping\Lucene.Net.Grouping.csproj", "{02BAB603-067D-48B1-AEDD-316849652568}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Grouping", "src\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj", "{02BAB603-067D-48B1-AEDD-316849652568}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Join", "Lucene.Net.Tests.Join\Lucene.Net.Tests.Join.csproj", "{4C1B794F-8158-45E6-85B3-2C46569BEBC2}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Join", "src\Lucene.Net.Tests.Join\Lucene.Net.Tests.Join.csproj", "{4C1B794F-8158-45E6-85B3-2C46569BEBC2}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Grouping/GroupDocs.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/GroupDocs.cs b/src/Lucene.Net.Grouping/GroupDocs.cs
new file mode 100644
index 0000000..00cdf83
--- /dev/null
+++ b/src/Lucene.Net.Grouping/GroupDocs.cs
@@ -0,0 +1,71 @@
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Grouping
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Immutable holder for a single group of hits in a grouped result.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    /// <typeparam name="TGroupValueType">Type of the value stored in <see cref="GroupValue"/>.</typeparam>
+    public class GroupDocs<TGroupValueType>
+    {
+        /// <summary>
+        /// Value of the group field shared by all documents of this group;
+        /// may be null when the hits did not have the group field.
+        /// </summary>
+        public readonly TGroupValueType GroupValue;
+
+        /// <summary>
+        /// Highest score found in this group.
+        /// </summary>
+        public readonly float MaxScore;
+
+        /// <summary>
+        /// Overall aggregated score of this group (currently only set by join queries).
+        /// </summary>
+        public readonly float Score;
+
+        /// <summary>
+        /// The hits of this group; these may be <see cref="FieldDoc"/> instances when the
+        /// within-group sort sorted by fields.
+        /// </summary>
+        public readonly ScoreDoc[] ScoreDocs;
+
+        /// <summary>
+        /// Total number of hits within this group.
+        /// </summary>
+        public readonly int TotalHits;
+
+        /// <summary>
+        /// Sort values of this group; matches the group sort passed to
+        /// <c>AbstractFirstPassGroupingCollector</c>.
+        /// </summary>
+        public readonly object[] GroupSortValues;
+
+        /// <summary>
+        /// Creates a group, storing every argument in the field of the same name.
+        /// </summary>
+        /// <param name="score">Stored in <see cref="Score"/>.</param>
+        /// <param name="maxScore">Stored in <see cref="MaxScore"/>.</param>
+        /// <param name="totalHits">Stored in <see cref="TotalHits"/>.</param>
+        /// <param name="scoreDocs">Stored in <see cref="ScoreDocs"/>.</param>
+        /// <param name="groupValue">Stored in <see cref="GroupValue"/>.</param>
+        /// <param name="groupSortValues">Stored in <see cref="GroupSortValues"/>.</param>
+        public GroupDocs(float score, float maxScore, int totalHits, ScoreDoc[] scoreDocs, TGroupValueType groupValue, object[] groupSortValues)
+        {
+            this.GroupValue = groupValue;
+            this.GroupSortValues = groupSortValues;
+            this.ScoreDocs = scoreDocs;
+            this.TotalHits = totalHits;
+            this.MaxScore = maxScore;
+            this.Score = score;
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj b/src/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
new file mode 100644
index 0000000..5d4fbe2
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{02BAB603-067D-48B1-AEDD-316849652568}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Grouping</RootNamespace>
+ <AssemblyName>Lucene.Net.Grouping</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="GroupDocs.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TopGroups.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Properties/AssemblyInfo.cs b/src/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..9e6c1ce
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Grouping")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Grouping")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("02bab603-067d-48b1-aedd-316849652568")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
[09/17] lucenenet git commit: Lucene.Net.Join tests now passing
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs b/src/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
new file mode 100644
index 0000000..5fdd35f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
@@ -0,0 +1,227 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using Apache.NMS;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Join;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Tests.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestBlockJoinValidation : LuceneTestCase
+ {
+
+ // Number of document batches that SetUp adds to the index, committing after each one.
+ public const int AMOUNT_OF_SEGMENTS = 5;
+ // Number of parent documents created per batch.
+ public const int AMOUNT_OF_PARENT_DOCS = 10;
+ // Number of child documents created in front of every parent document.
+ public const int AMOUNT_OF_CHILD_DOCS = 5;
+ // Total documents per batch: every parent plus all of its children.
+ public static readonly int AMOUNT_OF_DOCS_IN_SEGMENT = AMOUNT_OF_PARENT_DOCS + AMOUNT_OF_PARENT_DOCS * AMOUNT_OF_CHILD_DOCS;
+
+ // Per-test fixtures: assigned in SetUp; the reader and directory are disposed in TearDown.
+ private Directory Directory;
+ private IndexReader IndexReader;
+ private IndexSearcher IndexSearcher;
+ // Cached filter over the documents that have a "parent" field (built in SetUp).
+ private Filter ParentsFilter;
+
+        /// <summary>
+        /// Builds the index shared by the tests of this class: adds
+        /// <see cref="AMOUNT_OF_SEGMENTS"/> batches of parent/child documents (committing
+        /// after each batch), then opens a reader and searcher over the writer and
+        /// creates the cached filter matching every document that has a "parent" field.
+        /// </summary>
+        [SetUp]
+        public override void SetUp()
+        {
+            // This method overrides the base fixture's SetUp, so the base implementation
+            // only runs if it is invoked explicitly.
+            base.SetUp();
+
+            Directory = NewDirectory();
+            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+            IndexWriter indexWriter = new IndexWriter(Directory, config);
+            for (int i = 0; i < AMOUNT_OF_SEGMENTS; i++)
+            {
+                IList<Document> segmentDocs = CreateDocsForSegment(i);
+                indexWriter.AddDocuments(segmentDocs);
+                indexWriter.Commit();
+            }
+            // Open the reader from the writer before the writer is disposed.
+            IndexReader = DirectoryReader.Open(indexWriter, Random().NextBoolean());
+            indexWriter.Dispose();
+            IndexSearcher = new IndexSearcher(IndexReader);
+            ParentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new WildcardQuery(new Term("parent", "*"))));
+        }
+
+        /// <summary>
+        /// Disposes the reader and directory created in <see cref="SetUp"/>, then lets
+        /// the base fixture run its own tear-down.
+        /// </summary>
+        [TearDown]
+        public override void TearDown()
+        {
+            IndexReader.Dispose();
+            Directory.Dispose();
+
+            // This method overrides the base fixture's TearDown, so the base
+            // implementation only runs if it is invoked explicitly.
+            base.TearDown();
+        }
+
+        /// <summary>
+        /// A <see cref="ToParentBlockJoinQuery"/> whose child query also has a clause on a
+        /// parent document id must make the search throw an
+        /// <see cref="InvalidOperationException"/> carrying the validation message.
+        /// </summary>
+        [Test]
+        public void TestNextDocValidationForToParentBjq()
+        {
+            int childNumber = GetRandomChildNumber(0);
+            Query invalidChildQuery = CreateChildrenQueryWithOneParent(childNumber);
+            var joinQuery = new ToParentBlockJoinQuery(invalidChildQuery, ParentsFilter, ScoreMode.None);
+
+            var error = Throws<InvalidOperationException>(() => IndexSearcher.Search(joinQuery, 1));
+
+            StringAssert.Contains("child query must only match non-parent docs", error.Message);
+        }
+
+        /// <summary>
+        /// Same validation as the next-doc variant, but with the
+        /// <see cref="ToParentBlockJoinQuery"/> placed in a conjunction.
+        /// </summary>
+        [Test]
+        public void TestAdvanceValidationForToParentBjq()
+        {
+            // To make the advance method meet a wrong document, the child number used
+            // inside the block join query is drawn starting from the child number used
+            // by the plain Boolean clause.
+            int clauseChildNumber = GetRandomChildNumber(0);
+            int joinChildNumber = GetRandomChildNumber(clauseChildNumber);
+
+            Query invalidChildQuery = CreateChildrenQueryWithOneParent(joinChildNumber);
+            var joinQuery = new ToParentBlockJoinQuery(invalidChildQuery, ParentsFilter, ScoreMode.None);
+            var childClause = new WildcardQuery(new Term("child", CreateFieldValue(clauseChildNumber)));
+
+            // advance() is used by ConjunctionScorer, hence a Boolean query made of two MUST clauses.
+            var conjunction = new BooleanQuery();
+            conjunction.Add(new BooleanClause(childClause, BooleanClause.Occur.MUST));
+            conjunction.Add(new BooleanClause(joinQuery, BooleanClause.Occur.MUST));
+
+            var error = Throws<InvalidOperationException>(() => IndexSearcher.Search(conjunction, 1));
+
+            StringAssert.Contains("child query must only match non-parent docs", error.Message);
+        }
+
+        /// <summary>
+        /// A <see cref="ToChildBlockJoinQuery"/> whose parent query also has a clause on a
+        /// child document id must make the search throw an
+        /// <see cref="InvalidOperationException"/> containing
+        /// <c>ToChildBlockJoinQuery.InvalidQueryMessage</c>.
+        /// </summary>
+        [Test]
+        public void TestNextDocValidationForToChildBjq()
+        {
+            int childNumber = GetRandomChildNumber(0);
+            Query invalidParentQuery = CreateParentsQueryWithOneChild(childNumber);
+            var joinQuery = new ToChildBlockJoinQuery(invalidParentQuery, ParentsFilter, false);
+
+            var error = Throws<InvalidOperationException>(() => IndexSearcher.Search(joinQuery, 1));
+
+            StringAssert.Contains(ToChildBlockJoinQuery.InvalidQueryMessage, error.Message);
+        }
+
+        /// <summary>
+        /// Same validation as the next-doc variant, but with the
+        /// <see cref="ToChildBlockJoinQuery"/> placed in a conjunction.
+        /// </summary>
+        [Test]
+        public void TestAdvanceValidationForToChildBjq()
+        {
+            // To make the advance method meet a wrong document, the child number used
+            // inside the block join query is drawn starting from the child number used
+            // by the plain Boolean clause.
+            int clauseChildNumber = GetRandomChildNumber(0);
+            int joinChildNumber = GetRandomChildNumber(clauseChildNumber);
+
+            Query invalidParentQuery = CreateParentsQueryWithOneChild(joinChildNumber);
+            ToChildBlockJoinQuery joinQuery = new ToChildBlockJoinQuery(invalidParentQuery, ParentsFilter, false);
+            WildcardQuery childClause = new WildcardQuery(new Term("child", CreateFieldValue(clauseChildNumber)));
+
+            // advance() is used by ConjunctionScorer, hence a Boolean query made of two MUST clauses.
+            BooleanQuery conjunction = new BooleanQuery();
+            conjunction.Add(new BooleanClause(childClause, BooleanClause.Occur.MUST));
+            conjunction.Add(new BooleanClause(joinQuery, BooleanClause.Occur.MUST));
+
+            var error = Throws<InvalidOperationException>(() => IndexSearcher.Search(conjunction, 1));
+
+            StringAssert.Contains(ToChildBlockJoinQuery.InvalidQueryMessage, error.Message);
+        }
+
+        /// <summary>
+        /// Creates all documents of one batch: <see cref="AMOUNT_OF_PARENT_DOCS"/> blocks,
+        /// each made of the child documents followed by their parent, concatenated in
+        /// parent-number order.
+        /// </summary>
+        /// <param name="segmentNumber">Batch number, used when building the document ids.</param>
+        /// <returns>The flattened list of documents.</returns>
+        private static IList<Document> CreateDocsForSegment(int segmentNumber)
+        {
+            List<Document> segmentDocs = new List<Document>(AMOUNT_OF_DOCS_IN_SEGMENT);
+            for (int parentNumber = 0; parentNumber < AMOUNT_OF_PARENT_DOCS; parentNumber++)
+            {
+                segmentDocs.AddRange(CreateParentDocWithChildren(segmentNumber, parentNumber));
+            }
+            return segmentDocs;
+        }
+
+        /// <summary>
+        /// Creates one block: <see cref="AMOUNT_OF_CHILD_DOCS"/> child documents followed
+        /// by their parent document as the last element.
+        /// </summary>
+        /// <param name="segmentNumber">Batch number the block belongs to.</param>
+        /// <param name="parentNumber">Number of the parent within the batch.</param>
+        /// <returns>The children and then the parent, in that order.</returns>
+        private static IList<Document> CreateParentDocWithChildren(int segmentNumber, int parentNumber)
+        {
+            var block = new List<Document>(AMOUNT_OF_CHILD_DOCS + 1);
+            int childNumber = 0;
+            while (childNumber < AMOUNT_OF_CHILD_DOCS)
+            {
+                block.Add(CreateChildDoc(segmentNumber, parentNumber, childNumber));
+                childNumber++;
+            }
+            // The parent closes the block.
+            block.Add(CreateParentDoc(segmentNumber, parentNumber));
+            return block;
+        }
+
+        /// <summary>
+        /// Creates a parent document with a stored "id" field (the running parent index
+        /// across batches) and an unstored "parent" field holding the parent number.
+        /// </summary>
+        private static Document CreateParentDoc(int segmentNumber, int parentNumber)
+        {
+            int globalParentIndex = segmentNumber * AMOUNT_OF_PARENT_DOCS + parentNumber;
+
+            var parentDoc = new Document();
+            parentDoc.Add(NewStringField("id", CreateFieldValue(globalParentIndex), Field.Store.YES));
+            parentDoc.Add(NewStringField("parent", CreateFieldValue(parentNumber), Field.Store.NO));
+            return parentDoc;
+        }
+
+        /// <summary>
+        /// Creates a child document with a stored "id" field (running parent index and
+        /// child number joined by CreateFieldValue) and an unstored "child" field holding
+        /// the child number.
+        /// </summary>
+        private static Document CreateChildDoc(int segmentNumber, int parentNumber, int childNumber)
+        {
+            int globalParentIndex = segmentNumber * AMOUNT_OF_PARENT_DOCS + parentNumber;
+
+            var childDoc = new Document();
+            childDoc.Add(NewStringField("id", CreateFieldValue(globalParentIndex, childNumber), Field.Store.YES));
+            childDoc.Add(NewStringField("child", CreateFieldValue(childNumber), Field.Store.NO));
+            return childDoc;
+        }
+
+        /// <summary>
+        /// Joins the given numbers with an underscore, e.g. (3, 4) becomes "3_4".
+        /// </summary>
+        /// <param name="documentNumbers">Numbers to join, in order.</param>
+        /// <returns>The joined text; an empty string when no numbers are given.</returns>
+        private static string CreateFieldValue(params int[] documentNumbers)
+        {
+            StringBuilder joined = new StringBuilder();
+            for (int index = 0; index < documentNumbers.Length; index++)
+            {
+                // Separator goes between numbers only, never in front of the first one.
+                if (index != 0)
+                {
+                    joined.Append("_");
+                }
+                joined.Append(documentNumbers[index]);
+            }
+            return joined.ToString();
+        }
+
+        /// <summary>
+        /// Builds a Boolean query with two optional clauses: a term query on the "child"
+        /// field for <paramref name="childNumber"/>, and a term query on the "id" field
+        /// for a randomly chosen parent id.
+        /// </summary>
+        private static Query CreateChildrenQueryWithOneParent(int childNumber)
+        {
+            var childTerm = new Term("child", CreateFieldValue(childNumber));
+            var parentIdTerm = new Term("id", CreateFieldValue(RandomParentId));
+
+            var query = new BooleanQuery();
+            query.Add(new BooleanClause(new TermQuery(childTerm), BooleanClause.Occur.SHOULD));
+            query.Add(new BooleanClause(new TermQuery(parentIdTerm), BooleanClause.Occur.SHOULD));
+            return query;
+        }
+
+        /// <summary>
+        /// Builds a Boolean query with two optional clauses: a term query on the "parent"
+        /// field for a randomly chosen parent number, and a term query on the id of one
+        /// child document (see <see cref="RandomChildQuery"/>).
+        /// </summary>
+        private static Query CreateParentsQueryWithOneChild(int randomChildNumber)
+        {
+            Query parentClause = new TermQuery(new Term("parent", CreateFieldValue(RandomParentNumber)));
+            Query childClause = RandomChildQuery(randomChildNumber);
+
+            var query = new BooleanQuery();
+            query.Add(new BooleanClause(parentClause, BooleanClause.Occur.SHOULD));
+            query.Add(new BooleanClause(childClause, BooleanClause.Occur.SHOULD));
+            return query;
+        }
+
+        /// <summary>
+        /// A fresh random value on every read, below
+        /// <c>AMOUNT_OF_PARENT_DOCS * AMOUNT_OF_SEGMENTS</c>.
+        /// </summary>
+        private static int RandomParentId
+        {
+            get
+            {
+                int exclusiveUpperBound = AMOUNT_OF_PARENT_DOCS * AMOUNT_OF_SEGMENTS;
+                return Random().Next(exclusiveUpperBound);
+            }
+        }
+
+        /// <summary>
+        /// A fresh random value on every read, below <see cref="AMOUNT_OF_PARENT_DOCS"/>.
+        /// </summary>
+        private static int RandomParentNumber
+        {
+            get
+            {
+                int parentNumber = Random().Next(AMOUNT_OF_PARENT_DOCS);
+                return parentNumber;
+            }
+        }
+
+        /// <summary>
+        /// Builds a term query on the "id" field for the child
+        /// <paramref name="randomChildNumber"/> of a randomly chosen parent id.
+        /// </summary>
+        private static Query RandomChildQuery(int randomChildNumber)
+        {
+            string childId = CreateFieldValue(RandomParentId, randomChildNumber);
+            return new TermQuery(new Term("id", childId));
+        }
+
+        /// <summary>
+        /// Picks a random child number that is at least <paramref name="notLessThan"/>.
+        /// </summary>
+        /// <param name="notLessThan">Lower bound (inclusive) of the returned number.</param>
+        private static int GetRandomChildNumber(int notLessThan)
+        {
+            int remaining = AMOUNT_OF_CHILD_DOCS - notLessThan;
+            int offset = Random().Next(remaining);
+            return notLessThan + offset;
+        }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
new file mode 100644
index 0000000..81513c7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
@@ -0,0 +1,1165 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Join;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Tests.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestJoinUtil : LuceneTestCase
+ {
+        /// <summary>
+        /// Indexes two "product" documents, each followed by two "offer" documents that
+        /// reference it through the <c>productId</c> field, and checks the hits of
+        /// unscored join queries in both directions.
+        /// </summary>
+        [Test]
+        public void TestSimple()
+        {
+            const string idField = "id";
+            const string toField = "productId";
+
+            Directory directory = NewDirectory();
+            var writer = new RandomIndexWriter(Random(), directory,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMergePolicy(NewLogMergePolicy()));
+
+            // doc 0: first product
+            var document = new Document();
+            document.Add(new TextField("description", "random text", Field.Store.NO));
+            document.Add(new TextField("name", "name1", Field.Store.NO));
+            document.Add(new TextField(idField, "1", Field.Store.NO));
+            writer.AddDocument(document);
+
+            // doc 1: offer for product "1"
+            document = new Document();
+            document.Add(new TextField("price", "10.0", Field.Store.NO));
+            document.Add(new TextField(idField, "2", Field.Store.NO));
+            document.Add(new TextField(toField, "1", Field.Store.NO));
+            writer.AddDocument(document);
+
+            // doc 2: offer for product "1"
+            document = new Document();
+            document.Add(new TextField("price", "20.0", Field.Store.NO));
+            document.Add(new TextField(idField, "3", Field.Store.NO));
+            document.Add(new TextField(toField, "1", Field.Store.NO));
+            writer.AddDocument(document);
+
+            // doc 3: second product
+            document = new Document();
+            document.Add(new TextField("description", "more random text", Field.Store.NO));
+            document.Add(new TextField("name", "name2", Field.Store.NO));
+            document.Add(new TextField(idField, "4", Field.Store.NO));
+            writer.AddDocument(document);
+            writer.Commit();
+
+            // doc 4: offer for product "4"
+            document = new Document();
+            document.Add(new TextField("price", "10.0", Field.Store.NO));
+            document.Add(new TextField(idField, "5", Field.Store.NO));
+            document.Add(new TextField(toField, "4", Field.Store.NO));
+            writer.AddDocument(document);
+
+            // doc 5: offer for product "4"
+            document = new Document();
+            document.Add(new TextField("price", "20.0", Field.Store.NO));
+            document.Add(new TextField(idField, "6", Field.Store.NO));
+            document.Add(new TextField(toField, "4", Field.Store.NO));
+            writer.AddDocument(document);
+
+            var searcher = new IndexSearcher(writer.Reader);
+            writer.Dispose();
+
+            // From product "name2" to its offers.
+            Query query = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")),
+                searcher, ScoreMode.None);
+
+            TopDocs hits = searcher.Search(query, 10);
+            assertEquals(2, hits.TotalHits);
+            assertEquals(4, hits.ScoreDocs[0].Doc);
+            assertEquals(5, hits.ScoreDocs[1].Doc);
+
+            // From product "name1" to its offers.
+            query = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")),
+                searcher, ScoreMode.None);
+            hits = searcher.Search(query, 10);
+            assertEquals(2, hits.TotalHits);
+            assertEquals(1, hits.ScoreDocs[0].Doc);
+            assertEquals(2, hits.ScoreDocs[1].Doc);
+
+            // From the offer with id "5" back to its product.
+            query = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")),
+                searcher, ScoreMode.None);
+            hits = searcher.Search(query, 10);
+            assertEquals(1, hits.TotalHits);
+            assertEquals(3, hits.ScoreDocs[0].Doc);
+
+            searcher.IndexReader.Dispose();
+            directory.Dispose();
+        }
+
+        /// <summary>
+        /// Runs the 300-values scenario with multiple values per document and average
+        /// scoring. Regression scenario: TermsWithScoreCollector.MV.Avg forgets to grow
+        /// beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE.
+        /// </summary>
+        [Test]
+        public void TestOverflowTermsWithScoreCollector()
+        {
+            const bool multipleValues = true;
+            Test300spartans(multipleValues, ScoreMode.Avg);
+        }
+
+        /// <summary>
+        /// Runs the 300-values scenario with a randomly chosen single/multi-value setting
+        /// and a randomly chosen <see cref="ScoreMode"/>.
+        /// </summary>
+        [Test]
+        public void TestOverflowTermsWithScoreCollectorRandom()
+        {
+            int scoreModeCount = Enum.GetNames(typeof(ScoreMode)).Length;
+
+            // Draw the boolean first and the score mode second.
+            bool multipleValues = Random().NextBoolean();
+            ScoreMode scoreMode = (ScoreMode) Random().Next(scoreModeCount);
+
+            Test300spartans(multipleValues, scoreMode);
+        }
+
+        /// <summary>
+        /// Indexes one document with id "0" and 300 <c>productId</c> values ("0" to "299"),
+        /// then checks that joining from <c>productId</c> to <c>id</c> finds exactly
+        /// document 0.
+        /// </summary>
+        /// <param name="multipleValues">
+        /// When true all 300 values are put on a single document; otherwise one document
+        /// is added per value. Also passed to the join query.
+        /// </param>
+        /// <param name="scoreMode">Score mode passed to the join query.</param>
+        protected virtual void Test300spartans(bool multipleValues, ScoreMode scoreMode)
+        {
+            const string idField = "id";
+            const string toField = "productId";
+
+            Directory directory = NewDirectory();
+            var writer = new RandomIndexWriter(Random(), directory,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMergePolicy(NewLogMergePolicy()));
+
+            // doc 0: the only document carrying the id field
+            var target = new Document();
+            target.Add(new TextField("description", "random text", Field.Store.NO));
+            target.Add(new TextField("name", "name1", Field.Store.NO));
+            target.Add(new TextField(idField, "0", Field.Store.NO));
+            writer.AddDocument(target);
+
+            var source = new Document();
+            source.Add(new TextField("price", "10.0", Field.Store.NO));
+            for (int value = 0; value < 300; value++)
+            {
+                source.Add(new TextField(toField, "" + value, Field.Store.NO));
+                if (multipleValues)
+                {
+                    // Keep accumulating values on the same document.
+                    continue;
+                }
+                // Single-valued: index the document now and reset the field for the next value.
+                writer.AddDocument(source);
+                source.RemoveFields(toField);
+            }
+            writer.AddDocument(source);
+
+            var searcher = new IndexSearcher(writer.Reader);
+            writer.Dispose();
+
+            // Join from the priced documents to the document whose id they reference.
+            Query query = JoinUtil.CreateJoinQuery(toField, multipleValues, idField,
+                new TermQuery(new Term("price", "10.0")), searcher, scoreMode);
+
+            TopDocs hits = searcher.Search(query, 10);
+            assertEquals(1, hits.TotalHits);
+            assertEquals(0, hits.ScoreDocs[0].Doc);
+
+            searcher.IndexReader.Dispose();
+            directory.Dispose();
+        }
+
+ /// <summary>
+ /// LUCENE-5487: verify a join query inside a SHOULD BQ
+ /// will still use the join query's optimized BulkScorers.
+ /// The check itself is done by the collector passed to the search, which
+ /// asserts on the order in which documents 1 and 5 are collected.
+ /// </summary>
+ [Test]
+ public void TestInsideBooleanQuery()
+ {
+ const string idField = "id";
+ const string toField = "productId";
+
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
+ NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+ .SetMergePolicy(NewLogMergePolicy()));
+
+ // 0
+ Document doc = new Document();
+ doc.Add(new TextField("description", "random text", Field.Store.NO));
+ doc.Add(new TextField("name", "name1", Field.Store.NO));
+ doc.Add(new TextField(idField, "7", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 1
+ doc = new Document();
+ doc.Add(new TextField("price", "10.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "2", Field.Store.NO));
+ doc.Add(new TextField(toField, "7", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 2
+ doc = new Document();
+ doc.Add(new TextField("price", "20.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "3", Field.Store.NO));
+ doc.Add(new TextField(toField, "7", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 3
+ doc = new Document();
+ doc.Add(new TextField("description", "more random text", Field.Store.NO));
+ doc.Add(new TextField("name", "name2", Field.Store.NO));
+ doc.Add(new TextField(idField, "0", Field.Store.NO));
+ w.AddDocument(doc);
+ w.Commit();
+
+ // 4
+ doc = new Document();
+ doc.Add(new TextField("price", "10.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "5", Field.Store.NO));
+ doc.Add(new TextField(toField, "0", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 5
+ doc = new Document();
+ doc.Add(new TextField("price", "20.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "6", Field.Store.NO));
+ doc.Add(new TextField(toField, "0", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // Merge everything down to one segment before the reader is opened.
+ w.ForceMerge(1);
+
+ IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
+ w.Dispose();
+
+ // Search for product
+ Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField,
+ new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);
+
+ // Embed the join query next to a plain term query; both clauses are optional (SHOULD).
+ BooleanQuery bq = new BooleanQuery();
+ bq.Add(joinQuery, BooleanClause.Occur.SHOULD);
+ bq.Add(new TermQuery(new Term("id", "3")), BooleanClause.Occur.SHOULD);
+
+ // No result is inspected here: the assertion lives inside the collector's Collect method.
+ indexSearcher.Search(bq, new CollectorAnonymousInnerClassHelper(this));
+
+ indexSearcher.IndexReader.Dispose();
+ dir.Dispose();
+ }
+
+        /// <summary>
+        /// Collector that accepts documents out of order and fails if document 1 is
+        /// collected after document 5 has already been seen.
+        /// </summary>
+        private class CollectorAnonymousInnerClassHelper : Collector
+        {
+            private readonly TestJoinUtil OuterInstance;
+
+            // Set once document 5 has been collected.
+            internal bool sawFive;
+
+            public CollectorAnonymousInnerClassHelper(TestJoinUtil outerInstance)
+            {
+                OuterInstance = outerInstance;
+            }
+
+            public override bool AcceptsDocsOutOfOrder()
+            {
+                return true;
+            }
+
+            // The scorer is not needed by this collector.
+            public override Scorer Scorer
+            {
+                set { }
+            }
+
+            // The reader context is not needed by this collector.
+            public override AtomicReaderContext NextReader
+            {
+                set { }
+            }
+
+            public override void Collect(int docID)
+            {
+                // Hairy / evil (depends on how BooleanScorer
+                // stores temporarily collected docIDs by
+                // appending to head of linked list):
+                switch (docID)
+                {
+                    case 5:
+                        sawFive = true;
+                        break;
+                    case 1:
+                        assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive);
+                        break;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Indexes two "movie" documents, each followed by two "subtitle" documents that
+        /// reference it through the <c>movieId</c> field, and checks the hit order of
+        /// join queries from subtitles to movies under the Max, Total and Avg score modes.
+        /// </summary>
+        [Test]
+        public void TestSimpleWithScoring()
+        {
+            const string idField = "id";
+            const string toField = "movieId";
+
+            Directory directory = NewDirectory();
+            var writer = new RandomIndexWriter(Random(), directory,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMergePolicy(NewLogMergePolicy()));
+
+            // doc 0: first movie
+            var document = new Document();
+            document.Add(new TextField("description", "A random movie", Field.Store.NO));
+            document.Add(new TextField("name", "Movie 1", Field.Store.NO));
+            document.Add(new TextField(idField, "1", Field.Store.NO));
+            writer.AddDocument(document);
+
+            // doc 1: subtitle of movie "1"
+            document = new Document();
+            document.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
+            document.Add(new TextField(idField, "2", Field.Store.NO));
+            document.Add(new TextField(toField, "1", Field.Store.NO));
+            writer.AddDocument(document);
+
+            // doc 2: subtitle of movie "1"
+            document = new Document();
+            document.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
+            document.Add(new TextField(idField, "3", Field.Store.NO));
+            document.Add(new TextField(toField, "1", Field.Store.NO));
+            writer.AddDocument(document);
+
+            // doc 3: second movie
+            document = new Document();
+            document.Add(new TextField("description", "A second random movie", Field.Store.NO));
+            document.Add(new TextField("name", "Movie 2", Field.Store.NO));
+            document.Add(new TextField(idField, "4", Field.Store.NO));
+            writer.AddDocument(document);
+            writer.Commit();
+
+            // doc 4: subtitle of movie "4"
+            document = new Document();
+            document.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
+            document.Add(new TextField(idField, "5", Field.Store.NO));
+            document.Add(new TextField(toField, "4", Field.Store.NO));
+            writer.AddDocument(document);
+
+            // doc 5: subtitle of movie "4"
+            document = new Document();
+            document.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
+            document.Add(new TextField(idField, "6", Field.Store.NO));
+            document.Add(new TextField(toField, "4", Field.Store.NO));
+            writer.AddDocument(document);
+
+            var searcher = new IndexSearcher(writer.Reader);
+            writer.Dispose();
+
+            // Movies reached through subtitles containing "random", Max score mode.
+            Query query = JoinUtil.CreateJoinQuery(toField, false, idField,
+                new TermQuery(new Term("subtitle", "random")), searcher, ScoreMode.Max);
+            TopDocs hits = searcher.Search(query, 10);
+            assertEquals(2, hits.TotalHits);
+            assertEquals(0, hits.ScoreDocs[0].Doc);
+            assertEquals(3, hits.ScoreDocs[1].Doc);
+
+            // Subtitles containing "movie", Max score mode.
+            query = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
+                searcher, ScoreMode.Max);
+            hits = searcher.Search(query, 10);
+            assertEquals(2, hits.TotalHits);
+            assertEquals(3, hits.ScoreDocs[0].Doc);
+            assertEquals(0, hits.ScoreDocs[1].Doc);
+
+            // Subtitles containing "movie", Total score mode.
+            query = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
+                searcher, ScoreMode.Total);
+            hits = searcher.Search(query, 10);
+            assertEquals(2, hits.TotalHits);
+            assertEquals(0, hits.ScoreDocs[0].Doc);
+            assertEquals(3, hits.ScoreDocs[1].Doc);
+
+            // Subtitles containing "movie", Avg score mode.
+            query = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
+                searcher, ScoreMode.Avg);
+            hits = searcher.Search(query, 10);
+            assertEquals(2, hits.TotalHits);
+            assertEquals(3, hits.ScoreDocs[0].Doc);
+            assertEquals(0, hits.ScoreDocs[1].Doc);
+
+            searcher.IndexReader.Dispose();
+            directory.Dispose();
+        }
+
+        /// <summary>
+        /// Runs the randomized join scenario with a single join value per document,
+        /// using randomly chosen iteration and document counts.
+        /// </summary>
+        [Test]
+        public void TestSingleValueRandomJoin()
+        {
+            // The three values are drawn in this order: index iterations, search iterations, documents.
+            int indexIterations = TestUtil.NextInt(Random(), 6, 12);
+            int searchIterations = TestUtil.NextInt(Random(), 13, 26);
+            int documentCount = TestUtil.NextInt(Random(), 87, 764);
+
+            ExecuteRandomJoin(false, indexIterations, searchIterations, documentCount);
+        }
+
+        /// <summary>
+        /// Runs the randomized join scenario with multiple join values per document.
+        /// This variant takes considerably more time, which is why the iteration and
+        /// document counts are smaller than in the single-value test.
+        /// </summary>
+        [Test]
+        public void TestMultiValueRandomJoin()
+        {
+            // The three values are drawn in this order: index iterations, search iterations, documents.
+            int indexIterations = TestUtil.NextInt(Random(), 3, 6);
+            int searchIterations = TestUtil.NextInt(Random(), 6, 12);
+            int documentCount = TestUtil.NextInt(Random(), 11, 57);
+
+            ExecuteRandomJoin(true, indexIterations, searchIterations, documentCount);
+        }
+
+ // Randomized end-to-end check of JoinUtil.CreateJoinQuery.
+ //
+ // For every index iteration a fresh random index and its expected-results context are built.
+ // For every search iteration a random value is picked, joined in the direction recorded for
+ // that value, and the matched doc set, the top docs and (unless ScoreMode.None) the scores and
+ // explanations are compared with the expectations derived from the context.
+ //
+ // multipleValuesPerDocument: whether documents carry several link values.
+ // maxIndexIter / maxSearchIter: number of index and per-index search iterations.
+ // numberOfDocumentsToIndex: number of documents written per index iteration.
+ private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
+     int numberOfDocumentsToIndex)
+ {
+     for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
+     {
+         if (VERBOSE)
+         {
+             Console.WriteLine("indexIter=" + indexIter);
+         }
+         Directory dir = NewDirectory();
+         RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
+             NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false))
+                 .SetMergePolicy(NewLogMergePolicy()));
+         bool scoreDocsInOrder = TestJoinUtil.Random().NextBoolean();
+         IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
+             scoreDocsInOrder);
+
+         IndexReader topLevelReader = w.Reader;
+         w.Dispose();
+         for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
+         {
+             if (VERBOSE)
+             {
+                 Console.WriteLine("searchIter=" + searchIter);
+             }
+             IndexSearcher indexSearcher = NewSearcher(topLevelReader);
+
+             // Pick one of the unique values; RandomFrom tells which join direction it belongs to.
+             int r = Random().Next(context.RandomUniqueValues.Length);
+             bool from = context.RandomFrom[r];
+             string randomValue = context.RandomUniqueValues[r];
+             FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
+                 context);
+
+             Query actualQuery = new TermQuery(new Term("value", randomValue));
+             if (VERBOSE)
+             {
+                 Console.WriteLine("actualQuery=" + actualQuery);
+             }
+
+             // Random ScoreMode; relies on the enum members being numbered 0..n-1.
+             var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
+             ScoreMode scoreMode = (ScoreMode) Random().Next(scoreModeLength);
+             if (VERBOSE)
+             {
+                 Console.WriteLine("scoreMode=" + scoreMode);
+             }
+
+             Query joinQuery;
+             if (from)
+             {
+                 joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
+                     indexSearcher, scoreMode);
+             }
+             else
+             {
+                 joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
+                     indexSearcher, scoreMode);
+             }
+             if (VERBOSE)
+             {
+                 Console.WriteLine("joinQuery=" + joinQuery);
+             }
+
+             // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
+             FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
+             TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
+             indexSearcher.Search(joinQuery,
+                 new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult,
+                     topScoreDocCollector));
+             // Asserting bit set...
+             if (VERBOSE)
+             {
+                 Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
+                 DocIdSetIterator iterator = expectedResult.GetIterator();
+                 for (int doc = iterator.NextDoc();
+                     doc != DocIdSetIterator.NO_MORE_DOCS;
+                     doc = iterator.NextDoc())
+                 {
+                     Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
+                 }
+                 Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
+                 iterator = actualResult.GetIterator();
+                 for (int doc = iterator.NextDoc();
+                     doc != DocIdSetIterator.NO_MORE_DOCS;
+                     doc = iterator.NextDoc())
+                 {
+                     Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
+                 }
+             }
+             assertEquals(expectedResult, actualResult);
+
+             // Asserting TopDocs...
+             TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
+             TopDocs actualTopDocs = topScoreDocCollector.TopDocs();
+             assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
+             assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
+             if (scoreMode == ScoreMode.None)
+             {
+                 // Without scoring there is nothing further to compare.
+                 continue;
+             }
+
+             assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
+             for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
+             {
+                 if (VERBOSE)
+                 {
+                     // The formatted text must actually be written; the format strings already end in '\n'.
+                     Console.Write(string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc));
+                     Console.Write(string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score));
+                 }
+                 assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
+                 assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
+                 Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
+                 assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
+             }
+         }
+         topLevelReader.Dispose();
+         dir.Dispose();
+     }
+ }
+
+ // Collector that marks every collected hit in a bit set (doc id offset by the current
+ // reader's DocBase) and forwards all collector calls to a wrapped TopScoreDocCollector.
+ private class CollectorAnonymousInnerClassHelper2 : Collector
+ {
+     private readonly TestJoinUtil _outerInstance;
+     private readonly IndexIterationContext _context;
+     private readonly bool _scoreDocsInOrder;
+     private readonly FixedBitSet _actualResult;
+     private readonly TopScoreDocCollector _topScoreDocCollector;
+
+     // DocBase of the segment currently being collected.
+     private int _docBase;
+
+     public CollectorAnonymousInnerClassHelper2(TestJoinUtil outerInstance, bool scoreDocsInOrder,
+         IndexIterationContext context, FixedBitSet actualResult,
+         TopScoreDocCollector topScoreDocCollector)
+     {
+         _outerInstance = outerInstance;
+         _context = context;
+         _scoreDocsInOrder = scoreDocsInOrder;
+         _actualResult = actualResult;
+         _topScoreDocCollector = topScoreDocCollector;
+     }
+
+     public override bool AcceptsDocsOutOfOrder()
+     {
+         return _scoreDocsInOrder;
+     }
+
+     public override Scorer Scorer
+     {
+         set { _topScoreDocCollector.Scorer = value; }
+     }
+
+     public override AtomicReaderContext NextReader
+     {
+         set
+         {
+             _docBase = value.DocBase;
+             _topScoreDocCollector.NextReader = value;
+         }
+     }
+
+     public override void Collect(int doc)
+     {
+         int topLevelDoc = doc + _docBase;
+         _actualResult.Set(topLevelDoc);
+         _topScoreDocCollector.Collect(doc);
+     }
+ }
+
+ private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter writer, bool multipleValuesPerDocument,
+ bool scoreDocsInOrder) // single-index convenience overload
+ {
+ return CreateContext(nDocs, writer, writer, multipleValuesPerDocument, scoreDocsInOrder); // the same writer is used as both the "from" and the "to" writer
+ }
+
+ private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
+ bool multipleValuesPerDocument, bool scoreDocsInOrder) // indexes nDocs random documents and pre-computes, per unique value, the expected join hits and scores
+ {
+ IndexIterationContext context = new IndexIterationContext();
+ int numRandomValues = nDocs/2; // half as many unique values as documents
+ context.RandomUniqueValues = new string[numRandomValues];
+ ISet<string> trackSet = new HashSet<string>();
+ context.RandomFrom = new bool[numRandomValues];
+ for (int i = 0; i < numRandomValues; i++)
+ {
+ string uniqueRandomValue;
+ do
+ {
+ uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random());
+ // uniqueRandomValue = TestUtil.randomSimpleString(random);
+ } while ("".Equals(uniqueRandomValue) || trackSet.Contains(uniqueRandomValue));
+ // Values must be unique and non-empty, so the loop above keeps drawing until both hold.
+ trackSet.Add(uniqueRandomValue);
+ context.RandomFrom[i] = Random().NextBoolean(); // whether documents carrying this value are indexed as "from" (true) or "to" (false) documents
+ context.RandomUniqueValues[i] = uniqueRandomValue;
+ }
+
+ RandomDoc[] docs = new RandomDoc[nDocs];
+ for (int i = 0; i < nDocs; i++)
+ {
+ string id = Convert.ToString(i);
+ int randomI = Random().Next(context.RandomUniqueValues.Length);
+ string value = context.RandomUniqueValues[randomI];
+ Document document = new Document();
+ document.Add(NewTextField(Random(), "id", id, Field.Store.NO));
+ document.Add(NewTextField(Random(), "value", value, Field.Store.NO));
+
+ bool from = context.RandomFrom[randomI];
+ int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random().Next(10) : 1; // 2..11 link values when multi-valued, otherwise exactly one
+ docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
+ for (int j = 0; j < numberOfLinkValues; j++)
+ {
+ string linkValue = context.RandomUniqueValues[Random().Next(context.RandomUniqueValues.Length)];
+ docs[i].LinkValues.Add(linkValue);
+ if (from)
+ {
+ if (!context.FromDocuments.ContainsKey(linkValue))
+ {
+ context.FromDocuments[linkValue] = new List<RandomDoc>();
+ }
+ if (!context.RandomValueFromDocs.ContainsKey(value))
+ {
+ context.RandomValueFromDocs[value] = new List<RandomDoc>();
+ }
+
+ context.FromDocuments[linkValue].Add(docs[i]);
+ context.RandomValueFromDocs[value].Add(docs[i]);
+ document.Add(NewTextField(Random(), "from", linkValue, Field.Store.NO));
+ }
+ else
+ {
+ if (!context.ToDocuments.ContainsKey(linkValue))
+ {
+ context.ToDocuments[linkValue] = new List<RandomDoc>();
+ }
+ if (!context.RandomValueToDocs.ContainsKey(value))
+ {
+ context.RandomValueToDocs[value] = new List<RandomDoc>();
+ }
+
+ context.ToDocuments[linkValue].Add(docs[i]);
+ context.RandomValueToDocs[value].Add(docs[i]);
+ document.Add(NewTextField(Random(), "to", linkValue, Field.Store.NO));
+ }
+ }
+
+ RandomIndexWriter w;
+ if (from)
+ {
+ w = fromWriter;
+ }
+ else
+ {
+ w = toWriter;
+ }
+
+ w.AddDocument(document);
+ if (Random().Next(10) == 4) // commit after roughly one in ten documents
+ {
+ w.Commit();
+ }
+ if (VERBOSE)
+ {
+ Console.WriteLine("Added document[" + docs[i].Id + "]: " + document);
+ }
+ }
+
+ // Pre-compute, for every unique random value, all documents a join on that value is expected to hit,
+ // together with a JoinScore from which the expected score for any ScoreMode can be derived.
+ IndexSearcher fromSearcher = NewSearcher(fromWriter.Reader);
+ IndexSearcher toSearcher = NewSearcher(toWriter.Reader);
+ for (int i = 0; i < context.RandomUniqueValues.Length; i++)
+ {
+ string uniqueRandomValue = context.RandomUniqueValues[i];
+ string fromField;
+ string toField;
+ IDictionary<string, IDictionary<int, JoinScore>> queryVals;
+ if (context.RandomFrom[i])
+ {
+ fromField = "from";
+ toField = "to";
+ queryVals = context.FromHitsToJoinScore;
+ }
+ else
+ {
+ fromField = "to";
+ toField = "from";
+ queryVals = context.ToHitsToJoinScore;
+ }
+ IDictionary<BytesRef, JoinScore> joinValueToJoinScores = new Dictionary<BytesRef, JoinScore>(); // filled by the collectors below: join value -> accumulated scores of the documents matching the value query
+ if (multipleValuesPerDocument)
+ {
+ fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
+ new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores));
+ }
+ else
+ {
+ fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
+ new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores));
+ }
+
+ IDictionary<int, JoinScore> docToJoinScore = new Dictionary<int, JoinScore>();
+ if (multipleValuesPerDocument)
+ {
+ if (scoreDocsInOrder)
+ {
+ AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
+ Terms terms = slowCompositeReader.Terms(toField);
+ if (terms != null)
+ {
+ DocsEnum docsEnum = null;
+ TermsEnum termsEnum = null;
+ SortedSet<BytesRef> joinValues =
+ new SortedSet<BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
+ joinValues.AddAll(joinValueToJoinScores.Keys);
+ foreach (BytesRef joinValue in joinValues)
+ {
+ termsEnum = terms.Iterator(termsEnum);
+ if (termsEnum.SeekExact(joinValue))
+ {
+ docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsEnum.FLAG_NONE);
+ JoinScore joinScore = joinValueToJoinScores[joinValue];
+
+ for (int doc = docsEnum.NextDoc();
+ doc != DocIdSetIterator.NO_MORE_DOCS;
+ doc = docsEnum.NextDoc())
+ {
+ // First encountered join value determines the score.
+ // Something to keep in mind for many-to-many relations.
+ if (!docToJoinScore.ContainsKey(doc))
+ {
+ docToJoinScore[doc] = joinScore;
+ }
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ toSearcher.Search(new MatchAllDocsQuery(),
+ new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores,
+ docToJoinScore));
+ }
+ }
+ else
+ {
+ toSearcher.Search(new MatchAllDocsQuery(),
+ new CollectorAnonymousInnerClassHelper6(this, context, toField, joinValueToJoinScores,
+ docToJoinScore));
+ }
+ queryVals[uniqueRandomValue] = docToJoinScore; // expected doc id -> JoinScore when querying for this value
+ }
+
+ fromSearcher.IndexReader.Dispose();
+ toSearcher.IndexReader.Dispose();
+
+ return context;
+ }
+
+ // Collector for a multi-valued join field: for every collected document it walks the
+ // document's term ordinals of FromField and adds the document's score to the JoinScore
+ // registered for each join value, creating the JoinScore the first time a value is seen.
+ private class CollectorAnonymousInnerClassHelper3 : Collector
+ {
+     private readonly TestJoinUtil OuterInstance;
+
+     private IndexIterationContext Context;
+     private string FromField;
+     private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
+
+     public CollectorAnonymousInnerClassHelper3(TestJoinUtil outerInstance,
+         IndexIterationContext context, string fromField,
+         IDictionary<BytesRef, JoinScore> joinValueToJoinScores)
+     {
+         OuterInstance = outerInstance;
+         Context = context;
+         FromField = fromField;
+         JoinValueToJoinScores = joinValueToJoinScores;
+         joinValue = new BytesRef();
+     }
+
+
+     private Scorer scorer;
+     private SortedSetDocValues docTermOrds;
+     // Scratch buffer reused for every ordinal lookup.
+     internal readonly BytesRef joinValue;
+
+     public override void Collect(int doc)
+     {
+         docTermOrds.Document = doc;
+         long ord;
+         while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+         {
+             docTermOrds.LookupOrd(ord, joinValue);
+             // The dictionary indexer throws KeyNotFoundException for an unknown key,
+             // so a missing entry has to be detected with TryGetValue.
+             JoinScore joinScore;
+             if (!JoinValueToJoinScores.TryGetValue(joinValue, out joinScore) || joinScore == null)
+             {
+                 // joinValue is a reused scratch buffer, so the key must be a deep copy.
+                 JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
+             }
+             joinScore.AddScore(scorer.Score());
+         }
+     }
+
+     public override AtomicReaderContext NextReader
+     {
+         set { docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, FromField); }
+     }
+
+     public override Scorer Scorer
+     {
+         set { scorer = value; }
+     }
+
+     public override bool AcceptsDocsOutOfOrder()
+     {
+         return false;
+     }
+ }
+
+ // Collector for a single-valued join field: for every collected document that has a value
+ // in FromField it adds the document's score to the JoinScore registered for that join value,
+ // creating the JoinScore the first time a value is seen.
+ private class CollectorAnonymousInnerClassHelper4 : Collector
+ {
+     private readonly TestJoinUtil OuterInstance;
+
+     private IndexIterationContext Context;
+     private string FromField;
+     private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
+
+     public CollectorAnonymousInnerClassHelper4(TestJoinUtil outerInstance,
+         IndexIterationContext context, string fromField,
+         IDictionary<BytesRef, JoinScore> joinValueToJoinScores)
+     {
+         OuterInstance = outerInstance;
+         Context = context;
+         FromField = fromField;
+         JoinValueToJoinScores = joinValueToJoinScores;
+         spare = new BytesRef();
+     }
+
+
+     private Scorer scorer;
+     private BinaryDocValues terms;
+     private Bits docsWithField;
+     // Scratch buffer reused for every term lookup.
+     private readonly BytesRef spare;
+
+     public override void Collect(int doc)
+     {
+         terms.Get(doc, spare);
+         BytesRef joinValue = spare;
+         // An empty value on a document without the field means "no join value": skip it.
+         if (joinValue.Length == 0 && !docsWithField.Get(doc))
+         {
+             return;
+         }
+
+         // The dictionary indexer throws KeyNotFoundException for an unknown key,
+         // so a missing entry has to be detected with TryGetValue.
+         JoinScore joinScore;
+         if (!JoinValueToJoinScores.TryGetValue(joinValue, out joinScore) || joinScore == null)
+         {
+             // spare is reused between calls, so the key must be a deep copy.
+             JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
+         }
+         joinScore.AddScore(scorer.Score());
+     }
+
+     public override AtomicReaderContext NextReader
+     {
+         set
+         {
+             terms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, FromField, true);
+             docsWithField = FieldCache.DEFAULT.GetDocsWithField(value.AtomicReader, FromField);
+         }
+     }
+
+     public override Scorer Scorer
+     {
+         set { scorer = value; }
+     }
+
+     public override bool AcceptsDocsOutOfOrder()
+     {
+         return false;
+     }
+ }
+
+ // Collector for the "to" side of a multi-valued join: for every collected document it walks
+ // the document's term ordinals of the to-field and, for the first join value that has a
+ // JoinScore, records that JoinScore under the document's DocBase-adjusted id.
+ private class CollectorAnonymousInnerClassHelper5 : Collector
+ {
+     private readonly TestJoinUtil OuterInstance;
+
+     private string _toField;
+     private readonly IDictionary<BytesRef, JoinScore> _joinValueToJoinScores;
+     private readonly IDictionary<int, JoinScore> _docToJoinScore;
+
+     private SortedSetDocValues docTermOrds;
+     // Scratch buffer for ordinal lookups; it must be allocated up front because
+     // LookupOrd writes into it (it was previously left null).
+     private readonly BytesRef scratch = new BytesRef();
+     private int docBase;
+
+     public CollectorAnonymousInnerClassHelper5(TestJoinUtil testJoinUtil, IndexIterationContext context,
+         string toField, IDictionary<BytesRef, JoinScore> joinValueToJoinScores,
+         IDictionary<int, JoinScore> docToJoinScore)
+     {
+         OuterInstance = testJoinUtil;
+         _toField = toField;
+         _joinValueToJoinScores = joinValueToJoinScores;
+         _docToJoinScore = docToJoinScore;
+     }
+
+     public override void Collect(int doc)
+     {
+         docTermOrds.Document = doc;
+         long ord;
+         while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+         {
+             docTermOrds.LookupOrd(ord, scratch);
+             // The dictionary indexer throws KeyNotFoundException for an unknown key,
+             // so join values without a score are detected with TryGetValue and skipped.
+             JoinScore joinScore;
+             if (!_joinValueToJoinScores.TryGetValue(scratch, out joinScore) || joinScore == null)
+             {
+                 continue;
+             }
+             int basedDoc = docBase + doc;
+             // First encountered join value determines the score.
+             // Something to keep in mind for many-to-many relations.
+             if (!_docToJoinScore.ContainsKey(basedDoc))
+             {
+                 _docToJoinScore[basedDoc] = joinScore;
+             }
+         }
+     }
+
+     public override AtomicReaderContext NextReader
+     {
+         set
+         {
+             docBase = value.DocBase;
+             docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _toField);
+         }
+     }
+
+     public override bool AcceptsDocsOutOfOrder()
+     {
+         return false;
+     }
+
+     public override Scorer Scorer
+     {
+         // Scores are not needed on this side of the join.
+         set { }
+     }
+ }
+
+ // Collector for the "to" side of a single-valued join: for every collected document it looks
+ // up the document's to-field value and, when a JoinScore exists for that value, records it
+ // under the document's DocBase-adjusted id.
+ private class CollectorAnonymousInnerClassHelper6 : Collector
+ {
+     private readonly TestJoinUtil OuterInstance;
+
+     private IndexIterationContext Context;
+     private string ToField;
+     private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
+     private IDictionary<int, JoinScore> DocToJoinScore;
+
+     private BinaryDocValues terms;
+     private int docBase;
+     // Scratch buffer for term lookups; it must be allocated up front because
+     // terms.Get writes into it (it was previously left null).
+     private readonly BytesRef spare = new BytesRef();
+
+     public CollectorAnonymousInnerClassHelper6(TestJoinUtil testJoinUtil,
+         IndexIterationContext context, string toField,
+         IDictionary<BytesRef, JoinScore> joinValueToJoinScores,
+         IDictionary<int, JoinScore> docToJoinScore)
+     {
+         OuterInstance = testJoinUtil;
+         Context = context;
+         ToField = toField;
+         JoinValueToJoinScores = joinValueToJoinScores;
+         DocToJoinScore = docToJoinScore;
+     }
+
+     public override void Collect(int doc)
+     {
+         terms.Get(doc, spare);
+         // The dictionary indexer throws KeyNotFoundException for an unknown key,
+         // so documents whose value has no score are detected with TryGetValue and skipped.
+         JoinScore joinScore;
+         if (!JoinValueToJoinScores.TryGetValue(spare, out joinScore) || joinScore == null)
+         {
+             return;
+         }
+         DocToJoinScore[docBase + doc] = joinScore;
+     }
+
+     public override AtomicReaderContext NextReader
+     {
+         set
+         {
+             terms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, ToField, false);
+             docBase = value.DocBase;
+         }
+     }
+
+     public override bool AcceptsDocsOutOfOrder()
+     {
+         return false;
+     }
+
+     public override Scorer Scorer
+     {
+         // Scores are not needed on this side of the join.
+         set { }
+     }
+ }
+
+ // Builds the TopDocs expected for a join on queryValue: the pre-computed hits for the given
+ // direction, ordered by ComparatorAnonymousInnerClassHelper and cut off after ten entries.
+ // MaxScore is the score of the first hit, or NaN when there are no hits.
+ private TopDocs CreateExpectedTopDocs(string queryValue, bool from, ScoreMode scoreMode,
+     IndexIterationContext context)
+ {
+     IDictionary<int, JoinScore> hitsToJoinScores;
+     if (from)
+     {
+         hitsToJoinScores = context.FromHitsToJoinScore[queryValue];
+     }
+     else
+     {
+         hitsToJoinScores = context.ToHitsToJoinScore[queryValue];
+     }
+
+     List<KeyValuePair<int, JoinScore>> sortedHits =
+         new List<KeyValuePair<int, JoinScore>>(hitsToJoinScores.EntrySet());
+     sortedHits.Sort(new ComparatorAnonymousInnerClassHelper(this, scoreMode));
+
+     int topCount = Math.Min(10, sortedHits.Count);
+     ScoreDoc[] topScoreDocs = new ScoreDoc[topCount];
+     for (int rank = 0; rank < topCount; rank++)
+     {
+         KeyValuePair<int, JoinScore> entry = sortedHits[rank];
+         topScoreDocs[rank] = new ScoreDoc(entry.Key, entry.Value.Score(scoreMode));
+     }
+
+     float maxScore;
+     if (sortedHits.Count == 0)
+     {
+         maxScore = float.NaN;
+     }
+     else
+     {
+         maxScore = sortedHits[0].Value.Score(scoreMode);
+     }
+     return new TopDocs(sortedHits.Count, topScoreDocs, maxScore);
+ }
+
+ // Orders hits by score for the configured ScoreMode, highest first; hits with equal scores
+ // are ordered by ascending key (the doc id).
+ private class ComparatorAnonymousInnerClassHelper : IComparer<KeyValuePair<int, JoinScore>>
+ {
+     private readonly TestJoinUtil _outerInstance;
+     private readonly ScoreMode _scoreMode;
+
+     public ComparatorAnonymousInnerClassHelper(TestJoinUtil outerInstance, ScoreMode scoreMode)
+     {
+         _outerInstance = outerInstance;
+         _scoreMode = scoreMode;
+     }
+
+     public virtual int Compare(KeyValuePair<int, JoinScore> hit1, KeyValuePair<int, JoinScore> hit2)
+     {
+         float firstScore = hit1.Value.Score(_scoreMode);
+         float secondScore = hit2.Value.Score(_scoreMode);
+
+         // Reversed operands give a descending order on score.
+         int byScoreDescending = secondScore.CompareTo(firstScore);
+         return byScoreDescending != 0 ? byScoreDescending : hit1.Key - hit2.Key;
+     }
+ }
+
+ // Computes the set of top-level doc ids a join on queryValue is expected to match.
+ //
+ // Starting from the documents that carry queryValue on the queried side, every link value of
+ // those documents is followed to the documents on the other side holding the same link value;
+ // each such document is located in topLevelReader through its "id" term and its doc id is set
+ // in the result. A value or link value without matching documents contributes nothing.
+ private FixedBitSet CreateExpectedResult(string queryValue, bool from, IndexReader topLevelReader,
+     IndexIterationContext context)
+ {
+     IDictionary<string, IList<RandomDoc>> randomValueDocs;
+     IDictionary<string, IList<RandomDoc>> linkValueDocuments;
+     if (from)
+     {
+         randomValueDocs = context.RandomValueFromDocs;
+         linkValueDocuments = context.ToDocuments;
+     }
+     else
+     {
+         randomValueDocs = context.RandomValueToDocs;
+         linkValueDocuments = context.FromDocuments;
+     }
+
+     FixedBitSet expectedResult = new FixedBitSet(topLevelReader.MaxDoc);
+
+     // The dictionary indexer throws KeyNotFoundException for an unknown key, so a value
+     // that no document carries has to be detected with TryGetValue.
+     IList<RandomDoc> matchingDocs;
+     if (!randomValueDocs.TryGetValue(queryValue, out matchingDocs) || matchingDocs == null)
+     {
+         return expectedResult;
+     }
+
+     foreach (RandomDoc matchingDoc in matchingDocs)
+     {
+         foreach (string linkValue in matchingDoc.LinkValues)
+         {
+             IList<RandomDoc> otherMatchingDocs;
+             if (!linkValueDocuments.TryGetValue(linkValue, out otherMatchingDocs) || otherMatchingDocs == null)
+             {
+                 // Nothing on the other side links to this value.
+                 continue;
+             }
+
+             foreach (RandomDoc otherSideDoc in otherMatchingDocs)
+             {
+                 DocsEnum docsEnum = MultiFields.GetTermDocsEnum(topLevelReader,
+                     MultiFields.GetLiveDocs(topLevelReader), "id", new BytesRef(otherSideDoc.Id), 0);
+                 Debug.Assert(docsEnum != null);
+                 int doc = docsEnum.NextDoc();
+                 expectedResult.Set(doc);
+             }
+         }
+     }
+     return expectedResult;
+ }
+
+ // Holds everything recorded while building one random index, used later to derive the
+ // expected join results.
+ private class IndexIterationContext
+ {
+     // The unique random values, and per value whether its documents are "from" documents.
+     internal string[] RandomUniqueValues;
+     internal bool[] RandomFrom;
+
+     // Link value -> documents holding that link value, per side.
+     internal IDictionary<string, IList<RandomDoc>> FromDocuments = new Dictionary<string, IList<RandomDoc>>();
+     internal IDictionary<string, IList<RandomDoc>> ToDocuments = new Dictionary<string, IList<RandomDoc>>();
+
+     // Value -> documents carrying that value, per side.
+     internal IDictionary<string, IList<RandomDoc>> RandomValueFromDocs = new Dictionary<string, IList<RandomDoc>>();
+     internal IDictionary<string, IList<RandomDoc>> RandomValueToDocs = new Dictionary<string, IList<RandomDoc>>();
+
+     // Value -> (doc id -> JoinScore) of the hits expected when joining on that value, per direction.
+     internal IDictionary<string, IDictionary<int, JoinScore>> FromHitsToJoinScore = new Dictionary<string, IDictionary<int, JoinScore>>();
+     internal IDictionary<string, IDictionary<int, JoinScore>> ToHitsToJoinScore = new Dictionary<string, IDictionary<int, JoinScore>>();
+ }
+
+ // In-memory description of one indexed document: its id, its value, its side and its link values.
+ private class RandomDoc
+ {
+     internal readonly string Id;
+     internal readonly string Value;
+     internal readonly bool From;
+     internal readonly IList<string> LinkValues;
+
+     internal RandomDoc(string id, int numberOfLinkValues, string value, bool from)
+     {
+         Id = id;
+         Value = value;
+         From = from;
+         // numberOfLinkValues only pre-sizes the list; the values are added by the caller.
+         LinkValues = new List<string>(numberOfLinkValues);
+     }
+ }
+
+ // Accumulates the scores added for one join value and reports them per ScoreMode.
+ private class JoinScore
+ {
+     internal float MaxScore;
+     internal float Total;
+     internal int Count;
+
+     // Adds one score to the running total, count and maximum.
+     internal virtual void AddScore(float score)
+     {
+         Count++;
+         Total += score;
+         if (score > MaxScore)
+         {
+             MaxScore = score;
+         }
+     }
+
+     // Returns the score for the given mode: 1 for None, the sum for Total, the mean for Avg
+     // and the maximum for Max. Any other mode raises ArgumentException.
+     internal virtual float Score(ScoreMode mode)
+     {
+         if (mode == ScoreMode.None)
+         {
+             return 1.0f;
+         }
+         if (mode == ScoreMode.Total)
+         {
+             return Total;
+         }
+         if (mode == ScoreMode.Avg)
+         {
+             return Total/Count;
+         }
+         if (mode == ScoreMode.Max)
+         {
+             return MaxScore;
+         }
+         throw new ArgumentException("Unsupported ScoreMode: " + mode);
+     }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/packages.config
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/packages.config b/src/Lucene.Net.Tests.Join/packages.config
new file mode 100644
index 0000000..f0ed309
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/packages.config
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<packages>
+ <package id="Apache.NMS" version="1.6.0.3083" targetFramework="net451" />
+ <package id="NUnit" version="2.6.3" targetFramework="net451" />
+</packages>
\ No newline at end of file
[07/17] lucenenet git commit: All ToChildBlockJoinQuery tests passing
Posted by sy...@apache.org.
All ToChildBlockJoinQuery tests passing
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1213ca7c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1213ca7c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1213ca7c
Branch: refs/heads/master
Commit: 1213ca7c61e990ba008f087f00f167eb554d783c
Parents: dff959f
Author: Josh Sullivan <ja...@gmail.com>
Authored: Mon Aug 17 15:24:46 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Mon Aug 17 15:24:46 2015 -0400
----------------------------------------------------------------------
Lucene.Net.Join/ToChildBlockJoinQuery.cs | 2 +-
.../Lucene.Net.Tests.Join.csproj | 1 +
.../TestBlockJoinValidation.cs | 227 +++++++++++++++++++
3 files changed, 229 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1213ca7c/Lucene.Net.Join/ToChildBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToChildBlockJoinQuery.cs b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
index f16e2a9..3d4f2d5 100644
--- a/Lucene.Net.Join/ToChildBlockJoinQuery.cs
+++ b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
@@ -38,7 +38,7 @@ namespace Lucene.Net.Join
/// Message thrown from <see cref="ToChildBlockJoinScorer.ValidateParentDoc"/>
/// on mis-use, when the parent query incorrectly returns child docs.
/// </summary>
- internal const string InvalidQueryMessage = "Parent query yields document which is not matched by parents filter, docID=";
+ public const string InvalidQueryMessage = "Parent query yields document which is not matched by parents filter, docID=";
private readonly Filter _parentsFilter;
private readonly Query _parentQuery;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1213ca7c/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
index eff35a9..9c959f8 100644
--- a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
+++ b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
@@ -51,6 +51,7 @@
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="TestBlockJoin.cs" />
<Compile Include="TestBlockJoinSorting.cs" />
+ <Compile Include="TestBlockJoinValidation.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1213ca7c/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs b/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
new file mode 100644
index 0000000..5fdd35f
--- /dev/null
+++ b/Lucene.Net.Tests.Join/TestBlockJoinValidation.cs
@@ -0,0 +1,227 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using Apache.NMS;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Join;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Tests.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ // Verifies that the block join queries reject mis-use: a ToParentBlockJoinQuery whose child
+ // query also matches parent documents, and a ToChildBlockJoinQuery whose parent query also
+ // matches child documents, must both fail with InvalidOperationException, whether the
+ // offending document is met while iterating or while advancing inside a conjunction.
+ public class TestBlockJoinValidation : LuceneTestCase
+ {
+
+     public const int AMOUNT_OF_SEGMENTS = 5;
+     public const int AMOUNT_OF_PARENT_DOCS = 10;
+     public const int AMOUNT_OF_CHILD_DOCS = 5;
+     // Every parent document is stored together with AMOUNT_OF_CHILD_DOCS children.
+     public static readonly int AMOUNT_OF_DOCS_IN_SEGMENT = AMOUNT_OF_PARENT_DOCS + AMOUNT_OF_PARENT_DOCS * AMOUNT_OF_CHILD_DOCS;
+
+     private Directory Directory;
+     private IndexReader IndexReader;
+     private IndexSearcher IndexSearcher;
+     private Filter ParentsFilter;
+
+     // Builds an index with one commit per generated segment batch and opens a searcher on it.
+     [SetUp]
+     public override void SetUp()
+     {
+         // Run the base fixture set-up first; the override would otherwise replace it.
+         base.SetUp();
+         Directory = NewDirectory();
+         IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+         IndexWriter indexWriter = new IndexWriter(Directory, config);
+         for (int i = 0; i < AMOUNT_OF_SEGMENTS; i++)
+         {
+             IList<Document> segmentDocs = CreateDocsForSegment(i);
+             indexWriter.AddDocuments(segmentDocs);
+             indexWriter.Commit();
+         }
+         IndexReader = DirectoryReader.Open(indexWriter, Random().NextBoolean());
+         indexWriter.Dispose();
+         IndexSearcher = new IndexSearcher(IndexReader);
+         // Parent documents are exactly those that have any value in the "parent" field.
+         ParentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new WildcardQuery(new Term("parent", "*"))));
+     }
+
+     [TearDown]
+     public override void TearDown()
+     {
+         IndexReader.Dispose();
+         Directory.Dispose();
+         // Release our own resources first, then let the base fixture tear down.
+         base.TearDown();
+     }
+
+     [Test]
+     public void TestNextDocValidationForToParentBjq()
+     {
+         Query parentQueryWithRandomChild = CreateChildrenQueryWithOneParent(GetRandomChildNumber(0));
+         var blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, ScoreMode.None);
+
+         var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(blockJoinQuery, 1));
+         StringAssert.Contains("child query must only match non-parent docs", ex.Message);
+
+     }
+
+     [Test]
+     public void TestAdvanceValidationForToParentBjq()
+     {
+         int randomChildNumber = GetRandomChildNumber(0);
+         // we need to make advance method meet wrong document, so random child number
+         // in BJQ must be greater than child number in Boolean clause
+         int nextRandomChildNumber = GetRandomChildNumber(randomChildNumber);
+         Query parentQueryWithRandomChild = CreateChildrenQueryWithOneParent(nextRandomChildNumber);
+         ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, ScoreMode.None);
+         // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
+         BooleanQuery conjunctionQuery = new BooleanQuery();
+         WildcardQuery childQuery = new WildcardQuery(new Term("child", CreateFieldValue(randomChildNumber)));
+         conjunctionQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
+         conjunctionQuery.Add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
+
+         var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(conjunctionQuery, 1));
+         StringAssert.Contains("child query must only match non-parent docs", ex.Message);
+     }
+
+     [Test]
+     public void TestNextDocValidationForToChildBjq()
+     {
+         Query parentQueryWithRandomChild = CreateParentsQueryWithOneChild(GetRandomChildNumber(0));
+         var blockJoinQuery = new ToChildBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, false);
+
+         var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(blockJoinQuery, 1));
+         StringAssert.Contains(ToChildBlockJoinQuery.InvalidQueryMessage, ex.Message);
+     }
+
+     [Test]
+     public void TestAdvanceValidationForToChildBjq()
+     {
+         int randomChildNumber = GetRandomChildNumber(0);
+         // we need to make advance method meet wrong document, so random child number
+         // in BJQ must be greater than child number in Boolean clause
+         int nextRandomChildNumber = GetRandomChildNumber(randomChildNumber);
+         Query parentQueryWithRandomChild = CreateParentsQueryWithOneChild(nextRandomChildNumber);
+         var blockJoinQuery = new ToChildBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, false);
+         // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
+         var conjunctionQuery = new BooleanQuery();
+         var childQuery = new WildcardQuery(new Term("child", CreateFieldValue(randomChildNumber)));
+         conjunctionQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
+         conjunctionQuery.Add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
+
+         var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(conjunctionQuery, 1));
+         StringAssert.Contains(ToChildBlockJoinQuery.InvalidQueryMessage, ex.Message);
+     }
+
+     // Creates all parent/child blocks of one batch, flattened into a single document list.
+     private static IList<Document> CreateDocsForSegment(int segmentNumber)
+     {
+         IList<IList<Document>> blocks = new List<IList<Document>>(AMOUNT_OF_PARENT_DOCS);
+         for (int i = 0; i < AMOUNT_OF_PARENT_DOCS; i++)
+         {
+             blocks.Add(CreateParentDocWithChildren(segmentNumber, i));
+         }
+         IList<Document> result = new List<Document>(AMOUNT_OF_DOCS_IN_SEGMENT);
+         foreach (IList<Document> block in blocks)
+         {
+             result.AddRange(block);
+         }
+         return result;
+     }
+
+     // Creates one block: the child documents first, the parent document last.
+     private static IList<Document> CreateParentDocWithChildren(int segmentNumber, int parentNumber)
+     {
+         IList<Document> result = new List<Document>(AMOUNT_OF_CHILD_DOCS + 1);
+         for (int i = 0; i < AMOUNT_OF_CHILD_DOCS; i++)
+         {
+             result.Add(CreateChildDoc(segmentNumber, parentNumber, i));
+         }
+         result.Add(CreateParentDoc(segmentNumber, parentNumber));
+         return result;
+     }
+
+     // Parent document: stored "id" (global parent number) plus a "parent" marker field.
+     private static Document CreateParentDoc(int segmentNumber, int parentNumber)
+     {
+         Document result = new Document();
+         result.Add(NewStringField("id", CreateFieldValue(segmentNumber * AMOUNT_OF_PARENT_DOCS + parentNumber), Field.Store.YES));
+         result.Add(NewStringField("parent", CreateFieldValue(parentNumber), Field.Store.NO));
+         return result;
+     }
+
+     // Child document: stored "id" (global parent number and child number) plus a "child" field.
+     private static Document CreateChildDoc(int segmentNumber, int parentNumber, int childNumber)
+     {
+         Document result = new Document();
+         result.Add(NewStringField("id", CreateFieldValue(segmentNumber * AMOUNT_OF_PARENT_DOCS + parentNumber, childNumber), Field.Store.YES));
+         result.Add(NewStringField("child", CreateFieldValue(childNumber), Field.Store.NO));
+         return result;
+     }
+
+     // Joins the given numbers with '_' (e.g. 3 and 1 become "3_1").
+     private static string CreateFieldValue(params int[] documentNumbers)
+     {
+         StringBuilder stringBuilder = new StringBuilder();
+         foreach (int documentNumber in documentNumbers)
+         {
+             if (stringBuilder.Length > 0)
+             {
+                 stringBuilder.Append("_");
+             }
+             stringBuilder.Append(documentNumber);
+         }
+         return stringBuilder.ToString();
+     }
+
+     // Deliberately invalid child query: matches the given children OR one random parent document.
+     private static Query CreateChildrenQueryWithOneParent(int childNumber)
+     {
+         TermQuery childQuery = new TermQuery(new Term("child", CreateFieldValue(childNumber)));
+         Query randomParentQuery = new TermQuery(new Term("id", CreateFieldValue(RandomParentId)));
+         BooleanQuery childrenQueryWithRandomParent = new BooleanQuery();
+         childrenQueryWithRandomParent.Add(new BooleanClause(childQuery, BooleanClause.Occur.SHOULD));
+         childrenQueryWithRandomParent.Add(new BooleanClause(randomParentQuery, BooleanClause.Occur.SHOULD));
+         return childrenQueryWithRandomParent;
+     }
+
+     // Deliberately invalid parent query: matches some parents OR one random child document.
+     private static Query CreateParentsQueryWithOneChild(int randomChildNumber)
+     {
+         BooleanQuery childQueryWithRandomParent = new BooleanQuery();
+         Query parentsQuery = new TermQuery(new Term("parent", CreateFieldValue(RandomParentNumber)));
+         childQueryWithRandomParent.Add(new BooleanClause(parentsQuery, BooleanClause.Occur.SHOULD));
+         childQueryWithRandomParent.Add(new BooleanClause(RandomChildQuery(randomChildNumber), BooleanClause.Occur.SHOULD));
+         return childQueryWithRandomParent;
+     }
+
+     // Random global parent number across all generated batches.
+     private static int RandomParentId
+     {
+         get { return Random().Next(AMOUNT_OF_PARENT_DOCS*AMOUNT_OF_SEGMENTS); }
+     }
+
+     // Random parent number within one batch.
+     private static int RandomParentNumber
+     {
+         get { return Random().Next(AMOUNT_OF_PARENT_DOCS); }
+     }
+
+     // Query matching the single child document with the given number under a random parent.
+     private static Query RandomChildQuery(int randomChildNumber)
+     {
+         return new TermQuery(new Term("id", CreateFieldValue(RandomParentId, randomChildNumber)));
+     }
+
+     // Random child number in [notLessThan, AMOUNT_OF_CHILD_DOCS).
+     private static int GetRandomChildNumber(int notLessThan)
+     {
+         return notLessThan + Random().Next(AMOUNT_OF_CHILD_DOCS - notLessThan);
+     }
+ }
+}
\ No newline at end of file
[17/17] lucenenet git commit: Revert Random() seed
Posted by sy...@apache.org.
Revert Random() seed
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/812e1c54
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/812e1c54
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/812e1c54
Branch: refs/heads/master
Commit: 812e1c541f1a00392391c5761fd3dcb7b0aedd88
Parents: 4820f23
Author: Josh Sullivan <ja...@gmail.com>
Authored: Sun Aug 23 18:02:01 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Sun Aug 23 18:02:01 2015 -0400
----------------------------------------------------------------------
src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/812e1c54/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs b/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
index ea68c2f..3667b87 100644
--- a/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
+++ b/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
@@ -607,7 +607,7 @@ namespace Lucene.Net.Util
/// </summary>
public static Random Random()
{
- return _random ?? (_random = new Random(1));
+ return _random ?? (_random = new Random(/* LUCENENET TODO seed */));
//return RandomizedContext.Current.Random;
}
[15/17] lucenenet git commit: Lucene.Net.Join tests now passing
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/ToChildBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToChildBlockJoinQuery.cs b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
deleted file mode 100644
index 3d4f2d5..0000000
--- a/Lucene.Net.Join/ToChildBlockJoinQuery.cs
+++ /dev/null
@@ -1,396 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Just like <see cref="ToParentBlockJoinQuery"/>, except this
- /// query joins in reverse: you provide a Query matching
- /// parent documents and it joins down to child
- /// documents.
- ///
- /// @lucene.experimental
- /// </summary>
- public class ToChildBlockJoinQuery : Query
- {
- /// <summary>
- /// Message thrown from <see cref="ToChildBlockJoinScorer.ValidateParentDoc"/>
- /// on mis-use, when the parent query incorrectly returns child docs.
- /// </summary>
- public const string InvalidQueryMessage = "Parent query yields document which is not matched by parents filter, docID=";
-
- private readonly Filter _parentsFilter;
- private readonly Query _parentQuery;
-
- // If we are rewritten, this is the original parentQuery we
- // were passed; we use this for Equals() and
- // GetHashCode(). This makes the rewritten query equal the
- // original, so that the user does not have to Rewrite() their
- // query before searching:
- private readonly Query _origParentQuery;
- private readonly bool _doScores;
-
- /// <summary>
- /// Create a ToChildBlockJoinQuery.
- /// </summary>
- /// <param name="parentQuery">Query that matches parent documents</param>
- /// <param name="parentsFilter">Filter (must produce FixedBitSet per-segment, like <see cref="FixedBitSetCachingWrapperFilter"/>)
- /// identifying the parent documents.</param>
- /// <param name="doScores">True if parent scores should be calculated.</param>
- public ToChildBlockJoinQuery(Query parentQuery, Filter parentsFilter, bool doScores)
- {
- _origParentQuery = parentQuery;
- _parentQuery = parentQuery;
- _parentsFilter = parentsFilter;
- _doScores = doScores;
- }
-
- private ToChildBlockJoinQuery(Query origParentQuery, Query parentQuery, Filter parentsFilter, bool doScores) : base()
- {
- _origParentQuery = origParentQuery;
- _parentQuery = parentQuery;
- _parentsFilter = parentsFilter;
- _doScores = doScores;
- }
-
- public override Weight CreateWeight(IndexSearcher searcher)
- {
- return new ToChildBlockJoinWeight(this, _parentQuery.CreateWeight(searcher), _parentsFilter, _doScores);
- }
-
- private class ToChildBlockJoinWeight : Weight
- {
- private readonly Query _joinQuery;
- private readonly Weight _parentWeight;
- private readonly Filter _parentsFilter;
- private readonly bool _doScores;
-
- public ToChildBlockJoinWeight(Query joinQuery, Weight parentWeight, Filter parentsFilter, bool doScores) : base()
- {
- _joinQuery = joinQuery;
- _parentWeight = parentWeight;
- _parentsFilter = parentsFilter;
- _doScores = doScores;
- }
-
- public override Query Query
- {
- get { return _joinQuery; }
- }
-
- public override float ValueForNormalization
- {
- get { return _parentWeight.ValueForNormalization*_joinQuery.Boost*_joinQuery.Boost; }
- }
-
- public override void Normalize(float norm, float topLevelBoost)
- {
- _parentWeight.Normalize(norm, topLevelBoost * _joinQuery.Boost);
- }
-
- // NOTE: acceptDocs applies (and is checked) only in the child document space
- public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
- {
- Scorer parentScorer = _parentWeight.Scorer(readerContext, null);
-
- if (parentScorer == null)
- {
- // No matches
- return null;
- }
-
- // NOTE: we cannot pass acceptDocs here because this
- // will (most likely, justifiably) cause the filter to
- // not return a FixedBitSet but rather a
- // BitsFilteredDocIdSet. Instead, we filter by
- // acceptDocs when we score:
- DocIdSet parents = _parentsFilter.GetDocIdSet(readerContext, null);
-
- if (parents == null)
- {
- // No matches
- return null;
- }
- if (!(parents is FixedBitSet))
- {
- throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
- }
-
- return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet)parents, _doScores, acceptDocs);
- }
-
- public override Explanation Explain(AtomicReaderContext reader, int doc)
- {
- // TODO
- throw new NotSupportedException(GetType().Name + " cannot explain match on parent document");
- }
-
- public override bool ScoresDocsOutOfOrder()
- {
- return false;
- }
- }
-
- private sealed class ToChildBlockJoinScorer : Scorer
- {
- private readonly Scorer _parentScorer;
- private readonly FixedBitSet _parentBits;
- private readonly bool _doScores;
- private readonly Bits _acceptDocs;
-
- private float _parentScore;
- private int _parentFreq = 1;
-
- private int _childDoc = -1;
- private int _parentDoc;
-
- public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, bool doScores, Bits acceptDocs) : base(weight)
- {
- _doScores = doScores;
- _parentBits = parentBits;
- _parentScorer = parentScorer;
- _acceptDocs = acceptDocs;
- }
-
- public override ICollection<ChildScorer> Children
- {
- get { return Collections.Singleton(new ChildScorer(_parentScorer, "BLOCK_JOIN")); }
- }
-
- public override int NextDoc()
- {
- //System.out.println("Q.nextDoc() parentDoc=" + parentDoc + " childDoc=" + childDoc);
-
- // Loop until we hit a childDoc that's accepted
- while (true)
- {
- if (_childDoc + 1 == _parentDoc)
- {
- // OK, we are done iterating through all children
- // matching this one parent doc, so we now nextDoc()
- // the parent. Use a while loop because we may have
- // to skip over some number of parents w/ no
- // children:
- while (true)
- {
- _parentDoc = _parentScorer.NextDoc();
- ValidateParentDoc();
-
- if (_parentDoc == 0)
- {
- // Degenerate but allowed: first parent doc has no children
- // TODO: would be nice to pull initial parent
- // into ctor so we can skip this if... but it's
- // tricky because scorer must return -1 for
- // .doc() on init...
- _parentDoc = _parentScorer.NextDoc();
- ValidateParentDoc();
- }
-
- if (_parentDoc == NO_MORE_DOCS)
- {
- _childDoc = NO_MORE_DOCS;
- //System.out.println(" END");
- return _childDoc;
- }
-
- // Go to first child for this next parentDoc:
- _childDoc = 1 + _parentBits.PrevSetBit(_parentDoc - 1);
-
- if (_childDoc == _parentDoc)
- {
- // This parent has no children; continue
- // parent loop so we move to next parent
- continue;
- }
-
- if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
- {
- goto nextChildDocContinue;
- }
-
- if (_childDoc < _parentDoc)
- {
- if (_doScores)
- {
- _parentScore = _parentScorer.Score();
- _parentFreq = _parentScorer.Freq();
- }
- //System.out.println(" " + childDoc);
- return _childDoc;
- }
- else
- {
- // Degenerate but allowed: parent has no children
- }
- }
- }
-
- Debug.Assert(_childDoc < _parentDoc, "childDoc=" + _childDoc + " parentDoc=" + _parentDoc);
- _childDoc++;
- if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
- {
- continue;
- }
- //System.out.println(" " + childDoc);
- return _childDoc;
- nextChildDocContinue:;
- }
- }
-
- /// <summary>
- /// Detect mis-use, where provided parent query in fact sometimes returns child documents.
- /// </summary>
- private void ValidateParentDoc()
- {
- if (_parentDoc != NO_MORE_DOCS && !_parentBits.Get(_parentDoc))
- {
- throw new InvalidOperationException(InvalidQueryMessage + _parentDoc);
- }
- }
-
- public override int DocID()
- {
- return _childDoc;
- }
-
- public override float Score()
- {
- return _parentScore;
- }
-
- public override int Freq()
- {
- return _parentFreq;
- }
-
- public override int Advance(int childTarget)
- {
- Debug.Assert(childTarget >= _parentBits.Length() || !_parentBits.Get(childTarget));
-
- //System.out.println("Q.advance childTarget=" + childTarget);
- if (childTarget == NO_MORE_DOCS)
- {
- //System.out.println(" END");
- return _childDoc = _parentDoc = NO_MORE_DOCS;
- }
-
- Debug.Assert(_childDoc == -1 || childTarget != _parentDoc, "childTarget=" + childTarget);
- if (_childDoc == -1 || childTarget > _parentDoc)
- {
- // Advance to new parent:
- _parentDoc = _parentScorer.Advance(childTarget);
- ValidateParentDoc();
- //System.out.println(" advance to parentDoc=" + parentDoc);
- Debug.Assert(_parentDoc > childTarget);
- if (_parentDoc == NO_MORE_DOCS)
- {
- //System.out.println(" END");
- return _childDoc = NO_MORE_DOCS;
- }
- if (_doScores)
- {
- _parentScore = _parentScorer.Score();
- _parentFreq = _parentScorer.Freq();
- }
- int firstChild = _parentBits.PrevSetBit(_parentDoc - 1);
- //System.out.println(" firstChild=" + firstChild);
- childTarget = Math.Max(childTarget, firstChild);
- }
-
- Debug.Assert(childTarget < _parentDoc);
-
- // Advance within children of current parent:
- _childDoc = childTarget;
- //System.out.println(" " + childDoc);
- if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
- {
- NextDoc();
- }
- return _childDoc;
- }
-
- public override long Cost()
- {
- return _parentScorer.Cost();
- }
- }
-
- public override void ExtractTerms(ISet<Term> terms)
- {
- _parentQuery.ExtractTerms(terms);
- }
-
- public override Query Rewrite(IndexReader reader)
- {
- Query parentRewrite = _parentQuery.Rewrite(reader);
- if (parentRewrite != _parentQuery)
- {
- Query rewritten = new ToChildBlockJoinQuery(_parentQuery, parentRewrite, _parentsFilter, _doScores);
- rewritten.Boost = Boost;
- return rewritten;
- }
-
- return this;
- }
-
- public override string ToString(string field)
- {
- return "ToChildBlockJoinQuery (" + _parentQuery + ")";
- }
-
- protected bool Equals(ToChildBlockJoinQuery other)
- {
- return base.Equals(other) &&
- Equals(_origParentQuery, other._origParentQuery) &&
- _doScores == other._doScores &&
- Equals(_parentsFilter, other._parentsFilter);
- }
-
- public override bool Equals(object obj)
- {
- if (ReferenceEquals(null, obj)) return false;
- if (ReferenceEquals(this, obj)) return true;
- if (obj.GetType() != GetType()) return false;
- return Equals((ToChildBlockJoinQuery) obj);
- }
-
- public override int GetHashCode()
- {
- unchecked
- {
- int hashCode = base.GetHashCode();
- hashCode = (hashCode*397) ^ (_origParentQuery != null ? _origParentQuery.GetHashCode() : 0);
- hashCode = (hashCode*397) ^ _doScores.GetHashCode();
- hashCode = (hashCode*397) ^ (_parentsFilter != null ? _parentsFilter.GetHashCode() : 0);
- return hashCode;
- }
- }
-
- public override object Clone()
- {
- return new ToChildBlockJoinQuery((Query) _origParentQuery.Clone(), _parentsFilter, _doScores);
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/ToParentBlockJoinCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinCollector.cs b/Lucene.Net.Join/ToParentBlockJoinCollector.cs
deleted file mode 100644
index 22fa53e..0000000
--- a/Lucene.Net.Join/ToParentBlockJoinCollector.cs
+++ /dev/null
@@ -1,560 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.IO;
-using Lucene.Net.Grouping;
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Collects parent document hits for a Query containing one or more
- /// BlockJoinQuery clauses, sorted by the
- /// specified parent Sort. Note that this cannot perform
- /// arbitrary joins; rather, it requires that all joined
- /// documents are indexed as a doc block (using
- /// <see cref="IndexWriter.AddDocuments"/> or
- /// <see cref="IndexWriter.UpdateDocuments"/>). I.e., the join is computed
- /// at index time.
- ///
- /// <p>The parent Sort must only use
- /// fields from the parent documents; sorting by field in
- /// the child documents is not supported.</p>
- ///
- /// <p>You should only use this
- /// collector if one or more of the clauses in the query is
- /// a <seealso cref="ToParentBlockJoinQuery"/>. This collector will find those query
- /// clauses and record the matching child documents for the
- /// top scoring parent documents.</p>
- ///
- /// <p>Multiple joins (star join) and nested joins and a mix
- /// of the two are allowed, as long as in all cases the
- /// documents corresponding to a single row of each joined
- /// parent table were indexed as a doc block.</p>
- ///
- /// <p>For the simple star join you can retrieve the
- /// <seealso cref="TopGroups"/> instance containing each <seealso cref="ToParentBlockJoinQuery"/>'s
- /// matching child documents for the top parent groups,
- /// using <see cref="GetTopGroups"/>. I.e.,
- /// a single query, which will contain two or more
- /// <seealso cref="ToParentBlockJoinQuery"/>'s as clauses representing the star join,
- /// can then retrieve two or more <seealso cref="TopGroups"/> instances.</p>
- ///
- /// <p>For nested joins, the query will run correctly (ie,
- /// match the right parent and child documents), however,
- /// because TopGroups is currently unable to support nesting
- /// (each group is not able to hold another TopGroups), you
- /// are only able to retrieve the TopGroups of the first
- /// join. The TopGroups of the nested joins will not be
- /// correct.
- ///
- /// See the Lucene <c>org.apache.lucene.search.join</c> package documentation for a code
- /// sample.
- ///
- /// @lucene.experimental
- /// </summary>
- public class ToParentBlockJoinCollector : Collector
- {
- private readonly Sort sort;
-
- // Maps each BlockJoinQuery instance to its "slot" in
- // joinScorers and in OneGroup's cached doc/scores/count:
- private readonly IDictionary<Query, int?> joinQueryID = new Dictionary<Query, int?>();
- private readonly int numParentHits;
- private readonly FieldValueHitQueue<OneGroup> queue;
- private readonly FieldComparator[] comparators;
- private readonly int[] reverseMul;
- private readonly int compEnd;
- private readonly bool trackMaxScore;
- private readonly bool trackScores;
-
- private int docBase;
- private ToParentBlockJoinQuery.BlockJoinScorer[] joinScorers = new ToParentBlockJoinQuery.BlockJoinScorer[0];
- private AtomicReaderContext currentReaderContext;
- private Scorer scorer;
- private bool queueFull;
-
- private OneGroup bottom;
- private int totalHitCount;
- private float maxScore = float.NaN;
-
- /// <summary>
- /// Creates a ToParentBlockJoinCollector. The provided sort must
- /// not be null. If you pass <c>trackScores=true</c>, no
- /// <see cref="ToParentBlockJoinQuery"/> instance may use
- /// ScoreMode.None.
- /// </summary>
- public ToParentBlockJoinCollector(Sort sort, int numParentHits, bool trackScores, bool trackMaxScore)
- {
- // TODO: allow null sort to be specialized to relevance
- // only collector
- this.sort = sort;
- this.trackMaxScore = trackMaxScore;
- if (trackMaxScore)
- {
- maxScore = float.MinValue;
- }
- //System.out.println("numParentHits=" + numParentHits);
- this.trackScores = trackScores;
- this.numParentHits = numParentHits;
- queue = FieldValueHitQueue.Create<OneGroup>(sort.GetSort(), numParentHits);
- comparators = queue.Comparators;
- reverseMul = queue.ReverseMul;
- compEnd = comparators.Length - 1;
- }
-
- private sealed class OneGroup : FieldValueHitQueue.Entry
- {
- public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, bool doScores)
- : base(comparatorSlot, parentDoc, parentScore)
- {
- //System.out.println("make OneGroup parentDoc=" + parentDoc);
- docs = new int[numJoins][];
- for (int joinId = 0; joinId < numJoins; joinId++)
- {
- docs[joinId] = new int[5];
- }
- if (doScores)
- {
- scores = new float[numJoins][];
- for (int joinId = 0; joinId < numJoins; joinId++)
- {
- scores[joinId] = new float[5];
- }
- }
- counts = new int[numJoins];
- }
- internal AtomicReaderContext readerContext;
- internal int[][] docs;
- internal float[][] scores;
- internal int[] counts;
- }
-
- public override void Collect(int parentDoc)
- {
- //System.out.println("\nC parentDoc=" + parentDoc);
- totalHitCount++;
-
- float score = float.NaN;
-
- if (trackMaxScore)
- {
- score = scorer.Score();
- maxScore = Math.Max(maxScore, score);
- }
-
- // TODO: we could sweep all joinScorers here and
- // aggregate total child hit count, so we can fill this
- // in getTopGroups (we wire it to 0 now)
-
- if (queueFull)
- {
- //System.out.println(" queueFull");
- // Fastmatch: return if this hit is not competitive
- for (int i = 0; ; i++)
- {
- int c = reverseMul[i] * comparators[i].CompareBottom(parentDoc);
- if (c < 0)
- {
- // Definitely not competitive.
- //System.out.println(" skip");
- return;
- }
- if (c > 0)
- {
- // Definitely competitive.
- break;
- }
- if (i == compEnd)
- {
- // Here c=0. If we're at the last comparator, this doc is not
- // competitive, since docs are visited in doc Id order, which means
- // this doc cannot compete with any other document in the queue.
- //System.out.println(" skip");
- return;
- }
- }
-
- //System.out.println(" competes! doc=" + (docBase + parentDoc));
-
- // This hit is competitive - replace bottom element in queue & adjustTop
- for (int i = 0; i < comparators.Length; i++)
- {
- comparators[i].Copy(bottom.Slot, parentDoc);
- }
- if (!trackMaxScore && trackScores)
- {
- score = scorer.Score();
- }
- bottom.Doc = docBase + parentDoc;
- bottom.readerContext = currentReaderContext;
- bottom.Score = score;
- CopyGroups(bottom);
- bottom = queue.UpdateTop();
-
- for (int i = 0; i < comparators.Length; i++)
- {
- comparators[i].Bottom = bottom.Slot;
- }
- }
- else
- {
- // Startup transient: queue is not yet full:
- int comparatorSlot = totalHitCount - 1;
-
- // Copy hit into queue
- for (int i = 0; i < comparators.Length; i++)
- {
- comparators[i].Copy(comparatorSlot, parentDoc);
- }
- //System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
- if (!trackMaxScore && trackScores)
- {
- score = scorer.Score();
- }
- OneGroup og = new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.Length, trackScores);
- og.readerContext = currentReaderContext;
- CopyGroups(og);
- bottom = queue.Add(og);
- queueFull = totalHitCount == numParentHits;
- if (queueFull)
- {
- // End of startup transient: queue just filled up:
- for (int i = 0; i < comparators.Length; i++)
- {
- comparators[i].Bottom = bottom.Slot;
- }
- }
- }
- }
-
- // Pulls out child doc and scores for all join queries:
- private void CopyGroups(OneGroup og)
- {
- // While rare, it's possible top arrays could be too
- // short if join query had null scorer on first
- // segment(s) but then became non-null on later segments
- int numSubScorers = joinScorers.Length;
- if (og.docs.Length < numSubScorers)
- {
- // While rare, this could happen if join query had
- // null scorer on first segment(s) but then became
- // non-null on later segments
- og.docs = ArrayUtil.Grow(og.docs);
- }
- if (og.counts.Length < numSubScorers)
- {
- og.counts = ArrayUtil.Grow(og.counts);
- }
- if (trackScores && og.scores.Length < numSubScorers)
- {
- og.scores = ArrayUtil.Grow(og.scores);
- }
-
- //System.out.println("\ncopyGroups parentDoc=" + og.doc);
- for (int scorerIDX = 0; scorerIDX < numSubScorers; scorerIDX++)
- {
- ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
- //System.out.println(" scorer=" + joinScorer);
- if (joinScorer != null && docBase + joinScorer.ParentDoc == og.Doc)
- {
- og.counts[scorerIDX] = joinScorer.ChildCount;
- //System.out.println(" count=" + og.counts[scorerIDX]);
- og.docs[scorerIDX] = joinScorer.SwapChildDocs(og.docs[scorerIDX]);
- Debug.Assert(og.docs[scorerIDX].Length >= og.counts[scorerIDX], "length=" + og.docs[scorerIDX].Length + " vs count=" + og.counts[scorerIDX]);
- //System.out.println(" len=" + og.docs[scorerIDX].length);
- /*
- for(int idx=0;idx<og.counts[scorerIDX];idx++) {
- System.out.println(" docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
- }
- */
- if (trackScores)
- {
- //System.out.println(" copy scores");
- og.scores[scorerIDX] = joinScorer.SwapChildScores(og.scores[scorerIDX]);
- Debug.Assert(og.scores[scorerIDX].Length >= og.counts[scorerIDX], "length=" + og.scores[scorerIDX].Length + " vs count=" + og.counts[scorerIDX]);
- }
- }
- else
- {
- og.counts[scorerIDX] = 0;
- }
- }
- }
-
- public override AtomicReaderContext NextReader
- {
- set
- {
- currentReaderContext = value;
- docBase = value.DocBase;
- for (int compIDX = 0; compIDX < comparators.Length; compIDX++)
- {
- queue.SetComparator(compIDX, comparators[compIDX].SetNextReader(value));
- }
- }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return false;
- }
-
- /// <summary>
- /// Stores <paramref name="scorer"/> in the slot of <paramref name="query"/>, appending a new slot the first time the query is seen.
- /// </summary>
- private void enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer)
- {
- scorer.TrackPendingChildHits();
- int? slot;
- if (joinQueryID.TryGetValue(query, out slot) && slot != null) // the indexer would throw KeyNotFoundException for an unseen query
- {
- joinScorers[slot.Value] = scorer;
- return;
- }
- // First sighting: the new slot index is the current length, then grow the array by one.
- joinQueryID[query] = joinScorers.Length;
- var grown = new ToParentBlockJoinQuery.BlockJoinScorer[1 + joinScorers.Length];
- Array.Copy(joinScorers, 0, grown, 0, joinScorers.Length);
- grown[grown.Length - 1] = scorer;
- joinScorers = grown;
-
- /// <summary>
- /// Installs the scorer and enrolls every BlockJoinScorer reachable from it through Scorer.Children.
- /// </summary>
- public override Scorer Scorer
- {
- set
- {
- // Score() is called here and, most likely, by the comparators too;
- // cache it so it's only "really" computed once:
- scorer = new ScoreCachingWrappingScorer(value);
- for (int compIdx = 0; compIdx < comparators.Length; compIdx++)
- {
- comparators[compIdx].Scorer = scorer;
- }
- Arrays.Fill(joinScorers, null);
- // Breadth-first walk of the scorer tree. Inspect the dequeued scorer, not the root
- // `value`: re-testing the root re-enqueues its children forever and never visits them.
- var pending = new Queue<Scorer>();
- pending.Enqueue(value);
- Scorer current;
- while (pending.Count > 0 && (current = pending.Dequeue()) != null)
- {
- var joinScorer = current as ToParentBlockJoinQuery.BlockJoinScorer;
- if (joinScorer != null)
- {
- enroll((ToParentBlockJoinQuery) current.Weight.Query, joinScorer);
- }
- foreach (Scorer.ChildScorer sub in current.Children)
- {
- pending.Enqueue(sub.Child);
- }
- }
- }
- }
-
- private OneGroup[] sortedGroups;
-
- private void sortQueue()
- {
- sortedGroups = new OneGroup[queue.Size()];
- for (int downTo = queue.Size() - 1; downTo >= 0; downTo--)
- {
- sortedGroups[downTo] = queue.Pop();
- }
- }
-
- /// <summary>
- /// Returns the TopGroups for the specified
- /// BlockJoinQuery. The groupValue of each GroupDocs will
- /// be the parent docID for that group.
- /// The number of documents within each group is calculated as minimum of <c>maxDocsPerGroup</c>
- /// and number of matched child documents for that group.
- /// Returns null if no groups matched.
- /// </summary>
- /// <param name="query"> Search query </param>
- /// <param name="withinGroupSort"> Sort criteria within groups </param>
- /// <param name="offset"> Parent docs offset </param>
- /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
- /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
- /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
- /// <returns> TopGroups for specified query </returns>
- /// <exception cref="IOException"> if there is a low-level I/O error </exception>
- public virtual TopGroups<int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)
- {
- // A query that was never enrolled has no entry; the Dictionary indexer would throw for it, so probe instead.
- int? slot;
- joinQueryID.TryGetValue(query, out slot);
- if (slot == null && totalHitCount == 0)
- {
- return null;
- }
- if (sortedGroups == null)
- {
- if (offset >= queue.Size())
- {
- return null;
- }
- sortQueue();
- }
- else if (offset > sortedGroups.Length)
- {
- return null;
- }
-
- // -1 tells AccumulateGroups that this query has no slot, so every group reports zero child docs.
- return AccumulateGroups(slot ?? -1, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
- }
-
- /// <summary>
- /// Accumulates groups for the BlockJoinQuery specified by its slot.
- /// </summary>
- /// <param name="slot"> Search query's slot </param>
- /// <param name="offset"> Parent docs offset </param>
- /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
- /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
- /// <param name="withinGroupSort"> Sort criteria within groups </param>
- /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
- /// <returns> TopGroups for the query specified by slot </returns>
- /// <exception cref="IOException"> if there is a low-level I/O error </exception>
- private TopGroups<int> AccumulateGroups(int slot, int offset, int maxDocsPerGroup, int withinGroupOffset, Sort withinGroupSort, bool fillSortFields)
- {
- var groups = new GroupDocs<int>[sortedGroups.Length - offset];
- var fakeScorer = new FakeScorer();
-
- int totalGroupedHitCount = 0;
- //System.out.println("slot=" + slot);
-
- for (int groupIdx = offset; groupIdx < sortedGroups.Length; groupIdx++)
- {
- OneGroup og = sortedGroups[groupIdx];
- int numChildDocs;
- if (slot == -1 || slot >= og.counts.Length)
- {
- numChildDocs = 0;
- }
- else
- {
- numChildDocs = og.counts[slot];
- }
-
- // Number of documents in group should be bounded to prevent redundant memory allocation
- int numDocsInGroup = Math.Max(1, Math.Min(numChildDocs, maxDocsPerGroup));
- //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
-
- // At this point we hold all docs w/ in each group, unsorted; we now sort them:
- Collector collector;
- if (withinGroupSort == null)
- {
- //System.out.println("sort by score");
- // Sort by score
- if (!trackScores)
- {
- throw new ArgumentException("cannot sort by relevance within group: trackScores=false");
- }
- collector = TopScoreDocCollector.Create(numDocsInGroup, true);
- }
- else
- {
- // Sort by fields
- collector = TopFieldCollector.Create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
- }
-
- collector.Scorer = fakeScorer;
- collector.NextReader = og.readerContext;
- for (int docIdx = 0; docIdx < numChildDocs; docIdx++)
- {
- //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
- int doc = og.docs[slot][docIdx];
- fakeScorer.doc = doc;
- if (trackScores)
- {
- fakeScorer._score = og.scores[slot][docIdx];
- }
- collector.Collect(doc);
- }
- totalGroupedHitCount += numChildDocs;
-
- object[] groupSortValues;
-
- if (fillSortFields)
- {
- groupSortValues = new object[comparators.Length];
- for (int sortFieldIdx = 0; sortFieldIdx < comparators.Length; sortFieldIdx++)
- {
- groupSortValues[sortFieldIdx] = comparators[sortFieldIdx].Value(og.Slot);
- }
- }
- else
- {
- groupSortValues = null;
- }
-
- TopDocs topDocs;
- if (withinGroupSort == null)
- {
- var tempCollector = (TopScoreDocCollector) collector;
- topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
- }
- else
- {
- var tempCollector = (TopFieldCollector) collector;
- topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
- }
-
- groups[groupIdx - offset] = new GroupDocs<int>(og.Score, topDocs.MaxScore, numChildDocs, topDocs.ScoreDocs, og.Doc, groupSortValues);
- }
-
- return new TopGroups<int>(new TopGroups<int>(sort.GetSort(), withinGroupSort == null ? null : withinGroupSort.GetSort(), 0, totalGroupedHitCount, groups, maxScore), totalHitCount);
- }
-
- /// <summary>
- /// Returns the TopGroups for the specified BlockJoinQuery. The groupValue of each
- /// GroupDocs will be the parent docID for that group. The number of documents within
- /// each group equals to the total number of matched child documents for that group.
- /// Returns null if no groups matched.
- /// </summary>
- /// <param name="query">Search query</param>
- /// <param name="withinGroupSort">Sort criteria within groups</param>
- /// <param name="offset">Parent docs offset</param>
- /// <param name="withinGroupOffset">Offset within each group of child docs</param>
- /// <param name="fillSortFields">Specifies whether to add sort fields or not</param>
- /// <returns>TopGroups for specified query</returns>
- /// <exception cref="IOException"> if there is a low-level I/O error </exception>
- public virtual TopGroups<int> GetTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int withinGroupOffset, bool fillSortFields)
- {
- return GetTopGroups(query, withinGroupSort, offset, int.MaxValue, withinGroupOffset, fillSortFields);
- }
-
- /// <summary>
- /// Returns the highest score across all collected parent hits, as long as
- /// <c>trackMaxScore=true</c> was passed to the
- /// <see cref="ToParentBlockJoinCollector(Sort, int, bool, bool)"/> constructor.
- /// Otherwise, this returns <see cref="float.NaN"/>.
- /// </summary>
- public virtual float MaxScore
- {
- get { return maxScore; }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs b/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
deleted file mode 100644
index c41fd50..0000000
--- a/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
+++ /dev/null
@@ -1,393 +0,0 @@
-using System;
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// A field comparator that allows parent documents to be sorted by fields
- /// from the nested / child documents. Every comparison, copy and value lookup is
- /// delegated to a wrapped child-level <see cref="FieldComparator"/>; the nested
- /// <see cref="Lowest"/> and <see cref="Highest"/> classes decide which child
- /// document of a parent's block supplies the value.
- ///
- /// @lucene.experimental
- /// </summary>
- public abstract class ToParentBlockJoinFieldComparator : FieldComparator<object>
- {
-     // Returned by the child lookup helpers when a block has no (more) child docs.
-     private const int NoChildDoc = -1;
-
-     private readonly Filter _parentFilter;
-     private readonly Filter _childFilter;
-
-     // Extra slot of the wrapped comparator, used as scratch space by Copy.
-     private readonly int _spareSlot;
-
-     // Replaced on every segment by whatever the wrapped comparator's SetNextReader returns.
-     private FieldComparator _wrappedComparator;
-
-     // Per-segment bit sets; null when the filter yielded no DocIdSet or no iterator.
-     private FixedBitSet _parentDocuments;
-     private FixedBitSet _childDocuments;
-
-     // Private: only the nested Lowest / Highest classes can derive from this type.
-     private ToParentBlockJoinFieldComparator(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
-     {
-         _wrappedComparator = wrappedComparator;
-         _parentFilter = parentFilter;
-         _childFilter = childFilter;
-         _spareSlot = spareSlot;
-     }
-
-     /// <summary>
-     /// Compares two slots by delegating to the wrapped comparator.
-     /// </summary>
-     public override int Compare(int slot1, int slot2)
-     {
-         return _wrappedComparator.Compare(slot1, slot2);
-     }
-
-     /// <summary>
-     /// Forwards the bottom slot to the wrapped comparator.
-     /// </summary>
-     public override int Bottom
-     {
-         set
-         {
-             _wrappedComparator.Bottom = value;
-         }
-     }
-
-     /// <summary>
-     /// Forwards the top value to the wrapped comparator.
-     /// </summary>
-     public override object TopValue
-     {
-         set
-         {
-             _wrappedComparator.TopValue = value;
-         }
-     }
-
-     /// <summary>
-     /// Loads the child and parent bit sets for the given segment (child filter first,
-     /// then parent filter) and moves the wrapped comparator to the same segment.
-     /// </summary>
-     /// <param name="context">The segment that is about to be collected.</param>
-     /// <returns>This comparator.</returns>
-     public override FieldComparator SetNextReader(AtomicReaderContext context)
-     {
-         _childDocuments = LoadBits(_childFilter, context);
-         _parentDocuments = LoadBits(_parentFilter, context);
-
-         _wrappedComparator = _wrappedComparator.SetNextReader(context);
-         return this;
-     }
-
-     // Resolves a filter's DocIdSet for one segment into a FixedBitSet: reuses the set when it
-     // already is a FixedBitSet, otherwise materializes its iterator; null when there is nothing.
-     private static FixedBitSet LoadBits(Filter filter, AtomicReaderContext context)
-     {
-         DocIdSet docIdSet = filter.GetDocIdSet(context, null);
-         if (IsEmpty(docIdSet))
-         {
-             return null;
-         }
-
-         var bits = docIdSet as FixedBitSet;
-         if (bits != null)
-         {
-             return bits;
-         }
-
-         DocIdSetIterator iterator = docIdSet.GetIterator();
-         return iterator != null ? ToFixedBitSet(iterator, context.AtomicReader.MaxDoc) : null;
-     }
-
-     private static bool IsEmpty(DocIdSet set)
-     {
-         return set == null;
-     }
-
-     // Drains the iterator into a new bit set sized for numBits documents.
-     private static FixedBitSet ToFixedBitSet(DocIdSetIterator iterator, int numBits)
-     {
-         var set = new FixedBitSet(numBits);
-         int doc;
-         while ((doc = iterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
-         {
-             set.Set(doc);
-         }
-         return set;
-     }
-
-     // Returns the first child doc between the previous parent and parentDoc,
-     // or NoChildDoc when parentDoc is 0, a bit set is missing, or the range holds no child.
-     private int FirstChildDoc(int parentDoc)
-     {
-         // parentDoc == 0 is handled up front: PrevSetBit would otherwise be called with -1.
-         if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
-         {
-             return NoChildDoc;
-         }
-
-         int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
-         int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
-         return (childDoc >= parentDoc || childDoc == -1) ? NoChildDoc : childDoc;
-     }
-
-     // Returns the next child doc after childDoc that is still below parentDoc, or NoChildDoc.
-     private int NextChildDoc(int childDoc, int parentDoc)
-     {
-         int next = _childDocuments.NextSetBit(childDoc + 1);
-         return (next >= parentDoc || next == -1) ? NoChildDoc : next;
-     }
-
-     /// <summary>
-     /// Returns the value stored in the given slot of the wrapped comparator.
-     /// </summary>
-     public override IComparable Value(int slot)
-     {
-         return _wrappedComparator.Value(slot);
-     }
-
-     /// <summary>
-     /// Concrete <see cref="ToParentBlockJoinFieldComparator"/> (used by <see cref="ToParentBlockJoinSortField"/>)
-     /// that sorts the parent docs with the lowest values in the child / nested docs first.
-     /// </summary>
-     public sealed class Lowest : ToParentBlockJoinFieldComparator
-     {
-         /// <summary>
-         /// Create ToParentBlockJoinFieldComparator.Lowest
-         /// </summary>
-         /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
-         /// <param name="parentFilter">Filter (must produce FixedBitSet per-segment) that identifies the parent documents. </param>
-         /// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
-         /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
-         /// inside the parent document scope is most competitive. </param>
-         public Lowest(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
-             : base(wrappedComparator, parentFilter, childFilter, spareSlot)
-         {
-         }
-
-         /// <summary>
-         /// Compares the bottom slot against the children of <paramref name="parentDoc"/>.
-         /// Returns the first positive child result; otherwise 0 if any child compared equal;
-         /// otherwise the result of the first child. Returns 0 when there are no children.
-         /// </summary>
-         public override int CompareBottom(int parentDoc)
-         {
-             int childDoc = FirstChildDoc(parentDoc);
-             if (childDoc == NoChildDoc)
-             {
-                 return 0;
-             }
-
-             // We only need to emit a single cmp value for any matching child doc
-             int cmp = _wrappedComparator.CompareBottom(childDoc);
-             if (cmp > 0)
-             {
-                 return cmp;
-             }
-
-             while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChildDoc)
-             {
-                 int cmp1 = _wrappedComparator.CompareBottom(childDoc);
-                 if (cmp1 > 0)
-                 {
-                     return cmp1;
-                 }
-                 if (cmp1 == 0)
-                 {
-                     cmp = 0;
-                 }
-             }
-             return cmp;
-         }
-
-         /// <summary>
-         /// Copies the lowest value among the children of <paramref name="parentDoc"/> into
-         /// <paramref name="slot"/>. Does nothing when the parent has no children.
-         /// </summary>
-         public override void Copy(int slot, int parentDoc)
-         {
-             int childDoc = FirstChildDoc(parentDoc);
-             if (childDoc == NoChildDoc)
-             {
-                 return;
-             }
-
-             _wrappedComparator.Copy(_spareSlot, childDoc);
-             _wrappedComparator.Copy(slot, childDoc);
-
-             while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChildDoc)
-             {
-                 // Stage the candidate in the spare slot so it can be compared slot-to-slot.
-                 _wrappedComparator.Copy(_spareSlot, childDoc);
-                 if (_wrappedComparator.Compare(_spareSlot, slot) < 0)
-                 {
-                     _wrappedComparator.Copy(slot, childDoc);
-                 }
-             }
-         }
-
-         /// <summary>
-         /// Compares the top value against the children of <paramref name="parentDoc"/>.
-         /// Returns the first positive child result; otherwise 0 if any child compared equal;
-         /// otherwise the result of the first child. Returns 0 when there are no children.
-         /// </summary>
-         public override int CompareTop(int parentDoc)
-         {
-             int childDoc = FirstChildDoc(parentDoc);
-             if (childDoc == NoChildDoc)
-             {
-                 return 0;
-             }
-
-             // The first child must be compared against the top value as well
-             // (it was previously compared against the bottom slot).
-             int cmp = _wrappedComparator.CompareTop(childDoc);
-             if (cmp > 0)
-             {
-                 return cmp;
-             }
-
-             while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChildDoc)
-             {
-                 int cmp1 = _wrappedComparator.CompareTop(childDoc);
-                 if (cmp1 > 0)
-                 {
-                     return cmp1;
-                 }
-                 if (cmp1 == 0)
-                 {
-                     cmp = 0;
-                 }
-             }
-             return cmp;
-         }
-     }
-
-     /// <summary>
-     /// Concrete <see cref="ToParentBlockJoinFieldComparator"/> (used by <see cref="ToParentBlockJoinSortField"/>)
-     /// that sorts the parent docs with the highest values in the child / nested docs first.
-     /// </summary>
-     public sealed class Highest : ToParentBlockJoinFieldComparator
-     {
-         /// <summary>
-         /// Create ToParentBlockJoinFieldComparator.Highest
-         /// </summary>
-         /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
-         /// <param name="parentFilter">Filter (must produce FixedBitSet per-segment) that identifies the parent documents. </param>
-         /// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
-         /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
-         /// inside the parent document scope is most competitive. </param>
-         public Highest(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
-             : base(wrappedComparator, parentFilter, childFilter, spareSlot)
-         {
-         }
-
-         /// <summary>
-         /// Compares the bottom slot against the children of <paramref name="parentDoc"/>.
-         /// Returns the first negative child result; otherwise 0 if any child compared equal;
-         /// otherwise the result of the first child. Returns 0 when there are no children.
-         /// </summary>
-         public override int CompareBottom(int parentDoc)
-         {
-             int childDoc = FirstChildDoc(parentDoc);
-             if (childDoc == NoChildDoc)
-             {
-                 return 0;
-             }
-
-             int cmp = _wrappedComparator.CompareBottom(childDoc);
-             if (cmp < 0)
-             {
-                 return cmp;
-             }
-
-             while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChildDoc)
-             {
-                 int cmp1 = _wrappedComparator.CompareBottom(childDoc);
-                 if (cmp1 < 0)
-                 {
-                     return cmp1;
-                 }
-                 if (cmp1 == 0)
-                 {
-                     cmp = 0;
-                 }
-             }
-             return cmp;
-         }
-
-         /// <summary>
-         /// Copies the highest value among the children of <paramref name="parentDoc"/> into
-         /// <paramref name="slot"/>. Does nothing when the parent has no children.
-         /// </summary>
-         public override void Copy(int slot, int parentDoc)
-         {
-             int childDoc = FirstChildDoc(parentDoc);
-             if (childDoc == NoChildDoc)
-             {
-                 return;
-             }
-
-             _wrappedComparator.Copy(_spareSlot, childDoc);
-             _wrappedComparator.Copy(slot, childDoc);
-
-             while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChildDoc)
-             {
-                 // Stage the candidate in the spare slot so it can be compared slot-to-slot.
-                 _wrappedComparator.Copy(_spareSlot, childDoc);
-                 if (_wrappedComparator.Compare(_spareSlot, slot) > 0)
-                 {
-                     _wrappedComparator.Copy(slot, childDoc);
-                 }
-             }
-         }
-
-         /// <summary>
-         /// Compares the top value against the children of <paramref name="parentDoc"/>.
-         /// Returns the first negative child result; otherwise 0 if any child compared equal;
-         /// otherwise the result of the first child. Returns 0 when there are no children.
-         /// </summary>
-         public override int CompareTop(int parentDoc)
-         {
-             int childDoc = FirstChildDoc(parentDoc);
-             if (childDoc == NoChildDoc)
-             {
-                 return 0;
-             }
-
-             // The first child must be compared against the top value as well
-             // (it was previously compared against the bottom slot).
-             int cmp = _wrappedComparator.CompareTop(childDoc);
-             if (cmp < 0)
-             {
-                 return cmp;
-             }
-
-             while ((childDoc = NextChildDoc(childDoc, parentDoc)) != NoChildDoc)
-             {
-                 int cmp1 = _wrappedComparator.CompareTop(childDoc);
-                 if (cmp1 < 0)
-                 {
-                     return cmp1;
-                 }
-                 if (cmp1 == 0)
-                 {
-                     cmp = 0;
-                 }
-             }
-             return cmp;
-         }
-     }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/ToParentBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinQuery.cs b/Lucene.Net.Join/ToParentBlockJoinQuery.cs
deleted file mode 100644
index 810f30e..0000000
--- a/Lucene.Net.Join/ToParentBlockJoinQuery.cs
+++ /dev/null
@@ -1,516 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// This query requires that you index children and parent docs as a single block,
- /// using the <c>IndexWriter.AddDocuments()</c> or <c>IndexWriter.UpdateDocuments()</c>
- /// API. In each block, the child documents must appear first, ending with the parent
- /// document. At search time you provide a Filter identifying the parents, however
- /// this Filter must provide a <see cref="FixedBitSet"/> per sub-reader.
- ///
- /// <para>Once the block index is built, use this query to wrap
- /// any sub-query matching only child docs and join matches in that
- /// child document space up to the parent document space.
- /// You can then use this Query as a clause with
- /// other queries in the parent document space.</para>
- ///
- /// <para>See <see cref="ToChildBlockJoinQuery"/> if you need to join
- /// in the reverse order.</para>
- ///
- /// <para>The child documents must be orthogonal to the parent
- /// documents: the wrapped child query must never
- /// return a parent document.</para>
- ///
- /// <para>If you'd like to retrieve <c>TopGroups</c> for the
- /// resulting query, use the <see cref="ToParentBlockJoinCollector"/>.
- /// Note that this is not necessary, ie, if you simply want
- /// to collect the parent documents and don't need to see
- /// which child documents matched under that parent, then
- /// you can use any collector.</para>
- ///
- /// <para><b>NOTE</b>: If the overall query contains parent-only
- /// matches, for example you OR a parent-only query with a
- /// joined child-only query, then the resulting collected documents
- /// will be correct, however the <c>TopGroups</c> you get
- /// from <see cref="ToParentBlockJoinCollector"/> will not contain every
- /// child for parents that had matched.</para>
- ///
- /// <para>See the <c>Lucene.Net.Join</c> namespace for an overview.</para>
- ///
- /// @lucene.experimental
- /// </summary>
- public class ToParentBlockJoinQuery : Query
- {
-     private readonly Filter _parentsFilter;
-     private readonly Query _childQuery;
-
-     // If we are rewritten, this is the original childQuery we
-     // were passed; we use this for Equals() and
-     // GetHashCode(). This makes rewritten query equal the
-     // original, so that user does not have to Rewrite() their
-     // query before searching:
-     private readonly Query _origChildQuery;
-     private readonly ScoreMode _scoreMode;
-
-     /// <summary>
-     /// Create a ToParentBlockJoinQuery.
-     /// </summary>
-     /// <param name="childQuery"> Query matching child documents. </param>
-     /// <param name="parentsFilter"> Filter (must produce FixedBitSet
-     /// per-segment, like <see cref="FixedBitSetCachingWrapperFilter"/>)
-     /// identifying the parent documents. </param>
-     /// <param name="scoreMode"> How to aggregate multiple child scores
-     /// into a single parent score.
-     /// </param>
-     public ToParentBlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode)
-     {
-         _origChildQuery = childQuery;
-         _childQuery = childQuery;
-         _parentsFilter = parentsFilter;
-         _scoreMode = scoreMode;
-     }
-
-     // Used by Rewrite: keeps the original child query for equality while wrapping the rewritten one.
-     private ToParentBlockJoinQuery(Query origChildQuery, Query childQuery, Filter parentsFilter, ScoreMode scoreMode) : base()
-     {
-         _origChildQuery = origChildQuery;
-         _childQuery = childQuery;
-         _parentsFilter = parentsFilter;
-         _scoreMode = scoreMode;
-     }
-
-     /// <summary>
-     /// Creates the join weight, wrapping the weight of the child query.
-     /// </summary>
-     public override Weight CreateWeight(IndexSearcher searcher)
-     {
-         return new BlockJoinWeight(this, _childQuery.CreateWeight(searcher), _parentsFilter, _scoreMode);
-     }
-
-     // Weight that delegates normalization to the child weight and produces BlockJoinScorer instances.
-     private class BlockJoinWeight : Weight
-     {
-         internal readonly Query JoinQuery;
-         internal readonly Weight ChildWeight;
-         internal readonly Filter ParentsFilter;
-         internal readonly ScoreMode ScoreMode;
-
-         public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode) : base()
-         {
-             JoinQuery = joinQuery;
-             ChildWeight = childWeight;
-             ParentsFilter = parentsFilter;
-             ScoreMode = scoreMode;
-         }
-
-         public override Query Query
-         {
-             get { return JoinQuery; }
-         }
-
-         // Child normalization value scaled by the square of the join query's boost.
-         public override float ValueForNormalization
-         {
-             get { return ChildWeight.ValueForNormalization*JoinQuery.Boost*JoinQuery.Boost; }
-         }
-
-         public override void Normalize(float norm, float topLevelBoost)
-         {
-             ChildWeight.Normalize(norm, topLevelBoost * JoinQuery.Boost);
-         }
-
-         // NOTE: acceptDocs applies (and is checked) only in the parent document space
-         public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
-         {
-             Scorer childScorer = ChildWeight.Scorer(readerContext, readerContext.AtomicReader.LiveDocs);
-             if (childScorer == null)
-             {
-                 // No matches
-                 return null;
-             }
-
-             int firstChildDoc = childScorer.NextDoc();
-             if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS)
-             {
-                 // No matches
-                 return null;
-             }
-
-             // NOTE: we cannot pass acceptDocs here because this
-             // will (most likely, justifiably) cause the filter to
-             // not return a FixedBitSet but rather a
-             // BitsFilteredDocIdSet. Instead, we filter by
-             // acceptDocs when we score:
-             DocIdSet parents = ParentsFilter.GetDocIdSet(readerContext, null);
-
-             if (parents == null)
-             {
-                 // No matches
-                 return null;
-             }
-             if (!(parents is FixedBitSet))
-             {
-                 throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
-             }
-
-             return new BlockJoinScorer(this, childScorer, (FixedBitSet)parents, firstChildDoc, ScoreMode, acceptDocs);
-         }
-
-         // Explains a parent hit by advancing a fresh scorer to it; anything else is "Not a match".
-         public override Explanation Explain(AtomicReaderContext context, int doc)
-         {
-             BlockJoinScorer scorer = (BlockJoinScorer)Scorer(context, context.AtomicReader.LiveDocs);
-             if (scorer != null && scorer.Advance(doc) == doc)
-             {
-                 return scorer.Explain(context.DocBase);
-             }
-             return new ComplexExplanation(false, 0.0f, "Not a match");
-         }
-
-         public override bool ScoresDocsOutOfOrder()
-         {
-             return false;
-         }
-     }
-
-     // Scorer over the parent document space: for each accepted parent it consumes that parent's
-     // matching child docs from the child scorer and aggregates their scores per ScoreMode.
-     internal class BlockJoinScorer : Scorer
-     {
-         private readonly Scorer _childScorer;
-         private readonly FixedBitSet _parentBits;
-         private readonly ScoreMode _scoreMode;
-         private readonly Bits _acceptDocs;
-         private int _parentDoc = -1;
-         private int _prevParentDoc;
-         private float _parentScore;
-         private int _parentFreq;
-         private int _nextChildDoc;
-
-         // Both arrays stay null until TrackPendingChildHits / Swap* is called.
-         private int[] _pendingChildDocs;
-         private float[] _pendingChildScores;
-         private int _childDocUpto;
-
-         public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) : base(weight)
-         {
-             _parentBits = parentBits;
-             _childScorer = childScorer;
-             _scoreMode = scoreMode;
-             _acceptDocs = acceptDocs;
-             _nextChildDoc = firstChildDoc;
-         }
-
-         public override ICollection<ChildScorer> Children
-         {
-             get { return Collections.Singleton(new ChildScorer(_childScorer, "BLOCK_JOIN")); }
-         }
-
-         // Number of child docs gathered for the current parent.
-         internal virtual int ChildCount
-         {
-             get { return _childDocUpto; }
-         }
-
-         internal virtual int ParentDoc
-         {
-             get { return _parentDoc; }
-         }
-
-         // Hands the current pending child doc array to the caller and installs 'other'
-         // (or a fresh 5-element array when 'other' is null) in its place.
-         internal virtual int[] SwapChildDocs(int[] other)
-         {
-             int[] ret = _pendingChildDocs;
-             if (other == null)
-             {
-                 _pendingChildDocs = new int[5];
-             }
-             else
-             {
-                 _pendingChildDocs = other;
-             }
-             return ret;
-         }
-
-         // Same as SwapChildDocs, for the pending child scores; not allowed with ScoreMode.None.
-         internal virtual float[] SwapChildScores(float[] other)
-         {
-             if (_scoreMode == ScoreMode.None)
-             {
-                 throw new InvalidOperationException("ScoreMode is None; you must pass trackScores=false to ToParentBlockJoinCollector");
-             }
-             float[] ret = _pendingChildScores;
-             if (other == null)
-             {
-                 _pendingChildScores = new float[5];
-             }
-             else
-             {
-                 _pendingChildScores = other;
-             }
-             return ret;
-         }
-
-         // Parent & child docs are supposed to be orthogonal: fails when the
-         // child scorer is positioned on the given parent doc.
-         private void EnsureNextChildIsNot(int parentDoc)
-         {
-             if (_nextChildDoc == parentDoc)
-             {
-                 throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
-             }
-         }
-
-         public override int NextDoc()
-         {
-             // Loop until we hit a parentDoc that's accepted
-             while (true)
-             {
-                 if (_nextChildDoc == NO_MORE_DOCS)
-                 {
-                     return _parentDoc = NO_MORE_DOCS;
-                 }
-
-                 // Gather all children sharing the same parent as
-                 // nextChildDoc
-
-                 _parentDoc = _parentBits.NextSetBit(_nextChildDoc);
-
-                 EnsureNextChildIsNot(_parentDoc);
-
-                 Debug.Assert(_parentDoc != -1);
-
-                 if (_acceptDocs != null && !_acceptDocs.Get(_parentDoc))
-                 {
-                     // Parent doc not accepted; skip child docs until
-                     // we hit a new parent doc:
-                     do
-                     {
-                         _nextChildDoc = _childScorer.NextDoc();
-                     } while (_nextChildDoc < _parentDoc);
-
-                     EnsureNextChildIsNot(_parentDoc);
-
-                     continue;
-                 }
-
-                 float totalScore = 0;
-                 float maxScore = float.NegativeInfinity;
-
-                 _childDocUpto = 0;
-                 _parentFreq = 0;
-                 do
-                 {
-                     // Grow the tracking arrays (when tracking is enabled) before writing slot _childDocUpto.
-                     if (_pendingChildDocs != null && _pendingChildDocs.Length == _childDocUpto)
-                     {
-                         _pendingChildDocs = ArrayUtil.Grow(_pendingChildDocs);
-                     }
-                     if (_pendingChildScores != null && _scoreMode != ScoreMode.None && _pendingChildScores.Length == _childDocUpto)
-                     {
-                         _pendingChildScores = ArrayUtil.Grow(_pendingChildScores);
-                     }
-                     if (_pendingChildDocs != null)
-                     {
-                         _pendingChildDocs[_childDocUpto] = _nextChildDoc;
-                     }
-                     if (_scoreMode != ScoreMode.None)
-                     {
-                         // TODO: specialize this into dedicated classes per-scoreMode
-                         float childScore = _childScorer.Score();
-                         int childFreq = _childScorer.Freq();
-                         if (_pendingChildScores != null)
-                         {
-                             _pendingChildScores[_childDocUpto] = childScore;
-                         }
-                         maxScore = Math.Max(childScore, maxScore);
-                         totalScore += childScore;
-                         _parentFreq += childFreq;
-                     }
-                     _childDocUpto++;
-                     _nextChildDoc = _childScorer.NextDoc();
-                 } while (_nextChildDoc < _parentDoc);
-
-                 EnsureNextChildIsNot(_parentDoc);
-
-                 switch (_scoreMode)
-                 {
-                     case ScoreMode.Avg:
-                         _parentScore = totalScore / _childDocUpto;
-                         break;
-                     case ScoreMode.Max:
-                         _parentScore = maxScore;
-                         break;
-                     case ScoreMode.Total:
-                         _parentScore = totalScore;
-                         break;
-                     case ScoreMode.None:
-                         break;
-                 }
-
-                 return _parentDoc;
-             }
-         }
-
-         public override int DocID()
-         {
-             return _parentDoc;
-         }
-
-         // Aggregated score of the current parent, as computed by the last NextDoc().
-         public override float Score()
-         {
-             return _parentScore;
-         }
-
-         // Sum of the child frequencies of the current parent (0 when ScoreMode is None).
-         public override int Freq()
-         {
-             return _parentFreq;
-         }
-
-         public override int Advance(int parentTarget)
-         {
-             if (parentTarget == NO_MORE_DOCS)
-             {
-                 return _parentDoc = NO_MORE_DOCS;
-             }
-
-             if (parentTarget == 0)
-             {
-                 // Callers should only be passing in a docID from
-                 // the parent space, so this means this parent
-                 // has no children (it got docID 0), so it cannot
-                 // possibly match. We must handle this case
-                 // separately otherwise we pass invalid -1 to
-                 // prevSetBit below:
-                 return NextDoc();
-             }
-
-             _prevParentDoc = _parentBits.PrevSetBit(parentTarget - 1);
-
-             Debug.Assert(_prevParentDoc >= _parentDoc);
-             if (_prevParentDoc > _nextChildDoc)
-             {
-                 // Only advance the child scorer when it is still before the previous parent.
-                 _nextChildDoc = _childScorer.Advance(_prevParentDoc);
-             }
-
-             EnsureNextChildIsNot(_prevParentDoc);
-
-             int nd = NextDoc();
-             return nd;
-         }
-
-         // Describes the child doc range of the current parent. Formatted with the invariant
-         // culture so the explanation text does not vary with the current thread culture.
-         public virtual Explanation Explain(int docBase)
-         {
-             int start = docBase + _prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
-             int end = docBase + _parentDoc - 1; // -1 b/c parentDoc is parent doc
-             return new ComplexExplanation(true, Score(), string.Format(System.Globalization.CultureInfo.InvariantCulture, "Score based on child doc range from {0} to {1}", start, end));
-         }
-
-         public override long Cost()
-         {
-             return _childScorer.Cost();
-         }
-
-         /// <summary>
-         /// Instructs this scorer to keep track of the child docIds and score ids for retrieval purposes.
-         /// </summary>
-         public virtual void TrackPendingChildHits()
-         {
-             _pendingChildDocs = new int[5];
-             if (_scoreMode != ScoreMode.None)
-             {
-                 _pendingChildScores = new float[5];
-             }
-         }
-     }
-
-     public override void ExtractTerms(ISet<Term> terms)
-     {
-         _childQuery.ExtractTerms(terms);
-     }
-
-     /// <summary>
-     /// Rewrites the child query; when that yields a different query, returns a new join query
-     /// around it (same boost, same original child query), otherwise returns this instance.
-     /// </summary>
-     public override Query Rewrite(IndexReader reader)
-     {
-         Query childRewrite = _childQuery.Rewrite(reader);
-         if (childRewrite != _childQuery)
-         {
-             Query rewritten = new ToParentBlockJoinQuery(_origChildQuery, childRewrite, _parentsFilter, _scoreMode);
-             rewritten.Boost = Boost;
-             return rewritten;
-         }
-         return this;
-     }
-
-     public override string ToString(string field)
-     {
-         return "ToParentBlockJoinQuery (" + _childQuery + ")";
-     }
-
-     // Equality is based on the ORIGINAL child query, so a rewritten query equals its source.
-     protected bool Equals(ToParentBlockJoinQuery other)
-     {
-         return base.Equals(other) &&
-                Equals(_parentsFilter, other._parentsFilter) &&
-                _scoreMode == other._scoreMode &&
-                Equals(_origChildQuery, other._origChildQuery);
-     }
-
-     public override bool Equals(object obj)
-     {
-         if (ReferenceEquals(null, obj)) return false;
-         if (ReferenceEquals(this, obj)) return true;
-         if (obj.GetType() != GetType()) return false;
-         return Equals((ToParentBlockJoinQuery) obj);
-     }
-
-     // Hashes the same members that Equals compares.
-     public override int GetHashCode()
-     {
-         unchecked
-         {
-             int hashCode = base.GetHashCode();
-             hashCode = (hashCode*397) ^ (_parentsFilter != null ? _parentsFilter.GetHashCode() : 0);
-             hashCode = (hashCode*397) ^ (int) _scoreMode;
-             hashCode = (hashCode*397) ^ (_origChildQuery != null ? _origChildQuery.GetHashCode() : 0);
-             return hashCode;
-         }
-     }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/ToParentBlockJoinSortField.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinSortField.cs b/Lucene.Net.Join/ToParentBlockJoinSortField.cs
deleted file mode 100644
index aa2a3b6..0000000
--- a/Lucene.Net.Join/ToParentBlockJoinSortField.cs
+++ /dev/null
@@ -1,78 +0,0 @@
-using System;
-using Lucene.Net.Search;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// A special sort field that allows sorting parent docs based on nested / child level fields.
- /// Based on the sort order it either takes the document with the lowest or highest field value into account.
- ///
- /// @lucene.experimental
- /// </summary>
- public class ToParentBlockJoinSortField : SortField
- {
-     // true selects the Highest comparator, false the Lowest one (see GetComparator).
-     private readonly bool _order;
-     private readonly Filter _parentFilter;
-     private readonly Filter _childFilter;
-
-     /// <summary>
-     /// Create ToParentBlockJoinSortField. The parent document ordering is based on child document ordering (reverse).
-     /// </summary>
-     /// <param name="field"> The sort field on the nested / child level. </param>
-     /// <param name="type"> The sort type on the nested / child level. </param>
-     /// <param name="reverse"> Whether natural order should be reversed on the nested / child level;
-     /// the same value is used for the parent level. </param>
-     /// <param name="parentFilter"> Filter that identifies the parent documents. </param>
-     /// <param name="childFilter"> Filter that defines which child documents participates in sorting. </param>
-     public ToParentBlockJoinSortField(string field, Type_e type, bool reverse, Filter parentFilter, Filter childFilter)
-         : this(field, type, reverse, reverse, parentFilter, childFilter)
-     {
-     }
-
-     /// <summary>
-     /// Create ToParentBlockJoinSortField.
-     /// </summary>
-     /// <param name="field"> The sort field on the nested / child level. </param>
-     /// <param name="type"> The sort type on the nested / child level. </param>
-     /// <param name="reverse"> Whether natural order should be reversed on the nested / child document level. </param>
-     /// <param name="order"> Whether natural order should be reversed on the parent level. </param>
-     /// <param name="parentFilter"> Filter that identifies the parent documents. </param>
-     /// <param name="childFilter"> Filter that defines which child documents participates in sorting. </param>
-     public ToParentBlockJoinSortField(string field, Type_e type, bool reverse, bool order, Filter parentFilter, Filter childFilter)
-         : base(field, type, reverse)
-     {
-         _order = order;
-         _parentFilter = parentFilter;
-         _childFilter = childFilter;
-     }
-
-     /// <summary>
-     /// Creates the child-level comparator with one slot more than requested and wraps it in a
-     /// <see cref="ToParentBlockJoinFieldComparator.Highest"/> or
-     /// <see cref="ToParentBlockJoinFieldComparator.Lowest"/> comparator.
-     /// </summary>
-     /// <param name="numHits"> Number of hits the caller wants to keep. </param>
-     /// <param name="sortPos"> Passed through unchanged to the base implementation. </param>
-     public override FieldComparator GetComparator(int numHits, int sortPos)
-     {
-         // The wrapped comparator gets numHits + 1 slots; the last one (index numHits)
-         // is handed to the join comparator as its spare slot.
-         FieldComparator wrappedFieldComparator = base.GetComparator(numHits + 1, sortPos);
-         int spareSlot = numHits;
-
-         if (_order)
-         {
-             return new ToParentBlockJoinFieldComparator.Highest(wrappedFieldComparator, _parentFilter, _childFilter, spareSlot);
-         }
-
-         return new ToParentBlockJoinFieldComparator.Lowest(wrappedFieldComparator, _parentFilter, _childFilter, spareSlot);
-     }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
deleted file mode 100644
index e5f6d16..0000000
--- a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
+++ /dev/null
@@ -1,86 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
- <PropertyGroup>
- <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
- <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
- <ProjectGuid>{4C1B794F-8158-45E6-85B3-2C46569BEBC2}</ProjectGuid>
- <OutputType>Library</OutputType>
- <AppDesignerFolder>Properties</AppDesignerFolder>
- <RootNamespace>Lucene.Net.Tests.Join</RootNamespace>
- <AssemblyName>Lucene.Net.Tests.Join</AssemblyName>
- <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
- <FileAlignment>512</FileAlignment>
- </PropertyGroup>
- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
- <DebugSymbols>true</DebugSymbols>
- <DebugType>full</DebugType>
- <Optimize>false</Optimize>
- <OutputPath>bin\Debug\</OutputPath>
- <DefineConstants>DEBUG;TRACE</DefineConstants>
- <ErrorReport>prompt</ErrorReport>
- <WarningLevel>4</WarningLevel>
- </PropertyGroup>
- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
- <DebugType>pdbonly</DebugType>
- <Optimize>true</Optimize>
- <OutputPath>bin\Release\</OutputPath>
- <DefineConstants>TRACE</DefineConstants>
- <ErrorReport>prompt</ErrorReport>
- <WarningLevel>4</WarningLevel>
- </PropertyGroup>
- <ItemGroup>
- <Reference Include="Apache.NMS, Version=1.6.0.3083, Culture=neutral, PublicKeyToken=82756feee3957618, processorArchitecture=MSIL">
- <HintPath>..\packages\Apache.NMS.1.6.0.3083\lib\net40\Apache.NMS.dll</HintPath>
- <Private>True</Private>
- </Reference>
- <Reference Include="nunit.framework, Version=2.6.3.13283, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77, processorArchitecture=MSIL">
- <HintPath>..\packages\NUnit.2.6.3\lib\nunit.framework.dll</HintPath>
- <Private>True</Private>
- </Reference>
- <Reference Include="System" />
- <Reference Include="System.Core" />
- <Reference Include="System.Xml.Linq" />
- <Reference Include="System.Data.DataSetExtensions" />
- <Reference Include="Microsoft.CSharp" />
- <Reference Include="System.Data" />
- <Reference Include="System.Net.Http" />
- <Reference Include="System.Xml" />
- </ItemGroup>
- <ItemGroup>
- <Compile Include="Properties\AssemblyInfo.cs" />
- <Compile Include="TestBlockJoin.cs" />
- <Compile Include="TestBlockJoinSorting.cs" />
- <Compile Include="TestBlockJoinValidation.cs" />
- <Compile Include="TestJoinUtil.cs" />
- </ItemGroup>
- <ItemGroup>
- <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
- <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
- <Name>Lucene.Net.Grouping</Name>
- </ProjectReference>
- <ProjectReference Include="..\Lucene.Net.Join\Lucene.Net.Join.csproj">
- <Project>{e8a339c7-fcf6-4a72-8586-56d8961d7b99}</Project>
- <Name>Lucene.Net.Join</Name>
- </ProjectReference>
- <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
- <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
- <Name>Lucene.Net</Name>
- </ProjectReference>
- <ProjectReference Include="..\src\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
- <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
- <Name>Lucene.Net.TestFramework</Name>
- </ProjectReference>
- </ItemGroup>
- <ItemGroup>
- <None Include="packages.config" />
- </ItemGroup>
- <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
- <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
- Other similar extension points exist, see Microsoft.Common.targets.
- <Target Name="BeforeBuild">
- </Target>
- <Target Name="AfterBuild">
- </Target>
- -->
-</Project>
\ No newline at end of file
[06/17] lucenenet git commit: Introduced tests for Lucene.Net.Join
Posted by sy...@apache.org.
Introduced tests for Lucene.Net.Join
Fixing bugs along the way.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/dff959ff
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/dff959ff
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/dff959ff
Branch: refs/heads/master
Commit: dff959ff4125a1bd78b3caffe0ae7e99bd0bc6bb
Parents: 47f20b9
Author: Josh Sullivan <ja...@gmail.com>
Authored: Mon Aug 17 14:36:54 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Mon Aug 17 14:36:54 2015 -0400
----------------------------------------------------------------------
Lucene.Net.Join/Lucene.Net.Join.csproj | 1 +
Lucene.Net.Join/ToChildBlockJoinQuery.cs | 2 +-
.../ToParentBlockJoinFieldComparator.cs | 12 +-
Lucene.Net.Join/ToParentBlockJoinSortField.cs | 78 +
.../Lucene.Net.Tests.Join.csproj | 22 +-
.../Properties/AssemblyInfo.cs | 2 +-
Lucene.Net.Tests.Join/TestBlockJoin.cs | 1598 +++++++++++++++++-
Lucene.Net.Tests.Join/TestBlockJoinSorting.cs | 277 +++
Lucene.Net.Tests.Join/packages.config | 5 +
Lucene.Net.sln | 14 +
10 files changed, 2000 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Join/Lucene.Net.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Lucene.Net.Join.csproj b/Lucene.Net.Join/Lucene.Net.Join.csproj
index 30e64c6..2222b0e 100644
--- a/Lucene.Net.Join/Lucene.Net.Join.csproj
+++ b/Lucene.Net.Join/Lucene.Net.Join.csproj
@@ -53,6 +53,7 @@
<Compile Include="ToParentBlockJoinCollector.cs" />
<Compile Include="ToParentBlockJoinFieldComparator.cs" />
<Compile Include="ToParentBlockJoinQuery.cs" />
+ <Compile Include="ToParentBlockJoinSortField.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Join/ToChildBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToChildBlockJoinQuery.cs b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
index 035301e..f16e2a9 100644
--- a/Lucene.Net.Join/ToChildBlockJoinQuery.cs
+++ b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
@@ -390,7 +390,7 @@ namespace Lucene.Net.Join
public override object Clone()
{
- return new ToChildBlockJoinQuery((ToChildBlockJoinQuery) _origParentQuery.Clone(), _parentsFilter, _doScores);
+ return new ToChildBlockJoinQuery((Query) _origParentQuery.Clone(), _parentsFilter, _doScores);
}
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs b/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
index 4386e39..c41fd50 100644
--- a/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
+++ b/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
@@ -34,11 +34,11 @@ namespace Lucene.Net.Join
private readonly Filter _childFilter;
private readonly int _spareSlot;
- private FieldComparator<object> _wrappedComparator;
+ private FieldComparator _wrappedComparator;
private FixedBitSet _parentDocuments;
private FixedBitSet _childDocuments;
- internal ToParentBlockJoinFieldComparator(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ private ToParentBlockJoinFieldComparator(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
{
_wrappedComparator = wrappedComparator;
_parentFilter = parentFilter;
@@ -98,7 +98,7 @@ namespace Lucene.Net.Join
_parentDocuments = iterator != null ? ToFixedBitSet(iterator, context.AtomicReader.MaxDoc) : null;
}
- _wrappedComparator = _wrappedComparator.SetNextReader(context) as FieldComparator<object>;
+ _wrappedComparator = _wrappedComparator.SetNextReader(context);
return this;
}
@@ -137,7 +137,8 @@ namespace Lucene.Net.Join
/// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
/// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
/// inside the parent document scope is most competitive. </param>
- public Lowest(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+ public Lowest(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ : base(wrappedComparator, parentFilter, childFilter, spareSlot)
{
}
@@ -271,7 +272,8 @@ namespace Lucene.Net.Join
/// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
/// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
/// inside the parent document scope is most competitive. </param>
- public Highest(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+ public Highest(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ : base(wrappedComparator, parentFilter, childFilter, spareSlot)
{
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Join/ToParentBlockJoinSortField.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinSortField.cs b/Lucene.Net.Join/ToParentBlockJoinSortField.cs
new file mode 100644
index 0000000..aa2a3b6
--- /dev/null
+++ b/Lucene.Net.Join/ToParentBlockJoinSortField.cs
@@ -0,0 +1,78 @@
+using System;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A special sort field that allows sorting parent docs based on nested / child level fields.
+ /// Based on the sort order it either takes the document with the lowest or highest field value into account.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinSortField : SortField
+ {
+ private readonly bool Order;
+ private readonly Filter ParentFilter;
+ private readonly Filter ChildFilter;
+
+ /// <summary>
+ /// Create ToParentBlockJoinSortField. The parent document ordering is based on child document ordering (reverse).
+ /// </summary>
+ /// <param name="field"> The sort field on the nested / child level. </param>
+ /// <param name="type"> The sort type on the nested / child level. </param>
+ /// <param name="reverse"> Whether natural order should be reversed on the nested / child level. </param>
+ /// <param name="parentFilter"> Filter that identifies the parent documents. </param>
+ /// <param name="childFilter"> Filter that defines which child documents participates in sorting. </param>
+ public ToParentBlockJoinSortField(string field, Type_e type, bool reverse, Filter parentFilter, Filter childFilter) : base(field, type, reverse)
+ {
+ Order = reverse;
+ ParentFilter = parentFilter;
+ ChildFilter = childFilter;
+ }
+
+ /// <summary>
+ /// Create ToParentBlockJoinSortField.
+ /// </summary>
+ /// <param name="field"> The sort field on the nested / child level. </param>
+ /// <param name="type"> The sort type on the nested / child level. </param>
+ /// <param name="reverse"> Whether natural order should be reversed on the nested / child document level. </param>
+ /// <param name="order"> Whether natural order should be reversed on the parent level. </param>
+ /// <param name="parentFilter"> Filter that identifies the parent documents. </param>
+ /// <param name="childFilter"> Filter that defines which child documents participates in sorting. </param>
+ public ToParentBlockJoinSortField(string field, Type_e type, bool reverse, bool order, Filter parentFilter, Filter childFilter)
+ : base(field, type, reverse)
+ {
+ Order = order;
+ ParentFilter = parentFilter;
+ ChildFilter = childFilter;
+ }
+
+ public override FieldComparator GetComparator(int numHits, int sortPos)
+ {
+ var wrappedFieldComparator = base.GetComparator(numHits + 1, sortPos);
+ if (Order)
+ {
+ return new ToParentBlockJoinFieldComparator.Highest(wrappedFieldComparator, ParentFilter, ChildFilter, numHits);
+ }
+
+ return new ToParentBlockJoinFieldComparator.Lowest(wrappedFieldComparator, ParentFilter, ChildFilter, numHits);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
index 30d5a7b..eff35a9 100644
--- a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
+++ b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
@@ -4,7 +4,7 @@
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
- <ProjectGuid>{1866F8E4-ABF5-4CBE-B23B-4BADF6CD20DC}</ProjectGuid>
+ <ProjectGuid>{4C1B794F-8158-45E6-85B3-2C46569BEBC2}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>Lucene.Net.Tests.Join</RootNamespace>
@@ -30,6 +30,14 @@
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
+ <Reference Include="Apache.NMS, Version=1.6.0.3083, Culture=neutral, PublicKeyToken=82756feee3957618, processorArchitecture=MSIL">
+ <HintPath>..\packages\Apache.NMS.1.6.0.3083\lib\net40\Apache.NMS.dll</HintPath>
+ <Private>True</Private>
+ </Reference>
+ <Reference Include="nunit.framework, Version=2.6.3.13283, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77, processorArchitecture=MSIL">
+ <HintPath>..\packages\NUnit.2.6.3\lib\nunit.framework.dll</HintPath>
+ <Private>True</Private>
+ </Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
@@ -42,8 +50,13 @@
<ItemGroup>
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="TestBlockJoin.cs" />
+ <Compile Include="TestBlockJoinSorting.cs" />
</ItemGroup>
<ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
+ <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
+ <Name>Lucene.Net.Grouping</Name>
+ </ProjectReference>
<ProjectReference Include="..\Lucene.Net.Join\Lucene.Net.Join.csproj">
<Project>{e8a339c7-fcf6-4a72-8586-56d8961d7b99}</Project>
<Name>Lucene.Net.Join</Name>
@@ -52,6 +65,13 @@
<Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
<Name>Lucene.Net</Name>
</ProjectReference>
+ <ProjectReference Include="..\src\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+ <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
+ <Name>Lucene.Net.TestFramework</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="packages.config" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs b/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
index cd01ea7..f94805a 100644
--- a/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
+++ b/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
@@ -20,7 +20,7 @@ using System.Runtime.InteropServices;
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
-[assembly: Guid("1866f8e4-abf5-4cbe-b23b-4badf6cd20dc")]
+[assembly: Guid("4c1b794f-8158-45e6-85b3-2c46569bebc2")]
// Version information for an assembly consists of the following four values:
//
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Tests.Join/TestBlockJoin.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestBlockJoin.cs b/Lucene.Net.Tests.Join/TestBlockJoin.cs
index 1278782..7c6861f 100644
--- a/Lucene.Net.Tests.Join/TestBlockJoin.cs
+++ b/Lucene.Net.Tests.Join/TestBlockJoin.cs
@@ -1,7 +1,1599 @@
-namespace Lucene.Net.Tests.Join
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Grouping;
+using Lucene.Net.Index;
+using Lucene.Net.Join;
+using Lucene.Net.Randomized;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Tests.Join
{
- public class TestBlockJoin
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestBlockJoin : LuceneTestCase
{
-
+ // One resume...
+ private Document MakeResume(string name, string country)
+ {
+ Document resume = new Document();
+ resume.Add(NewStringField("docType", "resume", Field.Store.NO));
+ resume.Add(NewStringField("name", name, Field.Store.YES));
+ resume.Add(NewStringField("country", country, Field.Store.NO));
+ return resume;
+ }
+
+ // ... has multiple jobs
+ private Document MakeJob(string skill, int year)
+ {
+ Document job = new Document();
+ job.Add(NewStringField("skill", skill, Field.Store.YES));
+ job.Add(new IntField("year", year, Field.Store.NO));
+ job.Add(new StoredField("year", year));
+ return job;
+ }
+
+ // ... has multiple qualifications
+ private Document MakeQualification(string qualification, int year)
+ {
+ Document job = new Document();
+ job.Add(NewStringField("qualification", qualification, Field.Store.YES));
+ job.Add(new IntField("year", year, Field.Store.NO));
+ return job;
+ }
+
+ [Test]
+ public void TestEmptyChildFilter()
+ {
+ Directory dir = NewDirectory();
+ IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+ config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
+ // we don't want to merge - since we rely on certain segment setup
+ IndexWriter w = new IndexWriter(dir, config);
+
+ IList<Document> docs = new List<Document>();
+
+ docs.Add(MakeJob("java", 2007));
+ docs.Add(MakeJob("python", 2010));
+ docs.Add(MakeResume("Lisa", "United Kingdom"));
+ w.AddDocuments(docs);
+
+ docs.Clear();
+ docs.Add(MakeJob("ruby", 2005));
+ docs.Add(MakeJob("java", 2006));
+ docs.Add(MakeResume("Frank", "United States"));
+ w.AddDocuments(docs);
+ w.Commit();
+ int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
+ for (int i = 0; i < num; i++)
+ {
+ docs.Clear();
+ docs.Add(MakeJob("java", 2007));
+ w.AddDocuments(docs);
+ }
+
+ IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
+ w.Dispose();
+ assertTrue(r.Leaves.size() > 1);
+ IndexSearcher s = new IndexSearcher(r);
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ BooleanQuery childQuery = new BooleanQuery();
+ childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+ childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+
+ BooleanQuery fullQuery = new BooleanQuery();
+ fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
+ fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
+ s.Search(fullQuery, c);
+ TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
+ assertFalse(float.IsNaN(results.MaxScore));
+ assertEquals(1, results.TotalGroupedHitCount);
+ assertEquals(1, results.Groups.Length);
+ GroupDocs<int> group = results.Groups[0];
+ Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
+ assertEquals("java", childDoc.Get("skill"));
+ assertNotNull(group.GroupValue);
+ Document parentDoc = s.Doc(group.GroupValue);
+ assertEquals("Lisa", parentDoc.Get("name"));
+
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestSimple()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
+
+ IList<Document> docs = new List<Document>();
+
+ docs.Add(MakeJob("java", 2007));
+ docs.Add(MakeJob("python", 2010));
+ docs.Add(MakeResume("Lisa", "United Kingdom"));
+ w.AddDocuments(docs);
+
+ docs.Clear();
+ docs.Add(MakeJob("ruby", 2005));
+ docs.Add(MakeJob("java", 2006));
+ docs.Add(MakeResume("Frank", "United States"));
+ w.AddDocuments(docs);
+
+ IndexReader r = w.Reader;
+ w.Dispose();
+ IndexSearcher s = NewSearcher(r);
+
+ // Create a filter that defines "parent" documents in the index - in this case resumes
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ // Define child document criteria (finds an example of relevant work experience)
+ BooleanQuery childQuery = new BooleanQuery();
+ childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+ childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+ // Define parent document criteria (find a resident in the UK)
+ Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+ // Wrap the child document query to 'join' any matches
+ // up to corresponding parent:
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+
+ // Combine the parent and nested child queries into a single query for a candidate
+ BooleanQuery fullQuery = new BooleanQuery();
+ fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
+ fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
+
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
+
+ s.Search(fullQuery, c);
+
+ TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
+ assertFalse(float.IsNaN(results.MaxScore));
+
+ //assertEquals(1, results.totalHitCount);
+ assertEquals(1, results.TotalGroupedHitCount);
+ assertEquals(1, results.Groups.Length);
+
+ GroupDocs<int> group = results.Groups[0];
+ assertEquals(1, group.TotalHits);
+ assertFalse(float.IsNaN(group.Score));
+
+ Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
+ //System.out.println(" doc=" + group.ScoreDocs[0].Doc);
+ assertEquals("java", childDoc.Get("skill"));
+ assertNotNull(group.GroupValue);
+ Document parentDoc = s.Doc(group.GroupValue);
+ assertEquals("Lisa", parentDoc.Get("name"));
+
+
+ //System.out.println("TEST: now test up");
+
+ // Now join "up" (map parent hits to child docs) instead...:
+ ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
+ BooleanQuery fullChildQuery = new BooleanQuery();
+ fullChildQuery.Add(new BooleanClause(parentJoinQuery, BooleanClause.Occur.MUST));
+ fullChildQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
+
+ //System.out.println("FULL: " + fullChildQuery);
+ TopDocs hits = s.Search(fullChildQuery, 10);
+ assertEquals(1, hits.TotalHits);
+ childDoc = s.Doc(hits.ScoreDocs[0].Doc);
+ //System.out.println("CHILD = " + childDoc + " docID=" + hits.ScoreDocs[0].Doc);
+ assertEquals("java", childDoc.Get("skill"));
+ assertEquals(2007, childDoc.GetField("year").NumericValue);
+ assertEquals("Lisa", GetParentDoc(r, parentsFilter, hits.ScoreDocs[0].Doc).Get("name"));
+
+ // Test with filter on child docs:
+ assertEquals(0, s.Search(fullChildQuery, new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))), 1).TotalHits);
+
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestBugCausedByRewritingTwice()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
+
+ IList<Document> docs = new List<Document>();
+
+ for (int i = 0; i < 10; i++)
+ {
+ docs.Clear();
+ docs.Add(MakeJob("ruby", i));
+ docs.Add(MakeJob("java", 2007));
+ docs.Add(MakeResume("Frank", "United States"));
+ w.AddDocuments(docs);
+ }
+
+ IndexReader r = w.Reader;
+ w.Dispose();
+ IndexSearcher s = NewSearcher(r);
+
+ MultiTermQuery qc = NumericRangeQuery.NewIntRange("year", 2007, 2007, true, true);
+ // Hacky: this causes the query to need 2 rewrite
+ // iterations:
+ qc.SetRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ int h1 = qc.GetHashCode();
+ Query qw1 = qc.Rewrite(r);
+ int h2 = qw1.GetHashCode();
+ Query qw2 = qw1.Rewrite(r);
+ int h3 = qw2.GetHashCode();
+
+ assertTrue(h1 != h2);
+ assertTrue(h2 != h3);
+ assertTrue(h3 != h1);
+
+ ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);
+
+ s.Search(qp, c);
+ TopGroups<int> groups = c.GetTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
+ foreach (GroupDocs<int> group in groups.Groups)
+ {
+ assertEquals(1, group.TotalHits);
+ }
+
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ protected QueryWrapperFilter Skill(string skill)
+ {
+ return new QueryWrapperFilter(new TermQuery(new Term("skill", skill)));
+ }
+
+ [Test]
+ public virtual void TestSimpleFilter()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
+
+ IList<Document> docs = new List<Document>();
+ docs.Add(MakeJob("java", 2007));
+ docs.Add(MakeJob("python", 2010));
+ CollectionsHelper.Shuffle(docs);
+ docs.Add(MakeResume("Lisa", "United Kingdom"));
+
+ IList<Document> docs2 = new List<Document>();
+ docs2.Add(MakeJob("ruby", 2005));
+ docs2.Add(MakeJob("java", 2006));
+ CollectionsHelper.Shuffle(docs2);
+ docs2.Add(MakeResume("Frank", "United States"));
+
+ AddSkillless(w);
+ bool turn = Random().NextBoolean();
+ w.AddDocuments(turn ? docs : docs2);
+
+ AddSkillless(w);
+
+ w.AddDocuments(!turn ? docs : docs2);
+
+ AddSkillless(w);
+
+ IndexReader r = w.Reader;
+ w.Dispose();
+ IndexSearcher s = NewSearcher(r);
+
+ // Create a filter that defines "parent" documents in the index - in this case resumes
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ // Define child document criteria (finds an example of relevant work experience)
+ BooleanQuery childQuery = new BooleanQuery();
+ childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+ childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+ // Define parent document criteria (find a resident in the UK)
+ Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+ // Wrap the child document query to 'join' any matches
+ // up to corresponding parent:
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+
+ assertEquals("no filter - both passed", 2, s.Search(childJoinQuery, 10).TotalHits);
+
+ assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, parentsFilter, 10).TotalHits);
+ assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).TotalHits);
+
+ // not found test
+ assertEquals("noone live there", 0, s.Search(childJoinQuery, new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).TotalHits);
+ assertEquals("noone live there", 0, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).TotalHits);
+
+ // apply the UK filter by the searcher
+ TopDocs ukOnly = s.Search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
+ assertEquals("has filter - single passed", 1, ukOnly.TotalHits);
+ assertEquals("Lisa", r.Document(ukOnly.ScoreDocs[0].Doc).Get("name"));
+
+ // looking for US candidates
+ TopDocs usThen = s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
+ assertEquals("has filter - single passed", 1, usThen.TotalHits);
+ assertEquals("Frank", r.Document(usThen.ScoreDocs[0].Doc).Get("name"));
+
+
+ TermQuery us = new TermQuery(new Term("country", "United States"));
+ assertEquals("@ US we have java and ruby", 2, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), 10).TotalHits);
+
+ assertEquals("java skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), Skill("java"), 10).TotalHits);
+
+ BooleanQuery rubyPython = new BooleanQuery();
+ rubyPython.Add(new TermQuery(new Term("skill", "ruby")), BooleanClause.Occur.SHOULD);
+ rubyPython.Add(new TermQuery(new Term("skill", "python")), BooleanClause.Occur.SHOULD);
+ assertEquals("ruby skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), new QueryWrapperFilter(rubyPython), 10).TotalHits);
+
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ private void AddSkillless(RandomIndexWriter w)
+ {
+ if (Random().NextBoolean())
+ {
+ w.AddDocument(MakeResume("Skillless", Random().NextBoolean() ? "United Kingdom" : "United States"));
+ }
+ }
+
+ private Document GetParentDoc(IndexReader reader, Filter parents, int childDocID)
+ {
+ IList<AtomicReaderContext> leaves = reader.Leaves;
+ int subIndex = ReaderUtil.SubIndex(childDocID, leaves);
+ AtomicReaderContext leaf = leaves[subIndex];
+ FixedBitSet bits = (FixedBitSet)parents.GetDocIdSet(leaf, null);
+ return leaf.AtomicReader.Document(bits.NextSetBit(childDocID - leaf.DocBase));
+ }
+
+ /// <summary>
+ /// Runs a <see cref="ToParentBlockJoinQuery"/> against an empty index, first on its own and then
+ /// as a MUST clause of a boosted <see cref="BooleanQuery"/>. The test makes no assertions of its
+ /// own beyond <c>QueryUtils.Check</c>; it passes when neither search throws.
+ /// </summary>
+ [Test]
+ public void TestBoostBug()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     Query matchAllChildren = new MatchAllDocsQuery();
+     Filter matchAllParents = new QueryWrapperFilter(new MatchAllDocsQuery());
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(matchAllChildren, matchAllParents, ScoreMode.Avg);
+     QueryUtils.Check(Random(), joinQuery, searcher);
+     searcher.Search(joinQuery, 10);
+
+     // The boost is applied to the enclosing boolean query, not to the join query itself.
+     BooleanQuery boosted = new BooleanQuery { Boost = 2f };
+     boosted.Add(joinQuery, BooleanClause.Occur.MUST);
+     searcher.Search(boosted, 10);
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Indexes two parent/child blocks, checks the <see cref="ScoreMode.Total"/> scores of a
+ /// year-range join query, then deletes every doc with skill "java" and checks that each
+ /// parent's score drops by one while the parent doc IDs stay the same.
+ /// </summary>
+ [Test]
+ public void TestNestedDocScoringWithDeletes()
+ {
+     Directory directory = NewDirectory();
+     IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config);
+
+     // With NoMergePolicy in place the random force-merge assertion cannot hold, so switch it off.
+     writer.DoRandomForceMergeAssert = false;
+
+     // First block: two jobs followed by their resume.
+     IList<Document> block = new List<Document>
+     {
+         MakeJob("java", 2007),
+         MakeJob("python", 2010),
+         MakeResume("Lisa", "United Kingdom")
+     };
+     writer.AddDocuments(block);
+
+     // Second block: three jobs followed by their resume.
+     block.Clear();
+     block.Add(MakeJob("c", 1999));
+     block.Add(MakeJob("ruby", 2005));
+     block.Add(MakeJob("java", 2006));
+     block.Add(MakeResume("Frank", "United States"));
+     writer.AddDocuments(block);
+
+     writer.Commit();
+     IndexSearcher searcher = NewSearcher(DirectoryReader.Open(directory));
+
+     Filter resumes = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+     Query anyYear = NumericRangeQuery.NewIntRange("year", 1990, 2010, true, true);
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(anyYear, resumes, ScoreMode.Total);
+
+     // Before the delete: doc 6 sums three children, doc 2 sums two.
+     TopDocs hits = searcher.Search(joinQuery, 10);
+     assertEquals(2, hits.TotalHits);
+     assertEquals(6, hits.ScoreDocs[0].Doc);
+     assertEquals(3.0f, hits.ScoreDocs[0].Score, 0.0f);
+     assertEquals(2, hits.ScoreDocs[1].Doc);
+     assertEquals(2.0f, hits.ScoreDocs[1].Score, 0.0f);
+
+     searcher.IndexReader.Dispose();
+     writer.DeleteDocuments(new Term("skill", "java"));
+     writer.Dispose();
+     searcher = NewSearcher(DirectoryReader.Open(directory));
+
+     // After the delete: each block lost one "java" child, so each total is one lower.
+     hits = searcher.Search(joinQuery, 10);
+     assertEquals(2, hits.TotalHits);
+     assertEquals(6, hits.ScoreDocs[0].Doc);
+     assertEquals(2.0f, hits.ScoreDocs[0].Score, 0.0f);
+     assertEquals(2, hits.ScoreDocs[1].Doc);
+     assertEquals(1.0f, hits.ScoreDocs[1].Score, 0.0f);
+
+     searcher.IndexReader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Creates the value pools for a random number of fields (as drawn by
+ /// <c>TestUtil.NextInt(Random(), 2, 4)</c>). Field 0 always gets two values; every other field gets
+ /// a random count drawn with <c>TestUtil.NextInt(Random(), 1, maxUniqueValues)</c>. Each value is a
+ /// random realistic unicode string.
+ /// </summary>
+ /// <param name="maxUniqueValues">Upper bound passed to the random value-count draw.</param>
+ /// <returns>A jagged array indexed by field ID, then by value index.</returns>
+ private string[][] GetRandomFields(int maxUniqueValues)
+ {
+     int fieldCount = TestUtil.NextInt(Random(), 2, 4);
+     string[][] pools = new string[fieldCount][];
+
+     for (int f = 0; f < pools.Length; f++)
+     {
+         int poolSize = f == 0 ? 2 : TestUtil.NextInt(Random(), 1, maxUniqueValues);
+
+         string[] pool = new string[poolSize];
+         pools[f] = pool;
+         for (int v = 0; v < poolSize; v++)
+         {
+             pool[v] = TestUtil.RandomRealisticUnicodeString(Random());
+         }
+     }
+
+     return pools;
+ }
+
+ /// <summary>
+ /// Picks a random entry of <paramref name="values"/> and wraps it in a term on field "parent0".
+ /// </summary>
+ private Term RandomParentTerm(string[] values)
+ {
+     int pick = Random().Next(values.Length);
+     string text = values[pick];
+     return new Term("parent0", text);
+ }
+
+ /// <summary>
+ /// Picks a random entry of <paramref name="values"/> and wraps it in a term on field "child0".
+ /// </summary>
+ private Term RandomChildTerm(string[] values)
+ {
+     int pick = Random().Next(values.Length);
+     string text = values[pick];
+     return new Term("child0", text);
+ }
+
+ /// <summary>
+ /// Builds a random <see cref="Sort"/>: one, two or zero STRING sort fields named
+ /// <paramref name="prefix"/> followed by a random index below <paramref name="numFields"/>, each
+ /// with a random boolean third argument, always followed by an INT sort on
+ /// <paramref name="prefix"/> + "ID" to break ties.
+ /// </summary>
+ /// <param name="prefix">Field name prefix, e.g. "parent" or "child".</param>
+ /// <param name="numFields">Exclusive upper bound for the random field index.</param>
+ private Sort GetRandomSort(string prefix, int numFields)
+ {
+     // TODO: sometimes sort by score; problem is scores are
+     // not comparable across the two indices
+
+     // Decide how many random string sort fields to emit; the draws happen in the same order
+     // as the field construction below consumes randomness.
+     int randomFieldCount;
+     if (Random().NextBoolean())
+     {
+         randomFieldCount = 1;
+     }
+     else if (Random().NextBoolean())
+     {
+         randomFieldCount = 2;
+     }
+     else
+     {
+         randomFieldCount = 0;
+     }
+
+     IList<SortField> criteria = new List<SortField>();
+     for (int i = 0; i < randomFieldCount; i++)
+     {
+         string fieldName = prefix + Random().Next(numFields);
+         bool flag = Random().NextBoolean();
+         criteria.Add(new SortField(fieldName, SortField.Type_e.STRING, flag));
+     }
+
+     // Tie-breaker so the resulting order is fully determined.
+     criteria.Add(new SortField(prefix + "ID", SortField.Type_e.INT));
+     return new Sort(criteria.ToArray());
+ }
+
+ /// <summary>
+ /// Randomized cross-check of the block-join queries. The same random parent/child data is
+ /// written to two indices: <c>dir</c> receives one fully denormalized document per child (a clone
+ /// of the parent plus the child fields), while <c>joinDir</c> receives each parent's children
+ /// followed by the parent as one block. Each iteration then builds random queries and compares
+ /// (a) child-to-parent joins, collected with <see cref="ToParentBlockJoinCollector"/>, against a
+ /// plain search of the denormalized index via <c>CompareHits</c>, and (b) parent-to-child joins
+ /// via <see cref="ToChildBlockJoinQuery"/> against the denormalized index via
+ /// <c>CompareChildHits</c>.
+ /// </summary>
+ [Test]
+ public void TestRandom()
+ {
+ // We build two indices at once: one normalized (which
+ // ToParentBlockJoinQuery/Collector,
+ // ToChildBlockJoinQuery can query) and the other w/
+ // the same docs, just fully denormalized:
+ Directory dir = NewDirectory();
+ Directory joinDir = NewDirectory();
+
+ int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
+ //final int numParentDocs = 30;
+
+ // Values for parent fields:
+ string[][] parentFields = GetRandomFields(numParentDocs / 2);
+ // Values for child fields:
+ string[][] childFields = GetRandomFields(numParentDocs);
+
+ bool doDeletes = Random().NextBoolean();
+ IList<int> toDelete = new List<int>();
+
+ // TODO: parallel star join, nested join cases too!
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
+ RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir);
+ for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
+ {
+ // parentDoc is the template cloned into every denormalized child doc;
+ // parentJoinDoc is the parent actually written (last) into the join block.
+ Document parentDoc = new Document();
+ Document parentJoinDoc = new Document();
+ Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
+ parentDoc.Add(id);
+ parentJoinDoc.Add(id);
+ parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
+ for (int field = 0; field < parentFields.Length; field++)
+ {
+ // Each parent field is present with probability 0.9
+ if (Random().NextDouble() < 0.9)
+ {
+ Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);
+ parentDoc.Add(f);
+ parentJoinDoc.Add(f);
+ }
+ }
+
+ // blockID is only indexed when deletes are enabled; it is the term used further down
+ // to delete a parent together with all of its children from both indices.
+ if (doDeletes)
+ {
+ parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
+ parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
+ }
+
+ IList<Document> joinDocs = new List<Document>();
+
+ if (VERBOSE)
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.Append("parentID=").Append(parentDoc.Get("parentID"));
+ for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
+ {
+ string parent = parentDoc.Get("parent" + fieldID);
+ if (parent != null)
+ {
+ sb.Append(" parent" + fieldID + "=" + parent);
+ }
+ }
+ Console.WriteLine(" " + sb);
+ }
+
+ int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
+ for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
+ {
+ // Denormalize: copy all parent fields into child doc:
+ Document childDoc = TestUtil.CloneDocument(parentDoc);
+ Document joinChildDoc = new Document();
+ joinDocs.Add(joinChildDoc);
+
+ Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);
+ childDoc.Add(childID);
+ joinChildDoc.Add(childID);
+
+ for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
+ {
+ // Each child field is present with probability 0.9
+ if (Random().NextDouble() < 0.9)
+ {
+ Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);
+ childDoc.Add(f);
+ joinChildDoc.Add(f);
+ }
+ }
+
+ if (VERBOSE)
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.Append("childID=").Append(joinChildDoc.Get("childID"));
+ for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
+ {
+ string child = joinChildDoc.Get("child" + fieldID);
+ if (child != null)
+ {
+ sb.Append(" child" + fieldID + "=" + child);
+ }
+ }
+ Console.WriteLine(" " + sb);
+ }
+
+ // The denormalized childDoc already carries blockID via the cloned parentDoc;
+ // the join-side child needs it added explicitly.
+ if (doDeletes)
+ {
+ joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
+ }
+
+ w.AddDocument(childDoc);
+ }
+
+ // Parent last:
+ joinDocs.Add(parentJoinDoc);
+ joinW.AddDocuments(joinDocs);
+
+ // When deletes are enabled, each parent has a 1-in-30 chance of being queued for deletion
+ if (doDeletes && Random().Next(30) == 7)
+ {
+ toDelete.Add(parentDocID);
+ }
+ }
+
+ // Apply the queued deletes to both indices so they stay comparable
+ foreach (int deleteID in toDelete)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("DELETE parentID=" + deleteID);
+ }
+ w.DeleteDocuments(new Term("blockID", "" + deleteID));
+ joinW.DeleteDocuments(new Term("blockID", "" + deleteID));
+ }
+
+ IndexReader r = w.Reader;
+ w.Dispose();
+ IndexReader joinR = joinW.Reader;
+ joinW.Dispose();
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: reader=" + r);
+ Console.WriteLine("TEST: joinReader=" + joinR);
+
+ for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
+ {
+ Console.WriteLine(" docID=" + docIDX + " doc=" + joinR.Document(docIDX));
+ }
+ }
+
+ IndexSearcher s = NewSearcher(r);
+
+ IndexSearcher joinS = new IndexSearcher(joinR);
+
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));
+
+ int iters = 200 * RANDOM_MULTIPLIER;
+
+ for (int iter = 0; iter < iters; iter++)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);
+ }
+
+ // Random child query: a single term, a 2-4 clause boolean (at most one MUST/MUST_NOT on
+ // child0, the rest SHOULD on other fields), or a two-clause boolean with a MUST on child0.
+ Query childQuery;
+ if (Random().Next(3) == 2)
+ {
+ int childFieldID = Random().Next(childFields.Length);
+ childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
+ }
+ else if (Random().Next(3) == 2)
+ {
+ BooleanQuery bq = new BooleanQuery();
+ childQuery = bq;
+ int numClauses = TestUtil.NextInt(Random(), 2, 4);
+ bool didMust = false;
+ for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
+ {
+ Query clause;
+ BooleanClause.Occur occur;
+ if (!didMust && Random().NextBoolean())
+ {
+ occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
+ clause = new TermQuery(RandomChildTerm(childFields[0]));
+ didMust = true;
+ }
+ else
+ {
+ occur = BooleanClause.Occur.SHOULD;
+ // Field index starts at 1 here: field 0 is the one used by the MUST/MUST_NOT clause
+ int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
+ clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
+ }
+ bq.Add(clause, occur);
+ }
+ }
+ else
+ {
+ BooleanQuery bq = new BooleanQuery();
+ childQuery = bq;
+
+ bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
+ int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
+ bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
+ }
+
+ // Pick one of the four score modes uniformly
+ int x = Random().Next(4);
+ ScoreMode agg;
+ if (x == 0)
+ {
+ agg = ScoreMode.None;
+ }
+ else if (x == 1)
+ {
+ agg = ScoreMode.Max;
+ }
+ else if (x == 2)
+ {
+ agg = ScoreMode.Total;
+ }
+ else
+ {
+ agg = ScoreMode.Avg;
+ }
+
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
+
+ // To run against the block-join index:
+ Query parentJoinQuery;
+
+ // Same query as parentJoinQuery, but to run against
+ // the fully denormalized index (so we can compare
+ // results):
+ Query parentQuery;
+
+ if (Random().NextBoolean())
+ {
+ parentQuery = childQuery;
+ parentJoinQuery = childJoinQuery;
+ }
+ else
+ {
+ // AND parent field w/ child field
+ BooleanQuery bq = new BooleanQuery();
+ parentJoinQuery = bq;
+ Term parentTerm = RandomParentTerm(parentFields[0]);
+ // Clause order is randomized independently for the join and the denormalized query
+ if (Random().NextBoolean())
+ {
+ bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
+ bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
+ }
+ else
+ {
+ bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
+ bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
+ }
+
+ BooleanQuery bq2 = new BooleanQuery();
+ parentQuery = bq2;
+ if (Random().NextBoolean())
+ {
+ bq2.Add(childQuery, BooleanClause.Occur.MUST);
+ bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
+ }
+ else
+ {
+ bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
+ bq2.Add(childQuery, BooleanClause.Occur.MUST);
+ }
+ }
+
+ Sort parentSort = GetRandomSort("parent", parentFields.Length);
+ Sort childSort = GetRandomSort("child", childFields.Length);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
+ }
+
+ // Merge both sorts:
+ IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
+ sortFields.AddRange(Arrays.AsList(childSort.GetSort()));
+ Sort parentAndChildSort = new Sort(sortFields.ToArray());
+
+ // Reference result: every matching denormalized doc, ordered by parent sort then child sort
+ TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
+ ScoreDoc[] hits = results.ScoreDocs;
+ for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
+ {
+ Document doc = s.Doc(hits[hitIDX].Doc);
+ //System.out.println(" score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
+ Console.WriteLine(" parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
+ FieldDoc fd = (FieldDoc)hits[hitIDX];
+ if (fd.Fields != null)
+ {
+ Console.Write(" ");
+ foreach (object o in fd.Fields)
+ {
+ if (o is BytesRef)
+ {
+ Console.Write(((BytesRef)o).Utf8ToString() + " ");
+ }
+ else
+ {
+ Console.Write(o + " ");
+ }
+ }
+ Console.WriteLine();
+ }
+ }
+ }
+
+ // Score tracking is only randomized when the join actually produces scores
+ bool trackScores;
+ bool trackMaxScore;
+ if (agg == ScoreMode.None)
+ {
+ trackScores = false;
+ trackMaxScore = false;
+ }
+ else
+ {
+ trackScores = Random().NextBoolean();
+ trackMaxScore = Random().NextBoolean();
+ }
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);
+
+ joinS.Search(parentJoinQuery, c);
+
+ int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
+ //final int hitsPerGroup = 100;
+ TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
+ if (joinResults != null)
+ {
+ GroupDocs<int>[] groups = joinResults.Groups;
+ for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
+ {
+ GroupDocs<int> group = groups[groupIDX];
+ if (group.GroupSortValues != null)
+ {
+ Console.Write(" ");
+ foreach (object o in group.GroupSortValues)
+ {
+ if (o is BytesRef)
+ {
+ Console.Write(((BytesRef)o).Utf8ToString() + " ");
+ }
+ else
+ {
+ Console.Write(o + " ");
+ }
+ }
+ Console.WriteLine();
+ }
+
+ assertNotNull(group.GroupValue);
+ Document parentDoc = joinS.Doc(group.GroupValue);
+ Console.WriteLine(" group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
+ for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
+ {
+ Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
+ //System.out.println(" score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
+ Console.WriteLine(" childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
+ }
+ }
+ }
+ }
+
+ // No hits in the denormalized index must correspond to a null group result from the collector
+ if (results.TotalHits == 0)
+ {
+ assertNull(joinResults);
+ }
+ else
+ {
+ CompareHits(r, joinR, results, joinResults);
+ TopDocs b = joinS.Search(childJoinQuery, 10);
+ foreach (ScoreDoc hit in b.ScoreDocs)
+ {
+ Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
+ // hit.Doc - 1 is the doc written just before the hit; blocks are written children
+ // first (childID 0..n-1) and parent last, so for a parent hit this is its last child
+ // and its childID gives the distance back to the first child of the block.
+ Document document = joinS.Doc(hit.Doc - 1);
+ int childId = Convert.ToInt32(document.Get("childID"));
+ assertTrue(explanation.IsMatch);
+ assertEquals(hit.Score, explanation.Value, 0.0f);
+ assertEquals(string.Format("Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);
+ }
+ }
+
+ // Test joining in the opposite direction (parent to
+ // child):
+
+ // Get random query against parent documents:
+ Query parentQuery2;
+ if (Random().Next(3) == 2)
+ {
+ int fieldID = Random().Next(parentFields.Length);
+ parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
+ }
+ else if (Random().Next(3) == 2)
+ {
+ BooleanQuery bq = new BooleanQuery();
+ parentQuery2 = bq;
+ int numClauses = TestUtil.NextInt(Random(), 2, 4);
+ bool didMust = false;
+ for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
+ {
+ Query clause;
+ BooleanClause.Occur occur;
+ if (!didMust && Random().NextBoolean())
+ {
+ occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
+ clause = new TermQuery(RandomParentTerm(parentFields[0]));
+ didMust = true;
+ }
+ else
+ {
+ occur = BooleanClause.Occur.SHOULD;
+ int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
+ clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
+ }
+ bq.Add(clause, occur);
+ }
+ }
+ else
+ {
+ BooleanQuery bq = new BooleanQuery();
+ parentQuery2 = bq;
+
+ bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
+ int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
+ bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
+ }
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);
+ }
+
+ // Maps parent query to child docs:
+ ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());
+
+ // To run against the block-join index:
+ Query childJoinQuery2;
+
+ // Same query as parentJoinQuery, but to run against
+ // the fully denormalized index (so we can compare
+ // results):
+ Query childQuery2;
+
+ // apply a filter to children
+ Filter childFilter2, childJoinFilter2;
+
+ if (Random().NextBoolean())
+ {
+ childQuery2 = parentQuery2;
+ childJoinQuery2 = parentJoinQuery2;
+ childFilter2 = null;
+ childJoinFilter2 = null;
+ }
+ else
+ {
+ // The same child term restricts both sides, but each side independently chooses
+ // whether to express it as a filter or as an extra MUST clause.
+ Term childTerm = RandomChildTerm(childFields[0]);
+ if (Random().NextBoolean()) // filtered case
+ {
+ childJoinQuery2 = parentJoinQuery2;
+ Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
+ childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
+ }
+ else
+ {
+ childJoinFilter2 = null;
+ // AND child field w/ parent query:
+ BooleanQuery bq = new BooleanQuery();
+ childJoinQuery2 = bq;
+ if (Random().NextBoolean())
+ {
+ bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
+ bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
+ }
+ else
+ {
+ bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
+ bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
+ }
+ }
+
+ if (Random().NextBoolean()) // filtered case
+ {
+ childQuery2 = parentQuery2;
+ Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
+ childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
+ }
+ else
+ {
+ childFilter2 = null;
+ BooleanQuery bq2 = new BooleanQuery();
+ childQuery2 = bq2;
+ if (Random().NextBoolean())
+ {
+ bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
+ bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
+ }
+ else
+ {
+ bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
+ bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
+ }
+ }
+ }
+
+ Sort childSort2 = GetRandomSort("child", childFields.Length);
+
+ // Search denormalized index:
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
+ }
+ TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
+ if (VERBOSE)
+ {
+ Console.WriteLine(" " + results2.TotalHits + " totalHits:");
+ foreach (ScoreDoc sd in results2.ScoreDocs)
+ {
+ Document doc = s.Doc(sd.Doc);
+ Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);
+ }
+ }
+
+ // Search join index:
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
+ }
+ TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
+ if (VERBOSE)
+ {
+ Console.WriteLine(" " + joinResults2.TotalHits + " totalHits:");
+ foreach (ScoreDoc sd in joinResults2.ScoreDocs)
+ {
+ Document doc = joinS.Doc(sd.Doc);
+ Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
+ Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);
+ }
+ }
+
+ CompareChildHits(r, joinR, results2, joinResults2);
+ }
+
+ r.Dispose();
+ joinR.Dispose();
+ dir.Dispose();
+ joinDir.Dispose();
+ }
+
+ /// <summary>
+ /// Asserts that a search of the denormalized index and a search of the block-join index returned
+ /// the same hits position by position: equal total hit count, equal number of score docs, equal
+ /// stored "childID" and equal sort field values. Scores are deliberately not compared.
+ /// </summary>
+ /// <param name="r">Reader over the denormalized index (resolves <paramref name="results"/>).</param>
+ /// <param name="joinR">Reader over the block-join index (resolves <paramref name="joinResults"/>).</param>
+ /// <param name="results">Hits from the denormalized index.</param>
+ /// <param name="joinResults">Hits from the block-join index.</param>
+ private void CompareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults)
+ {
+     assertEquals(results.TotalHits, joinResults.TotalHits);
+     assertEquals(results.ScoreDocs.Length, joinResults.ScoreDocs.Length);
+
+     int total = results.ScoreDocs.Length;
+     for (int pos = 0; pos < total; pos++)
+     {
+         ScoreDoc plainHit = results.ScoreDocs[pos];
+         ScoreDoc blockHit = joinResults.ScoreDocs[pos];
+
+         Document plainDoc = r.Document(plainHit.Doc);
+         Document blockDoc = joinR.Document(blockHit.Doc);
+         assertEquals("hit " + pos + " differs", plainDoc.Get("childID"), blockDoc.Get("childID"));
+
+         // Scores are expected to differ between the two indices, so only sort values are checked.
+         assertTrue(plainHit is FieldDoc);
+         assertTrue(blockHit is FieldDoc);
+
+         object[] plainSortValues = ((FieldDoc)plainHit).Fields;
+         object[] blockSortValues = ((FieldDoc)blockHit).Fields;
+         assertArrayEquals(plainSortValues, blockSortValues);
+     }
+ }
+
+ /// <summary>
+ /// Walks the grouped block-join results alongside the flat results from the denormalized index
+ /// and asserts that they agree. <paramref name="results"/> is complete, whereas each group in
+ /// <paramref name="joinResults"/> may hold only a prefix of its parent's children, so after each
+ /// group the flat cursor is advanced past any remaining hits that share the group's parentID.
+ /// </summary>
+ /// <param name="r">Reader over the denormalized index.</param>
+ /// <param name="joinR">Reader over the block-join index.</param>
+ /// <param name="results">Flat hits sorted by parent sort, then child sort.</param>
+ /// <param name="joinResults">Grouped hits from the block-join collector.</param>
+ private void CompareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<int> joinResults)
+ {
+     ScoreDoc[] flatHits = results.ScoreDocs;
+     GroupDocs<int>[] groups = joinResults.Groups;
+
+     // Position in flatHits; only ever moves forward.
+     int cursor = 0;
+
+     for (int g = 0; g < groups.Length; g++)
+     {
+         GroupDocs<int> current = groups[g];
+         ScoreDoc[] children = current.ScoreDocs;
+         assertNotNull(current.GroupValue);
+
+         Document parent = joinR.Document(current.GroupValue);
+         string parentID = parent.Get("parentID");
+         assertNotNull(parentID);
+         assertTrue(children.Length > 0);
+
+         foreach (ScoreDoc child in children)
+         {
+             Document flatDoc = r.Document(flatHits[cursor++].Doc);
+             Document blockDoc = joinR.Document(child.Doc);
+             assertEquals(parentID, flatDoc.Get("parentID"));
+             assertEquals(blockDoc.Get("childID"), flatDoc.Get("childID"));
+         }
+
+         bool moreGroupsFollow = g + 1 < groups.Length;
+         if (moreGroupsFollow)
+         {
+             // Skip the flat hits of this parent that the group did not include.
+             assertTrue(cursor < flatHits.Length);
+             while (parentID.Equals(r.Document(flatHits[cursor].Doc).Get("parentID")))
+             {
+                 cursor++;
+                 assertTrue(cursor < flatHits.Length);
+             }
+         }
+     }
+ }
+
+ /// <summary>
+ /// Indexes one block holding two job children, one qualification child and a resume parent,
+ /// searches with two independent child-to-parent join queries combined in a single boolean
+ /// query, and checks that one <see cref="ToParentBlockJoinCollector"/> reports the matching
+ /// child separately for each join query.
+ /// </summary>
+ [Test]
+ public void TestMultiChildTypes()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     // A single block: children first, the resume (parent) last.
+     IList<Document> block = new List<Document>
+     {
+         MakeJob("java", 2007),
+         MakeJob("python", 2010),
+         MakeQualification("maths", 1999),
+         MakeResume("Lisa", "United Kingdom")
+     };
+     writer.AddDocuments(block);
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     // Resumes are the "parent" documents of the index.
+     Filter resumes = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+     // Child criteria #1: a java job between 2006 and 2011.
+     BooleanQuery jobCriteria = new BooleanQuery();
+     jobCriteria.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+     jobCriteria.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+     // Child criteria #2: a maths qualification between 1980 and 2000.
+     BooleanQuery qualificationCriteria = new BooleanQuery();
+     qualificationCriteria.Add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), BooleanClause.Occur.MUST));
+     qualificationCriteria.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 1980, 2000, true, true), BooleanClause.Occur.MUST));
+
+     // Parent criteria: lives in the UK.
+     Query ukResident = new TermQuery(new Term("country", "United Kingdom"));
+
+     // Lift each child query up to the owning parent.
+     ToParentBlockJoinQuery jobJoin = new ToParentBlockJoinQuery(jobCriteria, resumes, ScoreMode.Avg);
+     ToParentBlockJoinQuery qualificationJoin = new ToParentBlockJoinQuery(qualificationCriteria, resumes, ScoreMode.Avg);
+
+     // A candidate must satisfy the parent criteria and both child joins.
+     BooleanQuery candidateQuery = new BooleanQuery();
+     candidateQuery.Add(new BooleanClause(ukResident, BooleanClause.Occur.MUST));
+     candidateQuery.Add(new BooleanClause(jobJoin, BooleanClause.Occur.MUST));
+     candidateQuery.Add(new BooleanClause(qualificationJoin, BooleanClause.Occur.MUST));
+
+     // Gathers the child docs of both joins for every resume hit in the top 10 by relevance.
+     ToParentBlockJoinCollector collector = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
+     searcher.Search(candidateQuery, collector);
+
+     // --- job children ---
+     TopGroups<int> jobGroups = collector.GetTopGroups(jobJoin, null, 0, 10, 0, true);
+     assertEquals(1, jobGroups.TotalGroupedHitCount);
+     assertEquals(1, jobGroups.Groups.Length);
+
+     GroupDocs<int> jobGroup = jobGroups.Groups[0];
+     assertEquals(1, jobGroup.TotalHits);
+
+     Document matchedJob = searcher.Doc(jobGroup.ScoreDocs[0].Doc);
+     assertEquals("java", matchedJob.Get("skill"));
+     assertNotNull(jobGroup.GroupValue);
+     Document owner = searcher.Doc(jobGroup.GroupValue);
+     assertEquals("Lisa", owner.Get("name"));
+
+     // --- qualification children ---
+     TopGroups<int> qualificationGroups = collector.GetTopGroups(qualificationJoin, null, 0, 10, 0, true);
+     assertEquals(1, qualificationGroups.TotalGroupedHitCount);
+     assertEquals(1, qualificationGroups.Groups.Length);
+
+     GroupDocs<int> qualificationGroup = qualificationGroups.Groups[0];
+     assertEquals(1, qualificationGroup.TotalHits);
+
+     Document matchedQualification = searcher.Doc(qualificationGroup.ScoreDocs[0].Doc);
+     assertEquals("maths", matchedQualification.Get("qualification"));
+     assertNotNull(qualificationGroup.GroupValue);
+     owner = searcher.Doc(qualificationGroup.GroupValue);
+     assertEquals("Lisa", owner.Get("name"));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Indexes a single block of one child followed by one parent and checks that advancing the
+ /// join scorer to target 1 lands on doc 1.
+ /// </summary>
+ [Test]
+ public void TestAdvanceSingleParentSingleChild()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     Document child = new Document();
+     child.Add(NewStringField("child", "1", Field.Store.NO));
+     Document parent = new Document();
+     parent.Add(NewStringField("parent", "1", Field.Store.NO));
+     // Child first, parent last.
+     writer.AddDocuments(Arrays.AsList(child, parent));
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     Query childMatch = new TermQuery(new Term("child", "1"));
+     Filter parents = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "1"))));
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(childMatch, parents, ScoreMode.Avg);
+
+     Weight joinWeight = searcher.CreateNormalizedWeight(joinQuery);
+     AtomicReaderContext firstLeaf = searcher.IndexReader.Leaves.First();
+     DocIdSetIterator iterator = joinWeight.Scorer(firstLeaf, null);
+     assertEquals(1, iterator.Advance(1));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Indexes a childless parent followed by a second block with one child and its parent, merges
+ /// to a single segment, and checks that advancing the join scorer to target 0 returns doc 2.
+ /// </summary>
+ [Test]
+ public void TestAdvanceSingleParentNoChild()
+ {
+     Directory directory = NewDirectory();
+     IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogDocMergePolicy());
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config);
+
+     // Block 1: a parent without any children.
+     Document loneParent = new Document();
+     loneParent.Add(NewStringField("parent", "1", Field.Store.NO));
+     loneParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(loneParent));
+
+     // Block 2: one child plus its parent, added so that the scorer is not null.
+     Document secondParent = new Document();
+     secondParent.Add(NewStringField("parent", "2", Field.Store.NO));
+     secondParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     Document child = new Document();
+     child.Add(NewStringField("child", "2", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(child, secondParent));
+
+     // Both blocks must live in one segment for the doc IDs asserted below.
+     writer.ForceMerge(1);
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     Query childMatch = new TermQuery(new Term("child", "2"));
+     Filter parents = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(childMatch, parents, ScoreMode.Avg);
+
+     Weight joinWeight = searcher.CreateNormalizedWeight(joinQuery);
+     AtomicReaderContext firstLeaf = searcher.IndexReader.Leaves.First();
+     DocIdSetIterator iterator = joinWeight.Scorer(firstLeaf, null);
+     assertEquals(2, iterator.Advance(0));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Indexes one resume with four shuffled job children (optionally surrounded by stand-alone
+ /// "Skillless" resumes) and checks <c>GetTopGroups</c> / <c>GetTopGroupsWithAllChildDocs</c> on a
+ /// join query that matches two of the children: once asking for all child docs and once with
+ /// the per-group child limit set to 1.
+ /// </summary>
+ [Test]
+ public void TestGetTopGroups()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     IList<Document> block = new List<Document>
+     {
+         MakeJob("ruby", 2005),
+         MakeJob("java", 2006),
+         MakeJob("java", 2010),
+         MakeJob("java", 2012)
+     };
+     // Only the children are shuffled; the parent is appended afterwards so it stays last.
+     CollectionsHelper.Shuffle(block);
+     block.Add(MakeResume("Frank", "United States"));
+
+     AddSkillless(writer);
+     writer.AddDocuments(block);
+     AddSkillless(writer);
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = new IndexSearcher(reader);
+
+     // Resumes are the "parent" documents of the index.
+     Filter resumes = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+     // Child criteria: a java job between 2006 and 2011 (matches the 2006 and 2010 jobs).
+     BooleanQuery jobCriteria = new BooleanQuery();
+     jobCriteria.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+     jobCriteria.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+     // Lift the child query up to the owning parent.
+     ToParentBlockJoinQuery jobJoin = new ToParentBlockJoinQuery(jobCriteria, resumes, ScoreMode.Avg);
+
+     ToParentBlockJoinCollector collector = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
+     searcher.Search(jobJoin, collector);
+
+     // Two ways of asking for every child document of each group.
+     TopGroups<int>[] unboundedVariants = new TopGroups<int>[]
+     {
+         collector.GetTopGroups(jobJoin, null, 0, 10, 0, true),
+         collector.GetTopGroupsWithAllChildDocs(jobJoin, null, 0, 0, true)
+     };
+
+     for (int variant = 0; variant < unboundedVariants.Length; variant++)
+     {
+         TopGroups<int> topGroups = unboundedVariants[variant];
+         assertFalse(float.IsNaN(topGroups.MaxScore));
+         assertEquals(2, topGroups.TotalGroupedHitCount);
+         assertEquals(1, topGroups.Groups.Length);
+
+         GroupDocs<int> only = topGroups.Groups[0];
+         assertEquals(2, only.TotalHits);
+         assertFalse(float.IsNaN(only.Score));
+         assertNotNull(only.GroupValue);
+         Document owner = searcher.Doc(only.GroupValue);
+         assertEquals("Frank", owner.Get("name"));
+
+         // Every matching child was collected.
+         assertEquals(2, only.ScoreDocs.Length);
+
+         for (int i = 0; i < only.ScoreDocs.Length; i++)
+         {
+             Document job = searcher.Doc(only.ScoreDocs[i].Doc);
+             assertEquals("java", job.Get("skill"));
+             int jobYear = Convert.ToInt32(job.Get("year"));
+             assertTrue(jobYear >= 2006 && jobYear <= 2011);
+         }
+     }
+
+     // Now cap the number of child documents per group at one.
+     TopGroups<int> capped = collector.GetTopGroups(jobJoin, null, 0, 1, 0, true);
+     assertFalse(float.IsNaN(capped.MaxScore));
+     assertEquals(2, capped.TotalGroupedHitCount);
+     assertEquals(1, capped.Groups.Length);
+
+     GroupDocs<int> cappedGroup = capped.Groups[0];
+     assertEquals(2, cappedGroup.TotalHits);
+     assertFalse(float.IsNaN(cappedGroup.Score));
+     assertNotNull(cappedGroup.GroupValue);
+     Document cappedOwner = searcher.Doc(cappedGroup.GroupValue);
+     assertEquals("Frank", cappedOwner.Get("name"));
+
+     // Total hits still reports two, but only one child document is returned.
+     assertEquals(1, cappedGroup.ScoreDocs.Length);
+
+     for (int i = 0; i < cappedGroup.ScoreDocs.Length; i++)
+     {
+         Document job = searcher.Doc(cappedGroup.ScoreDocs[i].Doc);
+         assertEquals("java", job.Get("skill"));
+         int jobYear = Convert.ToInt32(job.Get("year"));
+         assertTrue(jobYear >= 2006 && jobYear <= 2011);
+     }
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ // LUCENE-4968
+ // Indexes one parent that has a child and one parent that has none, then
+ // checks that both parents are grouped while only one child hit is counted.
+ [Test]
+ public void TestSometimesParentOnlyMatches()
+ {
+     Directory directory = NewDirectory();
+     var writer = new RandomIndexWriter(Random(), directory);
+
+     // First block: parent "0" plus a single child.
+     var parentDoc = new Document();
+     parentDoc.Add(new StoredField("parentID", "0"));
+     parentDoc.Add(NewTextField("parentText", "text", Field.Store.NO));
+     parentDoc.Add(NewStringField("isParent", "yes", Field.Store.NO));
+
+     var childDoc = new Document();
+     childDoc.Add(new StoredField("childID", "0"));
+     childDoc.Add(NewTextField("childText", "text", Field.Store.NO));
+
+     // Within a block the children come first and the parent comes last.
+     IList<Document> block = new List<Document>();
+     block.Add(childDoc);
+     block.Add(parentDoc);
+     writer.AddDocuments(block);
+
+     // Second block: parent "1" on its own, without any child.
+     block.Clear();
+
+     parentDoc = new Document();
+     parentDoc.Add(NewTextField("parentText", "text", Field.Store.NO));
+     parentDoc.Add(NewStringField("isParent", "yes", Field.Store.NO));
+     parentDoc.Add(new StoredField("parentID", "1"));
+
+     block.Add(parentDoc);
+     writer.AddDocuments(block);
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+
+     // The join clause and the plain parent clause are both optional.
+     Query childQuery = new TermQuery(new Term("childText", "text"));
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+     var childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+     var parentQuery = new BooleanQuery();
+     parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
+     parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
+
+     var parentIdSort = new Sort(new SortField("parentID", SortField.Type_e.STRING));
+     var collector = new ToParentBlockJoinCollector(parentIdSort, 10, true, true);
+     NewSearcher(reader).Search(parentQuery, collector);
+     TopGroups<int> topGroups = collector.GetTopGroups(childJoinQuery, null, 0, 10, 0, false);
+
+     // Two parents:
+     assertEquals(2, (int)topGroups.TotalGroupCount);
+
+     // One child doc:
+     assertEquals(1, topGroups.TotalGroupedHitCount);
+
+     GroupDocs<int> firstGroup = topGroups.Groups[0];
+     Document firstParent = reader.Document(firstGroup.GroupValue);
+     assertEquals("0", firstParent.Get("parentID"));
+
+     GroupDocs<int> secondGroup = topGroups.Groups[1];
+     Document secondParent = reader.Document(secondGroup.GroupValue);
+     assertEquals("1", secondParent.Get("parentID"));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ // LUCENE-4968
+ // The child query matches nothing, so both parents must still be grouped
+ // (through the optional parentText clause) with zero grouped child hits.
+ [Test]
+ public void TestChildQueryNeverMatches()
+ {
+     Directory d = NewDirectory();
+     RandomIndexWriter w = new RandomIndexWriter(Random(), d);
+     Document parent = new Document();
+     parent.Add(new StoredField("parentID", "0"));
+     parent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+
+     IList<Document> docs = new List<Document>();
+
+     Document child = new Document();
+     docs.Add(child);
+     child.Add(new StoredField("childID", "0"));
+     child.Add(NewTextField("childText", "text", Field.Store.NO));
+
+     // parent last:
+     docs.Add(parent);
+     w.AddDocuments(docs);
+
+     docs.Clear();
+
+     parent = new Document();
+     parent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+     parent.Add(new StoredField("parentID", "1"));
+
+     // parent last:
+     docs.Add(parent);
+     w.AddDocuments(docs);
+
+     IndexReader r = w.Reader;
+     w.Dispose();
+
+     // never matches:
+     Query childQuery = new TermQuery(new Term("childText", "bogus"));
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+     ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+     BooleanQuery parentQuery = new BooleanQuery();
+     parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
+     parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
+
+     ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
+     NewSearcher(r).Search(parentQuery, c);
+     TopGroups<int> groups = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, false);
+
+     // Two parents:
+     assertEquals(2, (int)groups.TotalGroupCount);
+
+     // No child docs, because the child query never matches:
+     assertEquals(0, groups.TotalGroupedHitCount);
+
+     // GroupValue of a GroupDocs<int> is used directly as the parent doc ID.
+     GroupDocs<int> group = groups.Groups[0];
+     Document doc = r.Document(group.GroupValue);
+     assertEquals("0", doc.Get("parentID"));
+
+     group = groups.Groups[1];
+     doc = r.Document(group.GroupValue);
+     assertEquals("1", doc.Get("parentID"));
+
+     r.Dispose();
+     d.Dispose();
+ }
+
+ // LUCENE-4968
+ // Uses a "child" query that actually matches parent documents and expects
+ // the search to be rejected with an IllegalStateException.
+ [Test]
+ public void TestChildQueryMatchesParent()
+ {
+     Directory d = NewDirectory();
+     RandomIndexWriter w = new RandomIndexWriter(Random(), d);
+     Document parent = new Document();
+     parent.Add(new StoredField("parentID", "0"));
+     parent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+
+     IList<Document> docs = new List<Document>();
+
+     Document child = new Document();
+     docs.Add(child);
+     child.Add(new StoredField("childID", "0"));
+     child.Add(NewTextField("childText", "text", Field.Store.NO));
+
+     // parent last:
+     docs.Add(parent);
+     w.AddDocuments(docs);
+
+     docs.Clear();
+
+     parent = new Document();
+     parent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+     parent.Add(new StoredField("parentID", "1"));
+
+     // parent last:
+     docs.Add(parent);
+     w.AddDocuments(docs);
+
+     IndexReader r = w.Reader;
+     w.Dispose();
+
+     // illegally matches parent:
+     Query childQuery = new TermQuery(new Term("parentText", "text"));
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+     ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+     BooleanQuery parentQuery = new BooleanQuery();
+     parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
+     parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
+
+     ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
+
+     try
+     {
+         NewSearcher(r).Search(parentQuery, c);
+         fail("should have hit exception");
+     }
+     catch (IllegalStateException)
+     {
+         // expected; the exception object itself is not inspected, so no
+         // variable is declared (avoids an unused-variable warning)
+     }
+
+     r.Dispose();
+     d.Dispose();
+ }
+
+ // Indexes a parent/child block, then a childless parent "2" that is deleted
+ // by term, then a second parent "2" with one child, and checks that joining
+ // from parent "2" down to children yields exactly one hit.
+ [Test]
+ public void TestAdvanceSingleDeletedParentNoChild()
+ {
+     Directory directory = NewDirectory();
+     var writer = new RandomIndexWriter(Random(), directory);
+
+     // Block 1: parent "1" with one child.
+     var firstParent = new Document();
+     firstParent.Add(NewStringField("parent", "1", Field.Store.NO));
+     firstParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     var firstChild = new Document();
+     firstChild.Add(NewStringField("child", "1", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(firstChild, firstParent));
+
+     // Block 2: parent "2" with no child; removed by the delete just below.
+     var childlessParent = new Document();
+     childlessParent.Add(NewStringField("parent", "2", Field.Store.NO));
+     childlessParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(childlessParent));
+
+     writer.DeleteDocuments(new Term("parent", "2"));
+
+     // Block 3: parent "2" again, this time with one child.
+     var secondParent = new Document();
+     secondParent.Add(NewStringField("parent", "2", Field.Store.NO));
+     secondParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     var secondChild = new Document();
+     secondChild.Add(NewStringField("child", "2", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(secondChild, secondParent));
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     // Filter identifying the parent documents of each block.
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
+
+     Query parentQuery = new TermQuery(new Term("parent", "2"));
+
+     bool randomFlag = Random().NextBoolean();
+     var parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, randomFlag);
+     TopDocs hits = searcher.Search(parentJoinQuery, 3);
+     assertEquals(1, hits.TotalHits);
+
+     reader.Dispose();
+     directory.Dispose();
+ }
}
}
\ No newline at end of file
[10/17] lucenenet git commit: Lucene.Net.Join tests now passing
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/TestBlockJoin.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/TestBlockJoin.cs b/src/Lucene.Net.Tests.Join/TestBlockJoin.cs
new file mode 100644
index 0000000..d43b2df
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/TestBlockJoin.cs
@@ -0,0 +1,1591 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Grouping;
+using Lucene.Net.Index;
+using Lucene.Net.Join;
+using Lucene.Net.Randomized;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Tests.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestBlockJoin : LuceneTestCase
+ {
+ // Builds a parent "resume" document: a docType marker of "resume", the
+ // stored name and the (non-stored) country.
+ private Document MakeResume(string name, string country)
+ {
+     var resumeDoc = new Document();
+
+     Field docTypeField = NewStringField("docType", "resume", Field.Store.NO);
+     resumeDoc.Add(docTypeField);
+
+     Field nameField = NewStringField("name", name, Field.Store.YES);
+     resumeDoc.Add(nameField);
+
+     Field countryField = NewStringField("country", country, Field.Store.NO);
+     resumeDoc.Add(countryField);
+
+     return resumeDoc;
+ }
+
+ // Builds a child "job" document: the stored skill, the year as a
+ // non-stored IntField, and the same year again as a StoredField.
+ private Document MakeJob(string skill, int year)
+ {
+     var jobDoc = new Document();
+
+     Field skillField = NewStringField("skill", skill, Field.Store.YES);
+     jobDoc.Add(skillField);
+
+     var yearField = new IntField("year", year, Field.Store.NO);
+     jobDoc.Add(yearField);
+
+     var storedYear = new StoredField("year", year);
+     jobDoc.Add(storedYear);
+
+     return jobDoc;
+ }
+
+ // Builds a child "qualification" document: the stored qualification name
+ // and the year as a non-stored IntField.
+ private Document MakeQualification(string qualification, int year)
+ {
+     var qualificationDoc = new Document();
+
+     Field nameField = NewStringField("qualification", qualification, Field.Store.YES);
+     qualificationDoc.Add(nameField);
+
+     var yearField = new IntField("year", year, Field.Store.NO);
+     qualificationDoc.Add(yearField);
+
+     return qualificationDoc;
+ }
+
+ // Runs a parent block join over an index that also contains segments
+ // without any "docType" value (i.e. without parents) and checks that the
+ // single expected parent/child match is still collected.
+ [Test]
+ public void TestEmptyChildFilter()
+ {
+     Directory dir = NewDirectory();
+     IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+     config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
+     // we don't want to merge - since we rely on certain segment setup
+     IndexWriter w = new IndexWriter(dir, config);
+
+     IList<Document> docs = new List<Document>();
+
+     docs.Add(MakeJob("java", 2007));
+     docs.Add(MakeJob("python", 2010));
+     docs.Add(MakeResume("Lisa", "United Kingdom"));
+     w.AddDocuments(docs);
+
+     docs.Clear();
+     docs.Add(MakeJob("ruby", 2005));
+     docs.Add(MakeJob("java", 2006));
+     docs.Add(MakeResume("Frank", "United States"));
+     w.AddDocuments(docs);
+     w.Commit();
+     int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
+     for (int i = 0; i < num; i++)
+     {
+         docs.Clear();
+         docs.Add(MakeJob("java", 2007));
+         w.AddDocuments(docs);
+     }
+
+     IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
+     w.Dispose();
+     // The scenario needs more than one segment; use the IList<T>.Count
+     // property rather than the Java-style size() call.
+     assertTrue(r.Leaves.Count > 1);
+     IndexSearcher s = new IndexSearcher(r);
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+     BooleanQuery childQuery = new BooleanQuery();
+     childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+     childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+     ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+
+     BooleanQuery fullQuery = new BooleanQuery();
+     fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
+     fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
+     // The collector keeps only the top 1 parent group.
+     ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
+     s.Search(fullQuery, c);
+     TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
+     assertFalse(float.IsNaN(results.MaxScore));
+     assertEquals(1, results.TotalGroupedHitCount);
+     assertEquals(1, results.Groups.Length);
+     GroupDocs<int> group = results.Groups[0];
+     Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
+     assertEquals("java", childDoc.Get("skill"));
+     assertNotNull(group.GroupValue);
+     Document parentDoc = s.Doc(group.GroupValue);
+     assertEquals("Lisa", parentDoc.Get("name"));
+
+     r.Dispose();
+     dir.Dispose();
+ }
+
+ // End-to-end check of both join directions on a two-resume index:
+ // first child hits are joined to their parent resume via
+ // ToParentBlockJoinQuery + ToParentBlockJoinCollector, then the parent
+ // query is mapped to child docs via ToChildBlockJoinQuery.
+ [Test]
+ public void TestSimple()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
+
+ IList<Document> docs = new List<Document>();
+
+ // Each block lists the job (child) docs first and the resume (parent) last.
+ docs.Add(MakeJob("java", 2007));
+ docs.Add(MakeJob("python", 2010));
+ docs.Add(MakeResume("Lisa", "United Kingdom"));
+ w.AddDocuments(docs);
+
+ docs.Clear();
+ docs.Add(MakeJob("ruby", 2005));
+ docs.Add(MakeJob("java", 2006));
+ docs.Add(MakeResume("Frank", "United States"));
+ w.AddDocuments(docs);
+
+ IndexReader r = w.Reader;
+ w.Dispose();
+ IndexSearcher s = NewSearcher(r);
+
+ // Create a filter that defines "parent" documents in the index - in this case resumes
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ // Define child document criteria (finds an example of relevant work experience)
+ BooleanQuery childQuery = new BooleanQuery();
+ childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+ childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+ // Define parent document criteria (find a resident in the UK)
+ Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+ // Wrap the child document query to 'join' any matches
+ // up to corresponding parent:
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+
+ // Combine the parent and nested child queries into a single query for a candidate
+ BooleanQuery fullQuery = new BooleanQuery();
+ fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
+ fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
+
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
+
+ s.Search(fullQuery, c);
+
+ TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
+ assertFalse(float.IsNaN(results.MaxScore));
+
+ //assertEquals(1, results.totalHitCount);
+ assertEquals(1, results.TotalGroupedHitCount);
+ assertEquals(1, results.Groups.Length);
+
+ GroupDocs<int> group = results.Groups[0];
+ assertEquals(1, group.TotalHits);
+ assertFalse(float.IsNaN(group.Score));
+
+ // Only Lisa's 2007 java job satisfies both the child and the UK parent criteria.
+ Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
+ //System.out.println(" doc=" + group.ScoreDocs[0].Doc);
+ assertEquals("java", childDoc.Get("skill"));
+ assertNotNull(group.GroupValue);
+ Document parentDoc = s.Doc(group.GroupValue);
+ assertEquals("Lisa", parentDoc.Get("name"));
+
+
+ //System.out.println("TEST: now test up");
+
+ // Now join "up" (map parent hits to child docs) instead...:
+ ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
+ BooleanQuery fullChildQuery = new BooleanQuery();
+ fullChildQuery.Add(new BooleanClause(parentJoinQuery, BooleanClause.Occur.MUST));
+ fullChildQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
+
+ //System.out.println("FULL: " + fullChildQuery);
+ TopDocs hits = s.Search(fullChildQuery, 10);
+ assertEquals(1, hits.TotalHits);
+ childDoc = s.Doc(hits.ScoreDocs[0].Doc);
+ //System.out.println("CHILD = " + childDoc + " docID=" + hits.ScoreDocs[0].Doc);
+ assertEquals("java", childDoc.Get("skill"));
+ assertEquals(2007, childDoc.GetField("year").NumericValue);
+ // GetParentDoc resolves the parent of the matched child through the parents filter.
+ assertEquals("Lisa", GetParentDoc(r, parentsFilter, hits.ScoreDocs[0].Doc).Get("name"));
+
+ // Test with filter on child docs:
+ assertEquals(0, s.Search(fullChildQuery, new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))), 1).TotalHits);
+
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ // Uses a child query that needs two rewrite iterations (verified via three
+ // distinct hash codes) and checks that every parent group collected through
+ // ToParentBlockJoinQuery reports exactly one child hit.
+ [Test]
+ public void TestBugCausedByRewritingTwice()
+ {
+     Directory directory = NewDirectory();
+     var writer = new RandomIndexWriter(Random(), directory);
+
+     IList<Document> block = new List<Document>();
+
+     // Ten blocks: a ruby job for year i, a java job for 2007, then the resume.
+     int blockIndex = 0;
+     while (blockIndex < 10)
+     {
+         block.Clear();
+         block.Add(MakeJob("ruby", blockIndex));
+         block.Add(MakeJob("java", 2007));
+         block.Add(MakeResume("Frank", "United States"));
+         writer.AddDocuments(block);
+         blockIndex++;
+     }
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     MultiTermQuery yearQuery = NumericRangeQuery.NewIntRange("year", 2007, 2007, true, true);
+     // Hacky: with this rewrite method the query needs two rewrite
+     // iterations:
+     yearQuery.SetRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+     // Rewrite twice and record the hash code at every stage.
+     int originalHash = yearQuery.GetHashCode();
+     Query rewrittenOnce = yearQuery.Rewrite(reader);
+     int onceHash = rewrittenOnce.GetHashCode();
+     Query rewrittenTwice = rewrittenOnce.Rewrite(reader);
+     int twiceHash = rewrittenTwice.GetHashCode();
+
+     assertTrue(originalHash != onceHash);
+     assertTrue(onceHash != twiceHash);
+     assertTrue(twiceHash != originalHash);
+
+     var joinQuery = new ToParentBlockJoinQuery(yearQuery, parentsFilter, ScoreMode.Max);
+     var collector = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);
+
+     searcher.Search(joinQuery, collector);
+     TopGroups<int> topGroups = collector.GetTopGroups(joinQuery, Sort.INDEXORDER, 0, 10, 0, true);
+     GroupDocs<int>[] parentGroups = topGroups.Groups;
+     foreach (GroupDocs<int> parentGroup in parentGroups)
+     {
+         assertEquals(1, parentGroup.TotalHits);
+     }
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ // Returns a filter that accepts documents whose "skill" field equals the given value.
+ protected QueryWrapperFilter Skill(string skill)
+ {
+     var skillTerm = new Term("skill", skill);
+     var skillQuery = new TermQuery(skillTerm);
+     return new QueryWrapperFilter(skillQuery);
+ }
+
+ // Exercises ToParentBlockJoinQuery and ToChildBlockJoinQuery together with
+ // searcher-level filters, on an index whose block order is randomized and
+ // which may contain extra childless ("Skillless") resumes.
+ [Test]
+ public virtual void TestSimpleFilter()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
+
+ // The jobs are shuffled before the resume is appended, so the parent is
+ // still the last document of each block.
+ IList<Document> docs = new List<Document>();
+ docs.Add(MakeJob("java", 2007));
+ docs.Add(MakeJob("python", 2010));
+ CollectionsHelper.Shuffle(docs);
+ docs.Add(MakeResume("Lisa", "United Kingdom"));
+
+ IList<Document> docs2 = new List<Document>();
+ docs2.Add(MakeJob("ruby", 2005));
+ docs2.Add(MakeJob("java", 2006));
+ CollectionsHelper.Shuffle(docs2);
+ docs2.Add(MakeResume("Frank", "United States"));
+
+ // AddSkillless randomly inserts a resume without jobs before, between and
+ // after the two blocks; 'turn' randomizes which block is indexed first.
+ AddSkillless(w);
+ bool turn = Random().NextBoolean();
+ w.AddDocuments(turn ? docs : docs2);
+
+ AddSkillless(w);
+
+ w.AddDocuments(!turn ? docs : docs2);
+
+ AddSkillless(w);
+
+ IndexReader r = w.Reader;
+ w.Dispose();
+ IndexSearcher s = NewSearcher(r);
+
+ // Create a filter that defines "parent" documents in the index - in this case resumes
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ // Define child document criteria (finds an example of relevant work experience)
+ BooleanQuery childQuery = new BooleanQuery();
+ childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+ childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+ // Define parent document criteria (find a resident in the UK)
+ Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+ // Wrap the child document query to 'join' any matches
+ // up to corresponding parent:
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
+
+ assertEquals("no filter - both passed", 2, s.Search(childJoinQuery, 10).TotalHits);
+
+ // Filtering by "is a resume" (cached and uncached variants) must not drop any parent hit.
+ assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, parentsFilter, 10).TotalHits);
+ assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).TotalHits);
+
+ // not found test
+ assertEquals("noone live there", 0, s.Search(childJoinQuery, new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).TotalHits);
+ assertEquals("noone live there", 0, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).TotalHits);
+
+ // apply the UK filter by the searcher
+ TopDocs ukOnly = s.Search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
+ assertEquals("has filter - single passed", 1, ukOnly.TotalHits);
+ assertEquals("Lisa", r.Document(ukOnly.ScoreDocs[0].Doc).Get("name"));
+
+ // looking for US candidates
+ TopDocs usThen = s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
+ assertEquals("has filter - single passed", 1, usThen.TotalHits);
+ assertEquals("Frank", r.Document(usThen.ScoreDocs[0].Doc).Get("name"));
+
+
+ // Parent-to-child direction: the US parent query is mapped to child (job) docs.
+ TermQuery us = new TermQuery(new Term("country", "United States"));
+ assertEquals("@ US we have java and ruby", 2, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), 10).TotalHits);
+
+ assertEquals("java skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), Skill("java"), 10).TotalHits);
+
+ // Filter accepting ruby OR python jobs; one hit is expected among the US children.
+ BooleanQuery rubyPython = new BooleanQuery();
+ rubyPython.Add(new TermQuery(new Term("skill", "ruby")), BooleanClause.Occur.SHOULD);
+ rubyPython.Add(new TermQuery(new Term("skill", "python")), BooleanClause.Occur.SHOULD);
+ assertEquals("ruby skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), new QueryWrapperFilter(rubyPython), 10).TotalHits);
+
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ // On a random coin flip, indexes a standalone resume named "Skillless"
+ // with a randomly chosen country; otherwise does nothing.
+ private void AddSkillless(RandomIndexWriter w)
+ {
+     if (!Random().NextBoolean())
+     {
+         return;
+     }
+
+     string country = Random().NextBoolean() ? "United Kingdom" : "United States";
+     Document skilllessResume = MakeResume("Skillless", country);
+     w.AddDocument(skilllessResume);
+ }
+
+ // Loads the parent of the given top-level child doc ID: finds the leaf that
+ // holds the child, then takes the next set bit of the parents filter at or
+ // after the child's leaf-relative position.
+ private Document GetParentDoc(IndexReader reader, Filter parents, int childDocID)
+ {
+     IList<AtomicReaderContext> contexts = reader.Leaves;
+     AtomicReaderContext segment = contexts[ReaderUtil.SubIndex(childDocID, contexts)];
+
+     // The filter result is cast to FixedBitSet so NextSetBit can be used.
+     FixedBitSet parentBits = (FixedBitSet)parents.GetDocIdSet(segment, null);
+     int localChildID = childDocID - segment.DocBase;
+     int localParentID = parentBits.NextSetBit(localChildID);
+     return segment.AtomicReader.Document(localParentID);
+ }
+
+ // Runs a ToParentBlockJoinQuery on its own and inside a boosted
+ // BooleanQuery, against a reader obtained from a writer with no documents
+ // added. There are no result assertions beyond QueryUtils.Check.
+ [Test]
+ public void TestBoostBug()
+ {
+     Directory directory = NewDirectory();
+     var writer = new RandomIndexWriter(Random(), directory);
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     var matchAllParents = new QueryWrapperFilter(new MatchAllDocsQuery());
+     var joinQuery = new ToParentBlockJoinQuery(new MatchAllDocsQuery(), matchAllParents, ScoreMode.Avg);
+     QueryUtils.Check(Random(), joinQuery, searcher);
+     searcher.Search(joinQuery, 10);
+
+     // Wrap the join in a BooleanQuery that carries the boost.
+     var boosted = new BooleanQuery();
+     boosted.Boost = 2f;
+     boosted.Add(joinQuery, BooleanClause.Occur.MUST);
+     searcher.Search(boosted, 10);
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ // Checks ScoreMode.Total parent scores before and after deleting child
+ // documents: each parent's expected score equals its number of matching
+ // children, and drops by one once its "java" job is deleted.
+ [Test]
+ public void TestNestedDocScoringWithDeletes()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
+
+ // Cannot assert this since we use NoMergePolicy:
+ w.DoRandomForceMergeAssert = false;
+
+ // Block 1: two jobs followed by Lisa's resume.
+ IList<Document> docs = new List<Document>();
+ docs.Add(MakeJob("java", 2007));
+ docs.Add(MakeJob("python", 2010));
+ docs.Add(MakeResume("Lisa", "United Kingdom"));
+ w.AddDocuments(docs);
+
+ // Block 2: three jobs followed by Frank's resume.
+ docs.Clear();
+ docs.Add(MakeJob("c", 1999));
+ docs.Add(MakeJob("ruby", 2005));
+ docs.Add(MakeJob("java", 2006));
+ docs.Add(MakeResume("Frank", "United States"));
+ w.AddDocuments(docs);
+
+ w.Commit();
+ IndexSearcher s = NewSearcher(DirectoryReader.Open(dir));
+
+ // Every job indexed above has a year inside [1990, 2010], so all children match.
+ ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(NumericRangeQuery.NewIntRange("year", 1990, 2010, true, true), new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))), ScoreMode.Total);
+
+ // Expected: doc 6 (presumably Frank's resume, given the indexing order) scores 3,
+ // doc 2 (presumably Lisa's resume) scores 2.
+ TopDocs topDocs = s.Search(q, 10);
+ assertEquals(2, topDocs.TotalHits);
+ assertEquals(6, topDocs.ScoreDocs[0].Doc);
+ assertEquals(3.0f, topDocs.ScoreDocs[0].Score, 0.0f);
+ assertEquals(2, topDocs.ScoreDocs[1].Doc);
+ assertEquals(2.0f, topDocs.ScoreDocs[1].Score, 0.0f);
+
+ // Delete the java jobs (one per block), close the writer and reopen a reader.
+ s.IndexReader.Dispose();
+ w.DeleteDocuments(new Term("skill", "java"));
+ w.Dispose();
+ s = NewSearcher(DirectoryReader.Open(dir));
+
+ // Same parents and doc IDs as before, with each expected score reduced by one.
+ topDocs = s.Search(q, 10);
+ assertEquals(2, topDocs.TotalHits);
+ assertEquals(6, topDocs.ScoreDocs[0].Doc);
+ assertEquals(2.0f, topDocs.ScoreDocs[0].Score, 0.0f);
+ assertEquals(2, topDocs.ScoreDocs[1].Doc);
+ assertEquals(1.0f, topDocs.ScoreDocs[1].Score, 0.0f);
+
+ s.IndexReader.Dispose();
+ dir.Dispose();
+ }
+
+ // Generates the candidate values for a random number of fields (2 to 4 per
+ // the TestUtil.NextInt bounds). Field 0 always gets exactly two values; every
+ // other field gets a random count bounded by maxUniqueValues. All values
+ // are random realistic-unicode strings.
+ private string[][] GetRandomFields(int maxUniqueValues)
+ {
+     int fieldCount = TestUtil.NextInt(Random(), 2, 4);
+     string[][] fields = new string[fieldCount][];
+
+     for (int fieldID = 0; fieldID < fields.Length; fieldID++)
+     {
+         int valueCount = fieldID == 0
+             ? 2
+             : TestUtil.NextInt(Random(), 1, maxUniqueValues);
+
+         string[] values = new string[valueCount];
+         fields[fieldID] = values;
+
+         int valueIndex = 0;
+         while (valueIndex < valueCount)
+         {
+             values[valueIndex] = TestUtil.RandomRealisticUnicodeString(Random());
+             valueIndex++;
+         }
+     }
+
+     return fields;
+ }
+
+ // Picks a random entry of values and wraps it in a Term on field "parent0".
+ private Term RandomParentTerm(string[] values)
+ {
+     int pick = Random().Next(values.Length);
+     string value = values[pick];
+     return new Term("parent0", value);
+ }
+
+ // Picks a random entry of values and wraps it in a Term on field "child0".
+ private Term RandomChildTerm(string[] values)
+ {
+     int pick = Random().Next(values.Length);
+     string value = values[pick];
+     return new Term("child0", value);
+ }
+
+ // Builds a Sort made of zero, one or two randomly chosen STRING sort fields
+ // named prefix + <random index below numFields>, each with a random
+ // reverse flag, followed by an INT sort on prefix + "ID" to break ties.
+ private Sort GetRandomSort(string prefix, int numFields)
+ {
+     // TODO: sometimes sort by score; problem is scores are
+     // not comparable across the two indices
+     // sortFields.Add(SortField.FIELD_SCORE);
+     int randomFieldCount;
+     if (Random().NextBoolean())
+     {
+         randomFieldCount = 1;
+     }
+     else if (Random().NextBoolean())
+     {
+         randomFieldCount = 2;
+     }
+     else
+     {
+         randomFieldCount = 0;
+     }
+
+     IList<SortField> sortFields = new List<SortField>();
+     for (int added = 0; added < randomFieldCount; added++)
+     {
+         // Field name is drawn first, then the reverse flag.
+         string fieldName = prefix + Random().Next(numFields);
+         bool reverse = Random().NextBoolean();
+         sortFields.Add(new SortField(fieldName, SortField.Type_e.STRING, reverse));
+     }
+
+     // Break ties:
+     sortFields.Add(new SortField(prefix + "ID", SortField.Type_e.INT));
+     return new Sort(sortFields.ToArray());
+ }
+
+ [Test]
+ public void TestRandom()
+ {
+ // We build two indices at once: one normalized (which
+ // ToParentBlockJoinQuery/Collector,
+ // ToChildBlockJoinQuery can query) and the other w/
+ // the same docs, just fully denormalized:
+ Directory dir = NewDirectory();
+ Directory joinDir = NewDirectory();
+
+ int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
+ //final int numParentDocs = 30;
+
+ // Values for parent fields:
+ string[][] parentFields = GetRandomFields(numParentDocs / 2);
+ // Values for child fields:
+ string[][] childFields = GetRandomFields(numParentDocs);
+
+ bool doDeletes = Random().NextBoolean();
+ IList<int> toDelete = new List<int>();
+
+ // TODO: parallel star join, nested join cases too!
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
+ RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir);
+ for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
+ {
+ Document parentDoc = new Document();
+ Document parentJoinDoc = new Document();
+ Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
+ parentDoc.Add(id);
+ parentJoinDoc.Add(id);
+ parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
+ for (int field = 0; field < parentFields.Length; field++)
+ {
+ if (Random().NextDouble() < 0.9)
+ {
+ Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);
+ parentDoc.Add(f);
+ parentJoinDoc.Add(f);
+ }
+ }
+
+ if (doDeletes)
+ {
+ parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
+ parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
+ }
+
+ IList<Document> joinDocs = new List<Document>();
+
+ if (VERBOSE)
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.Append("parentID=").Append(parentDoc.Get("parentID"));
+ for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
+ {
+ string parent = parentDoc.Get("parent" + fieldID);
+ if (parent != null)
+ {
+ sb.Append(" parent" + fieldID + "=" + parent);
+ }
+ }
+ Console.WriteLine(" " + sb);
+ }
+
+ int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
+ for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
+ {
+ // Denormalize: copy all parent fields into child doc:
+ Document childDoc = TestUtil.CloneDocument(parentDoc);
+ Document joinChildDoc = new Document();
+ joinDocs.Add(joinChildDoc);
+
+ Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);
+ childDoc.Add(childID);
+ joinChildDoc.Add(childID);
+
+ for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
+ {
+ if (Random().NextDouble() < 0.9)
+ {
+ Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);
+ childDoc.Add(f);
+ joinChildDoc.Add(f);
+ }
+ }
+
+ if (VERBOSE)
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.Append("childID=").Append(joinChildDoc.Get("childID"));
+ for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
+ {
+ string child = joinChildDoc.Get("child" + fieldID);
+ if (child != null)
+ {
+ sb.Append(" child" + fieldID + "=" + child);
+ }
+ }
+ Console.WriteLine(" " + sb);
+ }
+
+ if (doDeletes)
+ {
+ joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
+ }
+
+ w.AddDocument(childDoc);
+ }
+
+ // Parent last:
+ joinDocs.Add(parentJoinDoc);
+ joinW.AddDocuments(joinDocs);
+
+ if (doDeletes && Random().Next(30) == 7)
+ {
+ toDelete.Add(parentDocID);
+ }
+ }
+
+ foreach (int deleteID in toDelete)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("DELETE parentID=" + deleteID);
+ }
+ w.DeleteDocuments(new Term("blockID", "" + deleteID));
+ joinW.DeleteDocuments(new Term("blockID", "" + deleteID));
+ }
+
+ IndexReader r = w.Reader;
+ w.Dispose();
+ IndexReader joinR = joinW.Reader;
+ joinW.Dispose();
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: reader=" + r);
+ Console.WriteLine("TEST: joinReader=" + joinR);
+
+ for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
+ {
+ Console.WriteLine(" docID=" + docIDX + " doc=" + joinR.Document(docIDX));
+ }
+ }
+
+ IndexSearcher s = NewSearcher(r);
+
+ IndexSearcher joinS = new IndexSearcher(joinR);
+
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));
+
+ int iters = 200 * RANDOM_MULTIPLIER;
+
+ for (int iter = 0; iter < iters; iter++)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);
+ }
+
+ Query childQuery;
+ if (Random().Next(3) == 2)
+ {
+ int childFieldID = Random().Next(childFields.Length);
+ childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
+ }
+ else if (Random().Next(3) == 2)
+ {
+ BooleanQuery bq = new BooleanQuery();
+ childQuery = bq;
+ int numClauses = TestUtil.NextInt(Random(), 2, 4);
+ bool didMust = false;
+ for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
+ {
+ Query clause;
+ BooleanClause.Occur occur;
+ if (!didMust && Random().NextBoolean())
+ {
+ occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
+ clause = new TermQuery(RandomChildTerm(childFields[0]));
+ didMust = true;
+ }
+ else
+ {
+ occur = BooleanClause.Occur.SHOULD;
+ int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
+ clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
+ }
+ bq.Add(clause, occur);
+ }
+ }
+ else
+ {
+ BooleanQuery bq = new BooleanQuery();
+ childQuery = bq;
+
+ bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
+ int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
+ bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
+ }
+
+ int x = Random().Next(4);
+ ScoreMode agg;
+ if (x == 0)
+ {
+ agg = ScoreMode.None;
+ }
+ else if (x == 1)
+ {
+ agg = ScoreMode.Max;
+ }
+ else if (x == 2)
+ {
+ agg = ScoreMode.Total;
+ }
+ else
+ {
+ agg = ScoreMode.Avg;
+ }
+
+ ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
+
+ // To run against the block-join index:
+ Query parentJoinQuery;
+
+ // Same query as parentJoinQuery, but to run against
+ // the fully denormalized index (so we can compare
+ // results):
+ Query parentQuery;
+
+ if (Random().NextBoolean())
+ {
+ parentQuery = childQuery;
+ parentJoinQuery = childJoinQuery;
+ }
+ else
+ {
+ // AND parent field w/ child field
+ BooleanQuery bq = new BooleanQuery();
+ parentJoinQuery = bq;
+ Term parentTerm = RandomParentTerm(parentFields[0]);
+ if (Random().NextBoolean())
+ {
+ bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
+ bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
+ }
+ else
+ {
+ bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
+ bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
+ }
+
+ BooleanQuery bq2 = new BooleanQuery();
+ parentQuery = bq2;
+ if (Random().NextBoolean())
+ {
+ bq2.Add(childQuery, BooleanClause.Occur.MUST);
+ bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
+ }
+ else
+ {
+ bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
+ bq2.Add(childQuery, BooleanClause.Occur.MUST);
+ }
+ }
+
+ Sort parentSort = GetRandomSort("parent", parentFields.Length);
+ Sort childSort = GetRandomSort("child", childFields.Length);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
+ }
+
+ // Merge both sorts:
+ IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
+ sortFields.AddRange(Arrays.AsList(childSort.GetSort()));
+ Sort parentAndChildSort = new Sort(sortFields.ToArray());
+
+ TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
+ ScoreDoc[] hits = results.ScoreDocs;
+ for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
+ {
+ Document doc = s.Doc(hits[hitIDX].Doc);
+ //System.out.println(" score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
+ Console.WriteLine(" parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
+ FieldDoc fd = (FieldDoc)hits[hitIDX];
+ if (fd.Fields != null)
+ {
+ Console.Write(" ");
+ foreach (object o in fd.Fields)
+ {
+ if (o is BytesRef)
+ {
+ Console.Write(((BytesRef)o).Utf8ToString() + " ");
+ }
+ else
+ {
+ Console.Write(o + " ");
+ }
+ }
+ Console.WriteLine();
+ }
+ }
+ }
+
+ bool trackScores;
+ bool trackMaxScore;
+ if (agg == ScoreMode.None)
+ {
+ trackScores = false;
+ trackMaxScore = false;
+ }
+ else
+ {
+ trackScores = Random().NextBoolean();
+ trackMaxScore = Random().NextBoolean();
+ }
+ ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);
+
+ joinS.Search(parentJoinQuery, c);
+
+ int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
+ //final int hitsPerGroup = 100;
+ TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
+ if (joinResults != null)
+ {
+ GroupDocs<int>[] groups = joinResults.Groups;
+ for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
+ {
+ GroupDocs<int> group = groups[groupIDX];
+ if (group.GroupSortValues != null)
+ {
+ Console.Write(" ");
+ foreach (object o in group.GroupSortValues)
+ {
+ if (o is BytesRef)
+ {
+ Console.Write(((BytesRef)o).Utf8ToString() + " ");
+ }
+ else
+ {
+ Console.Write(o + " ");
+ }
+ }
+ Console.WriteLine();
+ }
+
+ assertNotNull(group.GroupValue);
+ Document parentDoc = joinS.Doc(group.GroupValue);
+ Console.WriteLine(" group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
+ for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
+ {
+ Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
+ //System.out.println(" score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
+ Console.WriteLine(" childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
+ }
+ }
+ }
+ }
+
+ if (results.TotalHits == 0)
+ {
+ assertNull(joinResults);
+ }
+ else
+ {
+ CompareHits(r, joinR, results, joinResults);
+ TopDocs b = joinS.Search(childJoinQuery, 10);
+ foreach (ScoreDoc hit in b.ScoreDocs)
+ {
+ Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
+ Document document = joinS.Doc(hit.Doc - 1);
+ int childId = Convert.ToInt32(document.Get("childID"));
+ assertTrue(explanation.IsMatch);
+ assertEquals(hit.Score, explanation.Value, 0.0f);
+ assertEquals(string.Format("Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);
+ }
+ }
+
+ // Test joining in the opposite direction (parent to
+ // child):
+
+ // Get random query against parent documents:
+ Query parentQuery2;
+ if (Random().Next(3) == 2)
+ {
+ int fieldID = Random().Next(parentFields.Length);
+ parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
+ }
+ else if (Random().Next(3) == 2)
+ {
+ BooleanQuery bq = new BooleanQuery();
+ parentQuery2 = bq;
+ int numClauses = TestUtil.NextInt(Random(), 2, 4);
+ bool didMust = false;
+ for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
+ {
+ Query clause;
+ BooleanClause.Occur occur;
+ if (!didMust && Random().NextBoolean())
+ {
+ occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
+ clause = new TermQuery(RandomParentTerm(parentFields[0]));
+ didMust = true;
+ }
+ else
+ {
+ occur = BooleanClause.Occur.SHOULD;
+ int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
+ clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
+ }
+ bq.Add(clause, occur);
+ }
+ }
+ else
+ {
+ BooleanQuery bq = new BooleanQuery();
+ parentQuery2 = bq;
+
+ bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
+ int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
+ bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
+ }
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);
+ }
+
+ // Maps parent query to child docs:
+ ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());
+
+ // To run against the block-join index:
+ Query childJoinQuery2;
+
+ // Same query as parentJoinQuery, but to run against
+ // the fully denormalized index (so we can compare
+ // results):
+ Query childQuery2;
+
+ // apply a filter to children
+ Filter childFilter2, childJoinFilter2;
+
+ if (Random().NextBoolean())
+ {
+ childQuery2 = parentQuery2;
+ childJoinQuery2 = parentJoinQuery2;
+ childFilter2 = null;
+ childJoinFilter2 = null;
+ }
+ else
+ {
+ Term childTerm = RandomChildTerm(childFields[0]);
+ if (Random().NextBoolean()) // filtered case
+ {
+ childJoinQuery2 = parentJoinQuery2;
+ Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
+ childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
+ }
+ else
+ {
+ childJoinFilter2 = null;
+ // AND child field w/ parent query:
+ BooleanQuery bq = new BooleanQuery();
+ childJoinQuery2 = bq;
+ if (Random().NextBoolean())
+ {
+ bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
+ bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
+ }
+ else
+ {
+ bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
+ bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
+ }
+ }
+
+ if (Random().NextBoolean()) // filtered case
+ {
+ childQuery2 = parentQuery2;
+ Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
+ childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
+ }
+ else
+ {
+ childFilter2 = null;
+ BooleanQuery bq2 = new BooleanQuery();
+ childQuery2 = bq2;
+ if (Random().NextBoolean())
+ {
+ bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
+ bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
+ }
+ else
+ {
+ bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
+ bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
+ }
+ }
+ }
+
+ Sort childSort2 = GetRandomSort("child", childFields.Length);
+
+ // Search denormalized index:
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
+ }
+ TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
+ if (VERBOSE)
+ {
+ Console.WriteLine(" " + results2.TotalHits + " totalHits:");
+ foreach (ScoreDoc sd in results2.ScoreDocs)
+ {
+ Document doc = s.Doc(sd.Doc);
+ Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);
+ }
+ }
+
+ // Search join index:
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
+ }
+ TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
+ if (VERBOSE)
+ {
+ Console.WriteLine(" " + joinResults2.TotalHits + " totalHits:");
+ foreach (ScoreDoc sd in joinResults2.ScoreDocs)
+ {
+ Document doc = joinS.Doc(sd.Doc);
+ Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
+ Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);
+ }
+ }
+
+ CompareChildHits(r, joinR, results2, joinResults2);
+ }
+
+ r.Dispose();
+ joinR.Dispose();
+ dir.Dispose();
+ joinDir.Dispose();
+ }
+
+ /// <summary>
+ /// Asserts that two child-level result lists agree hit by hit: same total
+ /// hit count, same number of returned hits, same stored "childID" at every
+ /// rank, and the same sort values on every hit.
+ /// </summary>
+ /// <param name="r">Reader used to load the documents referenced by <paramref name="results"/>.</param>
+ /// <param name="joinR">Reader used to load the documents referenced by <paramref name="joinResults"/>.</param>
+ /// <param name="results">Hits to use as the reference.</param>
+ /// <param name="joinResults">Hits that must match the reference.</param>
+ private void CompareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults)
+ {
+     assertEquals(results.TotalHits, joinResults.TotalHits);
+
+     ScoreDoc[] expectedHits = results.ScoreDocs;
+     ScoreDoc[] actualHits = joinResults.ScoreDocs;
+     assertEquals(expectedHits.Length, actualHits.Length);
+
+     for (int rank = 0; rank < expectedHits.Length; rank++)
+     {
+         ScoreDoc expectedHit = expectedHits[rank];
+         ScoreDoc actualHit = actualHits[rank];
+
+         // Only the child identity is compared; scores are expected to differ.
+         Document expectedDoc = r.Document(expectedHit.Doc);
+         Document actualDoc = joinR.Document(actualHit.Doc);
+         assertEquals("hit " + rank + " differs", expectedDoc.Get("childID"), actualDoc.Get("childID"));
+
+         // Both hits must carry sort values, and those must be equal.
+         assertTrue(expectedHit is FieldDoc);
+         assertTrue(actualHit is FieldDoc);
+         assertArrayEquals(((FieldDoc)expectedHit).Fields, ((FieldDoc)actualHit).Fields);
+     }
+ }
+
+ /// <summary>
+ /// Walks the grouped block-join results and the flat result list in
+ /// lock-step, asserting that every child hit in every group matches the
+ /// flat hit at the current position (same "parentID", same "childID").
+ /// </summary>
+ /// <remarks>
+ /// The flat list is 'complete' while the grouped results are a subset, so
+ /// after each group the flat cursor skips any remaining hits that still
+ /// belong to that group's parent.
+ /// </remarks>
+ /// <param name="r">Reader used to load the documents referenced by <paramref name="results"/>.</param>
+ /// <param name="joinR">Reader used to load the group and child documents of <paramref name="joinResults"/>.</param>
+ /// <param name="results">Complete flat hit list.</param>
+ /// <param name="joinResults">Grouped hits to verify against the flat list.</param>
+ private void CompareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<int> joinResults)
+ {
+     ScoreDoc[] flatHits = results.ScoreDocs;
+     GroupDocs<int>[] groups = joinResults.Groups;
+
+     // Cursor into flatHits; advanced as group hits are matched off.
+     int flatPos = 0;
+
+     for (int groupIndex = 0; groupIndex < groups.Length; groupIndex++)
+     {
+         GroupDocs<int> currentGroup = groups[groupIndex];
+         ScoreDoc[] childHits = currentGroup.ScoreDocs;
+         assertNotNull(currentGroup.GroupValue);
+
+         Document groupParent = joinR.Document(currentGroup.GroupValue);
+         string parentID = groupParent.Get("parentID");
+         assertNotNull(parentID);
+         assertTrue(childHits.Length > 0);
+
+         foreach (ScoreDoc childHit in childHits)
+         {
+             Document flatDoc = r.Document(flatHits[flatPos].Doc);
+             flatPos++;
+             Document blockDoc = joinR.Document(childHit.Doc);
+             assertEquals(parentID, flatDoc.Get("parentID"));
+             assertEquals(blockDoc.Get("childID"), flatDoc.Get("childID"));
+         }
+
+         bool anotherGroupFollows = groupIndex + 1 < groups.Length;
+         if (anotherGroupFollows)
+         {
+             // Advance the flat cursor to the first hit of the next parentID.
+             assertTrue(flatPos < flatHits.Length);
+             while (parentID.Equals(r.Document(flatHits[flatPos].Doc).Get("parentID")))
+             {
+                 flatPos++;
+                 assertTrue(flatPos < flatHits.Length);
+             }
+         }
+     }
+ }
+
+ /// <summary>
+ /// Indexes a single block (two jobs, one qualification, then the resume)
+ /// and runs a query that joins two different child queries up to the same
+ /// parent, checking that each join reports exactly one matching child
+ /// under the "Lisa" resume.
+ /// </summary>
+ [Test]
+ public void TestMultiChildTypes()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     // Children first, parent (the resume) last.
+     IList<Document> block = new List<Document>
+     {
+         MakeJob("java", 2007),
+         MakeJob("python", 2010),
+         MakeQualification("maths", 1999),
+         MakeResume("Lisa", "United Kingdom")
+     };
+     writer.AddDocuments(block);
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     BooleanClause.Occur must = BooleanClause.Occur.MUST;
+
+     // Resumes are the "parent" documents of the index.
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+     // Child criteria #1: relevant work experience.
+     BooleanQuery jobQuery = new BooleanQuery();
+     jobQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), must));
+     jobQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), must));
+
+     // Child criteria #2: a qualification.
+     BooleanQuery qualificationQuery = new BooleanQuery();
+     qualificationQuery.Add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), must));
+     qualificationQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 1980, 2000, true, true), must));
+
+     // Parent criteria: a resident of the UK.
+     Query residentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+     // Join each child query up to its parent.
+     ToParentBlockJoinQuery jobJoin = new ToParentBlockJoinQuery(jobQuery, parentsFilter, ScoreMode.Avg);
+     ToParentBlockJoinQuery qualificationJoin = new ToParentBlockJoinQuery(qualificationQuery, parentsFilter, ScoreMode.Avg);
+
+     // A candidate must satisfy the parent query and both joined child queries.
+     BooleanQuery candidateQuery = new BooleanQuery();
+     candidateQuery.Add(new BooleanClause(residentQuery, must));
+     candidateQuery.Add(new BooleanClause(jobJoin, must));
+     candidateQuery.Add(new BooleanClause(qualificationJoin, must));
+
+     // Collects the child docs of both joins for each resume hit in the
+     // top N (sorted by score).
+     ToParentBlockJoinCollector collector = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
+     searcher.Search(candidateQuery, collector);
+
+     // "Job" children.
+     TopGroups<int> jobGroups = collector.GetTopGroups(jobJoin, null, 0, 10, 0, true);
+     assertEquals(1, jobGroups.TotalGroupedHitCount);
+     assertEquals(1, jobGroups.Groups.Length);
+
+     GroupDocs<int> jobGroup = jobGroups.Groups[0];
+     assertEquals(1, jobGroup.TotalHits);
+
+     Document matchedJob = searcher.Doc(jobGroup.ScoreDocs[0].Doc);
+     assertEquals("java", matchedJob.Get("skill"));
+     assertNotNull(jobGroup.GroupValue);
+     Document resume = searcher.Doc(jobGroup.GroupValue);
+     assertEquals("Lisa", resume.Get("name"));
+
+     // "Qualification" children.
+     TopGroups<int> qualificationGroups = collector.GetTopGroups(qualificationJoin, null, 0, 10, 0, true);
+     assertEquals(1, qualificationGroups.TotalGroupedHitCount);
+     assertEquals(1, qualificationGroups.Groups.Length);
+
+     GroupDocs<int> qualificationGroup = qualificationGroups.Groups[0];
+     assertEquals(1, qualificationGroup.TotalHits);
+
+     Document matchedQualification = searcher.Doc(qualificationGroup.ScoreDocs[0].Doc);
+     assertEquals("maths", matchedQualification.Get("qualification"));
+     assertNotNull(qualificationGroup.GroupValue);
+     resume = searcher.Doc(qualificationGroup.GroupValue);
+     assertEquals("Lisa", resume.Get("name"));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Indexes one child followed by its parent and checks that advancing the
+ /// block-join scorer of the first leaf to target 1 returns 1.
+ /// </summary>
+ [Test]
+ public void TestAdvanceSingleParentSingleChild()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     Document child = new Document();
+     child.Add(NewStringField("child", "1", Field.Store.NO));
+     Document parent = new Document();
+     parent.Add(NewStringField("parent", "1", Field.Store.NO));
+
+     // Child first, parent last.
+     writer.AddDocuments(Arrays.AsList(child, parent));
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     Query childQuery = new TermQuery(new Term("child", "1"));
+     Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "1"))));
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(childQuery, parentFilter, ScoreMode.Avg);
+
+     Weight joinWeight = searcher.CreateNormalizedWeight(joinQuery);
+     DocIdSetIterator iterator = joinWeight.Scorer(searcher.IndexReader.Leaves.First(), null);
+     assertEquals(1, iterator.Advance(1));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Indexes a childless parent followed by a child/parent block, merges to
+ /// a single segment, and checks that advancing the block-join scorer to
+ /// target 0 returns 2.
+ /// </summary>
+ [Test]
+ public void TestAdvanceSingleParentNoChild()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogDocMergePolicy()));
+
+     // A parent with no children at all.
+     Document loneParent = new Document();
+     loneParent.Add(NewStringField("parent", "1", Field.Store.NO));
+     loneParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(loneParent));
+
+     // A second block that does have a child, so the scorer is not null.
+     Document secondParent = new Document();
+     secondParent.Add(NewStringField("parent", "2", Field.Store.NO));
+     secondParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     Document child = new Document();
+     child.Add(NewStringField("child", "2", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(child, secondParent));
+
+     // The test needs a single segment.
+     writer.ForceMerge(1);
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     Query childQuery = new TermQuery(new Term("child", "2"));
+     Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(childQuery, parentFilter, ScoreMode.Avg);
+
+     Weight joinWeight = searcher.CreateNormalizedWeight(joinQuery);
+     DocIdSetIterator iterator = joinWeight.Scorer(searcher.IndexReader.Leaves.First(), null);
+     assertEquals(2, iterator.Advance(0));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Indexes one resume with four shuffled job children (surrounded by
+ /// whatever <c>AddSkillless</c> adds) and checks the groups returned by
+ /// <c>GetTopGroups</c> and <c>GetTopGroupsWithAllChildDocs</c>: first with
+ /// room for every matching child, then with at most one child per group.
+ /// </summary>
+ [Test]
+ public void TestGetTopGroups()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     IList<Document> block = new List<Document>
+     {
+         MakeJob("ruby", 2005),
+         MakeJob("java", 2006),
+         MakeJob("java", 2010),
+         MakeJob("java", 2012)
+     };
+     CollectionsHelper.Shuffle(block);
+     // The parent is added after the shuffle so it stays last in the block.
+     block.Add(MakeResume("Frank", "United States"));
+
+     AddSkillless(writer);
+     writer.AddDocuments(block);
+     AddSkillless(writer);
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = new IndexSearcher(reader);
+
+     // Resumes are the "parent" documents of the index.
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+     // Child criteria: java experience between 2006 and 2011 inclusive.
+     BooleanQuery jobQuery = new BooleanQuery();
+     jobQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
+     jobQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
+
+     // Join matching children up to their parent.
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(jobQuery, parentsFilter, ScoreMode.Avg);
+
+     ToParentBlockJoinCollector collector = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
+     searcher.Search(joinQuery, collector);
+
+     // Two ways of asking for every child document of each group.
+     TopGroups<int>[] unboundedVariants =
+     {
+         collector.GetTopGroups(joinQuery, null, 0, 10, 0, true),
+         collector.GetTopGroupsWithAllChildDocs(joinQuery, null, 0, 0, true)
+     };
+
+     foreach (TopGroups<int> topGroups in unboundedVariants)
+     {
+         assertFalse(float.IsNaN(topGroups.MaxScore));
+         assertEquals(2, topGroups.TotalGroupedHitCount);
+         assertEquals(1, topGroups.Groups.Length);
+
+         GroupDocs<int> fullGroup = topGroups.Groups[0];
+         assertEquals(2, fullGroup.TotalHits);
+         assertFalse(float.IsNaN(fullGroup.Score));
+         assertNotNull(fullGroup.GroupValue);
+         Document fullGroupParent = searcher.Doc(fullGroup.GroupValue);
+         assertEquals("Frank", fullGroupParent.Get("name"));
+
+         // Every matched child document was collected.
+         assertEquals(2, fullGroup.ScoreDocs.Length);
+
+         foreach (ScoreDoc childHit in fullGroup.ScoreDocs)
+         {
+             Document job = searcher.Doc(childHit.Doc);
+             assertEquals("java", job.Get("skill"));
+             int year = Convert.ToInt32(job.Get("year"));
+             assertTrue(year >= 2006 && year <= 2011);
+         }
+     }
+
+     // Now ask for at most one child document per group.
+     TopGroups<int> bounded = collector.GetTopGroups(joinQuery, null, 0, 1, 0, true);
+     assertFalse(float.IsNaN(bounded.MaxScore));
+     assertEquals(2, bounded.TotalGroupedHitCount);
+     assertEquals(1, bounded.Groups.Length);
+
+     GroupDocs<int> truncatedGroup = bounded.Groups[0];
+     assertEquals(2, truncatedGroup.TotalHits);
+     assertFalse(float.IsNaN(truncatedGroup.Score));
+     assertNotNull(truncatedGroup.GroupValue);
+     Document truncatedGroupParent = searcher.Doc(truncatedGroup.GroupValue);
+     assertEquals("Frank", truncatedGroupParent.Get("name"));
+
+     // Not every matched child document was collected this time.
+     assertEquals(1, truncatedGroup.ScoreDocs.Length);
+
+     foreach (ScoreDoc childHit in truncatedGroup.ScoreDocs)
+     {
+         Document job = searcher.Doc(childHit.Doc);
+         assertEquals("java", job.Get("skill"));
+         int year = Convert.ToInt32(job.Get("year"));
+         assertTrue(year >= 2006 && year <= 2011);
+     }
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ // LUCENE-4968
+ /// <summary>
+ /// Two parents match the parent-level clause but only the first has a
+ /// child matching the joined child query; checks that both parents are
+ /// reported as groups with a single grouped child hit in total.
+ /// </summary>
+ [Test]
+ public void TestSometimesParentOnlyMatches()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     Document firstParent = new Document();
+     firstParent.Add(new StoredField("parentID", "0"));
+     firstParent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     firstParent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+
+     IList<Document> block = new List<Document>();
+
+     Document onlyChild = new Document();
+     block.Add(onlyChild);
+     onlyChild.Add(new StoredField("childID", "0"));
+     onlyChild.Add(NewTextField("childText", "text", Field.Store.NO));
+
+     // First block: the child, then its parent.
+     block.Add(firstParent);
+     writer.AddDocuments(block);
+
+     block.Clear();
+
+     Document secondParent = new Document();
+     secondParent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     secondParent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+     secondParent.Add(new StoredField("parentID", "1"));
+
+     // Second block: a parent on its own.
+     block.Add(secondParent);
+     writer.AddDocuments(block);
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+
+     Query childTextQuery = new TermQuery(new Term("childText", "text"));
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(childTextQuery, parentsFilter, ScoreMode.Avg);
+
+     // Either the joined child clause or the parent text clause may match.
+     BooleanQuery eitherQuery = new BooleanQuery();
+     eitherQuery.Add(joinQuery, BooleanClause.Occur.SHOULD);
+     eitherQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
+
+     Sort byParentID = new Sort(new SortField("parentID", SortField.Type_e.STRING));
+     ToParentBlockJoinCollector collector = new ToParentBlockJoinCollector(byParentID, 10, true, true);
+     NewSearcher(reader).Search(eitherQuery, collector);
+     TopGroups<int> topGroups = collector.GetTopGroups(joinQuery, null, 0, 10, 0, false);
+
+     // Two parents:
+     assertEquals(2, (int)topGroups.TotalGroupCount);
+
+     // One child doc:
+     assertEquals(1, topGroups.TotalGroupedHitCount);
+
+     GroupDocs<int> firstGroup = topGroups.Groups[0];
+     Document firstGroupParent = reader.Document((int)firstGroup.GroupValue);
+     assertEquals("0", firstGroupParent.Get("parentID"));
+
+     GroupDocs<int> secondGroup = topGroups.Groups[1];
+     Document secondGroupParent = reader.Document((int)secondGroup.GroupValue);
+     assertEquals("1", secondGroupParent.Get("parentID"));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ // LUCENE-4968
+ /// <summary>
+ /// Same index as the sibling LUCENE-4968 tests, but the joined child query
+ /// uses a term that no document contains; checks that both parents are
+ /// still reported as groups and that no child hits are grouped.
+ /// </summary>
+ [Test]
+ public void TestChildQueryNeverMatches()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     Document firstParent = new Document();
+     firstParent.Add(new StoredField("parentID", "0"));
+     firstParent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     firstParent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+
+     IList<Document> block = new List<Document>();
+
+     Document onlyChild = new Document();
+     block.Add(onlyChild);
+     onlyChild.Add(new StoredField("childID", "0"));
+     onlyChild.Add(NewTextField("childText", "text", Field.Store.NO));
+
+     // First block: the child, then its parent.
+     block.Add(firstParent);
+     writer.AddDocuments(block);
+
+     block.Clear();
+
+     Document secondParent = new Document();
+     secondParent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     secondParent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+     secondParent.Add(new StoredField("parentID", "1"));
+
+     // Second block: a parent on its own.
+     block.Add(secondParent);
+     writer.AddDocuments(block);
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+
+     // This child query never matches anything.
+     Query bogusChildQuery = new TermQuery(new Term("childText", "bogus"));
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(bogusChildQuery, parentsFilter, ScoreMode.Avg);
+
+     // Either the joined child clause or the parent text clause may match.
+     BooleanQuery eitherQuery = new BooleanQuery();
+     eitherQuery.Add(joinQuery, BooleanClause.Occur.SHOULD);
+     eitherQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
+
+     Sort byParentID = new Sort(new SortField("parentID", SortField.Type_e.STRING));
+     ToParentBlockJoinCollector collector = new ToParentBlockJoinCollector(byParentID, 10, true, true);
+     NewSearcher(reader).Search(eitherQuery, collector);
+     TopGroups<int> topGroups = collector.GetTopGroups(joinQuery, null, 0, 10, 0, false);
+
+     // Two parents:
+     assertEquals(2, (int)topGroups.TotalGroupCount);
+
+     // No child docs:
+     assertEquals(0, topGroups.TotalGroupedHitCount);
+
+     GroupDocs<int> firstGroup = topGroups.Groups[0];
+     Document firstGroupParent = reader.Document((int)firstGroup.GroupValue);
+     assertEquals("0", firstGroupParent.Get("parentID"));
+
+     GroupDocs<int> secondGroup = topGroups.Groups[1];
+     Document secondGroupParent = reader.Document((int)secondGroup.GroupValue);
+     assertEquals("1", secondGroupParent.Get("parentID"));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ // LUCENE-4968
+ /// <summary>
+ /// Same index as the sibling LUCENE-4968 tests, but the "child" query
+ /// passed to the join is a term on a parent-only field, so it matches
+ /// parent documents; checks that searching throws
+ /// <see cref="InvalidOperationException"/>.
+ /// </summary>
+ [Test]
+ public void TestChildQueryMatchesParent()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     Document firstParent = new Document();
+     firstParent.Add(new StoredField("parentID", "0"));
+     firstParent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     firstParent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+
+     IList<Document> block = new List<Document>();
+
+     Document onlyChild = new Document();
+     block.Add(onlyChild);
+     onlyChild.Add(new StoredField("childID", "0"));
+     onlyChild.Add(NewTextField("childText", "text", Field.Store.NO));
+
+     // First block: the child, then its parent.
+     block.Add(firstParent);
+     writer.AddDocuments(block);
+
+     block.Clear();
+
+     Document secondParent = new Document();
+     secondParent.Add(NewTextField("parentText", "text", Field.Store.NO));
+     secondParent.Add(NewStringField("isParent", "yes", Field.Store.NO));
+     secondParent.Add(new StoredField("parentID", "1"));
+
+     // Second block: a parent on its own.
+     block.Add(secondParent);
+     writer.AddDocuments(block);
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+
+     // Illegal: this "child" query matches parent documents.
+     Query misplacedChildQuery = new TermQuery(new Term("parentText", "text"));
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+     ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(misplacedChildQuery, parentsFilter, ScoreMode.Avg);
+
+     BooleanQuery eitherQuery = new BooleanQuery();
+     eitherQuery.Add(joinQuery, BooleanClause.Occur.SHOULD);
+     eitherQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
+
+     Sort byParentID = new Sort(new SortField("parentID", SortField.Type_e.STRING));
+     ToParentBlockJoinCollector collector = new ToParentBlockJoinCollector(byParentID, 10, true, true);
+
+     Assert.Throws<InvalidOperationException>(() => NewSearcher(reader).Search(eitherQuery, collector));
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+
+ /// <summary>
+ /// Indexes a child/parent block, then a childless parent "2" which is
+ /// deleted again, then a new child/parent block for parent "2"; checks
+ /// that a parent-to-child join on parent "2" finds exactly one hit.
+ /// </summary>
+ [Test]
+ public void TestAdvanceSingleDeletedParentNoChild()
+ {
+     Directory directory = NewDirectory();
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
+
+     // First block: parent "1" with one child.
+     Document firstParent = new Document();
+     firstParent.Add(NewStringField("parent", "1", Field.Store.NO));
+     firstParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     Document firstChild = new Document();
+     firstChild.Add(NewStringField("child", "1", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(firstChild, firstParent));
+
+     // Second block: parent "2" without children, deleted right away.
+     Document doomedParent = new Document();
+     doomedParent.Add(NewStringField("parent", "2", Field.Store.NO));
+     doomedParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(doomedParent));
+
+     writer.DeleteDocuments(new Term("parent", "2"));
+
+     // Third block: parent "2" again, this time with one child.
+     Document liveParent = new Document();
+     liveParent.Add(NewStringField("parent", "2", Field.Store.NO));
+     liveParent.Add(NewStringField("isparent", "yes", Field.Store.NO));
+     Document liveChild = new Document();
+     liveChild.Add(NewStringField("child", "2", Field.Store.NO));
+     writer.AddDocuments(Arrays.AsList(liveChild, liveParent));
+
+     IndexReader reader = writer.Reader;
+     writer.Dispose();
+     IndexSearcher searcher = NewSearcher(reader);
+
+     // Documents flagged isparent=yes are the "parent" documents of the index.
+     Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
+
+     Query parentTwoQuery = new TermQuery(new Term("parent", "2"));
+
+     ToChildBlockJoinQuery toChildJoin = new ToChildBlockJoinQuery(parentTwoQuery, parentsFilter, Random().NextBoolean());
+     TopDocs childHits = searcher.Search(toChildJoin, 3);
+     assertEquals(1, childHits.TotalHits);
+
+     reader.Dispose();
+     directory.Dispose();
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs b/src/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
new file mode 100644
index 0000000..c2df0df
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
@@ -0,0 +1,277 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Join;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Tests.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Checks that <see cref="ToParentBlockJoinSortField"/> sorts parent hits by the values of
+ /// their nested (child) documents, in ascending and descending order and with a child filter.
+ /// </summary>
+ public class TestBlockJoinSorting : LuceneTestCase
+ {
+     [Test]
+     public void TestNestedSorting()
+     {
+         Directory dir = NewDirectory();
+         RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
+
+         // Every block is indexed as three child docs followed by their parent doc.
+         // The doc ids asserted further down (3, 7, 11, 15, 19, 23, 28) are the parents'
+         // positions in this insertion order, so neither the order nor the commit points may change.
+         AddBlock(w, "a", NewChildDoc("a", "T"), NewChildDoc("b", "T"), NewChildDoc("c", "T"));
+         w.Commit();
+
+         AddBlock(w, "b", NewChildDoc("c", "T"), NewChildDoc("d", "T"), NewChildDoc("e", "T"));
+         AddBlock(w, "c", NewChildDoc("e", "T"), NewChildDoc("f", "T"), NewChildDoc("g", "T"));
+         AddBlock(w, "d", NewChildDoc("g", "T"), NewChildDoc("h", "F"), NewChildDoc("i", "F"));
+         w.Commit();
+
+         AddBlock(w, "f", NewChildDoc("i", "F"), NewChildDoc("j", "F"), NewChildDoc("k", "F"));
+         AddBlock(w, "g", NewChildDoc("k", "T"), NewChildDoc("l", "T"), NewChildDoc("m", "T"));
+
+         // This doc will not be included, because it doesn't have nested docs
+         w.AddDocument(NewParentDoc("h"));
+
+         AddBlock(w, "i", NewChildDoc("m", "T"), NewChildDoc("n", "F"), NewChildDoc("o", "F"));
+         w.Commit();
+
+         // Some garbage docs, just to check if the NestedFieldComparator can deal with this.
+         for (int i = 0; i < 3; i++)
+         {
+             Document garbage = new Document();
+             garbage.Add(new StringField("fieldXXX", "x", Field.Store.NO));
+             w.AddDocument(garbage);
+         }
+
+         IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.w, false));
+         w.Dispose();
+         Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
+         Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
+         ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
+
+         // Sort by field ascending, order first
+         ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, Wrap(parentFilter), Wrap(childFilter));
+         Sort sort = new Sort(sortField);
+         TopFieldDocs topDocs = searcher.Search(query, 5, sort);
+         assertEquals(7, topDocs.TotalHits);
+         assertEquals(5, topDocs.ScoreDocs.Length);
+         AssertHit(topDocs, 0, 3, "a");
+         AssertHit(topDocs, 1, 7, "c");
+         AssertHit(topDocs, 2, 11, "e");
+         AssertHit(topDocs, 3, 15, "g");
+         AssertHit(topDocs, 4, 19, "i");
+
+         // Sort by field ascending, order last
+         sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
+         sort = new Sort(sortField);
+         topDocs = searcher.Search(query, 5, sort);
+         assertEquals(7, topDocs.TotalHits);
+         assertEquals(5, topDocs.ScoreDocs.Length);
+         AssertHit(topDocs, 0, 3, "c");
+         AssertHit(topDocs, 1, 7, "e");
+         AssertHit(topDocs, 2, 11, "g");
+         AssertHit(topDocs, 3, 15, "i");
+         AssertHit(topDocs, 4, 19, "k");
+
+         // Sort by field descending, order last
+         sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
+         sort = new Sort(sortField);
+         topDocs = searcher.Search(query, 5, sort);
+         // Expected value goes first so a failure message reports the right "expected".
+         assertEquals(7, topDocs.TotalHits);
+         assertEquals(5, topDocs.ScoreDocs.Length);
+         AssertHit(topDocs, 0, 28, "o");
+         AssertHit(topDocs, 1, 23, "m");
+         AssertHit(topDocs, 2, 19, "k");
+         AssertHit(topDocs, 3, 15, "i");
+         AssertHit(topDocs, 4, 11, "g");
+
+         // Sort by field descending, order last, sort filter (filter_1:T)
+         childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T"))));
+         query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
+         sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
+         sort = new Sort(sortField);
+         topDocs = searcher.Search(query, 5, sort);
+         assertEquals(6, topDocs.TotalHits);
+         assertEquals(5, topDocs.ScoreDocs.Length);
+         AssertHit(topDocs, 0, 23, "m");
+         AssertHit(topDocs, 1, 28, "m");
+         AssertHit(topDocs, 2, 11, "g");
+         AssertHit(topDocs, 3, 15, "g");
+         AssertHit(topDocs, 4, 7, "e");
+
+         searcher.IndexReader.Dispose();
+         dir.Dispose();
+     }
+
+     /// <summary>
+     /// Builds a child document carrying the sort value ("field2") and the "filter_1" flag.
+     /// </summary>
+     private static Document NewChildDoc(string field2, string filter1)
+     {
+         Document child = new Document();
+         child.Add(new StringField("field2", field2, Field.Store.NO));
+         child.Add(new StringField("filter_1", filter1, Field.Store.NO));
+         return child;
+     }
+
+     /// <summary>
+     /// Builds a parent document: marked with "__type" = "parent" and holding "field1".
+     /// </summary>
+     private static Document NewParentDoc(string field1)
+     {
+         Document parent = new Document();
+         parent.Add(new StringField("__type", "parent", Field.Store.NO));
+         parent.Add(new StringField("field1", field1, Field.Store.NO));
+         return parent;
+     }
+
+     /// <summary>
+     /// Indexes one block in a single AddDocuments call: the children first, the parent last.
+     /// </summary>
+     private static void AddBlock(RandomIndexWriter w, string parentField1, params Document[] children)
+     {
+         IList<Document> docs = new List<Document>(children);
+         docs.Add(NewParentDoc(parentField1));
+         w.AddDocuments(docs);
+     }
+
+     /// <summary>
+     /// Asserts that the hit at <paramref name="index"/> is document <paramref name="expectedDoc"/>
+     /// and that its first sort value decodes to <paramref name="expectedValue"/>.
+     /// </summary>
+     private void AssertHit(TopFieldDocs topDocs, int index, int expectedDoc, string expectedValue)
+     {
+         ScoreDoc hit = topDocs.ScoreDocs[index];
+         assertEquals(expectedDoc, hit.Doc);
+         assertEquals(expectedValue, ((BytesRef)((FieldDoc)hit).Fields[0]).Utf8ToString());
+     }
+
+     /// <summary>
+     /// Randomly returns the filter as is or wrapped in a <see cref="FixedBitSetCachingWrapperFilter"/>,
+     /// so both kinds of filter are exercised across runs.
+     /// </summary>
+     private Filter Wrap(Filter filter)
+     {
+         return Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(filter) : filter;
+     }
+ }
+}
\ No newline at end of file
[02/17] lucenenet git commit: Ported over the first batch of items
from the Join project
Posted by sy...@apache.org.
Ported over the first batch of items from the Join project
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bd772f02
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bd772f02
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bd772f02
Branch: refs/heads/master
Commit: bd772f024f87a8e5b1115d5ce5ce07cd944fcdef
Parents: ecd2860
Author: Josh Sullivan <ja...@gmail.com>
Authored: Fri Aug 14 02:40:45 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Fri Aug 14 02:40:45 2015 -0400
----------------------------------------------------------------------
Lucene.Net.Join/FakeScorer.cs | 76 +++
.../FixedBitSetCachingWrapperFilter.cs | 62 +++
Lucene.Net.Join/JoinUtil.cs | 80 ++++
Lucene.Net.Join/Lucene.Net.Join.csproj | 66 +++
Lucene.Net.Join/Properties/AssemblyInfo.cs | 36 ++
Lucene.Net.Join/ScoreMode.cs | 45 ++
Lucene.Net.Join/TermsCollector.cs | 127 +++++
Lucene.Net.Join/TermsIncludingScoreQuery.cs | 472 +++++++++++++++++++
Lucene.Net.Join/TermsQuery.cs | 147 ++++++
Lucene.Net.Join/TermsWithScoreCollector.cs | 333 +++++++++++++
Lucene.Net.sln | 18 +-
11 files changed, 1460 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/FakeScorer.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/FakeScorer.cs b/Lucene.Net.Join/FakeScorer.cs
new file mode 100644
index 0000000..42bf91b
--- /dev/null
+++ b/Lucene.Net.Join/FakeScorer.cs
@@ -0,0 +1,76 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Passed to <see cref="Collector.Scorer"/> during join collection.
+ /// It does not iterate: it only reports the document id and score currently stored in its
+ /// fields, and every other <see cref="Scorer"/> operation throws <see cref="NotSupportedException"/>.
+ /// </summary>
+ internal sealed class FakeScorer : Scorer
+ {
+     // Score reported by Score(); assignable from anywhere in this assembly.
+     internal float _score;
+
+     // Document id reported by DocID(); starts at -1.
+     internal int doc = -1;
+
+     public FakeScorer() : base(null)
+     {
+     }
+
+     /// <summary>Returns the current value of <c>doc</c>.</summary>
+     public override int DocID()
+     {
+         return doc;
+     }
+
+     /// <summary>Not supported.</summary>
+     /// <exception cref="NotSupportedException">Always thrown.</exception>
+     public override int NextDoc()
+     {
+         throw new NotSupportedException("FakeScorer doesn't support NextDoc()");
+     }
+
+     /// <summary>Not supported.</summary>
+     /// <exception cref="NotSupportedException">Always thrown.</exception>
+     public override int Advance(int target)
+     {
+         throw new NotSupportedException("FakeScorer doesn't support Advance(int)");
+     }
+
+     /// <summary>Always returns 1.</summary>
+     public override long Cost()
+     {
+         return 1;
+     }
+
+     /// <summary>Not supported.</summary>
+     /// <exception cref="NotSupportedException">Always thrown.</exception>
+     public override int Freq()
+     {
+         throw new NotSupportedException("FakeScorer doesn't support Freq()");
+     }
+
+     /// <summary>Returns the current value of <c>_score</c>.</summary>
+     public override float Score()
+     {
+         return _score;
+     }
+
+     /// <summary>Not supported.</summary>
+     /// <exception cref="NotSupportedException">Always thrown.</exception>
+     public override Weight Weight
+     {
+         // Message added so this failure reads like the ones thrown by the sibling members.
+         get { throw new NotSupportedException("FakeScorer doesn't support Weight"); }
+     }
+
+     /// <summary>Not supported.</summary>
+     /// <exception cref="NotSupportedException">Always thrown.</exception>
+     public override ICollection<ChildScorer> Children
+     {
+         get { throw new NotSupportedException("FakeScorer doesn't support Children"); }
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs b/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
new file mode 100644
index 0000000..da8b0b8
--- /dev/null
+++ b/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
@@ -0,0 +1,62 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A <see cref="CachingWrapperFilter"/> whose cached sets are <see cref="FixedBitSet"/>
+ /// instances, as required for joins.
+ /// </summary>
+ public sealed class FixedBitSetCachingWrapperFilter : CachingWrapperFilter
+ {
+     /// <summary>
+     /// Sole constructor, see <see cref="CachingWrapperFilter"/>.
+     /// </summary>
+     /// <param name="filter">The filter handed to the base class constructor.</param>
+     public FixedBitSetCachingWrapperFilter(Filter filter) : base(filter)
+     {
+     }
+
+     /// <summary>
+     /// Chooses the set to cache for <paramref name="docIdSet"/>: a set that already is a
+     /// <see cref="FixedBitSet"/> is returned unchanged, a null set or one without an iterator
+     /// yields <c>EMPTY_DOCIDSET</c>, and anything else is copied into a new
+     /// <see cref="FixedBitSet"/> sized to <c>reader.MaxDoc</c>.
+     /// </summary>
+     protected override DocIdSet DocIdSetToCache(DocIdSet docIdSet, AtomicReader reader)
+     {
+         // Already the representation joins need. A null reference never matches "is",
+         // so testing this first does not change how null is handled below.
+         if (docIdSet is FixedBitSet)
+         {
+             return docIdSet;
+         }
+
+         // Every other set is copied into a FixedBitSet, since all cached filters must be
+         // FixedBitSets (per the original note, this is where it differs from CachingWrapperFilter).
+         DocIdSetIterator iterator = docIdSet == null ? null : docIdSet.GetIterator();
+         if (iterator == null)
+         {
+             return EMPTY_DOCIDSET;
+         }
+
+         FixedBitSet bits = new FixedBitSet(reader.MaxDoc);
+         bits.Or(iterator);
+         return bits;
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/JoinUtil.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/JoinUtil.cs b/Lucene.Net.Join/JoinUtil.cs
new file mode 100644
index 0000000..726731e
--- /dev/null
+++ b/Lucene.Net.Join/JoinUtil.cs
@@ -0,0 +1,80 @@
+using System.IO;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Utility for query time joining using TermsQuery and TermsCollector.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public static class JoinUtil
+ {
+     /// <summary>
+     /// Method for query time joining.
+     /// <para>
+     /// Execute the returned query with an <see cref="IndexSearcher"/> to retrieve all documents that have the same terms in the
+     /// to field that match with documents matching the specified fromQuery and have the same terms in the from field.
+     /// </para>
+     /// <para>
+     /// In the case a single document relates to more than one document the <paramref name="multipleValuesPerDocument"/> option
+     /// should be set to true. When <paramref name="multipleValuesPerDocument"/> is set to <c>true</c> only
+     /// the score from the first encountered join value originating from the 'from' side is mapped into the 'to' side,
+     /// even in the case when a second join value related to a specific document yields a higher score. Obviously this
+     /// doesn't apply in the case that <see cref="ScoreMode.None"/> is used, since no scores are computed at all.
+     /// </para>
+     /// <para>
+     /// Memory considerations: During joining all unique join values are kept in memory. On top of that when the scoreMode
+     /// isn't set to <see cref="ScoreMode.None"/> a float value per unique join value is kept in memory for computing scores.
+     /// When scoreMode is set to <see cref="ScoreMode.Avg"/> also an additional integer value is kept in memory per unique
+     /// join value.
+     /// </para>
+     /// </summary>
+     /// <param name="fromField"> The from field to join from </param>
+     /// <param name="multipleValuesPerDocument"> Whether the from field has multiple terms per document </param>
+     /// <param name="toField"> The to field to join to </param>
+     /// <param name="fromQuery"> The query to match documents on the from side </param>
+     /// <param name="fromSearcher"> The searcher that executed the specified fromQuery </param>
+     /// <param name="scoreMode"> Instructs how scores from the fromQuery are mapped to the returned query </param>
+     /// <returns>A <see cref="Query"/> instance that can be used to join documents based on the terms in the from and to field</returns>
+     /// <exception cref="IOException"> If I/O related errors occur </exception>
+     /// <exception cref="System.ArgumentException"> If <paramref name="scoreMode"/> is not one of the handled <see cref="ScoreMode"/> values </exception>
+     public static Query CreateJoinQuery(string fromField, bool multipleValuesPerDocument, string toField, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode)
+     {
+         switch (scoreMode)
+         {
+             case ScoreMode.None:
+                 // No scores needed: collect only the terms of the from field.
+                 TermsCollector termsCollector = TermsCollector.Create(fromField, multipleValuesPerDocument);
+                 fromSearcher.Search(fromQuery, termsCollector);
+                 return new TermsQuery(toField, fromQuery, termsCollector.CollectorTerms);
+             case ScoreMode.Total:
+             case ScoreMode.Max:
+             case ScoreMode.Avg:
+                 // Scoring modes: collect the terms together with a score per term.
+                 TermsWithScoreCollector termsWithScoreCollector = TermsWithScoreCollector.Create(fromField, multipleValuesPerDocument, scoreMode);
+                 fromSearcher.Search(fromQuery, termsWithScoreCollector);
+                 return new TermsIncludingScoreQuery(toField, multipleValuesPerDocument, termsWithScoreCollector.CollectedTerms, termsWithScoreCollector.ScoresPerTerm, fromQuery);
+             default:
+                 throw new System.ArgumentException(string.Format("Score mode {0} isn't supported.", scoreMode));
+         }
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/Lucene.Net.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Lucene.Net.Join.csproj b/Lucene.Net.Join/Lucene.Net.Join.csproj
new file mode 100644
index 0000000..b38303e
--- /dev/null
+++ b/Lucene.Net.Join/Lucene.Net.Join.csproj
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{E8A339C7-FCF6-4A72-8586-56D8961D7B99}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Join</RootNamespace>
+ <AssemblyName>Lucene.Net.Join</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+   <!-- Every source file of the assembly must be listed here; JoinUtil.cs references TermsWithScoreCollector. -->
+   <Compile Include="FakeScorer.cs" />
+   <Compile Include="FixedBitSetCachingWrapperFilter.cs" />
+   <Compile Include="JoinUtil.cs" />
+   <Compile Include="Properties\AssemblyInfo.cs" />
+   <Compile Include="ScoreMode.cs" />
+   <Compile Include="TermsCollector.cs" />
+   <Compile Include="TermsIncludingScoreQuery.cs" />
+   <Compile Include="TermsQuery.cs" />
+   <Compile Include="TermsWithScoreCollector.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Properties/AssemblyInfo.cs b/Lucene.Net.Join/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..2c17c13
--- /dev/null
+++ b/Lucene.Net.Join/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Join")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Join")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("e8a339c7-fcf6-4a72-8586-56d8961d7b99")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/ScoreMode.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ScoreMode.cs b/Lucene.Net.Join/ScoreMode.cs
new file mode 100644
index 0000000..a5b91be
--- /dev/null
+++ b/Lucene.Net.Join/ScoreMode.cs
@@ -0,0 +1,45 @@
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Strategy for combining the scores of several child hits into the score of one parent hit.
+ /// </summary>
+ public enum ScoreMode
+ {
+     /// <summary>
+     /// No scoring is done.
+     /// </summary>
+     None = 0,
+
+     /// <summary>
+     /// The parent hit gets the average of all child scores.
+     /// </summary>
+     Avg = 1,
+
+     /// <summary>
+     /// The parent hit gets the highest of all child scores.
+     /// </summary>
+     Max = 2,
+
+     /// <summary>
+     /// The parent hit gets the sum of all child scores.
+     /// </summary>
+     Total = 3
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/TermsCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsCollector.cs b/Lucene.Net.Join/TermsCollector.cs
new file mode 100644
index 0000000..8f8e4f5
--- /dev/null
+++ b/Lucene.Net.Join/TermsCollector.cs
@@ -0,0 +1,127 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// A collector that collects all terms from a specified field matching the query.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    internal abstract class TermsCollector : Collector
+    {
+        private readonly string _field;
+        private readonly BytesRefHash _collectorTerms = new BytesRefHash();
+
+        internal TermsCollector(string field)
+        {
+            _field = field;
+        }
+
+        /// <summary>
+        /// The hash that every collected term is added to.
+        /// </summary>
+        public BytesRefHash CollectorTerms
+        {
+            get { return _collectorTerms; }
+        }
+
+        /// <summary>
+        /// The scorer is ignored; this collector never reads scores.
+        /// </summary>
+        public override Scorer Scorer
+        {
+            set { }
+        }
+
+        /// <summary>
+        /// Always <c>true</c>. The nested implementations inherit this answer instead of
+        /// overriding it with a throwing stub, which would make any caller that asks
+        /// about document ordering fail with <see cref="System.NotImplementedException"/>.
+        /// </summary>
+        public override bool AcceptsDocsOutOfOrder()
+        {
+            return true;
+        }
+
+        /// <summary>
+        /// Chooses the right <see cref="TermsCollector"/> implementation.
+        /// </summary>
+        /// <param name="field">The field to collect terms for.</param>
+        /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
+        /// <returns>A <see cref="TermsCollector"/> instance.</returns>
+        internal static TermsCollector Create(string field, bool multipleValuesPerDocument)
+        {
+            return multipleValuesPerDocument ? (TermsCollector) new MV(field) : new SV(field);
+        }
+
+        // Implementation for fields that hold multiple values per document.
+        private class MV : TermsCollector
+        {
+            private readonly BytesRef _scratch = new BytesRef();
+            private SortedSetDocValues _docTermOrds;
+
+            internal MV(string field) : base(field)
+            {
+            }
+
+            // Looks up the term for each ordinal of the document and adds it to the shared hash.
+            public override void Collect(int doc)
+            {
+                _docTermOrds.Document = doc;
+                long ord;
+                while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                {
+                    _docTermOrds.LookupOrd(ord, _scratch);
+                    _collectorTerms.Add(_scratch);
+                }
+            }
+
+            public override AtomicReaderContext NextReader
+            {
+                set { _docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field); }
+            }
+        }
+
+        // Implementation for fields that hold a single value per document.
+        private class SV : TermsCollector
+        {
+            private readonly BytesRef _spare = new BytesRef();
+            private BinaryDocValues _fromDocTerms;
+
+            internal SV(string field) : base(field)
+            {
+            }
+
+            // Reads the document's value into the spare buffer and adds it to the shared hash.
+            public override void Collect(int doc)
+            {
+                _fromDocTerms.Get(doc, _spare);
+                _collectorTerms.Add(_spare);
+            }
+
+            public override AtomicReaderContext NextReader
+            {
+                set { _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); }
+            }
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/TermsIncludingScoreQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsIncludingScoreQuery.cs b/Lucene.Net.Join/TermsIncludingScoreQuery.cs
new file mode 100644
index 0000000..9f3befc
--- /dev/null
+++ b/Lucene.Net.Join/TermsIncludingScoreQuery.cs
@@ -0,0 +1,472 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ internal class TermsIncludingScoreQuery : Query
+ {
+        private readonly string _field;
+        private readonly bool _multipleValuesPerDocument;
+        private readonly BytesRefHash _terms;
+        private readonly float[] _scores;   // read as _scores[_ords[i]] by the scorers
+        private readonly int[] _ords;       // result of _terms.Sort(...)
+        private readonly Query _originalQuery;
+        private readonly Query _unwrittenOriginalQuery; // query before Rewrite; used by ToString/Equals/GetHashCode
+
+        /// <summary>
+        /// Creates the query from collected terms. The term order is obtained by calling Sort on
+        /// <paramref name="terms"/>, and <paramref name="originalQuery"/> doubles as the pre-rewrite query.
+        /// </summary>
+        internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
+            float[] scores, Query originalQuery)
+            : this(field, multipleValuesPerDocument, terms, scores,
+                terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer), originalQuery, originalQuery)
+        {
+        }
+
+        // Assigns every field as given; Rewrite uses it to carry the pre-rewrite query along.
+        private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
+            float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
+        {
+            _field = field;
+            _multipleValuesPerDocument = multipleValuesPerDocument;
+            _terms = terms;
+            _scores = scores;
+            _ords = ords;
+            _originalQuery = originalQuery;
+            _unwrittenOriginalQuery = unwrittenOriginalQuery;
+        }
+
+        public override string ToString(string @string) // the argument is not used
+        {
+            object[] shown = { _field, _unwrittenOriginalQuery }; // the query as it was before rewriting
+            return string.Format("TermsIncludingScoreQuery{{field={0};originalQuery={1}}}", shown);
+        }
+
+        public override void ExtractTerms(ISet<Term> terms)
+        {
+            _originalQuery.ExtractTerms(terms); // delegated to the wrapped query
+        }
+
+        // Rewrites the wrapped query; a new instance is only created when that rewrite returned another query.
+        public override Query Rewrite(IndexReader reader)
+        {
+            Query inner = _originalQuery.Rewrite(reader);
+            if (inner == _originalQuery)
+            {
+                return this;
+            }
+
+            // The pre-rewrite query is passed along; it is what ToString/Equals/GetHashCode look at.
+            return new TermsIncludingScoreQuery(_field, _multipleValuesPerDocument, _terms, _scores,
+                _ords, inner, _originalQuery) { Boost = Boost };
+        }
+
+        protected bool Equals(TermsIncludingScoreQuery other) // base equality + field + pre-rewrite query
+        {
+            if (!base.Equals(other)) return false;
+            return string.Equals(_field, other._field)
+                && Equals(_unwrittenOriginalQuery, other._unwrittenOriginalQuery);
+        }
+
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(this, obj)) return true;
+            if (ReferenceEquals(null, obj) || obj.GetType() != GetType()) return false;
+            return Equals((TermsIncludingScoreQuery) obj);
+        }
+
+        public override int GetHashCode() // hashes the same three inputs that the typed Equals compares
+        {
+            unchecked
+            {
+                int hash = base.GetHashCode() * 397;
+                hash ^= _field != null ? _field.GetHashCode() : 0;
+                hash *= 397;
+                hash ^= _unwrittenOriginalQuery != null ? _unwrittenOriginalQuery.GetHashCode() : 0;
+                return hash;
+            }
+        }
+
+        public override Weight CreateWeight(IndexSearcher searcher)
+        {
+            return new WeightAnonymousInnerClassHelper(this, _originalQuery.CreateWeight(searcher));
+        }
+
+        // Weight of the enclosing query: normalization is delegated to the wrapped query's
+        // weight (with this query's boost applied), scoring to the nested scorer classes.
+        private class WeightAnonymousInnerClassHelper : Weight
+        {
+            private readonly TermsIncludingScoreQuery outerInstance;
+            private readonly Weight originalWeight;
+
+            // Handed back to Terms.Iterator on each call so that it can be reused.
+            private TermsEnum segmentTermsEnum;
+
+            public WeightAnonymousInnerClassHelper(TermsIncludingScoreQuery outerInstance, Weight originalWeight)
+            {
+                this.outerInstance = outerInstance;
+                this.originalWeight = originalWeight;
+            }
+
+            // Explains through the out-of-order bulk scorer, requested without acceptDocs.
+            public override Explanation Explain(AtomicReaderContext context, int doc)
+            {
+                SVInnerScorer scorer = (SVInnerScorer) BulkScorer(context, false, null);
+                return scorer != null
+                    ? scorer.Explain(doc)
+                    : new ComplexExplanation(false, 0.0f, "Not a match");
+            }
+
+            public override bool ScoresDocsOutOfOrder()
+            {
+                // We have optimized impls below if we are allowed
+                // to score out-of-order:
+                return true;
+            }
+
+            public override Query Query
+            {
+                get { return outerInstance; }
+            }
+
+            // The wrapped weight's value multiplied twice by this query's boost.
+            public override float ValueForNormalization
+            {
+                get { return originalWeight.ValueForNormalization * outerInstance.Boost * outerInstance.Boost; }
+            }
+
+            public override void Normalize(float norm, float topLevelBoost)
+            {
+                originalWeight.Normalize(norm, topLevelBoost * outerInstance.Boost);
+            }
+
+            // In-order scoring: one of the *InOrderScorer classes, or null without terms for the field.
+            public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
+            {
+                var reader = context.AtomicReader;
+                Terms terms = reader.Terms(outerInstance._field);
+                if (terms == null)
+                {
+                    return null;
+                }
+
+                // what is the runtime...seems ok?
+                long cost = reader.MaxDoc * terms.Size();
+                segmentTermsEnum = terms.Iterator(segmentTermsEnum);
+
+                return outerInstance._multipleValuesPerDocument
+                    ? new MVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, reader.MaxDoc, cost)
+                    : new SVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, reader.MaxDoc, cost);
+            }
+
+            // Out-of-order scoring uses the *InnerScorer classes; when in-order scoring is
+            // requested the base implementation is used instead.
+            public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs)
+            {
+                if (scoreDocsInOrder)
+                {
+                    return base.BulkScorer(context, scoreDocsInOrder, acceptDocs);
+                }
+
+                var reader = context.AtomicReader;
+                Terms terms = reader.Terms(outerInstance._field);
+                if (terms == null)
+                {
+                    return null;
+                }
+
+                // what is the runtime...seems ok?
+                long cost = reader.MaxDoc * terms.Size();
+                segmentTermsEnum = terms.Iterator(segmentTermsEnum);
+
+                // Optimized impls that take advantage of docs
+                // being allowed to be out of order:
+                return outerInstance._multipleValuesPerDocument
+                    ? new MVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, reader.MaxDoc, cost)
+                    : new SVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, cost);
+            }
+        }
+
+ // This impl assumes that the 'join' values are used uniquely per doc per field. Used for one to many relations.
+ internal class SVInnerScorer : BulkScorer
+ {
+ private readonly TermsIncludingScoreQuery outerInstance;
+
+ private readonly BytesRef _spare = new BytesRef();
+ private readonly Bits _acceptDocs;
+ private readonly TermsEnum _termsEnum;
+ private readonly long _cost; // stored by the constructor; not read anywhere in this class
+
+ private int _upto; // index into outerInstance._ords of the next term to seek
+ internal DocsEnum DocsEnum; // docs of the term currently being read; null when a new term must be sought
+ private DocsEnum _reuse; // last DocsEnum obtained, passed back to _termsEnum.Docs for reuse
+ private int _scoreUpto; // index into _ords of the term whose docs are currently being emitted
+ private int _doc; // last value returned by NextDocOutOfOrder; -1 before the first call
+
+ internal SVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, TermsEnum termsEnum, long cost)
+ {
+ this.outerInstance = outerInstance;
+ _acceptDocs = acceptDocs;
+ _termsEnum = termsEnum;
+ _cost = cost;
+ _doc = -1;
+ }
+
+ public override bool Score(Collector collector, int max) // collects docs while the current doc id is below max
+ {
+ FakeScorer fakeScorer = new FakeScorer();
+ collector.Scorer = fakeScorer;
+ if (_doc == -1) // first call: position on the first doc
+ {
+ _doc = NextDocOutOfOrder();
+ }
+ while (_doc < max)
+ {
+ fakeScorer.doc = _doc;
+ fakeScorer._score = outerInstance._scores[outerInstance._ords[_scoreUpto]]; // score of the term this doc came from
+ collector.Collect(_doc);
+ _doc = NextDocOutOfOrder();
+ }
+
+ return _doc != DocIdSetIterator.NO_MORE_DOCS; // true while docs remain for a later call
+ }
+
+ private int NextDocOutOfOrder() // next doc of the current term, moving on to the next term when one is exhausted
+ {
+ while (true)
+ {
+ if (DocsEnum != null)
+ {
+ int docId = DocsEnumNextDoc();
+ if (docId == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ DocsEnum = null; // current term exhausted; fall through to the next term
+ }
+ else
+ {
+ return _doc = docId;
+ }
+ }
+
+ if (_upto == outerInstance._terms.Size()) // every term has been tried
+ {
+ return _doc = DocIdSetIterator.NO_MORE_DOCS;
+ }
+
+ _scoreUpto = _upto; // remember which term (and thus which score) the following docs belong to
+ if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare)))
+ {
+ DocsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsEnum.FLAG_NONE);
+ }
+ }
+ }
+
+ protected virtual int DocsEnumNextDoc() // hook: MVInnerScorer overrides this to skip docs it already emitted
+ {
+ return DocsEnum.NextDoc();
+ }
+
+ internal Explanation Explain(int target) // NOTE(review): a "match" explanation is returned even when the loop ends on NO_MORE_DOCS - confirm intended
+ {
+ int docId;
+ do
+ {
+ docId = NextDocOutOfOrder();
+ if (docId < target)
+ {
+ int tempDocId = DocsEnum.Advance(target); // try to jump to target within the current term's docs
+ if (tempDocId == target)
+ {
+ docId = tempDocId;
+ break;
+ }
+ }
+ else if (docId == target)
+ {
+ break;
+ }
+ DocsEnum = null; // goto the next ord.
+ } while (docId != DocIdSetIterator.NO_MORE_DOCS);
+
+ return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]],
+ "Score based on join value " + _termsEnum.Term().Utf8ToString());
+ }
+ }
+
+        // Variant for fields with several join values per document. A bit set remembers which
+        // docids were already emitted so that no doc is emitted twice for different join values.
+        // Consequently the first encountered join value determines a document's score, even if
+        // another join value would yield a higher one.
+        internal class MVInnerScorer : SVInnerScorer
+        {
+            private readonly TermsIncludingScoreQuery outerInstance;
+
+            internal readonly FixedBitSet alreadyEmittedDocs;
+
+            internal MVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost)
+                : base(outerInstance, weight, acceptDocs, termsEnum, cost)
+            {
+                this.outerInstance = outerInstance;
+                alreadyEmittedDocs = new FixedBitSet(maxDoc);
+            }
+
+            // Advances the current DocsEnum past every doc whose bit was already set; the bit of
+            // the returned doc is set as a side effect of GetAndSet.
+            protected override int DocsEnumNextDoc()
+            {
+                int docId;
+                do
+                {
+                    docId = DocsEnum.NextDoc();
+                }
+                while (docId != DocIdSetIterator.NO_MORE_DOCS && alreadyEmittedDocs.GetAndSet(docId));
+
+                // Either NO_MORE_DOCS or a doc that had not been emitted before.
+                return docId;
+            }
+        }
+
+        // In-order scorer for fields with a single join value per document. The constructor
+        // fills a bit set of matching docs plus a per-doc score array; iteration walks the bit set.
+        internal class SVInOrderScorer : Scorer
+        {
+            // Protected, not private: FillDocsAndScores is virtual and is called from the constructor
+            // below, i.e. before the constructor body of a subclass has run. An override therefore has
+            // to reach the enclosing query through this field; a field of its own would still be null.
+            protected readonly TermsIncludingScoreQuery outerInstance;
+
+            internal readonly DocIdSetIterator matchingDocsIterator;
+            internal readonly float[] scores;
+            internal readonly long cost_Renamed;
+
+            internal int currentDoc = -1;
+
+            internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost) : base(weight)
+            {
+                this.outerInstance = outerInstance;
+                FixedBitSet matchingDocs = new FixedBitSet(maxDoc);
+                scores = new float[maxDoc];
+                FillDocsAndScores(matchingDocs, acceptDocs, termsEnum);
+                matchingDocsIterator = matchingDocs.GetIterator();
+                cost_Renamed = cost;
+            }
+
+            // Seeks every collected term in termsEnum; for each doc of a term that is found, sets the
+            // doc's bit in matchingDocs and stores the term's score in scores.
+            protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
+                TermsEnum termsEnum)
+            {
+                BytesRef spare = new BytesRef();
+                DocsEnum docsEnum = null;
+                for (int i = 0; i < outerInstance._terms.Size(); i++)
+                {
+                    if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
+                    {
+                        docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
+                        float score = outerInstance._scores[outerInstance._ords[i]];
+                        for (int doc = docsEnum.NextDoc(); doc != NO_MORE_DOCS; doc = docsEnum.NextDoc())
+                        {
+                            matchingDocs.Set(doc);
+                            // In the case the same doc is also related to a another doc, a score might be overwritten. I think this
+                            // can only happen in a many-to-many relation
+                            scores[doc] = score;
+                        }
+                    }
+                }
+            }
+
+            public override float Score()
+            {
+                return scores[currentDoc];
+            }
+
+            public override int Freq()
+            {
+                return 1;
+            }
+
+            public override int DocID()
+            {
+                return currentDoc;
+            }
+
+            public override int NextDoc()
+            {
+                return currentDoc = matchingDocsIterator.NextDoc();
+            }
+
+            public override int Advance(int target)
+            {
+                return currentDoc = matchingDocsIterator.Advance(target);
+            }
+
+            public override long Cost()
+            {
+                return cost_Renamed;
+            }
+        }
+
+        // This scorer deals with the fact that a document can have more than one score from multiple related documents.
+        internal class MVInOrderScorer : SVInOrderScorer
+        {
+            // No outerInstance field of its own: FillDocsAndScores runs inside the base constructor,
+            // so it must use the field that the base class has already assigned at that point.
+            internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+                TermsEnum termsEnum, int maxDoc, long cost)
+                : base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost)
+            {
+            }
+
+            protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
+                TermsEnum termsEnum)
+            {
+                BytesRef spare = new BytesRef();
+                DocsEnum docsEnum = null;
+                for (int i = 0; i < outerInstance._terms.Size(); i++)
+                {
+                    if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
+                    {
+                        docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
+                        float score = outerInstance._scores[outerInstance._ords[i]];
+                        for (int doc = docsEnum.NextDoc(); doc != NO_MORE_DOCS; doc = docsEnum.NextDoc())
+                        {
+                            // I prefer this:
+                            /*if (scores[doc] < score) {
+                              scores[doc] = score;
+                              matchingDocs.set(doc);
+                            }*/
+                            // But this behaves the same as MVInnerScorer and only then the tests will pass:
+                            if (!matchingDocs.Get(doc))
+                            {
+                                scores[doc] = score;
+                                matchingDocs.Set(doc);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/TermsQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsQuery.cs b/Lucene.Net.Join/TermsQuery.cs
new file mode 100644
index 0000000..2d5ccf8
--- /dev/null
+++ b/Lucene.Net.Join/TermsQuery.cs
@@ -0,0 +1,147 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// A query that has an array of terms from a specific field. This query will match documents that have one or
+    /// more terms in the specified field that match with the terms specified in the array.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    internal class TermsQuery : MultiTermQuery
+    {
+        private readonly BytesRefHash _terms;
+        private readonly int[] _ords;
+        private readonly Query _fromQuery; // Used for Equals() and GetHashCode() only
+
+        /// <param name="field">The field that matching documents should have the terms in.</param>
+        /// <param name="fromQuery">Only used to tell instances apart in Equals and GetHashCode.</param>
+        /// <param name="terms">The terms that matching documents should have; Sort is called on this hash here.</param>
+        internal TermsQuery(string field, Query fromQuery, BytesRefHash terms) : base(field)
+        {
+            _fromQuery = fromQuery;
+            _terms = terms;
+            _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
+        }
+
+        public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
+        {
+            if (_terms.Size() == 0)
+            {
+                return TermsEnum.EMPTY;
+            }
+
+            return new SeekingTermSetTermsEnum(terms.Iterator(null), _terms, _ords);
+        }
+
+        // Prints the field this query was constructed with, not the argument passed to ToString.
+        public override string ToString(string field)
+        {
+            return string.Format("TermsQuery{{field={0}}}", Field);
+        }
+
+        // Equal when the types match, the base class agrees and both wrap equal "from" queries.
+        // NOTE(review): assumes the inherited Equals does not look at _fromQuery or the terms - confirm.
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(this, obj)) return true;
+            if (ReferenceEquals(null, obj) || obj.GetType() != GetType() || !base.Equals(obj)) return false;
+            return Equals(_fromQuery, ((TermsQuery) obj)._fromQuery);
+        }
+
+        public override int GetHashCode()
+        {
+            unchecked
+            {
+                return base.GetHashCode() + 31 * (_fromQuery != null ? _fromQuery.GetHashCode() : 0);
+            }
+        }
+
+        // Walks the segment's terms by seeking to each query term in the order given by ords.
+        private class SeekingTermSetTermsEnum : FilteredTermsEnum
+        {
+            private readonly BytesRefHash _terms;
+            private readonly int[] _ords;
+            private readonly int _lastElement;
+
+            private readonly BytesRef _lastTerm;
+            private readonly BytesRef _spare = new BytesRef();
+            private readonly IComparer<BytesRef> _comparator;
+
+            private BytesRef _seekTerm;
+            private int _upto;
+
+            internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) : base(tenum)
+            {
+                _terms = terms;
+                _ords = ords;
+                _comparator = BytesRef.UTF8SortedAsUnicodeComparer;
+                _lastElement = terms.Size() - 1;
+                _lastTerm = terms.Get(ords[_lastElement], new BytesRef());
+                _seekTerm = terms.Get(ords[_upto], _spare);
+            }
+
+            // Hands out the pending seek term once; returns null until Accept queues another one.
+            protected override BytesRef NextSeekTerm(BytesRef currentTerm)
+            {
+                BytesRef temp = _seekTerm;
+                _seekTerm = null;
+                return temp;
+            }
+
+            protected override AcceptStatus Accept(BytesRef term)
+            {
+                if (_comparator.Compare(term, _lastTerm) > 0)
+                {
+                    return AcceptStatus.END;
+                }
+
+                if (_comparator.Compare(term, _terms.Get(_ords[_upto], _spare)) == 0)
+                {
+                    return AcceptAndAdvance();
+                }
+
+                // Our current term doesn't match the given term. We may be behind it by more than one
+                // step, so keep incrementing until we're the same or higher. Typically the terms dict is a
+                // superset of the query's terms, so skipping many terms is unusual: no binary search here.
+                int cmp;
+                do
+                {
+                    if (_upto == _lastElement) return AcceptStatus.NO;
+                    _seekTerm = _terms.Get(_ords[++_upto], _spare);
+                } while ((cmp = _comparator.Compare(_seekTerm, term)) < 0);
+
+                return cmp == 0 ? AcceptAndAdvance() : AcceptStatus.NO_AND_SEEK;
+            }
+
+            // Accepts the current term and, unless it is the last one, queues the next term to seek to.
+            private AcceptStatus AcceptAndAdvance()
+            {
+                if (_upto == _lastElement) return AcceptStatus.YES;
+
+                _seekTerm = _terms.Get(_ords[++_upto], _spare);
+                return AcceptStatus.YES_AND_SEEK;
+            }
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.Join/TermsWithScoreCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsWithScoreCollector.cs b/Lucene.Net.Join/TermsWithScoreCollector.cs
new file mode 100644
index 0000000..c4dc97d
--- /dev/null
+++ b/Lucene.Net.Join/TermsWithScoreCollector.cs
@@ -0,0 +1,333 @@
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ internal abstract class TermsWithScoreCollector : Collector
+ {
+        private const int InitialArraySize = 256;
+
+        private readonly string _field;
+        private readonly BytesRefHash _collectedTerms = new BytesRefHash();
+        private readonly ScoreMode _scoreMode;
+
+        private Scorer _scorer;
+
+        // Indexed by the term id returned from _collectedTerms.Add; grown on demand by the subclasses.
+        private float[] _scoreSums = new float[InitialArraySize];
+
+        internal TermsWithScoreCollector(string field, ScoreMode scoreMode)
+        {
+            _field = field;
+            _scoreMode = scoreMode;
+        }
+
+        /// <summary>
+        /// The hash holding every term collected so far.
+        /// </summary>
+        public BytesRefHash CollectedTerms
+        {
+            get { return _collectedTerms; }
+        }
+
+        /// <summary>
+        /// The per-term scores, indexed by term id. This is the backing array itself, not a copy.
+        /// </summary>
+        public virtual float[] ScoresPerTerm
+        {
+            get { return _scoreSums; }
+        }
+
+        /// <summary>
+        /// Stores the scorer that the subclasses ask for the score of each collected document.
+        /// </summary>
+        public override Scorer Scorer
+        {
+            set { _scorer = value; }
+        }
+
+        public override bool AcceptsDocsOutOfOrder()
+        {
+            return true;
+        }
+
+        /// <summary>
+        /// Picks the <see cref="TermsWithScoreCollector"/> implementation for the given field layout and score mode.
+        /// </summary>
+        /// <param name="field">The field to collect terms for.</param>
+        /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
+        /// <param name="scoreMode">How scores of documents sharing a term are combined; averaging has dedicated subclasses.</param>
+        /// <returns>A <see cref="TermsWithScoreCollector"/> instance</returns>
+        internal static TermsWithScoreCollector Create(string field, bool multipleValuesPerDocument, ScoreMode scoreMode)
+        {
+            bool average = scoreMode == ScoreMode.Avg;
+            if (multipleValuesPerDocument)
+            {
+                if (average)
+                {
+                    return new Mv.Avg(field);
+                }
+
+                return new Mv(field, scoreMode);
+            }
+
+            if (average)
+            {
+                return new Sv.Avg(field);
+            }
+
+            return new Sv(field, scoreMode);
+        }
+
+        // impl that works with single value per document
+        //
+        // AcceptsDocsOutOfOrder is intentionally not overridden: the base class answers true, whereas
+        // an override that throws NotImplementedException breaks every caller that asks the
+        // collector about document order.
+        internal class Sv : TermsWithScoreCollector
+        {
+            private readonly BytesRef _spare = new BytesRef();
+            private BinaryDocValues _fromDocTerms;
+
+            internal Sv(string field, ScoreMode scoreMode) : base(field, scoreMode)
+            {
+            }
+
+            // Adds the document's term to the hash and folds the current score into that term's
+            // slot: summed for Total, maximum for Max, untouched for any other mode once set.
+            public override void Collect(int doc)
+            {
+                _fromDocTerms.Get(doc, _spare);
+                int ord = _collectedTerms.Add(_spare);
+                if (ord < 0)
+                {
+                    // A negative result encodes an id that was handed out before: -(id + 1).
+                    ord = -ord - 1;
+                }
+                else if (ord >= _scoreSums.Length)
+                {
+                    _scoreSums = ArrayUtil.Grow(_scoreSums);
+                }
+
+                float current = _scorer.Score();
+                float existing = _scoreSums[ord];
+                if (existing.CompareTo(0.0f) == 0)
+                {
+                    // A slot that still holds 0 simply takes the score, whatever the mode.
+                    _scoreSums[ord] = current;
+                }
+                else
+                {
+                    switch (_scoreMode)
+                    {
+                        case ScoreMode.Total:
+                            _scoreSums[ord] = _scoreSums[ord] + current;
+                            break;
+                        case ScoreMode.Max:
+                            if (current > existing)
+                            {
+                                _scoreSums[ord] = current;
+                            }
+                            break;
+                    }
+                }
+            }
+
+            public override AtomicReaderContext NextReader
+            {
+                set { _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); }
+            }
+
+            // Averaging variant: keeps a running sum plus a count per term.
+            internal class Avg : Sv
+            {
+                private int[] _scoreCounts = new int[InitialArraySize];
+
+                internal Avg(string field) : base(field, ScoreMode.Avg)
+                {
+                }
+
+                // Same term bookkeeping as Sv.Collect, except that the count array is grown together with
+                // the sums; a slot holding 0 is (re)started with count 1, otherwise sum and count advance.
+                public override void Collect(int doc)
+                {
+                    _fromDocTerms.Get(doc, _spare);
+                    int ord = _collectedTerms.Add(_spare);
+                    if (ord < 0)
+                    {
+                        ord = -ord - 1;
+                    }
+                    else if (ord >= _scoreSums.Length)
+                    {
+                        _scoreSums = ArrayUtil.Grow(_scoreSums);
+                        _scoreCounts = ArrayUtil.Grow(_scoreCounts);
+                    }
+
+                    float current = _scorer.Score();
+                    float existing = _scoreSums[ord];
+                    if (existing.CompareTo(0.0f) == 0)
+                    {
+                        _scoreSums[ord] = current;
+                        _scoreCounts[ord] = 1;
+                    }
+                    else
+                    {
+                        _scoreSums[ord] = _scoreSums[ord] + current;
+                        _scoreCounts[ord]++;
+                    }
+                }
+
+                // The first read divides every sum by its count in place and then drops the counts, so
+                // later reads return the already averaged array. Slots that never received a score are
+                // divided as well (0f / 0).
+                public override float[] ScoresPerTerm
+                {
+                    get
+                    {
+                        if (_scoreCounts != null)
+                        {
+                            for (int i = 0; i < _scoreCounts.Length; i++)
+                            {
+                                _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
+                            }
+                            _scoreCounts = null;
+                        }
+                        return _scoreSums;
+                    }
+                }
+            }
+        }
+
+        // impl that works with multiple values per document
+        //
+        // AcceptsDocsOutOfOrder is intentionally not overridden: the base class answers true, whereas
+        // an override that throws NotImplementedException breaks every caller that asks the
+        // collector about document order.
+        internal class Mv : TermsWithScoreCollector
+        {
+            private SortedSetDocValues _fromDocTermOrds;
+            private readonly BytesRef _scratch = new BytesRef();
+
+            internal Mv(string field, ScoreMode scoreMode) : base(field, scoreMode)
+            {
+            }
+
+            // Visits every term ordinal of the document, adds the term to the hash and folds the
+            // current score into the term's slot: summed for Total, maximum for Max, and left
+            // untouched for any other mode.
+            public override void Collect(int doc)
+            {
+                _fromDocTermOrds.Document = doc;
+                long ord;
+                while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                {
+                    _fromDocTermOrds.LookupOrd(ord, _scratch);
+
+                    int termId = _collectedTerms.Add(_scratch);
+                    if (termId < 0)
+                    {
+                        // A negative result encodes an id that was handed out before: -(id + 1).
+                        termId = -termId - 1;
+                    }
+                    else if (termId >= _scoreSums.Length)
+                    {
+                        _scoreSums = ArrayUtil.Grow(_scoreSums);
+                    }
+
+                    switch (_scoreMode)
+                    {
+                        case ScoreMode.Total:
+                            _scoreSums[termId] += _scorer.Score();
+                            break;
+                        case ScoreMode.Max:
+                            _scoreSums[termId] = Math.Max(_scoreSums[termId], _scorer.Score());
+                            break;
+                    }
+                }
+            }
+
+            // Fetches the doc-term ordinals of the given reader for the collected field.
+            public override AtomicReaderContext NextReader
+            {
+                set { _fromDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field); }
+            }
+
+            // Averaging variant: keeps a running sum plus a count per term; the division
+            // happens when ScoresPerTerm is read.
+            internal class Avg : Mv
+            {
+                private int[] _scoreCounts = new int[InitialArraySize];
+
+                internal Avg(string field) : base(field, ScoreMode.Avg)
+                {
+                }
+
+                // Same term bookkeeping as Mv.Collect, except that the count array is grown together
+                // with the sums and each term's sum and count are both advanced.
+                public override void Collect(int doc)
+                {
+                    _fromDocTermOrds.Document = doc;
+                    long ord;
+                    while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                    {
+                        _fromDocTermOrds.LookupOrd(ord, _scratch);
+
+                        int termId = _collectedTerms.Add(_scratch);
+                        if (termId < 0)
+                        {
+                            termId = -termId - 1;
+                        }
+                        else if (termId >= _scoreSums.Length)
+                        {
+                            _scoreSums = ArrayUtil.Grow(_scoreSums);
+                            _scoreCounts = ArrayUtil.Grow(_scoreCounts);
+                        }
+
+                        _scoreSums[termId] += _scorer.Score();
+                        _scoreCounts[termId]++;
+                    }
+                }
+
+                // The first read divides every sum by its count in place and then drops the counts, so
+                // later reads return the already averaged array. Slots that never received a score are
+                // divided as well (0f / 0).
+                public override float[] ScoresPerTerm
+                {
+                    get
+                    {
+                        if (_scoreCounts != null)
+                        {
+                            for (int i = 0; i < _scoreCounts.Length; i++)
+                            {
+                                _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
+                            }
+                            _scoreCounts = null;
+                        }
+                        return _scoreSums;
+                    }
+                }
+            }
+        }
+
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bd772f02/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index 18f70a3..34c4804 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 2013
-VisualStudioVersion = 12.0.30501.0
+# Visual Studio 14
+VisualStudioVersion = 14.0.23107.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net", "src\Lucene.Net.Core\Lucene.Net.csproj", "{5D4AD9BE-1FFB-41AB-9943-25737971BF57}"
EndProject
@@ -38,6 +38,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs", "src\Lu
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs.Tests", "src\Lucene.Net.Tests.Codecs\Lucene.Net.Codecs.Tests.csproj", "{351B75B1-BBD5-4E32-8036-7BED4E0135A6}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Join", "Lucene.Net.Join\Lucene.Net.Join.csproj", "{E8A339C7-FCF6-4A72-8586-56D8961D7B99}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -179,6 +181,18 @@ Global
{351B75B1-BBD5-4E32-8036-7BED4E0135A6}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
{351B75B1-BBD5-4E32-8036-7BED4E0135A6}.Release|Mixed Platforms.Build.0 = Release|Any CPU
{351B75B1-BBD5-4E32-8036-7BED4E0135A6}.Release|x86.ActiveCfg = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Debug|x86.Build.0 = Debug|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Any CPU.Build.0 = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|x86.ActiveCfg = Release|Any CPU
+ {E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
[14/17] lucenenet git commit: Lucene.Net.Join tests now passing
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs b/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
deleted file mode 100644
index f94805a..0000000
--- a/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
+++ /dev/null
@@ -1,36 +0,0 @@
-using System.Reflection;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-// General Information about an assembly is controlled through the following
-// set of attributes. Change these attribute values to modify the information
-// associated with an assembly.
-[assembly: AssemblyTitle("Lucene.Net.Tests.Join")]
-[assembly: AssemblyDescription("")]
-[assembly: AssemblyConfiguration("")]
-[assembly: AssemblyCompany("")]
-[assembly: AssemblyProduct("Lucene.Net.Tests.Join")]
-[assembly: AssemblyCopyright("Copyright © 2015")]
-[assembly: AssemblyTrademark("")]
-[assembly: AssemblyCulture("")]
-
-// Setting ComVisible to false makes the types in this assembly not visible
-// to COM components. If you need to access a type in this assembly from
-// COM, set the ComVisible attribute to true on that type.
-[assembly: ComVisible(false)]
-
-// The following GUID is for the ID of the typelib if this project is exposed to COM
-[assembly: Guid("4c1b794f-8158-45e6-85b3-2c46569bebc2")]
-
-// Version information for an assembly consists of the following four values:
-//
-// Major Version
-// Minor Version
-// Build Number
-// Revision
-//
-// You can specify all the values or you can default the Build and Revision Numbers
-// by using the '*' as shown below:
-// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("1.0.0.0")]
-[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Tests.Join/TestBlockJoin.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestBlockJoin.cs b/Lucene.Net.Tests.Join/TestBlockJoin.cs
deleted file mode 100644
index 7c6861f..0000000
--- a/Lucene.Net.Tests.Join/TestBlockJoin.cs
+++ /dev/null
@@ -1,1599 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis;
-using Lucene.Net.Documents;
-using Lucene.Net.Grouping;
-using Lucene.Net.Index;
-using Lucene.Net.Join;
-using Lucene.Net.Randomized;
-using Lucene.Net.Randomized.Generators;
-using Lucene.Net.Search;
-using Lucene.Net.Store;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-
-namespace Lucene.Net.Tests.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- public class TestBlockJoin : LuceneTestCase
- {
- // One resume...
- private Document MakeResume(string name, string country)
- {
- Document resume = new Document();
- resume.Add(NewStringField("docType", "resume", Field.Store.NO));
- resume.Add(NewStringField("name", name, Field.Store.YES));
- resume.Add(NewStringField("country", country, Field.Store.NO));
- return resume;
- }
-
- // ... has multiple jobs
- private Document MakeJob(string skill, int year)
- {
- Document job = new Document();
- job.Add(NewStringField("skill", skill, Field.Store.YES));
- job.Add(new IntField("year", year, Field.Store.NO));
- job.Add(new StoredField("year", year));
- return job;
- }
-
- // ... has multiple qualifications
- private Document MakeQualification(string qualification, int year)
- {
- Document job = new Document();
- job.Add(NewStringField("qualification", qualification, Field.Store.YES));
- job.Add(new IntField("year", year, Field.Store.NO));
- return job;
- }
-
- [Test]
- public void TestEmptyChildFilter()
- {
- Directory dir = NewDirectory();
- IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
- config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
- // we don't want to merge - since we rely on certain segment setup
- IndexWriter w = new IndexWriter(dir, config);
-
- IList<Document> docs = new List<Document>();
-
- docs.Add(MakeJob("java", 2007));
- docs.Add(MakeJob("python", 2010));
- docs.Add(MakeResume("Lisa", "United Kingdom"));
- w.AddDocuments(docs);
-
- docs.Clear();
- docs.Add(MakeJob("ruby", 2005));
- docs.Add(MakeJob("java", 2006));
- docs.Add(MakeResume("Frank", "United States"));
- w.AddDocuments(docs);
- w.Commit();
- int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
- for (int i = 0; i < num; i++)
- {
- docs.Clear();
- docs.Add(MakeJob("java", 2007));
- w.AddDocuments(docs);
- }
-
- IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
- w.Dispose();
- assertTrue(r.Leaves.size() > 1);
- IndexSearcher s = new IndexSearcher(r);
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
-
- BooleanQuery childQuery = new BooleanQuery();
- childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
- childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
-
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
-
- BooleanQuery fullQuery = new BooleanQuery();
- fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
- fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
- s.Search(fullQuery, c);
- TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
- assertFalse(float.IsNaN(results.MaxScore));
- assertEquals(1, results.TotalGroupedHitCount);
- assertEquals(1, results.Groups.Length);
- GroupDocs<int> group = results.Groups[0];
- Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
- assertEquals("java", childDoc.Get("skill"));
- assertNotNull(group.GroupValue);
- Document parentDoc = s.Doc(group.GroupValue);
- assertEquals("Lisa", parentDoc.Get("name"));
-
- r.Dispose();
- dir.Dispose();
- }
-
- [Test]
- public void TestSimple()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
-
- IList<Document> docs = new List<Document>();
-
- docs.Add(MakeJob("java", 2007));
- docs.Add(MakeJob("python", 2010));
- docs.Add(MakeResume("Lisa", "United Kingdom"));
- w.AddDocuments(docs);
-
- docs.Clear();
- docs.Add(MakeJob("ruby", 2005));
- docs.Add(MakeJob("java", 2006));
- docs.Add(MakeResume("Frank", "United States"));
- w.AddDocuments(docs);
-
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = NewSearcher(r);
-
- // Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
-
- // Define child document criteria (finds an example of relevant work experience)
- BooleanQuery childQuery = new BooleanQuery();
- childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
- childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
-
- // Define parent document criteria (find a resident in the UK)
- Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
-
- // Wrap the child document query to 'join' any matches
- // up to corresponding parent:
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
-
- // Combine the parent and nested child queries into a single query for a candidate
- BooleanQuery fullQuery = new BooleanQuery();
- fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
- fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
-
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
-
- s.Search(fullQuery, c);
-
- TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
- assertFalse(float.IsNaN(results.MaxScore));
-
- //assertEquals(1, results.totalHitCount);
- assertEquals(1, results.TotalGroupedHitCount);
- assertEquals(1, results.Groups.Length);
-
- GroupDocs<int> group = results.Groups[0];
- assertEquals(1, group.TotalHits);
- assertFalse(float.IsNaN(group.Score));
-
- Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
- //System.out.println(" doc=" + group.ScoreDocs[0].Doc);
- assertEquals("java", childDoc.Get("skill"));
- assertNotNull(group.GroupValue);
- Document parentDoc = s.Doc(group.GroupValue);
- assertEquals("Lisa", parentDoc.Get("name"));
-
-
- //System.out.println("TEST: now test up");
-
- // Now join "up" (map parent hits to child docs) instead...:
- ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
- BooleanQuery fullChildQuery = new BooleanQuery();
- fullChildQuery.Add(new BooleanClause(parentJoinQuery, BooleanClause.Occur.MUST));
- fullChildQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
-
- //System.out.println("FULL: " + fullChildQuery);
- TopDocs hits = s.Search(fullChildQuery, 10);
- assertEquals(1, hits.TotalHits);
- childDoc = s.Doc(hits.ScoreDocs[0].Doc);
- //System.out.println("CHILD = " + childDoc + " docID=" + hits.ScoreDocs[0].Doc);
- assertEquals("java", childDoc.Get("skill"));
- assertEquals(2007, childDoc.GetField("year").NumericValue);
- assertEquals("Lisa", GetParentDoc(r, parentsFilter, hits.ScoreDocs[0].Doc).Get("name"));
-
- // Test with filter on child docs:
- assertEquals(0, s.Search(fullChildQuery, new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))), 1).TotalHits);
-
- r.Dispose();
- dir.Dispose();
- }
-
- [Test]
- public void TestBugCausedByRewritingTwice()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
-
- IList<Document> docs = new List<Document>();
-
- for (int i = 0; i < 10; i++)
- {
- docs.Clear();
- docs.Add(MakeJob("ruby", i));
- docs.Add(MakeJob("java", 2007));
- docs.Add(MakeResume("Frank", "United States"));
- w.AddDocuments(docs);
- }
-
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = NewSearcher(r);
-
- MultiTermQuery qc = NumericRangeQuery.NewIntRange("year", 2007, 2007, true, true);
- // Hacky: this causes the query to need 2 rewrite
- // iterations:
- qc.SetRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
-
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
-
- int h1 = qc.GetHashCode();
- Query qw1 = qc.Rewrite(r);
- int h2 = qw1.GetHashCode();
- Query qw2 = qw1.Rewrite(r);
- int h3 = qw2.GetHashCode();
-
- assertTrue(h1 != h2);
- assertTrue(h2 != h3);
- assertTrue(h3 != h1);
-
- ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);
-
- s.Search(qp, c);
- TopGroups<int> groups = c.GetTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
- foreach (GroupDocs<int> group in groups.Groups)
- {
- assertEquals(1, group.TotalHits);
- }
-
- r.Dispose();
- dir.Dispose();
- }
-
- protected QueryWrapperFilter Skill(string skill)
- {
- return new QueryWrapperFilter(new TermQuery(new Term("skill", skill)));
- }
-
- [Test]
- public virtual void TestSimpleFilter()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
-
- IList<Document> docs = new List<Document>();
- docs.Add(MakeJob("java", 2007));
- docs.Add(MakeJob("python", 2010));
- CollectionsHelper.Shuffle(docs);
- docs.Add(MakeResume("Lisa", "United Kingdom"));
-
- IList<Document> docs2 = new List<Document>();
- docs2.Add(MakeJob("ruby", 2005));
- docs2.Add(MakeJob("java", 2006));
- CollectionsHelper.Shuffle(docs2);
- docs2.Add(MakeResume("Frank", "United States"));
-
- AddSkillless(w);
- bool turn = Random().NextBoolean();
- w.AddDocuments(turn ? docs : docs2);
-
- AddSkillless(w);
-
- w.AddDocuments(!turn ? docs : docs2);
-
- AddSkillless(w);
-
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = NewSearcher(r);
-
- // Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
-
- // Define child document criteria (finds an example of relevant work experience)
- BooleanQuery childQuery = new BooleanQuery();
- childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
- childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
-
- // Define parent document criteria (find a resident in the UK)
- Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
-
- // Wrap the child document query to 'join' any matches
- // up to corresponding parent:
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
-
- assertEquals("no filter - both passed", 2, s.Search(childJoinQuery, 10).TotalHits);
-
- assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, parentsFilter, 10).TotalHits);
- assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).TotalHits);
-
- // not found test
- assertEquals("noone live there", 0, s.Search(childJoinQuery, new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).TotalHits);
- assertEquals("noone live there", 0, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).TotalHits);
-
- // apply the UK filter by the searcher
- TopDocs ukOnly = s.Search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
- assertEquals("has filter - single passed", 1, ukOnly.TotalHits);
- assertEquals("Lisa", r.Document(ukOnly.ScoreDocs[0].Doc).Get("name"));
-
- // looking for US candidates
- TopDocs usThen = s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
- assertEquals("has filter - single passed", 1, usThen.TotalHits);
- assertEquals("Frank", r.Document(usThen.ScoreDocs[0].Doc).Get("name"));
-
-
- TermQuery us = new TermQuery(new Term("country", "United States"));
- assertEquals("@ US we have java and ruby", 2, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), 10).TotalHits);
-
- assertEquals("java skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), Skill("java"), 10).TotalHits);
-
- BooleanQuery rubyPython = new BooleanQuery();
- rubyPython.Add(new TermQuery(new Term("skill", "ruby")), BooleanClause.Occur.SHOULD);
- rubyPython.Add(new TermQuery(new Term("skill", "python")), BooleanClause.Occur.SHOULD);
- assertEquals("ruby skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), new QueryWrapperFilter(rubyPython), 10).TotalHits);
-
- r.Dispose();
- dir.Dispose();
- }
-
- private void AddSkillless(RandomIndexWriter w)
- {
- if (Random().NextBoolean())
- {
- w.AddDocument(MakeResume("Skillless", Random().NextBoolean() ? "United Kingdom" : "United States"));
- }
- }
-
- private Document GetParentDoc(IndexReader reader, Filter parents, int childDocID)
- {
- IList<AtomicReaderContext> leaves = reader.Leaves;
- int subIndex = ReaderUtil.SubIndex(childDocID, leaves);
- AtomicReaderContext leaf = leaves[subIndex];
- FixedBitSet bits = (FixedBitSet)parents.GetDocIdSet(leaf, null);
- return leaf.AtomicReader.Document(bits.NextSetBit(childDocID - leaf.DocBase));
- }
-
- [Test]
- public void TestBoostBug()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = NewSearcher(r);
-
- ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(new MatchAllDocsQuery(), new QueryWrapperFilter(new MatchAllDocsQuery()), ScoreMode.Avg);
- QueryUtils.Check(Random(), q, s);
- s.Search(q, 10);
- BooleanQuery bq = new BooleanQuery();
- bq.Boost = 2f; // we boost the BQ
- bq.Add(q, BooleanClause.Occur.MUST);
- s.Search(bq, 10);
- r.Dispose();
- dir.Dispose();
- }
-
- [Test]
- public void TestNestedDocScoringWithDeletes()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
-
- // Cannot assert this since we use NoMergePolicy:
- w.DoRandomForceMergeAssert = false;
-
- IList<Document> docs = new List<Document>();
- docs.Add(MakeJob("java", 2007));
- docs.Add(MakeJob("python", 2010));
- docs.Add(MakeResume("Lisa", "United Kingdom"));
- w.AddDocuments(docs);
-
- docs.Clear();
- docs.Add(MakeJob("c", 1999));
- docs.Add(MakeJob("ruby", 2005));
- docs.Add(MakeJob("java", 2006));
- docs.Add(MakeResume("Frank", "United States"));
- w.AddDocuments(docs);
-
- w.Commit();
- IndexSearcher s = NewSearcher(DirectoryReader.Open(dir));
-
- ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(NumericRangeQuery.NewIntRange("year", 1990, 2010, true, true), new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))), ScoreMode.Total);
-
- TopDocs topDocs = s.Search(q, 10);
- assertEquals(2, topDocs.TotalHits);
- assertEquals(6, topDocs.ScoreDocs[0].Doc);
- assertEquals(3.0f, topDocs.ScoreDocs[0].Score, 0.0f);
- assertEquals(2, topDocs.ScoreDocs[1].Doc);
- assertEquals(2.0f, topDocs.ScoreDocs[1].Score, 0.0f);
-
- s.IndexReader.Dispose();
- w.DeleteDocuments(new Term("skill", "java"));
- w.Dispose();
- s = NewSearcher(DirectoryReader.Open(dir));
-
- topDocs = s.Search(q, 10);
- assertEquals(2, topDocs.TotalHits);
- assertEquals(6, topDocs.ScoreDocs[0].Doc);
- assertEquals(2.0f, topDocs.ScoreDocs[0].Score, 0.0f);
- assertEquals(2, topDocs.ScoreDocs[1].Doc);
- assertEquals(1.0f, topDocs.ScoreDocs[1].Score, 0.0f);
-
- s.IndexReader.Dispose();
- dir.Dispose();
- }
-
- private string[][] GetRandomFields(int maxUniqueValues)
- {
-
- string[][] fields = new string[TestUtil.NextInt(Random(), 2, 4)][];
- for (int fieldID = 0; fieldID < fields.Length; fieldID++)
- {
- int valueCount;
- if (fieldID == 0)
- {
- valueCount = 2;
- }
- else
- {
- valueCount = TestUtil.NextInt(Random(), 1, maxUniqueValues);
- }
-
- string[] values = fields[fieldID] = new string[valueCount];
- for (int i = 0; i < valueCount; i++)
- {
- values[i] = TestUtil.RandomRealisticUnicodeString(Random());
- //values[i] = TestUtil.randomSimpleString(random);
- }
- }
-
- return fields;
- }
-
- private Term RandomParentTerm(string[] values)
- {
- return new Term("parent0", values[Random().Next(values.Length)]);
- }
-
- private Term RandomChildTerm(string[] values)
- {
- return new Term("child0", values[Random().Next(values.Length)]);
- }
-
- private Sort GetRandomSort(string prefix, int numFields)
- {
- IList<SortField> sortFields = new List<SortField>();
- // TODO: sometimes sort by score; problem is scores are
- // not comparable across the two indices
- // sortFields.Add(SortField.FIELD_SCORE);
- if (Random().NextBoolean())
- {
- sortFields.Add(new SortField(prefix + Random().Next(numFields), SortField.Type_e.STRING, Random().NextBoolean()));
- }
- else if (Random().NextBoolean())
- {
- sortFields.Add(new SortField(prefix + Random().Next(numFields), SortField.Type_e.STRING, Random().NextBoolean()));
- sortFields.Add(new SortField(prefix + Random().Next(numFields), SortField.Type_e.STRING, Random().NextBoolean()));
- }
- // Break ties:
- sortFields.Add(new SortField(prefix + "ID", SortField.Type_e.INT));
- return new Sort(sortFields.ToArray());
- }
-
- [Test]
- public void TestRandom()
- {
- // We build two indices at once: one normalized (which
- // ToParentBlockJoinQuery/Collector,
- // ToChildBlockJoinQuery can query) and the other w/
- // the same docs, just fully denormalized:
- Directory dir = NewDirectory();
- Directory joinDir = NewDirectory();
-
- int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
- //final int numParentDocs = 30;
-
- // Values for parent fields:
- string[][] parentFields = GetRandomFields(numParentDocs / 2);
- // Values for child fields:
- string[][] childFields = GetRandomFields(numParentDocs);
-
- bool doDeletes = Random().NextBoolean();
- IList<int> toDelete = new List<int>();
-
- // TODO: parallel star join, nested join cases too!
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
- RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir);
- for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
- {
- Document parentDoc = new Document();
- Document parentJoinDoc = new Document();
- Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
- parentDoc.Add(id);
- parentJoinDoc.Add(id);
- parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
- for (int field = 0; field < parentFields.Length; field++)
- {
- if (Random().NextDouble() < 0.9)
- {
- Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);
- parentDoc.Add(f);
- parentJoinDoc.Add(f);
- }
- }
-
- if (doDeletes)
- {
- parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
- parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
- }
-
- IList<Document> joinDocs = new List<Document>();
-
- if (VERBOSE)
- {
- StringBuilder sb = new StringBuilder();
- sb.Append("parentID=").Append(parentDoc.Get("parentID"));
- for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
- {
- string parent = parentDoc.Get("parent" + fieldID);
- if (parent != null)
- {
- sb.Append(" parent" + fieldID + "=" + parent);
- }
- }
- Console.WriteLine(" " + sb);
- }
-
- int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
- for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
- {
- // Denormalize: copy all parent fields into child doc:
- Document childDoc = TestUtil.CloneDocument(parentDoc);
- Document joinChildDoc = new Document();
- joinDocs.Add(joinChildDoc);
-
- Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);
- childDoc.Add(childID);
- joinChildDoc.Add(childID);
-
- for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
- {
- if (Random().NextDouble() < 0.9)
- {
- Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);
- childDoc.Add(f);
- joinChildDoc.Add(f);
- }
- }
-
- if (VERBOSE)
- {
- StringBuilder sb = new StringBuilder();
- sb.Append("childID=").Append(joinChildDoc.Get("childID"));
- for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
- {
- string child = joinChildDoc.Get("child" + fieldID);
- if (child != null)
- {
- sb.Append(" child" + fieldID + "=" + child);
- }
- }
- Console.WriteLine(" " + sb);
- }
-
- if (doDeletes)
- {
- joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
- }
-
- w.AddDocument(childDoc);
- }
-
- // Parent last:
- joinDocs.Add(parentJoinDoc);
- joinW.AddDocuments(joinDocs);
-
- if (doDeletes && Random().Next(30) == 7)
- {
- toDelete.Add(parentDocID);
- }
- }
-
- foreach (int deleteID in toDelete)
- {
- if (VERBOSE)
- {
- Console.WriteLine("DELETE parentID=" + deleteID);
- }
- w.DeleteDocuments(new Term("blockID", "" + deleteID));
- joinW.DeleteDocuments(new Term("blockID", "" + deleteID));
- }
-
- IndexReader r = w.Reader;
- w.Dispose();
- IndexReader joinR = joinW.Reader;
- joinW.Dispose();
-
- if (VERBOSE)
- {
- Console.WriteLine("TEST: reader=" + r);
- Console.WriteLine("TEST: joinReader=" + joinR);
-
- for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
- {
- Console.WriteLine(" docID=" + docIDX + " doc=" + joinR.Document(docIDX));
- }
- }
-
- IndexSearcher s = NewSearcher(r);
-
- IndexSearcher joinS = new IndexSearcher(joinR);
-
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));
-
- int iters = 200 * RANDOM_MULTIPLIER;
-
- for (int iter = 0; iter < iters; iter++)
- {
- if (VERBOSE)
- {
- Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);
- }
-
- Query childQuery;
- if (Random().Next(3) == 2)
- {
- int childFieldID = Random().Next(childFields.Length);
- childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
- }
- else if (Random().Next(3) == 2)
- {
- BooleanQuery bq = new BooleanQuery();
- childQuery = bq;
- int numClauses = TestUtil.NextInt(Random(), 2, 4);
- bool didMust = false;
- for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
- {
- Query clause;
- BooleanClause.Occur occur;
- if (!didMust && Random().NextBoolean())
- {
- occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
- clause = new TermQuery(RandomChildTerm(childFields[0]));
- didMust = true;
- }
- else
- {
- occur = BooleanClause.Occur.SHOULD;
- int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
- clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
- }
- bq.Add(clause, occur);
- }
- }
- else
- {
- BooleanQuery bq = new BooleanQuery();
- childQuery = bq;
-
- bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
- int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
- bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
- }
-
- int x = Random().Next(4);
- ScoreMode agg;
- if (x == 0)
- {
- agg = ScoreMode.None;
- }
- else if (x == 1)
- {
- agg = ScoreMode.Max;
- }
- else if (x == 2)
- {
- agg = ScoreMode.Total;
- }
- else
- {
- agg = ScoreMode.Avg;
- }
-
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
-
- // To run against the block-join index:
- Query parentJoinQuery;
-
- // Same query as parentJoinQuery, but to run against
- // the fully denormalized index (so we can compare
- // results):
- Query parentQuery;
-
- if (Random().NextBoolean())
- {
- parentQuery = childQuery;
- parentJoinQuery = childJoinQuery;
- }
- else
- {
- // AND parent field w/ child field
- BooleanQuery bq = new BooleanQuery();
- parentJoinQuery = bq;
- Term parentTerm = RandomParentTerm(parentFields[0]);
- if (Random().NextBoolean())
- {
- bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
- bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
- }
- else
- {
- bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
- bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
- }
-
- BooleanQuery bq2 = new BooleanQuery();
- parentQuery = bq2;
- if (Random().NextBoolean())
- {
- bq2.Add(childQuery, BooleanClause.Occur.MUST);
- bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
- }
- else
- {
- bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
- bq2.Add(childQuery, BooleanClause.Occur.MUST);
- }
- }
-
- Sort parentSort = GetRandomSort("parent", parentFields.Length);
- Sort childSort = GetRandomSort("child", childFields.Length);
-
- if (VERBOSE)
- {
- Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
- }
-
- // Merge both sorts:
- IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
- sortFields.AddRange(Arrays.AsList(childSort.GetSort()));
- Sort parentAndChildSort = new Sort(sortFields.ToArray());
-
- TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);
-
- if (VERBOSE)
- {
- Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
- ScoreDoc[] hits = results.ScoreDocs;
- for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
- {
- Document doc = s.Doc(hits[hitIDX].Doc);
- //System.out.println(" score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
- Console.WriteLine(" parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
- FieldDoc fd = (FieldDoc)hits[hitIDX];
- if (fd.Fields != null)
- {
- Console.Write(" ");
- foreach (object o in fd.Fields)
- {
- if (o is BytesRef)
- {
- Console.Write(((BytesRef)o).Utf8ToString() + " ");
- }
- else
- {
- Console.Write(o + " ");
- }
- }
- Console.WriteLine();
- }
- }
- }
-
- bool trackScores;
- bool trackMaxScore;
- if (agg == ScoreMode.None)
- {
- trackScores = false;
- trackMaxScore = false;
- }
- else
- {
- trackScores = Random().NextBoolean();
- trackMaxScore = Random().NextBoolean();
- }
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);
-
- joinS.Search(parentJoinQuery, c);
-
- int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
- //final int hitsPerGroup = 100;
- TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);
-
- if (VERBOSE)
- {
- Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
- if (joinResults != null)
- {
- GroupDocs<int>[] groups = joinResults.Groups;
- for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
- {
- GroupDocs<int> group = groups[groupIDX];
- if (group.GroupSortValues != null)
- {
- Console.Write(" ");
- foreach (object o in group.GroupSortValues)
- {
- if (o is BytesRef)
- {
- Console.Write(((BytesRef)o).Utf8ToString() + " ");
- }
- else
- {
- Console.Write(o + " ");
- }
- }
- Console.WriteLine();
- }
-
- assertNotNull(group.GroupValue);
- Document parentDoc = joinS.Doc(group.GroupValue);
- Console.WriteLine(" group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
- for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
- {
- Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
- //System.out.println(" score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
- Console.WriteLine(" childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
- }
- }
- }
- }
-
- if (results.TotalHits == 0)
- {
- assertNull(joinResults);
- }
- else
- {
- CompareHits(r, joinR, results, joinResults);
- TopDocs b = joinS.Search(childJoinQuery, 10);
- foreach (ScoreDoc hit in b.ScoreDocs)
- {
- Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
- Document document = joinS.Doc(hit.Doc - 1);
- int childId = Convert.ToInt32(document.Get("childID"));
- assertTrue(explanation.IsMatch);
- assertEquals(hit.Score, explanation.Value, 0.0f);
- assertEquals(string.Format("Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);
- }
- }
-
- // Test joining in the opposite direction (parent to
- // child):
-
- // Get random query against parent documents:
- Query parentQuery2;
- if (Random().Next(3) == 2)
- {
- int fieldID = Random().Next(parentFields.Length);
- parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
- }
- else if (Random().Next(3) == 2)
- {
- BooleanQuery bq = new BooleanQuery();
- parentQuery2 = bq;
- int numClauses = TestUtil.NextInt(Random(), 2, 4);
- bool didMust = false;
- for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
- {
- Query clause;
- BooleanClause.Occur occur;
- if (!didMust && Random().NextBoolean())
- {
- occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
- clause = new TermQuery(RandomParentTerm(parentFields[0]));
- didMust = true;
- }
- else
- {
- occur = BooleanClause.Occur.SHOULD;
- int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
- clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
- }
- bq.Add(clause, occur);
- }
- }
- else
- {
- BooleanQuery bq = new BooleanQuery();
- parentQuery2 = bq;
-
- bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
- int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
- bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
- }
-
- if (VERBOSE)
- {
- Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);
- }
-
- // Maps parent query to child docs:
- ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());
-
- // To run against the block-join index:
- Query childJoinQuery2;
-
- // Same query as parentJoinQuery, but to run against
- // the fully denormalized index (so we can compare
- // results):
- Query childQuery2;
-
- // apply a filter to children
- Filter childFilter2, childJoinFilter2;
-
- if (Random().NextBoolean())
- {
- childQuery2 = parentQuery2;
- childJoinQuery2 = parentJoinQuery2;
- childFilter2 = null;
- childJoinFilter2 = null;
- }
- else
- {
- Term childTerm = RandomChildTerm(childFields[0]);
- if (Random().NextBoolean()) // filtered case
- {
- childJoinQuery2 = parentJoinQuery2;
- Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
- childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
- }
- else
- {
- childJoinFilter2 = null;
- // AND child field w/ parent query:
- BooleanQuery bq = new BooleanQuery();
- childJoinQuery2 = bq;
- if (Random().NextBoolean())
- {
- bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
- bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
- }
- else
- {
- bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
- bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
- }
- }
-
- if (Random().NextBoolean()) // filtered case
- {
- childQuery2 = parentQuery2;
- Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
- childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
- }
- else
- {
- childFilter2 = null;
- BooleanQuery bq2 = new BooleanQuery();
- childQuery2 = bq2;
- if (Random().NextBoolean())
- {
- bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
- bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
- }
- else
- {
- bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
- bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
- }
- }
- }
-
- Sort childSort2 = GetRandomSort("child", childFields.Length);
-
- // Search denormalized index:
- if (VERBOSE)
- {
- Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
- }
- TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
- if (VERBOSE)
- {
- Console.WriteLine(" " + results2.TotalHits + " totalHits:");
- foreach (ScoreDoc sd in results2.ScoreDocs)
- {
- Document doc = s.Doc(sd.Doc);
- Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);
- }
- }
-
- // Search join index:
- if (VERBOSE)
- {
- Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
- }
- TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
- if (VERBOSE)
- {
- Console.WriteLine(" " + joinResults2.TotalHits + " totalHits:");
- foreach (ScoreDoc sd in joinResults2.ScoreDocs)
- {
- Document doc = joinS.Doc(sd.Doc);
- Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
- Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);
- }
- }
-
- CompareChildHits(r, joinR, results2, joinResults2);
- }
-
- r.Dispose();
- joinR.Dispose();
- dir.Dispose();
- joinDir.Dispose();
- }
-
- private void CompareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults)
- {
- assertEquals(results.TotalHits, joinResults.TotalHits);
- assertEquals(results.ScoreDocs.Length, joinResults.ScoreDocs.Length);
- for (int hitCount = 0; hitCount < results.ScoreDocs.Length; hitCount++)
- {
- ScoreDoc hit = results.ScoreDocs[hitCount];
- ScoreDoc joinHit = joinResults.ScoreDocs[hitCount];
- Document doc1 = r.Document(hit.Doc);
- Document doc2 = joinR.Document(joinHit.Doc);
- assertEquals("hit " + hitCount + " differs", doc1.Get("childID"), doc2.Get("childID"));
- // don't compare scores -- they are expected to differ
-
-
- assertTrue(hit is FieldDoc);
- assertTrue(joinHit is FieldDoc);
-
- FieldDoc hit0 = (FieldDoc)hit;
- FieldDoc joinHit0 = (FieldDoc)joinHit;
- assertArrayEquals(hit0.Fields, joinHit0.Fields);
- }
- }
-
- private void CompareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<int> joinResults)
- {
- // results is 'complete'; joinResults is a subset
- int resultUpto = 0;
- int joinGroupUpto = 0;
-
- ScoreDoc[] hits = results.ScoreDocs;
- GroupDocs<int>[] groupDocs = joinResults.Groups;
-
- while (joinGroupUpto < groupDocs.Length)
- {
- GroupDocs<int> group = groupDocs[joinGroupUpto++];
- ScoreDoc[] groupHits = group.ScoreDocs;
- assertNotNull(group.GroupValue);
- Document parentDoc = joinR.Document(group.GroupValue);
- string parentID = parentDoc.Get("parentID");
- //System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc);
- assertNotNull(parentID);
- assertTrue(groupHits.Length > 0);
- for (int hitIDX = 0; hitIDX < groupHits.Length; hitIDX++)
- {
- Document nonJoinHit = r.Document(hits[resultUpto++].Doc);
- Document joinHit = joinR.Document(groupHits[hitIDX].Doc);
- assertEquals(parentID, nonJoinHit.Get("parentID"));
- assertEquals(joinHit.Get("childID"), nonJoinHit.Get("childID"));
- }
-
- if (joinGroupUpto < groupDocs.Length)
- {
- // Advance non-join hit to the next parentID:
- //System.out.println(" next joingroupUpto=" + joinGroupUpto + " gd.Length=" + groupDocs.Length + " parentID=" + parentID);
- while (true)
- {
- assertTrue(resultUpto < hits.Length);
- if (!parentID.Equals(r.Document(hits[resultUpto].Doc).Get("parentID")))
- {
- break;
- }
- resultUpto++;
- }
- }
- }
- }
-
- [Test]
- public void TestMultiChildTypes()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
-
- IList<Document> docs = new List<Document>();
-
- docs.Add(MakeJob("java", 2007));
- docs.Add(MakeJob("python", 2010));
- docs.Add(MakeQualification("maths", 1999));
- docs.Add(MakeResume("Lisa", "United Kingdom"));
- w.AddDocuments(docs);
-
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = NewSearcher(r);
-
- // Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
-
- // Define child document criteria (finds an example of relevant work experience)
- BooleanQuery childJobQuery = new BooleanQuery();
- childJobQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
- childJobQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
-
- BooleanQuery childQualificationQuery = new BooleanQuery();
- childQualificationQuery.Add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), BooleanClause.Occur.MUST));
- childQualificationQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 1980, 2000, true, true), BooleanClause.Occur.MUST));
-
-
- // Define parent document criteria (find a resident in the UK)
- Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
-
- // Wrap the child document query to 'join' any matches
- // up to corresponding parent:
- ToParentBlockJoinQuery childJobJoinQuery = new ToParentBlockJoinQuery(childJobQuery, parentsFilter, ScoreMode.Avg);
- ToParentBlockJoinQuery childQualificationJoinQuery = new ToParentBlockJoinQuery(childQualificationQuery, parentsFilter, ScoreMode.Avg);
-
- // Combine the parent and nested child queries into a single query for a candidate
- BooleanQuery fullQuery = new BooleanQuery();
- fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
- fullQuery.Add(new BooleanClause(childJobJoinQuery, BooleanClause.Occur.MUST));
- fullQuery.Add(new BooleanClause(childQualificationJoinQuery, BooleanClause.Occur.MUST));
-
- // Collects all job and qualification child docs for
- // each resume hit in the top N (sorted by score):
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
-
- s.Search(fullQuery, c);
-
- // Examine "Job" children
- TopGroups<int> jobResults = c.GetTopGroups(childJobJoinQuery, null, 0, 10, 0, true);
-
- //assertEquals(1, results.totalHitCount);
- assertEquals(1, jobResults.TotalGroupedHitCount);
- assertEquals(1, jobResults.Groups.Length);
-
- GroupDocs<int> group = jobResults.Groups[0];
- assertEquals(1, group.TotalHits);
-
- Document childJobDoc = s.Doc(group.ScoreDocs[0].Doc);
- //System.out.println(" doc=" + group.ScoreDocs[0].Doc);
- assertEquals("java", childJobDoc.Get("skill"));
- assertNotNull(group.GroupValue);
- Document parentDoc = s.Doc(group.GroupValue);
- assertEquals("Lisa", parentDoc.Get("name"));
-
- // Now Examine qualification children
- TopGroups<int> qualificationResults = c.GetTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);
-
- assertEquals(1, qualificationResults.TotalGroupedHitCount);
- assertEquals(1, qualificationResults.Groups.Length);
-
- GroupDocs<int> qGroup = qualificationResults.Groups[0];
- assertEquals(1, qGroup.TotalHits);
-
- Document childQualificationDoc = s.Doc(qGroup.ScoreDocs[0].Doc);
- assertEquals("maths", childQualificationDoc.Get("qualification"));
- assertNotNull(qGroup.GroupValue);
- parentDoc = s.Doc(qGroup.GroupValue);
- assertEquals("Lisa", parentDoc.Get("name"));
-
- r.Dispose();
- dir.Dispose();
- }
-
- [Test]
- public void TestAdvanceSingleParentSingleChild()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
- Document childDoc = new Document();
- childDoc.Add(NewStringField("child", "1", Field.Store.NO));
- Document parentDoc = new Document();
- parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
- w.AddDocuments(Arrays.AsList(childDoc, parentDoc));
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = NewSearcher(r);
- Query tq = new TermQuery(new Term("child", "1"));
- Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "1"))));
-
- ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg);
- Weight weight = s.CreateNormalizedWeight(q);
- DocIdSetIterator disi = weight.Scorer(s.IndexReader.Leaves.First(), null);
- assertEquals(1, disi.Advance(1));
- r.Dispose();
- dir.Dispose();
- }
-
- [Test]
- public void TestAdvanceSingleParentNoChild()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogDocMergePolicy()));
- Document parentDoc = new Document();
- parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
- parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
- w.AddDocuments(Arrays.AsList(parentDoc));
-
- // Add another doc so scorer is not null
- parentDoc = new Document();
- parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
- parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
- Document childDoc = new Document();
- childDoc.Add(NewStringField("child", "2", Field.Store.NO));
- w.AddDocuments(Arrays.AsList(childDoc, parentDoc));
-
- // Need single seg:
- w.ForceMerge(1);
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = NewSearcher(r);
- Query tq = new TermQuery(new Term("child", "2"));
- Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
-
- ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg);
- Weight weight = s.CreateNormalizedWeight(q);
- DocIdSetIterator disi = weight.Scorer(s.IndexReader.Leaves.First(), null);
- assertEquals(2, disi.Advance(0));
- r.Dispose();
- dir.Dispose();
- }
-
- [Test]
- public void TestGetTopGroups()
- {
-
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
-
- IList<Document> docs = new List<Document>();
- docs.Add(MakeJob("ruby", 2005));
- docs.Add(MakeJob("java", 2006));
- docs.Add(MakeJob("java", 2010));
- docs.Add(MakeJob("java", 2012));
- CollectionsHelper.Shuffle(docs);
- docs.Add(MakeResume("Frank", "United States"));
-
- AddSkillless(w);
- w.AddDocuments(docs);
- AddSkillless(w);
-
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = new IndexSearcher(r);
-
- // Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
-
- // Define child document criteria (finds an example of relevant work experience)
- BooleanQuery childQuery = new BooleanQuery();
- childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
- childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));
-
- // Wrap the child document query to 'join' any matches
- // up to corresponding parent:
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
-
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
- s.Search(childJoinQuery, c);
-
- //Get all child documents within groups
- TopGroups<int>[] getTopGroupsResults = new TopGroups<int>[2];
- getTopGroupsResults[0] = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
- getTopGroupsResults[1] = c.GetTopGroupsWithAllChildDocs(childJoinQuery, null, 0, 0, true);
-
- foreach (TopGroups<int> results in getTopGroupsResults)
- {
- assertFalse(float.IsNaN(results.MaxScore));
- assertEquals(2, results.TotalGroupedHitCount);
- assertEquals(1, results.Groups.Length);
-
- GroupDocs<int> resultGroup = results.Groups[0];
- assertEquals(2, resultGroup.TotalHits);
- assertFalse(float.IsNaN(resultGroup.Score));
- assertNotNull(resultGroup.GroupValue);
- Document parentDocument = s.Doc(resultGroup.GroupValue);
- assertEquals("Frank", parentDocument.Get("name"));
-
- assertEquals(2, resultGroup.ScoreDocs.Length); //all matched child documents collected
-
- foreach (ScoreDoc scoreDoc in resultGroup.ScoreDocs)
- {
- Document childDoc = s.Doc(scoreDoc.Doc);
- assertEquals("java", childDoc.Get("skill"));
- int year = Convert.ToInt32(childDoc.Get("year"));
- assertTrue(year >= 2006 && year <= 2011);
- }
- }
-
- //Get part of child documents
- TopGroups<int> boundedResults = c.GetTopGroups(childJoinQuery, null, 0, 1, 0, true);
- assertFalse(float.IsNaN(boundedResults.MaxScore));
- assertEquals(2, boundedResults.TotalGroupedHitCount);
- assertEquals(1, boundedResults.Groups.Length);
-
- GroupDocs<int> group = boundedResults.Groups[0];
- assertEquals(2, group.TotalHits);
- assertFalse(float.IsNaN(group.Score));
- assertNotNull(group.GroupValue);
- Document parentDoc = s.Doc(group.GroupValue);
- assertEquals("Frank", parentDoc.Get("name"));
-
- assertEquals(1, group.ScoreDocs.Length); //not all matched child documents collected
-
- foreach (ScoreDoc scoreDoc in group.ScoreDocs)
- {
- Document childDoc = s.Doc(scoreDoc.Doc);
- assertEquals("java", childDoc.Get("skill"));
- int year = Convert.ToInt32(childDoc.Get("year"));
- assertTrue(year >= 2006 && year <= 2011);
- }
-
- r.Dispose();
- dir.Dispose();
- }
-
- // LUCENE-4968
- [Test]
- public void TestSometimesParentOnlyMatches()
- {
- Directory d = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), d);
- Document parent = new Document();
- parent.Add(new StoredField("parentID", "0"));
- parent.Add(NewTextField("parentText", "text", Field.Store.NO));
- parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
-
- IList<Document> docs = new List<Document>();
-
- Document child = new Document();
- docs.Add(child);
- child.Add(new StoredField("childID", "0"));
- child.Add(NewTextField("childText", "text", Field.Store.NO));
-
- // parent last:
- docs.Add(parent);
- w.AddDocuments(docs);
-
- docs.Clear();
-
- parent = new Document();
- parent.Add(NewTextField("parentText", "text", Field.Store.NO));
- parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
- parent.Add(new StoredField("parentID", "1"));
-
- // parent last:
- docs.Add(parent);
- w.AddDocuments(docs);
-
- IndexReader r = w.Reader;
- w.Dispose();
-
- Query childQuery = new TermQuery(new Term("childText", "text"));
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery parentQuery = new BooleanQuery();
- parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
- parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
-
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
- NewSearcher(r).Search(parentQuery, c);
- TopGroups<int> groups = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, false);
-
- // Two parents:
- assertEquals(2, (int)groups.TotalGroupCount);
-
- // One child docs:
- assertEquals(1, groups.TotalGroupedHitCount);
-
- GroupDocs<int> group = groups.Groups[0];
- Document doc = r.Document((int)group.GroupValue);
- assertEquals("0", doc.Get("parentID"));
-
- group = groups.Groups[1];
- doc = r.Document((int)group.GroupValue);
- assertEquals("1", doc.Get("parentID"));
-
- r.Dispose();
- d.Dispose();
- }
-
- // LUCENE-4968
- [Test]
- public void TestChildQueryNeverMatches()
- {
- Directory d = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), d);
- Document parent = new Document();
- parent.Add(new StoredField("parentID", "0"));
- parent.Add(NewTextField("parentText", "text", Field.Store.NO));
- parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
-
- IList<Document> docs = new List<Document>();
-
- Document child = new Document();
- docs.Add(child);
- child.Add(new StoredField("childID", "0"));
- child.Add(NewTextField("childText", "text", Field.Store.NO));
-
- // parent last:
- docs.Add(parent);
- w.AddDocuments(docs);
-
- docs.Clear();
-
- parent = new Document();
- parent.Add(NewTextField("parentText", "text", Field.Store.NO));
- parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
- parent.Add(new StoredField("parentID", "1"));
-
- // parent last:
- docs.Add(parent);
- w.AddDocuments(docs);
-
- IndexReader r = w.Reader;
- w.Dispose();
-
- // never matches:
- Query childQuery = new TermQuery(new Term("childText", "bogus"));
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery parentQuery = new BooleanQuery();
- parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
- parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
-
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
- NewSearcher(r).Search(parentQuery, c);
- TopGroups<int> groups = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, false);
-
- // Two parents:
- assertEquals(2, (int)groups.TotalGroupCount);
-
- // One child docs:
- assertEquals(0, groups.TotalGroupedHitCount);
-
- GroupDocs<int> group = groups.Groups[0];
- Document doc = r.Document((int)group.GroupValue);
- assertEquals("0", doc.Get("parentID"));
-
- group = groups.Groups[1];
- doc = r.Document((int)group.GroupValue);
- assertEquals("1", doc.Get("parentID"));
-
- r.Dispose();
- d.Dispose();
- }
-
- // LUCENE-4968
- [Test]
- public void TestChildQueryMatchesParent()
- {
- Directory d = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), d);
- Document parent = new Document();
- parent.Add(new StoredField("parentID", "0"));
- parent.Add(NewTextField("parentText", "text", Field.Store.NO));
- parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
-
- IList<Document> docs = new List<Document>();
-
- Document child = new Document();
- docs.Add(child);
- child.Add(new StoredField("childID", "0"));
- child.Add(NewTextField("childText", "text", Field.Store.NO));
-
- // parent last:
- docs.Add(parent);
- w.AddDocuments(docs);
-
- docs.Clear();
-
- parent = new Document();
- parent.Add(NewTextField("parentText", "text", Field.Store.NO));
- parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
- parent.Add(new StoredField("parentID", "1"));
-
- // parent last:
- docs.Add(parent);
- w.AddDocuments(docs);
-
- IndexReader r = w.Reader;
- w.Dispose();
-
- // illegally matches parent:
- Query childQuery = new TermQuery(new Term("parentText", "text"));
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
- ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
- BooleanQuery parentQuery = new BooleanQuery();
- parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
- parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);
-
- ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
-
- try
- {
- NewSearcher(r).Search(parentQuery, c);
- fail("should have hit exception");
- }
- catch (IllegalStateException ise)
- {
- // expected
- }
-
- r.Dispose();
- d.Dispose();
- }
-
- [Test]
- public void TestAdvanceSingleDeletedParentNoChild()
- {
- Directory dir = NewDirectory();
- RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
-
- // First doc with 1 children
- Document parentDoc = new Document();
- parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
- parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
- Document childDoc = new Document();
- childDoc.Add(NewStringField("child", "1", Field.Store.NO));
- w.AddDocuments(Arrays.AsList(childDoc, parentDoc));
-
- parentDoc = new Document();
- parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
- parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
- w.AddDocuments(Arrays.AsList(parentDoc));
-
- w.DeleteDocuments(new Term("parent", "2"));
-
- parentDoc = new Document();
- parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
- parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
- childDoc = new Document();
- childDoc.Add(NewStringField("child", "2", Field.Store.NO));
- w.AddDocuments(Arrays.AsList(childDoc, parentDoc));
-
- IndexReader r = w.Reader;
- w.Dispose();
- IndexSearcher s = NewSearcher(r);
-
- // Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
-
- Query parentQuery = new TermQuery(new Term("parent", "2"));
-
- ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
- TopDocs topdocs = s.Search(parentJoinQuery, 3);
- assertEquals(1, topdocs.TotalHits);
-
- r.Dispose();
- dir.Dispose();
- }
- }
-}
\ No newline at end of file
[16/17] lucenenet git commit: Lucene.Net.Join tests now passing
Posted by sy...@apache.org.
Lucene.Net.Join tests now passing
Moved the Join/Grouping projects into the src folder and updated the sln
path mapping accordingly.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4820f236
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4820f236
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4820f236
Branch: refs/heads/master
Commit: 4820f236b2444636452012c42e2450e408720335
Parents: 0213f53
Author: Josh Sullivan <ja...@gmail.com>
Authored: Sun Aug 23 00:27:54 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Sun Aug 23 00:27:54 2015 -0400
----------------------------------------------------------------------
Lucene.Net.Grouping/GroupDocs.cs | 71 -
Lucene.Net.Grouping/Lucene.Net.Grouping.csproj | 61 -
Lucene.Net.Grouping/Properties/AssemblyInfo.cs | 36 -
Lucene.Net.Grouping/TopGroups.cs | 249 ---
Lucene.Net.Join/FakeScorer.cs | 76 -
.../FixedBitSetCachingWrapperFilter.cs | 62 -
Lucene.Net.Join/JoinUtil.cs | 80 -
Lucene.Net.Join/Lucene.Net.Join.csproj | 76 -
Lucene.Net.Join/Properties/AssemblyInfo.cs | 36 -
Lucene.Net.Join/ScoreMode.cs | 45 -
Lucene.Net.Join/TermsCollector.cs | 127 --
Lucene.Net.Join/TermsIncludingScoreQuery.cs | 472 ------
Lucene.Net.Join/TermsQuery.cs | 147 --
Lucene.Net.Join/TermsWithScoreCollector.cs | 333 ----
Lucene.Net.Join/ToChildBlockJoinQuery.cs | 396 -----
Lucene.Net.Join/ToParentBlockJoinCollector.cs | 560 ------
.../ToParentBlockJoinFieldComparator.cs | 393 -----
Lucene.Net.Join/ToParentBlockJoinQuery.cs | 516 ------
Lucene.Net.Join/ToParentBlockJoinSortField.cs | 78 -
.../Lucene.Net.Tests.Join.csproj | 86 -
.../Properties/AssemblyInfo.cs | 36 -
Lucene.Net.Tests.Join/TestBlockJoin.cs | 1599 ------------------
Lucene.Net.Tests.Join/TestBlockJoinSorting.cs | 277 ---
.../TestBlockJoinValidation.cs | 227 ---
Lucene.Net.Tests.Join/TestJoinUtil.cs | 1165 -------------
Lucene.Net.Tests.Join/packages.config | 5 -
Lucene.Net.sln | 6 +-
src/Lucene.Net.Grouping/GroupDocs.cs | 71 +
.../Lucene.Net.Grouping.csproj | 61 +
.../Properties/AssemblyInfo.cs | 36 +
src/Lucene.Net.Grouping/TopGroups.cs | 249 +++
src/Lucene.Net.Join/FakeScorer.cs | 76 +
.../FixedBitSetCachingWrapperFilter.cs | 62 +
src/Lucene.Net.Join/JoinUtil.cs | 80 +
src/Lucene.Net.Join/Lucene.Net.Join.csproj | 76 +
src/Lucene.Net.Join/Properties/AssemblyInfo.cs | 36 +
src/Lucene.Net.Join/ScoreMode.cs | 45 +
src/Lucene.Net.Join/TermsCollector.cs | 127 ++
src/Lucene.Net.Join/TermsIncludingScoreQuery.cs | 472 ++++++
src/Lucene.Net.Join/TermsQuery.cs | 147 ++
src/Lucene.Net.Join/TermsWithScoreCollector.cs | 333 ++++
src/Lucene.Net.Join/ToChildBlockJoinQuery.cs | 396 +++++
.../ToParentBlockJoinCollector.cs | 578 +++++++
.../ToParentBlockJoinFieldComparator.cs | 393 +++++
src/Lucene.Net.Join/ToParentBlockJoinQuery.cs | 516 ++++++
.../ToParentBlockJoinSortField.cs | 78 +
.../Util/LuceneTestCase.cs | 2 +-
.../Lucene.Net.Tests.Join.csproj | 86 +
.../Properties/AssemblyInfo.cs | 36 +
src/Lucene.Net.Tests.Join/TestBlockJoin.cs | 1591 +++++++++++++++++
.../TestBlockJoinSorting.cs | 277 +++
.../TestBlockJoinValidation.cs | 227 +++
src/Lucene.Net.Tests.Join/TestJoinUtil.cs | 1165 +++++++++++++
src/Lucene.Net.Tests.Join/packages.config | 5 +
54 files changed, 7223 insertions(+), 7213 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Grouping/GroupDocs.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/GroupDocs.cs b/Lucene.Net.Grouping/GroupDocs.cs
deleted file mode 100644
index 00cdf83..0000000
--- a/Lucene.Net.Grouping/GroupDocs.cs
+++ /dev/null
@@ -1,71 +0,0 @@
-using Lucene.Net.Search;
-
-namespace Lucene.Net.Grouping
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Represents one group in the results.
- ///
- /// @lucene.experimental
- /// </summary>
- public class GroupDocs<TGroupValueType>
- {
- /// <summary>
- /// The groupField value for all docs in this group; this
- /// may be null if hits did not have the groupField.
- /// </summary>
- public readonly TGroupValueType GroupValue;
-
- /// <summary>
- /// Max score in this group
- /// </summary>
- public readonly float MaxScore;
-
- /// <summary>
- /// Overall aggregated score of this group (currently only set by join queries).
- /// </summary>
- public readonly float Score;
-
- /// <summary>
- /// Hits; this may be {@link org.apache.lucene.search.FieldDoc} instances if the
- /// withinGroupSort sorted by fields.
- /// </summary>
- public readonly ScoreDoc[] ScoreDocs;
-
- /// <summary>
- /// Total hits within this group
- /// </summary>
- public readonly int TotalHits;
-
- /// <summary>
- /// Matches the groupSort passed to {@link AbstractFirstPassGroupingCollector}.
- /// </summary>
- public readonly object[] GroupSortValues;
-
- public GroupDocs(float score, float maxScore, int totalHits, ScoreDoc[] scoreDocs, TGroupValueType groupValue, object[] groupSortValues)
- {
- Score = score;
- MaxScore = maxScore;
- TotalHits = totalHits;
- ScoreDocs = scoreDocs;
- GroupValue = groupValue;
- GroupSortValues = groupSortValues;
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj b/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
deleted file mode 100644
index 540b438..0000000
--- a/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
+++ /dev/null
@@ -1,61 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
- <PropertyGroup>
- <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
- <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
- <ProjectGuid>{02BAB603-067D-48B1-AEDD-316849652568}</ProjectGuid>
- <OutputType>Library</OutputType>
- <AppDesignerFolder>Properties</AppDesignerFolder>
- <RootNamespace>Lucene.Net.Grouping</RootNamespace>
- <AssemblyName>Lucene.Net.Grouping</AssemblyName>
- <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
- <FileAlignment>512</FileAlignment>
- </PropertyGroup>
- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
- <DebugSymbols>true</DebugSymbols>
- <DebugType>full</DebugType>
- <Optimize>false</Optimize>
- <OutputPath>bin\Debug\</OutputPath>
- <DefineConstants>DEBUG;TRACE</DefineConstants>
- <ErrorReport>prompt</ErrorReport>
- <WarningLevel>4</WarningLevel>
- </PropertyGroup>
- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
- <DebugType>pdbonly</DebugType>
- <Optimize>true</Optimize>
- <OutputPath>bin\Release\</OutputPath>
- <DefineConstants>TRACE</DefineConstants>
- <ErrorReport>prompt</ErrorReport>
- <WarningLevel>4</WarningLevel>
- </PropertyGroup>
- <ItemGroup>
- <Reference Include="System" />
- <Reference Include="System.Core" />
- <Reference Include="System.Xml.Linq" />
- <Reference Include="System.Data.DataSetExtensions" />
- <Reference Include="Microsoft.CSharp" />
- <Reference Include="System.Data" />
- <Reference Include="System.Net.Http" />
- <Reference Include="System.Xml" />
- </ItemGroup>
- <ItemGroup>
- <Compile Include="GroupDocs.cs" />
- <Compile Include="Properties\AssemblyInfo.cs" />
- <Compile Include="TopGroups.cs" />
- </ItemGroup>
- <ItemGroup>
- <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
- <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
- <Name>Lucene.Net</Name>
- </ProjectReference>
- </ItemGroup>
- <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
- <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
- Other similar extension points exist, see Microsoft.Common.targets.
- <Target Name="BeforeBuild">
- </Target>
- <Target Name="AfterBuild">
- </Target>
- -->
-</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/Properties/AssemblyInfo.cs b/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
deleted file mode 100644
index 9e6c1ce..0000000
--- a/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
+++ /dev/null
@@ -1,36 +0,0 @@
-using System.Reflection;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-// General Information about an assembly is controlled through the following
-// set of attributes. Change these attribute values to modify the information
-// associated with an assembly.
-[assembly: AssemblyTitle("Lucene.Net.Grouping")]
-[assembly: AssemblyDescription("")]
-[assembly: AssemblyConfiguration("")]
-[assembly: AssemblyCompany("")]
-[assembly: AssemblyProduct("Lucene.Net.Grouping")]
-[assembly: AssemblyCopyright("Copyright © 2015")]
-[assembly: AssemblyTrademark("")]
-[assembly: AssemblyCulture("")]
-
-// Setting ComVisible to false makes the types in this assembly not visible
-// to COM components. If you need to access a type in this assembly from
-// COM, set the ComVisible attribute to true on that type.
-[assembly: ComVisible(false)]
-
-// The following GUID is for the ID of the typelib if this project is exposed to COM
-[assembly: Guid("02bab603-067d-48b1-aedd-316849652568")]
-
-// Version information for an assembly consists of the following four values:
-//
-// Major Version
-// Minor Version
-// Build Number
-// Revision
-//
-// You can specify all the values or you can default the Build and Revision Numbers
-// by using the '*' as shown below:
-// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("1.0.0.0")]
-[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Grouping/TopGroups.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/TopGroups.cs b/Lucene.Net.Grouping/TopGroups.cs
deleted file mode 100644
index 017c975..0000000
--- a/Lucene.Net.Grouping/TopGroups.cs
+++ /dev/null
@@ -1,249 +0,0 @@
-using System;
-using Lucene.Net.Search;
-
-namespace Lucene.Net.Grouping
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Represents result returned by a grouping search.
- ///
- /// @lucene.experimental
- /// </summary>
- public class TopGroups<TGroupValueType>
- {
- /// <summary>
- /// Number of documents matching the search </summary>
- public readonly int TotalHitCount;
-
- /// <summary>
- /// Number of documents grouped into the topN groups </summary>
- public readonly int TotalGroupedHitCount;
-
- /// <summary>
- /// The total number of unique groups. If <code>null</code> this value is not computed. </summary>
- public readonly int? TotalGroupCount;
-
- /// <summary>
- /// Group results in groupSort order </summary>
- public readonly GroupDocs<TGroupValueType>[] Groups;
-
- /// <summary>
- /// How groups are sorted against each other </summary>
- public readonly SortField[] GroupSort;
-
- /// <summary>
- /// How docs are sorted within each group </summary>
- public readonly SortField[] WithinGroupSort;
-
- /// <summary>
- /// Highest score across all hits, or
- /// <code>Float.NaN</code> if scores were not computed.
- /// </summary>
- public readonly float MaxScore;
-
- public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValueType>[] groups, float maxScore)
- {
- GroupSort = groupSort;
- WithinGroupSort = withinGroupSort;
- TotalHitCount = totalHitCount;
- TotalGroupedHitCount = totalGroupedHitCount;
- Groups = groups;
- TotalGroupCount = null;
- MaxScore = maxScore;
- }
-
- public TopGroups(TopGroups<TGroupValueType> oldTopGroups, int? totalGroupCount)
- {
- GroupSort = oldTopGroups.GroupSort;
- WithinGroupSort = oldTopGroups.WithinGroupSort;
- TotalHitCount = oldTopGroups.TotalHitCount;
- TotalGroupedHitCount = oldTopGroups.TotalGroupedHitCount;
- Groups = oldTopGroups.Groups;
- MaxScore = oldTopGroups.MaxScore;
- TotalGroupCount = totalGroupCount;
- }
-
- /// <summary>
- /// How the GroupDocs score (if any) should be merged. </summary>
- public enum ScoreMergeMode
- {
- /// <summary>
- /// Set score to Float.NaN
- /// </summary>
- None,
-
- /// <summary>
- /// Sum score across all shards for this group.
- /// </summary>
- Total,
-
- /// <summary>
- /// Avg score across all shards for this group.
- /// </summary>
- Avg,
- }
-
- /// <summary>
- /// Merges an array of TopGroups, for example obtained from the second-pass
- /// collector across multiple shards. Each TopGroups must have been sorted by the
- /// same groupSort and docSort, and the top groups passed to all second-pass
- /// collectors must be the same.
- ///
- /// <b>NOTE</b>: We can't always compute an exact totalGroupCount.
- /// Documents belonging to a group may occur on more than
- /// one shard and thus the merged totalGroupCount can be
- /// higher than the actual totalGroupCount. In this case the
- /// totalGroupCount represents a upper bound. If the documents
- /// of one group do only reside in one shard then the
- /// totalGroupCount is exact.
- ///
- /// <b>NOTE</b>: the topDocs in each GroupDocs is actually
- /// an instance of TopDocsAndShards
- /// </summary>
- public static TopGroups<T> Merge<T>(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN, ScoreMergeMode scoreMergeMode)
- {
- //System.out.println("TopGroups.merge");
-
- if (shardGroups.Length == 0)
- {
- return null;
- }
-
- int totalHitCount = 0;
- int totalGroupedHitCount = 0;
- // Optionally merge the totalGroupCount.
- int? totalGroupCount = null;
-
- int numGroups = shardGroups[0].Groups.Length;
- foreach (var shard in shardGroups)
- {
- if (numGroups != shard.Groups.Length)
- {
- throw new ArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
- }
- totalHitCount += shard.TotalHitCount;
- totalGroupedHitCount += shard.TotalGroupedHitCount;
- if (shard.TotalGroupCount != null)
- {
- if (totalGroupCount == null)
- {
- totalGroupCount = 0;
- }
-
- totalGroupCount += shard.TotalGroupCount;
- }
- }
-
- var mergedGroupDocs = new GroupDocs<T>[numGroups];
-
- TopDocs[] shardTopDocs = new TopDocs[shardGroups.Length];
- float totalMaxScore = float.MinValue;
-
- for (int groupIDX = 0; groupIDX < numGroups; groupIDX++)
- {
- T groupValue = shardGroups[0].Groups[groupIDX].GroupValue;
- //System.out.println(" merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
- float maxScore = float.MinValue;
- int totalHits = 0;
- double scoreSum = 0.0;
- for (int shardIdx = 0; shardIdx < shardGroups.Length; shardIdx++)
- {
- //System.out.println(" shard=" + shardIDX);
- TopGroups<T> shard = shardGroups[shardIdx];
- var shardGroupDocs = shard.Groups[groupIDX];
- if (groupValue == null)
- {
- if (shardGroupDocs.GroupValue != null)
- {
- throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
- }
- }
- else if (!groupValue.Equals(shardGroupDocs.GroupValue))
- {
- throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
- }
-
- /*
- for(ScoreDoc sd : shardGroupDocs.scoreDocs) {
- System.out.println(" doc=" + sd.doc);
- }
- */
-
- shardTopDocs[shardIdx] = new TopDocs(shardGroupDocs.TotalHits, shardGroupDocs.ScoreDocs, shardGroupDocs.MaxScore);
- maxScore = Math.Max(maxScore, shardGroupDocs.MaxScore);
- totalHits += shardGroupDocs.TotalHits;
- scoreSum += shardGroupDocs.Score;
- }
-
- TopDocs mergedTopDocs = TopDocs.Merge(docSort, docOffset + docTopN, shardTopDocs);
-
- // Slice;
- ScoreDoc[] mergedScoreDocs;
- if (docOffset == 0)
- {
- mergedScoreDocs = mergedTopDocs.ScoreDocs;
- }
- else if (docOffset >= mergedTopDocs.ScoreDocs.Length)
- {
- mergedScoreDocs = new ScoreDoc[0];
- }
- else
- {
- mergedScoreDocs = new ScoreDoc[mergedTopDocs.ScoreDocs.Length - docOffset];
- Array.Copy(mergedTopDocs.ScoreDocs, docOffset, mergedScoreDocs, 0, mergedTopDocs.ScoreDocs.Length - docOffset);
- }
-
- float groupScore;
- switch (scoreMergeMode)
- {
- case ScoreMergeMode.None:
- groupScore = float.NaN;
- break;
- case ScoreMergeMode.Avg:
- if (totalHits > 0)
- {
- groupScore = (float)(scoreSum / totalHits);
- }
- else
- {
- groupScore = float.NaN;
- }
- break;
- case ScoreMergeMode.Total:
- groupScore = (float)scoreSum;
- break;
- default:
- throw new ArgumentException("can't handle ScoreMergeMode " + scoreMergeMode);
- }
-
- //System.out.println("SHARDS=" + Arrays.toString(mergedTopDocs.shardIndex));
- mergedGroupDocs[groupIDX] = new GroupDocs<T>(groupScore, maxScore, totalHits, mergedScoreDocs, groupValue, shardGroups[0].Groups[groupIDX].GroupSortValues);
- totalMaxScore = Math.Max(totalMaxScore, maxScore);
- }
-
- if (totalGroupCount != null)
- {
- var result = new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
- return new TopGroups<T>(result, totalGroupCount);
- }
-
- return new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/FakeScorer.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/FakeScorer.cs b/Lucene.Net.Join/FakeScorer.cs
deleted file mode 100644
index 42bf91b..0000000
--- a/Lucene.Net.Join/FakeScorer.cs
+++ /dev/null
@@ -1,76 +0,0 @@
-using System;
-using System.Collections.Generic;
-using Lucene.Net.Search;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Passed to <see cref="Collector.Scorer"/> during join collection.
- /// </summary>
- internal sealed class FakeScorer : Scorer
- {
- internal float _score;
- internal int doc = -1;
-
- public FakeScorer() : base(null)
- {
- }
-
- public override int DocID()
- {
- return doc;
- }
-
- public override int NextDoc()
- {
- throw new NotSupportedException("FakeScorer doesn't support NextDoc()");
- }
-
- public override int Advance(int target)
- {
- throw new NotSupportedException("FakeScorer doesn't support Advance(int)");
- }
-
- public override long Cost()
- {
- return 1;
- }
-
- public override int Freq()
- {
- throw new NotSupportedException("FakeScorer doesn't support Freq()");
- }
-
- public override float Score()
- {
- return _score;
- }
-
- public override Weight Weight
- {
- get { throw new NotSupportedException(); }
- }
-
- public override ICollection<ChildScorer> Children
- {
- get { throw new NotSupportedException(); }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs b/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
deleted file mode 100644
index da8b0b8..0000000
--- a/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
+++ /dev/null
@@ -1,62 +0,0 @@
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// A <see cref="CachingWrapperFilter"/> that caches sets using a <see cref="FixedBitSet"/>,
- /// as required for joins.
- /// </summary>
- public sealed class FixedBitSetCachingWrapperFilter : CachingWrapperFilter
- {
- /// <summary>
- /// Sole constructor, see <see cref="CachingWrapperFilter"/>.
- /// </summary>
- public FixedBitSetCachingWrapperFilter(Filter filter) : base(filter)
- {
- }
-
- protected override DocIdSet DocIdSetToCache(DocIdSet docIdSet, AtomicReader reader)
- {
- if (docIdSet == null)
- {
- return EMPTY_DOCIDSET;
- }
-
- if (docIdSet is FixedBitSet)
- {
- // this is different from CachingWrapperFilter: even when the DocIdSet is
- // cacheable, we convert it to a FixedBitSet since we require all the
- // cached filters to be FixedBitSets
- return docIdSet;
- }
-
- DocIdSetIterator it = docIdSet.GetIterator();
- if (it == null)
- {
- return EMPTY_DOCIDSET;
- }
- FixedBitSet copy = new FixedBitSet(reader.MaxDoc);
- copy.Or(it);
- return copy;
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/JoinUtil.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/JoinUtil.cs b/Lucene.Net.Join/JoinUtil.cs
deleted file mode 100644
index 726731e..0000000
--- a/Lucene.Net.Join/JoinUtil.cs
+++ /dev/null
@@ -1,80 +0,0 @@
-using System.IO;
-using Lucene.Net.Search;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- /// <summary>
- /// Utility for query time joining using TermsQuery and TermsCollector.
- ///
- /// @lucene.experimental
- /// </summary>
- public sealed class JoinUtil
- {
- // No instances allowed
- private JoinUtil()
- {
- }
-
- /// <summary>
- /// Method for query time joining.
- /// <p/>
- /// Execute the returned query with a <seealso cref="IndexSearcher"/> to retrieve all documents that have the same terms in the
- /// to field that match with documents matching the specified fromQuery and have the same terms in the from field.
- /// <p/>
- /// In the case a single document relates to more than one document the <code>multipleValuesPerDocument</code> option
- /// should be set to true. When the <code>multipleValuesPerDocument</code> is set to <code>true</code> only the
- /// the score from the first encountered join value originating from the 'from' side is mapped into the 'to' side.
- /// Even in the case when a second join value related to a specific document yields a higher score. Obviously this
- /// doesn't apply in the case that <seealso cref="ScoreMode.None"/> is used, since no scores are computed at all.
- /// </p>
- /// Memory considerations: During joining all unique join values are kept in memory. On top of that when the scoreMode
- /// isn't set to <seealso cref="ScoreMode.None"/> a float value per unique join value is kept in memory for computing scores.
- /// When scoreMode is set to <seealso cref="ScoreMode.Avg"/> also an additional integer value is kept in memory per unique
- /// join value.
- /// </summary>
- /// <param name="fromField"> The from field to join from </param>
- /// <param name="multipleValuesPerDocument"> Whether the from field has multiple terms per document </param>
- /// <param name="toField"> The to field to join to </param>
- /// <param name="fromQuery"> The query to match documents on the from side </param>
- /// <param name="fromSearcher"> The searcher that executed the specified fromQuery </param>
- /// <param name="scoreMode"> Instructs how scores from the fromQuery are mapped to the returned query </param>
- /// <returns>A <see cref="Query"/> instance that can be used to join documents based on the terms in the from and to field</returns>
- /// <exception cref="IOException"> If I/O related errors occur </exception>
- public static Query CreateJoinQuery(string fromField, bool multipleValuesPerDocument, string toField, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode)
- {
- switch (scoreMode)
- {
- case ScoreMode.None:
- TermsCollector termsCollector = TermsCollector.Create(fromField, multipleValuesPerDocument);
- fromSearcher.Search(fromQuery, termsCollector);
- return new TermsQuery(toField, fromQuery, termsCollector.CollectorTerms);
- case ScoreMode.Total:
- case ScoreMode.Max:
- case ScoreMode.Avg:
- TermsWithScoreCollector termsWithScoreCollector = TermsWithScoreCollector.Create(fromField, multipleValuesPerDocument, scoreMode);
- fromSearcher.Search(fromQuery, termsWithScoreCollector);
- return new TermsIncludingScoreQuery(toField, multipleValuesPerDocument, termsWithScoreCollector.CollectedTerms, termsWithScoreCollector.ScoresPerTerm, fromQuery);
- default:
- throw new System.ArgumentException(string.Format("Score mode {0} isn't supported.", scoreMode));
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/Lucene.Net.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Lucene.Net.Join.csproj b/Lucene.Net.Join/Lucene.Net.Join.csproj
deleted file mode 100644
index 2222b0e..0000000
--- a/Lucene.Net.Join/Lucene.Net.Join.csproj
+++ /dev/null
@@ -1,76 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
- <PropertyGroup>
- <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
- <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
- <ProjectGuid>{E8A339C7-FCF6-4A72-8586-56D8961D7B99}</ProjectGuid>
- <OutputType>Library</OutputType>
- <AppDesignerFolder>Properties</AppDesignerFolder>
- <RootNamespace>Lucene.Net.Join</RootNamespace>
- <AssemblyName>Lucene.Net.Join</AssemblyName>
- <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
- <FileAlignment>512</FileAlignment>
- </PropertyGroup>
- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
- <DebugSymbols>true</DebugSymbols>
- <DebugType>full</DebugType>
- <Optimize>false</Optimize>
- <OutputPath>bin\Debug\</OutputPath>
- <DefineConstants>DEBUG;TRACE</DefineConstants>
- <ErrorReport>prompt</ErrorReport>
- <WarningLevel>4</WarningLevel>
- </PropertyGroup>
- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
- <DebugType>pdbonly</DebugType>
- <Optimize>true</Optimize>
- <OutputPath>bin\Release\</OutputPath>
- <DefineConstants>TRACE</DefineConstants>
- <ErrorReport>prompt</ErrorReport>
- <WarningLevel>4</WarningLevel>
- </PropertyGroup>
- <ItemGroup>
- <Reference Include="System" />
- <Reference Include="System.Core" />
- <Reference Include="System.Xml.Linq" />
- <Reference Include="System.Data.DataSetExtensions" />
- <Reference Include="Microsoft.CSharp" />
- <Reference Include="System.Data" />
- <Reference Include="System.Net.Http" />
- <Reference Include="System.Xml" />
- </ItemGroup>
- <ItemGroup>
- <Compile Include="FakeScorer.cs" />
- <Compile Include="FixedBitSetCachingWrapperFilter.cs" />
- <Compile Include="JoinUtil.cs" />
- <Compile Include="Properties\AssemblyInfo.cs" />
- <Compile Include="ScoreMode.cs" />
- <Compile Include="TermsCollector.cs" />
- <Compile Include="TermsIncludingScoreQuery.cs" />
- <Compile Include="TermsQuery.cs" />
- <Compile Include="TermsWithScoreCollector.cs" />
- <Compile Include="ToChildBlockJoinQuery.cs" />
- <Compile Include="ToParentBlockJoinCollector.cs" />
- <Compile Include="ToParentBlockJoinFieldComparator.cs" />
- <Compile Include="ToParentBlockJoinQuery.cs" />
- <Compile Include="ToParentBlockJoinSortField.cs" />
- </ItemGroup>
- <ItemGroup>
- <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
- <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
- <Name>Lucene.Net.Grouping</Name>
- </ProjectReference>
- <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
- <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
- <Name>Lucene.Net</Name>
- </ProjectReference>
- </ItemGroup>
- <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
- <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
- Other similar extension points exist, see Microsoft.Common.targets.
- <Target Name="BeforeBuild">
- </Target>
- <Target Name="AfterBuild">
- </Target>
- -->
-</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Properties/AssemblyInfo.cs b/Lucene.Net.Join/Properties/AssemblyInfo.cs
deleted file mode 100644
index 2c17c13..0000000
--- a/Lucene.Net.Join/Properties/AssemblyInfo.cs
+++ /dev/null
@@ -1,36 +0,0 @@
-using System.Reflection;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-// General Information about an assembly is controlled through the following
-// set of attributes. Change these attribute values to modify the information
-// associated with an assembly.
-[assembly: AssemblyTitle("Lucene.Net.Join")]
-[assembly: AssemblyDescription("")]
-[assembly: AssemblyConfiguration("")]
-[assembly: AssemblyCompany("")]
-[assembly: AssemblyProduct("Lucene.Net.Join")]
-[assembly: AssemblyCopyright("Copyright © 2015")]
-[assembly: AssemblyTrademark("")]
-[assembly: AssemblyCulture("")]
-
-// Setting ComVisible to false makes the types in this assembly not visible
-// to COM components. If you need to access a type in this assembly from
-// COM, set the ComVisible attribute to true on that type.
-[assembly: ComVisible(false)]
-
-// The following GUID is for the ID of the typelib if this project is exposed to COM
-[assembly: Guid("e8a339c7-fcf6-4a72-8586-56d8961d7b99")]
-
-// Version information for an assembly consists of the following four values:
-//
-// Major Version
-// Minor Version
-// Build Number
-// Revision
-//
-// You can specify all the values or you can default the Build and Revision Numbers
-// by using the '*' as shown below:
-// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("1.0.0.0")]
-[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/ScoreMode.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ScoreMode.cs b/Lucene.Net.Join/ScoreMode.cs
deleted file mode 100644
index a5b91be..0000000
--- a/Lucene.Net.Join/ScoreMode.cs
+++ /dev/null
@@ -1,45 +0,0 @@
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// How to aggregate multiple child hit scores into a single parent score.
- /// </summary>
- public enum ScoreMode
- {
- /// <summary>
- /// Do no scoring.
- /// </summary>
- None,
-
- /// <summary>
- /// Parent hit's score is the average of all child scores.
- /// </summary>
- Avg,
-
- /// <summary>
- /// Parent hit's score is the max of all child scores.
- /// </summary>
- Max,
-
- /// <summary>
- /// Parent hit's score is the sum of all child scores.
- /// </summary>
- Total
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/TermsCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsCollector.cs b/Lucene.Net.Join/TermsCollector.cs
deleted file mode 100644
index 2ccf1ed..0000000
--- a/Lucene.Net.Join/TermsCollector.cs
+++ /dev/null
@@ -1,127 +0,0 @@
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// A collector that collects all terms from a specified field matching the query.
- ///
- /// @lucene.experimental
- /// </summary>
- internal abstract class TermsCollector : Collector
- {
- private readonly string _field;
- private readonly BytesRefHash _collectorTerms = new BytesRefHash();
-
- internal TermsCollector(string field)
- {
- _field = field;
- }
-
- public BytesRefHash CollectorTerms
- {
- get
- {
- return _collectorTerms;
- }
- }
-
- public override Scorer Scorer
- {
- set {}
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return true;
- }
-
- /// <summary>
- /// Chooses the right <see cref="TermsCollector"/> implementation.
- /// </summary>
- /// <param name="field">The field to collect terms for.</param>
- /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
- /// <returns>A <see cref="TermsCollector"/> instance.</returns>
- internal static TermsCollector Create(string field, bool multipleValuesPerDocument)
- {
- return multipleValuesPerDocument ? (TermsCollector) new MV(field) : new SV(field);
- }
-
- // impl that works with multiple values per document
- private class MV : TermsCollector
- {
- private readonly BytesRef _scratch = new BytesRef();
- private SortedSetDocValues _docTermOrds;
-
- internal MV(string field) : base(field)
- {
- }
-
- public override void Collect(int doc)
- {
- _docTermOrds.Document = doc;
- long ord;
- while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
- {
- _docTermOrds.LookupOrd(ord, _scratch);
- _collectorTerms.Add(_scratch);
- }
- }
-
- public override AtomicReaderContext NextReader
- {
- set { _docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field); }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- throw new System.NotImplementedException();
- }
- }
-
- // impl that works with single value per document
- private class SV : TermsCollector
- {
- private readonly BytesRef _spare = new BytesRef();
- private BinaryDocValues _fromDocTerms;
-
- internal SV(string field) : base(field)
- {
- }
-
- public override void Collect(int doc)
- {
- _fromDocTerms.Get(doc, _spare);
- _collectorTerms.Add(_spare);
- }
-
- public override AtomicReaderContext NextReader
- {
- set { _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return base.AcceptsDocsOutOfOrder();
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/TermsIncludingScoreQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsIncludingScoreQuery.cs b/Lucene.Net.Join/TermsIncludingScoreQuery.cs
deleted file mode 100644
index 9f3befc..0000000
--- a/Lucene.Net.Join/TermsIncludingScoreQuery.cs
+++ /dev/null
@@ -1,472 +0,0 @@
-using System.Collections.Generic;
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- internal class TermsIncludingScoreQuery : Query
- {
- private readonly string _field;
- private readonly bool _multipleValuesPerDocument;
- private readonly BytesRefHash _terms;
- private readonly float[] _scores;
- private readonly int[] _ords;
- private readonly Query _originalQuery;
- private readonly Query _unwrittenOriginalQuery;
-
- internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
- float[] scores, Query originalQuery)
- {
- _field = field;
- _multipleValuesPerDocument = multipleValuesPerDocument;
- _terms = terms;
- _scores = scores;
- _originalQuery = originalQuery;
- _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
- _unwrittenOriginalQuery = originalQuery;
- }
-
- private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
- float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
- {
- _field = field;
- _multipleValuesPerDocument = multipleValuesPerDocument;
- _terms = terms;
- _scores = scores;
- _originalQuery = originalQuery;
- _ords = ords;
- _unwrittenOriginalQuery = unwrittenOriginalQuery;
- }
-
- public override string ToString(string @string)
- {
- return string.Format("TermsIncludingScoreQuery{{field={0};originalQuery={1}}}", _field,
- _unwrittenOriginalQuery);
- }
-
- public override void ExtractTerms(ISet<Term> terms)
- {
- _originalQuery.ExtractTerms(terms);
- }
-
- public override Query Rewrite(IndexReader reader)
- {
-     // Only the wrapped query can change during rewriting; terms, scores and ords are reused as they are.
-     Query rewrittenOriginal = _originalQuery.Rewrite(reader);
-     if (rewrittenOriginal == _originalQuery)
-     {
-         return this;
-     }
-
-     // The pre-rewrite query is passed along because ToString(), Equals() and GetHashCode() are based on it.
-     Query result = new TermsIncludingScoreQuery(_field, _multipleValuesPerDocument, _terms, _scores,
-         _ords, rewrittenOriginal, _originalQuery);
-     result.Boost = Boost;
-     return result;
- }
-
- protected bool Equals(TermsIncludingScoreQuery other)
- {
- return base.Equals(other) && string.Equals(_field, other._field) &&
- Equals(_unwrittenOriginalQuery, other._unwrittenOriginalQuery);
- }
-
- public override bool Equals(object obj)
- {
- if (ReferenceEquals(null, obj)) return false;
- if (ReferenceEquals(this, obj)) return true;
- if (obj.GetType() != GetType()) return false;
- return Equals((TermsIncludingScoreQuery) obj);
- }
-
- public override int GetHashCode()
- {
- unchecked
- {
- int hashCode = base.GetHashCode();
- hashCode = (hashCode*397) ^ (_field != null ? _field.GetHashCode() : 0);
- hashCode = (hashCode*397) ^
- (_unwrittenOriginalQuery != null ? _unwrittenOriginalQuery.GetHashCode() : 0);
- return hashCode;
- }
- }
-
- public override Weight CreateWeight(IndexSearcher searcher)
- {
- Weight originalWeight = _originalQuery.CreateWeight(searcher);
- return new WeightAnonymousInnerClassHelper(this, originalWeight);
- }
-
- private class WeightAnonymousInnerClassHelper : Weight
- {
- private readonly TermsIncludingScoreQuery outerInstance;
-
- private Weight originalWeight;
-
- public WeightAnonymousInnerClassHelper(TermsIncludingScoreQuery outerInstance, Weight originalWeight)
- {
- this.outerInstance = outerInstance;
- this.originalWeight = originalWeight;
- }
-
-
- private TermsEnum segmentTermsEnum;
-
- public override Explanation Explain(AtomicReaderContext context, int doc)
- {
- SVInnerScorer scorer = (SVInnerScorer) BulkScorer(context, false, null);
- if (scorer != null)
- {
- return scorer.Explain(doc);
- }
- return new ComplexExplanation(false, 0.0f, "Not a match");
- }
-
- public override bool ScoresDocsOutOfOrder()
- {
- // We have optimized impls below if we are allowed
- // to score out-of-order:
- return true;
- }
-
- public override Query Query
- {
- get { return outerInstance; }
- }
-
- public override float ValueForNormalization
- {
- get { return originalWeight.ValueForNormalization*outerInstance.Boost*outerInstance.Boost; }
- }
-
- public override void Normalize(float norm, float topLevelBoost)
- {
- originalWeight.Normalize(norm, topLevelBoost*outerInstance.Boost);
- }
-
- public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
- {
- Terms terms = context.AtomicReader.Terms(outerInstance._field);
- if (terms == null)
- {
- return null;
- }
-
- // what is the runtime...seems ok?
- long cost = context.AtomicReader.MaxDoc * terms.Size();
-
- segmentTermsEnum = terms.Iterator(segmentTermsEnum);
- if (outerInstance._multipleValuesPerDocument)
- {
- return new MVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost);
- }
-
- return new SVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost);
- }
-
- public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs)
- {
- if (scoreDocsInOrder)
- {
- return base.BulkScorer(context, scoreDocsInOrder, acceptDocs);
- }
-
- Terms terms = context.AtomicReader.Terms(outerInstance._field);
- if (terms == null)
- {
- return null;
- }
- // what is the runtime...seems ok?
- long cost = context.AtomicReader.MaxDoc * terms.Size();
-
- segmentTermsEnum = terms.Iterator(segmentTermsEnum);
- // Optimized impls that take advantage of docs
- // being allowed to be out of order:
- if (outerInstance._multipleValuesPerDocument)
- {
- return new MVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost);
- }
-
- return new SVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, cost);
- }
- }
-
- // This impl assumes that the 'join' values are used uniquely per doc per field. Used for one to many relations.
- internal class SVInnerScorer : BulkScorer
- {
- private readonly TermsIncludingScoreQuery outerInstance;
-
- private readonly BytesRef _spare = new BytesRef();
- private readonly Bits _acceptDocs;
- private readonly TermsEnum _termsEnum;
- private readonly long _cost;
-
- private int _upto;
- internal DocsEnum DocsEnum;
- private DocsEnum _reuse;
- private int _scoreUpto;
- private int _doc;
-
- internal SVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, TermsEnum termsEnum, long cost)
- {
- this.outerInstance = outerInstance;
- _acceptDocs = acceptDocs;
- _termsEnum = termsEnum;
- _cost = cost;
- _doc = -1;
- }
-
- public override bool Score(Collector collector, int max)
- {
- FakeScorer fakeScorer = new FakeScorer();
- collector.Scorer = fakeScorer;
- if (_doc == -1)
- {
- _doc = NextDocOutOfOrder();
- }
- while (_doc < max)
- {
- fakeScorer.doc = _doc;
- fakeScorer._score = outerInstance._scores[outerInstance._ords[_scoreUpto]];
- collector.Collect(_doc);
- _doc = NextDocOutOfOrder();
- }
-
- return _doc != DocIdSetIterator.NO_MORE_DOCS;
- }
-
- private int NextDocOutOfOrder()
- {
- while (true)
- {
- if (DocsEnum != null)
- {
- int docId = DocsEnumNextDoc();
- if (docId == DocIdSetIterator.NO_MORE_DOCS)
- {
- DocsEnum = null;
- }
- else
- {
- return _doc = docId;
- }
- }
-
- if (_upto == outerInstance._terms.Size())
- {
- return _doc = DocIdSetIterator.NO_MORE_DOCS;
- }
-
- _scoreUpto = _upto;
- if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare)))
- {
- DocsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsEnum.FLAG_NONE);
- }
- }
- }
-
- protected virtual int DocsEnumNextDoc()
- {
- return DocsEnum.NextDoc();
- }
-
- internal Explanation Explain(int target)
- {
- int docId;
- do
- {
- docId = NextDocOutOfOrder();
- if (docId < target)
- {
- int tempDocId = DocsEnum.Advance(target);
- if (tempDocId == target)
- {
- docId = tempDocId;
- break;
- }
- }
- else if (docId == target)
- {
- break;
- }
- DocsEnum = null; // goto the next ord.
- } while (docId != DocIdSetIterator.NO_MORE_DOCS);
-
- return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]],
- "Score based on join value " + _termsEnum.Term().Utf8ToString());
- }
- }
-
- // This impl tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted
- // twice for different join values. This means that the first encountered join value determines the score of a document
- // even if other join values yield a higher score.
- internal class MVInnerScorer : SVInnerScorer
- {
-     // Sized to maxDoc; a bit is set the first time DocsEnumNextDoc() returns that document.
-     internal readonly FixedBitSet alreadyEmittedDocs;
-
-     // The outer query is only forwarded to the base class; this class kept a second private copy
-     // that was assigned but never read, so that copy has been dropped.
-     internal MVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
-         TermsEnum termsEnum, int maxDoc, long cost) : base(outerInstance, weight, acceptDocs, termsEnum, cost)
-     {
-         alreadyEmittedDocs = new FixedBitSet(maxDoc);
-     }
-
-     /// <summary>
-     /// Advances the current <c>DocsEnum</c>, skipping documents that were already emitted for an earlier term.
-     /// </summary>
-     /// <returns>The next not-yet-emitted doc id, or <c>DocIdSetIterator.NO_MORE_DOCS</c> when the enum is exhausted.</returns>
-     protected override int DocsEnumNextDoc()
-     {
-         while (true)
-         {
-             int docId = DocsEnum.NextDoc();
-             if (docId == DocIdSetIterator.NO_MORE_DOCS)
-             {
-                 return docId;
-             }
-             if (!alreadyEmittedDocs.GetAndSet(docId))
-             {
-                 return docId; //if it wasn't previously set, return it
-             }
-         }
-     }
- }
-
- /// <summary>
- /// In-order scorer: the constructor eagerly resolves every collected term to its matching documents,
- /// stores one score per document and then iterates the resulting bit set.
- /// </summary>
- internal class SVInOrderScorer : Scorer
- {
-     // Protected (not private) on purpose: FillDocsAndScores() is virtual and is called from this
-     // constructor, i.e. before a derived constructor body runs. Derived classes must read the outer
-     // query through this field; a private copy assigned in the derived constructor would still be
-     // null when the override executes.
-     protected readonly TermsIncludingScoreQuery outerInstance;
-
-     internal readonly DocIdSetIterator matchingDocsIterator;
-     internal readonly float[] scores;
-     internal readonly long cost_Renamed;
-
-     internal int currentDoc = -1;
-
-     internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
-         TermsEnum termsEnum, int maxDoc, long cost) : base(weight)
-     {
-         // Everything FillDocsAndScores() relies on (outerInstance, scores) must be set before the call.
-         this.outerInstance = outerInstance;
-         FixedBitSet matchingDocs = new FixedBitSet(maxDoc);
-         scores = new float[maxDoc];
-         FillDocsAndScores(matchingDocs, acceptDocs, termsEnum);
-         matchingDocsIterator = matchingDocs.GetIterator();
-         cost_Renamed = cost;
-     }
-
-     /// <summary>
-     /// Marks every document that contains one of the collected terms and records that term's score for it.
-     /// Called from the constructor, so overrides may only use state initialised before that call.
-     /// </summary>
-     protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
-         TermsEnum termsEnum)
-     {
-         BytesRef spare = new BytesRef();
-         DocsEnum docsEnum = null;
-         for (int i = 0; i < outerInstance._terms.Size(); i++)
-         {
-             if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
-             {
-                 docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
-                 float score = outerInstance._scores[outerInstance._ords[i]];
-                 for (int doc = docsEnum.NextDoc();
-                     doc != NO_MORE_DOCS;
-                     doc = docsEnum.NextDoc())
-                 {
-                     matchingDocs.Set(doc);
-                     // In case the same doc is also related to another doc, a score might be overwritten. I think this
-                     // can only happen in a many-to-many relation
-                     scores[doc] = score;
-                 }
-             }
-         }
-     }
-
-     public override float Score()
-     {
-         return scores[currentDoc];
-     }
-
-     public override int Freq()
-     {
-         return 1;
-     }
-
-     public override int DocID()
-     {
-         return currentDoc;
-     }
-
-     public override int NextDoc()
-     {
-         return currentDoc = matchingDocsIterator.NextDoc();
-     }
-
-     public override int Advance(int target)
-     {
-         return currentDoc = matchingDocsIterator.Advance(target);
-     }
-
-     public override long Cost()
-     {
-         return cost_Renamed;
-     }
- }
-
- // This scorer deals with the fact that a document can have more than one score from multiple related documents.
- internal class MVInOrderScorer : SVInOrderScorer
- {
-     // No own copy of the outer query: the override below runs inside the base constructor and
-     // therefore uses the protected base field, which is already assigned at that point.
-     internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
-         TermsEnum termsEnum, int maxDoc, long cost)
-         : base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost)
-     {
-     }
-
-     /// <summary>
-     /// Same walk as the base implementation, but the first term that matches a document decides its score;
-     /// later terms never overwrite it.
-     /// </summary>
-     protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
-         TermsEnum termsEnum)
-     {
-         BytesRef spare = new BytesRef();
-         DocsEnum docsEnum = null;
-         for (int i = 0; i < outerInstance._terms.Size(); i++)
-         {
-             if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
-             {
-                 docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
-                 float score = outerInstance._scores[outerInstance._ords[i]];
-                 for (int doc = docsEnum.NextDoc();
-                     doc != NO_MORE_DOCS;
-                     doc = docsEnum.NextDoc())
-                 {
-                     // I prefer this:
-                     /*if (scores[doc] < score) {
-                       scores[doc] = score;
-                       matchingDocs.set(doc);
-                     }*/
-                     // But this behaves the same as MVInnerScorer and only then the tests will pass:
-                     if (!matchingDocs.Get(doc))
-                     {
-                         scores[doc] = score;
-                         matchingDocs.Set(doc);
-                     }
-                 }
-             }
-         }
-     }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/TermsQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsQuery.cs b/Lucene.Net.Join/TermsQuery.cs
deleted file mode 100644
index 2d5ccf8..0000000
--- a/Lucene.Net.Join/TermsQuery.cs
+++ /dev/null
@@ -1,147 +0,0 @@
-using System.Collections.Generic;
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// A query that has an array of terms from a specific field. This query will match documents that have one or more
- /// terms in the specified field that match with the terms specified in the array.
- ///
- /// @lucene.experimental
- /// </summary>
- internal class TermsQuery : MultiTermQuery
- {
-     private readonly BytesRefHash _terms;
-     private readonly int[] _ords;
-     private readonly Query _fromQuery; // Only takes part in Equals() / GetHashCode()
-
-     /// <summary>
-     /// Creates a query over <paramref name="field"/> for the given set of terms.
-     /// </summary>
-     /// <param name="field">The field that should contain the given terms.</param>
-     /// <param name="fromQuery">The query the terms originate from; only used for equality and hashing.</param>
-     /// <param name="terms">The terms that matching documents should have. The constructor sorts them with
-     /// <c>BytesRef.UTF8SortedAsUnicodeComparer</c>.</param>
-     internal TermsQuery(string field, Query fromQuery, BytesRefHash terms) : base(field)
-     {
-         _fromQuery = fromQuery;
-         _terms = terms;
-         _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
-     }
-
-     public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
-     {
-         // Without any term there is nothing to seek to (and the enum below would index ords[-1]).
-         if (_terms.Size() == 0)
-         {
-             return TermsEnum.EMPTY;
-         }
-
-         return new SeekingTermSetTermsEnum(terms.Iterator(null), _terms, _ords);
-     }
-
-     public override string ToString(string field)
-     {
-         // Print the field this query runs against (the inherited Field), not the caller-supplied
-         // default field argument, which happens to share the name "field".
-         return string.Format("TermsQuery{{field={0}}}", Field);
-     }
-
-     /// <summary>
-     /// Two instances are equal when the base query state matches and they stem from equal from-queries.
-     /// </summary>
-     public override bool Equals(object obj)
-     {
-         if (ReferenceEquals(null, obj)) return false;
-         if (ReferenceEquals(this, obj)) return true;
-         if (!base.Equals(obj)) return false;
-         if (obj.GetType() != GetType()) return false;
-         return Equals(_fromQuery, ((TermsQuery) obj)._fromQuery);
-     }
-
-     public override int GetHashCode()
-     {
-         unchecked
-         {
-             return base.GetHashCode() + 31 * (_fromQuery != null ? _fromQuery.GetHashCode() : 0);
-         }
-     }
-
-     /// <summary>
-     /// Walks the segment's term dictionary and the sorted query terms in lock-step, seeking the
-     /// dictionary forward to the next query term instead of visiting every dictionary term.
-     /// </summary>
-     private class SeekingTermSetTermsEnum : FilteredTermsEnum
-     {
-         private readonly BytesRefHash _termSet;
-         private readonly int[] _sortedOrds;
-         private readonly int _lastElement;
-
-         private readonly BytesRef _lastTerm;
-         private readonly BytesRef _spare = new BytesRef();
-         private readonly IComparer<BytesRef> _comparator;
-
-         private BytesRef _seekTerm;
-         private int _upto;
-
-         internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) : base(tenum)
-         {
-             _termSet = terms;
-             _sortedOrds = ords;
-             _comparator = BytesRef.UTF8SortedAsUnicodeComparer;
-             _lastElement = terms.Size() - 1;
-             // _lastTerm gets its own BytesRef because _spare is overwritten by every later Get().
-             _lastTerm = terms.Get(ords[_lastElement], new BytesRef());
-             _seekTerm = terms.Get(ords[_upto], _spare);
-         }
-
-         // Hands out the pending seek target exactly once; null means "no further seek requested".
-         protected override BytesRef NextSeekTerm(BytesRef currentTerm)
-         {
-             BytesRef temp = _seekTerm;
-             _seekTerm = null;
-             return temp;
-         }
-
-         protected override AcceptStatus Accept(BytesRef term)
-         {
-             // Past the largest query term: nothing further can match.
-             if (_comparator.Compare(term, _lastTerm) > 0)
-             {
-                 return AcceptStatus.END;
-             }
-
-             BytesRef currentTerm = _termSet.Get(_sortedOrds[_upto], _spare);
-             if (_comparator.Compare(term, currentTerm) == 0)
-             {
-                 if (_upto == _lastElement)
-                 {
-                     return AcceptStatus.YES;
-                 }
-
-                 _seekTerm = _termSet.Get(_sortedOrds[++_upto], _spare);
-                 return AcceptStatus.YES_AND_SEEK;
-             }
-
-             // Our current term doesn't match the given term.
-             if (_upto == _lastElement)
-             {
-                 return AcceptStatus.NO;
-             }
-
-             int cmp;
-             do // We may be behind the given term by more than one step. Keep incrementing till we're the same or higher.
-             {
-                 if (_upto == _lastElement)
-                 {
-                     return AcceptStatus.NO;
-                 }
-                 // typically the terms dict is a superset of query's terms so it's unusual that we have to skip many of
-                 // our terms so we don't do a binary search here
-                 _seekTerm = _termSet.Get(_sortedOrds[++_upto], _spare);
-             } while ((cmp = _comparator.Compare(_seekTerm, term)) < 0);
-             if (cmp == 0)
-             {
-                 if (_upto == _lastElement)
-                 {
-                     return AcceptStatus.YES;
-                 }
-                 _seekTerm = _termSet.Get(_sortedOrds[++_upto], _spare);
-                 return AcceptStatus.YES_AND_SEEK;
-             }
-
-             return AcceptStatus.NO_AND_SEEK;
-         }
-     }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/TermsWithScoreCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsWithScoreCollector.cs b/Lucene.Net.Join/TermsWithScoreCollector.cs
deleted file mode 100644
index e823293..0000000
--- a/Lucene.Net.Join/TermsWithScoreCollector.cs
+++ /dev/null
@@ -1,333 +0,0 @@
-using System;
-using Lucene.Net.Index;
-using Lucene.Net.Search;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Join
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- internal abstract class TermsWithScoreCollector : Collector
- {
- private const int InitialArraySize = 256;
-
- private readonly string _field;
- private readonly BytesRefHash _collectedTerms = new BytesRefHash();
- private readonly ScoreMode _scoreMode;
-
- private Scorer _scorer;
- private float[] _scoreSums = new float[InitialArraySize];
-
- internal TermsWithScoreCollector(string field, ScoreMode scoreMode)
- {
- this._field = field;
- this._scoreMode = scoreMode;
- }
-
- public BytesRefHash CollectedTerms
- {
- get
- {
- return _collectedTerms;
- }
- }
-
- public virtual float[] ScoresPerTerm
- {
- get
- {
- return _scoreSums;
- }
- }
-
- //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
- //ORIGINAL LINE: @Override public void setScorer(org.apache.lucene.search.Scorer scorer) throws java.io.IOException
- public override Scorer Scorer
- {
- set
- {
- _scorer = value;
- }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return true;
- }
-
- /// <summary>
- /// Picks the <see cref="TermsWithScoreCollector"/> implementation for the given field layout and score mode.
- /// </summary>
- /// <param name="field">The field to collect terms for.</param>
- /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
- /// <param name="scoreMode">How scores of documents sharing a term are combined; <c>ScoreMode.Avg</c> selects the averaging variants.</param>
- /// <returns>A <see cref="TermsWithScoreCollector"/> instance</returns>
- internal static TermsWithScoreCollector Create(string field, bool multipleValuesPerDocument, ScoreMode scoreMode)
- {
-     bool averaging = scoreMode == ScoreMode.Avg;
-
-     if (multipleValuesPerDocument)
-     {
-         return averaging ? new Mv.Avg(field) : new Mv(field, scoreMode);
-     }
-
-     return averaging ? new Sv.Avg(field) : new Sv(field, scoreMode);
- }
-
- // impl that works with single value per document
- internal class Sv : TermsWithScoreCollector
- {
- private readonly BytesRef _spare = new BytesRef();
- private BinaryDocValues _fromDocTerms;
-
- internal Sv(string field, ScoreMode scoreMode) : base(field, scoreMode)
- {
- }
-
- public override void Collect(int doc)
- {
- _fromDocTerms.Get(doc, _spare);
- int ord = _collectedTerms.Add(_spare);
- if (ord < 0)
- {
- ord = -ord - 1;
- }
- else
- {
- if (ord >= _scoreSums.Length)
- {
- _scoreSums = ArrayUtil.Grow(_scoreSums);
- }
- }
-
- float current = _scorer.Score();
- float existing = _scoreSums[ord];
- if (existing.CompareTo(0.0f) == 0)
- {
- _scoreSums[ord] = current;
- }
- else
- {
- switch (_scoreMode)
- {
- case ScoreMode.Total:
- _scoreSums[ord] = _scoreSums[ord] + current;
- break;
- case ScoreMode.Max:
- if (current > existing)
- {
- _scoreSums[ord] = current;
- }
- break;
- }
- }
- }
-
- public override AtomicReaderContext NextReader
- {
- set
- {
- _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false);
- }
- }
-
- public override bool AcceptsDocsOutOfOrder()
- {
- return base.AcceptsDocsOutOfOrder();
- }
-
- internal class Avg : Sv
- {
- private int[] _scoreCounts = new int[InitialArraySize];
-
- internal Avg(string field) : base(field, ScoreMode.Avg)
- {
- }
-
- public override void Collect(int doc)
- {
- _fromDocTerms.Get(doc, _spare);
- int ord = _collectedTerms.Add(_spare);
- if (ord < 0)
- {
- ord = -ord - 1;
- }
- else
- {
- if (ord >= _scoreSums.Length)
- {
- _scoreSums = ArrayUtil.Grow(_scoreSums);
- _scoreCounts = ArrayUtil.Grow(_scoreCounts);
- }
- }
-
- float current = _scorer.Score();
- float existing = _scoreSums[ord];
- if (existing.CompareTo(0.0f) == 0)
- {
- _scoreSums[ord] = current;
- _scoreCounts[ord] = 1;
- }
- else
- {
- _scoreSums[ord] = _scoreSums[ord] + current;
- _scoreCounts[ord]++;
- }
- }
-
- public override float[] ScoresPerTerm
- {
- get
- {
- if (_scoreCounts != null)
- {
- for (int i = 0; i < _scoreCounts.Length; i++)
- {
- _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
- }
- _scoreCounts = null;
- }
- return _scoreSums;
- }
- }
- }
- }
-
- // impl that works with multiple values per document
- internal class Mv : TermsWithScoreCollector
- {
-     private SortedSetDocValues _fromDocTermOrds;
-     // Scratch buffer filled by LookupOrd() for every ordinal of the current document.
-     private readonly BytesRef _scratch = new BytesRef();
-
-     internal Mv(string field, ScoreMode scoreMode) : base(field, scoreMode)
-     {
-     }
-
-     /// <summary>
-     /// Adds every term of <paramref name="doc"/> to the collected terms and folds the current score
-     /// into that term's slot according to the score mode (only Total and Max are handled here).
-     /// </summary>
-     public override void Collect(int doc)
-     {
-         _fromDocTermOrds.Document = doc;
-         long ord;
-         while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
-         {
-             _fromDocTermOrds.LookupOrd(ord, _scratch);
-
-             int termId = _collectedTerms.Add(_scratch);
-             if (termId < 0)
-             {
-                 // A negative id is decoded back to the slot the term already occupies.
-                 termId = -termId - 1;
-             }
-             else
-             {
-                 if (termId >= _scoreSums.Length)
-                 {
-                     _scoreSums = ArrayUtil.Grow(_scoreSums);
-                 }
-             }
-
-             switch (_scoreMode)
-             {
-                 case ScoreMode.Total:
-                     _scoreSums[termId] += _scorer.Score();
-                     break;
-                 case ScoreMode.Max:
-                     _scoreSums[termId] = Math.Max(_scoreSums[termId], _scorer.Score());
-                     break;
-             }
-         }
-     }
-
-     /// <summary>
-     /// Switches to the doc-term ordinals of the next segment.
-     /// </summary>
-     public override AtomicReaderContext NextReader
-     {
-         set
-         {
-             _fromDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field);
-         }
-     }
-
-     public override bool AcceptsDocsOutOfOrder()
-     {
-         // Previously threw NotImplementedException. The base collector accepts out-of-order
-         // documents and Sv simply delegates to it, so do the same here.
-         return base.AcceptsDocsOutOfOrder();
-     }
-
-     internal class Avg : Mv
-     {
-         // Number of scores added per term id; set to null once the averages have been computed.
-         private int[] _scoreCounts = new int[InitialArraySize];
-
-         internal Avg(string field) : base(field, ScoreMode.Avg)
-         {
-         }
-
-         /// <summary>
-         /// Like the base implementation, but always sums the score and counts the contribution per term.
-         /// </summary>
-         public override void Collect(int doc)
-         {
-             _fromDocTermOrds.Document = doc;
-             long ord;
-             while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
-             {
-                 _fromDocTermOrds.LookupOrd(ord, _scratch);
-
-                 int termId = _collectedTerms.Add(_scratch);
-                 if (termId < 0)
-                 {
-                     termId = -termId - 1;
-                 }
-                 else
-                 {
-                     if (termId >= _scoreSums.Length)
-                     {
-                         _scoreSums = ArrayUtil.Grow(_scoreSums);
-                         _scoreCounts = ArrayUtil.Grow(_scoreCounts);
-                     }
-                 }
-
-                 _scoreSums[termId] += _scorer.Score();
-                 _scoreCounts[termId]++;
-             }
-         }
-
-         /// <summary>
-         /// Returns the per-term averages. The sums are divided by the counts in place on the first
-         /// call; later calls return the same, already averaged array.
-         /// </summary>
-         public override float[] ScoresPerTerm
-         {
-             get
-             {
-                 if (_scoreCounts != null)
-                 {
-                     for (int i = 0; i < _scoreCounts.Length; i++)
-                     {
-                         _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
-                     }
-                     _scoreCounts = null;
-                 }
-                 return _scoreSums;
-             }
-         }
-     }
- }
-
- }
-}
\ No newline at end of file
[12/17] lucenenet git commit: Lucene.Net.Join tests now passing
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Grouping/TopGroups.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/TopGroups.cs b/src/Lucene.Net.Grouping/TopGroups.cs
new file mode 100644
index 0000000..017c975
--- /dev/null
+++ b/src/Lucene.Net.Grouping/TopGroups.cs
@@ -0,0 +1,249 @@
+using System;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Grouping
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Represents result returned by a grouping search.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class TopGroups<TGroupValueType>
+ {
+ /// <summary>
+ /// Number of documents matching the search </summary>
+ public readonly int TotalHitCount;
+
+ /// <summary>
+ /// Number of documents grouped into the topN groups </summary>
+ public readonly int TotalGroupedHitCount;
+
+ /// <summary>
+ /// The total number of unique groups. If <code>null</code> this value is not computed. </summary>
+ public readonly int? TotalGroupCount;
+
+ /// <summary>
+ /// Group results in groupSort order </summary>
+ public readonly GroupDocs<TGroupValueType>[] Groups;
+
+ /// <summary>
+ /// How groups are sorted against each other </summary>
+ public readonly SortField[] GroupSort;
+
+ /// <summary>
+ /// How docs are sorted within each group </summary>
+ public readonly SortField[] WithinGroupSort;
+
+ /// <summary>
+ /// Highest score across all hits, or
+ /// <c>float.NaN</c> if scores were not computed.
+ /// </summary>
+ public readonly float MaxScore;
+
+ public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValueType>[] groups, float maxScore)
+ {
+ GroupSort = groupSort;
+ WithinGroupSort = withinGroupSort;
+ TotalHitCount = totalHitCount;
+ TotalGroupedHitCount = totalGroupedHitCount;
+ Groups = groups;
+ TotalGroupCount = null;
+ MaxScore = maxScore;
+ }
+
+ public TopGroups(TopGroups<TGroupValueType> oldTopGroups, int? totalGroupCount)
+ {
+ GroupSort = oldTopGroups.GroupSort;
+ WithinGroupSort = oldTopGroups.WithinGroupSort;
+ TotalHitCount = oldTopGroups.TotalHitCount;
+ TotalGroupedHitCount = oldTopGroups.TotalGroupedHitCount;
+ Groups = oldTopGroups.Groups;
+ MaxScore = oldTopGroups.MaxScore;
+ TotalGroupCount = totalGroupCount;
+ }
+
+ /// <summary>
+ /// How the GroupDocs score (if any) should be merged. </summary>
+ public enum ScoreMergeMode
+ {
+ /// <summary>
+ /// Set score to <c>float.NaN</c>
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Sum score across all shards for this group.
+ /// </summary>
+ Total,
+
+ /// <summary>
+ /// Avg score across all shards for this group.
+ /// </summary>
+ Avg,
+ }
+
+ /// <summary>
+ /// Merges an array of TopGroups, for example obtained from the second-pass
+ /// collector across multiple shards. Each TopGroups must have been sorted by the
+ /// same groupSort and docSort, and the top groups passed to all second-pass
+ /// collectors must be the same.
+ ///
+ /// <b>NOTE</b>: We can't always compute an exact totalGroupCount.
+ /// Documents belonging to a group may occur on more than
+ /// one shard and thus the merged totalGroupCount can be
+ /// higher than the actual totalGroupCount. In this case the
+ /// totalGroupCount represents an upper bound. If the documents
+ /// of each group reside on only one shard then the
+ /// totalGroupCount is exact.
+ ///
+ /// <b>NOTE</b>: the topDocs in each GroupDocs is actually
+ /// an instance of TopDocsAndShards
+ /// </summary>
+ public static TopGroups<T> Merge<T>(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN, ScoreMergeMode scoreMergeMode)
+ {
+ //System.out.println("TopGroups.merge");
+
+ if (shardGroups.Length == 0)
+ {
+ return null;
+ }
+
+ int totalHitCount = 0;
+ int totalGroupedHitCount = 0;
+ // Optionally merge the totalGroupCount.
+ int? totalGroupCount = null;
+
+ int numGroups = shardGroups[0].Groups.Length;
+ foreach (var shard in shardGroups)
+ {
+ if (numGroups != shard.Groups.Length)
+ {
+ throw new ArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+ totalHitCount += shard.TotalHitCount;
+ totalGroupedHitCount += shard.TotalGroupedHitCount;
+ if (shard.TotalGroupCount != null)
+ {
+ if (totalGroupCount == null)
+ {
+ totalGroupCount = 0;
+ }
+
+ totalGroupCount += shard.TotalGroupCount;
+ }
+ }
+
+ var mergedGroupDocs = new GroupDocs<T>[numGroups];
+
+ TopDocs[] shardTopDocs = new TopDocs[shardGroups.Length];
+ float totalMaxScore = float.MinValue;
+
+ for (int groupIDX = 0; groupIDX < numGroups; groupIDX++)
+ {
+ T groupValue = shardGroups[0].Groups[groupIDX].GroupValue;
+ //System.out.println(" merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
+ float maxScore = float.MinValue;
+ int totalHits = 0;
+ double scoreSum = 0.0;
+ for (int shardIdx = 0; shardIdx < shardGroups.Length; shardIdx++)
+ {
+ //System.out.println(" shard=" + shardIDX);
+ TopGroups<T> shard = shardGroups[shardIdx];
+ var shardGroupDocs = shard.Groups[groupIDX];
+ if (groupValue == null)
+ {
+ if (shardGroupDocs.GroupValue != null)
+ {
+ throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+ }
+ else if (!groupValue.Equals(shardGroupDocs.GroupValue))
+ {
+ throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+
+ /*
+ for(ScoreDoc sd : shardGroupDocs.scoreDocs) {
+ System.out.println(" doc=" + sd.doc);
+ }
+ */
+
+ shardTopDocs[shardIdx] = new TopDocs(shardGroupDocs.TotalHits, shardGroupDocs.ScoreDocs, shardGroupDocs.MaxScore);
+ maxScore = Math.Max(maxScore, shardGroupDocs.MaxScore);
+ totalHits += shardGroupDocs.TotalHits;
+ scoreSum += shardGroupDocs.Score;
+ }
+
+ TopDocs mergedTopDocs = TopDocs.Merge(docSort, docOffset + docTopN, shardTopDocs);
+
+ // Slice;
+ ScoreDoc[] mergedScoreDocs;
+ if (docOffset == 0)
+ {
+ mergedScoreDocs = mergedTopDocs.ScoreDocs;
+ }
+ else if (docOffset >= mergedTopDocs.ScoreDocs.Length)
+ {
+ mergedScoreDocs = new ScoreDoc[0];
+ }
+ else
+ {
+ mergedScoreDocs = new ScoreDoc[mergedTopDocs.ScoreDocs.Length - docOffset];
+ Array.Copy(mergedTopDocs.ScoreDocs, docOffset, mergedScoreDocs, 0, mergedTopDocs.ScoreDocs.Length - docOffset);
+ }
+
+ float groupScore;
+ switch (scoreMergeMode)
+ {
+ case ScoreMergeMode.None:
+ groupScore = float.NaN;
+ break;
+ case ScoreMergeMode.Avg:
+ if (totalHits > 0)
+ {
+ groupScore = (float)(scoreSum / totalHits);
+ }
+ else
+ {
+ groupScore = float.NaN;
+ }
+ break;
+ case ScoreMergeMode.Total:
+ groupScore = (float)scoreSum;
+ break;
+ default:
+ throw new ArgumentException("can't handle ScoreMergeMode " + scoreMergeMode);
+ }
+
+ //System.out.println("SHARDS=" + Arrays.toString(mergedTopDocs.shardIndex));
+ mergedGroupDocs[groupIDX] = new GroupDocs<T>(groupScore, maxScore, totalHits, mergedScoreDocs, groupValue, shardGroups[0].Groups[groupIDX].GroupSortValues);
+ totalMaxScore = Math.Max(totalMaxScore, maxScore);
+ }
+
+ if (totalGroupCount != null)
+ {
+ var result = new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
+ return new TopGroups<T>(result, totalGroupCount);
+ }
+
+ return new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/FakeScorer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/FakeScorer.cs b/src/Lucene.Net.Join/FakeScorer.cs
new file mode 100644
index 0000000..42bf91b
--- /dev/null
+++ b/src/Lucene.Net.Join/FakeScorer.cs
@@ -0,0 +1,76 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Stand-in <see cref="Scorer"/> that is passed to <see cref="Collector.Scorer"/> during join collection.
+ /// It only reports the values stored in its <see cref="doc"/> and <see cref="_score"/> fields;
+ /// iteration, frequency, weight and child-scorer access all throw <see cref="NotSupportedException"/>.
+ /// </summary>
+ internal sealed class FakeScorer : Scorer
+ {
+ // Document id returned by DocID(); stays at -1 until a value is assigned.
+ internal int doc = -1;
+
+ // Value returned by Score().
+ internal float _score;
+
+ public FakeScorer() : base(null)
+ {
+ }
+
+ public override float Score()
+ {
+ return _score;
+ }
+
+ public override int DocID()
+ {
+ return doc;
+ }
+
+ public override long Cost()
+ {
+ return 1;
+ }
+
+ public override int Freq()
+ {
+ throw new NotSupportedException("FakeScorer doesn't support Freq()");
+ }
+
+ public override int NextDoc()
+ {
+ throw new NotSupportedException("FakeScorer doesn't support NextDoc()");
+ }
+
+ public override int Advance(int target)
+ {
+ throw new NotSupportedException("FakeScorer doesn't support Advance(int)");
+ }
+
+ public override ICollection<ChildScorer> Children
+ {
+ get { throw new NotSupportedException(); }
+ }
+
+ public override Weight Weight
+ {
+ get { throw new NotSupportedException(); }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs b/src/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
new file mode 100644
index 0000000..da8b0b8
--- /dev/null
+++ b/src/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs
@@ -0,0 +1,62 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A <see cref="CachingWrapperFilter"/> whose cached sets are always <see cref="FixedBitSet"/>
+ /// instances (or the shared empty set), as required for joins.
+ /// </summary>
+ public sealed class FixedBitSetCachingWrapperFilter : CachingWrapperFilter
+ {
+ /// <summary>
+ /// Sole constructor, see <see cref="CachingWrapperFilter"/>.
+ /// </summary>
+ public FixedBitSetCachingWrapperFilter(Filter filter) : base(filter)
+ {
+ }
+
+ /// <summary>
+ /// Converts the filter's result into the form that gets cached.
+ /// </summary>
+ /// <param name="docIdSet">The set produced by the wrapped filter; may be <c>null</c>.</param>
+ /// <param name="reader">The segment reader; its <c>MaxDoc</c> sizes the copied bit set.</param>
+ /// <returns>
+ /// <paramref name="docIdSet"/> itself when it already is a <see cref="FixedBitSet"/>, the empty set when
+ /// there is nothing to iterate, otherwise a new <see cref="FixedBitSet"/> holding the same documents.
+ /// </returns>
+ protected override DocIdSet DocIdSetToCache(DocIdSet docIdSet, AtomicReader reader)
+ {
+ // Already in the required representation: cache it untouched.
+ if (docIdSet is FixedBitSet)
+ {
+ return docIdSet;
+ }
+
+ // This is different from CachingWrapperFilter: every other set, even a cacheable one,
+ // is copied into a FixedBitSet since we require all the cached filters to be FixedBitSets.
+ DocIdSetIterator iterator = docIdSet == null ? null : docIdSet.GetIterator();
+ if (iterator == null)
+ {
+ return EMPTY_DOCIDSET;
+ }
+
+ FixedBitSet bits = new FixedBitSet(reader.MaxDoc);
+ bits.Or(iterator);
+ return bits;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/JoinUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/JoinUtil.cs b/src/Lucene.Net.Join/JoinUtil.cs
new file mode 100644
index 0000000..726731e
--- /dev/null
+++ b/src/Lucene.Net.Join/JoinUtil.cs
@@ -0,0 +1,80 @@
+using System.IO;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Utility for query time joining using TermsQuery and TermsCollector.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public sealed class JoinUtil
+ {
+ // No instances allowed
+ private JoinUtil()
+ {
+ }
+
+ /// <summary>
+ /// Method for query time joining.
+ /// <para>
+ /// Execute the returned query with an <see cref="IndexSearcher"/> to retrieve all documents that have the same terms in the
+ /// to field that match with documents matching the specified fromQuery and have the same terms in the from field.
+ /// </para>
+ /// <para>
+ /// In the case a single document relates to more than one document the <c>multipleValuesPerDocument</c> option
+ /// should be set to true. When <c>multipleValuesPerDocument</c> is set to <c>true</c> only
+ /// the score from the first encountered join value originating from the 'from' side is mapped into the 'to' side,
+ /// even in the case when a second join value related to a specific document yields a higher score. Obviously this
+ /// doesn't apply in the case that <see cref="ScoreMode.None"/> is used, since no scores are computed at all.
+ /// </para>
+ /// <para>
+ /// Memory considerations: During joining all unique join values are kept in memory. On top of that when the scoreMode
+ /// isn't set to <see cref="ScoreMode.None"/> a float value per unique join value is kept in memory for computing scores.
+ /// When scoreMode is set to <see cref="ScoreMode.Avg"/> also an additional integer value is kept in memory per unique
+ /// join value.
+ /// </para>
+ /// </summary>
+ /// <param name="fromField"> The from field to join from </param>
+ /// <param name="multipleValuesPerDocument"> Whether the from field has multiple terms per document </param>
+ /// <param name="toField"> The to field to join to </param>
+ /// <param name="fromQuery"> The query to match documents on the from side </param>
+ /// <param name="fromSearcher"> The searcher that executed the specified fromQuery </param>
+ /// <param name="scoreMode"> Instructs how scores from the fromQuery are mapped to the returned query </param>
+ /// <returns>A <see cref="Query"/> instance that can be used to join documents based on the terms in the from and to field</returns>
+ /// <exception cref="IOException"> If I/O related errors occur </exception>
+ /// <exception cref="System.ArgumentException"> If <paramref name="scoreMode"/> is not one of the defined <see cref="ScoreMode"/> values </exception>
+ public static Query CreateJoinQuery(string fromField, bool multipleValuesPerDocument, string toField, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode)
+ {
+ switch (scoreMode)
+ {
+ case ScoreMode.None:
+ // No scores needed: collect only the join terms and match on them.
+ TermsCollector termsCollector = TermsCollector.Create(fromField, multipleValuesPerDocument);
+ fromSearcher.Search(fromQuery, termsCollector);
+ return new TermsQuery(toField, fromQuery, termsCollector.CollectorTerms);
+ case ScoreMode.Total:
+ case ScoreMode.Max:
+ case ScoreMode.Avg:
+ // Scoring modes: collect a score per join term alongside the terms themselves.
+ TermsWithScoreCollector termsWithScoreCollector = TermsWithScoreCollector.Create(fromField, multipleValuesPerDocument, scoreMode);
+ fromSearcher.Search(fromQuery, termsWithScoreCollector);
+ return new TermsIncludingScoreQuery(toField, multipleValuesPerDocument, termsWithScoreCollector.CollectedTerms, termsWithScoreCollector.ScoresPerTerm, fromQuery);
+ default:
+ throw new System.ArgumentException(string.Format("Score mode {0} isn't supported.", scoreMode));
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/Lucene.Net.Join.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/Lucene.Net.Join.csproj b/src/Lucene.Net.Join/Lucene.Net.Join.csproj
new file mode 100644
index 0000000..72bda4a
--- /dev/null
+++ b/src/Lucene.Net.Join/Lucene.Net.Join.csproj
@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{E8A339C7-FCF6-4A72-8586-56D8961D7B99}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Join</RootNamespace>
+ <AssemblyName>Lucene.Net.Join</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="FakeScorer.cs" />
+ <Compile Include="FixedBitSetCachingWrapperFilter.cs" />
+ <Compile Include="JoinUtil.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="ScoreMode.cs" />
+ <Compile Include="TermsCollector.cs" />
+ <Compile Include="TermsIncludingScoreQuery.cs" />
+ <Compile Include="TermsQuery.cs" />
+ <Compile Include="TermsWithScoreCollector.cs" />
+ <Compile Include="ToChildBlockJoinQuery.cs" />
+ <Compile Include="ToParentBlockJoinCollector.cs" />
+ <Compile Include="ToParentBlockJoinFieldComparator.cs" />
+ <Compile Include="ToParentBlockJoinQuery.cs" />
+ <Compile Include="ToParentBlockJoinSortField.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
+ <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
+ <Name>Lucene.Net.Grouping</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/Properties/AssemblyInfo.cs b/src/Lucene.Net.Join/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..2c17c13
--- /dev/null
+++ b/src/Lucene.Net.Join/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Join")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Join")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("e8a339c7-fcf6-4a72-8586-56d8961d7b99")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ScoreMode.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ScoreMode.cs b/src/Lucene.Net.Join/ScoreMode.cs
new file mode 100644
index 0000000..a5b91be
--- /dev/null
+++ b/src/Lucene.Net.Join/ScoreMode.cs
@@ -0,0 +1,45 @@
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// How to aggregate multiple child hit scores into a single parent score.
+ /// </summary>
+ /// <remarks>
+ /// Members carry no explicit values, so the underlying integers follow declaration order
+ /// (<see cref="None"/> = 0 through <see cref="Total"/> = 3); reordering members changes those values.
+ /// </remarks>
+ public enum ScoreMode
+ {
+ /// <summary>
+ /// Do no scoring.
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Parent hit's score is the average of all child scores.
+ /// </summary>
+ Avg,
+
+ /// <summary>
+ /// Parent hit's score is the max of all child scores.
+ /// </summary>
+ Max,
+
+ /// <summary>
+ /// Parent hit's score is the sum of all child scores.
+ /// </summary>
+ Total
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/TermsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsCollector.cs b/src/Lucene.Net.Join/TermsCollector.cs
new file mode 100644
index 0000000..2ccf1ed
--- /dev/null
+++ b/src/Lucene.Net.Join/TermsCollector.cs
@@ -0,0 +1,127 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A collector that collects all terms from a specified field matching the query.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ internal abstract class TermsCollector : Collector
+ {
+ private readonly string _field;
+ private readonly BytesRefHash _collectorTerms = new BytesRefHash();
+
+ internal TermsCollector(string field)
+ {
+ _field = field;
+ }
+
+ /// <summary>
+ /// The terms gathered so far; every collected document adds its term(s) to this hash.
+ /// </summary>
+ public BytesRefHash CollectorTerms
+ {
+ get
+ {
+ return _collectorTerms;
+ }
+ }
+
+ /// <summary>
+ /// Ignored: collecting terms does not use scores.
+ /// </summary>
+ public override Scorer Scorer
+ {
+ set {}
+ }
+
+ /// <summary>
+ /// Always <c>true</c>. Both nested implementations inherit this answer; neither overrides it.
+ /// </summary>
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return true;
+ }
+
+ /// <summary>
+ /// Chooses the right <see cref="TermsCollector"/> implementation.
+ /// </summary>
+ /// <param name="field">The field to collect terms for.</param>
+ /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
+ /// <returns>A <see cref="TermsCollector"/> instance.</returns>
+ internal static TermsCollector Create(string field, bool multipleValuesPerDocument)
+ {
+ return multipleValuesPerDocument ? (TermsCollector) new MV(field) : new SV(field);
+ }
+
+ // impl that works with multiple values per document
+ private class MV : TermsCollector
+ {
+ private readonly BytesRef _scratch = new BytesRef();
+ private SortedSetDocValues _docTermOrds;
+
+ internal MV(string field) : base(field)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ // Position on the document, then add every term ordinal it carries.
+ _docTermOrds.Document = doc;
+ long ord;
+ while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+ {
+ _docTermOrds.LookupOrd(ord, _scratch);
+ _collectorTerms.Add(_scratch);
+ }
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set { _docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field); }
+ }
+
+ // AcceptsDocsOutOfOrder() is deliberately not overridden here: the previous override threw
+ // NotImplementedException, which made this collector fail whenever that method was called on it.
+ }
+
+ // impl that works with single value per document
+ private class SV : TermsCollector
+ {
+ private readonly BytesRef _spare = new BytesRef();
+ private BinaryDocValues _fromDocTerms;
+
+ internal SV(string field) : base(field)
+ {
+ }
+
+ public override void Collect(int doc)
+ {
+ _fromDocTerms.Get(doc, _spare);
+ _collectorTerms.Add(_spare);
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set { _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); }
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs b/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
new file mode 100644
index 0000000..9f3befc
--- /dev/null
+++ b/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
@@ -0,0 +1,472 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ internal class TermsIncludingScoreQuery : Query
+ {
+ private readonly string _field;
+ private readonly bool _multipleValuesPerDocument;
+ private readonly BytesRefHash _terms;
+ private readonly float[] _scores;
+ private readonly int[] _ords;
+ private readonly Query _originalQuery;
+ private readonly Query _unwrittenOriginalQuery;
+
+ // Entry point for a not-yet-rewritten query: the term ordinals are obtained by sorting the term hash,
+ // and the original query doubles as the "unwritten" query used for ToString/Equals/GetHashCode.
+ internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
+ float[] scores, Query originalQuery)
+ : this(field, multipleValuesPerDocument, terms, scores,
+ terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer), originalQuery, originalQuery)
+ {
+ }
+
+ // Full constructor: stores every field as given. Rewrite() uses it to carry the already
+ // sorted ordinals and the pre-rewrite query over to the rewritten instance.
+ private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
+ float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
+ {
+ _field = field;
+ _multipleValuesPerDocument = multipleValuesPerDocument;
+
+ _terms = terms;
+ _ords = ords;
+ _scores = scores;
+
+ _originalQuery = originalQuery;
+ _unwrittenOriginalQuery = unwrittenOriginalQuery;
+ }
+
+ // Renders the field and the pre-rewrite query; the parameter is not used.
+ public override string ToString(string @string)
+ {
+ const string template = "TermsIncludingScoreQuery{{field={0};originalQuery={1}}}";
+ return string.Format(template, _field, _unwrittenOriginalQuery);
+ }
+
+ // Delegates to the wrapped original query; this query adds no terms of its own.
+ public override void ExtractTerms(ISet<Term> terms)
+ {
+ _originalQuery.ExtractTerms(terms);
+ }
+
+ // Rewrites the wrapped query. When that yields a different query, a copy of this query is returned
+ // around it (same terms, scores, ordinals and boost) that still remembers the pre-rewrite query.
+ public override Query Rewrite(IndexReader reader)
+ {
+ Query rewrittenOriginal = _originalQuery.Rewrite(reader);
+ if (rewrittenOriginal != _originalQuery)
+ {
+ return new TermsIncludingScoreQuery(_field, _multipleValuesPerDocument, _terms, _scores,
+ _ords, rewrittenOriginal, _originalQuery)
+ {
+ Boost = Boost
+ };
+ }
+
+ return this;
+ }
+
+ // Typed equality: the base-class comparison, the field name and the pre-rewrite query must all agree.
+ protected bool Equals(TermsIncludingScoreQuery other)
+ {
+ if (!base.Equals(other))
+ {
+ return false;
+ }
+ if (!string.Equals(_field, other._field))
+ {
+ return false;
+ }
+ return Equals(_unwrittenOriginalQuery, other._unwrittenOriginalQuery);
+ }
+
+ // Same reference is equal; null or a different runtime type is not; otherwise defer to the typed overload.
+ public override bool Equals(object obj)
+ {
+ if (ReferenceEquals(this, obj))
+ {
+ return true;
+ }
+ if (ReferenceEquals(null, obj) || obj.GetType() != GetType())
+ {
+ return false;
+ }
+ return Equals((TermsIncludingScoreQuery) obj);
+ }
+
+ // Combines the base hash with the same two members that Equals compares.
+ public override int GetHashCode()
+ {
+ unchecked
+ {
+ int fieldHash = _field != null ? _field.GetHashCode() : 0;
+ int queryHash = _unwrittenOriginalQuery != null ? _unwrittenOriginalQuery.GetHashCode() : 0;
+
+ int combined = base.GetHashCode();
+ combined = (combined*397) ^ fieldHash;
+ return (combined*397) ^ queryHash;
+ }
+ }
+
+ // Wraps the original query's weight; the wrapper forwards normalization to it.
+ public override Weight CreateWeight(IndexSearcher searcher)
+ {
+ return new WeightAnonymousInnerClassHelper(this, _originalQuery.CreateWeight(searcher));
+ }
+
+ // Weight for TermsIncludingScoreQuery: normalization is forwarded to the original query's weight,
+ // while scorers are built from the collected join terms and their scores.
+ private class WeightAnonymousInnerClassHelper : Weight
+ {
+ private readonly TermsIncludingScoreQuery outerInstance;
+
+ private Weight originalWeight;
+
+ // Handed back to Terms.Iterator(...) on every Scorer/BulkScorer call.
+ private TermsEnum segmentTermsEnum;
+
+ public WeightAnonymousInnerClassHelper(TermsIncludingScoreQuery outerInstance, Weight originalWeight)
+ {
+ this.outerInstance = outerInstance;
+ this.originalWeight = originalWeight;
+ }
+
+ public override Query Query
+ {
+ get { return outerInstance; }
+ }
+
+ public override float ValueForNormalization
+ {
+ get
+ {
+ float boost = outerInstance.Boost;
+ return originalWeight.ValueForNormalization*boost*boost;
+ }
+ }
+
+ public override void Normalize(float norm, float topLevelBoost)
+ {
+ originalWeight.Normalize(norm, topLevelBoost*outerInstance.Boost);
+ }
+
+ public override bool ScoresDocsOutOfOrder()
+ {
+ // We have optimized impls below if we are allowed
+ // to score out-of-order:
+ return true;
+ }
+
+ public override Explanation Explain(AtomicReaderContext context, int doc)
+ {
+ // Requesting the out-of-order bulk scorer yields an SVInnerScorer (or its MV subclass), or null.
+ SVInnerScorer innerScorer = (SVInnerScorer) BulkScorer(context, false, null);
+ if (innerScorer == null)
+ {
+ return new ComplexExplanation(false, 0.0f, "Not a match");
+ }
+ return innerScorer.Explain(doc);
+ }
+
+ public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
+ {
+ var reader = context.AtomicReader;
+ Terms fieldTerms = reader.Terms(outerInstance._field);
+ if (fieldTerms == null)
+ {
+ return null;
+ }
+
+ // what is the runtime...seems ok?
+ long cost = reader.MaxDoc * fieldTerms.Size();
+
+ segmentTermsEnum = fieldTerms.Iterator(segmentTermsEnum);
+ if (!outerInstance._multipleValuesPerDocument)
+ {
+ return new SVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, reader.MaxDoc, cost);
+ }
+
+ return new MVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, reader.MaxDoc, cost);
+ }
+
+ public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs)
+ {
+ if (scoreDocsInOrder)
+ {
+ return base.BulkScorer(context, scoreDocsInOrder, acceptDocs);
+ }
+
+ var reader = context.AtomicReader;
+ Terms fieldTerms = reader.Terms(outerInstance._field);
+ if (fieldTerms == null)
+ {
+ return null;
+ }
+
+ // what is the runtime...seems ok?
+ long cost = reader.MaxDoc * fieldTerms.Size();
+
+ segmentTermsEnum = fieldTerms.Iterator(segmentTermsEnum);
+
+ // Optimized impls that take advantage of docs
+ // being allowed to be out of order:
+ if (!outerInstance._multipleValuesPerDocument)
+ {
+ return new SVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, cost);
+ }
+
+ return new MVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, reader.MaxDoc, cost);
+ }
+ }
+
+ // This impl assumes that the 'join' values are used uniquely per doc per field. Used for one to many relations.
+ // Documents are produced term by term (terms visited in _ords order), so they are grouped by join term
+ // rather than sorted by doc id.
+ internal class SVInnerScorer : BulkScorer
+ {
+ private readonly TermsIncludingScoreQuery outerInstance;
+
+ // Scratch buffer that receives each term fetched from the outer query's term hash.
+ private readonly BytesRef _spare = new BytesRef();
+ private readonly Bits _acceptDocs;
+ private readonly TermsEnum _termsEnum;
+ // Stored by the constructor; not read anywhere in this class.
+ private readonly long _cost;
+
+ // Position in outerInstance._ords of the next term to look up.
+ private int _upto;
+ // Postings of the term currently being drained; null when the next term must be looked up.
+ internal DocsEnum DocsEnum;
+ // Last DocsEnum obtained, passed back to TermsEnum.Docs(...) on the next lookup.
+ private DocsEnum _reuse;
+ // Position in outerInstance._ords of the most recently looked-up term; selects the score to report.
+ private int _scoreUpto;
+ // Current document; -1 before the first document has been fetched.
+ private int _doc;
+
+ // Note: the weight parameter is accepted but not used by this constructor.
+ internal SVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, TermsEnum termsEnum, long cost)
+ {
+ this.outerInstance = outerInstance;
+ _acceptDocs = acceptDocs;
+ _termsEnum = termsEnum;
+ _cost = cost;
+ _doc = -1;
+ }
+
+ // Feeds documents to the collector while the current doc id is below max, publishing each
+ // document's id and score through a FakeScorer. Returns true when documents remain.
+ public override bool Score(Collector collector, int max)
+ {
+ FakeScorer fakeScorer = new FakeScorer();
+ collector.Scorer = fakeScorer;
+ if (_doc == -1)
+ {
+ // First call: prime the iteration.
+ _doc = NextDocOutOfOrder();
+ }
+ while (_doc < max)
+ {
+ fakeScorer.doc = _doc;
+ fakeScorer._score = outerInstance._scores[outerInstance._ords[_scoreUpto]];
+ collector.Collect(_doc);
+ _doc = NextDocOutOfOrder();
+ }
+
+ return _doc != DocIdSetIterator.NO_MORE_DOCS;
+ }
+
+ // Returns the next document of the current term, moving on to the following term whenever the
+ // current postings are exhausted; returns NO_MORE_DOCS once every term has been visited.
+ private int NextDocOutOfOrder()
+ {
+ while (true)
+ {
+ if (DocsEnum != null)
+ {
+ int docId = DocsEnumNextDoc();
+ if (docId == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ // Current term exhausted; fall through to look up the next one.
+ DocsEnum = null;
+ }
+ else
+ {
+ return _doc = docId;
+ }
+ }
+
+ if (_upto == outerInstance._terms.Size())
+ {
+ return _doc = DocIdSetIterator.NO_MORE_DOCS;
+ }
+
+ // Remember which term the upcoming documents belong to before advancing _upto.
+ _scoreUpto = _upto;
+ if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare)))
+ {
+ DocsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsEnum.FLAG_NONE);
+ }
+ }
+ }
+
+ // Hook for subclasses to filter the documents of the current term.
+ protected virtual int DocsEnumNextDoc()
+ {
+ return DocsEnum.NextDoc();
+ }
+
+ // Searches term by term for the target document and explains its score by the join term found.
+ // NOTE(review): when the loop ends because every term was exhausted (docId == NO_MORE_DOCS), the
+ // code below still builds a "match" explanation from the last looked-up term - confirm that
+ // callers only ask about documents that are known to match.
+ internal Explanation Explain(int target)
+ {
+ int docId;
+ do
+ {
+ docId = NextDocOutOfOrder();
+ if (docId < target)
+ {
+ // Skip ahead within the current term's postings.
+ int tempDocId = DocsEnum.Advance(target);
+ if (tempDocId == target)
+ {
+ docId = tempDocId;
+ break;
+ }
+ }
+ else if (docId == target)
+ {
+ break;
+ }
+ DocsEnum = null; // goto the next ord.
+ } while (docId != DocIdSetIterator.NO_MORE_DOCS);
+
+ return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]],
+ "Score based on join value " + _termsEnum.Term().Utf8ToString());
+ }
+ }
+
+ // Variant that remembers which doc ids have already been emitted, so a document reachable through
+ // several join values is produced only once. As a consequence the first encountered join value
+ // determines the document's score, even if a later join value would yield a higher one.
+ internal class MVInnerScorer : SVInnerScorer
+ {
+ private readonly TermsIncludingScoreQuery outerInstance;
+
+ // One bit per document of the segment; set once the document has been emitted.
+ internal readonly FixedBitSet alreadyEmittedDocs;
+
+ internal MVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+ TermsEnum termsEnum, int maxDoc, long cost) : base(outerInstance, weight, acceptDocs, termsEnum, cost)
+ {
+ this.outerInstance = outerInstance;
+ alreadyEmittedDocs = new FixedBitSet(maxDoc);
+ }
+
+ protected override int DocsEnumNextDoc()
+ {
+ int candidate;
+ do
+ {
+ candidate = DocsEnum.NextDoc();
+ // Keep going while the candidate is a real document whose bit was already set;
+ // GetAndSet marks it as emitted either way.
+ } while (candidate != DocIdSetIterator.NO_MORE_DOCS && alreadyEmittedDocs.GetAndSet(candidate));
+
+ return candidate;
+ }
+ }
+
+ // In-order scorer: the constructor eagerly computes every matching document and its score
+ // (see FillDocsAndScores) and the scorer then replays them through a FixedBitSet iterator.
+ internal class SVInOrderScorer : Scorer
+ {
+ // Shared with subclasses. It is assigned before the constructor makes its virtual
+ // FillDocsAndScores call, so overrides can safely read it. A subclass must NOT shadow it with
+ // its own field: that field would still be null while the base constructor is running.
+ protected readonly TermsIncludingScoreQuery outerInstance;
+
+ internal readonly DocIdSetIterator matchingDocsIterator;
+ // Indexed by doc id; only entries of matching documents are meaningful.
+ internal readonly float[] scores;
+ internal readonly long cost_Renamed;
+
+ internal int currentDoc = -1;
+
+ internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+ TermsEnum termsEnum, int maxDoc, long cost) : base(weight)
+ {
+ this.outerInstance = outerInstance;
+ FixedBitSet matchingDocs = new FixedBitSet(maxDoc);
+ scores = new float[maxDoc];
+ // Virtual call from a constructor: overrides run before the subclass constructor body,
+ // so they may only rely on state assigned above this line.
+ FillDocsAndScores(matchingDocs, acceptDocs, termsEnum);
+ matchingDocsIterator = matchingDocs.GetIterator();
+ cost_Renamed = cost;
+ }
+
+ // Marks every document containing one of the join terms and records that term's score for it.
+ protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
+ TermsEnum termsEnum)
+ {
+ BytesRef spare = new BytesRef();
+ DocsEnum docsEnum = null;
+ for (int i = 0; i < outerInstance._terms.Size(); i++)
+ {
+ if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
+ {
+ docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
+ float score = outerInstance._scores[outerInstance._ords[i]];
+ for (int doc = docsEnum.NextDoc();
+ doc != NO_MORE_DOCS;
+ doc = docsEnum.NextDoc())
+ {
+ matchingDocs.Set(doc);
+ // In the case the same doc is also related to a another doc, a score might be overwritten. I think this
+ // can only happen in a many-to-many relation
+ scores[doc] = score;
+ }
+ }
+ }
+ }
+
+ public override float Score()
+ {
+ return scores[currentDoc];
+ }
+
+ public override int Freq()
+ {
+ return 1;
+ }
+
+ public override int DocID()
+ {
+ return currentDoc;
+ }
+
+ public override int NextDoc()
+ {
+ return currentDoc = matchingDocsIterator.NextDoc();
+ }
+
+ public override int Advance(int target)
+ {
+ return currentDoc = matchingDocsIterator.Advance(target);
+ }
+
+ public override long Cost()
+ {
+ return cost_Renamed;
+ }
+ }
+
+ // This scorer deals with the fact that a document can have more than one score from multiple related documents.
+ internal class MVInOrderScorer : SVInOrderScorer
+ {
+ internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
+ TermsEnum termsEnum, int maxDoc, long cost)
+ : base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost)
+ {
+ // Intentionally empty. This class used to keep its own private copy of outerInstance, assigned
+ // here; FillDocsAndScores below is invoked from the base constructor, i.e. before this body
+ // runs, so that copy was still null and the override threw a NullReferenceException.
+ // The override now reads the base class's protected field instead.
+ }
+
+ // Same traversal as the base class, but the first join value that reaches a document fixes its score.
+ protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
+ TermsEnum termsEnum)
+ {
+ BytesRef spare = new BytesRef();
+ DocsEnum docsEnum = null;
+ for (int i = 0; i < outerInstance._terms.Size(); i++)
+ {
+ if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
+ {
+ docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
+ float score = outerInstance._scores[outerInstance._ords[i]];
+ for (int doc = docsEnum.NextDoc();
+ doc != NO_MORE_DOCS;
+ doc = docsEnum.NextDoc())
+ {
+ // I prefer this:
+ /*if (scores[doc] < score) {
+ scores[doc] = score;
+ matchingDocs.set(doc);
+ }*/
+ // But this behaves the same as MVInnerScorer and only then the tests will pass:
+ if (!matchingDocs.Get(doc))
+ {
+ scores[doc] = score;
+ matchingDocs.Set(doc);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/TermsQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsQuery.cs b/src/Lucene.Net.Join/TermsQuery.cs
new file mode 100644
index 0000000..2d5ccf8
--- /dev/null
+++ b/src/Lucene.Net.Join/TermsQuery.cs
@@ -0,0 +1,147 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A query that has an array of terms from a specific field. This query will match documents that have one or more
+ /// terms in the specified field that match the terms specified in the array.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ internal class TermsQuery : MultiTermQuery
+ {
+     private readonly BytesRefHash _terms;
+     private readonly int[] _ords;
+     private readonly Query _fromQuery; // Used for Equals() and GetHashCode() only
+
+     /// <summary>
+     /// Creates a query over <paramref name="terms"/>. The terms are sorted here; the resulting
+     /// ordinal array drives the term enumeration.
+     /// </summary>
+     /// <param name="field">The field that should contain the terms given in <paramref name="terms"/>.</param>
+     /// <param name="fromQuery">The query the terms originate from; it only takes part in equality and hashing.</param>
+     /// <param name="terms">The terms that matching documents should have.</param>
+     internal TermsQuery(string field, Query fromQuery, BytesRefHash terms) : base(field)
+     {
+         _fromQuery = fromQuery;
+         _terms = terms;
+         _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
+     }
+
+     /// <summary>
+     /// Returns an enumeration that seeks through <paramref name="terms"/> visiting only this
+     /// query's terms, or <c>TermsEnum.EMPTY</c> when the query holds no terms.
+     /// </summary>
+     public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
+     {
+         if (_terms.Size() == 0)
+         {
+             return TermsEnum.EMPTY;
+         }
+
+         return new SeekingTermSetTermsEnum(terms.Iterator(null), _terms, _ords);
+     }
+
+     /// <summary>
+     /// Describes this query by the field it runs against.
+     /// </summary>
+     /// <param name="field">Default field supplied by the caller; not part of the output.</param>
+     public override string ToString(string field)
+     {
+         // The parameter shadows the query's own field, so read it through the Field property.
+         return string.Format("TermsQuery{{field={0}}}", Field);
+     }
+
+     /// <summary>
+     /// Two terms queries are equal when the base query state matches and they were built
+     /// from equal from-queries.
+     /// </summary>
+     public override bool Equals(object obj)
+     {
+         if (ReferenceEquals(this, obj))
+         {
+             return true;
+         }
+         if (ReferenceEquals(null, obj) || GetType() != obj.GetType())
+         {
+             return false;
+         }
+         if (!base.Equals(obj))
+         {
+             return false;
+         }
+
+         TermsQuery other = (TermsQuery) obj;
+         return Equals(_fromQuery, other._fromQuery);
+     }
+
+     /// <summary>
+     /// Hash code consistent with <see cref="Equals(object)"/>: base hash combined with the from-query's hash.
+     /// </summary>
+     public override int GetHashCode()
+     {
+         unchecked
+         {
+             const int prime = 31;
+             int result = base.GetHashCode();
+             result += prime * (_fromQuery != null ? _fromQuery.GetHashCode() : 0);
+             return result;
+         }
+     }
+
+     /// <summary>
+     /// Filtered terms enumeration that walks the sorted query terms and the index terms in
+     /// lock-step, asking the underlying enumeration to seek to the next query term each time.
+     /// </summary>
+     private class SeekingTermSetTermsEnum : FilteredTermsEnum
+     {
+         private readonly BytesRefHash _terms;
+         private readonly int[] _ords;
+         private readonly int _lastElement;
+
+         private readonly BytesRef _lastTerm;
+         private readonly BytesRef _spare = new BytesRef();
+         private readonly IComparer<BytesRef> _comparator;
+
+         private BytesRef _seekTerm;
+         private int _upto;
+
+         /// <summary>
+         /// Positions the enumeration on the smallest query term and remembers the largest one.
+         /// Callers must not pass an empty <paramref name="terms"/>: the last ordinal is read unconditionally.
+         /// </summary>
+         internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) : base(tenum)
+         {
+             _terms = terms;
+             _ords = ords;
+             _comparator = BytesRef.UTF8SortedAsUnicodeComparer;
+             _lastElement = terms.Size() - 1;
+             // The last term gets its own BytesRef because _spare is overwritten on every lookup.
+             _lastTerm = terms.Get(ords[_lastElement], new BytesRef());
+             _seekTerm = terms.Get(ords[_upto], _spare);
+         }
+
+         /// <summary>
+         /// Hands out the pending seek term exactly once; returns null when none is pending.
+         /// </summary>
+         protected override BytesRef NextSeekTerm(BytesRef currentTerm)
+         {
+             BytesRef pending = _seekTerm;
+             _seekTerm = null;
+             return pending;
+         }
+
+         /// <summary>
+         /// Decides whether the index term <paramref name="term"/> is one of the query terms and
+         /// where the enumeration should continue.
+         /// </summary>
+         protected override AcceptStatus Accept(BytesRef term)
+         {
+             if (_comparator.Compare(term, _lastTerm) > 0)
+             {
+                 // Past the largest query term: nothing further can match.
+                 return AcceptStatus.END;
+             }
+
+             BytesRef currentTerm = _terms.Get(_ords[_upto], _spare);
+             if (_comparator.Compare(term, currentTerm) == 0)
+             {
+                 return AcceptAndAdvance();
+             }
+
+             // Our current term doesn't match the given term.
+             int cmp;
+             do // We maybe are behind the given term by more than one step. Keep incrementing till we're the same or higher.
+             {
+                 if (_upto == _lastElement)
+                 {
+                     return AcceptStatus.NO;
+                 }
+                 // typically the terms dict is a superset of query's terms so it's unusual that we have to skip many of
+                 // our terms so we don't do a binary search here
+                 _seekTerm = _terms.Get(_ords[++_upto], _spare);
+             } while ((cmp = _comparator.Compare(_seekTerm, term)) < 0);
+
+             if (cmp == 0)
+             {
+                 return AcceptAndAdvance();
+             }
+
+             return AcceptStatus.NO_AND_SEEK;
+         }
+
+         /// <summary>
+         /// Accepts the current term; unless it was the last query term, also queues the next
+         /// query term as the seek target.
+         /// </summary>
+         private AcceptStatus AcceptAndAdvance()
+         {
+             if (_upto == _lastElement)
+             {
+                 return AcceptStatus.YES;
+             }
+
+             _seekTerm = _terms.Get(_ords[++_upto], _spare);
+             return AcceptStatus.YES_AND_SEEK;
+         }
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/TermsWithScoreCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsWithScoreCollector.cs b/src/Lucene.Net.Join/TermsWithScoreCollector.cs
new file mode 100644
index 0000000..e823293
--- /dev/null
+++ b/src/Lucene.Net.Join/TermsWithScoreCollector.cs
@@ -0,0 +1,333 @@
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Collector that gathers the distinct terms of one field over the collected documents and
+ /// keeps one aggregated score per collected term, combined according to a <see cref="ScoreMode"/>.
+ /// </summary>
+ internal abstract class TermsWithScoreCollector : Collector
+ {
+     private const int InitialArraySize = 256;
+
+     private readonly string _field;
+     private readonly BytesRefHash _collectedTerms = new BytesRefHash();
+     private readonly ScoreMode _scoreMode;
+
+     private Scorer _scorer;
+     private float[] _scoreSums = new float[InitialArraySize];
+
+     internal TermsWithScoreCollector(string field, ScoreMode scoreMode)
+     {
+         this._field = field;
+         this._scoreMode = scoreMode;
+     }
+
+     /// <summary>
+     /// The terms collected so far.
+     /// </summary>
+     public BytesRefHash CollectedTerms
+     {
+         get
+         {
+             return _collectedTerms;
+         }
+     }
+
+     /// <summary>
+     /// The aggregated scores, indexed by the id the term received in <see cref="CollectedTerms"/>.
+     /// </summary>
+     public virtual float[] ScoresPerTerm
+     {
+         get
+         {
+             return _scoreSums;
+         }
+     }
+
+     public override Scorer Scorer
+     {
+         set
+         {
+             _scorer = value;
+         }
+     }
+
+     public override bool AcceptsDocsOutOfOrder()
+     {
+         return true;
+     }
+
+     /// <summary>
+     /// Chooses the right <see cref="TermsWithScoreCollector"/> implementation.
+     /// </summary>
+     /// <param name="field">The field to collect terms for.</param>
+     /// <param name="multipleValuesPerDocument">Whether the field to collect terms for has multiple values per document.</param>
+     /// <param name="scoreMode">How the scores of documents sharing a term are combined.</param>
+     /// <returns>A <see cref="TermsWithScoreCollector"/> instance</returns>
+     internal static TermsWithScoreCollector Create(string field, bool multipleValuesPerDocument, ScoreMode scoreMode)
+     {
+         if (multipleValuesPerDocument)
+         {
+             switch (scoreMode)
+             {
+                 case ScoreMode.Avg:
+                     return new Mv.Avg(field);
+                 default:
+                     return new Mv(field, scoreMode);
+             }
+         }
+
+         switch (scoreMode)
+         {
+             case ScoreMode.Avg:
+                 return new Sv.Avg(field);
+             default:
+                 return new Sv(field, scoreMode);
+         }
+     }
+
+     // impl that works with single value per document
+     internal class Sv : TermsWithScoreCollector
+     {
+         private readonly BytesRef _spare = new BytesRef();
+         private BinaryDocValues _fromDocTerms;
+
+         internal Sv(string field, ScoreMode scoreMode) : base(field, scoreMode)
+         {
+         }
+
+         /// <summary>
+         /// Adds the document's term to the collected set and folds the current score into that term's slot.
+         /// </summary>
+         public override void Collect(int doc)
+         {
+             _fromDocTerms.Get(doc, _spare);
+             int ord = _collectedTerms.Add(_spare);
+             if (ord < 0)
+             {
+                 // Negative result encodes the id of a term that was already present.
+                 ord = -ord - 1;
+             }
+             else
+             {
+                 if (ord >= _scoreSums.Length)
+                 {
+                     _scoreSums = ArrayUtil.Grow(_scoreSums);
+                 }
+             }
+
+             float current = _scorer.Score();
+             float existing = _scoreSums[ord];
+             if (existing.CompareTo(0.0f) == 0)
+             {
+                 // Slot still holds zero: take the first score as-is.
+                 _scoreSums[ord] = current;
+             }
+             else
+             {
+                 switch (_scoreMode)
+                 {
+                     case ScoreMode.Total:
+                         _scoreSums[ord] = _scoreSums[ord] + current;
+                         break;
+                     case ScoreMode.Max:
+                         if (current > existing)
+                         {
+                             _scoreSums[ord] = current;
+                         }
+                         break;
+                 }
+             }
+         }
+
+         public override AtomicReaderContext NextReader
+         {
+             set
+             {
+                 _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false);
+             }
+         }
+
+         public override bool AcceptsDocsOutOfOrder()
+         {
+             return base.AcceptsDocsOutOfOrder();
+         }
+
+         /// <summary>
+         /// Single-valued collector that averages the scores of all documents sharing a term.
+         /// </summary>
+         internal class Avg : Sv
+         {
+             private int[] _scoreCounts = new int[InitialArraySize];
+
+             internal Avg(string field) : base(field, ScoreMode.Avg)
+             {
+             }
+
+             public override void Collect(int doc)
+             {
+                 _fromDocTerms.Get(doc, _spare);
+                 int ord = _collectedTerms.Add(_spare);
+                 if (ord < 0)
+                 {
+                     ord = -ord - 1;
+                 }
+                 else
+                 {
+                     if (ord >= _scoreSums.Length)
+                     {
+                         // Sums and counts are indexed by the same term id, so grow them together.
+                         _scoreSums = ArrayUtil.Grow(_scoreSums);
+                         _scoreCounts = ArrayUtil.Grow(_scoreCounts);
+                     }
+                 }
+
+                 float current = _scorer.Score();
+                 float existing = _scoreSums[ord];
+                 if (existing.CompareTo(0.0f) == 0)
+                 {
+                     _scoreSums[ord] = current;
+                     _scoreCounts[ord] = 1;
+                 }
+                 else
+                 {
+                     _scoreSums[ord] = _scoreSums[ord] + current;
+                     _scoreCounts[ord]++;
+                 }
+             }
+
+             /// <summary>
+             /// Converts the sums to averages on first access and returns them. The counts are
+             /// dropped afterwards so the division happens only once. Slots whose count is
+             /// still zero end up as NaN (0f / 0).
+             /// </summary>
+             public override float[] ScoresPerTerm
+             {
+                 get
+                 {
+                     if (_scoreCounts != null)
+                     {
+                         for (int i = 0; i < _scoreCounts.Length; i++)
+                         {
+                             _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
+                         }
+                         _scoreCounts = null;
+                     }
+                     return _scoreSums;
+                 }
+             }
+         }
+     }
+
+     // impl that works with multiple values per document
+     internal class Mv : TermsWithScoreCollector
+     {
+         private SortedSetDocValues _fromDocTermOrds;
+         private readonly BytesRef _scratch = new BytesRef();
+
+         internal Mv(string field, ScoreMode scoreMode) : base(field, scoreMode)
+         {
+         }
+
+         /// <summary>
+         /// Adds every term of the document to the collected set and folds the current score
+         /// into each of those terms' slots.
+         /// </summary>
+         public override void Collect(int doc)
+         {
+             _fromDocTermOrds.Document = doc;
+             long ord;
+             while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+             {
+                 _fromDocTermOrds.LookupOrd(ord, _scratch);
+
+                 int termId = _collectedTerms.Add(_scratch);
+                 if (termId < 0)
+                 {
+                     // Negative result encodes the id of a term that was already present.
+                     termId = -termId - 1;
+                 }
+                 else
+                 {
+                     if (termId >= _scoreSums.Length)
+                     {
+                         _scoreSums = ArrayUtil.Grow(_scoreSums);
+                     }
+                 }
+
+                 switch (_scoreMode)
+                 {
+                     case ScoreMode.Total:
+                         _scoreSums[termId] += _scorer.Score();
+                         break;
+                     case ScoreMode.Max:
+                         _scoreSums[termId] = Math.Max(_scoreSums[termId], _scorer.Score());
+                         break;
+                 }
+             }
+         }
+
+         public override AtomicReaderContext NextReader
+         {
+             set
+             {
+                 _fromDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field);
+             }
+         }
+
+         /// <summary>
+         /// Same answer as the base collector and the single-valued variant. Throwing here
+         /// would fail any caller that asks the collector about document ordering.
+         /// </summary>
+         public override bool AcceptsDocsOutOfOrder()
+         {
+             return base.AcceptsDocsOutOfOrder();
+         }
+
+         /// <summary>
+         /// Multi-valued collector that averages the scores of all documents sharing a term.
+         /// </summary>
+         internal class Avg : Mv
+         {
+             private int[] _scoreCounts = new int[InitialArraySize];
+
+             internal Avg(string field) : base(field, ScoreMode.Avg)
+             {
+             }
+
+             public override void Collect(int doc)
+             {
+                 _fromDocTermOrds.Document = doc;
+                 long ord;
+                 while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                 {
+                     _fromDocTermOrds.LookupOrd(ord, _scratch);
+
+                     int termId = _collectedTerms.Add(_scratch);
+                     if (termId < 0)
+                     {
+                         termId = -termId - 1;
+                     }
+                     else
+                     {
+                         if (termId >= _scoreSums.Length)
+                         {
+                             // Sums and counts are indexed by the same term id, so grow them together.
+                             _scoreSums = ArrayUtil.Grow(_scoreSums);
+                             _scoreCounts = ArrayUtil.Grow(_scoreCounts);
+                         }
+                     }
+
+                     _scoreSums[termId] += _scorer.Score();
+                     _scoreCounts[termId]++;
+                 }
+             }
+
+             /// <summary>
+             /// Converts the sums to averages on first access and returns them. The counts are
+             /// dropped afterwards so the division happens only once. Slots whose count is
+             /// still zero end up as NaN (0f / 0).
+             /// </summary>
+             public override float[] ScoresPerTerm
+             {
+                 get
+                 {
+                     if (_scoreCounts != null)
+                     {
+                         for (int i = 0; i < _scoreCounts.Length; i++)
+                         {
+                             _scoreSums[i] = _scoreSums[i] / _scoreCounts[i];
+                         }
+                         _scoreCounts = null;
+                     }
+                     return _scoreSums;
+                 }
+             }
+         }
+     }
+
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToChildBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToChildBlockJoinQuery.cs b/src/Lucene.Net.Join/ToChildBlockJoinQuery.cs
new file mode 100644
index 0000000..3d4f2d5
--- /dev/null
+++ b/src/Lucene.Net.Join/ToChildBlockJoinQuery.cs
@@ -0,0 +1,396 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Just like <see cref="ToParentBlockJoinQuery"/>, except this
+ /// query joins in reverse: you provide a Query matching
+ /// parent documents and it joins down to child
+ /// documents.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToChildBlockJoinQuery : Query
+ {
+     /// <summary>
+     /// Message thrown from <see cref="ToChildBlockJoinScorer.ValidateParentDoc"/>
+     /// on mis-use, when the parent query incorrectly returns child docs.
+     /// </summary>
+     public const string InvalidQueryMessage = "Parent query yields document which is not matched by parents filter, docID=";
+
+     private readonly Filter _parentsFilter;
+     private readonly Query _parentQuery;
+
+     // If we are rewritten, this is the original parentQuery we
+     // were passed; we use this for Equals() and
+     // GetHashCode(). This makes rewritten query equal the
+     // original, so that user does not have to Rewrite() their
+     // query before searching:
+     private readonly Query _origParentQuery;
+     private readonly bool _doScores;
+
+     /// <summary>
+     /// Create a ToChildBlockJoinQuery.
+     /// </summary>
+     /// <param name="parentQuery">Query that matches parent documents</param>
+     /// <param name="parentsFilter">Filter (must produce FixedBitSet per-segment, like <see cref="FixedBitSetCachingWrapperFilter"/>)
+     /// identifying the parent documents.</param>
+     /// <param name="doScores">True if parent scores should be calculated.</param>
+     public ToChildBlockJoinQuery(Query parentQuery, Filter parentsFilter, bool doScores)
+     {
+         _origParentQuery = parentQuery;
+         _parentQuery = parentQuery;
+         _parentsFilter = parentsFilter;
+         _doScores = doScores;
+     }
+
+     // Used by Rewrite(): keeps the pre-rewrite query for equality while executing the rewritten one.
+     private ToChildBlockJoinQuery(Query origParentQuery, Query parentQuery, Filter parentsFilter, bool doScores) : base()
+     {
+         _origParentQuery = origParentQuery;
+         _parentQuery = parentQuery;
+         _parentsFilter = parentsFilter;
+         _doScores = doScores;
+     }
+
+     public override Weight CreateWeight(IndexSearcher searcher)
+     {
+         return new ToChildBlockJoinWeight(this, _parentQuery.CreateWeight(searcher), _parentsFilter, _doScores);
+     }
+
+     /// <summary>
+     /// Weight that wraps the parent query's weight and produces child-space scorers.
+     /// </summary>
+     private class ToChildBlockJoinWeight : Weight
+     {
+         private readonly Query _joinQuery;
+         private readonly Weight _parentWeight;
+         private readonly Filter _parentsFilter;
+         private readonly bool _doScores;
+
+         public ToChildBlockJoinWeight(Query joinQuery, Weight parentWeight, Filter parentsFilter, bool doScores) : base()
+         {
+             _joinQuery = joinQuery;
+             _parentWeight = parentWeight;
+             _parentsFilter = parentsFilter;
+             _doScores = doScores;
+         }
+
+         public override Query Query
+         {
+             get { return _joinQuery; }
+         }
+
+         public override float ValueForNormalization
+         {
+             get { return _parentWeight.ValueForNormalization*_joinQuery.Boost*_joinQuery.Boost; }
+         }
+
+         public override void Normalize(float norm, float topLevelBoost)
+         {
+             _parentWeight.Normalize(norm, topLevelBoost * _joinQuery.Boost);
+         }
+
+         // NOTE: acceptDocs applies (and is checked) only in the child document space
+         public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
+         {
+             Scorer parentScorer = _parentWeight.Scorer(readerContext, null);
+
+             if (parentScorer == null)
+             {
+                 // No matches
+                 return null;
+             }
+
+             // NOTE: we cannot pass acceptDocs here because this
+             // will (most likely, justifiably) cause the filter to
+             // not return a FixedBitSet but rather a
+             // BitsFilteredDocIdSet. Instead, we filter by
+             // acceptDocs when we score:
+             DocIdSet parents = _parentsFilter.GetDocIdSet(readerContext, null);
+
+             if (parents == null)
+             {
+                 // No matches
+                 return null;
+             }
+             if (!(parents is FixedBitSet))
+             {
+                 throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
+             }
+
+             return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet)parents, _doScores, acceptDocs);
+         }
+
+         public override Explanation Explain(AtomicReaderContext reader, int doc)
+         {
+             // TODO
+             throw new NotSupportedException(GetType().Name + " cannot explain match on parent document");
+         }
+
+         public override bool ScoresDocsOutOfOrder()
+         {
+             return false;
+         }
+     }
+
+     /// <summary>
+     /// Scorer that iterates the child documents of every parent matched by the parent scorer,
+     /// reporting the parent's score and freq for each child.
+     /// </summary>
+     private sealed class ToChildBlockJoinScorer : Scorer
+     {
+         private readonly Scorer _parentScorer;
+         private readonly FixedBitSet _parentBits;
+         private readonly bool _doScores;
+         private readonly Bits _acceptDocs;
+
+         private float _parentScore;
+         private int _parentFreq = 1;
+
+         private int _childDoc = -1;
+         private int _parentDoc;
+
+         public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, bool doScores, Bits acceptDocs) : base(weight)
+         {
+             _doScores = doScores;
+             _parentBits = parentBits;
+             _parentScorer = parentScorer;
+             _acceptDocs = acceptDocs;
+         }
+
+         public override ICollection<ChildScorer> Children
+         {
+             get { return Collections.Singleton(new ChildScorer(_parentScorer, "BLOCK_JOIN")); }
+         }
+
+         /// <summary>
+         /// Moves to the next accepted child document, stepping to the next matching parent
+         /// whenever the children of the current parent are exhausted.
+         /// </summary>
+         /// <returns>The next child doc id, or NO_MORE_DOCS when the parents are exhausted.</returns>
+         public override int NextDoc()
+         {
+             // Loop until we hit a childDoc that's accepted
+             while (true)
+             {
+                 if (_childDoc + 1 == _parentDoc)
+                 {
+                     // OK, we are done iterating through all children
+                     // matching this one parent doc, so we now NextDoc()
+                     // the parent. Use a while loop because we may have
+                     // to skip over some number of parents w/ no
+                     // children:
+                     while (true)
+                     {
+                         _parentDoc = _parentScorer.NextDoc();
+                         ValidateParentDoc();
+
+                         if (_parentDoc == 0)
+                         {
+                             // Degenerate but allowed: first parent doc has no children
+                             // TODO: would be nice to pull initial parent
+                             // into ctor so we can skip this if... but it's
+                             // tricky because scorer must return -1 for
+                             // DocID() on init...
+                             _parentDoc = _parentScorer.NextDoc();
+                             ValidateParentDoc();
+                         }
+
+                         if (_parentDoc == NO_MORE_DOCS)
+                         {
+                             _childDoc = NO_MORE_DOCS;
+                             return _childDoc;
+                         }
+
+                         // Go to first child for this next parentDoc:
+                         _childDoc = 1 + _parentBits.PrevSetBit(_parentDoc - 1);
+
+                         if (_childDoc == _parentDoc)
+                         {
+                             // This parent has no children; continue
+                             // parent loop so we move to next parent
+                             continue;
+                         }
+
+                         // Capture the new parent's score before looking at acceptDocs: if the
+                         // first child is rejected, the remaining children of this parent are
+                         // returned by the increment path below and must not report the
+                         // previous parent's score and freq.
+                         if (_doScores)
+                         {
+                             _parentScore = _parentScorer.Score();
+                             _parentFreq = _parentScorer.Freq();
+                         }
+
+                         if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+                         {
+                             // First child rejected: restart the outer loop to try the next child.
+                             goto nextChildDocContinue;
+                         }
+
+                         if (_childDoc < _parentDoc)
+                         {
+                             return _childDoc;
+                         }
+                         else
+                         {
+                             // Degenerate but allowed: parent has no children
+                         }
+                     }
+                 }
+
+                 Debug.Assert(_childDoc < _parentDoc, "childDoc=" + _childDoc + " parentDoc=" + _parentDoc);
+                 _childDoc++;
+                 if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+                 {
+                     continue;
+                 }
+                 return _childDoc;
+                 nextChildDocContinue:;
+             }
+         }
+
+         /// <summary>
+         /// Detect mis-use, where provided parent query in fact sometimes returns child documents.
+         /// </summary>
+         private void ValidateParentDoc()
+         {
+             if (_parentDoc != NO_MORE_DOCS && !_parentBits.Get(_parentDoc))
+             {
+                 throw new InvalidOperationException(InvalidQueryMessage + _parentDoc);
+             }
+         }
+
+         public override int DocID()
+         {
+             return _childDoc;
+         }
+
+         public override float Score()
+         {
+             return _parentScore;
+         }
+
+         public override int Freq()
+         {
+             return _parentFreq;
+         }
+
+         /// <summary>
+         /// Advances to <paramref name="childTarget"/> or, when it is not accepted, to the next
+         /// accepted child after it. The target must not be a parent document.
+         /// </summary>
+         public override int Advance(int childTarget)
+         {
+             Debug.Assert(childTarget >= _parentBits.Length() || !_parentBits.Get(childTarget));
+
+             if (childTarget == NO_MORE_DOCS)
+             {
+                 return _childDoc = _parentDoc = NO_MORE_DOCS;
+             }
+
+             Debug.Assert(_childDoc == -1 || childTarget != _parentDoc, "childTarget=" + childTarget);
+             if (_childDoc == -1 || childTarget > _parentDoc)
+             {
+                 // Advance to new parent:
+                 _parentDoc = _parentScorer.Advance(childTarget);
+                 ValidateParentDoc();
+                 Debug.Assert(_parentDoc > childTarget);
+                 if (_parentDoc == NO_MORE_DOCS)
+                 {
+                     return _childDoc = NO_MORE_DOCS;
+                 }
+                 if (_doScores)
+                 {
+                     _parentScore = _parentScorer.Score();
+                     _parentFreq = _parentScorer.Freq();
+                 }
+                 int firstChild = _parentBits.PrevSetBit(_parentDoc - 1);
+                 childTarget = Math.Max(childTarget, firstChild);
+             }
+
+             Debug.Assert(childTarget < _parentDoc);
+
+             // Advance within children of current parent:
+             _childDoc = childTarget;
+             if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+             {
+                 NextDoc();
+             }
+             return _childDoc;
+         }
+
+         public override long Cost()
+         {
+             return _parentScorer.Cost();
+         }
+     }
+
+     public override void ExtractTerms(ISet<Term> terms)
+     {
+         _parentQuery.ExtractTerms(terms);
+     }
+
+     /// <summary>
+     /// Rewrites the parent query; when that yields a different query, returns a new join
+     /// query around it carrying this query's boost, otherwise returns this instance.
+     /// </summary>
+     public override Query Rewrite(IndexReader reader)
+     {
+         Query parentRewrite = _parentQuery.Rewrite(reader);
+         if (parentRewrite != _parentQuery)
+         {
+             Query rewritten = new ToChildBlockJoinQuery(_parentQuery, parentRewrite, _parentsFilter, _doScores);
+             rewritten.Boost = Boost;
+             return rewritten;
+         }
+
+         return this;
+     }
+
+     public override string ToString(string field)
+     {
+         return "ToChildBlockJoinQuery (" + _parentQuery + ")";
+     }
+
+     // Equality is based on the original (pre-rewrite) parent query, the filter and the scoring flag.
+     protected bool Equals(ToChildBlockJoinQuery other)
+     {
+         return base.Equals(other) &&
+                Equals(_origParentQuery, other._origParentQuery) &&
+                _doScores == other._doScores &&
+                Equals(_parentsFilter, other._parentsFilter);
+     }
+
+     public override bool Equals(object obj)
+     {
+         if (ReferenceEquals(null, obj)) return false;
+         if (ReferenceEquals(this, obj)) return true;
+         if (obj.GetType() != GetType()) return false;
+         return Equals((ToChildBlockJoinQuery) obj);
+     }
+
+     public override int GetHashCode()
+     {
+         unchecked
+         {
+             int hashCode = base.GetHashCode();
+             hashCode = (hashCode*397) ^ (_origParentQuery != null ? _origParentQuery.GetHashCode() : 0);
+             hashCode = (hashCode*397) ^ _doScores.GetHashCode();
+             hashCode = (hashCode*397) ^ (_parentsFilter != null ? _parentsFilter.GetHashCode() : 0);
+             return hashCode;
+         }
+     }
+
+     /// <summary>
+     /// Builds a new query from a clone of the original parent query, the same parents filter
+     /// and the same scoring flag.
+     /// </summary>
+     public override object Clone()
+     {
+         return new ToChildBlockJoinQuery((Query) _origParentQuery.Clone(), _parentsFilter, _doScores);
+     }
+ }
+}
\ No newline at end of file
[04/17] lucenenet git commit: Completed the implementation port of
the Join project
Posted by sy...@apache.org.
Completed the implementation port of the Join project
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/47f20b9a
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/47f20b9a
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/47f20b9a
Branch: refs/heads/master
Commit: 47f20b9ac6a5890936fb48887cfef20830d9c4d9
Parents: bd772f0
Author: Josh Sullivan <ja...@gmail.com>
Authored: Mon Aug 17 11:30:55 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Mon Aug 17 11:30:55 2015 -0400
----------------------------------------------------------------------
Lucene.Net.Grouping/GroupDocs.cs | 71 +++
Lucene.Net.Grouping/Lucene.Net.Grouping.csproj | 61 ++
Lucene.Net.Grouping/Properties/AssemblyInfo.cs | 36 ++
Lucene.Net.Grouping/TopGroups.cs | 249 +++++++++
Lucene.Net.Join/Lucene.Net.Join.csproj | 9 +
Lucene.Net.Join/ToChildBlockJoinQuery.cs | 396 +++++++++++++
Lucene.Net.Join/ToParentBlockJoinCollector.cs | 560 +++++++++++++++++++
.../ToParentBlockJoinFieldComparator.cs | 391 +++++++++++++
Lucene.Net.Join/ToParentBlockJoinQuery.cs | 516 +++++++++++++++++
.../Lucene.Net.Tests.Join.csproj | 64 +++
.../Properties/AssemblyInfo.cs | 36 ++
Lucene.Net.Tests.Join/TestBlockJoin.cs | 7 +
Lucene.Net.sln | 14 +
.../Search/FieldValueHitQueue.cs | 4 +-
14 files changed, 2412 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Grouping/GroupDocs.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/GroupDocs.cs b/Lucene.Net.Grouping/GroupDocs.cs
new file mode 100644
index 0000000..00cdf83
--- /dev/null
+++ b/Lucene.Net.Grouping/GroupDocs.cs
@@ -0,0 +1,71 @@
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Grouping
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Represents one group in the results.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ /// <typeparam name="TGroupValueType">Type of the value the group was formed on.</typeparam>
+ public class GroupDocs<TGroupValueType>
+ {
+     /// <summary>
+     /// The groupField value for all docs in this group; this
+     /// may be null if hits did not have the groupField.
+     /// </summary>
+     public readonly TGroupValueType GroupValue;
+
+     /// <summary>
+     /// Max score in this group
+     /// </summary>
+     public readonly float MaxScore;
+
+     /// <summary>
+     /// Overall aggregated score of this group (currently only set by join queries).
+     /// </summary>
+     public readonly float Score;
+
+     /// <summary>
+     /// Hits; this may be <c>FieldDoc</c> instances if the
+     /// withinGroupSort sorted by fields.
+     /// </summary>
+     public readonly ScoreDoc[] ScoreDocs;
+
+     /// <summary>
+     /// Total hits within this group
+     /// </summary>
+     public readonly int TotalHits;
+
+     /// <summary>
+     /// Matches the groupSort passed to <c>AbstractFirstPassGroupingCollector</c>.
+     /// </summary>
+     public readonly object[] GroupSortValues;
+
+     /// <summary>
+     /// Stores the given values in the corresponding fields, unchanged.
+     /// </summary>
+     /// <param name="score">Value for <see cref="Score"/>.</param>
+     /// <param name="maxScore">Value for <see cref="MaxScore"/>.</param>
+     /// <param name="totalHits">Value for <see cref="TotalHits"/>.</param>
+     /// <param name="scoreDocs">Value for <see cref="ScoreDocs"/>.</param>
+     /// <param name="groupValue">Value for <see cref="GroupValue"/>.</param>
+     /// <param name="groupSortValues">Value for <see cref="GroupSortValues"/>.</param>
+     public GroupDocs(float score, float maxScore, int totalHits, ScoreDoc[] scoreDocs, TGroupValueType groupValue, object[] groupSortValues)
+     {
+         // Assigned in field declaration order.
+         this.GroupValue = groupValue;
+         this.MaxScore = maxScore;
+         this.Score = score;
+         this.ScoreDocs = scoreDocs;
+         this.TotalHits = totalHits;
+         this.GroupSortValues = groupSortValues;
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj b/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
new file mode 100644
index 0000000..540b438
--- /dev/null
+++ b/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{02BAB603-067D-48B1-AEDD-316849652568}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Grouping</RootNamespace>
+ <AssemblyName>Lucene.Net.Grouping</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="GroupDocs.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TopGroups.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/Properties/AssemblyInfo.cs b/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..9e6c1ce
--- /dev/null
+++ b/Lucene.Net.Grouping/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Grouping")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Grouping")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("02bab603-067d-48b1-aedd-316849652568")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Grouping/TopGroups.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Grouping/TopGroups.cs b/Lucene.Net.Grouping/TopGroups.cs
new file mode 100644
index 0000000..017c975
--- /dev/null
+++ b/Lucene.Net.Grouping/TopGroups.cs
@@ -0,0 +1,249 @@
+using System;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Grouping
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Represents result returned by a grouping search.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class TopGroups<TGroupValueType>
+ {
+ /// <summary>
+ /// Number of documents matching the search </summary>
+ public readonly int TotalHitCount;
+
+ /// <summary>
+ /// Number of documents grouped into the topN groups </summary>
+ public readonly int TotalGroupedHitCount;
+
+ /// <summary>
+ /// The total number of unique groups. If <code>null</code> this value is not computed. </summary>
+ public readonly int? TotalGroupCount;
+
+ /// <summary>
+ /// Group results in groupSort order </summary>
+ public readonly GroupDocs<TGroupValueType>[] Groups;
+
+ /// <summary>
+ /// How groups are sorted against each other </summary>
+ public readonly SortField[] GroupSort;
+
+ /// <summary>
+ /// How docs are sorted within each group </summary>
+ public readonly SortField[] WithinGroupSort;
+
+ /// <summary>
+ /// Highest score across all hits, or
+ /// <code>float.NaN</code> if scores were not computed.
+ /// </summary>
+ public readonly float MaxScore;
+
+ /// <summary>
+ /// Creates a result whose total group count has not been computed
+ /// (<see cref="TotalGroupCount"/> is left <c>null</c>).
+ /// </summary>
+ public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValueType>[] groups, float maxScore)
+ {
+ // Counts first, then the payload, then the sort descriptions.
+ this.TotalGroupCount = null;
+ this.TotalHitCount = totalHitCount;
+ this.TotalGroupedHitCount = totalGroupedHitCount;
+ this.MaxScore = maxScore;
+ this.Groups = groups;
+ this.GroupSort = groupSort;
+ this.WithinGroupSort = withinGroupSort;
+ }
+
+ /// <summary>
+ /// Copies every field of <paramref name="oldTopGroups"/> and attaches the
+ /// supplied <paramref name="totalGroupCount"/> to the copy.
+ /// </summary>
+ public TopGroups(TopGroups<TGroupValueType> oldTopGroups, int? totalGroupCount)
+ {
+ TopGroups<TGroupValueType> source = oldTopGroups;
+
+ this.TotalHitCount = source.TotalHitCount;
+ this.TotalGroupedHitCount = source.TotalGroupedHitCount;
+ this.MaxScore = source.MaxScore;
+ this.Groups = source.Groups;
+ this.GroupSort = source.GroupSort;
+ this.WithinGroupSort = source.WithinGroupSort;
+
+ // The only value that does not come from the source instance.
+ this.TotalGroupCount = totalGroupCount;
+ }
+
+ /// <summary>
+ /// How the GroupDocs score (if any) should be merged. </summary>
+ public enum ScoreMergeMode
+ {
+ /// <summary>
+ /// Do not merge scores: the merged group score is set to <c>float.NaN</c>.
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Use the sum of the group's score over all shards.
+ /// </summary>
+ Total,
+
+ /// <summary>
+ /// Use the sum of the group's score over all shards divided by the group's
+ /// total hit count, or <c>float.NaN</c> when the group has no hits.
+ /// </summary>
+ Avg,
+ }
+
+ /// <summary>
+ /// Merges an array of TopGroups, for example obtained from the second-pass
+ /// collector across multiple shards. Each TopGroups must have been sorted by the
+ /// same groupSort and docSort, and the top groups passed to all second-pass
+ /// collectors must be the same.
+ ///
+ /// <b>NOTE</b>: We can't always compute an exact totalGroupCount.
+ /// Documents belonging to a group may occur on more than
+ /// one shard and thus the merged totalGroupCount can be
+ /// higher than the actual totalGroupCount. In this case the
+ /// totalGroupCount represents an upper bound. If the documents
+ /// of one group do only reside in one shard then the
+ /// totalGroupCount is exact.
+ ///
+ /// <b>NOTE</b>: the topDocs in each GroupDocs is actually
+ /// an instance of TopDocsAndShards
+ /// </summary>
+ public static TopGroups<T> Merge<T>(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN, ScoreMergeMode scoreMergeMode)
+ {
+ // With no shard results there is nothing to merge; callers get null back.
+
+ if (shardGroups.Length == 0)
+ {
+ return null;
+ }
+
+ int totalHitCount = 0;
+ int totalGroupedHitCount = 0;
+ // Optionally merge the totalGroupCount: it stays null unless at least
+ // one shard reported a value, in which case the reported values are summed.
+ int? totalGroupCount = null;
+
+ // Shard 0 is the reference: every other shard must report the same number of groups.
+ int numGroups = shardGroups[0].Groups.Length;
+ foreach (var shard in shardGroups)
+ {
+ if (numGroups != shard.Groups.Length)
+ {
+ throw new ArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+ totalHitCount += shard.TotalHitCount;
+ totalGroupedHitCount += shard.TotalGroupedHitCount;
+ if (shard.TotalGroupCount != null)
+ {
+ if (totalGroupCount == null)
+ {
+ totalGroupCount = 0;
+ }
+
+ totalGroupCount += shard.TotalGroupCount;
+ }
+ }
+
+ var mergedGroupDocs = new GroupDocs<T>[numGroups];
+
+ // Scratch array reused for every group: one TopDocs per shard.
+ TopDocs[] shardTopDocs = new TopDocs[shardGroups.Length];
+ float totalMaxScore = float.MinValue;
+
+ for (int groupIDX = 0; groupIDX < numGroups; groupIDX++)
+ {
+ T groupValue = shardGroups[0].Groups[groupIDX].GroupValue;
+ // Per-group accumulators, filled from every shard's entry at this group index.
+ float maxScore = float.MinValue;
+ int totalHits = 0;
+ double scoreSum = 0.0;
+ for (int shardIdx = 0; shardIdx < shardGroups.Length; shardIdx++)
+ {
+ // The group at this index must carry the same value on every shard (null matches only null).
+ TopGroups<T> shard = shardGroups[shardIdx];
+ var shardGroupDocs = shard.Groups[groupIDX];
+ if (groupValue == null)
+ {
+ if (shardGroupDocs.GroupValue != null)
+ {
+ throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+ }
+ else if (!groupValue.Equals(shardGroupDocs.GroupValue))
+ {
+ throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
+ }
+
+ // Wrap this shard's hits for the group in a TopDocs so that
+ // TopDocs.Merge can combine them below, and fold the shard's
+ // statistics into the per-group accumulators.
+
+ shardTopDocs[shardIdx] = new TopDocs(shardGroupDocs.TotalHits, shardGroupDocs.ScoreDocs, shardGroupDocs.MaxScore);
+ maxScore = Math.Max(maxScore, shardGroupDocs.MaxScore);
+ totalHits += shardGroupDocs.TotalHits;
+ scoreSum += shardGroupDocs.Score;
+ }
+
+ // Ask for docOffset + docTopN merged hits so that docTopN remain after the offset is cut off.
+ TopDocs mergedTopDocs = TopDocs.Merge(docSort, docOffset + docTopN, shardTopDocs);
+
+ // Slice: drop the first docOffset merged hits.
+ ScoreDoc[] mergedScoreDocs;
+ if (docOffset == 0)
+ {
+ mergedScoreDocs = mergedTopDocs.ScoreDocs;
+ }
+ else if (docOffset >= mergedTopDocs.ScoreDocs.Length)
+ {
+ mergedScoreDocs = new ScoreDoc[0];
+ }
+ else
+ {
+ mergedScoreDocs = new ScoreDoc[mergedTopDocs.ScoreDocs.Length - docOffset];
+ Array.Copy(mergedTopDocs.ScoreDocs, docOffset, mergedScoreDocs, 0, mergedTopDocs.ScoreDocs.Length - docOffset);
+ }
+
+ // Derive the merged group score according to the requested mode.
+ float groupScore;
+ switch (scoreMergeMode)
+ {
+ case ScoreMergeMode.None:
+ groupScore = float.NaN;
+ break;
+ case ScoreMergeMode.Avg:
+ if (totalHits > 0)
+ {
+ groupScore = (float)(scoreSum / totalHits);
+ }
+ else
+ {
+ groupScore = float.NaN;
+ }
+ break;
+ case ScoreMergeMode.Total:
+ groupScore = (float)scoreSum;
+ break;
+ default:
+ throw new ArgumentException("can't handle ScoreMergeMode " + scoreMergeMode);
+ }
+
+ // The group sort values are taken from shard 0's entry for this group.
+ mergedGroupDocs[groupIDX] = new GroupDocs<T>(groupScore, maxScore, totalHits, mergedScoreDocs, groupValue, shardGroups[0].Groups[groupIDX].GroupSortValues);
+ totalMaxScore = Math.Max(totalMaxScore, maxScore);
+ }
+
+ // Only attach a total group count when at least one shard supplied one.
+ if (totalGroupCount != null)
+ {
+ var result = new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
+ return new TopGroups<T>(result, totalGroupCount);
+ }
+
+ return new TopGroups<T>(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/Lucene.Net.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/Lucene.Net.Join.csproj b/Lucene.Net.Join/Lucene.Net.Join.csproj
index b38303e..30e64c6 100644
--- a/Lucene.Net.Join/Lucene.Net.Join.csproj
+++ b/Lucene.Net.Join/Lucene.Net.Join.csproj
@@ -48,8 +48,17 @@
<Compile Include="TermsCollector.cs" />
<Compile Include="TermsIncludingScoreQuery.cs" />
<Compile Include="TermsQuery.cs" />
+ <Compile Include="TermsWithScoreCollector.cs" />
+ <Compile Include="ToChildBlockJoinQuery.cs" />
+ <Compile Include="ToParentBlockJoinCollector.cs" />
+ <Compile Include="ToParentBlockJoinFieldComparator.cs" />
+ <Compile Include="ToParentBlockJoinQuery.cs" />
</ItemGroup>
<ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
+ <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
+ <Name>Lucene.Net.Grouping</Name>
+ </ProjectReference>
<ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
<Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
<Name>Lucene.Net</Name>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/ToChildBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToChildBlockJoinQuery.cs b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
new file mode 100644
index 0000000..035301e
--- /dev/null
+++ b/Lucene.Net.Join/ToChildBlockJoinQuery.cs
@@ -0,0 +1,396 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Just like <see cref="ToParentBlockJoinQuery"/>, except this
+ /// query joins in reverse: you provide a Query matching
+ /// parent documents and it joins down to child
+ /// documents.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToChildBlockJoinQuery : Query
+ {
+ /// <summary>
+ /// Message thrown from <see cref="ToChildBlockJoinScorer.ValidateParentDoc"/>
+ /// on mis-use, when the parent query incorrectly returns child docs.
+ /// </summary>
+ internal const string InvalidQueryMessage = "Parent query yields document which is not matched by parents filter, docID=";
+
+ private readonly Filter _parentsFilter;
+ private readonly Query _parentQuery;
+
+ // If we are rewritten, this is the original parentQuery we
+ // were passed; we use this for .equals() and
+ // .hashCode(). This makes rewritten query equal the
+ // original, so that user does not have to .rewrite() their
+ // query before searching:
+ private readonly Query _origParentQuery;
+ private readonly bool _doScores;
+
+ /// <summary>
+ /// Create a ToChildBlockJoinQuery.
+ /// </summary>
+ /// <param name="parentQuery">Query that matches parent documents</param>
+ /// <param name="parentsFilter">Filter (must produce FixedBitSet per-segment, like <see cref="FixedBitSetCachingWrapperFilter"/>)
+ /// identifying the parent documents.</param>
+ /// <param name="doScores">True if parent scores should be calculated.</param>
+ public ToChildBlockJoinQuery(Query parentQuery, Filter parentsFilter, bool doScores)
+ : this(parentQuery, parentQuery, parentsFilter, doScores)
+ {
+ // A freshly built query has not been rewritten yet, so the "original"
+ // and the "current" parent query are one and the same instance.
+ }
+
+ /// <summary>
+ /// Builds a query that remembers both the parent query it was originally
+ /// given and the parent query it currently executes.
+ /// </summary>
+ private ToChildBlockJoinQuery(Query origParentQuery, Query parentQuery, Filter parentsFilter, bool doScores)
+ {
+ this._doScores = doScores;
+ this._parentsFilter = parentsFilter;
+ this._parentQuery = parentQuery;
+ this._origParentQuery = origParentQuery;
+ }
+
+ /// <summary>
+ /// Creates the weight of the parent query and wraps it in a join weight.
+ /// </summary>
+ public override Weight CreateWeight(IndexSearcher searcher)
+ {
+ Weight parentWeight = _parentQuery.CreateWeight(searcher);
+ return new ToChildBlockJoinWeight(this, parentWeight, _parentsFilter, _doScores);
+ }
+
+ /// <summary>
+ /// Weight that delegates normalization to the parent query's weight and
+ /// produces a scorer that walks the children of matching parents.
+ /// </summary>
+ private class ToChildBlockJoinWeight : Weight
+ {
+ private readonly Query _joinQuery;
+ private readonly Weight _parentWeight;
+ private readonly Filter _parentsFilter;
+ private readonly bool _doScores;
+
+ public ToChildBlockJoinWeight(Query joinQuery, Weight parentWeight, Filter parentsFilter, bool doScores)
+ {
+ this._joinQuery = joinQuery;
+ this._parentWeight = parentWeight;
+ this._parentsFilter = parentsFilter;
+ this._doScores = doScores;
+ }
+
+ public override Query Query
+ {
+ get
+ {
+ return _joinQuery;
+ }
+ }
+
+ public override float ValueForNormalization
+ {
+ get
+ {
+ // Parent value scaled by the square of the join query's boost.
+ return _parentWeight.ValueForNormalization * _joinQuery.Boost * _joinQuery.Boost;
+ }
+ }
+
+ public override void Normalize(float norm, float topLevelBoost)
+ {
+ float boostedTopLevel = topLevelBoost * _joinQuery.Boost;
+ _parentWeight.Normalize(norm, boostedTopLevel);
+ }
+
+ // NOTE: acceptDocs applies (and is checked) only in the child document space
+ public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
+ {
+ Scorer parentScorer = _parentWeight.Scorer(readerContext, null);
+ if (parentScorer == null)
+ {
+ // The parent query matches nothing in this segment
+ return null;
+ }
+
+ // acceptDocs is deliberately NOT handed to the filter: doing so would
+ // (most likely, justifiably) make it return a BitsFilteredDocIdSet
+ // instead of a FixedBitSet. acceptDocs is applied while scoring instead.
+ DocIdSet parents = _parentsFilter.GetDocIdSet(readerContext, null);
+ if (parents == null)
+ {
+ // The filter knows no parent documents in this segment
+ return null;
+ }
+
+ FixedBitSet parentBits = parents as FixedBitSet;
+ if (parentBits == null)
+ {
+ throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
+ }
+
+ return new ToChildBlockJoinScorer(this, parentScorer, parentBits, _doScores, acceptDocs);
+ }
+
+ public override Explanation Explain(AtomicReaderContext reader, int doc)
+ {
+ // TODO
+ string message = GetType().Name + " cannot explain match on parent document";
+ throw new NotSupportedException(message);
+ }
+
+ public override bool ScoresDocsOutOfOrder()
+ {
+ return false;
+ }
+ }
+
+ /// <summary>
+ /// Scorer that iterates the child documents of every parent document matched
+ /// by the wrapped parent scorer. The children of a parent are the documents
+ /// between the previous set bit in the parent bit set (exclusive) and the
+ /// parent itself (exclusive). Each child reports its parent's score and freq.
+ /// </summary>
+ private sealed class ToChildBlockJoinScorer : Scorer
+ {
+ private readonly Scorer _parentScorer;
+ private readonly FixedBitSet _parentBits;
+ private readonly bool _doScores;
+ private readonly Bits _acceptDocs;
+
+ // Score/freq of the current parent; only refreshed when _doScores is true.
+ private float _parentScore;
+ private int _parentFreq = 1;
+
+ // _childDoc starts at -1 and _parentDoc at 0, so the first NextDoc() call
+ // sees "_childDoc + 1 == _parentDoc" and immediately pulls the first parent.
+ private int _childDoc = -1;
+ private int _parentDoc;
+
+ public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, bool doScores, Bits acceptDocs) : base(weight)
+ {
+ _doScores = doScores;
+ _parentBits = parentBits;
+ _parentScorer = parentScorer;
+ _acceptDocs = acceptDocs;
+ }
+
+ public override ICollection<ChildScorer> Children
+ {
+ get { return Collections.Singleton(new ChildScorer(_parentScorer, "BLOCK_JOIN")); }
+ }
+
+ /// <summary>
+ /// Moves to the next accepted child document, pulling the next parent from
+ /// the parent scorer whenever the children of the current one are exhausted.
+ /// </summary>
+ /// <returns>The child doc id, or NO_MORE_DOCS when the parent scorer is exhausted.</returns>
+ public override int NextDoc()
+ {
+ // Loop until we hit a childDoc that's accepted
+ while (true)
+ {
+ if (_childDoc + 1 == _parentDoc)
+ {
+ // OK, we are done iterating through all children
+ // matching this one parent doc, so we now nextDoc()
+ // the parent. Use a while loop because we may have
+ // to skip over some number of parents w/ no
+ // children:
+ while (true)
+ {
+ _parentDoc = _parentScorer.NextDoc();
+ ValidateParentDoc();
+
+ if (_parentDoc == 0)
+ {
+ // Degenerate but allowed: first parent doc has no children
+ // TODO: would be nice to pull initial parent
+ // into ctor so we can skip this if... but it's
+ // tricky because scorer must return -1 for
+ // .doc() on init...
+ _parentDoc = _parentScorer.NextDoc();
+ ValidateParentDoc();
+ }
+
+ if (_parentDoc == NO_MORE_DOCS)
+ {
+ _childDoc = NO_MORE_DOCS;
+ return _childDoc;
+ }
+
+ // Go to first child for this next parentDoc:
+ _childDoc = 1 + _parentBits.PrevSetBit(_parentDoc - 1);
+
+ if (_childDoc == _parentDoc)
+ {
+ // This parent has no children; continue
+ // parent loop so we move to next parent
+ continue;
+ }
+
+ // Capture the parent's score BEFORE consulting acceptDocs. If the
+ // first child is rejected, the remaining children of this parent
+ // are returned by the tail of the outer loop, which never touches
+ // the score; capturing it later would leave them with the
+ // previous parent's values.
+ if (_doScores)
+ {
+ _parentScore = _parentScorer.Score();
+ _parentFreq = _parentScorer.Freq();
+ }
+
+ if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+ {
+ // Restart the outer loop so that the "children exhausted"
+ // check runs again before the next child is tried.
+ goto nextChildDocContinue;
+ }
+
+ // _childDoc is at most _parentDoc, and equality was handled above.
+ Debug.Assert(_childDoc < _parentDoc, "childDoc=" + _childDoc + " parentDoc=" + _parentDoc);
+ return _childDoc;
+ }
+ }
+
+ Debug.Assert(_childDoc < _parentDoc, "childDoc=" + _childDoc + " parentDoc=" + _parentDoc);
+ _childDoc++;
+ if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+ {
+ continue;
+ }
+ return _childDoc;
+ nextChildDocContinue:;
+ }
+ }
+
+ /// <summary>
+ /// Detect mis-use, where provided parent query in fact sometimes returns child documents.
+ /// </summary>
+ private void ValidateParentDoc()
+ {
+ if (_parentDoc != NO_MORE_DOCS && !_parentBits.Get(_parentDoc))
+ {
+ throw new InvalidOperationException(InvalidQueryMessage + _parentDoc);
+ }
+ }
+
+ public override int DocID()
+ {
+ return _childDoc;
+ }
+
+ public override float Score()
+ {
+ return _parentScore;
+ }
+
+ public override int Freq()
+ {
+ return _parentFreq;
+ }
+
+ /// <summary>
+ /// Advances to the first accepted child document at or after
+ /// <paramref name="childTarget"/>, which must not itself be a parent document.
+ /// </summary>
+ /// <returns>The child doc id, or NO_MORE_DOCS when no further child matches.</returns>
+ public override int Advance(int childTarget)
+ {
+ Debug.Assert(childTarget >= _parentBits.Length() || !_parentBits.Get(childTarget));
+
+ if (childTarget == NO_MORE_DOCS)
+ {
+ return _childDoc = _parentDoc = NO_MORE_DOCS;
+ }
+
+ Debug.Assert(_childDoc == -1 || childTarget != _parentDoc, "childTarget=" + childTarget);
+ if (_childDoc == -1 || childTarget > _parentDoc)
+ {
+ // Advance to new parent:
+ _parentDoc = _parentScorer.Advance(childTarget);
+ ValidateParentDoc();
+ Debug.Assert(_parentDoc > childTarget);
+
+ // Find the first child of the new parent. PrevSetBit yields the
+ // PREVIOUS parent, so the first child is one past it (exactly as in
+ // NextDoc). Without the +1 a target inside an earlier, non-matching
+ // block would be clamped onto that previous parent document.
+ // Parents without children are skipped here for the same reason
+ // NextDoc skips them.
+ int firstChild;
+ while (true)
+ {
+ if (_parentDoc == NO_MORE_DOCS)
+ {
+ return _childDoc = NO_MORE_DOCS;
+ }
+
+ firstChild = 1 + _parentBits.PrevSetBit(_parentDoc - 1);
+ if (firstChild != _parentDoc)
+ {
+ break;
+ }
+
+ _parentDoc = _parentScorer.NextDoc();
+ ValidateParentDoc();
+ }
+
+ if (_doScores)
+ {
+ _parentScore = _parentScorer.Score();
+ _parentFreq = _parentScorer.Freq();
+ }
+ childTarget = Math.Max(childTarget, firstChild);
+ }
+
+ Debug.Assert(childTarget < _parentDoc);
+
+ // Advance within children of current parent:
+ _childDoc = childTarget;
+ if (_acceptDocs != null && !_acceptDocs.Get(_childDoc))
+ {
+ // Target itself is not accepted: let NextDoc find the next accepted child.
+ NextDoc();
+ }
+ return _childDoc;
+ }
+
+ public override long Cost()
+ {
+ return _parentScorer.Cost();
+ }
+ }
+
+ /// <summary>
+ /// Adds the terms of the wrapped parent query to <paramref name="terms"/>;
+ /// the join itself contributes no terms of its own.
+ /// </summary>
+ public override void ExtractTerms(ISet<Term> terms)
+ {
+ _parentQuery.ExtractTerms(terms);
+ }
+
+ /// <summary>
+ /// Rewrites the wrapped parent query. If that yields a different query, a new
+ /// join query around the rewritten parent is returned, carrying this query's boost.
+ /// </summary>
+ /// <param name="reader">Reader the parent query is rewritten against.</param>
+ /// <returns>
+ /// This instance when the parent query rewrites to itself; otherwise the new join query.
+ /// </returns>
+ public override Query Rewrite(IndexReader reader)
+ {
+ Query parentRewrite = _parentQuery.Rewrite(reader);
+ if (parentRewrite != _parentQuery)
+ {
+ // Hand on the ORIGINAL parent query, not the current one. Equals and
+ // GetHashCode are based on the original, and this instance may itself
+ // already be the product of an earlier rewrite step, in which case
+ // _parentQuery is no longer the query the user supplied.
+ Query rewritten = new ToChildBlockJoinQuery(_origParentQuery, parentRewrite, _parentsFilter, _doScores);
+ rewritten.Boost = Boost;
+ return rewritten;
+ }
+
+ return this;
+ }
+
+ /// <summary>
+ /// Renders the query as its type name followed by the current parent query
+ /// in parentheses; <paramref name="field"/> is not used.
+ /// </summary>
+ public override string ToString(string field)
+ {
+ return string.Concat("ToChildBlockJoinQuery (", _parentQuery, ")");
+ }
+
+ /// <summary>
+ /// Typed equality: the base query state, the original parent query, the
+ /// scoring flag and the parents filter must all match.
+ /// </summary>
+ protected bool Equals(ToChildBlockJoinQuery other)
+ {
+ if (!base.Equals(other))
+ {
+ return false;
+ }
+ if (!Equals(_origParentQuery, other._origParentQuery))
+ {
+ return false;
+ }
+ if (_doScores != other._doScores)
+ {
+ return false;
+ }
+ return Equals(_parentsFilter, other._parentsFilter);
+ }
+
+ /// <summary>
+ /// Two queries are equal when they are the same instance, or have exactly
+ /// the same runtime type and pass the typed comparison.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+ if (ReferenceEquals(this, obj))
+ {
+ return true;
+ }
+
+ bool sameRuntimeType = !ReferenceEquals(null, obj) && obj.GetType() == GetType();
+ return sameRuntimeType && Equals((ToChildBlockJoinQuery) obj);
+ }
+
+ /// <summary>
+ /// Hash over the same state that the equality check compares: the base hash,
+ /// the original parent query, the scoring flag and the parents filter.
+ /// </summary>
+ public override int GetHashCode()
+ {
+ int origQueryHash = _origParentQuery != null ? _origParentQuery.GetHashCode() : 0;
+ int filterHash = _parentsFilter != null ? _parentsFilter.GetHashCode() : 0;
+
+ unchecked
+ {
+ int result = base.GetHashCode();
+ result = (result * 397) ^ origQueryHash;
+ result = (result * 397) ^ _doScores.GetHashCode();
+ result = (result * 397) ^ filterHash;
+ return result;
+ }
+ }
+
+ /// <summary>
+ /// Creates a new join query around a clone of the original parent query,
+ /// reusing the same parents filter and scoring flag.
+ /// </summary>
+ /// <returns>The new <see cref="ToChildBlockJoinQuery"/>.</returns>
+ public override object Clone()
+ {
+ // The parent query is an arbitrary Query. Casting its clone to
+ // ToChildBlockJoinQuery would throw InvalidCastException for every
+ // parent query that is not itself a ToChildBlockJoinQuery.
+ return new ToChildBlockJoinQuery((Query) _origParentQuery.Clone(), _parentsFilter, _doScores);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/ToParentBlockJoinCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinCollector.cs b/Lucene.Net.Join/ToParentBlockJoinCollector.cs
new file mode 100644
index 0000000..22fa53e
--- /dev/null
+++ b/Lucene.Net.Join/ToParentBlockJoinCollector.cs
@@ -0,0 +1,560 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Grouping;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Collects parent document hits for a Query containing one or more
+ /// BlockJoinQuery clauses, sorted by the
+ /// specified parent Sort. Note that this cannot perform
+ /// arbitrary joins; rather, it requires that all joined
+ /// documents are indexed as a doc block (using {@link
+ /// IndexWriter#addDocuments} or {@link
+ /// IndexWriter#updateDocuments}). Ie, the join is computed
+ /// at index time.
+ ///
+ /// <p>The parent Sort must only use
+ /// fields from the parent documents; sorting by field in
+ /// the child documents is not supported.</p>
+ ///
+ /// <p>You should only use this
+ /// collector if one or more of the clauses in the query is
+ /// a <seealso cref="ToParentBlockJoinQuery"/>. This collector will find those query
+ /// clauses and record the matching child documents for the
+ /// top scoring parent documents.</p>
+ ///
+ /// <p>Multiple joins (star join) and nested joins and a mix
+ /// of the two are allowed, as long as in all cases the
+ /// documents corresponding to a single row of each joined
+ /// parent table were indexed as a doc block.</p>
+ ///
+ /// <p>For the simple star join you can retrieve the
+ /// <seealso cref="TopGroups"/> instance containing each <seealso cref="ToParentBlockJoinQuery"/>'s
+ /// matching child documents for the top parent groups,
+ /// using <seealso cref="#getTopGroups"/>. Ie,
+ /// a single query, which will contain two or more
+ /// <seealso cref="ToParentBlockJoinQuery"/>'s as clauses representing the star join,
+ /// can then retrieve two or more <seealso cref="TopGroups"/> instances.</p>
+ ///
+ /// <p>For nested joins, the query will run correctly (ie,
+ /// match the right parent and child documents), however,
+ /// because TopGroups is currently unable to support nesting
+ /// (each group is not able to hold another TopGroups), you
+ /// are only able to retrieve the TopGroups of the first
+ /// join. The TopGroups of the nested joins will not be
+ /// correct.
+ ///
+ /// See <seealso cref="org.apache.lucene.search.join"/> for a code
+ /// sample.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinCollector : Collector
+ {
+ private readonly Sort sort;
+
+ // Maps each BlockJoinQuery instance to its "slot" in
+ // joinScorers and in OneGroup's cached doc/scores/count:
+ private readonly IDictionary<Query, int?> joinQueryID = new Dictionary<Query, int?>();
+ private readonly int numParentHits;
+ private readonly FieldValueHitQueue<OneGroup> queue;
+ private readonly FieldComparator[] comparators;
+ private readonly int[] reverseMul;
+ private readonly int compEnd;
+ private readonly bool trackMaxScore;
+ private readonly bool trackScores;
+
+ private int docBase;
+ private ToParentBlockJoinQuery.BlockJoinScorer[] joinScorers = new ToParentBlockJoinQuery.BlockJoinScorer[0];
+ private AtomicReaderContext currentReaderContext;
+ private Scorer scorer;
+ private bool queueFull;
+
+ private OneGroup bottom;
+ private int totalHitCount;
+ private float maxScore = float.NaN;
+
+ /// <summary>
+ /// Creates a ToParentBlockJoinCollector. The provided sort must
+ /// not be null. If you pass true trackScores, all
+ /// ToParentBlockQuery instances must not use
+ /// ScoreMode.None.
+ /// </summary>
+ public ToParentBlockJoinCollector(Sort sort, int numParentHits, bool trackScores, bool trackMaxScore)
+ {
+ // TODO: allow null sort to be specialized to relevance
+ // only collector
+ this.sort = sort;
+ this.trackMaxScore = trackMaxScore;
+
+ // maxScore is NaN by default; it only gets a real starting value
+ // when the maximum score is actually tracked.
+ if (this.trackMaxScore)
+ {
+ this.maxScore = float.MinValue;
+ }
+
+ this.trackScores = trackScores;
+ this.numParentHits = numParentHits;
+
+ // The queue owns the comparators; keep direct references for the hot path.
+ this.queue = FieldValueHitQueue.Create<OneGroup>(sort.GetSort(), numParentHits);
+ this.comparators = this.queue.Comparators;
+ this.reverseMul = this.queue.ReverseMul;
+ this.compEnd = this.comparators.Length - 1;
+ }
+
+ /// <summary>
+ /// Queue entry for one parent hit, holding per join slot the child doc ids,
+ /// the child scores (only when scores are requested) and the child count.
+ /// </summary>
+ private sealed class OneGroup : FieldValueHitQueue.Entry
+ {
+ // Starting length of each per-join child array.
+ private const int InitialChildCapacity = 5;
+
+ internal AtomicReaderContext readerContext;
+ internal int[][] docs;
+ internal float[][] scores;
+ internal int[] counts;
+
+ public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, bool doScores)
+ : base(comparatorSlot, parentDoc, parentScore)
+ {
+ counts = new int[numJoins];
+ docs = new int[numJoins][];
+ if (doScores)
+ {
+ scores = new float[numJoins][];
+ }
+
+ for (int slot = 0; slot < numJoins; slot++)
+ {
+ docs[slot] = new int[InitialChildCapacity];
+ if (doScores)
+ {
+ scores[slot] = new float[InitialChildCapacity];
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Collects one parent hit. While the queue is filling up every hit is added;
+ /// once it is full a hit only replaces the current bottom entry if the sort
+ /// comparators rank it strictly better.
+ /// </summary>
+ /// <param name="parentDoc">Segment-relative id of the parent document; docBase is added before it is stored.</param>
+ public override void Collect(int parentDoc)
+ {
+ // Every collected parent counts as a hit, competitive or not.
+ totalHitCount++;
+
+ float score = float.NaN;
+
+ // The score is needed for every hit only when the maximum is tracked;
+ // otherwise it is computed lazily below, and only for competitive hits.
+ if (trackMaxScore)
+ {
+ score = scorer.Score();
+ maxScore = Math.Max(maxScore, score);
+ }
+
+ // TODO: we could sweep all joinScorers here and
+ // aggregate total child hit count, so we can fill this
+ // in getTopGroups (we wire it to 0 now)
+
+ if (queueFull)
+ {
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(parentDoc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ return;
+ }
+ if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ if (i == compEnd)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return;
+ }
+ }
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.Slot, parentDoc);
+ }
+ if (!trackMaxScore && trackScores)
+ {
+ score = scorer.Score();
+ }
+ // Reuse the bottom entry in place instead of allocating a new one.
+ bottom.Doc = docBase + parentDoc;
+ bottom.readerContext = currentReaderContext;
+ bottom.Score = score;
+ CopyGroups(bottom);
+ bottom = queue.UpdateTop();
+
+ // Tell every comparator which slot is the new bottom.
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Bottom = bottom.Slot;
+ }
+ }
+ else
+ {
+ // Startup transient: queue is not yet full:
+ int comparatorSlot = totalHitCount - 1;
+
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(comparatorSlot, parentDoc);
+ }
+ if (!trackMaxScore && trackScores)
+ {
+ score = scorer.Score();
+ }
+ OneGroup og = new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.Length, trackScores);
+ og.readerContext = currentReaderContext;
+ CopyGroups(og);
+ bottom = queue.Add(og);
+ queueFull = totalHitCount == numParentHits;
+ if (queueFull)
+ {
+ // End of startup transient: queue just filled up:
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Bottom = bottom.Slot;
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Pulls the pending child docs (and child scores, when tracked) out of every
+ /// join scorer currently positioned on the parent held by <paramref name="og"/>.
+ /// Slots whose scorer is missing or sits on another parent get a count of 0.
+ /// </summary>
+ private void CopyGroups(OneGroup og)
+ {
+ // While rare, it's possible top arrays could be too
+ // short if join query had null scorer on first
+ // segment(s) but then became non-null on later segments
+ int numSubScorers = joinScorers.Length;
+ if (og.docs.Length < numSubScorers)
+ {
+ // Grow the per-join arrays; the same rare case applies to
+ // counts and (when tracked) scores below.
+ og.docs = ArrayUtil.Grow(og.docs);
+ }
+ if (og.counts.Length < numSubScorers)
+ {
+ og.counts = ArrayUtil.Grow(og.counts);
+ }
+ if (trackScores && og.scores.Length < numSubScorers)
+ {
+ og.scores = ArrayUtil.Grow(og.scores);
+ }
+
+ for (int scorerIDX = 0; scorerIDX < numSubScorers; scorerIDX++)
+ {
+ ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
+ // og.Doc is stored with docBase added, so rebase the scorer's parent before comparing.
+ if (joinScorer != null && docBase + joinScorer.ParentDoc == og.Doc)
+ {
+ og.counts[scorerIDX] = joinScorer.ChildCount;
+ // Trade arrays with the scorer: og receives the scorer's array and hands over its own.
+ og.docs[scorerIDX] = joinScorer.SwapChildDocs(og.docs[scorerIDX]);
+ Debug.Assert(og.docs[scorerIDX].Length >= og.counts[scorerIDX], "length=" + og.docs[scorerIDX].Length + " vs count=" + og.counts[scorerIDX]);
+ if (trackScores)
+ {
+ // Same exchange for the child scores.
+ og.scores[scorerIDX] = joinScorer.SwapChildScores(og.scores[scorerIDX]);
+ Debug.Assert(og.scores[scorerIDX].Length >= og.counts[scorerIDX], "length=" + og.scores[scorerIDX].Length + " vs count=" + og.counts[scorerIDX]);
+ }
+ }
+ else
+ {
+ og.counts[scorerIDX] = 0;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Switches the collector to a new segment: remembers the context and its
+ /// doc base, then installs the per-segment comparator for every sort slot.
+ /// </summary>
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ currentReaderContext = value;
+ docBase = value.DocBase;
+
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ var perSegment = comparators[i].SetNextReader(value);
+ queue.SetComparator(i, perSegment);
+ }
+ }
+ }
+
+ /// <summary>
+ /// This collector always reports that it does not accept out-of-order documents.
+ /// </summary>
+ /// <returns> Always <c>false</c>. </returns>
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ const bool outOfOrderAccepted = false;
+ return outOfOrderAccepted;
+ }
+
+ /// <summary>
+ /// Registers a block-join scorer for the current segment. The first time a
+ /// query is seen it is assigned the next free slot and <c>joinScorers</c>
+ /// grows by one; afterwards the scorer simply replaces the one in the
+ /// query's existing slot.
+ /// </summary>
+ /// <param name="query"> The join query the scorer belongs to </param>
+ /// <param name="scorer"> The scorer to enroll; it is told to track pending child hits </param>
+ private void enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer)
+ {
+ scorer.TrackPendingChildHits();
+
+ // Use TryGetValue rather than the indexer: on a standard dictionary the
+ // indexer throws KeyNotFoundException for a query that has not been
+ // enrolled yet, which would make the "first seen" path unreachable.
+ // NOTE(review): joinQueryID is declared outside this block; confirm it is
+ // an IDictionary<Query, int?>.
+ int? slot;
+ if (joinQueryID.TryGetValue(query, out slot) && slot != null)
+ {
+ joinScorers[slot.Value] = scorer;
+ return;
+ }
+
+ // First time this query is seen: append a slot for it.
+ joinQueryID[query] = joinScorers.Length;
+ ToParentBlockJoinQuery.BlockJoinScorer[] newArray = new ToParentBlockJoinQuery.BlockJoinScorer[1 + joinScorers.Length];
+ Array.Copy(joinScorers, 0, newArray, 0, joinScorers.Length);
+ newArray[newArray.Length - 1] = scorer;
+ joinScorers = newArray;
+ }
+
+ /// <summary>
+ /// Installs the scorer for the current segment. The scorer is wrapped so
+ /// its score is cached, handed to every comparator, and then the scorer
+ /// tree is walked breadth-first to enroll every block-join scorer found.
+ /// </summary>
+ public override Scorer Scorer
+ {
+ set
+ {
+ // Since we invoke Score(), and the comparators likely do as well,
+ // cache it so it's only "really" computed once:
+ scorer = new ScoreCachingWrappingScorer(value);
+ for (int compIdx = 0; compIdx < comparators.Length; compIdx++)
+ {
+ comparators[compIdx].Scorer = scorer;
+ }
+ Arrays.Fill(joinScorers, null);
+
+ // Breadth-first walk over the scorer tree. Each dequeued scorer must be
+ // the one that is inspected and expanded; inspecting 'value' on every
+ // pass would only ever visit the root and would never drain the queue
+ // once the root has a child.
+ var pending = new Queue<Scorer>();
+ pending.Enqueue(value);
+ while (pending.Count > 0)
+ {
+ var current = pending.Dequeue();
+ if (current == null)
+ {
+ // Same as before: a null entry ends the walk.
+ break;
+ }
+
+ var joinScorer = current as ToParentBlockJoinQuery.BlockJoinScorer;
+ if (joinScorer != null)
+ {
+ enroll((ToParentBlockJoinQuery)current.Weight.Query, joinScorer);
+ }
+
+ foreach (Scorer.ChildScorer sub in current.Children)
+ {
+ pending.Enqueue(sub.Child);
+ }
+ }
+ }
+ }
+
+ private OneGroup[] sortedGroups;
+
+ // Drains the priority queue into sortedGroups. Each popped entry is
+ // written to the highest free index, so the array ends up in the reverse
+ // of pop order.
+ private void sortQueue()
+ {
+ int remaining = queue.Size();
+ sortedGroups = new OneGroup[remaining];
+ while (remaining > 0)
+ {
+ remaining--;
+ sortedGroups[remaining] = queue.Pop();
+ }
+ }
+
+ /// <summary>
+ /// Returns the TopGroups for the specified
+ /// BlockJoinQuery. The groupValue of each GroupDocs will
+ /// be the parent docID for that group.
+ /// The number of documents within each group is calculated as minimum of <c>maxDocsPerGroup</c>
+ /// and number of matched child documents for that group.
+ /// Returns null if no groups matched.
+ /// </summary>
+ /// <param name="query"> Search query </param>
+ /// <param name="withinGroupSort"> Sort criteria within groups </param>
+ /// <param name="offset"> Parent docs offset </param>
+ /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
+ /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
+ /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
+ /// <returns> TopGroups for specified query, or null when nothing matched or <paramref name="offset"/> is past the collected groups </returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ public virtual TopGroups<int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)
+ {
+ // A query that was never enrolled has no slot. Look it up with
+ // TryGetValue because the indexer of a standard dictionary throws
+ // KeyNotFoundException for a missing key instead of yielding null.
+ // NOTE(review): joinQueryID is declared outside this block; confirm it is
+ // an IDictionary<Query, int?>.
+ int? slot;
+ if (!joinQueryID.TryGetValue(query, out slot))
+ {
+ slot = null;
+ }
+
+ if (slot == null && totalHitCount == 0)
+ {
+ return null;
+ }
+
+ if (sortedGroups == null)
+ {
+ if (offset >= queue.Size())
+ {
+ return null;
+ }
+ sortQueue();
+ }
+ else if (offset > sortedGroups.Length)
+ {
+ // NOTE(review): this path uses '>' while the first-call path above uses
+ // '>='; an offset equal to the group count therefore yields an empty
+ // TopGroups here rather than null. Left unchanged.
+ return null;
+ }
+
+ // -1 tells AccumulateGroups that the query has no slot.
+ return AccumulateGroups(slot ?? -1, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
+ }
+
+ /// <summary>
+ /// Accumulates groups for the BlockJoinQuery specified by its slot.
+ /// For every parent group from <paramref name="offset"/> onwards, the child docs
+ /// recorded for that slot are replayed into a fresh per-group collector, and the
+ /// resulting top docs become one <c>GroupDocs</c> entry.
+ /// </summary>
+ /// <param name="slot"> Search query's slot, or -1 when the query has no slot (every group then has zero child docs) </param>
+ /// <param name="offset"> Parent docs offset </param>
+ /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
+ /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
+ /// <param name="withinGroupSort"> Sort criteria within groups; null means sort by score </param>
+ /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
+ /// <returns> TopGroups for the query specified by slot </returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ /// <exception cref="ArgumentException"> raised while processing a group when <paramref name="withinGroupSort"/> is null but scores were not tracked </exception>
+ private TopGroups<int> AccumulateGroups(int slot, int offset, int maxDocsPerGroup, int withinGroupOffset, Sort withinGroupSort, bool fillSortFields)
+ {
+ // One output entry per sorted parent group at or after 'offset'.
+ var groups = new GroupDocs<int>[sortedGroups.Length - offset];
+ // Stand-in scorer used to feed the stored child doc ids / scores to the per-group collector.
+ var fakeScorer = new FakeScorer();
+
+ int totalGroupedHitCount = 0;
+ //System.out.println("slot=" + slot);
+
+ for (int groupIdx = offset; groupIdx < sortedGroups.Length; groupIdx++)
+ {
+ OneGroup og = sortedGroups[groupIdx];
+ int numChildDocs;
+ // No slot for this query, or this group's arrays were sized before the slot existed: no children.
+ if (slot == -1 || slot >= og.counts.Length)
+ {
+ numChildDocs = 0;
+ }
+ else
+ {
+ numChildDocs = og.counts[slot];
+ }
+
+ // Number of documents in group should be bounded to prevent redundant memory allocation
+ // (never below 1, presumably because the collectors need a positive size -- TODO confirm).
+ int numDocsInGroup = Math.Max(1, Math.Min(numChildDocs, maxDocsPerGroup));
+ //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
+
+ // At this point we hold all docs w/ in each group, unsorted; we now sort them:
+ Collector collector;
+ if (withinGroupSort == null)
+ {
+ //System.out.println("sort by score");
+ // Sort by score
+ if (!trackScores)
+ {
+ throw new ArgumentException("cannot sort by relevance within group: trackScores=false");
+ }
+ collector = TopScoreDocCollector.Create(numDocsInGroup, true);
+ }
+ else
+ {
+ // Sort by fields
+ collector = TopFieldCollector.Create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
+ }
+
+ // Replay this group's child docs through the collector, using the segment the parent was collected in.
+ collector.Scorer = fakeScorer;
+ collector.NextReader = og.readerContext;
+ for (int docIdx = 0; docIdx < numChildDocs; docIdx++)
+ {
+ //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
+ int doc = og.docs[slot][docIdx];
+ fakeScorer.doc = doc;
+ if (trackScores)
+ {
+ fakeScorer._score = og.scores[slot][docIdx];
+ }
+ collector.Collect(doc);
+ }
+ // Counts every child doc of the group, not only the ones that fit in numDocsInGroup.
+ totalGroupedHitCount += numChildDocs;
+
+ object[] groupSortValues;
+
+ if (fillSortFields)
+ {
+ // Read the parent-level sort values back from the comparators via the group's slot.
+ groupSortValues = new object[comparators.Length];
+ for (int sortFieldIdx = 0; sortFieldIdx < comparators.Length; sortFieldIdx++)
+ {
+ groupSortValues[sortFieldIdx] = comparators[sortFieldIdx].Value(og.Slot);
+ }
+ }
+ else
+ {
+ groupSortValues = null;
+ }
+
+ // Cast back to the concrete collector type that was created above to read its top docs.
+ TopDocs topDocs;
+ if (withinGroupSort == null)
+ {
+ var tempCollector = (TopScoreDocCollector) collector;
+ topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
+ }
+ else
+ {
+ var tempCollector = (TopFieldCollector) collector;
+ topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
+ }
+
+ // The parent doc id (og.Doc) is passed as the group value.
+ groups[groupIdx - offset] = new GroupDocs<int>(og.Score, topDocs.MaxScore, numChildDocs, topDocs.ScoreDocs, og.Doc, groupSortValues);
+ }
+
+ // The inner TopGroups carries the per-group results; the outer one attaches totalHitCount.
+ return new TopGroups<int>(new TopGroups<int>(sort.GetSort(), withinGroupSort == null ? null : withinGroupSort.GetSort(), 0, totalGroupedHitCount, groups, maxScore), totalHitCount);
+ }
+
+ /// <summary>
+ /// Convenience form of <c>GetTopGroups</c> that places no cap on the number of
+ /// child documents per group: it passes <c>int.MaxValue</c> as the per-group
+ /// limit. The groupValue of each GroupDocs will be the parent docID for
+ /// that group. Returns null if no groups matched.
+ /// </summary>
+ /// <param name="query">Search query</param>
+ /// <param name="withinGroupSort">Sort criteria within groups</param>
+ /// <param name="offset">Parent docs offset</param>
+ /// <param name="withinGroupOffset">Offset within each group of child docs</param>
+ /// <param name="fillSortFields">Specifies whether to add sort fields or not</param>
+ /// <returns>TopGroups for specified query</returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ public virtual TopGroups<int> GetTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int withinGroupOffset, bool fillSortFields)
+ {
+ const int noPerGroupLimit = int.MaxValue;
+ TopGroups<int> allChildren = GetTopGroups(query, withinGroupSort, offset, noPerGroupLimit, withinGroupOffset, fillSortFields);
+ return allChildren;
+ }
+
+ /// <summary>
+ /// Gets the highest score across all collected parent hits. This is only
+ /// meaningful when max-score tracking was requested when the collector was
+ /// constructed; otherwise the value is expected to be <c>float.NaN</c>
+ /// (the field is initialised outside this member).
+ /// </summary>
+ public virtual float MaxScore
+ {
+ get
+ {
+ return maxScore;
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs b/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
new file mode 100644
index 0000000..4386e39
--- /dev/null
+++ b/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
@@ -0,0 +1,391 @@
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A field comparator that allows parent documents to be sorted by fields
+ /// from the nested / child documents. All slot bookkeeping is delegated to a
+ /// wrapped child-level comparator; this class only decides which child of a
+ /// parent's block supplies the value.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public abstract class ToParentBlockJoinFieldComparator : FieldComparator<object>
+ {
+ // Returned by the child-scanning helpers when the block has no (further) matching child.
+ private const int NoChild = -1;
+
+ private readonly Filter _parentFilter;
+ private readonly Filter _childFilter;
+ private readonly int _spareSlot;
+
+ private FieldComparator<object> _wrappedComparator;
+ // Per-segment bit sets; null when the corresponding filter matched nothing in the segment.
+ private FixedBitSet _parentDocuments;
+ private FixedBitSet _childDocuments;
+
+ internal ToParentBlockJoinFieldComparator(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ {
+ _wrappedComparator = wrappedComparator;
+ _parentFilter = parentFilter;
+ _childFilter = childFilter;
+ _spareSlot = spareSlot;
+ }
+
+ /// <summary> Compares two slots using the wrapped comparator. </summary>
+ public override int Compare(int slot1, int slot2)
+ {
+ return _wrappedComparator.Compare(slot1, slot2);
+ }
+
+ /// <summary> Forwards the bottom slot to the wrapped comparator. </summary>
+ public override int Bottom
+ {
+ set
+ {
+ _wrappedComparator.Bottom = value;
+ }
+ }
+
+ /// <summary> Forwards the top value to the wrapped comparator. </summary>
+ public override object TopValue
+ {
+ set
+ {
+ _wrappedComparator.TopValue = value;
+ }
+ }
+
+ /// <summary>
+ /// Prepares the comparator for a new segment: resolves the child and parent
+ /// filters to bit sets for that segment and advances the wrapped comparator.
+ /// </summary>
+ /// <param name="context"> The segment about to be collected </param>
+ /// <returns> This comparator </returns>
+ public override FieldComparator SetNextReader(AtomicReaderContext context)
+ {
+ _childDocuments = ToBitSetOrNull(_childFilter.GetDocIdSet(context, null), context);
+ _parentDocuments = ToBitSetOrNull(_parentFilter.GetDocIdSet(context, null), context);
+
+ // Direct cast instead of 'as': an unexpected comparator type now fails here
+ // instead of leaving a null that surfaces later as a NullReferenceException.
+ _wrappedComparator = (FieldComparator<object>)_wrappedComparator.SetNextReader(context);
+ return this;
+ }
+
+ // Turns a filter result into a FixedBitSet: null for an empty result, the set
+ // itself when it already is a FixedBitSet, otherwise a copy built from its iterator.
+ private static FixedBitSet ToBitSetOrNull(DocIdSet docIdSet, AtomicReaderContext context)
+ {
+ if (IsEmpty(docIdSet))
+ {
+ return null;
+ }
+
+ var bits = docIdSet as FixedBitSet;
+ if (bits != null)
+ {
+ return bits;
+ }
+
+ DocIdSetIterator iterator = docIdSet.GetIterator();
+ return iterator != null ? ToFixedBitSet(iterator, context.AtomicReader.MaxDoc) : null;
+ }
+
+ private static bool IsEmpty(DocIdSet set)
+ {
+ return set == null;
+ }
+
+ // Materializes every doc produced by the iterator into a bit set of numBits bits.
+ private static FixedBitSet ToFixedBitSet(DocIdSetIterator iterator, int numBits)
+ {
+ var set = new FixedBitSet(numBits);
+ int doc;
+ while ((doc = iterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ set.Set(doc);
+ }
+ return set;
+ }
+
+ // Finds the first matching child in the block that ends at parentDoc, i.e. the
+ // first child bit after the previous parent. Returns NoChild when parentDoc is 0,
+ // when either bit set is missing for this segment, or when no child bit lies
+ // before parentDoc.
+ private int FirstChildInBlock(int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return NoChild;
+ }
+
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ return (childDoc >= parentDoc || childDoc == -1) ? NoChild : childDoc;
+ }
+
+ // Finds the next matching child after childDoc that still precedes parentDoc, or NoChild.
+ private int NextChildInBlock(int childDoc, int parentDoc)
+ {
+ int next = _childDocuments.NextSetBit(childDoc + 1);
+ return (next >= parentDoc || next == -1) ? NoChild : next;
+ }
+
+ /// <summary> Returns the wrapped comparator's value for the slot. </summary>
+ public override IComparable Value(int slot)
+ {
+ return _wrappedComparator.Value(slot);
+ }
+
+ /// <summary>
+ /// Concrete <see cref="ToParentBlockJoinFieldComparator"/> that represents each parent
+ /// document by the lowest value found among its matching child / nested docs.
+ /// </summary>
+ public sealed class Lowest : ToParentBlockJoinFieldComparator
+ {
+ /// <summary>
+ /// Create ToParentBlockJoinFieldComparator.Lowest
+ /// </summary>
+ /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
+ /// <param name="parentFilter">Filter that identifies the parent documents. </param>
+ /// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
+ /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
+ /// inside the parent document scope is most competitive. </param>
+ public Lowest(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+ {
+ }
+
+ /// <summary>
+ /// Compares the bottom slot with the children of <paramref name="parentDoc"/>.
+ /// Returns the first positive child comparison; otherwise 0 if any child
+ /// compared equal, else the first child's result. Returns 0 when the parent
+ /// has no matching children.
+ /// </summary>
+ public override int CompareBottom(int parentDoc)
+ {
+ int childDoc = FirstChildInBlock(parentDoc);
+ if (childDoc == NoChild)
+ {
+ return 0;
+ }
+
+ // We only need to emit a single cmp value for any matching child doc
+ int cmp = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp > 0)
+ {
+ return cmp;
+ }
+
+ while ((childDoc = NextChildInBlock(childDoc, parentDoc)) != NoChild)
+ {
+ int cmp1 = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp1 > 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ return cmp;
+ }
+
+ /// <summary>
+ /// Copies the lowest value among the children of <paramref name="parentDoc"/>
+ /// into <paramref name="slot"/>. Does nothing when the parent has no matching children.
+ /// </summary>
+ public override void Copy(int slot, int parentDoc)
+ {
+ int childDoc = FirstChildInBlock(parentDoc);
+ if (childDoc == NoChild)
+ {
+ return;
+ }
+
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ _wrappedComparator.Copy(slot, childDoc);
+
+ while ((childDoc = NextChildInBlock(childDoc, parentDoc)) != NoChild)
+ {
+ // Stage the candidate in the spare slot and keep it only if it is lower.
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ if (_wrappedComparator.Compare(_spareSlot, slot) < 0)
+ {
+ _wrappedComparator.Copy(slot, childDoc);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Compares the top value with the children of <paramref name="parentDoc"/>,
+ /// following the same rules as <c>CompareBottom</c>. Every child, including
+ /// the first one, is compared with <c>CompareTop</c>.
+ /// </summary>
+ public override int CompareTop(int parentDoc)
+ {
+ int childDoc = FirstChildInBlock(parentDoc);
+ if (childDoc == NoChild)
+ {
+ return 0;
+ }
+
+ // We only need to emit a single cmp value for any matching child doc
+ int cmp = _wrappedComparator.CompareTop(childDoc);
+ if (cmp > 0)
+ {
+ return cmp;
+ }
+
+ while ((childDoc = NextChildInBlock(childDoc, parentDoc)) != NoChild)
+ {
+ int cmp1 = _wrappedComparator.CompareTop(childDoc);
+ if (cmp1 > 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ return cmp;
+ }
+
+ }
+
+ /// <summary>
+ /// Concrete <see cref="ToParentBlockJoinFieldComparator"/> that represents each parent
+ /// document by the highest value found among its matching child / nested docs.
+ /// </summary>
+ public sealed class Highest : ToParentBlockJoinFieldComparator
+ {
+ /// <summary>
+ /// Create ToParentBlockJoinFieldComparator.Highest
+ /// </summary>
+ /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
+ /// <param name="parentFilter">Filter that identifies the parent documents. </param>
+ /// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
+ /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
+ /// inside the parent document scope is most competitive. </param>
+ public Highest(FieldComparator<object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+ {
+ }
+
+ /// <summary>
+ /// Compares the bottom slot with the children of <paramref name="parentDoc"/>.
+ /// Returns the first negative child comparison; otherwise 0 if any child
+ /// compared equal, else the first child's result. Returns 0 when the parent
+ /// has no matching children.
+ /// </summary>
+ public override int CompareBottom(int parentDoc)
+ {
+ int childDoc = FirstChildInBlock(parentDoc);
+ if (childDoc == NoChild)
+ {
+ return 0;
+ }
+
+ int cmp = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp < 0)
+ {
+ return cmp;
+ }
+
+ while ((childDoc = NextChildInBlock(childDoc, parentDoc)) != NoChild)
+ {
+ int cmp1 = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp1 < 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ return cmp;
+ }
+
+ /// <summary>
+ /// Copies the highest value among the children of <paramref name="parentDoc"/>
+ /// into <paramref name="slot"/>. Does nothing when the parent has no matching children.
+ /// </summary>
+ public override void Copy(int slot, int parentDoc)
+ {
+ int childDoc = FirstChildInBlock(parentDoc);
+ if (childDoc == NoChild)
+ {
+ return;
+ }
+
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ _wrappedComparator.Copy(slot, childDoc);
+
+ while ((childDoc = NextChildInBlock(childDoc, parentDoc)) != NoChild)
+ {
+ // Stage the candidate in the spare slot and keep it only if it is higher.
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ if (_wrappedComparator.Compare(_spareSlot, slot) > 0)
+ {
+ _wrappedComparator.Copy(slot, childDoc);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Compares the top value with the children of <paramref name="parentDoc"/>,
+ /// following the same rules as <c>CompareBottom</c>. Every child, including
+ /// the first one, is compared with <c>CompareTop</c>.
+ /// </summary>
+ public override int CompareTop(int parentDoc)
+ {
+ int childDoc = FirstChildInBlock(parentDoc);
+ if (childDoc == NoChild)
+ {
+ return 0;
+ }
+
+ int cmp = _wrappedComparator.CompareTop(childDoc);
+ if (cmp < 0)
+ {
+ return cmp;
+ }
+
+ while ((childDoc = NextChildInBlock(childDoc, parentDoc)) != NoChild)
+ {
+ int cmp1 = _wrappedComparator.CompareTop(childDoc);
+ if (cmp1 < 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ return cmp;
+ }
+ }
+ }
+}
\ No newline at end of file
[05/17] lucenenet git commit: Introduced tests for Lucene.Net.Join
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs b/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
new file mode 100644
index 0000000..c2df0df
--- /dev/null
+++ b/Lucene.Net.Tests.Join/TestBlockJoinSorting.cs
@@ -0,0 +1,277 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Join;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Tests.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Verifies that parent documents can be sorted by a field of their nested
+ /// child documents through <c>ToParentBlockJoinSortField</c>.
+ /// </summary>
+ public class TestBlockJoinSorting : LuceneTestCase
+ {
+ [Test]
+ public void TestNestedSorting()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
+
+ // Each block is three children (field2, filter_1) followed by their parent (field1).
+ IList<Document> docs = new List<Document>();
+ IndexBlock(w, docs, "a", ChildDoc("a", "T"), ChildDoc("b", "T"), ChildDoc("c", "T"));
+ w.Commit();
+
+ IndexBlock(w, docs, "b", ChildDoc("c", "T"), ChildDoc("d", "T"), ChildDoc("e", "T"));
+ IndexBlock(w, docs, "c", ChildDoc("e", "T"), ChildDoc("f", "T"), ChildDoc("g", "T"));
+ IndexBlock(w, docs, "d", ChildDoc("g", "T"), ChildDoc("h", "F"), ChildDoc("i", "F"));
+ w.Commit();
+
+ IndexBlock(w, docs, "f", ChildDoc("i", "F"), ChildDoc("j", "F"), ChildDoc("k", "F"));
+ IndexBlock(w, docs, "g", ChildDoc("k", "T"), ChildDoc("l", "T"), ChildDoc("m", "T"));
+
+ // This doc will not be included, because it doesn't have nested docs
+ w.AddDocument(ParentDoc("h"));
+
+ IndexBlock(w, docs, "i", ChildDoc("m", "T"), ChildDoc("n", "F"), ChildDoc("o", "F"));
+ w.Commit();
+
+ // Some garbage docs, just to check if the NestedFieldComparator can deal with this.
+ for (int i = 0; i < 3; i++)
+ {
+ Document garbage = new Document();
+ garbage.Add(new StringField("fieldXXX", "x", Field.Store.NO));
+ w.AddDocument(garbage);
+ }
+
+ IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.w, false));
+ w.Dispose();
+ Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
+ Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
+ ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
+
+ // Sort by field ascending, order first
+ ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, Wrap(parentFilter), Wrap(childFilter));
+ Sort sort = new Sort(sortField);
+ TopFieldDocs topDocs = searcher.Search(query, 5, sort);
+ assertEquals(7, topDocs.TotalHits);
+ assertEquals(5, topDocs.ScoreDocs.Length);
+ AssertHit(topDocs, 0, 3, "a");
+ AssertHit(topDocs, 1, 7, "c");
+ AssertHit(topDocs, 2, 11, "e");
+ AssertHit(topDocs, 3, 15, "g");
+ AssertHit(topDocs, 4, 19, "i");
+
+ // Sort by field ascending, order last
+ sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
+ sort = new Sort(sortField);
+ topDocs = searcher.Search(query, 5, sort);
+ assertEquals(7, topDocs.TotalHits);
+ assertEquals(5, topDocs.ScoreDocs.Length);
+ AssertHit(topDocs, 0, 3, "c");
+ AssertHit(topDocs, 1, 7, "e");
+ AssertHit(topDocs, 2, 11, "g");
+ AssertHit(topDocs, 3, 15, "i");
+ AssertHit(topDocs, 4, 19, "k");
+
+ // Sort by field descending, order last
+ sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
+ sort = new Sort(sortField);
+ topDocs = searcher.Search(query, 5, sort);
+ // Expected value first, like every other assertion in this test.
+ assertEquals(7, topDocs.TotalHits);
+ assertEquals(5, topDocs.ScoreDocs.Length);
+ AssertHit(topDocs, 0, 28, "o");
+ AssertHit(topDocs, 1, 23, "m");
+ AssertHit(topDocs, 2, 19, "k");
+ AssertHit(topDocs, 3, 15, "i");
+ AssertHit(topDocs, 4, 11, "g");
+
+ // Sort by field descending, order last, sort filter (filter_1:T)
+ childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T"))));
+ query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
+ sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
+ sort = new Sort(sortField);
+ topDocs = searcher.Search(query, 5, sort);
+ assertEquals(6, topDocs.TotalHits);
+ assertEquals(5, topDocs.ScoreDocs.Length);
+ AssertHit(topDocs, 0, 23, "m");
+ AssertHit(topDocs, 1, 28, "m");
+ AssertHit(topDocs, 2, 11, "g");
+ AssertHit(topDocs, 3, 15, "g");
+ AssertHit(topDocs, 4, 7, "e");
+
+ searcher.IndexReader.Dispose();
+ dir.Dispose();
+ }
+
+ // Builds a child document carrying the sort field (field2) and the filter flag (filter_1).
+ private static Document ChildDoc(string field2, string filter1)
+ {
+ Document child = new Document();
+ child.Add(new StringField("field2", field2, Field.Store.NO));
+ child.Add(new StringField("filter_1", filter1, Field.Store.NO));
+ return child;
+ }
+
+ // Builds a parent document marked with __type=parent and the given field1 value.
+ private static Document ParentDoc(string field1)
+ {
+ Document parent = new Document();
+ parent.Add(new StringField("__type", "parent", Field.Store.NO));
+ parent.Add(new StringField("field1", field1, Field.Store.NO));
+ return parent;
+ }
+
+ // Indexes one block: the given children followed by their parent, added in a single
+ // AddDocuments call. The shared list is cleared and refilled for every block.
+ private static void IndexBlock(RandomIndexWriter w, IList<Document> docs, string parentField1, params Document[] children)
+ {
+ docs.Clear();
+ foreach (Document child in children)
+ {
+ docs.Add(child);
+ }
+ docs.Add(ParentDoc(parentField1));
+ w.AddDocuments(docs);
+ }
+
+ // Asserts the doc id and the first sort value (read as a UTF-8 string) of the hit at the given rank.
+ private static void AssertHit(TopFieldDocs topDocs, int rank, int expectedDoc, string expectedValue)
+ {
+ assertEquals(expectedDoc, topDocs.ScoreDocs[rank].Doc);
+ assertEquals(expectedValue, ((BytesRef)((FieldDoc)topDocs.ScoreDocs[rank]).Fields[0]).Utf8ToString());
+ }
+
+ // Randomly wraps the filter in a FixedBitSetCachingWrapperFilter so both kinds of filter get exercised.
+ private Filter Wrap(Filter filter)
+ {
+ return Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(filter) : filter;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.Tests.Join/packages.config
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/packages.config b/Lucene.Net.Tests.Join/packages.config
new file mode 100644
index 0000000..f0ed309
--- /dev/null
+++ b/Lucene.Net.Tests.Join/packages.config
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<packages>
+ <package id="Apache.NMS" version="1.6.0.3083" targetFramework="net451" />
+ <package id="NUnit" version="2.6.3" targetFramework="net451" />
+</packages>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dff959ff/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index d76fe0c..2051e77 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -42,6 +42,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Join", "Lucene.N
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Grouping", "Lucene.Net.Grouping\Lucene.Net.Grouping.csproj", "{02BAB603-067D-48B1-AEDD-316849652568}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Join", "Lucene.Net.Tests.Join\Lucene.Net.Tests.Join.csproj", "{4C1B794F-8158-45E6-85B3-2C46569BEBC2}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -207,6 +209,18 @@ Global
{02BAB603-067D-48B1-AEDD-316849652568}.Release|Mixed Platforms.Build.0 = Release|Any CPU
{02BAB603-067D-48B1-AEDD-316849652568}.Release|x86.ActiveCfg = Release|Any CPU
{02BAB603-067D-48B1-AEDD-316849652568}.Release|x86.Build.0 = Release|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Debug|x86.Build.0 = Debug|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Release|Any CPU.Build.0 = Release|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Release|x86.ActiveCfg = Release|Any CPU
+ {4C1B794F-8158-45E6-85B3-2C46569BEBC2}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
[11/17] lucenenet git commit: Lucene.Net.Join tests now passing
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToParentBlockJoinCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToParentBlockJoinCollector.cs b/src/Lucene.Net.Join/ToParentBlockJoinCollector.cs
new file mode 100644
index 0000000..85d8ee8
--- /dev/null
+++ b/src/Lucene.Net.Join/ToParentBlockJoinCollector.cs
@@ -0,0 +1,578 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Grouping;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Collects parent document hits for a Query containing one or more
+ /// BlockJoinQuery clauses, sorted by the
+ /// specified parent Sort. Note that this cannot perform
+ /// arbitrary joins; rather, it requires that all joined
+ /// documents are indexed as a doc block (using
+ /// <c>IndexWriter.AddDocuments</c> or
+ /// <c>IndexWriter.UpdateDocuments</c>). I.e., the join is computed
+ /// at index time.
+ ///
+ /// <p>The parent Sort must only use
+ /// fields from the parent documents; sorting by field in
+ /// the child documents is not supported.</p>
+ ///
+ /// <p>You should only use this
+ /// collector if one or more of the clauses in the query is
+ /// a <seealso cref="ToParentBlockJoinQuery"/>. This collector will find those query
+ /// clauses and record the matching child documents for the
+ /// top scoring parent documents.</p>
+ ///
+ /// <p>Multiple joins (star join) and nested joins and a mix
+ /// of the two are allowed, as long as in all cases the
+ /// documents corresponding to a single row of each joined
+ /// parent table were indexed as a doc block.</p>
+ ///
+ /// <p>For the simple star join you can retrieve the
+ /// <seealso cref="TopGroups"/> instance containing each <seealso cref="ToParentBlockJoinQuery"/>'s
+ /// matching child documents for the top parent groups,
+ /// using <see cref="GetTopGroups"/>. I.e.,
+ /// a single query, which will contain two or more
+ /// <seealso cref="ToParentBlockJoinQuery"/>'s as clauses representing the star join,
+ /// can then retrieve two or more <seealso cref="TopGroups"/> instances.</p>
+ ///
+ /// <p>For nested joins, the query will run correctly (ie,
+ /// match the right parent and child documents), however,
+ /// because TopGroups is currently unable to support nesting
+ /// (each group is not able to hold another TopGroups), you
+ /// are only able to retrieve the TopGroups of the first
+ /// join. The TopGroups of the nested joins will not be
+ /// correct.
+ ///
+ /// See the Java package documentation of <c>org.apache.lucene.search.join</c>
+ /// for a code sample.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinCollector : Collector
+ {
+ private readonly Sort sort;
+
+ // Maps each BlockJoinQuery instance to its "slot" in
+ // joinScorers and in OneGroup's cached doc/scores/count:
+ private readonly IDictionary<Query, int?> joinQueryID = new Dictionary<Query, int?>();
+ private readonly int numParentHits;
+ private readonly FieldValueHitQueue<OneGroup> queue;
+ private readonly FieldComparator[] comparators;
+ private readonly int[] reverseMul;
+ private readonly int compEnd;
+ private readonly bool trackMaxScore;
+ private readonly bool trackScores;
+
+ private int docBase;
+ private ToParentBlockJoinQuery.BlockJoinScorer[] joinScorers = new ToParentBlockJoinQuery.BlockJoinScorer[0];
+ private AtomicReaderContext currentReaderContext;
+ private Scorer scorer;
+ private bool queueFull;
+
+ private OneGroup bottom;
+ private int totalHitCount;
+ private float maxScore = float.NaN;
+
+ /// <summary>
+ /// Creates a ToParentBlockJoinCollector. The provided sort must
+ /// not be null. If you pass true trackScores, all
+ /// ToParentBlockJoinQuery instances must not use
+ /// ScoreMode.None.
+ /// </summary>
+ /// <param name="sort"> Sort applied to the parent hits; must not be null </param>
+ /// <param name="numParentHits"> Number of parent hits the hit queue is created for </param>
+ /// <param name="trackScores"> Whether scores are recorded for collected parent hits and their child docs </param>
+ /// <param name="trackMaxScore"> Whether the maximum parent score is tracked (see <see cref="MaxScore"/>) </param>
+ /// <exception cref="ArgumentNullException"> if <paramref name="sort"/> is null </exception>
+ public ToParentBlockJoinCollector(Sort sort, int numParentHits, bool trackScores, bool trackMaxScore)
+ {
+ // TODO: allow null sort to be specialized to relevance
+ // only collector
+ if (sort == null)
+ {
+ // Fail with a descriptive exception rather than a NullReferenceException
+ // out of sort.GetSort() further down.
+ throw new ArgumentNullException("sort");
+ }
+ this.sort = sort;
+ this.trackMaxScore = trackMaxScore;
+ if (trackMaxScore)
+ {
+ // Start below every real score so the first collected hit always wins.
+ maxScore = float.MinValue;
+ }
+ //System.out.println("numParentHits=" + numParentHits);
+ this.trackScores = trackScores;
+ this.numParentHits = numParentHits;
+ queue = FieldValueHitQueue.Create<OneGroup>(sort.GetSort(), numParentHits);
+ comparators = queue.Comparators;
+ reverseMul = queue.ReverseMul;
+ // Index of the last comparator; Collect uses it to detect a full tie.
+ compEnd = comparators.Length - 1;
+ }
+
+ // Queue entry for a single parent hit. On top of what the base entry keeps
+ // (comparator slot, doc, score) it carries, per join slot, the child doc ids,
+ // the child scores (only when requested) and the child count, plus the
+ // reader context the hit was collected in.
+ private sealed class OneGroup : FieldValueHitQueue.Entry
+ {
+ internal AtomicReaderContext readerContext;
+ internal int[][] docs;
+ internal float[][] scores;
+ internal int[] counts;
+
+ public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, bool doScores)
+ : base(comparatorSlot, parentDoc, parentScore)
+ {
+ counts = new int[numJoins];
+ docs = new int[numJoins][];
+ if (doScores)
+ {
+ scores = new float[numJoins][];
+ }
+
+ // Each join starts with room for five children.
+ for (int join = 0; join < numJoins; join++)
+ {
+ docs[join] = new int[5];
+ if (doScores)
+ {
+ scores[join] = new float[5];
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Collects one parent hit. Every call bumps the total hit count and, when max
+ /// score tracking is on, updates the maximum score. While the queue is still
+ /// filling up the hit is always added; once it is full the hit only replaces
+ /// the current bottom entry when the comparators rank it as competitive.
+ /// </summary>
+ /// <param name="parentDoc"> Doc id of the parent hit; docBase is added to it before it is stored </param>
+ public override void Collect(int parentDoc)
+ {
+ //System.out.println("\nC parentDoc=" + parentDoc);
+ totalHitCount++;
+
+ // Stays NaN unless one of the score-tracking branches below computes it.
+ float score = float.NaN;
+
+ if (trackMaxScore)
+ {
+ score = scorer.Score();
+ maxScore = Math.Max(maxScore, score);
+ }
+
+ // TODO: we could sweep all joinScorers here and
+ // aggregate total child hit count, so we can fill this
+ // in getTopGroups (we wire it to 0 now)
+
+ if (queueFull)
+ {
+ //System.out.println(" queueFull");
+ // Fastmatch: return if this hit is not competitive
+ for (int i = 0; ; i++)
+ {
+ int c = reverseMul[i] * comparators[i].CompareBottom(parentDoc);
+ if (c < 0)
+ {
+ // Definitely not competitive.
+ //System.out.println(" skip");
+ return;
+ }
+ if (c > 0)
+ {
+ // Definitely competitive.
+ break;
+ }
+ if (i == compEnd)
+ {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ //System.out.println(" skip");
+ return;
+ }
+ }
+
+ //System.out.println(" competes! doc=" + (docBase + parentDoc));
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(bottom.Slot, parentDoc);
+ }
+ // With trackMaxScore the score was already computed above.
+ if (!trackMaxScore && trackScores)
+ {
+ score = scorer.Score();
+ }
+ // The bottom entry is reused in place for the new hit.
+ bottom.Doc = docBase + parentDoc;
+ bottom.readerContext = currentReaderContext;
+ bottom.Score = score;
+ CopyGroups(bottom);
+ bottom = queue.UpdateTop();
+
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Bottom = bottom.Slot;
+ }
+ }
+ else
+ {
+ // Startup transient: queue is not yet full:
+ int comparatorSlot = totalHitCount - 1;
+
+ // Copy hit into queue
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Copy(comparatorSlot, parentDoc);
+ }
+ //System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
+ // With trackMaxScore the score was already computed above.
+ if (!trackMaxScore && trackScores)
+ {
+ score = scorer.Score();
+ }
+ OneGroup og = new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.Length, trackScores);
+ og.readerContext = currentReaderContext;
+ CopyGroups(og);
+ bottom = queue.Add(og);
+ queueFull = totalHitCount == numParentHits;
+ if (queueFull)
+ {
+ // End of startup transient: queue just filled up:
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ comparators[i].Bottom = bottom.Slot;
+ }
+ }
+ }
+ }
+
+ // Pulls out child doc and scores for all join queries:
+ // for every join scorer slot, the scorer's child docs (and child scores when
+ // trackScores is set) are swapped into the given group if that scorer is
+ // positioned on the group's parent doc; otherwise the slot's count is reset to 0.
+ private void CopyGroups(OneGroup og)
+ {
+ // While rare, it's possible top arrays could be too
+ // short if join query had null scorer on first
+ // segment(s) but then became non-null on later segments
+ // NOTE(review): a single Grow call per array is assumed to make it at least
+ // numSubScorers long - confirm against ArrayUtil.Grow.
+ int numSubScorers = joinScorers.Length;
+ if (og.docs.Length < numSubScorers)
+ {
+ // Same rare case as described above.
+ og.docs = ArrayUtil.Grow(og.docs);
+ }
+ if (og.counts.Length < numSubScorers)
+ {
+ og.counts = ArrayUtil.Grow(og.counts);
+ }
+ if (trackScores && og.scores.Length < numSubScorers)
+ {
+ og.scores = ArrayUtil.Grow(og.scores);
+ }
+
+ //System.out.println("\ncopyGroups parentDoc=" + og.doc);
+ for (int scorerIDX = 0; scorerIDX < numSubScorers; scorerIDX++)
+ {
+ ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
+ //System.out.println(" scorer=" + joinScorer);
+ // og.Doc already includes docBase, so the scorer's parent doc is offset the same way before comparing.
+ if (joinScorer != null && docBase + joinScorer.ParentDoc == og.Doc)
+ {
+ og.counts[scorerIDX] = joinScorer.ChildCount;
+ //System.out.println(" count=" + og.counts[scorerIDX]);
+ // The group's current array is passed in and the scorer's array comes back.
+ og.docs[scorerIDX] = joinScorer.SwapChildDocs(og.docs[scorerIDX]);
+ Debug.Assert(og.docs[scorerIDX].Length >= og.counts[scorerIDX], "length=" + og.docs[scorerIDX].Length + " vs count=" + og.counts[scorerIDX]);
+ //System.out.println(" len=" + og.docs[scorerIDX].length);
+ /*
+ for(int idx=0;idx<og.counts[scorerIDX];idx++) {
+ System.out.println(" docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
+ }
+ */
+ if (trackScores)
+ {
+ //System.out.println(" copy scores");
+ og.scores[scorerIDX] = joinScorer.SwapChildScores(og.scores[scorerIDX]);
+ Debug.Assert(og.scores[scorerIDX].Length >= og.counts[scorerIDX], "length=" + og.scores[scorerIDX].Length + " vs count=" + og.counts[scorerIDX]);
+ }
+ }
+ else
+ {
+ // This join has no children recorded for the group's parent doc.
+ og.counts[scorerIDX] = 0;
+ }
+ }
+ }
+
+ // Switches collection to the next segment: remembers its context and doc
+ // base, then re-targets every comparator at it and stores the comparator
+ // that comes back in the queue.
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ currentReaderContext = value;
+ docBase = value.DocBase;
+ for (int i = 0, count = comparators.Length; i < count; i++)
+ {
+ FieldComparator perSegment = comparators[i].SetNextReader(value);
+ queue.SetComparator(i, perSegment);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Always returns false. The tie handling in <c>Collect</c> is written on the
+ /// premise that docs are visited in doc id order.
+ /// </summary>
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+
+ // Registers a block join scorer: tells it to track pending child hits and
+ // stores it in the slot that belongs to its query, appending a new slot the
+ // first time that query is seen.
+ private void Enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer)
+ {
+ scorer.TrackPendingChildHits();
+
+ int? knownSlot;
+ if (!joinQueryID.TryGetValue(query, out knownSlot))
+ {
+ // First sighting of this query: its slot is the next free index.
+ int freshSlot = joinScorers.Length;
+ joinQueryID[query] = freshSlot;
+ //System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
+ Array.Resize(ref joinScorers, freshSlot + 1);
+ joinScorers[freshSlot] = scorer;
+ return;
+ }
+
+ joinScorers[knownSlot.Value] = scorer;
+ }
+
+ /// <summary>
+ /// Installs the scorer to collect with. The scorer is wrapped in a
+ /// <see cref="ScoreCachingWrappingScorer"/> and handed to every comparator, all
+ /// join scorer slots are cleared, and the scorer tree (the scorer plus its
+ /// <c>Children</c>, transitively) is walked breadth-first so that every
+ /// <see cref="ToParentBlockJoinQuery.BlockJoinScorer"/> found in it is enrolled.
+ /// </summary>
+ public override Scorer Scorer
+ {
+ set
+ {
+ //System.out.println("C.setScorer scorer=" + value);
+ // Since we invoke .score(), and the comparators likely
+ // do as well, cache it so it's only "really" computed
+ // once:
+ scorer = new ScoreCachingWrappingScorer(value);
+ for (int compIdx = 0; compIdx < comparators.Length; compIdx++)
+ {
+ comparators[compIdx].Scorer = scorer;
+ }
+ // Slots are re-populated by Enroll during the walk below.
+ Arrays.Fill(joinScorers, null);
+
+ // The walk is local to this call, so a plain FIFO queue is enough.
+ // The unwrapped scorer is walked, not the caching wrapper.
+ var pending = new Queue<Scorer>();
+ //System.out.println("\nqueue: add top scorer=" + value);
+ pending.Enqueue(value);
+
+ while (pending.Count > 0)
+ {
+ Scorer current = pending.Dequeue();
+ //System.out.println(" poll: " + current + "; " + current.getWeight().getQuery());
+ var joinScorer = current as ToParentBlockJoinQuery.BlockJoinScorer;
+ if (joinScorer != null)
+ {
+ Enroll((ToParentBlockJoinQuery)current.Weight.Query, joinScorer);
+ }
+
+ foreach (Scorer.ChildScorer sub in current.Children)
+ {
+ //System.out.println(" add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
+ pending.Enqueue(sub.Child);
+ }
+ }
+ }
+ }
+
+ private OneGroup[] sortedGroups;
+
+ // Drains the hit queue into sortedGroups. Entries are popped one by one and
+ // written from the last index towards the first.
+ private void sortQueue()
+ {
+ int remaining = queue.Size();
+ sortedGroups = new OneGroup[remaining];
+ while (remaining > 0)
+ {
+ remaining--;
+ sortedGroups[remaining] = queue.Pop();
+ }
+ }
+
+ /// <summary>
+ /// Returns the TopGroups for the specified
+ /// BlockJoinQuery. The groupValue of each GroupDocs will
+ /// be the parent docID for that group.
+ /// The number of documents within each group is calculated as minimum of <code>maxDocsPerGroup</code>
+ /// and number of matched child documents for that group.
+ /// Returns null if no groups matched, or if <paramref name="offset"/> is at or
+ /// beyond the number of collected parent groups.
+ /// </summary>
+ /// <param name="query"> Search query </param>
+ /// <param name="withinGroupSort"> Sort criteria within groups </param>
+ /// <param name="offset"> Parent docs offset </param>
+ /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
+ /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
+ /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
+ /// <returns> TopGroups for specified query </returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ public virtual TopGroups<int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)
+ {
+ // slot stays null when the query was never enrolled.
+ int? slot;
+ if (!joinQueryID.TryGetValue(query, out slot) && totalHitCount == 0)
+ {
+ return null;
+ }
+
+ if (sortedGroups == null)
+ {
+ if (offset >= queue.Size())
+ {
+ return null;
+ }
+ // The first call drains the queue; later calls reuse sortedGroups.
+ sortQueue();
+ }
+ else if (offset >= sortedGroups.Length)
+ {
+ // Same bound as the first-call check above, so repeated calls with
+ // identical arguments give the same answer.
+ return null;
+ }
+
+ // -1 marks "no slot for this query"; AccumulateGroups then reports zero children per group.
+ return AccumulateGroups(slot ?? -1, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
+ }
+
+ /// <summary>
+ /// Accumulates groups for the BlockJoinQuery specified by its slot.
+ /// For every parent group from <paramref name="offset"/> onwards, the child docs
+ /// recorded for the slot are replayed through a fresh top-docs collector and the
+ /// result is wrapped in a GroupDocs whose group value is the parent doc id.
+ /// </summary>
+ /// <param name="slot"> Search query's slot; -1 (or a slot beyond a group's arrays) yields zero child docs for the group </param>
+ /// <param name="offset"> Parent docs offset </param>
+ /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
+ /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
+ /// <param name="withinGroupSort"> Sort criteria within groups; null sorts by score, which requires trackScores </param>
+ /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
+ /// <returns> TopGroups for the query specified by slot </returns>
+ /// <exception cref="ArgumentException"> if <paramref name="withinGroupSort"/> is null while trackScores is false </exception>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ private TopGroups<int> AccumulateGroups(int slot, int offset, int maxDocsPerGroup, int withinGroupOffset, Sort withinGroupSort, bool fillSortFields)
+ {
+ var groups = new GroupDocs<int>[sortedGroups.Length - offset];
+ // A single fake scorer is reused for all groups; its doc and score are set per child below.
+ var fakeScorer = new FakeScorer();
+
+ int totalGroupedHitCount = 0;
+ //System.out.println("slot=" + slot);
+
+ for (int groupIdx = offset; groupIdx < sortedGroups.Length; groupIdx++)
+ {
+ OneGroup og = sortedGroups[groupIdx];
+ int numChildDocs;
+ if (slot == -1 || slot >= og.counts.Length)
+ {
+ numChildDocs = 0;
+ }
+ else
+ {
+ numChildDocs = og.counts[slot];
+ }
+
+ // Number of documents in group should be bounded to prevent redundant memory allocation
+ // NOTE(review): the lower bound of 1 presumably exists because the collectors
+ // below do not accept a size of 0 - confirm.
+ int numDocsInGroup = Math.Max(1, Math.Min(numChildDocs, maxDocsPerGroup));
+ //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
+
+ // At this point we hold all docs w/ in each group, unsorted; we now sort them:
+ Collector collector;
+ if (withinGroupSort == null)
+ {
+ //System.out.println("sort by score");
+ // Sort by score
+ if (!trackScores)
+ {
+ throw new ArgumentException("cannot sort by relevance within group: trackScores=false");
+ }
+ collector = TopScoreDocCollector.Create(numDocsInGroup, true);
+ }
+ else
+ {
+ // Sort by fields
+ collector = TopFieldCollector.Create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
+ }
+
+ collector.Scorer = fakeScorer;
+ collector.NextReader = og.readerContext;
+ // Replay the recorded child docs (and scores) through the collector.
+ for (int docIdx = 0; docIdx < numChildDocs; docIdx++)
+ {
+ //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
+ int doc = og.docs[slot][docIdx];
+ fakeScorer.doc = doc;
+ if (trackScores)
+ {
+ fakeScorer._score = og.scores[slot][docIdx];
+ }
+ collector.Collect(doc);
+ }
+ totalGroupedHitCount += numChildDocs;
+
+ object[] groupSortValues;
+
+ if (fillSortFields)
+ {
+ // Parent-level sort values, read from the parent comparators at the group's slot.
+ groupSortValues = new object[comparators.Length];
+ for (int sortFieldIdx = 0; sortFieldIdx < comparators.Length; sortFieldIdx++)
+ {
+ groupSortValues[sortFieldIdx] = comparators[sortFieldIdx].Value(og.Slot);
+ }
+ }
+ else
+ {
+ groupSortValues = null;
+ }
+
+ // The cast picks the concrete collector type matching the branch taken above.
+ TopDocs topDocs;
+ if (withinGroupSort == null)
+ {
+ var tempCollector = (TopScoreDocCollector) collector;
+ topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
+ }
+ else
+ {
+ var tempCollector = (TopFieldCollector) collector;
+ topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
+ }
+
+ groups[groupIdx - offset] = new GroupDocs<int>(og.Score, topDocs.MaxScore, numChildDocs, topDocs.ScoreDocs, og.Doc, groupSortValues);
+ }
+
+ // NOTE(review): the inner TopGroups is built with a literal 0 and then wrapped together
+ // with totalHitCount - presumably the wrapper supplies the total group count; confirm against TopGroups.
+ return new TopGroups<int>(new TopGroups<int>(sort.GetSort(), withinGroupSort == null ? null : withinGroupSort.GetSort(), 0, totalGroupedHitCount, groups, maxScore), totalHitCount);
+ }
+
+ /// <summary>
+ /// Convenience form of <see cref="GetTopGroups"/> that places no cap on the
+ /// number of child documents per group: it forwards all arguments and passes
+ /// <c>int.MaxValue</c> as the per-group maximum.
+ /// Returns null if no groups matched.
+ /// </summary>
+ /// <param name="query">Search query</param>
+ /// <param name="withinGroupSort">Sort criteria within groups</param>
+ /// <param name="offset">Parent docs offset</param>
+ /// <param name="withinGroupOffset">Offset within each group of child docs</param>
+ /// <param name="fillSortFields">Specifies whether to add sort fields or not</param>
+ /// <returns>TopGroups for specified query</returns>
+ /// <exception cref="IOException"> if there is a low-level I/O error </exception>
+ public virtual TopGroups<int> GetTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int withinGroupOffset, bool fillSortFields)
+ {
+ const int noLimit = int.MaxValue;
+ return GetTopGroups(query, withinGroupSort, offset, noLimit, withinGroupOffset, fillSortFields);
+ }
+
+ /// <summary>
+ /// Returns the highest score across all collected parent hits, as long as
+ /// <c>trackMaxScore=true</c> was passed to the constructor. Otherwise this
+ /// returns <c>float.NaN</c>. With tracking enabled the value starts out as
+ /// <c>float.MinValue</c> until a hit has been collected.
+ /// </summary>
+ public virtual float MaxScore
+ {
+ get { return maxScore; }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs b/src/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
new file mode 100644
index 0000000..c41fd50
--- /dev/null
+++ b/src/Lucene.Net.Join/ToParentBlockJoinFieldComparator.cs
@@ -0,0 +1,393 @@
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A field comparator that allows parent documents to be sorted by fields
+ /// from the nested / child documents.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public abstract class ToParentBlockJoinFieldComparator : FieldComparator<object>
+ {
+ private readonly Filter _parentFilter;
+ private readonly Filter _childFilter;
+ private readonly int _spareSlot;
+
+ private FieldComparator _wrappedComparator;
+ private FixedBitSet _parentDocuments;
+ private FixedBitSet _childDocuments;
+
+ // Stores the wrapped child-level comparator, the two filters and the spare
+ // slot. Private, so only the nested Lowest / Highest classes can derive from this type.
+ private ToParentBlockJoinFieldComparator(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ {
+ _wrappedComparator = wrappedComparator;
+ _parentFilter = parentFilter;
+ _childFilter = childFilter;
+ _spareSlot = spareSlot;
+ }
+
+ /// <summary>
+ /// Compares two slots by delegating to the wrapped comparator.
+ /// </summary>
+ public override int Compare(int slot1, int slot2)
+ {
+ return _wrappedComparator.Compare(slot1, slot2);
+ }
+
+ /// <summary>
+ /// Forwards the bottom slot to the wrapped comparator.
+ /// </summary>
+ public override int Bottom
+ {
+ set
+ {
+ _wrappedComparator.Bottom = value;
+ }
+ }
+
+ /// <summary>
+ /// Forwards the top value to the wrapped comparator.
+ /// </summary>
+ public override object TopValue
+ {
+ set
+ {
+ _wrappedComparator.TopValue = value;
+ }
+ }
+
+ /// <summary>
+ /// Prepares this comparator for the given segment. The child filter and then the
+ /// parent filter are resolved against <paramref name="context"/> into bit sets
+ /// (null when a filter yields no doc id set or no iterator), and the wrapped
+ /// comparator is advanced to the same segment.
+ /// </summary>
+ /// <param name="context"> Segment that is about to be collected </param>
+ /// <returns> This comparator </returns>
+ public override FieldComparator SetNextReader(AtomicReaderContext context)
+ {
+ // Child filter first, parent filter second.
+ _childDocuments = ResolveBitSet(_childFilter, context);
+ _parentDocuments = ResolveBitSet(_parentFilter, context);
+
+ // The wrapped comparator may hand back a different instance per segment.
+ _wrappedComparator = _wrappedComparator.SetNextReader(context);
+ return this;
+ }
+
+ // Evaluates one filter for one segment and returns its matches as a FixedBitSet:
+ // the filter's own set when it already is one, otherwise a copy built from its
+ // iterator. Returns null when the filter gives no doc id set or no iterator.
+ private static FixedBitSet ResolveBitSet(Filter filter, AtomicReaderContext context)
+ {
+ DocIdSet docIdSet = filter.GetDocIdSet(context, null);
+ if (IsEmpty(docIdSet))
+ {
+ return null;
+ }
+
+ var bitSet = docIdSet as FixedBitSet;
+ if (bitSet != null)
+ {
+ return bitSet;
+ }
+
+ DocIdSetIterator iterator = docIdSet.GetIterator();
+ return iterator != null ? ToFixedBitSet(iterator, context.AtomicReader.MaxDoc) : null;
+ }
+
+ // A doc id set counts as empty only when it is null; a non-null set with no
+ // matching docs is not detected here.
+ private static bool IsEmpty(DocIdSet set)
+ {
+ return set == null;
+ }
+
+ // Builds a FixedBitSet of numBits bits and sets the bit of every doc id the
+ // iterator yields until it reports NO_MORE_DOCS.
+ private static FixedBitSet ToFixedBitSet(DocIdSetIterator iterator, int numBits)
+ {
+ FixedBitSet bits = new FixedBitSet(numBits);
+ for (int docId = iterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.NextDoc())
+ {
+ bits.Set(docId);
+ }
+ return bits;
+ }
+
+ /// <summary>
+ /// Returns the value held in the given slot of the wrapped comparator.
+ /// </summary>
+ public override IComparable Value(int slot)
+ {
+ return _wrappedComparator.Value(slot);
+ }
+
+ /// <summary>
+ /// Concrete <see cref="ToParentBlockJoinFieldComparator"/> that sorts the parent docs with the lowest values
+ /// in the child / nested docs first.
+ /// </summary>
+ public sealed class Lowest : ToParentBlockJoinFieldComparator
+ {
+ /// <summary>
+ /// Create ToParentBlockJoinFieldComparator.Lowest
+ /// </summary>
+ /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
+ /// <param name="parentFilter">Filter (must produce FixedBitSet per-segment) that identifies the parent documents. </param>
+ /// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
+ /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
+ /// inside the parent document scope is most competitive. </param>
+ public Lowest(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+ {
+ }
+
+ /// <summary>
+ /// Compares the wrapped comparator's bottom against the matching children of
+ /// <paramref name="parentDoc"/>. A positive child result is returned right away;
+ /// otherwise the result is 0 if any child compared equal, else the first child's
+ /// negative result. Returns 0 when there are no children to look at.
+ /// </summary>
+ public override int CompareBottom(int parentDoc)
+ {
+ // Doc 0 has no preceding docs, hence no children in its block.
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return 0;
+ }
+
+ // The children of parentDoc lie between the previous parent and parentDoc itself.
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return 0;
+ }
+
+ // We only need to emit a single cmp value for any matching child doc
+ int cmp = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp > 0)
+ {
+ return cmp;
+ }
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return cmp;
+ }
+ int cmp1 = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp1 > 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Copies into <paramref name="slot"/> the value of the matching child of
+ /// <paramref name="parentDoc"/> that the wrapped comparator ranks lowest.
+ /// The slot is left untouched when there are no children to look at.
+ /// </summary>
+ public override void Copy(int slot, int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return;
+ }
+
+ // We need to copy the lowest value from all child docs into slot.
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return;
+ }
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ _wrappedComparator.Copy(slot, childDoc);
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return;
+ }
+ // Stage the candidate in the spare slot so it can be compared slot against slot.
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ if (_wrappedComparator.Compare(_spareSlot, slot) < 0)
+ {
+ _wrappedComparator.Copy(slot, childDoc);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Compares the wrapped comparator's top value against the matching children of
+ /// <paramref name="parentDoc"/>. A positive child result is returned right away;
+ /// otherwise the result is 0 if any child compared equal, else the first child's
+ /// negative result. Returns 0 when there are no children to look at.
+ /// </summary>
+ public override int CompareTop(int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return 0;
+ }
+
+ // The children of parentDoc lie between the previous parent and parentDoc itself.
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return 0;
+ }
+
+ // We only need to emit a single cmp value for any matching child doc.
+ // Every child, including the first, is compared against the top value.
+ int cmp = _wrappedComparator.CompareTop(childDoc);
+ if (cmp > 0)
+ {
+ return cmp;
+ }
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return cmp;
+ }
+ int cmp1 = _wrappedComparator.CompareTop(childDoc);
+ if (cmp1 > 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ }
+
+ }
+
+ /// <summary>
+ /// Concrete <see cref="ToParentBlockJoinFieldComparator"/> that sorts the parent docs with the highest values
+ /// in the child / nested docs first.
+ /// </summary>
+ public sealed class Highest : ToParentBlockJoinFieldComparator
+ {
+ /// <summary>
+ /// Create ToParentBlockJoinFieldComparator.Highest
+ /// </summary>
+ /// <param name="wrappedComparator">The <see cref="FieldComparator"/> on the child / nested level. </param>
+ /// <param name="parentFilter">Filter (must produce FixedBitSet per-segment) that identifies the parent documents. </param>
+ /// <param name="childFilter">Filter that defines which child / nested documents participates in sorting. </param>
+ /// <param name="spareSlot">The extra slot inside the wrapped comparator that is used to compare which nested document
+ /// inside the parent document scope is most competitive. </param>
+ public Highest(FieldComparator wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot)
+ : base(wrappedComparator, parentFilter, childFilter, spareSlot)
+ {
+ }
+
+ /// <summary>
+ /// Compares the wrapped comparator's bottom against the matching children of
+ /// <paramref name="parentDoc"/>. A negative child result is returned right away;
+ /// otherwise the result is 0 if any child compared equal, else the first child's
+ /// positive result. Returns 0 when there are no children to look at.
+ /// </summary>
+ public override int CompareBottom(int parentDoc)
+ {
+ // Doc 0 has no preceding docs, hence no children in its block.
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return 0;
+ }
+
+ // The children of parentDoc lie between the previous parent and parentDoc itself.
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return 0;
+ }
+
+ int cmp = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp < 0)
+ {
+ return cmp;
+ }
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return cmp;
+ }
+ int cmp1 = _wrappedComparator.CompareBottom(childDoc);
+ if (cmp1 < 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Copies into <paramref name="slot"/> the value of the matching child of
+ /// <paramref name="parentDoc"/> that the wrapped comparator ranks highest.
+ /// The slot is left untouched when there are no children to look at.
+ /// </summary>
+ public override void Copy(int slot, int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return;
+ }
+
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return;
+ }
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ _wrappedComparator.Copy(slot, childDoc);
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return;
+ }
+ // Stage the candidate in the spare slot so it can be compared slot against slot.
+ _wrappedComparator.Copy(_spareSlot, childDoc);
+ if (_wrappedComparator.Compare(_spareSlot, slot) > 0)
+ {
+ _wrappedComparator.Copy(slot, childDoc);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Compares the wrapped comparator's top value against the matching children of
+ /// <paramref name="parentDoc"/>. A negative child result is returned right away;
+ /// otherwise the result is 0 if any child compared equal, else the first child's
+ /// positive result. Returns 0 when there are no children to look at.
+ /// </summary>
+ public override int CompareTop(int parentDoc)
+ {
+ if (parentDoc == 0 || _parentDocuments == null || _childDocuments == null)
+ {
+ return 0;
+ }
+
+ // The children of parentDoc lie between the previous parent and parentDoc itself.
+ int prevParentDoc = _parentDocuments.PrevSetBit(parentDoc - 1);
+ int childDoc = _childDocuments.NextSetBit(prevParentDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return 0;
+ }
+
+ // Every child, including the first, is compared against the top value.
+ int cmp = _wrappedComparator.CompareTop(childDoc);
+ if (cmp < 0)
+ {
+ return cmp;
+ }
+
+ while (true)
+ {
+ childDoc = _childDocuments.NextSetBit(childDoc + 1);
+ if (childDoc >= parentDoc || childDoc == -1)
+ {
+ return cmp;
+ }
+ int cmp1 = _wrappedComparator.CompareTop(childDoc);
+ if (cmp1 < 0)
+ {
+ return cmp1;
+ }
+ if (cmp1 == 0)
+ {
+ cmp = 0;
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToParentBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToParentBlockJoinQuery.cs b/src/Lucene.Net.Join/ToParentBlockJoinQuery.cs
new file mode 100644
index 0000000..810f30e
--- /dev/null
+++ b/src/Lucene.Net.Join/ToParentBlockJoinQuery.cs
@@ -0,0 +1,516 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// This query requires that you index
+ /// children and parent docs as a single block, using the
+ /// <c>IndexWriter.AddDocuments()</c> or
+ /// <c>IndexWriter.UpdateDocuments()</c> API. In each block, the
+ /// child documents must appear first, ending with the parent
+ /// document. At search time you provide a Filter
+ /// identifying the parents, however this Filter must provide
+ /// an <see cref="FixedBitSet"/> per sub-reader.
+ ///
+ /// <p>Once the block index is built, use this query to wrap
+ /// any sub-query matching only child docs and join matches in that
+ /// child document space up to the parent document space.
+ /// You can then use this Query as a clause with
+ /// other queries in the parent document space.</p>
+ ///
+ /// <p>See <see cref="ToChildBlockJoinQuery"/> if you need to join
+ /// in the reverse order.</p>
+ ///
+ /// <p>The child documents must be orthogonal to the parent
+ /// documents: the wrapped child query must never
+ /// return a parent document.</p>
+ ///
+ /// If you'd like to retrieve <see cref="TopGroups"/> for the
+ /// resulting query, use the <see cref="ToParentBlockJoinCollector"/>.
+ /// Note that this is not necessary, ie, if you simply want
+ /// to collect the parent documents and don't need to see
+ /// which child documents matched under that parent, then
+ /// you can use any collector.
+ ///
+ /// <p><b>NOTE</b>: If the overall query contains parent-only
+ /// matches, for example you OR a parent-only query with a
+ /// joined child-only query, then the resulting collected documents
+ /// will be correct, however the <see cref="TopGroups"/> you get
+ /// from <see cref="ToParentBlockJoinCollector"/> will not contain every
+ /// child for parents that had matched.</p>
+ ///
+ /// <p>See the <see cref="Lucene.Net.Join"/> namespace for an
+ /// overview. </p>
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinQuery : Query
+ {
+
+ private readonly Filter _parentsFilter;
+ private readonly Query _childQuery;
+
+ // If we are rewritten, this is the original childQuery we
+ // were passed; we use this for .equals() and
+ // .hashCode(). This makes rewritten query equal the
+ // original, so that user does not have to .rewrite() their
+ // query before searching:
+ private readonly Query _origChildQuery;
+ private readonly ScoreMode _scoreMode;
+
+ /// <summary>
+ /// Creates a ToParentBlockJoinQuery that joins matches of
+ /// <paramref name="childQuery"/> up to their parent documents.
+ /// </summary>
+ /// <param name="childQuery"> Query matching child documents. </param>
+ /// <param name="parentsFilter"> Filter identifying the parent documents; it must produce a
+ /// FixedBitSet per segment (like <see cref="FixedBitSetCachingWrapperFilter"/>). </param>
+ /// <param name="scoreMode"> How multiple child scores are aggregated
+ /// into a single parent score.
+ /// </param>
+ public ToParentBlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode)
+ : this(childQuery, childQuery, parentsFilter, scoreMode)
+ {
+ }
+
+ // Keeps the query originally supplied by the caller (origChildQuery) next to the
+ // child query that is actually executed; Rewrite uses this overload.
+ private ToParentBlockJoinQuery(Query origChildQuery, Query childQuery, Filter parentsFilter, ScoreMode scoreMode)
+ {
+ _parentsFilter = parentsFilter;
+ _scoreMode = scoreMode;
+ _childQuery = childQuery;
+ _origChildQuery = origChildQuery;
+ }
+
+ /// <summary>
+ /// Builds the weight of this query by wrapping the weight created by the child query.
+ /// </summary>
+ public override Weight CreateWeight(IndexSearcher searcher)
+ {
+ Weight childWeight = _childQuery.CreateWeight(searcher);
+ return new BlockJoinWeight(this, childWeight, _parentsFilter, _scoreMode);
+ }
+
+ /// <summary>
+ /// Weight of a ToParentBlockJoinQuery. Normalization is delegated to the child weight and
+ /// scoring is done by a BlockJoinScorer created per segment.
+ /// </summary>
+ private class BlockJoinWeight : Weight
+ {
+ internal readonly Query JoinQuery;
+ internal readonly Weight ChildWeight;
+ internal readonly Filter ParentsFilter;
+ internal readonly ScoreMode ScoreMode;
+
+ public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode)
+ {
+ ScoreMode = scoreMode;
+ ParentsFilter = parentsFilter;
+ ChildWeight = childWeight;
+ JoinQuery = joinQuery;
+ }
+
+ /// <summary> The join query this weight was created for. </summary>
+ public override Query Query
+ {
+ get { return JoinQuery; }
+ }
+
+ /// <summary> Child weight's normalization value multiplied by the squared boost of the join query. </summary>
+ public override float ValueForNormalization
+ {
+ get
+ {
+ return ChildWeight.ValueForNormalization * JoinQuery.Boost * JoinQuery.Boost;
+ }
+ }
+
+ /// <summary> Forwards normalization to the child weight, folding in the join query's boost. </summary>
+ public override void Normalize(float norm, float topLevelBoost)
+ {
+ float boostedTopLevel = topLevelBoost * JoinQuery.Boost;
+ ChildWeight.Normalize(norm, boostedTopLevel);
+ }
+
+ // NOTE: acceptDocs applies (and is checked) only in the parent document space
+ public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
+ {
+ // The child scorer is restricted by the segment's live docs, not by acceptDocs.
+ Scorer children = ChildWeight.Scorer(readerContext, readerContext.AtomicReader.LiveDocs);
+ if (children == null)
+ {
+ // No matches
+ return null;
+ }
+
+ int firstChild = children.NextDoc();
+ if (firstChild == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ // No matches
+ return null;
+ }
+
+ // NOTE: we cannot pass acceptDocs here because this
+ // will (most likely, justifiably) cause the filter to
+ // not return a FixedBitSet but rather a
+ // BitsFilteredDocIdSet. Instead, we filter by
+ // acceptDocs when we score:
+ DocIdSet parentSet = ParentsFilter.GetDocIdSet(readerContext, null);
+ if (parentSet == null)
+ {
+ // No matches
+ return null;
+ }
+
+ FixedBitSet parentBits = parentSet as FixedBitSet;
+ if (parentBits == null)
+ {
+ throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parentSet);
+ }
+
+ return new BlockJoinScorer(this, children, parentBits, firstChild, ScoreMode, acceptDocs);
+ }
+
+ /// <summary>
+ /// Explains the score of <paramref name="doc"/>: a match explanation if a fresh scorer
+ /// lands exactly on the document, otherwise a "Not a match" explanation.
+ /// </summary>
+ public override Explanation Explain(AtomicReaderContext context, int doc)
+ {
+ BlockJoinScorer joinScorer = (BlockJoinScorer)Scorer(context, context.AtomicReader.LiveDocs);
+ bool matches = joinScorer != null && joinScorer.Advance(doc) == doc;
+ if (!matches)
+ {
+ return new ComplexExplanation(false, 0.0f, "Not a match");
+ }
+ return joinScorer.Explain(context.DocBase);
+ }
+
+ public override bool ScoresDocsOutOfOrder()
+ {
+ return false;
+ }
+ }
+
+ /// <summary>
+ /// Scorer that walks the child scorer and reports one hit per parent document: all child
+ /// matches located before the same parent bit are folded into that parent, and their
+ /// scores are aggregated according to the configured ScoreMode.
+ /// </summary>
+ internal class BlockJoinScorer : Scorer
+ {
+ private readonly Scorer _childScorer;
+ private readonly FixedBitSet _parentBits;
+ private readonly ScoreMode _scoreMode;
+ private readonly Bits _acceptDocs;
+ // Current parent doc id; -1 until NextDoc/Advance has been called.
+ private int _parentDocRenamed = -1;
+ // Parent bit preceding the target of the last Advance call (only assigned in Advance).
+ private int _prevParentDoc;
+ private float _parentScore;
+ private int _parentFreq;
+ // Child doc the child scorer is currently positioned on and that has not been consumed yet.
+ private int _nextChildDoc;
+ // Optional buffers; they stay null until TrackPendingChildHits or a Swap* method assigns them.
+ private int[] _pendingChildDocs;
+ private float[] _pendingChildScores;
+ // Number of children gathered for the current parent.
+ private int _childDocUpto;
+
+ /// <summary>
+ /// Creates the scorer. <paramref name="firstChildDoc"/> is the doc the child scorer is
+ /// already positioned on; <paramref name="acceptDocs"/> (may be null) is checked against
+ /// parent documents only.
+ /// </summary>
+ public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) : base(weight)
+ {
+ //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ _parentBits = parentBits;
+ _childScorer = childScorer;
+ _scoreMode = scoreMode;
+ _acceptDocs = acceptDocs;
+ _nextChildDoc = firstChildDoc;
+ }
+
+ /// <summary> Exposes the child scorer as the single child, labelled "BLOCK_JOIN". </summary>
+ public override ICollection<ChildScorer> Children
+ {
+ get { return Collections.Singleton(new ChildScorer(_childScorer, "BLOCK_JOIN")); }
+ }
+
+ /// <summary> Number of child documents gathered for the current parent. </summary>
+ internal virtual int ChildCount
+ {
+ get { return _childDocUpto; }
+ }
+
+ /// <summary> Doc id of the current parent (same value as DocID()). </summary>
+ internal virtual int ParentDoc
+ {
+ get { return _parentDocRenamed; }
+ }
+
+ /// <summary>
+ /// Hands out the current child-doc buffer and installs <paramref name="other"/> as the
+ /// new one; when <paramref name="other"/> is null a fresh 5-element array is installed.
+ /// </summary>
+ /// <returns> The buffer that was in use before the call (may be null). </returns>
+ internal virtual int[] SwapChildDocs(int[] other)
+ {
+ int[] ret = _pendingChildDocs;
+ if (other == null)
+ {
+ _pendingChildDocs = new int[5];
+ }
+ else
+ {
+ _pendingChildDocs = other;
+ }
+ return ret;
+ }
+
+ /// <summary>
+ /// Hands out the current child-score buffer and installs <paramref name="other"/> as the
+ /// new one; when <paramref name="other"/> is null a fresh 5-element array is installed.
+ /// </summary>
+ /// <returns> The buffer that was in use before the call (may be null). </returns>
+ /// <exception cref="InvalidOperationException"> The score mode is ScoreMode.None. </exception>
+ internal virtual float[] SwapChildScores(float[] other)
+ {
+ if (_scoreMode == ScoreMode.None)
+ {
+ throw new InvalidOperationException("ScoreMode is None; you must pass trackScores=false to ToParentBlockJoinCollector");
+ }
+ float[] ret = _pendingChildScores;
+ if (other == null)
+ {
+ _pendingChildScores = new float[5];
+ }
+ else
+ {
+ _pendingChildScores = other;
+ }
+ return ret;
+ }
+
+ /// <summary>
+ /// Moves to the next accepted parent that has at least one matching child, gathering the
+ /// children (and, unless the score mode is None, their scores and freqs) on the way.
+ /// </summary>
+ /// <returns> The parent doc id, or NO_MORE_DOCS when the child scorer is exhausted. </returns>
+ /// <exception cref="InvalidOperationException"> The child scorer matched a parent document. </exception>
+ public override int NextDoc()
+ {
+ //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
+ // Loop until we hit a parentDoc that's accepted
+ while (true)
+ {
+ if (_nextChildDoc == NO_MORE_DOCS)
+ {
+ //System.out.println(" end");
+ return _parentDocRenamed = NO_MORE_DOCS;
+ }
+
+ // Gather all children sharing the same parent as
+ // nextChildDoc
+
+ _parentDocRenamed = _parentBits.NextSetBit(_nextChildDoc);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ //System.out.println(" parentDoc=" + parentDoc);
+ Debug.Assert(_parentDocRenamed != -1);
+
+ //System.out.println(" nextChildDoc=" + nextChildDoc);
+ if (_acceptDocs != null && !_acceptDocs.Get(_parentDocRenamed))
+ {
+ // Parent doc not accepted; skip child docs until
+ // we hit a new parent doc:
+ do
+ {
+ _nextChildDoc = _childScorer.NextDoc();
+ } while (_nextChildDoc < _parentDocRenamed);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ continue;
+ }
+
+ float totalScore = 0;
+ float maxScore = float.NegativeInfinity;
+
+ // Restart the per-parent counters before gathering this parent's children.
+ _childDocUpto = 0;
+ _parentFreq = 0;
+ do
+ {
+ //System.out.println(" c=" + nextChildDoc);
+ // Grow the tracking buffers (if tracking is enabled) before writing at _childDocUpto.
+ if (_pendingChildDocs != null && _pendingChildDocs.Length == _childDocUpto)
+ {
+ _pendingChildDocs = ArrayUtil.Grow(_pendingChildDocs);
+ }
+ if (_pendingChildScores != null && _scoreMode != ScoreMode.None && _pendingChildScores.Length == _childDocUpto)
+ {
+ _pendingChildScores = ArrayUtil.Grow(_pendingChildScores);
+ }
+ if (_pendingChildDocs != null)
+ {
+ _pendingChildDocs[_childDocUpto] = _nextChildDoc;
+ }
+ if (_scoreMode != ScoreMode.None)
+ {
+ // TODO: specialize this into dedicated classes per-scoreMode
+ float childScore = _childScorer.Score();
+ int childFreq = _childScorer.Freq();
+ if (_pendingChildScores != null)
+ {
+ _pendingChildScores[_childDocUpto] = childScore;
+ }
+ maxScore = Math.Max(childScore, maxScore);
+ totalScore += childScore;
+ _parentFreq += childFreq;
+ }
+ _childDocUpto++;
+ _nextChildDoc = _childScorer.NextDoc();
+ } while (_nextChildDoc < _parentDocRenamed);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ // Fold the gathered child scores into the parent score; None leaves _parentScore untouched.
+ switch (_scoreMode)
+ {
+ case ScoreMode.Avg:
+ _parentScore = totalScore / _childDocUpto;
+ break;
+ case ScoreMode.Max:
+ _parentScore = maxScore;
+ break;
+ case ScoreMode.Total:
+ _parentScore = totalScore;
+ break;
+ case ScoreMode.None:
+ break;
+ }
+
+ //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
+ return _parentDocRenamed;
+ }
+ }
+
+ /// <summary> Doc id of the current parent. </summary>
+ public override int DocID()
+ {
+ return _parentDocRenamed;
+ }
+
+ //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+ //ORIGINAL LINE: @Override public float score() throws java.io.IOException
+ /// <summary> Aggregated score computed for the current parent by the last NextDoc call. </summary>
+ public override float Score()
+ {
+ return _parentScore;
+ }
+
+ /// <summary> Sum of the child freqs gathered for the current parent by the last NextDoc call. </summary>
+ public override int Freq()
+ {
+ return _parentFreq;
+ }
+
+ /// <summary>
+ /// Positions the child scorer after the parent bit that precedes
+ /// <paramref name="parentTarget"/> (when it is not already past it) and then delegates to
+ /// NextDoc.
+ /// </summary>
+ /// <param name="parentTarget"> Target doc id in the parent document space. </param>
+ /// <returns> The value returned by NextDoc, or NO_MORE_DOCS when that is the target. </returns>
+ /// <exception cref="InvalidOperationException"> The child scorer matched a parent document. </exception>
+ public override int Advance(int parentTarget)
+ {
+
+ //System.out.println("Q.advance parentTarget=" + parentTarget);
+ if (parentTarget == NO_MORE_DOCS)
+ {
+ return _parentDocRenamed = NO_MORE_DOCS;
+ }
+
+ if (parentTarget == 0)
+ {
+ // Callers should only be passing in a docID from
+ // the parent space, so this means this parent
+ // has no children (it got docID 0), so it cannot
+ // possibly match. We must handle this case
+ // separately otherwise we pass invalid -1 to
+ // prevSetBit below:
+ return NextDoc();
+ }
+
+ _prevParentDoc = _parentBits.PrevSetBit(parentTarget - 1);
+
+ //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
+ Debug.Assert(_prevParentDoc >= _parentDocRenamed);
+ if (_prevParentDoc > _nextChildDoc)
+ {
+ _nextChildDoc = _childScorer.Advance(_prevParentDoc);
+ // System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
+ //} else {
+ //System.out.println(" skip childScorer advance");
+ }
+
+ // Parent & child docs are supposed to be orthogonal:
+ if (_nextChildDoc == _prevParentDoc)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ int nd = NextDoc();
+ //System.out.println(" return nextParentDoc=" + nd);
+ return nd;
+ }
+
+ /// <summary>
+ /// Builds a match explanation naming the child doc range between the previous parent
+ /// recorded by Advance and the current parent, both offset by <paramref name="docBase"/>.
+ /// </summary>
+ public virtual Explanation Explain(int docBase)
+ {
+ int start = docBase + _prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
+ int end = docBase + _parentDocRenamed - 1; // -1 b/c parentDoc is parent doc
+ return new ComplexExplanation(true, Score(), string.Format("Score based on child doc range from {0} to {1}", start, end));
+ }
+
+ /// <summary> Cost of the underlying child scorer. </summary>
+ public override long Cost()
+ {
+ return _childScorer.Cost();
+ }
+
+ /// <summary>
+ /// Instructs this scorer to keep track of the child docIds and score ids for retrieval purposes.
+ /// The score buffer is only allocated when the score mode is not ScoreMode.None.
+ /// </summary>
+ public virtual void TrackPendingChildHits()
+ {
+ _pendingChildDocs = new int[5];
+ if (_scoreMode != ScoreMode.None)
+ {
+ _pendingChildScores = new float[5];
+ }
+ }
+ }
+
+ /// <summary>
+ /// Adds the terms of the wrapped child query to <paramref name="terms"/>.
+ /// </summary>
+ public override void ExtractTerms(ISet<Term> terms)
+ {
+ Query child = _childQuery;
+ child.ExtractTerms(terms);
+ }
+
+ /// <summary>
+ /// Rewrites the child query. When the rewrite yields a different instance, a new
+ /// ToParentBlockJoinQuery is returned that wraps it, keeps the original child query and
+ /// carries over this query's boost; otherwise this instance is returned unchanged.
+ /// </summary>
+ public override Query Rewrite(IndexReader reader)
+ {
+ Query rewrittenChild = _childQuery.Rewrite(reader);
+ if (rewrittenChild == _childQuery)
+ {
+ return this;
+ }
+
+ Query joined = new ToParentBlockJoinQuery(_origChildQuery, rewrittenChild, _parentsFilter, _scoreMode);
+ joined.Boost = Boost;
+ return joined;
+ }
+
+ /// <summary>
+ /// Returns a description of this query built from the child query's string form;
+ /// <paramref name="field"/> is not used.
+ /// </summary>
+ public override string ToString(string field)
+ {
+ return string.Concat("ToParentBlockJoinQuery (", _childQuery, ")");
+ }
+
+ // Typed equality: base equality plus the parents filter, the score mode and the
+ // ORIGINAL child query (so a rewritten query still equals its source query).
+ protected bool Equals(ToParentBlockJoinQuery other)
+ {
+ if (!base.Equals(other))
+ {
+ return false;
+ }
+ if (!Equals(_parentsFilter, other._parentsFilter))
+ {
+ return false;
+ }
+ if (_scoreMode != other._scoreMode)
+ {
+ return false;
+ }
+ return Equals(_origChildQuery, other._origChildQuery);
+ }
+
+ /// <summary>
+ /// Two queries are equal when they are the same instance, or have exactly the same
+ /// runtime type and pass the typed comparison.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+ if (ReferenceEquals(null, obj))
+ {
+ return false;
+ }
+ if (ReferenceEquals(this, obj))
+ {
+ return true;
+ }
+ return obj.GetType() == GetType() && Equals((ToParentBlockJoinQuery) obj);
+ }
+
+ /// <summary>
+ /// Hash code combining the base hash with the parents filter, the score mode and the
+ /// original child query, i.e. the same members the typed Equals compares.
+ /// </summary>
+ public override int GetHashCode()
+ {
+ unchecked
+ {
+ int hash = base.GetHashCode();
+ int parentsHash = _parentsFilter == null ? 0 : _parentsFilter.GetHashCode();
+ hash = (hash * 397) ^ parentsHash;
+ hash = (hash * 397) ^ (int) _scoreMode;
+ int origChildHash = _origChildQuery == null ? 0 : _origChildQuery.GetHashCode();
+ hash = (hash * 397) ^ origChildHash;
+ return hash;
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Join/ToParentBlockJoinSortField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/ToParentBlockJoinSortField.cs b/src/Lucene.Net.Join/ToParentBlockJoinSortField.cs
new file mode 100644
index 0000000..aa2a3b6
--- /dev/null
+++ b/src/Lucene.Net.Join/ToParentBlockJoinSortField.cs
@@ -0,0 +1,78 @@
+using System;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Sort field that orders parent documents by a field of their nested / child documents.
+ /// Depending on the requested order, the comparator built by this field is either the
+ /// "Highest" or the "Lowest" variant of ToParentBlockJoinFieldComparator.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinSortField : SortField
+ {
+ private readonly bool _order;
+ private readonly Filter _parentFilter;
+ private readonly Filter _childFilter;
+
+ /// <summary>
+ /// Create ToParentBlockJoinSortField. The parent level order is the same flag as the child level one (reverse).
+ /// </summary>
+ /// <param name="field"> The sort field on the nested / child level. </param>
+ /// <param name="type"> The sort type on the nested / child level. </param>
+ /// <param name="reverse"> Whether natural order should be reversed on the nested / child level. </param>
+ /// <param name="parentFilter"> Filter that identifies the parent documents. </param>
+ /// <param name="childFilter"> Filter that defines which child documents participate in sorting. </param>
+ public ToParentBlockJoinSortField(string field, Type_e type, bool reverse, Filter parentFilter, Filter childFilter)
+ : this(field, type, reverse, reverse, parentFilter, childFilter)
+ {
+ }
+
+ /// <summary>
+ /// Create ToParentBlockJoinSortField with independent child level and parent level order flags.
+ /// </summary>
+ /// <param name="field"> The sort field on the nested / child level. </param>
+ /// <param name="type"> The sort type on the nested / child level. </param>
+ /// <param name="reverse"> Whether natural order should be reversed on the nested / child document level. </param>
+ /// <param name="order"> Whether natural order should be reversed on the parent level. </param>
+ /// <param name="parentFilter"> Filter that identifies the parent documents. </param>
+ /// <param name="childFilter"> Filter that defines which child documents participate in sorting. </param>
+ public ToParentBlockJoinSortField(string field, Type_e type, bool reverse, bool order, Filter parentFilter, Filter childFilter)
+ : base(field, type, reverse)
+ {
+ _childFilter = childFilter;
+ _parentFilter = parentFilter;
+ _order = order;
+ }
+
+ /// <summary>
+ /// Wraps the comparator of the base sort field (created with one extra slot,
+ /// numHits + 1) in a block join comparator: Highest when the parent level order flag
+ /// is set, Lowest otherwise.
+ /// </summary>
+ public override FieldComparator GetComparator(int numHits, int sortPos)
+ {
+ var wrapped = base.GetComparator(numHits + 1, sortPos);
+ if (!_order)
+ {
+ return new ToParentBlockJoinFieldComparator.Lowest(wrapped, _parentFilter, _childFilter, numHits);
+ }
+
+ return new ToParentBlockJoinFieldComparator.Highest(wrapped, _parentFilter, _childFilter, numHits);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs b/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
index 9e2879c..ea68c2f 100644
--- a/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
+++ b/src/Lucene.Net.TestFramework/Util/LuceneTestCase.cs
@@ -607,7 +607,7 @@ namespace Lucene.Net.Util
/// </summary>
public static Random Random()
{
- return _random ?? (_random = new Random( /* LUCENENET TODO seed */));
+ return _random ?? (_random = new Random(1));
//return RandomizedContext.Current.Random;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj b/src/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
new file mode 100644
index 0000000..d6cd6d1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{4C1B794F-8158-45E6-85B3-2C46569BEBC2}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Tests.Join</RootNamespace>
+ <AssemblyName>Lucene.Net.Tests.Join</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="Apache.NMS, Version=1.6.0.3083, Culture=neutral, PublicKeyToken=82756feee3957618, processorArchitecture=MSIL">
+ <HintPath>..\packages\Apache.NMS.1.6.0.3083\lib\net40\Apache.NMS.dll</HintPath>
+ <Private>True</Private>
+ </Reference>
+ <Reference Include="nunit.framework, Version=2.6.3.13283, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77, processorArchitecture=MSIL">
+ <HintPath>..\packages\NUnit.2.6.3\lib\nunit.framework.dll</HintPath>
+ <Private>True</Private>
+ </Reference>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TestBlockJoin.cs" />
+ <Compile Include="TestBlockJoinSorting.cs" />
+ <Compile Include="TestBlockJoinValidation.cs" />
+ <Compile Include="TestJoinUtil.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
+ <Project>{02BAB603-067D-48B1-AEDD-316849652568}</Project>
+ <Name>Lucene.Net.Grouping</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Join\Lucene.Net.Join.csproj">
+ <Project>{e8a339c7-fcf6-4a72-8586-56d8961d7b99}</Project>
+ <Name>Lucene.Net.Join</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+ <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
+ <Name>Lucene.Net.TestFramework</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="packages.config" />
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/src/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs b/src/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..f94805a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Tests.Join")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Tests.Join")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("4c1b794f-8158-45e6-85b3-2c46569bebc2")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
[03/17] lucenenet git commit: Completed the implementation port of
the Join project
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Join/ToParentBlockJoinQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/ToParentBlockJoinQuery.cs b/Lucene.Net.Join/ToParentBlockJoinQuery.cs
new file mode 100644
index 0000000..810f30e
--- /dev/null
+++ b/Lucene.Net.Join/ToParentBlockJoinQuery.cs
@@ -0,0 +1,516 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// This query requires that you index
+ /// children and parent docs as a single block, using the
+ /// <see cref="IndexWriter.AddDocuments"/> or
+ /// <see cref="IndexWriter.UpdateDocuments"/> API. In each block, the
+ /// child documents must appear first, ending with the parent
+ /// document. At search time you provide a Filter
+ /// identifying the parents, however this Filter must provide
+ /// an <see cref="FixedBitSet"/> per sub-reader.
+ ///
+ /// <p>Once the block index is built, use this query to wrap
+ /// any sub-query matching only child docs and join matches in that
+ /// child document space up to the parent document space.
+ /// You can then use this Query as a clause with
+ /// other queries in the parent document space.</p>
+ ///
+ /// <p>See <see cref="ToChildBlockJoinQuery"/> if you need to join
+ /// in the reverse order.</p>
+ ///
+ /// <p>The child documents must be orthogonal to the parent
+ /// documents: the wrapped child query must never
+ /// return a parent document.</p>
+ ///
+ /// If you'd like to retrieve <see cref="TopGroups"/> for the
+ /// resulting query, use the <see cref="ToParentBlockJoinCollector"/>.
+ /// Note that this is not necessary, ie, if you simply want
+ /// to collect the parent documents and don't need to see
+ /// which child documents matched under that parent, then
+ /// you can use any collector.
+ ///
+ /// <p><b>NOTE</b>: If the overall query contains parent-only
+ /// matches, for example you OR a parent-only query with a
+ /// joined child-only query, then the resulting collected documents
+ /// will be correct, however the <see cref="TopGroups"/> you get
+ /// from <see cref="ToParentBlockJoinCollector"/> will not contain every
+ /// child for parents that had matched.</p>
+ ///
+ /// <p>See the <c>Lucene.Net.Join</c> namespace for an
+ /// overview. </p>
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class ToParentBlockJoinQuery : Query
+ {
+
+ private readonly Filter _parentsFilter;
+ private readonly Query _childQuery;
+
+ // If we are rewritten, this is the original childQuery we
+ // were passed; we use this for Equals() and
+ // GetHashCode(). This makes the rewritten query equal to the
+ // original, so that the user does not have to Rewrite() their
+ // query before searching:
+ private readonly Query _origChildQuery;
+ private readonly ScoreMode _scoreMode;
+
+ /// <summary>
+ /// Builds a join query that lifts matches of <paramref name="childQuery"/>
+ /// up to their parent documents.
+ /// </summary>
+ /// <param name="childQuery">Query matching child documents; it is also kept
+ /// as the original (un-rewritten) child query.</param>
+ /// <param name="parentsFilter">Filter identifying the parent documents. It must
+ /// produce a FixedBitSet per segment (for example
+ /// <see cref="FixedBitSetCachingWrapperFilter"/>).</param>
+ /// <param name="scoreMode">How the scores of multiple children are combined
+ /// into a single parent score.</param>
+ public ToParentBlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode)
+ : this(childQuery, childQuery, parentsFilter, scoreMode)
+ {
+ }
+
+ /// <summary>
+ /// Constructor that keeps the originally supplied child query separate from
+ /// the child query that is actually executed (used by <see cref="Rewrite"/>).
+ /// </summary>
+ private ToParentBlockJoinQuery(Query origChildQuery, Query childQuery, Filter parentsFilter, ScoreMode scoreMode)
+ {
+ _scoreMode = scoreMode;
+ _parentsFilter = parentsFilter;
+ _childQuery = childQuery;
+ _origChildQuery = origChildQuery;
+ }
+
+ /// <summary>
+ /// Creates the weight for this join by wrapping the weight of the child query.
+ /// </summary>
+ public override Weight CreateWeight(IndexSearcher searcher)
+ {
+ Weight childWeight = _childQuery.CreateWeight(searcher);
+ return new BlockJoinWeight(this, childWeight, _parentsFilter, _scoreMode);
+ }
+
+ /// <summary>
+ /// Weight for the join query: normalization is delegated to the child weight and
+ /// each segment is scored by a <see cref="BlockJoinScorer"/>.
+ /// </summary>
+ private class BlockJoinWeight : Weight
+ {
+ internal readonly Query JoinQuery;
+ internal readonly Weight ChildWeight;
+ internal readonly Filter ParentsFilter;
+ internal readonly ScoreMode ScoreMode;
+
+ public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode)
+ {
+ ScoreMode = scoreMode;
+ ParentsFilter = parentsFilter;
+ ChildWeight = childWeight;
+ JoinQuery = joinQuery;
+ }
+
+ /// <summary>The join query this weight was created for.</summary>
+ public override Query Query
+ {
+ get { return JoinQuery; }
+ }
+
+ /// <summary>The child weight's normalization value multiplied by the join query's boost squared.</summary>
+ public override float ValueForNormalization
+ {
+ get
+ {
+ float childValue = ChildWeight.ValueForNormalization;
+ float boost = JoinQuery.Boost;
+ return childValue * boost * boost;
+ }
+ }
+
+ /// <summary>Forwards normalization to the child weight, folding in the join query's boost.</summary>
+ public override void Normalize(float norm, float topLevelBoost)
+ {
+ float combinedBoost = topLevelBoost * JoinQuery.Boost;
+ ChildWeight.Normalize(norm, combinedBoost);
+ }
+
+ // NOTE: acceptDocs applies (and is checked) only in the parent document space
+ public override Scorer Scorer(AtomicReaderContext readerContext, Bits acceptDocs)
+ {
+ Scorer childScorer = ChildWeight.Scorer(readerContext, readerContext.AtomicReader.LiveDocs);
+ if (childScorer == null)
+ {
+ // The child query has no scorer for this segment: no matches
+ return null;
+ }
+
+ int firstChildDoc = childScorer.NextDoc();
+ if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ // The child scorer is already exhausted: no matches
+ return null;
+ }
+
+ // acceptDocs is deliberately not handed to the filter: doing so
+ // would (most likely, justifiably) make it return a
+ // BitsFilteredDocIdSet instead of a FixedBitSet. The scorer
+ // applies acceptDocs itself while scoring.
+ DocIdSet parents = ParentsFilter.GetDocIdSet(readerContext, null);
+ if (parents == null)
+ {
+ // No parents in this segment: no matches
+ return null;
+ }
+
+ FixedBitSet parentBits = parents as FixedBitSet;
+ if (parentBits == null)
+ {
+ throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
+ }
+
+ return new BlockJoinScorer(this, childScorer, parentBits, firstChildDoc, ScoreMode, acceptDocs);
+ }
+
+ /// <summary>
+ /// Explains the score of <paramref name="doc"/> when the scorer can be advanced exactly onto it;
+ /// otherwise reports a non-match.
+ /// </summary>
+ public override Explanation Explain(AtomicReaderContext context, int doc)
+ {
+ BlockJoinScorer scorer = (BlockJoinScorer)Scorer(context, context.AtomicReader.LiveDocs);
+ bool matched = scorer != null && scorer.Advance(doc) == doc;
+ if (!matched)
+ {
+ return new ComplexExplanation(false, 0.0f, "Not a match");
+ }
+ return scorer.Explain(context.DocBase);
+ }
+
+ /// <summary>Always false for this weight.</summary>
+ public override bool ScoresDocsOutOfOrder()
+ {
+ return false;
+ }
+ }
+
+ internal class BlockJoinScorer : Scorer
+ {
+ private readonly Scorer _childScorer;
+ private readonly FixedBitSet _parentBits;
+ private readonly ScoreMode _scoreMode;
+ private readonly Bits _acceptDocs;
+ private int _parentDocRenamed = -1;
+ private int _prevParentDoc;
+ private float _parentScore;
+ private int _parentFreq;
+ private int _nextChildDoc;
+ private int[] _pendingChildDocs;
+ private float[] _pendingChildScores;
+ private int _childDocUpto;
+
+ /// <summary>
+ /// Creates the scorer. <paramref name="firstChildDoc"/> is recorded as the next
+ /// child document to be consumed by <see cref="NextDoc"/>.
+ /// </summary>
+ public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) : base(weight)
+ {
+ _childScorer = childScorer;
+ _parentBits = parentBits;
+ _nextChildDoc = firstChildDoc;
+ _acceptDocs = acceptDocs;
+ _scoreMode = scoreMode;
+ }
+
+ /// <summary>
+ /// A single-element collection wrapping the child scorer under the "BLOCK_JOIN" relationship.
+ /// </summary>
+ public override ICollection<ChildScorer> Children
+ {
+ get
+ {
+ ChildScorer wrappedChild = new ChildScorer(_childScorer, "BLOCK_JOIN");
+ return Collections.Singleton(wrappedChild);
+ }
+ }
+
+ /// <summary>
+ /// Number of child documents counted for the current parent by the last <see cref="NextDoc"/> call.
+ /// </summary>
+ internal virtual int ChildCount
+ {
+ get
+ {
+ return _childDocUpto;
+ }
+ }
+
+ /// <summary>
+ /// The current parent document ID (-1 before the first call to <see cref="NextDoc"/>).
+ /// </summary>
+ internal virtual int ParentDoc
+ {
+ get
+ {
+ return _parentDocRenamed;
+ }
+ }
+
+ /// <summary>
+ /// Hands the current pending child doc buffer to the caller and installs
+ /// <paramref name="other"/> (or a fresh 5-element array when it is null) in its place.
+ /// </summary>
+ /// <returns>The buffer that was in use before the swap.</returns>
+ internal virtual int[] SwapChildDocs(int[] other)
+ {
+ int[] previous = _pendingChildDocs;
+ _pendingChildDocs = other ?? new int[5];
+ return previous;
+ }
+
+ /// <summary>
+ /// Hands the current pending child score buffer to the caller and installs
+ /// <paramref name="other"/> (or a fresh 5-element array when it is null) in its place.
+ /// </summary>
+ /// <returns>The buffer that was in use before the swap.</returns>
+ /// <exception cref="InvalidOperationException">If the score mode is None.</exception>
+ internal virtual float[] SwapChildScores(float[] other)
+ {
+ bool scoresTracked = _scoreMode != ScoreMode.None;
+ if (!scoresTracked)
+ {
+ throw new InvalidOperationException("ScoreMode is None; you must pass trackScores=false to ToParentBlockJoinCollector");
+ }
+
+ float[] previous = _pendingChildScores;
+ _pendingChildScores = other ?? new float[5];
+ return previous;
+ }
+
+ /// <summary>
+ /// Moves to the next accepted parent document, consuming all of its matching
+ /// children from the child scorer and aggregating their scores and frequencies.
+ /// </summary>
+ /// <returns>The parent doc ID, or NO_MORE_DOCS once the child scorer is exhausted.</returns>
+ /// <exception cref="InvalidOperationException">If the child scorer is positioned on a document that is marked as a parent.</exception>
+ public override int NextDoc()
+ {
+ //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
+ // Loop until we hit a parentDoc that's accepted
+ while (true)
+ {
+ if (_nextChildDoc == NO_MORE_DOCS)
+ {
+ //System.out.println(" end");
+ return _parentDocRenamed = NO_MORE_DOCS;
+ }
+
+ // Gather all children sharing the same parent as
+ // nextChildDoc
+
+ // The parent is the first set bit at or after the pending child.
+ _parentDocRenamed = _parentBits.NextSetBit(_nextChildDoc);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ //System.out.println(" parentDoc=" + parentDoc);
+ Debug.Assert(_parentDocRenamed != -1);
+
+ //System.out.println(" nextChildDoc=" + nextChildDoc);
+ if (_acceptDocs != null && !_acceptDocs.Get(_parentDocRenamed))
+ {
+ // Parent doc not accepted; skip child docs until
+ // we hit a new parent doc:
+ do
+ {
+ _nextChildDoc = _childScorer.NextDoc();
+ } while (_nextChildDoc < _parentDocRenamed);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ continue;
+ }
+
+ // Parent is accepted: walk every child before it, accumulating
+ // score statistics and (when tracking is enabled) the child hits.
+ float totalScore = 0;
+ float maxScore = float.NegativeInfinity;
+
+ _childDocUpto = 0;
+ _parentFreq = 0;
+ do
+ {
+ //System.out.println(" c=" + nextChildDoc);
+ // The pending buffers are only non-null once tracking has been
+ // requested; grow them when the next slot would be out of range.
+ if (_pendingChildDocs != null && _pendingChildDocs.Length == _childDocUpto)
+ {
+ _pendingChildDocs = ArrayUtil.Grow(_pendingChildDocs);
+ }
+ if (_pendingChildScores != null && _scoreMode != ScoreMode.None && _pendingChildScores.Length == _childDocUpto)
+ {
+ _pendingChildScores = ArrayUtil.Grow(_pendingChildScores);
+ }
+ if (_pendingChildDocs != null)
+ {
+ _pendingChildDocs[_childDocUpto] = _nextChildDoc;
+ }
+ if (_scoreMode != ScoreMode.None)
+ {
+ // TODO: specialize this into dedicated classes per-scoreMode
+ float childScore = _childScorer.Score();
+ int childFreq = _childScorer.Freq();
+ if (_pendingChildScores != null)
+ {
+ _pendingChildScores[_childDocUpto] = childScore;
+ }
+ maxScore = Math.Max(childScore, maxScore);
+ totalScore += childScore;
+ _parentFreq += childFreq;
+ }
+ _childDocUpto++;
+ _nextChildDoc = _childScorer.NextDoc();
+ } while (_nextChildDoc < _parentDocRenamed);
+
+ // Parent & child docs are supposed to be
+ // orthogonal:
+ if (_nextChildDoc == _parentDocRenamed)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ // Fold the collected child scores into the parent score;
+ // ScoreMode.None leaves _parentScore untouched.
+ switch (_scoreMode)
+ {
+ case ScoreMode.Avg:
+ _parentScore = totalScore / _childDocUpto;
+ break;
+ case ScoreMode.Max:
+ _parentScore = maxScore;
+ break;
+ case ScoreMode.Total:
+ _parentScore = totalScore;
+ break;
+ case ScoreMode.None:
+ break;
+ }
+
+ //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
+ return _parentDocRenamed;
+ }
+ }
+
+ /// <summary>
+ /// Returns the parent document this scorer is currently positioned on.
+ /// </summary>
+ public override int DocID()
+ {
+ int currentParent = _parentDocRenamed;
+ return currentParent;
+ }
+
+ /// <summary>
+ /// Returns the parent score computed by the most recent <see cref="NextDoc"/> call.
+ /// </summary>
+ public override float Score()
+ {
+ float aggregated = _parentScore;
+ return aggregated;
+ }
+
+ /// <summary>
+ /// Returns the sum of child frequencies accumulated for the current parent.
+ /// </summary>
+ public override int Freq()
+ {
+ int accumulated = _parentFreq;
+ return accumulated;
+ }
+
+ /// <summary>
+ /// Positions the child scorer at or after the parent that precedes
+ /// <paramref name="parentTarget"/> and then delegates to <see cref="NextDoc"/>
+ /// to gather the next parent's children.
+ /// </summary>
+ /// <param name="parentTarget">Target doc ID; callers are expected to pass a doc ID from the parent space.</param>
+ /// <returns>The doc ID returned by <see cref="NextDoc"/>, or NO_MORE_DOCS.</returns>
+ /// <exception cref="InvalidOperationException">If the child scorer lands exactly on the preceding parent document.</exception>
+ public override int Advance(int parentTarget)
+ {
+
+ //System.out.println("Q.advance parentTarget=" + parentTarget);
+ if (parentTarget == NO_MORE_DOCS)
+ {
+ return _parentDocRenamed = NO_MORE_DOCS;
+ }
+
+ if (parentTarget == 0)
+ {
+ // Callers should only be passing in a docID from
+ // the parent space, so this means this parent
+ // has no children (it got docID 0), so it cannot
+ // possibly match. We must handle this case
+ // separately otherwise we pass invalid -1 to
+ // prevSetBit below:
+ return NextDoc();
+ }
+
+ // Remembered in a field because Explain() uses it as the lower bound of the child range.
+ _prevParentDoc = _parentBits.PrevSetBit(parentTarget - 1);
+
+ //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
+ Debug.Assert(_prevParentDoc >= _parentDocRenamed);
+ // Only move the child scorer when it is still behind the preceding parent.
+ if (_prevParentDoc > _nextChildDoc)
+ {
+ _nextChildDoc = _childScorer.Advance(_prevParentDoc);
+ // System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
+ //} else {
+ //System.out.println(" skip childScorer advance");
+ }
+
+ // Parent & child docs are supposed to be orthogonal:
+ if (_nextChildDoc == _prevParentDoc)
+ {
+ throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
+ }
+
+ int nd = NextDoc();
+ //System.out.println(" return nextParentDoc=" + nd);
+ return nd;
+ }
+
+ /// <summary>
+ /// Describes the current parent's score in terms of the child doc range it was computed from.
+ /// </summary>
+ /// <param name="docBase">Offset added to the segment-local doc IDs before they are reported.</param>
+ /// <returns>A matching explanation carrying <see cref="Score"/> and the child doc range.</returns>
+ public virtual Explanation Explain(int docBase)
+ {
+ int start = docBase + _prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
+ int end = docBase + _parentDocRenamed - 1; // -1 b/c parentDoc is parent doc
+ // Use the invariant culture so the explanation text never depends on the
+ // current thread's culture.
+ string description = string.Format(System.Globalization.CultureInfo.InvariantCulture,
+ "Score based on child doc range from {0} to {1}", start, end);
+ return new ComplexExplanation(true, Score(), description);
+ }
+
+ /// <summary>
+ /// Returns the cost reported by the wrapped child scorer.
+ /// </summary>
+ public override long Cost()
+ {
+ long childCost = _childScorer.Cost();
+ return childCost;
+ }
+
+ /// <summary>
+ /// Turns on recording of child doc IDs for later retrieval; child scores are
+ /// recorded as well unless the score mode is None.
+ /// </summary>
+ public virtual void TrackPendingChildHits()
+ {
+ _pendingChildDocs = new int[5];
+ if (_scoreMode == ScoreMode.None)
+ {
+ // No scores are computed in this mode, so no score buffer is allocated.
+ return;
+ }
+ _pendingChildScores = new float[5];
+ }
+ }
+
+ /// <summary>
+ /// Adds the terms of the child query to <paramref name="terms"/>.
+ /// </summary>
+ public override void ExtractTerms(ISet<Term> terms)
+ {
+ Query child = _childQuery;
+ child.ExtractTerms(terms);
+ }
+
+ /// <summary>
+ /// Rewrites the child query. When that yields a different query, a new join query
+ /// is returned that runs the rewritten child but still remembers the original
+ /// child query and carries over this query's boost; otherwise this instance is returned.
+ /// </summary>
+ public override Query Rewrite(IndexReader reader)
+ {
+ Query rewrittenChild = _childQuery.Rewrite(reader);
+ if (rewrittenChild == _childQuery)
+ {
+ return this;
+ }
+
+ Query joined = new ToParentBlockJoinQuery(_origChildQuery, rewrittenChild, _parentsFilter, _scoreMode);
+ joined.Boost = Boost;
+ return joined;
+ }
+
+ /// <summary>
+ /// Returns "ToParentBlockJoinQuery (" followed by the child query's string form and ")".
+ /// The <paramref name="field"/> argument is not used.
+ /// </summary>
+ public override string ToString(string field)
+ {
+ return string.Concat("ToParentBlockJoinQuery (", _childQuery, ")");
+ }
+
+ /// <summary>
+ /// Typed equality: the base query state, the parents filter, the score mode and the
+ /// original (un-rewritten) child query must all be equal.
+ /// </summary>
+ protected bool Equals(ToParentBlockJoinQuery other)
+ {
+ if (!base.Equals(other)) return false;
+ if (!Equals(_parentsFilter, other._parentsFilter)) return false;
+ if (_scoreMode != other._scoreMode) return false;
+ return Equals(_origChildQuery, other._origChildQuery);
+ }
+
+ /// <summary>
+ /// Returns true for the same instance, false for null or an object of a different
+ /// runtime type, and otherwise defers to the typed comparison.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+ if (ReferenceEquals(this, obj)) return true;
+ if (ReferenceEquals(obj, null) || obj.GetType() != GetType()) return false;
+ return Equals((ToParentBlockJoinQuery) obj);
+ }
+
+ /// <summary>
+ /// Combines the base hash with the hashes of the parents filter, the score mode and
+ /// the original child query (the same fields the typed Equals compares).
+ /// </summary>
+ public override int GetHashCode()
+ {
+ unchecked
+ {
+ int result = base.GetHashCode();
+ int filterHash = _parentsFilter != null ? _parentsFilter.GetHashCode() : 0;
+ int origChildHash = _origChildQuery != null ? _origChildQuery.GetHashCode() : 0;
+
+ result = (result*397) ^ filterHash;
+ result = (result*397) ^ (int) _scoreMode;
+ result = (result*397) ^ origChildHash;
+ return result;
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
new file mode 100644
index 0000000..30d5a7b
--- /dev/null
+++ b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{1866F8E4-ABF5-4CBE-B23B-4BADF6CD20DC}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Tests.Join</RootNamespace>
+ <AssemblyName>Lucene.Net.Tests.Join</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TestBlockJoin.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Join\Lucene.Net.Join.csproj">
+ <Project>{e8a339c7-fcf6-4a72-8586-56d8961d7b99}</Project>
+ <Name>Lucene.Net.Join</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs b/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..cd01ea7
--- /dev/null
+++ b/Lucene.Net.Tests.Join/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Tests.Join")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Tests.Join")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("1866f8e4-abf5-4cbe-b23b-4badf6cd20dc")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.Tests.Join/TestBlockJoin.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestBlockJoin.cs b/Lucene.Net.Tests.Join/TestBlockJoin.cs
new file mode 100644
index 0000000..1278782
--- /dev/null
+++ b/Lucene.Net.Tests.Join/TestBlockJoin.cs
@@ -0,0 +1,7 @@
+namespace Lucene.Net.Tests.Join
+{
+ /// <summary>
+ /// Placeholder test class; it currently declares no members.
+ /// </summary>
+ public class TestBlockJoin
+ {
+
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index 34c4804..d76fe0c 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -40,6 +40,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs.Tests", "
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Join", "Lucene.Net.Join\Lucene.Net.Join.csproj", "{E8A339C7-FCF6-4A72-8586-56D8961D7B99}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Grouping", "Lucene.Net.Grouping\Lucene.Net.Grouping.csproj", "{02BAB603-067D-48B1-AEDD-316849652568}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -193,6 +195,18 @@ Global
{E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|Mixed Platforms.Build.0 = Release|Any CPU
{E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|x86.ActiveCfg = Release|Any CPU
{E8A339C7-FCF6-4A72-8586-56D8961D7B99}.Release|x86.Build.0 = Release|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Debug|x86.Build.0 = Debug|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Release|Any CPU.Build.0 = Release|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Release|x86.ActiveCfg = Release|Any CPU
+ {02BAB603-067D-48B1-AEDD-316849652568}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47f20b9a/src/Lucene.Net.Core/Search/FieldValueHitQueue.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Search/FieldValueHitQueue.cs b/src/Lucene.Net.Core/Search/FieldValueHitQueue.cs
index 3249110..df19e86 100644
--- a/src/Lucene.Net.Core/Search/FieldValueHitQueue.cs
+++ b/src/Lucene.Net.Core/Search/FieldValueHitQueue.cs
@@ -28,9 +28,9 @@ namespace Lucene.Net.Search
// had to change from internal to public, due to public accessability of FieldValueHitQueue
public class Entry : ScoreDoc
{
- internal int Slot;
+ public int Slot;
- internal Entry(int slot, int doc, float score)
+ public Entry(int slot, int doc, float score)
: base(doc, score)
{
this.Slot = slot;
[08/17] lucenenet git commit: Implemented the last of the
Lucene.Net.Join tests
Posted by sy...@apache.org.
Implemented the last of the Lucene.Net.Join tests
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0213f530
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0213f530
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0213f530
Branch: refs/heads/master
Commit: 0213f5301801aa372750b576af50ead184173072
Parents: 1213ca7
Author: Josh Sullivan <ja...@gmail.com>
Authored: Mon Aug 17 16:04:05 2015 -0400
Committer: Josh Sullivan <ja...@gmail.com>
Committed: Mon Aug 17 16:04:05 2015 -0400
----------------------------------------------------------------------
Lucene.Net.Join/TermsCollector.cs | 2 +-
Lucene.Net.Join/TermsWithScoreCollector.cs | 2 +-
.../Lucene.Net.Tests.Join.csproj | 1 +
Lucene.Net.Tests.Join/TestJoinUtil.cs | 1165 ++++++++++++++++++
4 files changed, 1168 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0213f530/Lucene.Net.Join/TermsCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsCollector.cs b/Lucene.Net.Join/TermsCollector.cs
index 8f8e4f5..2ccf1ed 100644
--- a/Lucene.Net.Join/TermsCollector.cs
+++ b/Lucene.Net.Join/TermsCollector.cs
@@ -120,7 +120,7 @@ namespace Lucene.Net.Join
public override bool AcceptsDocsOutOfOrder()
{
- throw new System.NotImplementedException();
+ return base.AcceptsDocsOutOfOrder();
}
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0213f530/Lucene.Net.Join/TermsWithScoreCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Join/TermsWithScoreCollector.cs b/Lucene.Net.Join/TermsWithScoreCollector.cs
index c4dc97d..e823293 100644
--- a/Lucene.Net.Join/TermsWithScoreCollector.cs
+++ b/Lucene.Net.Join/TermsWithScoreCollector.cs
@@ -158,7 +158,7 @@ namespace Lucene.Net.Join
public override bool AcceptsDocsOutOfOrder()
{
- throw new NotImplementedException();
+ return base.AcceptsDocsOutOfOrder();
}
internal class Avg : Sv
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0213f530/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
index 9c959f8..e5f6d16 100644
--- a/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
+++ b/Lucene.Net.Tests.Join/Lucene.Net.Tests.Join.csproj
@@ -52,6 +52,7 @@
<Compile Include="TestBlockJoin.cs" />
<Compile Include="TestBlockJoinSorting.cs" />
<Compile Include="TestBlockJoinValidation.cs" />
+ <Compile Include="TestJoinUtil.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Lucene.Net.Grouping\Lucene.Net.Grouping.csproj">
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0213f530/Lucene.Net.Tests.Join/TestJoinUtil.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Join/TestJoinUtil.cs b/Lucene.Net.Tests.Join/TestJoinUtil.cs
new file mode 100644
index 0000000..81513c7
--- /dev/null
+++ b/Lucene.Net.Tests.Join/TestJoinUtil.cs
@@ -0,0 +1,1165 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Join;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Tests.Join
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestJoinUtil : LuceneTestCase
+ {
+ /// <summary>
+ /// Indexes two "product" documents (name + id) and four "offer" documents
+ /// (price + id + productId) and verifies that JoinUtil.CreateJoinQuery with
+ /// ScoreMode.None resolves the join in both directions:
+ /// product -> offers (id -> productId) and offer -> product (productId -> id).
+ /// The asserted doc IDs correspond to the order in which documents are added below.
+ /// </summary>
+ [Test]
+ public void TestSimple()
+ {
+ const string idField = "id";
+ const string toField = "productId";
+
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
+ NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+ .SetMergePolicy(NewLogMergePolicy()));
+
+ // 0: product "name1" with id 1
+ Document doc = new Document();
+ doc.Add(new TextField("description", "random text", Field.Store.NO));
+ doc.Add(new TextField("name", "name1", Field.Store.NO));
+ doc.Add(new TextField(idField, "1", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 1: offer for product id 1
+ doc = new Document();
+ doc.Add(new TextField("price", "10.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "2", Field.Store.NO));
+ doc.Add(new TextField(toField, "1", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 2: offer for product id 1
+ doc = new Document();
+ doc.Add(new TextField("price", "20.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "3", Field.Store.NO));
+ doc.Add(new TextField(toField, "1", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 3: product "name2" with id 4
+ doc = new Document();
+ doc.Add(new TextField("description", "more random text", Field.Store.NO));
+ doc.Add(new TextField("name", "name2", Field.Store.NO));
+ doc.Add(new TextField(idField, "4", Field.Store.NO));
+ w.AddDocument(doc);
+ // Commit part-way through so the remaining documents are added after a commit point.
+ w.Commit();
+
+ // 4: offer for product id 4
+ doc = new Document();
+ doc.Add(new TextField("price", "10.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "5", Field.Store.NO));
+ doc.Add(new TextField(toField, "4", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 5: offer for product id 4
+ doc = new Document();
+ doc.Add(new TextField("price", "20.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "6", Field.Store.NO));
+ doc.Add(new TextField(toField, "4", Field.Store.NO));
+ w.AddDocument(doc);
+
+ IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
+ w.Dispose();
+
+ // Search for product: join from the product's id to the offers' productId.
+ Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")),
+ indexSearcher, ScoreMode.None);
+
+ // Product "name2" (id 4) is referenced by the offers in docs 4 and 5.
+ TopDocs result = indexSearcher.Search(joinQuery, 10);
+ assertEquals(2, result.TotalHits);
+ assertEquals(4, result.ScoreDocs[0].Doc);
+ assertEquals(5, result.ScoreDocs[1].Doc);
+
+ // Product "name1" (id 1) is referenced by the offers in docs 1 and 2.
+ joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")),
+ indexSearcher, ScoreMode.None);
+ result = indexSearcher.Search(joinQuery, 10);
+ assertEquals(2, result.TotalHits);
+ assertEquals(1, result.ScoreDocs[0].Doc);
+ assertEquals(2, result.ScoreDocs[1].Doc);
+
+ // Search for offer: join in the opposite direction, from the offer's productId to the product's id.
+ joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")),
+ indexSearcher, ScoreMode.None);
+ result = indexSearcher.Search(joinQuery, 10);
+ assertEquals(1, result.TotalHits);
+ assertEquals(3, result.ScoreDocs[0].Doc);
+
+ indexSearcher.IndexReader.Dispose();
+ dir.Dispose();
+ }
+
+ /// <summary>
+ /// Regression check: TermsWithScoreCollector.MV.Avg forgets to grow beyond
+ /// TermsWithScoreCollector.INITIAL_ARRAY_SIZE. Runs the 300-value scenario
+ /// with multiple values per document and average scoring.
+ /// </summary>
+ [Test]
+ public void TestOverflowTermsWithScoreCollector()
+ {
+ const bool useMultipleValues = true;
+ const ScoreMode mode = ScoreMode.Avg;
+ Test300spartans(useMultipleValues, mode);
+ }
+
+ /// <summary>
+ /// Runs the 300-value scenario with a randomly chosen single/multi-value layout
+ /// and a randomly chosen score mode.
+ /// </summary>
+ [Test]
+ public void TestOverflowTermsWithScoreCollectorRandom()
+ {
+ int numberOfScoreModes = Enum.GetNames(typeof(ScoreMode)).Length;
+ // Draw the boolean first and the score mode second, so the random sequence is consumed in a fixed order.
+ bool useMultipleValues = Random().NextBoolean();
+ ScoreMode randomMode = (ScoreMode) Random().Next(numberOfScoreModes);
+ Test300spartans(useMultipleValues, randomMode);
+ }
+
+ /// <summary>
+ /// Indexes one product document with id "0" plus 300 link values ("0".."299") in the
+ /// productId field, then joins from productId to id and expects exactly the product document.
+ /// </summary>
+ /// <param name="multipleValues">
+ /// When true all 300 link values are added to a single document; when false a document is
+ /// written after each single link value (followed by one final document without a link value).
+ /// </param>
+ /// <param name="scoreMode">Score mode handed to the join query.</param>
+ protected virtual void Test300spartans(bool multipleValues, ScoreMode scoreMode)
+ {
+ const string idField = "id";
+ const string toField = "productId";
+
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory,
+ NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+ .SetMergePolicy(NewLogMergePolicy()));
+
+ // 0
+ Document product = new Document();
+ product.Add(new TextField("description", "random text", Field.Store.NO));
+ product.Add(new TextField("name", "name1", Field.Store.NO));
+ product.Add(new TextField(idField, "0", Field.Store.NO));
+ writer.AddDocument(product);
+
+ Document offer = new Document();
+ offer.Add(new TextField("price", "10.0", Field.Store.NO));
+ for (int linkValue = 0; linkValue < 300; linkValue++)
+ {
+ offer.Add(new TextField(toField, "" + linkValue, Field.Store.NO));
+ if (multipleValues)
+ {
+ // Keep accumulating link values on the same document.
+ continue;
+ }
+
+ // Single-valued layout: flush one document per link value.
+ writer.AddDocument(offer);
+ offer.RemoveFields(toField);
+ }
+ writer.AddDocument(offer);
+
+ IndexSearcher searcher = new IndexSearcher(writer.Reader);
+ writer.Dispose();
+
+ // Search for product
+ Query joinQuery = JoinUtil.CreateJoinQuery(toField, multipleValues, idField,
+ new TermQuery(new Term("price", "10.0")), searcher, scoreMode);
+
+ TopDocs topDocs = searcher.Search(joinQuery, 10);
+ assertEquals(1, topDocs.TotalHits);
+ assertEquals(0, topDocs.ScoreDocs[0].Doc);
+
+ searcher.IndexReader.Dispose();
+ directory.Dispose();
+ }
+
+ /// <summary>
+ /// LUCENE-5487: verify a join query inside a SHOULD BQ
+ /// will still use the join query's optimized BulkScorers.
+ /// The index is force-merged to a single segment and searched with a collector
+ /// (CollectorAnonymousInnerClassHelper) that fails if doc 1 is collected after doc 5.
+ /// </summary>
+ [Test]
+ public void TestInsideBooleanQuery()
+ {
+ const string idField = "id";
+ const string toField = "productId";
+
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
+ NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+ .SetMergePolicy(NewLogMergePolicy()));
+
+ // 0: product "name1" with id 7
+ Document doc = new Document();
+ doc.Add(new TextField("description", "random text", Field.Store.NO));
+ doc.Add(new TextField("name", "name1", Field.Store.NO));
+ doc.Add(new TextField(idField, "7", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 1: offer for product id 7
+ doc = new Document();
+ doc.Add(new TextField("price", "10.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "2", Field.Store.NO));
+ doc.Add(new TextField(toField, "7", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 2: offer for product id 7
+ doc = new Document();
+ doc.Add(new TextField("price", "20.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "3", Field.Store.NO));
+ doc.Add(new TextField(toField, "7", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 3: product "name2" with id 0
+ doc = new Document();
+ doc.Add(new TextField("description", "more random text", Field.Store.NO));
+ doc.Add(new TextField("name", "name2", Field.Store.NO));
+ doc.Add(new TextField(idField, "0", Field.Store.NO));
+ w.AddDocument(doc);
+ w.Commit();
+
+ // 4: offer for product id 0
+ doc = new Document();
+ doc.Add(new TextField("price", "10.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "5", Field.Store.NO));
+ doc.Add(new TextField(toField, "0", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 5: offer for product id 0
+ doc = new Document();
+ doc.Add(new TextField("price", "20.0", Field.Store.NO));
+ doc.Add(new TextField(idField, "6", Field.Store.NO));
+ doc.Add(new TextField(toField, "0", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // Merge down to one segment before opening the reader.
+ w.ForceMerge(1);
+
+ IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
+ w.Dispose();
+
+ // Search for product: both products contain "random" in their description.
+ Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField,
+ new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);
+
+ // Embed the join query as an optional clause next to a plain term query.
+ BooleanQuery bq = new BooleanQuery();
+ bq.Add(joinQuery, BooleanClause.Occur.SHOULD);
+ bq.Add(new TermQuery(new Term("id", "3")), BooleanClause.Occur.SHOULD);
+
+ // The assertion lives inside the collector; no hits are inspected here.
+ indexSearcher.Search(bq, new CollectorAnonymousInnerClassHelper(this));
+
+ indexSearcher.IndexReader.Dispose();
+ dir.Dispose();
+ }
+
+ /// <summary>
+ /// Collector used by TestInsideBooleanQuery. It records when doc 5 has been collected
+ /// and fails if doc 1 is collected afterwards. It accepts documents out of order and
+ /// ignores the reader context and the scorer.
+ /// </summary>
+ private class CollectorAnonymousInnerClassHelper : Collector
+ {
+ private readonly TestJoinUtil OuterInstance;
+
+ // Set once doc 5 has been collected.
+ internal bool sawFive;
+
+ public CollectorAnonymousInnerClassHelper(TestJoinUtil outerInstance)
+ {
+ OuterInstance = outerInstance;
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return true;
+ }
+
+ public override Scorer Scorer
+ {
+ set { }
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set { }
+ }
+
+ public override void Collect(int docID)
+ {
+ // Hairy / evil (depends on how BooleanScorer
+ // stores temporarily collected docIDs by
+ // appending to head of linked list):
+ switch (docID)
+ {
+ case 5:
+ sawFive = true;
+ break;
+ case 1:
+ assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive);
+ break;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Indexes two "movie" documents and four "subtitle" documents that point at a movie
+ /// through the movieId field, then joins from subtitles to movies and checks the
+ /// ranking of the two movies under ScoreMode.Max, ScoreMode.Total and ScoreMode.Avg.
+ /// </summary>
+ [Test]
+ public void TestSimpleWithScoring()
+ {
+ const string idField = "id";
+ const string toField = "movieId";
+
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
+ NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+ .SetMergePolicy(NewLogMergePolicy()));
+
+ // 0: movie 1 (id 1)
+ Document doc = new Document();
+ doc.Add(new TextField("description", "A random movie", Field.Store.NO));
+ doc.Add(new TextField("name", "Movie 1", Field.Store.NO));
+ doc.Add(new TextField(idField, "1", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 1: subtitle of movie 1
+ doc = new Document();
+ doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
+ doc.Add(new TextField(idField, "2", Field.Store.NO));
+ doc.Add(new TextField(toField, "1", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 2: subtitle of movie 1
+ doc = new Document();
+ doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
+ doc.Add(new TextField(idField, "3", Field.Store.NO));
+ doc.Add(new TextField(toField, "1", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 3: movie 2 (id 4)
+ doc = new Document();
+ doc.Add(new TextField("description", "A second random movie", Field.Store.NO));
+ doc.Add(new TextField("name", "Movie 2", Field.Store.NO));
+ doc.Add(new TextField(idField, "4", Field.Store.NO));
+ w.AddDocument(doc);
+ w.Commit();
+
+ // 4: subtitle of movie 2
+ doc = new Document();
+ doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
+ doc.Add(new TextField(idField, "5", Field.Store.NO));
+ doc.Add(new TextField(toField, "4", Field.Store.NO));
+ w.AddDocument(doc);
+
+ // 5: subtitle of movie 2
+ doc = new Document();
+ doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
+ doc.Add(new TextField(idField, "6", Field.Store.NO));
+ doc.Add(new TextField(toField, "4", Field.Store.NO));
+ w.AddDocument(doc);
+
+ IndexSearcher indexSearcher = new IndexSearcher(w.Reader);
+ w.Dispose();
+
+ // Search for movie via subtitle (term "random", score mode max): movie 1 is expected first.
+ Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField,
+ new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
+ TopDocs result = indexSearcher.Search(joinQuery, 10);
+ assertEquals(2, result.TotalHits);
+ assertEquals(0, result.ScoreDocs[0].Doc);
+ assertEquals(3, result.ScoreDocs[1].Doc);
+
+ // Score mode max (term "movie"): movie 2 is expected first.
+ joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
+ indexSearcher, ScoreMode.Max);
+ result = indexSearcher.Search(joinQuery, 10);
+ assertEquals(2, result.TotalHits);
+ assertEquals(3, result.ScoreDocs[0].Doc);
+ assertEquals(0, result.ScoreDocs[1].Doc);
+
+ // Score mode total (term "movie"): movie 1 is expected first.
+ joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
+ indexSearcher, ScoreMode.Total);
+ result = indexSearcher.Search(joinQuery, 10);
+ assertEquals(2, result.TotalHits);
+ assertEquals(0, result.ScoreDocs[0].Doc);
+ assertEquals(3, result.ScoreDocs[1].Doc);
+
+ // Score mode avg (term "movie"): movie 2 is expected first.
+ joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
+ indexSearcher, ScoreMode.Avg);
+ result = indexSearcher.Search(joinQuery, 10);
+ assertEquals(2, result.TotalHits);
+ assertEquals(3, result.ScoreDocs[0].Doc);
+ assertEquals(0, result.ScoreDocs[1].Doc);
+
+ indexSearcher.IndexReader.Dispose();
+ dir.Dispose();
+ }
+
+ /// <summary>
+ /// Randomized join test with exactly one link value per document.
+ /// </summary>
+ [Test]
+ public void TestSingleValueRandomJoin()
+ {
+ // The three draws happen in this order: index iterations, search iterations, document count.
+ int indexIterations = TestUtil.NextInt(Random(), 6, 12);
+ int searchIterations = TestUtil.NextInt(Random(), 13, 26);
+ int documentCount = TestUtil.NextInt(Random(), 87, 764);
+ ExecuteRandomJoin(false, indexIterations, searchIterations, documentCount);
+ }
+
+ /// <summary>
+ /// Randomized join test with several link values per document.
+ /// This test really takes more time, that is why the number of iterations is smaller.
+ /// </summary>
+ [Test]
+ public void TestMultiValueRandomJoin()
+ {
+ // The three draws happen in this order: index iterations, search iterations, document count.
+ int indexIterations = TestUtil.NextInt(Random(), 3, 6);
+ int searchIterations = TestUtil.NextInt(Random(), 6, 12);
+ int documentCount = TestUtil.NextInt(Random(), 11, 57);
+ ExecuteRandomJoin(true, indexIterations, searchIterations, documentCount);
+ }
+
+ /// <summary>
+ /// Core of the randomized join tests. For each index iteration a fresh random index is built
+ /// (see CreateContext); for each search iteration a random value is picked, a join query is
+ /// created for it with a random score mode, and both the set of matching documents and the
+ /// top hits (doc, score, explanation value) are compared against the expectations in the context.
+ /// </summary>
+ /// <param name="multipleValuesPerDocument">Whether documents carry more than one link value.</param>
+ /// <param name="maxIndexIter">Number of indices to build.</param>
+ /// <param name="maxSearchIter">Number of searches to run against each index.</param>
+ /// <param name="numberOfDocumentsToIndex">Number of documents added to each index.</param>
+ private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
+ int numberOfDocumentsToIndex)
+ {
+ for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("indexIter=" + indexIter);
+ }
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir,
+ NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false))
+ .SetMergePolicy(NewLogMergePolicy()));
+ bool scoreDocsInOrder = TestJoinUtil.Random().NextBoolean();
+ IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
+ scoreDocsInOrder);
+
+ IndexReader topLevelReader = w.Reader;
+ w.Dispose();
+ for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("searchIter=" + searchIter);
+ }
+ IndexSearcher indexSearcher = NewSearcher(topLevelReader);
+
+ // Pick one of the unique values together with the join direction recorded for it.
+ int r = Random().Next(context.RandomUniqueValues.Length);
+ bool from = context.RandomFrom[r];
+ string randomValue = context.RandomUniqueValues[r];
+ FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
+ context);
+
+ Query actualQuery = new TermQuery(new Term("value", randomValue));
+ if (VERBOSE)
+ {
+ Console.WriteLine("actualQuery=" + actualQuery);
+ }
+
+ var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
+ ScoreMode scoreMode = (ScoreMode) Random().Next(scoreModeLength);
+ if (VERBOSE)
+ {
+ Console.WriteLine("scoreMode=" + scoreMode);
+ }
+
+ Query joinQuery;
+ if (from)
+ {
+ joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
+ indexSearcher, scoreMode);
+ }
+ else
+ {
+ joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
+ indexSearcher, scoreMode);
+ }
+ if (VERBOSE)
+ {
+ Console.WriteLine("joinQuery=" + joinQuery);
+ }
+
+ // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
+ FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
+ TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
+ indexSearcher.Search(joinQuery,
+ new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult,
+ topScoreDocCollector));
+ // Asserting bit set...
+ if (VERBOSE)
+ {
+ Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
+ DocIdSetIterator iterator = expectedResult.GetIterator();
+ for (int doc = iterator.NextDoc();
+ doc != DocIdSetIterator.NO_MORE_DOCS;
+ doc = iterator.NextDoc())
+ {
+ Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
+ }
+ Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
+ iterator = actualResult.GetIterator();
+ for (int doc = iterator.NextDoc();
+ doc != DocIdSetIterator.NO_MORE_DOCS;
+ doc = iterator.NextDoc())
+ {
+ Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
+ }
+ }
+ assertEquals(expectedResult, actualResult);
+
+ // Asserting TopDocs...
+ TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
+ TopDocs actualTopDocs = topScoreDocCollector.TopDocs();
+ assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
+ assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
+ if (scoreMode == ScoreMode.None)
+ {
+ // Without scoring only the hit counts are compared; skip the per-hit checks.
+ continue;
+ }
+
+ assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
+ for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
+ {
+ if (VERBOSE)
+ {
+ // Actually print the diagnostics; a bare string.Format(...) only builds the string and drops it.
+ Console.Write("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
+ Console.Write("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score);
+ }
+ assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
+ assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
+ Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
+ assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
+ }
+ }
+ topLevelReader.Dispose();
+ dir.Dispose();
+ }
+ }
+
+ /// <summary>
+ /// Collector used by ExecuteRandomJoin. Every collected document is marked in a bit set
+ /// (using the top-level doc id, i.e. segment doc id plus doc base) and is also forwarded,
+ /// together with reader and scorer changes, to a wrapped TopScoreDocCollector.
+ /// </summary>
+ private class CollectorAnonymousInnerClassHelper2 : Collector
+ {
+ private readonly TestJoinUtil _outerInstance;
+ private readonly bool _scoreDocsInOrder;
+ private readonly IndexIterationContext _context;
+ private readonly FixedBitSet _actualResult;
+ private readonly TopScoreDocCollector _topDocsCollector;
+
+ // Doc base of the segment currently being collected.
+ private int _docBase;
+
+ public CollectorAnonymousInnerClassHelper2(TestJoinUtil outerInstance, bool scoreDocsInOrder,
+ IndexIterationContext context, FixedBitSet actualResult,
+ TopScoreDocCollector topScoreDocCollector)
+ {
+ _outerInstance = outerInstance;
+ _scoreDocsInOrder = scoreDocsInOrder;
+ _context = context;
+ _actualResult = actualResult;
+ _topDocsCollector = topScoreDocCollector;
+ }
+
+ public override Scorer Scorer
+ {
+ set { _topDocsCollector.Scorer = value; }
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ _docBase = value.DocBase;
+ _topDocsCollector.NextReader = value;
+ }
+ }
+
+ public override void Collect(int doc)
+ {
+ int topLevelDoc = doc + _docBase;
+ _actualResult.Set(topLevelDoc);
+ _topDocsCollector.Collect(doc);
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ // Returns the flag exactly as it was passed to the constructor.
+ return _scoreDocsInOrder;
+ }
+ }
+
+ /// <summary>
+ /// Single-index convenience overload: the same writer receives both the "from"
+ /// and the "to" documents.
+ /// </summary>
+ private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter writer, bool multipleValuesPerDocument,
+ bool scoreDocsInOrder)
+ {
+ RandomIndexWriter fromWriter = writer;
+ RandomIndexWriter toWriter = writer;
+ return CreateContext(nDocs, fromWriter, toWriter, multipleValuesPerDocument, scoreDocsInOrder);
+ }
+
+ /// <summary>
+ /// Builds a random index and, alongside it, the bookkeeping needed to predict join results.
+ /// Step 1 generates nDocs/2 unique non-empty random values and assigns each a random side (from/to).
+ /// Step 2 indexes nDocs documents, each with an id, one of the random values and one or more
+ /// link values, recording per link value and per random value which documents carry it.
+ /// Step 3 pre-computes, for every unique value, the expected hits and their JoinScore.
+ /// </summary>
+ /// <param name="nDocs">Number of documents to index.</param>
+ /// <param name="fromWriter">Writer that receives documents on the "from" side.</param>
+ /// <param name="toWriter">Writer that receives documents on the "to" side.</param>
+ /// <param name="multipleValuesPerDocument">When true each document gets 2 to 11 link values, otherwise exactly one.</param>
+ /// <param name="scoreDocsInOrder">For multi-valued documents, selects the terms-enum based pre-computation instead of the collector based one.</param>
+ /// <returns>The populated context.</returns>
+ private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
+ bool multipleValuesPerDocument, bool scoreDocsInOrder)
+ {
+ IndexIterationContext context = new IndexIterationContext();
+ int numRandomValues = nDocs/2;
+ context.RandomUniqueValues = new string[numRandomValues];
+ ISet<string> trackSet = new HashSet<string>();
+ context.RandomFrom = new bool[numRandomValues];
+ for (int i = 0; i < numRandomValues; i++)
+ {
+ string uniqueRandomValue;
+ do
+ {
+ uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random());
+ // uniqueRandomValue = TestUtil.randomSimpleString(random);
+ } while ("".Equals(uniqueRandomValue) || trackSet.Contains(uniqueRandomValue));
+ // Generate unique values and empty strings aren't allowed.
+ trackSet.Add(uniqueRandomValue);
+ context.RandomFrom[i] = Random().NextBoolean();
+ context.RandomUniqueValues[i] = uniqueRandomValue;
+ }
+
+ RandomDoc[] docs = new RandomDoc[nDocs];
+ for (int i = 0; i < nDocs; i++)
+ {
+ string id = Convert.ToString(i);
+ int randomI = Random().Next(context.RandomUniqueValues.Length);
+ string value = context.RandomUniqueValues[randomI];
+ Document document = new Document();
+ document.Add(NewTextField(Random(), "id", id, Field.Store.NO));
+ document.Add(NewTextField(Random(), "value", value, Field.Store.NO));
+
+ // The side of the document follows the side assigned to its random value.
+ bool from = context.RandomFrom[randomI];
+ int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random().Next(10) : 1;
+ docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
+ for (int j = 0; j < numberOfLinkValues; j++)
+ {
+ string linkValue = context.RandomUniqueValues[Random().Next(context.RandomUniqueValues.Length)];
+ docs[i].LinkValues.Add(linkValue);
+ if (from)
+ {
+ // Lazily create the per-key lists before appending to them.
+ if (!context.FromDocuments.ContainsKey(linkValue))
+ {
+ context.FromDocuments[linkValue] = new List<RandomDoc>();
+ }
+ if (!context.RandomValueFromDocs.ContainsKey(value))
+ {
+ context.RandomValueFromDocs[value] = new List<RandomDoc>();
+ }
+
+ context.FromDocuments[linkValue].Add(docs[i]);
+ context.RandomValueFromDocs[value].Add(docs[i]);
+ document.Add(NewTextField(Random(), "from", linkValue, Field.Store.NO));
+ }
+ else
+ {
+ // Same bookkeeping as above, for the "to" side.
+ if (!context.ToDocuments.ContainsKey(linkValue))
+ {
+ context.ToDocuments[linkValue] = new List<RandomDoc>();
+ }
+ if (!context.RandomValueToDocs.ContainsKey(value))
+ {
+ context.RandomValueToDocs[value] = new List<RandomDoc>();
+ }
+
+ context.ToDocuments[linkValue].Add(docs[i]);
+ context.RandomValueToDocs[value].Add(docs[i]);
+ document.Add(NewTextField(Random(), "to", linkValue, Field.Store.NO));
+ }
+ }
+
+ RandomIndexWriter w;
+ if (from)
+ {
+ w = fromWriter;
+ }
+ else
+ {
+ w = toWriter;
+ }
+
+ w.AddDocument(document);
+ // Commit after roughly one in ten documents.
+ if (Random().Next(10) == 4)
+ {
+ w.Commit();
+ }
+ if (VERBOSE)
+ {
+ Console.WriteLine("Added document[" + docs[i].Id + "]: " + document);
+ }
+ }
+
+ // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
+ // any ScoreMode.
+ IndexSearcher fromSearcher = NewSearcher(fromWriter.Reader);
+ IndexSearcher toSearcher = NewSearcher(toWriter.Reader);
+ for (int i = 0; i < context.RandomUniqueValues.Length; i++)
+ {
+ string uniqueRandomValue = context.RandomUniqueValues[i];
+ string fromField;
+ string toField;
+ IDictionary<string, IDictionary<int, JoinScore>> queryVals;
+ if (context.RandomFrom[i])
+ {
+ fromField = "from";
+ toField = "to";
+ queryVals = context.FromHitsToJoinScore;
+ }
+ else
+ {
+ fromField = "to";
+ toField = "from";
+ queryVals = context.ToHitsToJoinScore;
+ }
+ // Phase 1: collect a JoinScore per join value from the documents matching the random value.
+ IDictionary<BytesRef, JoinScore> joinValueToJoinScores = new Dictionary<BytesRef, JoinScore>();
+ if (multipleValuesPerDocument)
+ {
+ fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
+ new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores));
+ }
+ else
+ {
+ fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
+ new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores));
+ }
+
+ // Phase 2: map each document on the other side to the JoinScore of a join value it carries.
+ IDictionary<int, JoinScore> docToJoinScore = new Dictionary<int, JoinScore>();
+ if (multipleValuesPerDocument)
+ {
+ if (scoreDocsInOrder)
+ {
+ // Walk the join values in sorted term order over a single composite view of the index.
+ AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
+ Terms terms = slowCompositeReader.Terms(toField);
+ if (terms != null)
+ {
+ DocsEnum docsEnum = null;
+ TermsEnum termsEnum = null;
+ SortedSet<BytesRef> joinValues =
+ new SortedSet<BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
+ joinValues.AddAll(joinValueToJoinScores.Keys);
+ foreach (BytesRef joinValue in joinValues)
+ {
+ termsEnum = terms.Iterator(termsEnum);
+ if (termsEnum.SeekExact(joinValue))
+ {
+ docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsEnum.FLAG_NONE);
+ JoinScore joinScore = joinValueToJoinScores[joinValue];
+
+ for (int doc = docsEnum.NextDoc();
+ doc != DocIdSetIterator.NO_MORE_DOCS;
+ doc = docsEnum.NextDoc())
+ {
+ // First encountered join value determines the score.
+ // Something to keep in mind for many-to-many relations.
+ if (!docToJoinScore.ContainsKey(doc))
+ {
+ docToJoinScore[doc] = joinScore;
+ }
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ toSearcher.Search(new MatchAllDocsQuery(),
+ new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores,
+ docToJoinScore));
+ }
+ }
+ else
+ {
+ toSearcher.Search(new MatchAllDocsQuery(),
+ new CollectorAnonymousInnerClassHelper6(this, context, toField, joinValueToJoinScores,
+ docToJoinScore));
+ }
+ queryVals[uniqueRandomValue] = docToJoinScore;
+ }
+
+ // The readers were opened only for the pre-computation above.
+ fromSearcher.IndexReader.Dispose();
+ toSearcher.IndexReader.Dispose();
+
+ return context;
+ }
+
+ /// <summary>
+ /// Multi-valued "from"-side collector used by CreateContext. For every collected document
+ /// it walks all term ordinals of the from field and adds the document's score to the
+ /// JoinScore kept for each join value, creating that JoinScore on first sight.
+ /// </summary>
+ private class CollectorAnonymousInnerClassHelper3 : Collector
+ {
+ private readonly TestJoinUtil OuterInstance;
+
+ private IndexIterationContext Context;
+ private string FromField;
+ private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
+
+ public CollectorAnonymousInnerClassHelper3(TestJoinUtil outerInstance,
+ IndexIterationContext context, string fromField,
+ IDictionary<BytesRef, JoinScore> joinValueToJoinScores)
+ {
+ OuterInstance = outerInstance;
+ Context = context;
+ FromField = fromField;
+ JoinValueToJoinScores = joinValueToJoinScores;
+ joinValue = new BytesRef();
+ }
+
+
+ private Scorer scorer;
+ private SortedSetDocValues docTermOrds;
+ // Scratch buffer reused for every ordinal lookup.
+ internal readonly BytesRef joinValue;
+
+ public override void Collect(int doc)
+ {
+ docTermOrds.Document = doc;
+ long ord;
+ while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+ {
+ docTermOrds.LookupOrd(ord, joinValue);
+ // The dictionary indexer throws KeyNotFoundException for an unseen key instead of
+ // returning null, so the "create on first sight" branch must go through TryGetValue.
+ JoinScore joinScore;
+ if (!JoinValueToJoinScores.TryGetValue(joinValue, out joinScore) || joinScore == null)
+ {
+ // Store a copy as key because the scratch buffer is overwritten by the next lookup.
+ JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
+ }
+ joinScore.AddScore(scorer.Score());
+ }
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set { docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, FromField); }
+ }
+
+ public override Scorer Scorer
+ {
+ set { scorer = value; }
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+ }
+
+ /// <summary>
+ /// Single-valued "from"-side collector used by CreateContext. For every collected document
+ /// it reads the from-field term and adds the document's score to the JoinScore kept for
+ /// that join value, creating the JoinScore on first sight. Documents without a value in
+ /// the field are skipped.
+ /// </summary>
+ private class CollectorAnonymousInnerClassHelper4 : Collector
+ {
+ private readonly TestJoinUtil OuterInstance;
+
+ private IndexIterationContext Context;
+ private string FromField;
+ private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
+
+ public CollectorAnonymousInnerClassHelper4(TestJoinUtil outerInstance,
+ IndexIterationContext context, string fromField,
+ IDictionary<BytesRef, JoinScore> joinValueToJoinScores)
+ {
+ OuterInstance = outerInstance;
+ Context = context;
+ FromField = fromField;
+ JoinValueToJoinScores = joinValueToJoinScores;
+ spare = new BytesRef();
+ }
+
+
+ private Scorer scorer;
+ private BinaryDocValues terms;
+ private Bits docsWithField;
+ // Scratch buffer reused for every term lookup.
+ private readonly BytesRef spare;
+
+ public override void Collect(int doc)
+ {
+ terms.Get(doc, spare);
+ BytesRef joinValue = spare;
+ // An empty term combined with a cleared docsWithField bit means the document has no value.
+ if (joinValue.Length == 0 && !docsWithField.Get(doc))
+ {
+ return;
+ }
+
+ // The dictionary indexer throws KeyNotFoundException for an unseen key instead of
+ // returning null, so the "create on first sight" branch must go through TryGetValue.
+ JoinScore joinScore;
+ if (!JoinValueToJoinScores.TryGetValue(joinValue, out joinScore) || joinScore == null)
+ {
+ // Store a copy as key because the scratch buffer is overwritten by the next lookup.
+ JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
+ }
+ joinScore.AddScore(scorer.Score());
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ terms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, FromField, true);
+ docsWithField = FieldCache.DEFAULT.GetDocsWithField(value.AtomicReader, FromField);
+ }
+ }
+
+ public override Scorer Scorer
+ {
+ set { scorer = value; }
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+ }
+
+ /// <summary>
+ /// Multi-valued "to"-side collector used by CreateContext. For every collected document it
+ /// walks all term ordinals of the to field and, for the first ordinal whose join value has
+ /// a known JoinScore, records that JoinScore under the document's top-level doc id.
+ /// </summary>
+ private class CollectorAnonymousInnerClassHelper5 : Collector
+ {
+ private readonly TestJoinUtil OuterInstance;
+
+ private string _toField;
+ private readonly IDictionary<BytesRef, JoinScore> _joinValueToJoinScores;
+ private readonly IDictionary<int, JoinScore> _docToJoinScore;
+
+ private SortedSetDocValues docTermOrds;
+ // Scratch buffer reused for every ordinal lookup.
+ private readonly BytesRef scratch;
+ private int docBase;
+
+ public CollectorAnonymousInnerClassHelper5(TestJoinUtil testJoinUtil, IndexIterationContext context,
+ string toField, IDictionary<BytesRef, JoinScore> joinValueToJoinScores,
+ IDictionary<int, JoinScore> docToJoinScore)
+ {
+ OuterInstance = testJoinUtil;
+ _toField = toField;
+ _joinValueToJoinScores = joinValueToJoinScores;
+ _docToJoinScore = docToJoinScore;
+ // Must be allocated here: LookupOrd fills this buffer, and it was previously left null.
+ scratch = new BytesRef();
+ }
+
+ public override void Collect(int doc)
+ {
+ docTermOrds.Document = doc;
+ long ord;
+ while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+ {
+ docTermOrds.LookupOrd(ord, scratch);
+ // Join values that were never scored are skipped; the indexer would throw
+ // KeyNotFoundException for them, so look them up with TryGetValue.
+ JoinScore joinScore;
+ if (!_joinValueToJoinScores.TryGetValue(scratch, out joinScore) || joinScore == null)
+ {
+ continue;
+ }
+ int basedDoc = docBase + doc;
+ // First encountered join value determines the score.
+ // Something to keep in mind for many-to-many relations.
+ if (!_docToJoinScore.ContainsKey(basedDoc))
+ {
+ _docToJoinScore[basedDoc] = joinScore;
+ }
+ }
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ docBase = value.DocBase;
+ docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _toField);
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+
+ public override Scorer Scorer
+ {
+ set { }
+ }
+ }
+
+ /// <summary>
+ /// Single-valued "to"-side collector used by CreateContext. For every collected document it
+ /// reads the to-field term and, when that join value has a known JoinScore, records the
+ /// JoinScore under the document's top-level doc id.
+ /// </summary>
+ private class CollectorAnonymousInnerClassHelper6 : Collector
+ {
+ private readonly TestJoinUtil OuterInstance;
+
+ private IndexIterationContext Context;
+ private string ToField;
+ private IDictionary<BytesRef, JoinScore> JoinValueToJoinScores;
+ private IDictionary<int, JoinScore> DocToJoinScore;
+
+ private BinaryDocValues terms;
+ private int docBase;
+ // Scratch buffer reused for every term lookup.
+ private readonly BytesRef spare;
+
+ public CollectorAnonymousInnerClassHelper6(TestJoinUtil testJoinUtil,
+ IndexIterationContext context, string toField,
+ IDictionary<BytesRef, JoinScore> joinValueToJoinScores,
+ IDictionary<int, JoinScore> docToJoinScore)
+ {
+ OuterInstance = testJoinUtil;
+ ToField = toField;
+ JoinValueToJoinScores = joinValueToJoinScores;
+ DocToJoinScore = docToJoinScore;
+ // Must be allocated here: terms.Get fills this buffer, and it was previously left null.
+ spare = new BytesRef();
+ }
+
+ public override void Collect(int doc)
+ {
+ terms.Get(doc, spare);
+ // Join values that were never scored are skipped; the indexer would throw
+ // KeyNotFoundException for them, so look them up with TryGetValue.
+ JoinScore joinScore;
+ if (!JoinValueToJoinScores.TryGetValue(spare, out joinScore) || joinScore == null)
+ {
+ return;
+ }
+ DocToJoinScore[docBase + doc] = joinScore;
+ }
+
+ public override AtomicReaderContext NextReader
+ {
+ set
+ {
+ terms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, ToField, false);
+ docBase = value.DocBase;
+ }
+ }
+
+ public override bool AcceptsDocsOutOfOrder()
+ {
+ return false;
+ }
+
+ public override Scorer Scorer
+ {
+ set { }
+ }
+ }
+
+ /// <summary>
+ /// Builds the expected top-10 result for a query value from the pre-computed
+ /// doc-to-JoinScore map: hits are sorted with ComparatorAnonymousInnerClassHelper,
+ /// the first ten become ScoreDocs, and the max score is the score of the first hit
+ /// (NaN when there are no hits).
+ /// </summary>
+ private TopDocs CreateExpectedTopDocs(string queryValue, bool from, ScoreMode scoreMode,
+ IndexIterationContext context)
+ {
+ IDictionary<int, JoinScore> hitsToJoinScores;
+ if (from)
+ {
+ hitsToJoinScores = context.FromHitsToJoinScore[queryValue];
+ }
+ else
+ {
+ hitsToJoinScores = context.ToHitsToJoinScore[queryValue];
+ }
+
+ List<KeyValuePair<int, JoinScore>> sortedHits =
+ new List<KeyValuePair<int, JoinScore>>(hitsToJoinScores.EntrySet());
+ sortedHits.Sort(new ComparatorAnonymousInnerClassHelper(this, scoreMode));
+
+ int topCount = Math.Min(10, sortedHits.Count);
+ ScoreDoc[] scoreDocs = new ScoreDoc[topCount];
+ for (int rank = 0; rank < topCount; rank++)
+ {
+ KeyValuePair<int, JoinScore> entry = sortedHits[rank];
+ scoreDocs[rank] = new ScoreDoc(entry.Key, entry.Value.Score(scoreMode));
+ }
+
+ float maxScore = float.NaN;
+ if (sortedHits.Count != 0)
+ {
+ maxScore = sortedHits[0].Value.Score(scoreMode);
+ }
+ return new TopDocs(sortedHits.Count, scoreDocs, maxScore);
+ }
+
+ /// <summary>
+ /// Orders (doc id, JoinScore) pairs by score under the configured score mode, highest
+ /// score first; pairs with equal scores are ordered by ascending doc id.
+ /// </summary>
+ private class ComparatorAnonymousInnerClassHelper : IComparer<KeyValuePair<int, JoinScore>>
+ {
+ private readonly TestJoinUtil _outerInstance;
+ private readonly ScoreMode _scoreMode;
+
+ public ComparatorAnonymousInnerClassHelper(TestJoinUtil outerInstance, ScoreMode scoreMode)
+ {
+ _outerInstance = outerInstance;
+ _scoreMode = scoreMode;
+ }
+
+ public virtual int Compare(KeyValuePair<int, JoinScore> hit1, KeyValuePair<int, JoinScore> hit2)
+ {
+ float firstScore = hit1.Value.Score(_scoreMode);
+ float secondScore = hit2.Value.Score(_scoreMode);
+
+ // Comparing second against first yields descending score order.
+ int byScoreDescending = secondScore.CompareTo(firstScore);
+ return byScoreDescending != 0 ? byScoreDescending : hit1.Key - hit2.Key;
+ }
+ }
+
+ /// <summary>
+ /// Computes the set of doc ids expected to match a join started from
+ /// <paramref name="queryValue"/>.
+ /// </summary>
+ /// <param name="queryValue">Value used to look up the starting documents.</param>
+ /// <param name="from">
+ /// When true, start from <c>RandomValueFromDocs</c> and follow link values into
+ /// <c>ToDocuments</c>; otherwise start from <c>RandomValueToDocs</c> and follow
+ /// link values into <c>FromDocuments</c>.
+ /// </param>
+ /// <param name="topLevelReader">
+ /// Reader used to resolve each linked document's "id" term to a doc id; its
+ /// <c>MaxDoc</c> sizes the returned bit set.
+ /// </param>
+ /// <param name="context">Holder of the document lookup maps.</param>
+ /// <returns>
+ /// A bit set with the bit of every linked document set; empty when nothing is
+ /// registered for <paramref name="queryValue"/>.
+ /// </returns>
+ private FixedBitSet CreateExpectedResult(string queryValue, bool from, IndexReader topLevelReader,
+     IndexIterationContext context)
+ {
+     IDictionary<string, IList<RandomDoc>> randomValueDocs;
+     IDictionary<string, IList<RandomDoc>> linkValueDocuments;
+     if (from)
+     {
+         randomValueDocs = context.RandomValueFromDocs;
+         linkValueDocuments = context.ToDocuments;
+     }
+     else
+     {
+         randomValueDocs = context.RandomValueToDocs;
+         linkValueDocuments = context.FromDocuments;
+     }
+
+     FixedBitSet expectedResult = new FixedBitSet(topLevelReader.MaxDoc);
+
+     // The context maps are Dictionary instances, whose indexer throws
+     // KeyNotFoundException for a missing key instead of returning null.
+     // TryGetValue keeps the intended "unknown value -> empty result" path.
+     IList<RandomDoc> matchingDocs;
+     if (!randomValueDocs.TryGetValue(queryValue, out matchingDocs) || matchingDocs == null)
+     {
+         return expectedResult;
+     }
+
+     // Loop-invariant, so fetched once rather than per linked document.
+     var liveDocs = MultiFields.GetLiveDocs(topLevelReader);
+
+     foreach (RandomDoc matchingDoc in matchingDocs)
+     {
+         foreach (string linkValue in matchingDoc.LinkValues)
+         {
+             // A link value with no documents on the other side contributes nothing.
+             IList<RandomDoc> otherMatchingDocs;
+             if (!linkValueDocuments.TryGetValue(linkValue, out otherMatchingDocs) || otherMatchingDocs == null)
+             {
+                 continue;
+             }
+
+             foreach (RandomDoc otherSideDoc in otherMatchingDocs)
+             {
+                 DocsEnum docsEnum = MultiFields.GetTermDocsEnum(topLevelReader,
+                     liveDocs, "id", new BytesRef(otherSideDoc.Id), 0);
+                 Debug.Assert(docsEnum != null);
+                 // Only the first posting for the id term is used.
+                 int doc = docsEnum.NextDoc();
+                 expectedResult.Set(doc);
+             }
+         }
+     }
+     return expectedResult;
+ }
+
+ /// <summary>
+ /// Mutable bag of lookup tables shared by the expected-result helpers of this test.
+ /// </summary>
+ private class IndexIterationContext
+ {
+     // Not referenced by the helpers below; filled in and read elsewhere in the test.
+     internal string[] RandomUniqueValues;
+     internal bool[] RandomFrom;
+
+     // Documents keyed by link value, one map per join side.
+     internal IDictionary<string, IList<RandomDoc>> FromDocuments;
+     internal IDictionary<string, IList<RandomDoc>> ToDocuments;
+
+     // Documents keyed by the value that is later used as the query value.
+     internal IDictionary<string, IList<RandomDoc>> RandomValueFromDocs;
+     internal IDictionary<string, IList<RandomDoc>> RandomValueToDocs;
+
+     // Per query value: join score keyed by the int used as the hit's doc id.
+     internal IDictionary<string, IDictionary<int, JoinScore>> FromHitsToJoinScore;
+     internal IDictionary<string, IDictionary<int, JoinScore>> ToHitsToJoinScore;
+
+     /// <summary>
+     /// Creates a context whose maps are all empty; the two arrays stay null.
+     /// </summary>
+     public IndexIterationContext()
+     {
+         FromDocuments = new Dictionary<string, IList<RandomDoc>>();
+         ToDocuments = new Dictionary<string, IList<RandomDoc>>();
+         RandomValueFromDocs = new Dictionary<string, IList<RandomDoc>>();
+         RandomValueToDocs = new Dictionary<string, IList<RandomDoc>>();
+         FromHitsToJoinScore = new Dictionary<string, IDictionary<int, JoinScore>>();
+         ToHitsToJoinScore = new Dictionary<string, IDictionary<int, JoinScore>>();
+     }
+ }
+
+ /// <summary>
+ /// Immutable description of one generated document: its id, its value, which
+ /// side it was created for, and a (mutable) list of link values.
+ /// </summary>
+ private class RandomDoc
+ {
+     internal readonly string Id;
+     internal readonly IList<string> LinkValues;
+     internal readonly string Value;
+     internal readonly bool From;
+
+     /// <param name="id">Stored as <c>Id</c>.</param>
+     /// <param name="numberOfLinkValues">Initial capacity of the (empty) <c>LinkValues</c> list.</param>
+     /// <param name="value">Stored as <c>Value</c>.</param>
+     /// <param name="from">Stored as <c>From</c>.</param>
+     internal RandomDoc(string id, int numberOfLinkValues, string value, bool from)
+     {
+         this.Id = id;
+         this.LinkValues = new List<string>(numberOfLinkValues);
+         this.Value = value;
+         this.From = @from;
+     }
+ }
+
+ /// <summary>
+ /// Accumulates the scores added for one join hit and reports them under a
+ /// chosen <see cref="ScoreMode"/>.
+ /// </summary>
+ private class JoinScore
+ {
+     internal float MaxScore;
+     internal float Total;
+     internal int Count;
+
+     /// <summary>
+     /// Adds one score: bumps the count, adds to the running total and raises
+     /// the maximum when <paramref name="score"/> exceeds it.
+     /// </summary>
+     internal virtual void AddScore(float score)
+     {
+         Count++;
+         Total += score;
+         if (MaxScore < score)
+         {
+             MaxScore = score;
+         }
+     }
+
+     /// <summary>
+     /// Returns the accumulated score for <paramref name="mode"/>: 1 for None, the sum
+     /// for Total, sum divided by count for Avg, and the maximum for Max.
+     /// </summary>
+     /// <exception cref="ArgumentException">The mode is none of the four handled values.</exception>
+     internal virtual float Score(ScoreMode mode)
+     {
+         if (mode == ScoreMode.None)
+         {
+             return 1.0f;
+         }
+         if (mode == ScoreMode.Total)
+         {
+             return Total;
+         }
+         if (mode == ScoreMode.Avg)
+         {
+             return Total / Count;
+         }
+         if (mode == ScoreMode.Max)
+         {
+             return MaxScore;
+         }
+         throw new ArgumentException("Unsupported ScoreMode: " + mode);
+     }
+ }
+ }
+}
\ No newline at end of file