You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2016/11/10 11:33:41 UTC
[30/58] [abbrv] lucenenet git commit: WIP on Grouping
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/AllGroupHeadsCollectorTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/AllGroupHeadsCollectorTest.cs b/src/Lucene.Net.Tests.Grouping/AllGroupHeadsCollectorTest.cs
new file mode 100644
index 0000000..2a0b307
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/AllGroupHeadsCollectorTest.cs
@@ -0,0 +1,718 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Search.Grouping.Function;
+using Lucene.Net.Index;
+using Lucene.Net.Queries.Function;
+using Lucene.Net.Queries.Function.ValueSources;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using static Lucene.Net.Index.FieldInfo;
+using Lucene.Net.Search.Grouping.Terms;
+using System.Collections;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Search.Grouping
+{
+ public class AllGroupHeadsCollectorTest : LuceneTestCase
+ {
+ // Doc-values flavours from which the tests pick one at random for the "_dv" group field.
+ private static readonly DocValuesType_e[] vts =
+ {
+     DocValuesType_e.BINARY,
+     DocValuesType_e.SORTED
+ };
+
+ /// <summary>
+ /// Indexes eight hand-written documents (six with an author, two without) in two segments and
+ /// verifies the group heads reported for several queries and within-group sorts.
+ /// </summary>
+ [Test]
+ public void TestBasic()
+ {
+     string groupField = "author";
+     Directory dir = NewDirectory();
+     RandomIndexWriter w = new RandomIndexWriter(
+         Random(),
+         dir,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT,
+             new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
+     bool canUseIDV = !"Lucene3x".equals(w.w.Config.Codec.Name);
+     DocValuesType_e valueType = vts[Random().nextInt(vts.Length)];
+
+     // One row per document, in doc id order: { author (null = no author field), content, id }.
+     // The id is indexed twice, as "id_1" and "id_2".
+     string[][] rows =
+     {
+         new[] { "author1", "random text", "1" },                             // 0
+         new[] { "author1", "some more random text blob", "2" },              // 1
+         new[] { "author1", "some more random textual data", "3" },           // 2
+         new[] { "author2", "some random text", "4" },                        // 3
+         new[] { "author3", "some more random text", "5" },                   // 4
+         new[] { "author3", "random blob", "6" },                             // 5
+         new[] { null, "random word stuck in alot of other text", "6" },      // 6 -- no author field
+         new[] { null, "random word stuck in alot of other text", "7" }       // 7 -- no author field
+     };
+
+     for (int docIndex = 0; docIndex < rows.Length; docIndex++)
+     {
+         string[] row = rows[docIndex];
+         Document doc = new Document();
+         if (row[0] != null)
+         {
+             AddGroupField(doc, groupField, row[0], canUseIDV, valueType);
+         }
+         doc.Add(NewTextField("content", row[1], Field.Store.NO));
+         doc.Add(NewStringField("id_1", row[2], Field.Store.NO));
+         doc.Add(NewStringField("id_2", row[2], Field.Store.NO));
+         w.AddDocument(doc);
+
+         if (docIndex == 2)
+         {
+             w.Commit(); // To ensure a second segment
+         }
+     }
+
+     IndexReader reader = w.Reader;
+     IndexSearcher indexSearcher = NewSearcher(reader);
+
+     w.Dispose();
+     int maxDoc = reader.MaxDoc;
+
+     // Runs one term query on "content" with a freshly chosen collector and checks both
+     // representations of the group heads (doc id array and bit set).
+     Action<Sort, string, int[]> assertGroupHeads = (sortWithinGroup, contentTerm, expectedHeads) =>
+     {
+         var collector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
+         indexSearcher.Search(new TermQuery(new Term("content", contentTerm)), collector);
+         assertTrue(ArrayContains(expectedHeads, collector.RetrieveGroupHeads()));
+         assertTrue(OpenBitSetContains(expectedHeads, collector.RetrieveGroupHeads(maxDoc), maxDoc));
+     };
+
+     Sort byId1Reversed = new Sort(new SortField("id_1", SortField.Type_e.INT, true));
+     assertGroupHeads(byId1Reversed, "random", new int[] { 2, 3, 5, 7 });
+     assertGroupHeads(byId1Reversed, "some", new int[] { 2, 3, 4 });
+     assertGroupHeads(byId1Reversed, "blob", new int[] { 1, 5 });
+
+     // STRING sort type triggers different implementation
+     Sort byId2Reversed = new Sort(new SortField("id_2", SortField.Type_e.STRING, true));
+     assertGroupHeads(byId2Reversed, "random", new int[] { 2, 3, 5, 7 });
+
+     // Non-reversed order: the lowest id_2 of each group is its head, so the two author-less
+     // documents are represented by doc 6 rather than doc 7.
+     Sort byId2 = new Sort(new SortField("id_2", SortField.Type_e.STRING, false));
+     assertGroupHeads(byId2, "random", new int[] { 0, 3, 4, 6 });
+
+     indexSearcher.IndexReader.Dispose();
+     dir.Dispose();
+ }
+
+ /// <summary>
+ /// Randomized test: indexes random documents spread over random groups and, for many random
+ /// within-group sorts, compares the group heads reported by a randomly chosen collector with
+ /// the heads computed by brute force from the in-memory <see cref="GroupDoc"/> model.
+ /// </summary>
+ [Test]
+ public void TestRandom()
+ {
+     int numberOfRuns = TestUtil.NextInt(Random(), 3, 6);
+     for (int iter = 0; iter < numberOfRuns; iter++)
+     {
+         if (VERBOSE)
+         {
+             Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
+         }
+
+         int numDocs = TestUtil.NextInt(Random(), 100, 1000) * RANDOM_MULTIPLIER;
+         int numGroups = TestUtil.NextInt(Random(), 1, numDocs);
+
+         if (VERBOSE)
+         {
+             Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
+         }
+
+         List<BytesRef> groups = new List<BytesRef>();
+         for (int i = 0; i < numGroups; i++)
+         {
+             string randomValue;
+             do
+             {
+                 // B/c of DV based impl we can't see the difference between an empty string and a null value.
+                 // For that reason we don't generate empty string groups.
+                 randomValue = TestUtil.RandomRealisticUnicodeString(Random());
+             } while ("".equals(randomValue));
+             groups.Add(new BytesRef(randomValue));
+         }
+         string[] contentStrings = new string[TestUtil.NextInt(Random(), 2, 20)];
+         if (VERBOSE)
+         {
+             Console.WriteLine("TEST: create fake content");
+         }
+         for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
+         {
+             // Every content string is "real0", "real1" or "real2" followed by 0-9 "fake" tokens.
+             StringBuilder sb = new StringBuilder();
+             sb.append("real").append(Random().nextInt(3)).append(' ');
+             int fakeCount = Random().nextInt(10);
+             for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
+             {
+                 sb.append("fake ");
+             }
+             contentStrings[contentIDX] = sb.toString();
+             if (VERBOSE)
+             {
+                 Console.WriteLine(" content=" + sb.toString());
+             }
+         }
+
+         Directory dir = NewDirectory();
+         RandomIndexWriter w = new RandomIndexWriter(
+             Random(),
+             dir,
+             NewIndexWriterConfig(TEST_VERSION_CURRENT,
+                 new MockAnalyzer(Random())));
+         bool preFlex = "Lucene3x".equals(w.w.Config.Codec.Name);
+         bool canUseIDV = !preFlex;
+         DocValuesType_e valueType = vts[Random().nextInt(vts.Length)];
+
+         // Two reusable documents share the same field instances: "doc" carries the group
+         // fields, "docNoGroup" does not. Field values are overwritten for each added document.
+         Document doc = new Document();
+         Document docNoGroup = new Document();
+         Field group = NewStringField("group", "", Field.Store.NO);
+         doc.Add(group);
+         Field valuesField = null;
+         if (canUseIDV)
+         {
+             switch (valueType)
+             {
+                 case DocValuesType_e.BINARY:
+                     valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
+                     break;
+                 case DocValuesType_e.SORTED:
+                     valuesField = new SortedDocValuesField("group_dv", new BytesRef());
+                     break;
+                 default:
+                     // Fail loudly instead of adding a null field to the document.
+                     throw new InvalidOperationException("unhandled type: " + valueType);
+             }
+             doc.Add(valuesField);
+         }
+         Field sort1 = NewStringField("sort1", "", Field.Store.NO);
+         doc.Add(sort1);
+         docNoGroup.Add(sort1);
+         Field sort2 = NewStringField("sort2", "", Field.Store.NO);
+         doc.Add(sort2);
+         docNoGroup.Add(sort2);
+         Field sort3 = NewStringField("sort3", "", Field.Store.NO);
+         doc.Add(sort3);
+         docNoGroup.Add(sort3);
+         Field content = NewTextField("content", "", Field.Store.NO);
+         doc.Add(content);
+         docNoGroup.Add(content);
+         IntField id = new IntField("id", 0, Field.Store.NO);
+         doc.Add(id);
+         docNoGroup.Add(id);
+         GroupDoc[] groupDocs = new GroupDoc[numDocs];
+         for (int i = 0; i < numDocs; i++)
+         {
+             BytesRef groupValue;
+             if (Random().nextInt(24) == 17)
+             {
+                 // So we test the "doc doesn't have the group'd
+                 // field" case:
+                 groupValue = null;
+             }
+             else
+             {
+                 groupValue = groups[Random().nextInt(groups.size())];
+             }
+
+             // sort3 is the zero-padded document number, so it is unique per document.
+             GroupDoc groupDoc = new GroupDoc(
+                 i,
+                 groupValue,
+                 groups[Random().nextInt(groups.size())],
+                 groups[Random().nextInt(groups.size())],
+                 new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
+                 contentStrings[Random().nextInt(contentStrings.Length)]
+             );
+
+             if (VERBOSE)
+             {
+                 Console.WriteLine(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
+             }
+
+             groupDocs[i] = groupDoc;
+             if (groupDoc.group != null)
+             {
+                 group.StringValue = (groupDoc.group.Utf8ToString());
+                 if (canUseIDV)
+                 {
+                     valuesField.BytesValue = (new BytesRef(groupDoc.group.Utf8ToString()));
+                 }
+             }
+             sort1.StringValue = (groupDoc.sort1.Utf8ToString());
+             sort2.StringValue = (groupDoc.sort2.Utf8ToString());
+             sort3.StringValue = (groupDoc.sort3.Utf8ToString());
+             content.StringValue = (groupDoc.content);
+             id.IntValue = (groupDoc.id);
+             if (groupDoc.group == null)
+             {
+                 w.AddDocument(docNoGroup);
+             }
+             else
+             {
+                 w.AddDocument(doc);
+             }
+         }
+
+         DirectoryReader r = w.Reader;
+         w.Dispose();
+
+         // NOTE: intentional but temporary field cache insanity!
+         FieldCache.Ints docIdToFieldId = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(r), "id", false);
+         // Inverse mapping: value of the "id" field -> Lucene doc id.
+         int[] fieldIdToDocID = new int[numDocs];
+         for (int i = 0; i < numDocs; i++)
+         {
+             int fieldId = docIdToFieldId.Get(i);
+             fieldIdToDocID[fieldId] = i;
+         }
+
+         try
+         {
+             IndexSearcher s = NewSearcher(r);
+             if (typeof(SlowCompositeReaderWrapper).IsAssignableFrom(s.IndexReader.GetType()))
+             {
+                 canUseIDV = false;
+             }
+             else
+             {
+                 canUseIDV = !preFlex;
+             }
+
+             // Record the score of every document; each document matches exactly one of the
+             // three "realN" terms, so its score must still be unset when it is hit.
+             for (int contentID = 0; contentID < 3; contentID++)
+             {
+                 ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
+                 foreach (ScoreDoc hit in hits)
+                 {
+                     GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
+                     assertTrue(gd.score == 0.0);
+                     gd.score = hit.Score;
+                     int docId = gd.id;
+                     assertEquals(docId, docIdToFieldId.Get(hit.Doc));
+                 }
+             }
+
+             foreach (GroupDoc gd in groupDocs)
+             {
+                 assertTrue(gd.score != 0.0);
+             }
+
+             for (int searchIter = 0; searchIter < 100; searchIter++)
+             {
+
+                 if (VERBOSE)
+                 {
+                     Console.WriteLine("TEST: searchIter=" + searchIter);
+                 }
+
+                 string searchTerm = "real" + Random().nextInt(3);
+                 bool sortByScoreOnly = Random().nextBoolean();
+                 Sort sortWithinGroup = GetRandomSort(sortByScoreOnly);
+                 var allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
+                 s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
+                 int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
+                 int[] actualGroupHeads = allGroupHeadsCollector.RetrieveGroupHeads();
+                 // The actual group heads contains Lucene ids. Need to change them into our id value.
+                 for (int i = 0; i < actualGroupHeads.Length; i++)
+                 {
+                     actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
+                 }
+                 // Allows us the easily iterate and assert the actual and expected results.
+                 Array.Sort(expectedGroupHeads);
+                 Array.Sort(actualGroupHeads);
+
+                 if (VERBOSE)
+                 {
+                     Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
+                     Console.WriteLine("Sort within group: " + sortWithinGroup);
+                     Console.WriteLine("Num group: " + numGroups);
+                     Console.WriteLine("Num doc: " + numDocs);
+                     PrintGroupHeads("\n=== Expected: \n", expectedGroupHeads, groupDocs);
+                     PrintGroupHeads("\n=== Actual: \n", actualGroupHeads, groupDocs);
+                     Console.WriteLine("\n===================================================================================");
+                 }
+
+                 assertArrayEquals(expectedGroupHeads, actualGroupHeads);
+             }
+         }
+         finally
+         {
+             QueryUtils.PurgeFieldCache(r);
+         }
+
+         r.Dispose();
+         dir.Dispose();
+     }
+ }
+
+ /// <summary>
+ /// Writes <paramref name="title"/> followed by one line per group head to the console.
+ /// </summary>
+ /// <param name="title">Heading printed before the rows.</param>
+ /// <param name="groupHeadIds">Values of the "id" field of the group heads; used as indexes into <paramref name="groupDocs"/>.</param>
+ /// <param name="groupDocs">The in-memory model of all indexed documents.</param>
+ private static void PrintGroupHeads(string title, int[] groupHeadIds, GroupDoc[] groupDocs)
+ {
+     Console.WriteLine(title);
+     foreach (int headId in groupHeadIds)
+     {
+         GroupDoc head = groupDocs[headId];
+         string groupName = head.group == null ? null : head.group.Utf8ToString();
+         // Composite format items are {index,alignment:format}; the alignment goes before the colon.
+         Console.WriteLine(
+             string.Format(CultureInfo.InvariantCulture,
+                 "Group:{0,10} score{1,5:0.0#######} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
+                 groupName, head.score, head.sort1.Utf8ToString(),
+                 head.sort2.Utf8ToString(), head.sort3.Utf8ToString(), headId)
+         );
+     }
+ }
+
+
+ /// <summary>
+ /// Returns <c>true</c> when both arrays have the same length and every value of
+ /// <paramref name="expected"/> occurs in <paramref name="actual"/>.
+ /// </summary>
+ /// <remarks><paramref name="actual"/> is sorted in place as a side effect.</remarks>
+ private bool ArrayContains(int[] expected, int[] actual)
+ {
+     // The reported doc ids are not always in doc id order; normalise them first.
+     Array.Sort(actual);
+
+     if (actual.Length != expected.Length)
+     {
+         return false;
+     }
+
+     for (int k = 0; k < expected.Length; k++)
+     {
+         if (Array.IndexOf(actual, expected[k]) < 0)
+         {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /// <summary>
+ /// Returns <c>true</c> when <paramref name="actual"/> has as many bits set as there are
+ /// expected doc ids and every expected doc id is set in it.
+ /// </summary>
+ /// <param name="expectedDocs">Doc ids expected to be set.</param>
+ /// <param name="actual">Bit set produced by the collector.</param>
+ /// <param name="maxDoc">Size used for the temporary bit set holding the expected doc ids.</param>
+ private bool OpenBitSetContains(int[] expectedDocs, FixedBitSet actual, int maxDoc)
+ {
+     if (actual.Cardinality() != expectedDocs.Length)
+     {
+         return false;
+     }
+
+     FixedBitSet expectedBits = new FixedBitSet(maxDoc);
+     for (int k = 0; k < expectedDocs.Length; k++)
+     {
+         expectedBits.Set(expectedDocs[k]);
+     }
+
+     DocIdSetIterator expectedIterator = expectedBits.GetIterator();
+     for (int doc = expectedIterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = expectedIterator.NextDoc())
+     {
+         if (!actual.Get(doc))
+         {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /// <summary>
+ /// Computes, by brute force over the in-memory model, the id of the head document of every
+ /// group among the documents whose content starts with <paramref name="searchTerm"/>.
+ /// </summary>
+ /// <param name="searchTerm">Content prefix ("realN") a document must start with to be considered.</param>
+ /// <param name="groupDocs">Model of all indexed documents.</param>
+ /// <param name="docSort">Sort applied within each group; the first document in this order is the head.</param>
+ /// <param name="sortByScoreOnly">Passed through to <see cref="GetComparator"/>.</param>
+ /// <param name="fieldIdToDocID">Passed through to <see cref="GetComparator"/>.</param>
+ /// <returns>One <c>id</c> per group, in the iteration order of the grouping map.</returns>
+ private int[] CreateExpectedGroupHeads(string searchTerm, GroupDoc[] groupDocs, Sort docSort, bool sortByScoreOnly, int[] fieldIdToDocID)
+ {
+     // groupDoc.group is null for documents indexed without a group; they are collected under
+     // the null key (this relies on HashMap accepting a null key, as the lookups below assume).
+     IDictionary<BytesRef, List<GroupDoc>> groupHeads = new HashMap<BytesRef, List<GroupDoc>>();
+     foreach (GroupDoc groupDoc in groupDocs)
+     {
+         // Ordinal comparison: the terms are plain identifiers, not linguistic text.
+         if (!groupDoc.content.StartsWith(searchTerm, StringComparison.Ordinal))
+         {
+             continue;
+         }
+
+         if (!groupHeads.ContainsKey(groupDoc.group))
+         {
+             List<GroupDoc> list = new List<GroupDoc>();
+             list.Add(groupDoc);
+             groupHeads[groupDoc.group] = list;
+             continue;
+         }
+         groupHeads[groupDoc.group].Add(groupDoc);
+     }
+
+     IComparer<GroupDoc> comparer = GetComparator(docSort, sortByScoreOnly, fieldIdToDocID);
+     int[] allGroupHeads = new int[groupHeads.Count];
+     int i = 0;
+     foreach (BytesRef groupValue in groupHeads.Keys)
+     {
+         List<GroupDoc> docs = groupHeads[groupValue];
+
+         // Only the first document in sort order is needed, so pick the minimum with a linear
+         // scan instead of sorting. List<T>.Sort may hand the comparer the same element on both
+         // sides, and the comparer fails the test whenever two documents compare as equal.
+         GroupDoc head = docs[0];
+         for (int j = 1; j < docs.Count; j++)
+         {
+             if (comparer.Compare(docs[j], head) < 0)
+             {
+                 head = docs[j];
+             }
+         }
+         allGroupHeads[i++] = head.id;
+     }
+
+     return allGroupHeads;
+ }
+
+ /// <summary>
+ /// Builds a random within-group sort. With <paramref name="scoreOnly"/> set, the result sorts
+ /// by score alone; otherwise it is an optional primary criterion (score, "sort1" and/or
+ /// "sort2") followed by a tie-breaker on "sort3" or "id".
+ /// </summary>
+ private Sort GetRandomSort(bool scoreOnly)
+ {
+     List<SortField> criteria = new List<SortField>();
+
+     // The draw is made before scoreOnly is looked at, so it happens on every call.
+     bool drewScore = Random().nextInt(7) == 2;
+     if (drewScore || scoreOnly)
+     {
+         criteria.Add(SortField.FIELD_SCORE);
+     }
+     else if (Random().nextBoolean())
+     {
+         // A single string field, in random direction.
+         string fieldName = Random().nextBoolean() ? "sort1" : "sort2";
+         criteria.Add(new SortField(fieldName, SortField.Type_e.STRING, Random().nextBoolean()));
+     }
+     else if (Random().nextBoolean())
+     {
+         // Both string fields, each in random direction.
+         criteria.Add(new SortField("sort1", SortField.Type_e.STRING, Random().nextBoolean()));
+         criteria.Add(new SortField("sort2", SortField.Type_e.STRING, Random().nextBoolean()));
+     }
+
+     // Break ties (the boolean is drawn even when no tie-breaker is added):
+     bool tieBreakOnSort3 = Random().nextBoolean();
+     if (!scoreOnly)
+     {
+         criteria.Add(tieBreakOnSort3
+             ? new SortField("sort3", SortField.Type_e.STRING)
+             : new SortField("id", SortField.Type_e.INT));
+     }
+
+     return new Sort(criteria.ToArray());
+ }
+
+ /// <summary>
+ /// Orders <see cref="GroupDoc"/> instances by a list of sort fields, applying them one after
+ /// another until one of them tells the two documents apart.
+ /// </summary>
+ internal class ComparatorAnonymousHelper : IComparer<GroupDoc>
+ {
+     private readonly AllGroupHeadsCollectorTest outerInstance;
+     private readonly SortField[] sortFields;
+     private readonly bool sortByScoreOnly;
+     private readonly int[] fieldIdToDocID;
+
+     public ComparatorAnonymousHelper(AllGroupHeadsCollectorTest outerInstance, SortField[] sortFields, bool sortByScoreOnly, int[] fieldIdToDocID)
+     {
+         this.outerInstance = outerInstance;
+         this.sortFields = sortFields;
+         this.sortByScoreOnly = sortByScoreOnly;
+         this.fieldIdToDocID = fieldIdToDocID;
+     }
+
+     /// <summary>
+     /// Compares two documents. Fails the test when no sort field distinguishes them.
+     /// </summary>
+     public int Compare(GroupDoc d1, GroupDoc d2)
+     {
+         foreach (SortField sortField in sortFields)
+         {
+             int result = CompareOn(sortField, d1, d2);
+             if (result == 0)
+             {
+                 continue;
+             }
+             return sortField.Reverse ? -result : result;
+         }
+
+         // Our sort always fully tie breaks:
+         fail();
+         return 0;
+     }
+
+     // Natural-order comparison of the two documents on a single sort field.
+     private int CompareOn(SortField sf, GroupDoc d1, GroupDoc d2)
+     {
+         if (sf.Type == SortField.Type_e.SCORE)
+         {
+             // Higher score sorts first.
+             if (d1.score > d2.score)
+             {
+                 return -1;
+             }
+             if (d1.score < d2.score)
+             {
+                 return 1;
+             }
+             // Equal scores: fall back to the mapped doc id only for score-only sorts.
+             return sortByScoreOnly ? fieldIdToDocID[d1.id] - fieldIdToDocID[d2.id] : 0;
+         }
+
+         if (sf.Field.equals("sort1"))
+         {
+             return d1.sort1.CompareTo(d2.sort1);
+         }
+         if (sf.Field.equals("sort2"))
+         {
+             return d1.sort2.CompareTo(d2.sort2);
+         }
+         if (sf.Field.equals("sort3"))
+         {
+             return d1.sort3.CompareTo(d2.sort3);
+         }
+
+         assertEquals(sf.Field, "id");
+         return d1.id - d2.id;
+     }
+ }
+
+ /// <summary>
+ /// Creates the comparer that orders <see cref="GroupDoc"/> instances by the sort fields of
+ /// <paramref name="sort"/>; the logic lives in <see cref="ComparatorAnonymousHelper"/>.
+ /// </summary>
+ private IComparer<GroupDoc> GetComparator(Sort sort, bool sortByScoreOnly, int[] fieldIdToDocID)
+ {
+     return new ComparatorAnonymousHelper(this, sort.GetSort(), sortByScoreOnly, fieldIdToDocID);
+ }
+
+ /// <summary>
+ /// Picks, at random, either the function-based or the term-based group heads collector for
+ /// <paramref name="groupField"/>. <paramref name="canUseIDV"/> and <paramref name="valueType"/>
+ /// are accepted but not used here.
+ /// </summary>
+ private AbstractAllGroupHeadsCollector CreateRandomCollector(string groupField, Sort sortWithinGroup, bool canUseIDV, DocValuesType_e valueType)
+ {
+     AbstractAllGroupHeadsCollector chosen;
+     if (!Random().nextBoolean())
+     {
+         chosen = TermAllGroupHeadsCollector.Create(groupField, sortWithinGroup);
+     }
+     else
+     {
+         ValueSource groupValues = new BytesRefFieldSource(groupField);
+         chosen = new FunctionAllGroupHeadsCollector(groupValues, new Hashtable(), sortWithinGroup);
+     }
+
+     if (VERBOSE)
+     {
+         Console.WriteLine("Selected implementation: " + chosen.GetType().Name);
+     }
+
+     return chosen;
+ }
+
+ /// <summary>
+ /// Adds the group value to <paramref name="doc"/> as a stored text field and, when
+ /// <paramref name="canUseIDV"/> is set, also as a doc-values field named
+ /// <paramref name="groupField"/> + "_dv" of the requested type.
+ /// </summary>
+ /// <exception cref="ArgumentOutOfRangeException">
+ /// <paramref name="valueType"/> is neither BINARY nor SORTED while doc values are enabled.
+ /// </exception>
+ private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV, DocValuesType_e valueType)
+ {
+     doc.Add(new TextField(groupField, value, Field.Store.YES));
+     if (!canUseIDV)
+     {
+         return;
+     }
+
+     Field valuesField;
+     switch (valueType)
+     {
+         case DocValuesType_e.BINARY:
+             valuesField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
+             break;
+         case DocValuesType_e.SORTED:
+             valuesField = new SortedDocValuesField(groupField + "_dv", new BytesRef(value));
+             break;
+         default:
+             // Reject unsupported types explicitly rather than adding a null field.
+             throw new ArgumentOutOfRangeException(nameof(valueType), valueType, "unhandled type");
+     }
+     doc.Add(valuesField);
+ }
+
+ /// <summary>
+ /// In-memory model of one indexed document, used to compute the expected group heads.
+ /// Everything except <see cref="score"/> is fixed at construction time.
+ /// </summary>
+ internal class GroupDoc
+ {
+     internal readonly int id;
+     // null when the document has no group field
+     internal readonly BytesRef group;
+     internal readonly BytesRef sort1, sort2, sort3;
+     // content must be "realN ..."
+     internal readonly string content;
+     // filled in by the test once the document has been scored by a search
+     internal float score;
+
+     public GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, BytesRef sort3, string content)
+     {
+         this.id = id;
+         this.group = group;
+         this.content = content;
+         this.sort1 = sort1;
+         this.sort2 = sort2;
+         this.sort3 = sort3;
+     }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/AllGroupsCollectorTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/AllGroupsCollectorTest.cs b/src/Lucene.Net.Tests.Grouping/AllGroupsCollectorTest.cs
new file mode 100644
index 0000000..c76a4da
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/AllGroupsCollectorTest.cs
@@ -0,0 +1,138 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Search.Grouping.Function;
+using Lucene.Net.Index;
+using Lucene.Net.Queries.Function;
+using Lucene.Net.Queries.Function.ValueSources;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.Collections;
+using Lucene.Net.Search.Grouping.Terms;
+
+namespace Lucene.Net.Search.Grouping
+{
+ public class AllGroupsCollectorTest : LuceneTestCase
+ {
+ /// <summary>
+ /// Indexes seven hand-written documents (the last one without an author) in two segments and
+ /// verifies the number of distinct groups reported for three term queries.
+ /// </summary>
+ [Test]
+ public void TestTotalGroupCount()
+ {
+     string groupField = "author";
+     FieldType customType = new FieldType();
+     customType.Stored = true;
+
+     Directory dir = NewDirectory();
+     RandomIndexWriter w = new RandomIndexWriter(
+         Random(),
+         dir,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT,
+             new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
+     bool canUseIDV = !"Lucene3x".equals(w.w.Config.Codec.Name);
+
+     // One row per document, in doc id order: { author (null = no author field), content, id }.
+     string[][] rows =
+     {
+         new[] { "author1", "random text", "1" },                             // 0
+         new[] { "author1", "some more random text blob", "2" },              // 1
+         new[] { "author1", "some more random textual data", "3" },           // 2
+         new[] { "author2", "some random text", "4" },                        // 3
+         new[] { "author3", "some more random text", "5" },                   // 4
+         new[] { "author3", "random blob", "6" },                             // 5
+         new[] { null, "random word stuck in alot of other text", "6" }       // 6 -- no author field
+     };
+
+     for (int docIndex = 0; docIndex < rows.Length; docIndex++)
+     {
+         string[] row = rows[docIndex];
+         Document doc = new Document();
+         if (row[0] != null)
+         {
+             AddGroupField(doc, groupField, row[0], canUseIDV);
+         }
+         doc.Add(new TextField("content", row[1], Field.Store.YES));
+         doc.Add(new Field("id", row[2], customType));
+         w.AddDocument(doc);
+
+         if (docIndex == 2)
+         {
+             w.Commit(); // To ensure a second segment
+         }
+     }
+
+     IndexSearcher indexSearcher = NewSearcher(w.Reader);
+     w.Dispose();
+
+     // Runs one term query on "content" with a freshly chosen collector and checks the group count.
+     Action<string, int> assertGroupCount = (contentTerm, expectedCount) =>
+     {
+         AbstractAllGroupsCollector collector = CreateRandomCollector(groupField, canUseIDV);
+         indexSearcher.Search(new TermQuery(new Term("content", contentTerm)), collector);
+         assertEquals(expectedCount, collector.GroupCount);
+     };
+
+     assertGroupCount("random", 4);
+     assertGroupCount("some", 3);
+     assertGroupCount("blob", 2);
+
+     indexSearcher.IndexReader.Dispose();
+     dir.Dispose();
+ }
+
+ /// <summary>
+ /// Adds the group value to <paramref name="doc"/> as a stored text field and, when
+ /// <paramref name="canUseIDV"/> is set, as a sorted doc-values field of the same name.
+ /// </summary>
+ private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV)
+ {
+     doc.Add(new TextField(groupField, value, Field.Store.YES));
+     if (!canUseIDV)
+     {
+         return;
+     }
+
+     doc.Add(new SortedDocValuesField(groupField, new BytesRef(value)));
+ }
+
+ /// <summary>
+ /// Picks, at random, either the term-based or the function-based all-groups collector for
+ /// <paramref name="groupField"/>. <paramref name="canUseIDV"/> is accepted but not used here.
+ /// </summary>
+ private AbstractAllGroupsCollector CreateRandomCollector(string groupField, bool canUseIDV)
+ {
+     AbstractAllGroupsCollector chosen;
+     if (!Random().nextBoolean())
+     {
+         ValueSource groupValues = new BytesRefFieldSource(groupField);
+         chosen = new FunctionAllGroupsCollector(groupValues, new Hashtable());
+     }
+     else
+     {
+         chosen = new TermAllGroupsCollector(groupField);
+     }
+
+     if (VERBOSE)
+     {
+         Console.WriteLine("Selected implementation: " + chosen.GetType().Name);
+     }
+
+     return chosen;
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs b/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
new file mode 100644
index 0000000..854050b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
@@ -0,0 +1,648 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Search.Grouping.Function;
+using Lucene.Net.Search.Grouping.Term;
+using Lucene.Net.Index;
+using Lucene.Net.Queries.Function.ValueSources;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Mutable;
+using NUnit.Framework;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Search.Grouping;
+using Lucene.Net.Search.Grouping.Terms;
+
+namespace Lucene.Net.Search.Grouping
+{
+ public class DistinctValuesCollectorTest : AbstractGroupingTestCase
+ {
+ // Shared comparer used to sort unique values before comparing them; it orders null first.
+ private static readonly NullComparator nullComparator = new NullComparator();
+
+ // Names of the indexed fields. addField derives the doc-values field name by
+ // appending "_dv" to the plain field name, which matches the two *_dv names below.
+ private readonly string groupField = "author";
+ private readonly string dvGroupField = "author_dv";
+ private readonly string countField = "publisher";
+ private readonly string dvCountField = "publisher_dv";
+
+ /// <summary>
+ /// Orders group counts by their group value. A null group value sorts before
+ /// every non-null one, and two null group values compare as equal.
+ /// </summary>
+ internal class ComparerAnonymousHelper1 : IComparer<AbstractGroupCount<IComparable<object>>>
+ {
+     // Reference to the enclosing test instance; not read by Compare.
+     private readonly DistinctValuesCollectorTest outerInstance;
+
+     public ComparerAnonymousHelper1(DistinctValuesCollectorTest outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Returns 0 when both group values are null, -1 when only the first is null,
+     /// 1 when only the second is null, otherwise the result of CompareTo.
+     /// </summary>
+     public int Compare(AbstractGroupCount<IComparable<object>> groupCount1, AbstractGroupCount<IComparable<object>> groupCount2)
+     {
+         var left = groupCount1.groupValue;
+         var right = groupCount2.groupValue;
+
+         if (left == null)
+         {
+             return right == null ? 0 : -1;
+         }
+
+         if (right == null)
+         {
+             return 1;
+         }
+
+         return left.CompareTo(right);
+     }
+ }
+
+ /// <summary>
+ /// Indexes seven small documents (author is the group field, publisher the count
+ /// field) and, for the terms "random", "some" and "blob", runs a first-pass
+ /// grouping collector followed by a distinct-values collector. For each query the
+ /// groups are sorted by group value and the distinct publisher values collected
+ /// per group are verified.
+ /// </summary>
+ [Test]
+ public void TestSimple()
+ {
+     Random random = Random();
+     FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[]{
+         FieldInfo.DocValuesType_e.NUMERIC,
+         FieldInfo.DocValuesType_e.BINARY,
+         FieldInfo.DocValuesType_e.SORTED,
+     };
+     Directory dir = NewDirectory();
+     RandomIndexWriter w = new RandomIndexWriter(
+         random,
+         dir,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT,
+             new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
+     bool canUseDV = !"Lucene3x".equals(w.w.Config.Codec.Name);
+     // null means "no doc values": addField then only writes the plain string field.
+     FieldInfo.DocValuesType_e? dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;
+
+     // 0
+     Document doc = new Document();
+     addField(doc, groupField, "1", dvType);
+     addField(doc, countField, "1", dvType);
+     doc.Add(new TextField("content", "random text", Field.Store.NO));
+     doc.Add(new StringField("id", "1", Field.Store.NO));
+     w.AddDocument(doc);
+
+     // 1
+     doc = new Document();
+     addField(doc, groupField, "1", dvType);
+     addField(doc, countField, "1", dvType);
+     doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
+     doc.Add(new StringField("id", "2", Field.Store.NO));
+     w.AddDocument(doc);
+
+     // 2
+     doc = new Document();
+     addField(doc, groupField, "1", dvType);
+     addField(doc, countField, "2", dvType);
+     doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
+     doc.Add(new StringField("id", "3", Field.Store.NO));
+     w.AddDocument(doc);
+     w.Commit(); // To ensure a second segment
+
+     // 3 -- no publisher field
+     doc = new Document();
+     addField(doc, groupField, "2", dvType);
+     doc.Add(new TextField("content", "some random text", Field.Store.NO));
+     doc.Add(new StringField("id", "4", Field.Store.NO));
+     w.AddDocument(doc);
+
+     // 4
+     doc = new Document();
+     addField(doc, groupField, "3", dvType);
+     addField(doc, countField, "1", dvType);
+     doc.Add(new TextField("content", "some more random text", Field.Store.NO));
+     doc.Add(new StringField("id", "5", Field.Store.NO));
+     w.AddDocument(doc);
+
+     // 5
+     doc = new Document();
+     addField(doc, groupField, "3", dvType);
+     addField(doc, countField, "1", dvType);
+     doc.Add(new TextField("content", "random blob", Field.Store.NO));
+     doc.Add(new StringField("id", "6", Field.Store.NO));
+     w.AddDocument(doc);
+
+     // 6 -- no author field
+     doc = new Document();
+     doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
+     addField(doc, countField, "1", dvType);
+     doc.Add(new StringField("id", "6", Field.Store.NO));
+     w.AddDocument(doc);
+
+     IndexSearcher indexSearcher = NewSearcher(w.Reader);
+     w.Dispose();
+
+     // Sorts groups by group value with the null group first.
+     var cmp = new ComparerAnonymousHelper1(this);
+
+     // === Search for content:random
+     // The generic argument is spelled out because it only occurs in the helper's
+     // return type and therefore cannot be inferred from the call.
+     AbstractFirstPassGroupingCollector<IComparable<object>> firstCollector = createRandomFirstPassCollector<IComparable<object>>(dvType, new Sort(), groupField, 10);
+     indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector);
+     // Declared with var so the collector keeps its concrete type and its groups stay accessible.
+     var distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+     indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector);
+
+     var gcs = distinctValuesCollector.GetGroups();
+     gcs.Sort(cmp);
+     assertEquals(4, gcs.Count);
+
+     compareNull(gcs[0].groupValue);
+     List<IComparable> countValues = new List<IComparable>(gcs[0].uniqueValues);
+     assertEquals(1, countValues.Count);
+     compare("1", countValues[0]);
+
+     compare("1", gcs[1].groupValue);
+     countValues = new List<IComparable>(gcs[1].uniqueValues);
+     countValues.Sort(nullComparator);
+     assertEquals(2, countValues.Count);
+     compare("1", countValues[0]);
+     compare("2", countValues[1]);
+
+     compare("2", gcs[2].groupValue);
+     countValues = new List<IComparable>(gcs[2].uniqueValues);
+     assertEquals(1, countValues.Count);
+     compareNull(countValues[0]);
+
+     compare("3", gcs[3].groupValue);
+     countValues = new List<IComparable>(gcs[3].uniqueValues);
+     assertEquals(1, countValues.Count);
+     compare("1", countValues[0]);
+
+     // === Search for content:some
+     firstCollector = createRandomFirstPassCollector<IComparable<object>>(dvType, new Sort(), groupField, 10);
+     indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector);
+     distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+     indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector);
+
+     gcs = distinctValuesCollector.GetGroups();
+     gcs.Sort(cmp);
+     assertEquals(3, gcs.Count);
+
+     compare("1", gcs[0].groupValue);
+     countValues = new List<IComparable>(gcs[0].uniqueValues);
+     assertEquals(2, countValues.Count);
+     countValues.Sort(nullComparator);
+     compare("1", countValues[0]);
+     compare("2", countValues[1]);
+
+     compare("2", gcs[1].groupValue);
+     countValues = new List<IComparable>(gcs[1].uniqueValues);
+     assertEquals(1, countValues.Count);
+     compareNull(countValues[0]);
+
+     compare("3", gcs[2].groupValue);
+     countValues = new List<IComparable>(gcs[2].uniqueValues);
+     assertEquals(1, countValues.Count);
+     compare("1", countValues[0]);
+
+     // === Search for content:blob
+     firstCollector = createRandomFirstPassCollector<IComparable<object>>(dvType, new Sort(), groupField, 10);
+     indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector);
+     distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+     indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);
+
+     gcs = distinctValuesCollector.GetGroups();
+     gcs.Sort(cmp);
+     assertEquals(2, gcs.Count);
+
+     compare("1", gcs[0].groupValue);
+     countValues = new List<IComparable>(gcs[0].uniqueValues);
+     // B/c the only one document matched with blob inside the author 1 group
+     assertEquals(1, countValues.Count);
+     compare("1", countValues[0]);
+
+     compare("3", gcs[1].groupValue);
+     countValues = new List<IComparable>(gcs[1].uniqueValues);
+     assertEquals(1, countValues.Count);
+     compare("1", countValues[0]);
+
+     indexSearcher.IndexReader.Dispose();
+     dir.Dispose();
+ }
+
+ /// <summary>
+ /// Randomized check of the distinct-values collectors. For a random number of
+ /// index builds (3 to 6) it runs 100 searches each: a random content term is
+ /// searched with a randomly chosen first-pass collector, a matching
+ /// distinct-values collector is run, and the collected groups are compared
+ /// position by position with the expected result computed by
+ /// createExpectedResult.
+ /// </summary>
+ [Test]
+ public void testRandom()
+ {
+     Random random = Random();
+     int numberOfRuns = TestUtil.NextInt(random, 3, 6);
+     for (int indexIter = 0; indexIter < numberOfRuns; indexIter++)
+     {
+         IndexContext context = createIndexContext();
+         for (int searchIter = 0; searchIter < 100; searchIter++)
+         {
+             IndexSearcher searcher = NewSearcher(context.indexReader);
+             bool useDv = context.dvType != null && random.nextBoolean();
+             FieldInfo.DocValuesType_e? dvType = useDv ? context.dvType : (FieldInfo.DocValuesType_e?)null;
+             string term = context.contentStrings[random.nextInt(context.contentStrings.Length)];
+             Sort groupSort = new Sort(new SortField("id", SortField.Type_e.STRING));
+             int topN = 1 + random.nextInt(10);
+
+             List<AbstractGroupCount<IComparable>> expectedResult = createExpectedResult(context, term, groupSort, topN);
+
+             // The generic argument only appears in the helper's return type, so it is given explicitly.
+             AbstractFirstPassGroupingCollector<IComparable> firstCollector = createRandomFirstPassCollector<IComparable>(dvType, groupSort, groupField, topN);
+             searcher.Search(new TermQuery(new Term("content", term)), firstCollector);
+             // createDistinctCountCollector takes a non-nullable doc values type.
+             var distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+             searcher.Search(new TermQuery(new Term("content", term)), distinctValuesCollector);
+
+             List<AbstractGroupCount<IComparable>> actualResult = (List<AbstractGroupCount<IComparable>>)distinctValuesCollector.Groups;
+
+             if (VERBOSE)
+             {
+                 Console.WriteLine("Index iter=" + indexIter);
+                 Console.WriteLine("Search iter=" + searchIter);
+                 Console.WriteLine("1st pass collector class name=" + firstCollector.GetType().Name);
+                 Console.WriteLine("2nd pass collector class name=" + distinctValuesCollector.GetType().Name);
+                 Console.WriteLine("Search term=" + term);
+                 Console.WriteLine("DVType=" + dvType);
+                 Console.WriteLine("1st pass groups=" + firstCollector.GetTopGroups(0, false));
+                 Console.WriteLine("Expected:");
+                 printGroups(expectedResult);
+                 Console.WriteLine("Actual:");
+                 printGroups(actualResult);
+             }
+
+             assertEquals(expectedResult.Count, actualResult.Count);
+             for (int i = 0; i < expectedResult.Count; i++)
+             {
+                 AbstractGroupCount<IComparable> expected = expectedResult[i];
+                 AbstractGroupCount<IComparable> actual = actualResult[i];
+                 assertValues(expected.groupValue, actual.groupValue);
+                 assertEquals(expected.uniqueValues.Count, actual.uniqueValues.Count);
+
+                 // Unique values are sets; sort both sides (null first) so they can be compared by position.
+                 List<IComparable> expectedUniqueValues = new List<IComparable>(expected.uniqueValues);
+                 expectedUniqueValues.Sort(nullComparator);
+                 List<IComparable> actualUniqueValues = new List<IComparable>(actual.uniqueValues);
+                 actualUniqueValues.Sort(nullComparator);
+                 for (int j = 0; j < expectedUniqueValues.Count; j++)
+                 {
+                     assertValues(expectedUniqueValues[j], actualUniqueValues[j]);
+                 }
+             }
+         }
+         context.indexReader.Dispose();
+         context.directory.Dispose();
+     }
+ }
+
+ /// <summary>
+ /// Writes each group's position and group value to the console, followed by one
+ /// indented line per unique value. BytesRef values are printed as UTF-8 text;
+ /// anything else is printed through string concatenation.
+ /// </summary>
+ private void printGroups(List<AbstractDistinctValuesCollector.GroupCount<IComparable>> results)
+ {
+     int position = 0;
+     foreach (var entry in results)
+     {
+         object gv = entry.groupValue;
+         BytesRef gvBytes = gv as BytesRef;
+         Console.WriteLine(position + ": groupValue=" + (gvBytes != null ? (object)gvBytes.Utf8ToString() : gv));
+
+         foreach (object o in entry.uniqueValues)
+         {
+             BytesRef valueBytes = o as BytesRef;
+             Console.WriteLine(" " + (valueBytes != null ? (object)valueBytes.Utf8ToString() : o));
+         }
+
+         position++;
+     }
+ }
+
+ /// <summary>
+ /// Checks <paramref name="actual"/> against <paramref name="expected"/>: a null
+ /// expectation is delegated to compareNull, otherwise the expected value is cast
+ /// to BytesRef and its UTF-8 text is handed to compare.
+ /// </summary>
+ private void assertValues(object expected, object actual)
+ {
+     if (expected != null)
+     {
+         string expectedText = ((BytesRef)expected).Utf8ToString();
+         compare(expectedText, actual);
+         return;
+     }
+
+     compareNull(actual);
+ }
+
+ /// <summary>
+ /// Asserts that <paramref name="groupValue"/> represents <paramref name="expected"/>.
+ /// The comparison depends on the runtime type of the value: BytesRef is compared
+ /// by its UTF-8 text, double and long by the invariant-culture parse of
+ /// <paramref name="expected"/>, and MutableValue against a MutableValueStr that
+ /// wraps <paramref name="expected"/>. Any other value, including null, ends in fail().
+ /// </summary>
+ private void compare(string expected, object groupValue)
+ {
+     // "is" replaces typeof(X).IsAssignableFrom(groupValue.GetType()); it gives the
+     // same answer for non-null values and routes null to fail() below instead of
+     // throwing a NullReferenceException.
+     if (groupValue is BytesRef)
+     {
+         assertEquals(expected, ((BytesRef)groupValue).Utf8ToString());
+     }
+     else if (groupValue is double)
+     {
+         assertEquals(double.Parse(expected, CultureInfo.InvariantCulture), groupValue);
+     }
+     else if (groupValue is long)
+     {
+         assertEquals(long.Parse(expected, CultureInfo.InvariantCulture), groupValue);
+     }
+     else if (groupValue is MutableValue)
+     {
+         MutableValueStr mutableValue = new MutableValueStr();
+         mutableValue.Value = new BytesRef(expected);
+         assertEquals(mutableValue, groupValue);
+     }
+     else
+     {
+         fail();
+     }
+ }
+
+ /// <summary>
+ /// Asserts that <paramref name="groupValue"/> stands for "no value". A null
+ /// reference is accepted as is; otherwise the value must be an empty BytesRef,
+ /// 0.0d, 0L, or a MutableValue whose Exists flag is false. Any other type ends
+ /// in fail().
+ /// </summary>
+ private void compareNull(object groupValue)
+ {
+     // term based impl...
+     if (groupValue == null)
+     {
+         return;
+     }
+
+     // DV based impls..
+     if (groupValue is BytesRef)
+     {
+         assertEquals("", ((BytesRef)groupValue).Utf8ToString());
+         return;
+     }
+     if (groupValue is double)
+     {
+         assertEquals(0.0d, groupValue);
+         return;
+     }
+     if (groupValue is long)
+     {
+         assertEquals(0L, groupValue);
+         return;
+     }
+
+     // Function based impl
+     if (groupValue is MutableValue)
+     {
+         assertFalse(((MutableValue)groupValue).Exists);
+         return;
+     }
+
+     fail();
+ }
+
+ /// <summary>
+ /// Adds <paramref name="value"/> to <paramref name="doc"/> as a stored string
+ /// field named <paramref name="field"/>. When <paramref name="type"/> is not
+ /// null, a doc-values field of that type is added as well under the name
+ /// <c>field + "_dv"</c>.
+ /// </summary>
+ /// <param name="type">Doc values type to index, or null to skip doc values.</param>
+ /// <exception cref="ArgumentOutOfRangeException">
+ /// <paramref name="type"/> is neither NUMERIC, BINARY nor SORTED.
+ /// </exception>
+ private void addField(Document doc, string field, string value, FieldInfo.DocValuesType_e? type)
+ {
+     doc.Add(new StringField(field, value, Field.Store.YES));
+     if (type == null)
+     {
+         return;
+     }
+     string dvField = field + "_dv";
+
+     Field valuesField;
+     switch (type.Value)
+     {
+         case FieldInfo.DocValuesType_e.NUMERIC:
+             // The numeric variant requires the value to be an integer literal.
+             valuesField = new NumericDocValuesField(dvField, int.Parse(value, CultureInfo.InvariantCulture));
+             break;
+         case FieldInfo.DocValuesType_e.BINARY:
+             valuesField = new BinaryDocValuesField(dvField, new BytesRef(value));
+             break;
+         case FieldInfo.DocValuesType_e.SORTED:
+             valuesField = new SortedDocValuesField(dvField, new BytesRef(value));
+             break;
+         default:
+             // Without this branch an unhandled type would add a null field to the document.
+             throw new ArgumentOutOfRangeException(nameof(type), type, "Unsupported doc values type");
+     }
+     doc.Add(valuesField);
+ }
+
+ /// <summary>
+ /// Builds the distinct-values collector that matches the given first-pass
+ /// collector: a FunctionDistinctValuesCollector when the first pass was function
+ /// based, a TermDistinctValuesCollector otherwise. The top groups of the first
+ /// pass (offset 0, fillFields false) are handed to the new collector.
+ /// </summary>
+ /// <param name="dvType">Not consulted by this method.</param>
+ private AbstractDistinctValuesCollector<AbstractGroupCount<T>> createDistinctCountCollector<T>(AbstractFirstPassGroupingCollector<T> firstPassGroupingCollector,
+     string groupField,
+     string countField,
+     FieldInfo.DocValuesType_e dvType)
+     where T : IComparable
+ {
+     ICollection<SearchGroup<T>> searchGroups = firstPassGroupingCollector.GetTopGroups(0, false);
+
+     // NOTE(review): the "as" casts below yield null when T is not the element type
+     // the concrete collector expects - confirm the intended generic bridging.
+     if (firstPassGroupingCollector is FunctionFirstPassGroupingCollector)
+     {
+         return (AbstractDistinctValuesCollector)new FunctionDistinctValuesCollector(new Hashtable(), new BytesRefFieldSource(groupField), new BytesRefFieldSource(countField), searchGroups as ICollection<SearchGroup<MutableValue>>);
+     }
+
+     return (AbstractDistinctValuesCollector)new TermDistinctValuesCollector(groupField, countField, searchGroups as ICollection<SearchGroup<BytesRef>>);
+ }
+
+ /// <summary>
+ /// Creates either a function based or a term based first-pass grouping collector
+ /// for <paramref name="groupField"/>, chosen by a coin flip on the test random source.
+ /// </summary>
+ /// <param name="dvType">
+ /// Doc values type in use, or null when doc values are unavailable. It is
+ /// nullable because callers hold it as a nullable value; the choice of collector
+ /// is the same in both cases, so it is currently not consulted.
+ /// </param>
+ /// <param name="groupSort">Sort used to order the groups.</param>
+ /// <param name="groupField">Field whose values define the groups.</param>
+ /// <param name="topNGroups">Number of top groups to collect.</param>
+ private AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector<T>(FieldInfo.DocValuesType_e? dvType, Sort groupSort, string groupField, int topNGroups)
+ {
+     // The DV and the non-DV branch used to contain the same two alternatives,
+     // so one coin flip covers both.
+     if (Random().nextBoolean())
+     {
+         return (AbstractFirstPassGroupingCollector<T>)new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new Hashtable(), groupSort, topNGroups);
+     }
+
+     return (AbstractFirstPassGroupingCollector<T>)new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
+ }
+
+ /// <summary>
+ /// Group count over BytesRef values; createExpectedResult builds instances of it
+ /// to describe the groups a search is expected to produce.
+ /// </summary>
+ internal class GroupCount : AbstractGroupCount<BytesRef>
+ {
+ // Hands the group value to the base constructor and merges the supplied values
+ // into this.uniqueValues (presumably declared by AbstractGroupCount - it is not
+ // declared in this class).
+ internal GroupCount(BytesRef groupValue, ICollection<BytesRef> uniqueValues)
+ : base(groupValue)
+ {
+ this.uniqueValues.UnionWith(uniqueValues);
+ }
+ }
+
+ /// <summary>
+ /// Builds the expected group counts for <paramref name="term"/> from the
+ /// bookkeeping recorded in <paramref name="context"/>: the first
+ /// <paramref name="topN"/> groups in the enumeration order of the recorded
+ /// dictionary, each with its set of count values converted to BytesRef
+ /// (null stays null).
+ /// </summary>
+ /// <param name="groupSort">Not consulted by this method.</param>
+ private List<AbstractGroupCount<IComparable>> createExpectedResult(IndexContext context, string term, Sort groupSort, int topN)
+ {
+     List<AbstractGroupCount<IComparable>> result = new List<AbstractGroupCount<IComparable>>();
+     IDictionary<string, ISet<string>> groupCounts = context.searchTermToGroupCounts[term];
+
+     // Enumerate the entries directly instead of the keys, so no second lookup
+     // per key is needed; Take stops after the first topN groups.
+     foreach (KeyValuePair<string, ISet<string>> entry in groupCounts.Take(topN))
+     {
+         string groupName = entry.Key;
+         ISet<BytesRef> uniqueValues = new HashSet<BytesRef>();
+         foreach (string val in entry.Value)
+         {
+             uniqueValues.Add(val != null ? new BytesRef(val) : null);
+         }
+         result.Add(new GroupCount(groupName != null ? new BytesRef(groupName) : (BytesRef)null, uniqueValues));
+     }
+     return result;
+ }
+
+ /// <summary>
+ /// Builds a random index together with the bookkeeping needed to verify
+ /// searches against it. Each document gets a zero-padded "id", a "content" term
+ /// of the form "random" + number, and (unless randomly left out) a group value
+ /// and a count value. For every content term the method records which count
+ /// values were seen per group.
+ /// </summary>
+ /// <returns>
+ /// The directory, an open reader, the doc values type used (null when the codec
+ /// is "Lucene3x"), the recorded term -> group -> count values mapping, and the
+ /// distinct content terms.
+ /// </returns>
+ private IndexContext createIndexContext()
+ {
+     Random random = Random();
+     FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[]{
+         FieldInfo.DocValuesType_e.BINARY,
+         FieldInfo.DocValuesType_e.SORTED
+     };
+
+     Directory dir = NewDirectory();
+     RandomIndexWriter w = new RandomIndexWriter(
+         random,
+         dir,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT,
+             new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())
+         );
+
+     bool canUseDV = !"Lucene3x".equals(w.w.Config.Codec.Name);
+     FieldInfo.DocValuesType_e? dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;
+
+     int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
+     string[] groupValues = new string[numDocs / 5];
+     string[] countValues = new string[numDocs / 10];
+     for (int i = 0; i < groupValues.Length; i++)
+     {
+         groupValues[i] = GenerateRandomNonEmptyString();
+     }
+     for (int i = 0; i < countValues.Length; i++)
+     {
+         countValues[i] = GenerateRandomNonEmptyString();
+     }
+
+     List<string> contentStrings = new List<string>();
+     IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts = new Dictionary<string, IDictionary<string, ISet<string>>>();
+     for (int i = 1; i <= numDocs; i++)
+     {
+         // Occasionally leave out the group and/or the count value.
+         string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
+         string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
+         string content = "random" + random.nextInt(numDocs / 20);
+
+         IDictionary<string, ISet<string>> groupToCounts;
+         if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
+         {
+             // Groups sort always DOCID asc...
+             // createExpectedResult takes the first topN groups in enumeration order,
+             // so insertion ordering is requested explicitly.
+             searchTermToGroupCounts[content] = groupToCounts = new LurchTable<string, ISet<string>>(16, LurchTableOrder.Insertion);
+             contentStrings.Add(content);
+         }
+
+         // NOTE(review): groupValue can be null here - confirm the table accepts a null key.
+         ISet<string> countsVals;
+         if (!groupToCounts.TryGetValue(groupValue, out countsVals))
+         {
+             groupToCounts[groupValue] = countsVals = new HashSet<string>();
+         }
+         countsVals.Add(countValue);
+
+         Document doc = new Document();
+         doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
+         if (groupValue != null)
+         {
+             addField(doc, groupField, groupValue, dvType);
+         }
+         if (countValue != null)
+         {
+             addField(doc, countField, countValue, dvType);
+         }
+         doc.Add(new TextField("content", content, Field.Store.YES));
+         w.AddDocument(doc);
+     }
+
+     DirectoryReader reader = w.Reader;
+     if (VERBOSE)
+     {
+         for (int docID = 0; docID < reader.MaxDoc; docID++)
+         {
+             Document doc = reader.Document(docID);
+             Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
+         }
+     }
+
+     w.Dispose();
+     // dvType is passed through unchanged so "no doc values" (null) stays
+     // distinguishable from the enum's default member.
+     return new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray());
+ }
+
+ /// <summary>
+ /// Immutable bundle describing one randomly built index: where it lives, how to
+ /// read it, and what was recorded about its contents while it was written.
+ /// </summary>
+ internal class IndexContext
+ {
+
+     internal readonly Directory directory;
+     internal readonly DirectoryReader indexReader;
+     // Null when the index was written without doc values.
+     internal readonly FieldInfo.DocValuesType_e? dvType;
+     // content term -> group value -> count values seen for that group.
+     internal readonly IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts;
+     internal readonly string[] contentStrings;
+
+     internal IndexContext(Directory directory, DirectoryReader indexReader, FieldInfo.DocValuesType_e? dvType,
+         IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts, string[] contentStrings)
+     {
+         this.directory = directory;
+         this.indexReader = indexReader;
+         this.dvType = dvType;
+         this.searchTermToGroupCounts = searchTermToGroupCounts;
+         this.contentStrings = contentStrings;
+     }
+ }
+
+ /// <summary>
+ /// Comparer that tolerates null operands: identical references (including two
+ /// nulls) compare as equal, null sorts before any non-null value, and two
+ /// non-null values are ordered by the first operand's CompareTo.
+ /// </summary>
+ internal class NullComparator : IComparer<IComparable>
+ {
+     public int Compare(IComparable a, IComparable b)
+     {
+         // Reference comparison; also covers the case where both are null.
+         if (a == b)
+         {
+             return 0;
+         }
+
+         if (a == null)
+         {
+             return -1;
+         }
+
+         return b == null ? 1 : a.CompareTo(b);
+     }
+ }
+ }
+}