You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/14 12:59:22 UTC
[07/26] lucenenet git commit: first commit of facet porting,
failing tests will be fixed in next commits.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/Taxonomy/WriterCache/TestCompactLabelToOrdinal.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Facet/Taxonomy/WriterCache/TestCompactLabelToOrdinal.cs b/src/Lucene.Net.Tests/core/Facet/Taxonomy/WriterCache/TestCompactLabelToOrdinal.cs
new file mode 100644
index 0000000..549bf09
--- /dev/null
+++ b/src/Lucene.Net.Tests/core/Facet/Taxonomy/WriterCache/TestCompactLabelToOrdinal.cs
@@ -0,0 +1,151 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using NUnit.Framework;
+
+namespace Lucene.Net.Facet.Taxonomy.WriterCache
+{
+
+
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Placeholder fixture for the port of the CompactLabelToOrdinal test.
+ /// The entire test body below is commented out, so this class currently
+ /// declares no tests of its own.
+ /// </summary>
+ [TestFixture]
+ public class TestCompactLabelToOrdinal : FacetTestCase
+ {
+ /* Porting is not finished yet because a decoder implementation is missing:
+ * the commented-out code below still uses CharsetDecoder, ByteBuffer,
+ * replaceAll and File, which have not been translated.
+ * NOTE(review): before re-enabling, check LabelToOrdinalMap.GetOrdinal - it
+ * reads map[label] directly, and the Dictionary indexer throws
+ * KeyNotFoundException for a missing key instead of yielding null, so it
+ * probably needs TryGetValue. Confirm when the port is completed. */
+ /*
+ public virtual void TestL2O()
+ {
+ LabelToOrdinal map = new LabelToOrdinalMap();
+
+ CompactLabelToOrdinal compact = new CompactLabelToOrdinal(2000000, 0.15f, 3);
+
+ int n = AtLeast(10 * 1000);
+ const int numUniqueValues = 50 * 1000;
+
+ string[] uniqueValues = new string[numUniqueValues];
+ byte[] buffer = new byte[50];
+
+ Random random = Random();
+ for (int i = 0; i < numUniqueValues; )
+ {
+ random.NextBytes(buffer);
+ int size = 1 + random.Next(buffer.Length);
+
+ // This test is turning random bytes into a string,
+ // this is asking for trouble.
+ CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onUnmappableCharacter(CodingErrorAction.REPLACE).onMalformedInput(CodingErrorAction.REPLACE);
+ uniqueValues[i] = decoder.decode(ByteBuffer.Wrap(buffer, 0, size)).ToString();
+ // we cannot have empty path components, so eliminate all prefix as well
+ // as middle consecutive delimiter chars.
+ uniqueValues[i] = uniqueValues[i].replaceAll("/+", "/");
+ if (uniqueValues[i].StartsWith("/", StringComparison.Ordinal))
+ {
+ uniqueValues[i] = uniqueValues[i].Substring(1);
+ }
+ if (uniqueValues[i].IndexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1)
+ {
+ i++;
+ }
+ }
+
+ var tmpDir = CreateTempDir("testLableToOrdinal");
+ var f = new File(tmpDir, "CompactLabelToOrdinalTest.tmp");
+ int flushInterval = 10;
+
+ for (int i = 0; i < n; i++)
+ {
+ if (i > 0 && i % flushInterval == 0)
+ {
+ compact.Flush(f);
+ compact = CompactLabelToOrdinal.open(f, 0.15f, 3);
+ Assert.True(f.delete());
+ if (flushInterval < (n / 10))
+ {
+ flushInterval *= 10;
+ }
+ }
+
+ int index = random.Next(numUniqueValues);
+ FacetLabel label;
+ string s = uniqueValues[index];
+ if (s.Length == 0)
+ {
+ label = new FacetLabel();
+ }
+ else
+ {
+ label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
+ }
+
+ int ord1 = map.GetOrdinal(label);
+ int ord2 = compact.GetOrdinal(label);
+
+ Assert.AreEqual(ord1, ord2);
+
+ if (ord1 == LabelToOrdinal.INVALID_ORDINAL)
+ {
+ ord1 = compact.NextOrdinal;
+ map.AddLabel(label, ord1);
+ compact.AddLabel(label, ord1);
+ }
+ }
+
+ for (int i = 0; i < numUniqueValues; i++)
+ {
+ FacetLabel label;
+ string s = uniqueValues[i];
+ if (s.Length == 0)
+ {
+ label = new FacetLabel();
+ }
+ else
+ {
+ label = new FacetLabel(s.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
+ }
+ int ord1 = map.GetOrdinal(label);
+ int ord2 = compact.GetOrdinal(label);
+ Assert.AreEqual(ord1, ord2);
+ }
+ }
+
+ private class LabelToOrdinalMap : LabelToOrdinal
+ {
+ internal IDictionary<FacetLabel, int?> map = new Dictionary<FacetLabel, int?>();
+
+ internal LabelToOrdinalMap()
+ {
+ }
+
+ public override void AddLabel(FacetLabel label, int ordinal)
+ {
+ map[label] = ordinal;
+ }
+
+ public override int GetOrdinal(FacetLabel label)
+ {
+ int? value = map[label];
+ return (value != null) ? (int)value : LabelToOrdinal.INVALID_ORDINAL;
+ }
+
+ } */
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/TestDrillDownQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Facet/TestDrillDownQuery.cs b/src/Lucene.Net.Tests/core/Facet/TestDrillDownQuery.cs
new file mode 100644
index 0000000..1947b8b
--- /dev/null
+++ b/src/Lucene.Net.Tests/core/Facet/TestDrillDownQuery.cs
@@ -0,0 +1,282 @@
+using System;
+using System.Diagnostics;
+using Lucene.Net.Randomized.Generators;
+using NUnit.Framework;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
+ using Document = Lucene.Net.Documents.Document;
+ using Field = Lucene.Net.Documents.Field;
+ using TextField = Lucene.Net.Documents.TextField;
+ using TaxonomyWriter = Lucene.Net.Facet.Taxonomy.TaxonomyWriter;
+ using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader;
+ using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter;
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
+ using Term = Lucene.Net.Index.Term;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery;
+ using Query = Lucene.Net.Search.Query;
+ using QueryUtils = Lucene.Net.Search.QueryUtils;
+ using ScoreDoc = Lucene.Net.Search.ScoreDoc;
+ using TermQuery = Lucene.Net.Search.TermQuery;
+ using TopDocs = Lucene.Net.Search.TopDocs;
+ using Directory = Lucene.Net.Store.Directory;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+
+ /// <summary>
+ /// Exercises <see cref="DrillDownQuery"/> against one shared index of 100 documents:
+ /// every 2nd document has content "foo", every 3rd has content "bar", every 4th
+ /// carries a facet in dim "a" (value "1" or "2") and every 5th carries facet b/1.
+ /// </summary>
+ [TestFixture]
+ public class TestDrillDownQuery : FacetTestCase
+ {
+     // Shared state: created once by BeforeClassDrillDownQueryTest and
+     // released again by AfterClassDrillDownQueryTest.
+     private static IndexReader reader;
+     private static DirectoryTaxonomyReader taxo;
+     private static Directory dir;
+     private static Directory taxoDir;
+     private static FacetsConfig config;
+
+     /// <summary>
+     /// Closes the shared readers and directories, then drops every static reference.
+     /// </summary>
+     [TestFixtureTearDown]
+     public static void AfterClassDrillDownQueryTest()
+     {
+         IOUtils.Close(reader, taxo, dir, taxoDir);
+
+         config = null;
+         taxoDir = null;
+         dir = null;
+         taxo = null;
+         reader = null;
+     }
+
+     /// <summary>
+     /// Applies randomized hierarchical / multi-valued settings to <paramref name="dim"/>,
+     /// randomly redirects it to the index field "$" + dim, and requires dim counts for it.
+     /// </summary>
+     private static void RandomizeDimConfig(string dim)
+     {
+         config.SetHierarchical(dim, Random().NextBoolean());
+         config.SetMultiValued(dim, Random().NextBoolean());
+         if (Random().NextBoolean())
+         {
+             config.SetIndexFieldName(dim, "$" + dim);
+         }
+         config.SetRequireDimCount(dim, true);
+     }
+
+     /// <summary>
+     /// Builds the shared index and taxonomy used by all tests of this fixture.
+     /// </summary>
+     [TestFixtureSetUp]
+     public static void BeforeClassDrillDownQueryTest()
+     {
+         dir = NewDirectory();
+         Random rnd = Random();
+         var analyzer = new MockAnalyzer(rnd, MockTokenizer.KEYWORD, false);
+         var indexWriter = new RandomIndexWriter(rnd, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+
+         taxoDir = NewDirectory();
+         TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+         config = new FacetsConfig();
+
+         // Randomize the per-dim config, first for "a" and then for "b".
+         RandomizeDimConfig("a");
+         RandomizeDimConfig("b");
+
+         for (int docNum = 0; docNum < 100; docNum++)
+         {
+             var doc = new Document();
+
+             // 50 documents get content:foo
+             if (docNum % 2 == 0)
+             {
+                 doc.Add(new TextField("content", "foo", Field.Store.NO));
+             }
+
+             // 33 (well, 34 counting doc 0) documents get content:bar
+             if (docNum % 3 == 0)
+             {
+                 doc.Add(new TextField("content", "bar", Field.Store.NO));
+             }
+
+             // 25 documents get a facet in dim "a"; the value is picked at random
+             if (docNum % 4 == 0)
+             {
+                 string aValue = rnd.NextBoolean() ? "1" : "2";
+                 doc.Add(new FacetField("a", aValue));
+             }
+
+             // 20 documents get facet b/1
+             if (docNum % 5 == 0)
+             {
+                 doc.Add(new FacetField("b", "1"));
+             }
+
+             indexWriter.AddDocument(config.Build(taxoWriter, doc));
+         }
+
+         // The taxonomy writer is disposed before the reader is pulled from the
+         // index writer; the order of these three statements is kept as-is.
+         taxoWriter.Dispose();
+         reader = indexWriter.Reader;
+         indexWriter.Dispose();
+
+         taxo = new DirectoryTaxonomyReader(taxoDir);
+     }
+
+     /// <summary>
+     /// (a/1 OR a/2) AND b/1 must match the 5 documents that carry both dims.
+     /// </summary>
+     [Test]
+     public virtual void TestAndOrs()
+     {
+         IndexSearcher searcher = NewSearcher(reader);
+
+         var query = new DrillDownQuery(config);
+         query.Add("a", "1");
+         query.Add("a", "2");
+         query.Add("b", "1");
+
+         TopDocs hits = searcher.Search(query, 100);
+         Assert.AreEqual(5, hits.TotalHits);
+     }
+
+     /// <summary>
+     /// Checks hit counts for drill-downs on "a", on "b" with a base query, and on both dims.
+     /// </summary>
+     [Test]
+     public virtual void TestQuery()
+     {
+         IndexSearcher searcher = NewSearcher(reader);
+
+         // Dim "a" alone: 25 documents.
+         var onlyA = new DrillDownQuery(config);
+         onlyA.Add("a");
+         QueryUtils.Check(onlyA);
+         TopDocs hits = searcher.Search(onlyA, 100);
+         Assert.AreEqual(25, hits.TotalHits);
+
+         // Dim "b" on top of the previous query used as base query: 5 documents.
+         var bOverA = new DrillDownQuery(config, onlyA);
+         bOverA.Add("b");
+         hits = searcher.Search(bOverA, 100);
+         Assert.AreEqual(5, hits.TotalHits);
+
+         // Both dims added to a single drill-down query: also 5 documents.
+         var aAndB = new DrillDownQuery(config);
+         aAndB.Add("a");
+         aAndB.Add("b");
+         hits = searcher.Search(aAndB, 100);
+         Assert.AreEqual(5, hits.TotalHits);
+
+         // content:foo (50 documents) restricted to dim "b" (20 documents)
+         // leaves the 10 documents that have both.
+         Query fooQuery = new TermQuery(new Term("content", "foo"));
+         var bOverFoo = new DrillDownQuery(config, fooQuery);
+         bOverFoo.Add("b");
+         hits = searcher.Search(bOverFoo, 100);
+         Assert.AreEqual(10, hits.TotalHits);
+     }
+
+     /// <summary>
+     /// Same base-query combinations as in TestQuery, without the intermediate checks.
+     /// </summary>
+     [Test]
+     public virtual void TestQueryImplicitDefaultParams()
+     {
+         IndexSearcher searcher = NewSearcher(reader);
+
+         // Base query to start from: drill-down on dim "a".
+         var onlyA = new DrillDownQuery(config);
+         onlyA.Add("a");
+
+         // Dim "b" with the dim "a" query as base query: 5 documents.
+         var bOverA = new DrillDownQuery(config, onlyA);
+         bOverA.Add("b");
+         TopDocs hits = searcher.Search(bOverA, 100);
+         Assert.AreEqual(5, hits.TotalHits);
+
+         // content:foo combined with dim "b": 10 documents.
+         Query fooQuery = new TermQuery(new Term("content", "foo"));
+         var bOverFoo = new DrillDownQuery(config, fooQuery);
+         bOverFoo.Add("b");
+         hits = searcher.Search(bOverFoo, 100);
+         Assert.AreEqual(10, hits.TotalHits);
+     }
+
+     /// <summary>
+     /// Verifies that wrapping a query in a drill-down does not modify its scores.
+     /// </summary>
+     [Test]
+     public virtual void TestScoring()
+     {
+         IndexSearcher searcher = NewSearcher(reader);
+
+         // Scores of the plain term query, indexed by doc id.
+         float[] baseScores = new float[reader.MaxDoc];
+
+         Query fooQuery = new TermQuery(new Term("content", "foo"));
+         TopDocs hits = searcher.Search(fooQuery, reader.MaxDoc); // ask for every doc the query can match
+         foreach (ScoreDoc hit in hits.ScoreDocs)
+         {
+             baseScores[hit.Doc] = hit.Score;
+         }
+
+         // Drill down on dim "a" over the same query; each score must be unchanged.
+         var drillDown = new DrillDownQuery(config, fooQuery);
+         drillDown.Add("a");
+         hits = searcher.Search(drillDown, reader.MaxDoc); // ask for every doc the query can match
+         foreach (ScoreDoc hit in hits.ScoreDocs)
+         {
+             Assert.AreEqual(baseScores[hit.Doc], hit.Score, 0f, "score of doc=" + hit.Doc + " modified");
+         }
+     }
+
+     /// <summary>
+     /// Verifies that a drill-down query without a base query scores every hit as 0.0.
+     /// </summary>
+     [Test]
+     public virtual void TestScoringNoBaseQuery()
+     {
+         IndexSearcher searcher = NewSearcher(reader);
+
+         var drillDown = new DrillDownQuery(config);
+         drillDown.Add("a");
+
+         TopDocs hits = searcher.Search(drillDown, reader.MaxDoc); // ask for every doc the query can match
+         foreach (ScoreDoc hit in hits.ScoreDocs)
+         {
+             Assert.AreEqual(0f, hit.Score, 0f);
+         }
+     }
+
+     /// <summary>
+     /// DrillDownQuery.Term must build a term on the index field configured for each dim.
+     /// </summary>
+     [Test]
+     public virtual void TestTermNonDefault()
+     {
+         // Same check for dim "a" first, then dim "b".
+         foreach (string dim in new[] { "a", "b" })
+         {
+             string indexField = config.GetDimConfig(dim).indexFieldName;
+             Term built = DrillDownQuery.Term(indexField, dim);
+             Assert.AreEqual(new Term(indexField, dim), built);
+         }
+     }
+
+     /// <summary>
+     /// Adding a dim to a clone must not show up in the query it was cloned from.
+     /// </summary>
+     [Test]
+     public virtual void TestClone()
+     {
+         var source = new DrillDownQuery(config, new MatchAllDocsQuery());
+         source.Add("a");
+
+         var clone = source.Clone() as DrillDownQuery;
+         Assert.NotNull(clone);
+         clone.Add("b");
+
+         bool sameText = source.ToString().Equals(clone.ToString());
+         Assert.False(sameText, "query wasn't cloned: source=" + source + " clone=" + clone);
+     }
+
+     /// <summary>
+     /// A drill-down query with no dims added must rewrite to its base query instance.
+     /// </summary>
+     [Test]
+     public virtual void TestNoDrillDown()
+     {
+         Query baseQuery = new MatchAllDocsQuery();
+         var drillDown = new DrillDownQuery(config, baseQuery);
+
+         // Rewrite twice, as the original test does.
+         Query firstPass = drillDown.Rewrite(reader);
+         Query secondPass = firstPass.Rewrite(reader);
+
+         Assert.AreSame(baseQuery, secondPass);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/TestDrillSideways.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Facet/TestDrillSideways.cs b/src/Lucene.Net.Tests/core/Facet/TestDrillSideways.cs
new file mode 100644
index 0000000..14b3e12
--- /dev/null
+++ b/src/Lucene.Net.Tests/core/Facet/TestDrillSideways.cs
@@ -0,0 +1,1332 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using Document = Lucene.Net.Documents.Document;
+ using Field = Lucene.Net.Documents.Field;
+ using StringField = Lucene.Net.Documents.StringField;
+ using DrillSidewaysResult = Lucene.Net.Facet.DrillSideways.DrillSidewaysResult;
+ using DefaultSortedSetDocValuesReaderState = Lucene.Net.Facet.SortedSet.DefaultSortedSetDocValuesReaderState;
+ using SortedSetDocValuesFacetField = Lucene.Net.Facet.SortedSet.SortedSetDocValuesFacetField;
+ using SortedSetDocValuesReaderState = Lucene.Net.Facet.SortedSet.SortedSetDocValuesReaderState;
+ using TaxonomyReader = Lucene.Net.Facet.Taxonomy.TaxonomyReader;
+ using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader;
+ using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter;
+ using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig;
+ using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
+ using Term = Lucene.Net.Index.Term;
+ using Collector = Lucene.Net.Search.Collector;
+ using DocIdSet = Lucene.Net.Search.DocIdSet;
+ using Filter = Lucene.Net.Search.Filter;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery;
+ using Query = Lucene.Net.Search.Query;
+ using ScoreDoc = Lucene.Net.Search.ScoreDoc;
+ using Scorer = Lucene.Net.Search.Scorer;
+ using Sort = Lucene.Net.Search.Sort;
+ using SortField = Lucene.Net.Search.SortField;
+ using TermQuery = Lucene.Net.Search.TermQuery;
+ using TopDocs = Lucene.Net.Search.TopDocs;
+ using Directory = Lucene.Net.Store.Directory;
+ using Bits = Lucene.Net.Util.Bits;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using FixedBitSet = Lucene.Net.Util.FixedBitSet;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+ using InPlaceMergeSorter = Lucene.Net.Util.InPlaceMergeSorter;
+ using InfoStream = Lucene.Net.Util.InfoStream;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+ using NUnit.Framework;
+
+ [TestFixture]
+ public class TestDrillSideways : FacetTestCase
+ {
+
+ /// <summary>
+ /// Walks through drill-sideways searches over five documents (Author + hierarchical
+ /// Publish Date) and checks hit counts and the rendered facet counts for each case.
+ /// </summary>
+ [Test]
+ public virtual void TestBasic()
+ {
+     // Expected facet renderings that recur in several of the cases below.
+     const string allAuthors = "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n";
+     const string lisaDates = "dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n";
+     const string lisaOrBobDates = "dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n";
+     const string authorsIn2010 = "dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n";
+
+     Directory dir = NewDirectory();
+     Directory taxoDir = NewDirectory();
+
+     // Facet ordinals are written to their own directory, apart from the main index.
+     var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
+
+     var config = new FacetsConfig();
+     config.SetHierarchical("Publish Date", true);
+
+     var writer = new RandomIndexWriter(Random(), dir);
+
+     // One row per document: the author, then the year / month / day of publication.
+     string[][] books =
+     {
+         new[] { "Bob", "2010", "10", "15" },
+         new[] { "Lisa", "2010", "10", "20" },
+         new[] { "Lisa", "2012", "1", "1" },
+         new[] { "Susan", "2012", "1", "7" },
+         new[] { "Frank", "1999", "5", "5" }
+     };
+     foreach (string[] book in books)
+     {
+         var doc = new Document();
+         doc.Add(new FacetField("Author", book[0]));
+         doc.Add(new FacetField("Publish Date", book[1], book[2], book[3]));
+         writer.AddDocument(config.Build(taxoWriter, doc));
+     }
+
+     // NRT open of the index
+     IndexSearcher searcher = NewSearcher(writer.Reader);
+
+     // NRT open of the taxonomy
+     var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+
+     var sideways = new DrillSideways(searcher, config, taxoReader);
+
+     // Case 1: drill down on a single dim (Author=Lisa).
+     var query = new DrillDownQuery(config);
+     query.Add("Author", "Lisa");
+     DrillSidewaysResult result = sideways.Search(null, query, 10);
+     Assert.AreEqual(2, result.Hits.TotalHits);
+     // Publish Date is drill-down only: Lisa has one document in 2010 and one in 2012.
+     Assert.AreEqual(lisaDates, result.Facets.GetTopChildren(10, "Publish Date").ToString());
+     // Author is drill-sideways + drill-down: Lisa twice, Bob / Susan / Frank once each.
+     Assert.AreEqual(allAuthors, result.Facets.GetTopChildren(10, "Author").ToString());
+
+     // Case 2: labelled "no baseQuery (pure browse)" in the original comments. The
+     // query is built exactly as in case 1 (no base query in either), so the same
+     // results are expected.
+     query = new DrillDownQuery(config);
+     query.Add("Author", "Lisa");
+     result = sideways.Search(null, query, 10);
+     Assert.AreEqual(2, result.Hits.TotalHits);
+     Assert.AreEqual(lisaDates, result.Facets.GetTopChildren(10, "Publish Date").ToString());
+     Assert.AreEqual(allAuthors, result.Facets.GetTopChildren(10, "Author").ToString());
+
+     // Case 3: single dim, but an OR of two values (Lisa or Bob).
+     query = new DrillDownQuery(config);
+     query.Add("Author", "Lisa");
+     query.Add("Author", "Bob");
+     result = sideways.Search(null, query, 10);
+     Assert.AreEqual(3, result.Hits.TotalHits);
+     // Publish Date is drill-down only: Lisa and Bob together have two documents
+     // in 2010 and one in 2012.
+     Assert.AreEqual(lisaOrBobDates, result.Facets.GetTopChildren(10, "Publish Date").ToString());
+     // Author is drill-sideways + drill-down, so every author is still counted.
+     Assert.AreEqual(allAuthors, result.Facets.GetTopChildren(10, "Author").ToString());
+
+     // The combined facets object must report both dims, Author first.
+     Assert.True(result.Facets is MultiFacets);
+     IList<FacetResult> allResults = result.Facets.GetAllDims(10);
+     Assert.AreEqual(2, allResults.Count);
+     Assert.AreEqual(allAuthors, allResults[0].ToString());
+     Assert.AreEqual(lisaOrBobDates, allResults[1].ToString());
+
+     // Case 4: drill down on two dims at once.
+     query = new DrillDownQuery(config);
+     query.Add("Author", "Lisa");
+     query.Add("Publish Date", "2010");
+     result = sideways.Search(null, query, 10);
+     Assert.AreEqual(1, result.Hits.TotalHits);
+     // Publish Date is drill-sideways + drill-down: Lisa has one document in 2010
+     // and one in 2012.
+     Assert.AreEqual(lisaDates, result.Facets.GetTopChildren(10, "Publish Date").ToString());
+     // Author is drill-sideways + drill-down: only Lisa and Bob have a 2010
+     // document (one each).
+     Assert.AreEqual(authorsIn2010, result.Facets.GetTopChildren(10, "Author").ToString());
+
+     // Case 5: two dims, one of them an OR (Lisa or Bob). The second Author value
+     // is added after the Publish Date value on purpose, as in the original.
+     query = new DrillDownQuery(config);
+     query.Add("Author", "Lisa");
+     query.Add("Publish Date", "2010");
+     query.Add("Author", "Bob");
+     result = sideways.Search(null, query, 10);
+     Assert.AreEqual(2, result.Hits.TotalHits);
+     // Publish Date is drill-sideways + drill-down: Lisa or Bob, two documents in
+     // 2010 and one in 2012.
+     Assert.AreEqual(lisaOrBobDates, result.Facets.GetTopChildren(10, "Publish Date").ToString());
+     // Author is drill-sideways + drill-down: only Lisa and Bob have a 2010 document.
+     Assert.AreEqual(authorsIn2010, result.Facets.GetTopChildren(10, "Author").ToString());
+
+     // Case 6: drill down on a dim that was never indexed.
+     query = new DrillDownQuery(config);
+     query.Add("Foobar", "Baz");
+     result = sideways.Search(null, query, 10);
+     Assert.AreEqual(0, result.Hits.TotalHits);
+     Assert.Null(result.Facets.GetTopChildren(10, "Publish Date"));
+     Assert.Null(result.Facets.GetTopChildren(10, "Foobar"));
+
+     // Case 7: a valid value OR'd with a value no document has (Tom).
+     query = new DrillDownQuery(config);
+     query.Add("Author", "Lisa");
+     query.Add("Author", "Tom");
+     result = sideways.Search(null, query, 10);
+     Assert.AreEqual(2, result.Hits.TotalHits);
+     // Publish Date is drill-down only: Lisa's 2010 and 2012 documents.
+     Assert.AreEqual(lisaDates, result.Facets.GetTopChildren(10, "Publish Date").ToString());
+     // Author is drill-sideways + drill-down, so every author is still counted.
+     Assert.AreEqual(allAuthors, result.Facets.GetTopChildren(10, "Author").ToString());
+
+     // Case 8 (LUCENE-4915: drilling down on a dim without facet counting it):
+     // same query as case 7, and only the Publish Date counts are checked.
+     query = new DrillDownQuery(config);
+     query.Add("Author", "Lisa");
+     query.Add("Author", "Tom");
+     result = sideways.Search(null, query, 10);
+     Assert.AreEqual(2, result.Hits.TotalHits);
+     Assert.AreEqual(lisaDates, result.Facets.GetTopChildren(10, "Publish Date").ToString());
+
+     // Case 9: the base query is a term on a field that was never indexed here
+     // (the original note describes this as the main query getting a null scorer).
+     query = new DrillDownQuery(config, new TermQuery(new Term("foobar", "baz")));
+     query.Add("Author", "Lisa");
+     result = sideways.Search(null, query, 10);
+     Assert.AreEqual(0, result.Hits.TotalHits);
+     Assert.Null(result.Facets.GetTopChildren(10, "Publish Date"));
+     Assert.Null(result.Facets.GetTopChildren(10, "Author"));
+
+     IOUtils.Close(searcher.IndexReader, taxoReader, writer, taxoWriter, dir, taxoDir);
+ }
+
+ /// <summary>
+ /// Drills down on Author=Lisa when the documents added after a commit carry no
+ /// Author facet at all.
+ /// </summary>
+ [Test]
+ public virtual void TestSometimesInvalidDrillDown()
+ {
+     Directory dir = NewDirectory();
+     Directory taxoDir = NewDirectory();
+     var writer = new RandomIndexWriter(Random(), dir);
+
+     // Facet ordinals are written to their own directory, apart from the main index.
+     var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
+
+     var config = new FacetsConfig();
+     config.SetHierarchical("Publish Date", true);
+
+     // Documents added before the commit: author, then year / month / day.
+     string[][] authored =
+     {
+         new[] { "Bob", "2010", "10", "15" },
+         new[] { "Lisa", "2010", "10", "20" }
+     };
+     foreach (string[] row in authored)
+     {
+         var authoredDoc = new Document();
+         authoredDoc.Add(new FacetField("Author", row[0]));
+         authoredDoc.Add(new FacetField("Publish Date", row[1], row[2], row[3]));
+         writer.AddDocument(config.Build(taxoWriter, authoredDoc));
+     }
+
+     writer.Commit();
+
+     // Added after the commit (the original note calls this the 2nd segment):
+     // it has a Foobar facet instead of an Author facet.
+     var unauthoredDoc = new Document();
+     unauthoredDoc.Add(new FacetField("Foobar", "Lisa"));
+     unauthoredDoc.Add(new FacetField("Publish Date", "2012", "1", "1"));
+     writer.AddDocument(config.Build(taxoWriter, unauthoredDoc));
+
+     // NRT open of the index
+     IndexSearcher searcher = NewSearcher(writer.Reader);
+
+     // NRT open of the taxonomy
+     var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+
+     var query = new DrillDownQuery(config);
+     query.Add("Author", "Lisa");
+     var sideways = new DrillSideways(searcher, config, taxoReader);
+     DrillSidewaysResult result = sideways.Search(null, query, 10);
+
+     // Only the 2010 document has Author=Lisa; the 2012 one uses the Foobar dim.
+     Assert.AreEqual(1, result.Hits.TotalHits);
+     // Publish Date is drill-down only, so just that single 2010 document is counted.
+     Assert.AreEqual("dim=Publish Date path=[] value=1 childCount=1\n 2010 (1)\n", result.Facets.GetTopChildren(10, "Publish Date").ToString());
+     // Author is drill-sideways + drill-down: Lisa and Bob, one document each.
+     Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", result.Facets.GetTopChildren(10, "Author").ToString());
+
+     IOUtils.Close(searcher.IndexReader, taxoReader, writer, taxoWriter, dir, taxoDir);
+ }
+
+ /// <summary>
+ /// Requests the top children of one hierarchical dim at two levels (the root and
+ /// below "a") from a single drill-sideways result.
+ /// </summary>
+ [Test]
+ public virtual void TestMultipleRequestsPerDim()
+ {
+     Directory dir = NewDirectory();
+     Directory taxoDir = NewDirectory();
+     var writer = new RandomIndexWriter(Random(), dir);
+
+     // Facet ordinals are written to their own directory, apart from the main index.
+     var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
+
+     var config = new FacetsConfig();
+     config.SetHierarchical("dim", true);
+
+     // One document per path under "dim": three below "a", plus b, c and d.
+     string[][] paths =
+     {
+         new[] { "a", "x" },
+         new[] { "a", "y" },
+         new[] { "a", "z" },
+         new[] { "b" },
+         new[] { "c" },
+         new[] { "d" }
+     };
+     foreach (string[] path in paths)
+     {
+         var doc = new Document();
+         doc.Add(new FacetField("dim", path));
+         writer.AddDocument(config.Build(taxoWriter, doc));
+     }
+
+     // NRT open of the index
+     IndexSearcher searcher = NewSearcher(writer.Reader);
+
+     // NRT open of the taxonomy
+     var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+
+     var query = new DrillDownQuery(config);
+     query.Add("dim", "a");
+     var sideways = new DrillSideways(searcher, config, taxoReader);
+     DrillSidewaysResult result = sideways.Search(null, query, 10);
+
+     // The three documents below "a" are the hits ...
+     Assert.AreEqual(3, result.Hits.TotalHits);
+     // ... while the root of the dim still counts all six documents,
+     Assert.AreEqual("dim=dim path=[] value=6 childCount=4\n a (3)\n b (1)\n c (1)\n d (1)\n", result.Facets.GetTopChildren(10, "dim").ToString());
+     // and the children of "a" are available from the same result.
+     Assert.AreEqual("dim=dim path=[a] value=3 childCount=3\n x (1)\n y (1)\n z (1)\n", result.Facets.GetTopChildren(10, "dim", "a").ToString());
+
+     IOUtils.Close(searcher.IndexReader, taxoReader, writer, taxoWriter, dir, taxoDir);
+ }
+
+ /// <summary>
+ /// Plain description of one test document: its id, its content token and, per
+ /// dimension, the indexes of the facet values assigned to it. Instances order
+ /// by <see cref="id"/> using ordinal string comparison.
+ /// </summary>
+ internal class Doc : IComparable<Doc>
+ {
+     internal string id;
+     internal string contentToken;
+
+     public Doc()
+     {
+     }
+
+     // Per dim: -1 if the doc is missing this dim, else the index
+     // into the values for this dim:
+     internal int[] dims;
+
+     // 2nd value per dim for the doc (so we test
+     // multi-valued fields); -1 when there is no 2nd value:
+     internal int[] dims2;
+     internal bool deleted;
+
+     /// <summary>
+     /// Compares this document with <paramref name="other"/> by id.
+     /// </summary>
+     /// <param name="other">The document to compare with; may be null.</param>
+     /// <returns>
+     /// A negative value, zero or a positive value when this id sorts before,
+     /// equal to or after the other id; a positive value when
+     /// <paramref name="other"/> is null.
+     /// </returns>
+     public virtual int CompareTo(Doc other)
+     {
+         // IComparable<T> contract: every instance sorts after null. The previous
+         // code dereferenced other unconditionally.
+         if (other == null)
+         {
+             return 1;
+         }
+
+         // Ids are machine identifiers, so compare them ordinally; the instance
+         // string.CompareTo used before is culture-sensitive.
+         return string.CompareOrdinal(id, other.id);
+     }
+ }
+
+ private double aChance, bChance, cChance;
+
+ /// <summary>
+ /// Draws one random number and maps it to the token "a", "b" or "c".
+ /// </summary>
+ /// <param name="isQuery">
+ /// When true the fixed cut-offs 0.33 / 0.66 are used; otherwise the cut-offs
+ /// come from the aChance and bChance fields.
+ /// </param>
+ /// <returns>"a", "b" or "c".</returns>
+ private string randomContentToken(bool isQuery)
+ {
+     double roll = Random().NextDouble();
+
+     if (!isQuery)
+     {
+         // Document side: note that the first cut-off is inclusive (<=)
+         // while the second one is exclusive (<).
+         if (roll <= aChance)
+         {
+             return "a";
+         }
+         return roll < aChance + bChance ? "b" : "c";
+     }
+
+     // Query side: fixed, exclusive cut-offs.
+     if (roll < 0.33)
+     {
+         return "a";
+     }
+     return roll < 0.66 ? "b" : "c";
+ }
+
+ /// <summary>
+ /// Randomized end-to-end test: indexes random docs carrying up to two values
+ /// per facet dimension, then runs several random drill-down queries and
+ /// compares the DrillSideways hits, scores and facet counts with the
+ /// brute-force expectation computed by slowDrillSidewaysSearch.
+ /// </summary>
+ [Test]
+ public virtual void TestRandom()
+ {
+
+ bool canUseDV = DefaultCodecSupportsSortedSet();
+
+ // Draw non-zero weights for the three content tokens; they are normalized
+ // to sum to 1 below. aChance and bChance are read by randomContentToken.
+ while (aChance == 0.0)
+ {
+ aChance = Random().NextDouble();
+ }
+ while (bChance == 0.0)
+ {
+ bChance = Random().NextDouble();
+ }
+ while (cChance == 0.0)
+ {
+ cChance = Random().NextDouble();
+ }
+ //aChance = .01;
+ //bChance = 0.5;
+ //cChance = 1.0;
+ double sum = aChance + bChance + cChance;
+ aChance /= sum;
+ bChance /= sum;
+ cChance /= sum;
+
+ int numDims = TestUtil.NextInt(Random(), 2, 5);
+ //int numDims = 3;
+ int numDocs = AtLeast(3000);
+ //int numDocs = 20;
+ if (VERBOSE)
+ {
+ Console.WriteLine("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance);
+ }
+ // Each dim gets valueCount distinct, non-empty random strings;
+ // valueCount starts at 2 and doubles for every subsequent dim.
+ string[][] dimValues = new string[numDims][];
+ int valueCount = 2;
+
+ for (int dim = 0; dim < numDims; dim++)
+ {
+ var values = new HashSet<string>();
+ while (values.Count < valueCount)
+ {
+ var str = TestUtil.RandomRealisticUnicodeString(Random());
+ //String s = TestUtil.randomString(Random());
+ if (str.Length > 0)
+ {
+ values.Add(str);
+ }
+ }
+ dimValues[dim] = values.ToArray();
+ valueCount *= 2;
+ }
+
+ // Build the in-memory model of the docs; the same model is indexed below
+ // and later passed to slowDrillSidewaysSearch to compute expected results.
+ IList<Doc> docs = new List<Doc>();
+ for (int i = 0; i < numDocs; i++)
+ {
+ Doc doc = new Doc();
+ doc.id = "" + i;
+ doc.contentToken = randomContentToken(false);
+ doc.dims = new int[numDims];
+ doc.dims2 = new int[numDims];
+ for (int dim = 0; dim < numDims; dim++)
+ {
+ if (Random().Next(5) == 3)
+ {
+ // This doc is missing this dim:
+ doc.dims[dim] = -1;
+ }
+ else if (dimValues[dim].Length <= 4)
+ {
+ // Dims with at most 4 values: walk the values in order and take the
+ // first one that wins a coin flip; if none wins, the last value
+ // (assigned up front) is kept.
+ int dimUpto = 0;
+ doc.dims[dim] = dimValues[dim].Length - 1;
+ while (dimUpto < dimValues[dim].Length)
+ {
+ if (Random().NextBoolean())
+ {
+ doc.dims[dim] = dimUpto;
+ break;
+ }
+ dimUpto++;
+ }
+ }
+ else
+ {
+ doc.dims[dim] = Random().Next(dimValues[dim].Length);
+ }
+
+ if (Random().Next(5) == 3)
+ {
+ // 2nd value:
+ doc.dims2[dim] = Random().Next(dimValues[dim].Length);
+ }
+ else
+ {
+ doc.dims2[dim] = -1;
+ }
+ }
+ docs.Add(doc);
+ }
+
+ // d holds the main index, td the taxonomy index.
+ Directory d = NewDirectory();
+ Directory td = NewDirectory();
+
+ IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+ iwc.SetInfoStream(InfoStream.NO_OUTPUT);
+ var w = new RandomIndexWriter(Random(), d, iwc);
+ var tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode_e.CREATE);
+ FacetsConfig config = new FacetsConfig();
+ for (int i = 0; i < numDims; i++)
+ {
+ config.SetMultiValued("dim" + i, true);
+ }
+
+ // Sorted-set doc-values facets are only used when the codec supports them,
+ // and even then only on a coin flip; otherwise taxonomy FacetFields are used.
+ bool doUseDV = canUseDV && Random().NextBoolean();
+
+ foreach (Doc rawDoc in docs)
+ {
+ Document doc = new Document();
+ doc.Add(NewStringField("id", rawDoc.id, Field.Store.YES));
+ doc.Add(NewStringField("content", rawDoc.contentToken, Field.Store.NO));
+
+ if (VERBOSE)
+ {
+ Console.WriteLine(" doc id=" + rawDoc.id + " token=" + rawDoc.contentToken);
+ }
+ for (int dim = 0; dim < numDims; dim++)
+ {
+ int dimValue = rawDoc.dims[dim];
+ if (dimValue != -1)
+ {
+ if (doUseDV)
+ {
+ doc.Add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue]));
+ }
+ else
+ {
+ doc.Add(new FacetField("dim" + dim, dimValues[dim][dimValue]));
+ }
+ // The value is additionally indexed as a stored StringField under the
+ // same dim name.
+ doc.Add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
+ if (VERBOSE)
+ {
+ Console.WriteLine(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue]));
+ }
+ }
+ int dimValue2 = rawDoc.dims2[dim];
+ if (dimValue2 != -1)
+ {
+ if (doUseDV)
+ {
+ doc.Add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue2]));
+ }
+ else
+ {
+ doc.Add(new FacetField("dim" + dim, dimValues[dim][dimValue2]));
+ }
+ doc.Add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
+ if (VERBOSE)
+ {
+ Console.WriteLine(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue2]));
+ }
+ }
+ }
+
+ w.AddDocument(config.Build(tw, doc));
+ }
+
+ if (Random().NextBoolean())
+ {
+ // Randomly delete a few docs:
+ int numDel = TestUtil.NextInt(Random(), 1, (int)(numDocs * 0.05));
+ if (VERBOSE)
+ {
+ Console.WriteLine("delete " + numDel);
+ }
+ int delCount = 0;
+ while (delCount < numDel)
+ {
+ Doc doc = docs[Random().Next(docs.Count)];
+ // Only count docs not already deleted, so exactly numDel distinct
+ // docs end up deleted in both the model and the index.
+ if (!doc.deleted)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine(" delete id=" + doc.id);
+ }
+ doc.deleted = true;
+ w.DeleteDocuments(new Term("id", doc.id));
+ delCount++;
+ }
+ }
+ }
+
+ if (Random().NextBoolean())
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: forceMerge(1)...");
+ }
+ w.ForceMerge(1);
+ }
+ IndexReader r = w.Reader;
+
+ SortedSetDocValuesReaderState sortedSetDVState;
+ IndexSearcher s = NewSearcher(r);
+
+ if (doUseDV)
+ {
+ sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.IndexReader);
+ }
+ else
+ {
+ sortedSetDVState = null;
+ }
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("r.numDocs() = " + r.NumDocs);
+ }
+
+ // NRT open
+ var tr = new DirectoryTaxonomyReader(tw);
+
+ int numIters = AtLeast(10);
+
+ for (int iter = 0; iter < numIters; iter++)
+ {
+
+ // A null contentToken (1 in 30 chance) means the base query matches all docs.
+ string contentToken = Random().Next(30) == 17 ? null : randomContentToken(true);
+ int numDrillDown = TestUtil.NextInt(Random(), 1, Math.Min(4, numDims));
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown + " useSortedSetDV=" + doUseDV);
+ }
+
+ // drillDowns[dim] stays null for dims that are not drilled down on.
+ string[][] drillDowns = new string[numDims][];
+
+ int count = 0;
+ bool anyMultiValuedDrillDowns = false;
+ while (count < numDrillDown)
+ {
+ int dim = Random().Next(numDims);
+ if (drillDowns[dim] == null)
+ {
+ if (Random().NextBoolean())
+ {
+ // Drill down on one value:
+ drillDowns[dim] = new string[] { dimValues[dim][Random().Next(dimValues[dim].Length)] };
+ }
+ else
+ {
+ int orCount = TestUtil.NextInt(Random(), 1, Math.Min(5, dimValues[dim].Length));
+ drillDowns[dim] = new string[orCount];
+ anyMultiValuedDrillDowns |= orCount > 1;
+ for (int i = 0; i < orCount; i++)
+ {
+ // Keep drawing until we get a value not already picked for this dim.
+ while (true)
+ {
+ string value = dimValues[dim][Random().Next(dimValues[dim].Length)];
+ for (int j = 0; j < i; j++)
+ {
+ if (value.Equals(drillDowns[dim][j]))
+ {
+ value = null;
+ break;
+ }
+ }
+ if (value != null)
+ {
+ drillDowns[dim][i] = value;
+ break;
+ }
+ }
+ }
+ }
+ if (VERBOSE)
+ {
+ BytesRef[] values = new BytesRef[drillDowns[dim].Length];
+ for (int i = 0; i < values.Length; i++)
+ {
+ values[i] = new BytesRef(drillDowns[dim][i]);
+ }
+ Console.WriteLine(" dim" + dim + "=" + Arrays.ToString(values));
+ }
+ count++;
+ }
+ }
+
+ Query baseQuery;
+ if (contentToken == null)
+ {
+ baseQuery = new MatchAllDocsQuery();
+ }
+ else
+ {
+ baseQuery = new TermQuery(new Term("content", contentToken));
+ }
+
+ DrillDownQuery ddq = new DrillDownQuery(config, baseQuery);
+
+ for (int dim = 0; dim < numDims; dim++)
+ {
+ if (drillDowns[dim] != null)
+ {
+ foreach (string value in drillDowns[dim])
+ {
+ ddq.Add("dim" + dim, value);
+ }
+ }
+ }
+
+ // Occasionally (1 in 7) restrict results with the even-id filter.
+ Filter filter;
+ if (Random().Next(7) == 6)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine(" only-even filter");
+ }
+ filter = new FilterAnonymousInnerClassHelper(this);
+ }
+ else
+ {
+ filter = null;
+ }
+
+ // Verify docs are always collected in order. If we
+ // had an AssertingScorer it could catch it when
+ // Weight.scoresDocsOutOfOrder lies!:
+ (new DrillSideways(s, config, tr)).Search(ddq, new CollectorAnonymousInnerClassHelper(this, s));
+
+ // Also separately verify that DS respects the
+ // scoreSubDocsAtOnce method, to ensure that all
+ // subScorers are on the same docID:
+ if (!anyMultiValuedDrillDowns)
+ {
+ // Can only do this test when there are no OR'd
+ // drill-down values, because in that case it's
+ // easily possible for one of the DD terms to be on
+ // a future docID:
+ new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr)
+ .Search(ddq, new AssertingSubDocsAtOnceCollector());
+ }
+
+ TestFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter);
+
+ Sort sort = new Sort(new SortField("id", SortField.Type_e.STRING));
+ DrillSideways ds;
+ if (doUseDV)
+ {
+ ds = new DrillSideways(s, config, sortedSetDVState);
+ }
+ else
+ {
+ ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, tr, drillDowns);
+ }
+
+ // Retrieve all facets:
+ DrillSidewaysResult actual = ds.Search(ddq, filter, null, numDocs, sort, true, true);
+
+ // Record the score each doc id gets from the base query alone; these are
+ // the reference scores for the comparisons below.
+ TopDocs hits = s.Search(baseQuery, numDocs);
+ IDictionary<string, float?> scores = new Dictionary<string, float?>();
+ foreach (ScoreDoc sd in hits.ScoreDocs)
+ {
+ scores[s.Doc(sd.Doc).Get("id")] = sd.Score;
+ }
+ if (VERBOSE)
+ {
+ Console.WriteLine(" verify all facets");
+ }
+ VerifyEquals(dimValues, s, expected, actual, scores, doUseDV);
+
+ // Make sure drill down doesn't change score:
+ TopDocs ddqHits = s.Search(ddq, filter, numDocs);
+ Assert.AreEqual(expected.Hits.Count, ddqHits.TotalHits);
+ for (int i = 0; i < expected.Hits.Count; i++)
+ {
+ // Score should be IDENTICAL:
+ Assert.AreEqual(scores[expected.Hits[i].id], ddqHits.ScoreDocs[i].Score);
+ }
+ }
+
+ IOUtils.Close(r, tr, w, tw, d, td);
+ }
+
+ /// <summary>
+ /// Filter that keeps only docs whose stored "id" field parses to an even
+ /// integer, while still honoring the supplied acceptDocs.
+ /// </summary>
+ private class FilterAnonymousInnerClassHelper : Filter
+ {
+     private readonly TestDrillSideways outerInstance;
+
+     public FilterAnonymousInnerClassHelper(TestDrillSideways outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Builds a bit set over the segment with one bit set per accepted,
+     /// even-id document.
+     /// </summary>
+     public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
+     {
+         int limit = context.Reader.MaxDoc;
+         FixedBitSet evenDocs = new FixedBitSet(limit);
+
+         for (int docID = 0; docID < limit; docID++)
+         {
+             // Skip docs the caller has excluded.
+             if (acceptDocs != null && !acceptDocs.Get(docID))
+             {
+                 continue;
+             }
+
+             int idValue = Convert.ToInt32(context.Reader.Document(docID).Get("id"));
+             if ((idValue & 1) == 0)
+             {
+                 evenDocs.Set(docID);
+             }
+         }
+
+         return evenDocs;
+     }
+ }
+
+ /// <summary>
+ /// Collector that only checks ordering: within each reader, every collected
+ /// doc ID must be greater than the previous one.
+ /// </summary>
+ private class CollectorAnonymousInnerClassHelper : Collector
+ {
+     private readonly TestDrillSideways outerInstance;
+     private IndexSearcher searcher;
+
+     // Last doc ID seen in the current reader; reset to -1 on every new reader.
+     internal int lastDocID;
+
+     public CollectorAnonymousInnerClassHelper(TestDrillSideways outerInstance, IndexSearcher s)
+     {
+         this.outerInstance = outerInstance;
+         this.searcher = s;
+     }
+
+     // Out-of-order collection is refused so the ordering check is meaningful.
+     public override bool AcceptsDocsOutOfOrder()
+     {
+         return false;
+     }
+
+     // Scores are irrelevant to this collector, so the scorer is ignored.
+     public override Scorer Scorer
+     {
+         set
+         {
+         }
+     }
+
+     public override AtomicReaderContext NextReader
+     {
+         set
+         {
+             lastDocID = -1;
+         }
+     }
+
+     public override void Collect(int doc)
+     {
+         Debug.Assert(lastDocID < doc);
+         lastDocID = doc;
+     }
+ }
+
+ /// <summary>
+ /// DrillSideways variant whose ScoreSubDocsAtOnce always returns true.
+ /// </summary>
+ private class DrillSidewaysAnonymousInnerClassHelper : DrillSideways
+ {
+     private readonly TestDrillSideways outerInstance;
+
+     public DrillSidewaysAnonymousInnerClassHelper(
+         TestDrillSideways outerInstance,
+         IndexSearcher s,
+         Lucene.Net.Facet.FacetsConfig config,
+         TaxonomyReader tr)
+         : base(s, config, tr)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     // Always opt in to scoring all sub-docs at once.
+     protected override bool ScoreSubDocsAtOnce()
+     {
+         return true;
+     }
+ }
+
+ /// <summary>
+ /// DrillSideways variant that builds its facet results through the outer
+ /// test's GetTaxonomyFacetCounts helper.
+ /// </summary>
+ private class DrillSidewaysAnonymousInnerClassHelper2 : DrillSideways
+ {
+     private readonly TestDrillSideways outerInstance;
+
+     private new Lucene.Net.Facet.FacetsConfig config;
+     private string[][] drillDowns;
+
+     public DrillSidewaysAnonymousInnerClassHelper2(
+         TestDrillSideways outerInstance,
+         IndexSearcher s,
+         Lucene.Net.Facet.FacetsConfig config,
+         TaxonomyReader tr,
+         string[][] drillDowns)
+         : base(s, config, tr)
+     {
+         this.outerInstance = outerInstance;
+         this.config = config;
+         this.drillDowns = drillDowns;
+     }
+
+     /// <summary>
+     /// Returns the drill-down facets alone when there are no drill-sideways
+     /// collectors, otherwise a MultiFacets combining the per-dim
+     /// drill-sideways facets with the drill-down facets.
+     /// </summary>
+     protected override Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims)
+     {
+         Facets drillDownFacets = outerInstance.GetTaxonomyFacetCounts(taxoReader, config, drillDowns);
+
+         IDictionary<string, Facets> facetsByDim = new Dictionary<string, Facets>();
+         int sidewaysCount = drillSideways == null ? 0 : drillSideways.Length;
+         for (int idx = 0; idx < sidewaysCount; idx++)
+         {
+             facetsByDim[drillSidewaysDims[idx]] = outerInstance.GetTaxonomyFacetCounts(taxoReader, config, drillSideways[idx]);
+         }
+
+         if (facetsByDim.Count != 0)
+         {
+             return new MultiFacets(facetsByDim, drillDownFacets);
+         }
+         return drillDownFacets;
+     }
+ }
+
+ /// <summary>
+ /// Per-dimension, per-value hit counters: counts[dim][valueIndex].
+ /// </summary>
+ private class Counters
+ {
+     internal int[][] counts;
+
+     /// <summary>
+     /// Allocates one zeroed counter per value of every dimension.
+     /// </summary>
+     public Counters(string[][] dimValues)
+     {
+         int numDims = dimValues.Length;
+         counts = new int[numDims][];
+         for (int d = 0; d < numDims; d++)
+         {
+             counts[d] = new int[dimValues[d].Length];
+         }
+     }
+
+     /// <summary>
+     /// Counts the doc's values in every dimension.
+     /// </summary>
+     public virtual void Inc(int[] dims, int[] dims2)
+     {
+         Inc(dims, dims2, -1);
+     }
+
+     /// <summary>
+     /// Counts the doc's values, either for all dimensions (onlyDim == -1) or
+     /// for the single dimension onlyDim. A value index of -1 means "no value".
+     /// </summary>
+     public virtual void Inc(int[] dims, int[] dims2, int onlyDim)
+     {
+         Debug.Assert(dims.Length == counts.Length);
+         Debug.Assert(dims2.Length == counts.Length);
+
+         for (int d = 0; d < dims.Length; d++)
+         {
+             if (onlyDim != -1 && d != onlyDim)
+             {
+                 continue;
+             }
+
+             int first = dims[d];
+             int second = dims2[d];
+
+             if (first != -1)
+             {
+                 counts[d][first]++;
+             }
+             // Don't count twice when both slots hold the same value.
+             if (second != -1 && second != first)
+             {
+                 counts[d][second]++;
+             }
+         }
+     }
+ }
+
+ /// <summary>
+ /// Plain holder for the expected outcome of one drill-sideways search.
+ /// </summary>
+ internal class TestFacetResult
+ {
+     // Expected hits.
+     internal IList<Doc> Hits;
+
+     // Expected counts, indexed as Counts[dim][valueIndex].
+     internal int[][] Counts;
+
+     // Per dim, the number of values whose expected count is non-zero.
+     internal int[] UniqueCounts;
+
+     public TestFacetResult()
+     {
+     }
+ }
+
+ /// <summary>
+ /// Returns the value indexes of the top-N entries, ordered by count
+ /// descending and then by label ascending, stopping before the first entry
+ /// whose count is zero.
+ /// </summary>
+ private int[] GetTopNOrds(int[] counts, string[] values, int topN)
+ {
+     int[] ids = new int[counts.Length];
+     for (int ord = 0; ord < ids.Length; ord++)
+     {
+         ids[ord] = ord;
+     }
+
+     // Naive (on purpose, to reduce bug in tester/gold):
+     // sort all ids, then return top N slice:
+     var sorter = new InPlaceMergeSorterAnonymousInnerClassHelper(this, counts, values, ids);
+     sorter.Sort(0, ids.Length);
+
+     int limit = topN > ids.Length ? ids.Length : topN;
+
+     // Cut the slice at the first zero count, if any falls within the limit.
+     int numSet = limit;
+     for (int rank = 0; rank < limit; rank++)
+     {
+         if (counts[ids[rank]] == 0)
+         {
+             numSet = rank;
+             break;
+         }
+     }
+
+     int[] topNIDs = new int[numSet];
+     Array.Copy(ids, topNIDs, numSet);
+     return topNIDs;
+ }
+
+ /// <summary>
+ /// Sorts the ids array in place: by counts[id] descending, ties broken by
+ /// the BytesRef ordering of values[id] ascending.
+ /// </summary>
+ private class InPlaceMergeSorterAnonymousInnerClassHelper : InPlaceMergeSorter
+ {
+     private readonly TestDrillSideways outerInstance;
+
+     private int[] counts;
+     private string[] values;
+     private int[] ids;
+
+     public InPlaceMergeSorterAnonymousInnerClassHelper(TestDrillSideways outerInstance, int[] counts, string[] values, int[] ids)
+     {
+         this.outerInstance = outerInstance;
+         this.counts = counts;
+         this.values = values;
+         this.ids = ids;
+     }
+
+     protected override int Compare(int i, int j)
+     {
+         int left = ids[i];
+         int right = ids[j];
+
+         // Sort by count descending...
+         if (counts[left] != counts[right])
+         {
+             return counts[left] > counts[right] ? -1 : 1;
+         }
+
+         // ... then by label ascending:
+         BytesRef leftLabel = new BytesRef(values[left]);
+         BytesRef rightLabel = new BytesRef(values[right]);
+         return leftLabel.CompareTo(rightLabel);
+     }
+
+     protected override void Swap(int i, int j)
+     {
+         int tmp = ids[j];
+         ids[j] = ids[i];
+         ids[i] = tmp;
+     }
+ }
+
+ /// <summary>
+ /// Brute-force reference implementation of a drill-sideways search over the
+ /// in-memory doc model.
+ /// </summary>
+ /// <param name="s">Searcher over the index; not consulted by this method.</param>
+ /// <param name="docs">The in-memory model of every indexed doc.</param>
+ /// <param name="contentToken">Required content token, or null to match all docs.</param>
+ /// <param name="drillDowns">Per dim, the OR'd drill-down values, or null if the dim is not drilled on.</param>
+ /// <param name="dimValues">All possible values of every dim.</param>
+ /// <param name="onlyEven">When non-null, docs with an odd id are excluded.</param>
+ /// <returns>
+ /// The expected hits (sorted by Doc's natural order), the expected counts per
+ /// dim and the number of non-zero counts per dim.
+ /// </returns>
+ private TestFacetResult slowDrillSidewaysSearch(IndexSearcher s, IList<Doc> docs, string contentToken, string[][] drillDowns, string[][] dimValues, Filter onlyEven)
+ {
+     int numDims = dimValues.Length;
+
+     List<Doc> hits = new List<Doc>();
+     Counters drillDownCounts = new Counters(dimValues);
+     Counters[] drillSidewaysCounts = new Counters[numDims];
+     for (int dim = 0; dim < numDims; dim++)
+     {
+         drillSidewaysCounts[dim] = new Counters(dimValues);
+     }
+
+     if (VERBOSE)
+     {
+         Console.WriteLine(" compute expected");
+     }
+
+     foreach (Doc doc in docs)
+     {
+         if (doc.deleted)
+         {
+             continue;
+         }
+         // Short-circuit: the id only needs parsing when the filter is active.
+         if (onlyEven != null && (Convert.ToInt32(doc.id) & 1) != 0)
+         {
+             continue;
+         }
+         if (contentToken != null && !doc.contentToken.Equals(contentToken))
+         {
+             continue;
+         }
+
+         // failDim is the single drill-down dim this doc fails (-1 if none);
+         // failing a second dim means the doc is neither hit nor near-miss.
+         int failDim = -1;
+         bool rejected = false;
+         for (int dim = 0; dim < numDims && !rejected; dim++)
+         {
+             if (drillDowns[dim] == null)
+             {
+                 continue;
+             }
+
+             string docValue = doc.dims[dim] == -1 ? null : dimValues[dim][doc.dims[dim]];
+             string docValue2 = doc.dims2[dim] == -1 ? null : dimValues[dim][doc.dims2[dim]];
+             bool matches = false;
+             foreach (string value in drillDowns[dim])
+             {
+                 if (value.Equals(docValue) || value.Equals(docValue2))
+                 {
+                     matches = true;
+                     break;
+                 }
+             }
+
+             if (matches)
+             {
+                 continue;
+             }
+             if (failDim == -1)
+             {
+                 // Doc could be a near-miss, if no other dim fails
+                 failDim = dim;
+             }
+             else
+             {
+                 // Doc isn't a hit nor a near-miss
+                 rejected = true;
+             }
+         }
+
+         if (rejected)
+         {
+             continue;
+         }
+
+         if (failDim == -1)
+         {
+             if (VERBOSE)
+             {
+                 Console.WriteLine(" exp: id=" + doc.id + " is a hit");
+             }
+             // Hit:
+             hits.Add(doc);
+             drillDownCounts.Inc(doc.dims, doc.dims2);
+             for (int dim = 0; dim < numDims; dim++)
+             {
+                 drillSidewaysCounts[dim].Inc(doc.dims, doc.dims2);
+             }
+         }
+         else
+         {
+             if (VERBOSE)
+             {
+                 Console.WriteLine(" exp: id=" + doc.id + " is a near-miss on dim=" + failDim);
+             }
+             // A near-miss only contributes to the sideways counts of the dim it failed.
+             drillSidewaysCounts[failDim].Inc(doc.dims, doc.dims2, failDim);
+         }
+     }
+
+     // Sort by Doc's natural order (Doc.CompareTo).
+     hits.Sort();
+
+     TestFacetResult res = new TestFacetResult();
+     res.Hits = hits;
+     res.Counts = new int[numDims][];
+     res.UniqueCounts = new int[numDims];
+     for (int dim = 0; dim < numDims; dim++)
+     {
+         // Drilled-on dims report sideways counts; the others report plain
+         // drill-down counts.
+         if (drillDowns[dim] != null)
+         {
+             res.Counts[dim] = drillSidewaysCounts[dim].counts[dim];
+         }
+         else
+         {
+             res.Counts[dim] = drillDownCounts.counts[dim];
+         }
+
+         int uniqueCount = 0;
+         foreach (int c in res.Counts[dim])
+         {
+             if (c != 0)
+             {
+                 uniqueCount++;
+             }
+         }
+         res.UniqueCounts[dim] = uniqueCount;
+     }
+
+     return res;
+ }
+
+ /// <summary>
+ /// Asserts that an actual DrillSideways result matches the brute-force
+ /// expectation: same hits in the same order with identical scores, and for
+ /// every dim the same facet counts.
+ /// </summary>
+ /// <param name="dimValues">All possible values of every dim.</param>
+ /// <param name="s">Searcher used to load the stored "id" of each actual hit.</param>
+ /// <param name="expected">Expected hits and counts.</param>
+ /// <param name="actual">Result returned by DrillSideways.</param>
+ /// <param name="scores">Reference score per doc id.</param>
+ /// <param name="isSortedSetDV">
+ /// True when sorted-set doc-values facets were used; only then is the label
+ /// order of tied counts verified.
+ /// </param>
+ internal virtual void VerifyEquals(string[][] dimValues, IndexSearcher s, TestFacetResult expected, DrillSidewaysResult actual, IDictionary<string, float?> scores, bool isSortedSetDV)
+ {
+     if (VERBOSE)
+     {
+         Console.WriteLine(" verify totHits=" + expected.Hits.Count);
+     }
+     Assert.AreEqual(expected.Hits.Count, actual.Hits.TotalHits);
+     Assert.AreEqual(expected.Hits.Count, actual.Hits.ScoreDocs.Length);
+     for (int i = 0; i < expected.Hits.Count; i++)
+     {
+         if (VERBOSE)
+         {
+             Console.WriteLine(" hit " + i + " expected=" + expected.Hits[i].id);
+         }
+         Assert.AreEqual(expected.Hits[i].id, s.Doc(actual.Hits.ScoreDocs[i].Doc).Get("id"));
+         // Score should be IDENTICAL:
+         Assert.AreEqual(scores[expected.Hits[i].id], actual.Hits.ScoreDocs[i].Score);
+     }
+
+     for (int dim = 0; dim < expected.Counts.Length; dim++)
+     {
+         // Either ask for every value, or for a random top-N slice.
+         int topN = Random().NextBoolean() ? dimValues[dim].Length : TestUtil.NextInt(Random(), 1, dimValues[dim].Length);
+         FacetResult fr = actual.Facets.GetTopChildren(topN, "dim" + dim);
+         if (VERBOSE)
+         {
+             Console.WriteLine(" dim" + dim + " topN=" + topN + " (vs " + dimValues[dim].Length + " unique values)");
+             Console.WriteLine(" actual");
+         }
+
+         int idx = 0;
+         IDictionary<string, int?> actualValues = new Dictionary<string, int?>();
+
+         if (fr != null)
+         {
+             foreach (LabelAndValue labelValue in fr.labelValues)
+             {
+                 actualValues[labelValue.label] = (int)labelValue.value;
+                 if (VERBOSE)
+                 {
+                     Console.WriteLine(" " + idx + ": " + new BytesRef(labelValue.label) + ": " + labelValue.value);
+                     idx++;
+                 }
+             }
+             Assert.AreEqual(expected.UniqueCounts[dim], fr.childCount, "dim=" + dim);
+         }
+
+         if (topN < dimValues[dim].Length)
+         {
+             // Partial result: compare against the expected top-N, in order.
+             int[] topNIDs = GetTopNOrds(expected.Counts[dim], dimValues[dim], topN);
+             if (VERBOSE)
+             {
+                 idx = 0;
+                 Console.WriteLine(" expected (sorted)");
+                 for (int i = 0; i < topNIDs.Length; i++)
+                 {
+                     int expectedOrd = topNIDs[i];
+                     string value = dimValues[dim][expectedOrd];
+                     Console.WriteLine(" " + idx + ": " + new BytesRef(value) + ": " + expected.Counts[dim][expectedOrd]);
+                     idx++;
+                 }
+                 Console.WriteLine(" topN=" + topN + " expectedTopN=" + topNIDs.Length);
+             }
+
+             if (fr != null)
+             {
+                 Assert.AreEqual(topNIDs.Length, fr.labelValues.Length);
+             }
+             else
+             {
+                 Assert.AreEqual(0, topNIDs.Length);
+             }
+             for (int i = 0; i < topNIDs.Length; i++)
+             {
+                 int expectedOrd = topNIDs[i];
+                 Assert.AreEqual(expected.Counts[dim][expectedOrd], (int)fr.labelValues[i].value);
+                 if (isSortedSetDV)
+                 {
+                     // Tie-break facet labels are only in unicode
+                     // order with SortedSetDVFacets:
+                     // NUnit's argument order is (expected, actual, message);
+                     // JUnit's message-first order would compare the message
+                     // text against the expected label.
+                     Assert.AreEqual(dimValues[dim][expectedOrd], fr.labelValues[i].label, "value @ idx=" + i);
+                 }
+             }
+         }
+         else
+         {
+             // Full result: every non-zero expected count must be present with
+             // the same count, and nothing else may be present.
+             if (VERBOSE)
+             {
+                 idx = 0;
+                 Console.WriteLine(" expected (unsorted)");
+                 for (int i = 0; i < dimValues[dim].Length; i++)
+                 {
+                     string value = dimValues[dim][i];
+                     if (expected.Counts[dim][i] != 0)
+                     {
+                         Console.WriteLine(" " + idx + ": " + new BytesRef(value) + ": " + expected.Counts[dim][i]);
+                         idx++;
+                     }
+                 }
+             }
+
+             int setCount = 0;
+             for (int i = 0; i < dimValues[dim].Length; i++)
+             {
+                 string value = dimValues[dim][i];
+                 if (expected.Counts[dim][i] != 0)
+                 {
+                     Assert.True(actualValues.ContainsKey(value));
+                     Assert.AreEqual(expected.Counts[dim][i], (int)actualValues[value]);
+                     setCount++;
+                 }
+                 else
+                 {
+                     Assert.False(actualValues.ContainsKey(value));
+                 }
+             }
+             Assert.AreEqual(setCount, actualValues.Count);
+         }
+     }
+ }
+
+ /// <summary>
+ /// LUCENE-5045: DrillSideways must work against an index with no documents,
+ /// both for a plain top-N search and for a sorted search.
+ /// </summary>
+ [Test]
+ public virtual void TestEmptyIndex()
+ {
+     Directory indexDir = NewDirectory();
+     Directory taxonomyDir = NewDirectory();
+     var indexWriter = new RandomIndexWriter(Random(), indexDir);
+     var taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode_e.CREATE);
+     IndexSearcher searcher = NewSearcher(indexWriter.Reader);
+     var taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);
+
+     // Count "Author"
+     FacetsConfig facetsConfig = new FacetsConfig();
+     DrillSideways drillSideways = new DrillSideways(searcher, facetsConfig, taxonomyReader);
+     DrillDownQuery query = new DrillDownQuery(facetsConfig);
+     query.Add("Author", "Lisa");
+
+     // this used to fail on IllegalArgEx
+     DrillSidewaysResult result = drillSideways.Search(query, 10);
+     Assert.AreEqual(0, result.Hits.TotalHits);
+
+     // this used to fail on IllegalArgEx
+     Sort byFoo = new Sort(new SortField("foo", SortField.Type_e.INT));
+     result = drillSideways.Search(query, null, null, 10, byFoo, false, false);
+     Assert.AreEqual(0, result.Hits.TotalHits);
+
+     IOUtils.Close(indexWriter, taxonomyWriter, searcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
+ }
+ }
+
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/TestFacetsConfig.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Facet/TestFacetsConfig.cs b/src/Lucene.Net.Tests/core/Facet/TestFacetsConfig.cs
new file mode 100644
index 0000000..f9de7c7
--- /dev/null
+++ b/src/Lucene.Net.Tests/core/Facet/TestFacetsConfig.cs
@@ -0,0 +1,131 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using Document = Lucene.Net.Documents.Document;
+ using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader;
+ using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter;
+ using DirectoryReader = Lucene.Net.Index.DirectoryReader;
+ using IndexWriter = Lucene.Net.Index.IndexWriter;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery;
+ using Directory = Lucene.Net.Store.Directory;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ /// <summary>
+ /// Tests for FacetsConfig: path encoding round-trips, indexing the same built
+ /// document twice, and overriding the default dimension configuration.
+ /// </summary>
+ public class TestFacetsConfig : FacetTestCase
+ {
+     /// <summary>
+     /// Random non-empty path components must survive a
+     /// PathToString / StringToPath round trip unchanged.
+     /// </summary>
+     [Test]
+     public virtual void TestPathToStringAndBack()
+     {
+         int iters = AtLeast(1000);
+         for (int iter = 0; iter < iters; iter++)
+         {
+             int numParts = TestUtil.NextInt(Random(), 1, 6);
+             string[] parts = new string[numParts];
+             for (int p = 0; p < numParts; p++)
+             {
+                 // Redraw until the component is non-empty.
+                 string part;
+                 do
+                 {
+                     part = TestUtil.RandomUnicodeString(Random());
+                 } while (part.Length == 0);
+                 parts[p] = part;
+             }
+
+             string encoded = FacetsConfig.PathToString(parts);
+             string[] decoded = FacetsConfig.StringToPath(encoded);
+             Assert.True(Arrays.Equals(parts, decoded));
+         }
+     }
+
+     /// <summary>
+     /// LUCENE-5367: adding the same built document twice must work, and the
+     /// facet value must then be counted twice.
+     /// </summary>
+     [Test]
+     public virtual void TestAddSameDocTwice()
+     {
+         // LUCENE-5367: this was a problem with the previous code, making sure it
+         // works with the new code.
+         Directory indexDir = NewDirectory();
+         Directory taxoDir = NewDirectory();
+         var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+         IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);
+         DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+         FacetsConfig facetsConfig = new FacetsConfig();
+
+         Document rawDoc = new Document();
+         rawDoc.Add(new FacetField("a", "b"));
+         Document builtDoc = facetsConfig.Build(taxoWriter, rawDoc);
+
+         // these two addDocument() used to fail
+         indexWriter.AddDocument(builtDoc);
+         indexWriter.AddDocument(builtDoc);
+         IOUtils.Close(indexWriter, taxoWriter);
+
+         DirectoryReader indexReader = DirectoryReader.Open(indexDir);
+         DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
+         IndexSearcher searcher = NewSearcher(indexReader);
+         FacetsCollector collector = new FacetsCollector();
+         searcher.Search(new MatchAllDocsQuery(), collector);
+
+         Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, collector);
+         FacetResult topChildren = facets.GetTopChildren(10, "a");
+         Assert.AreEqual(1, topChildren.labelValues.Length);
+         Assert.AreEqual(2, topChildren.labelValues[0].value);
+         IOUtils.Close(indexReader, taxoReader);
+
+         IOUtils.Close(indexDir, taxoDir);
+     }
+
+     /// <summary>
+     /// LUCENE-5479
+     /// </summary>
+     [Test]
+     public virtual void TestCustomDefault()
+     {
+         FacetsConfig customConfig = new FacetsConfigAnonymousInnerClassHelper(this);
+
+         // "foobar" is never configured explicitly, so this reads the default.
+         Assert.True(customConfig.GetDimConfig("foobar").hierarchical);
+     }
+
+     /// <summary>
+     /// FacetsConfig whose default dimension configuration is hierarchical.
+     /// </summary>
+     private class FacetsConfigAnonymousInnerClassHelper : FacetsConfig
+     {
+         private readonly TestFacetsConfig outerInstance;
+
+         public FacetsConfigAnonymousInnerClassHelper(TestFacetsConfig outerInstance)
+         {
+             this.outerInstance = outerInstance;
+         }
+
+         protected override DimConfig DefaultDimConfig
+         {
+             get
+             {
+                 return new DimConfig { hierarchical = true };
+             }
+         }
+     }
+ }
+
+}
\ No newline at end of file