You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/14 12:59:21 UTC
[06/26] lucenenet git commit: first commit of facet porting,
failing tests will be fixed in next commits.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/TestMultipleIndexFields.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Facet/TestMultipleIndexFields.cs b/src/Lucene.Net.Tests/core/Facet/TestMultipleIndexFields.cs
new file mode 100644
index 0000000..490836b
--- /dev/null
+++ b/src/Lucene.Net.Tests/core/Facet/TestMultipleIndexFields.cs
@@ -0,0 +1,300 @@
+using System.Collections.Generic;
+using NUnit.Framework;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
+ using Document = Lucene.Net.Documents.Document;
+ using Field = Lucene.Net.Documents.Field;
+ using TextField = Lucene.Net.Documents.TextField;
+ using TaxonomyReader = Lucene.Net.Facet.Taxonomy.TaxonomyReader;
+ using TaxonomyWriter = Lucene.Net.Facet.Taxonomy.TaxonomyWriter;
+ using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader;
+ using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter;
+ using AtomicReader = Lucene.Net.Index.AtomicReader;
+ using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using OpenMode = Lucene.Net.Index.IndexWriterConfig.OpenMode_e;
+ using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery;
+ using Directory = Lucene.Net.Store.Directory;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+
+ public class TestMultipleIndexFields : FacetTestCase
+ {
+ // Facet fields indexed by seedIndex, one per document: three "Author" values, five two-level "Band" paths and one "Composer" value.
+ private static readonly FacetField[] CATEGORIES = { new FacetField("Author", "Mark Twain"), new FacetField("Author", "Stephen King"), new FacetField("Author", "Kurt Vonnegut"), new FacetField("Band", "Rock & Pop", "The Beatles"), new FacetField("Band", "Punk", "The Ramones"), new FacetField("Band", "Rock & Pop", "U2"), new FacetField("Band", "Rock & Pop", "REM"), new FacetField("Band", "Rock & Pop", "Dave Matthews Band"), new FacetField("Composer", "Bach") };
+
+ private FacetsConfig Config
+ {
+     get // each read builds and returns a new FacetsConfig
+     {
+         var facetsConfig = new FacetsConfig();
+         facetsConfig.SetHierarchical("Band", true); // "Band" is the only dimension flagged here
+         return facetsConfig;
+     }
+ }
+
+ /// <summary>Indexes all categories without any custom index field and checks the counts and the "$facets" ordinals.</summary>
+ [Test]
+ public virtual void TestDefault()
+ {
+     Directory indexDirectory = NewDirectory();
+     Directory taxonomyDirectory = NewDirectory();
+
+     // The index writer is opened first, then the taxonomy writer.
+     RandomIndexWriter indexWriter = new RandomIndexWriter(
+         Random(),
+         indexDirectory,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
+     DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
+     FacetsConfig facetsConfig = Config;
+
+     seedIndex(taxonomyWriter, indexWriter, facetsConfig);
+
+     // The reader is taken from the writer before the taxonomy commit.
+     IndexReader indexReader = indexWriter.Reader;
+     taxonomyWriter.Commit();
+
+     DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyDirectory);
+     var searcher = NewSearcher(indexReader);
+     var collector = performSearch(taxonomyReader, indexReader, searcher);
+
+     // Check the counts, then that at least one segment holds "$facets" ordinals.
+     Facets counts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
+     AssertCorrectResults(counts);
+     assertOrdinalsExist("$facets", indexReader);
+
+     IOUtils.Close(taxonomyReader, indexReader, indexWriter, taxonomyWriter, indexDirectory, taxonomyDirectory);
+ }
+
+ /// <summary>Routes the "Author" dimension to the "$author" index field and checks the counts and both ordinal fields.</summary>
+ [Test]
+ public virtual void TestCustom()
+ {
+     Directory indexDirectory = NewDirectory();
+     Directory taxonomyDirectory = NewDirectory();
+
+     // The index writer is opened first, then the taxonomy writer.
+     var indexWriter = new RandomIndexWriter(
+         Random(),
+         indexDirectory,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
+     TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
+
+     var facetsConfig = Config;
+     facetsConfig.SetIndexFieldName("Author", "$author");
+     seedIndex(taxonomyWriter, indexWriter, facetsConfig);
+
+     // The reader is taken from the writer before the taxonomy commit.
+     IndexReader indexReader = indexWriter.Reader;
+     taxonomyWriter.Commit();
+
+     DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyDirectory);
+     var searcher = NewSearcher(indexReader);
+     var collector = performSearch(taxonomyReader, indexReader, searcher);
+
+     // MultiFacets receives the "$author" counts keyed by "Author", plus the counts from the call without a field name.
+     Facets authorCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector, "$author");
+     IDictionary<string, Facets> perDimension = new Dictionary<string, Facets> { { "Author", authorCounts } };
+     Facets defaultCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
+     Facets combined = new MultiFacets(perDimension, defaultCounts);
+
+     AssertCorrectResults(combined);
+     assertOrdinalsExist("$facets", indexReader);
+     assertOrdinalsExist("$author", indexReader);
+
+     IOUtils.Close(taxonomyReader, indexReader, indexWriter, taxonomyWriter, indexDirectory, taxonomyDirectory);
+ }
+
+ /// <summary>Routes both "Band" and "Composer" to one shared "$music" index field and checks the counts and ordinal fields.</summary>
+ [Test]
+ public virtual void TestTwoCustomsSameField()
+ {
+     Directory indexDirectory = NewDirectory();
+     Directory taxonomyDirectory = NewDirectory();
+
+     // The index writer is opened first, then the taxonomy writer.
+     var indexWriter = new RandomIndexWriter(
+         Random(),
+         indexDirectory,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
+     DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
+
+     var facetsConfig = Config;
+     facetsConfig.SetIndexFieldName("Band", "$music");
+     facetsConfig.SetIndexFieldName("Composer", "$music");
+     seedIndex(taxonomyWriter, indexWriter, facetsConfig);
+
+     // The reader is taken from the writer before the taxonomy commit.
+     IndexReader indexReader = indexWriter.Reader;
+     taxonomyWriter.Commit();
+
+     DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyDirectory);
+     var searcher = NewSearcher(indexReader);
+     var collector = performSearch(taxonomyReader, indexReader, searcher);
+
+     // Both dimensions are mapped to the same Facets instance computed from "$music".
+     Facets musicCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector, "$music");
+     IDictionary<string, Facets> perDimension = new Dictionary<string, Facets> { { "Band", musicCounts }, { "Composer", musicCounts } };
+     Facets defaultCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
+     Facets combined = new MultiFacets(perDimension, defaultCounts);
+
+     AssertCorrectResults(combined);
+     assertOrdinalsExist("$facets", indexReader);
+
+     // "$music" is checked twice, once for each dimension stored in it.
+     assertOrdinalsExist("$music", indexReader);
+     assertOrdinalsExist("$music", indexReader);
+
+     IOUtils.Close(taxonomyReader, indexReader, indexWriter, taxonomyWriter, indexDirectory, taxonomyDirectory);
+ }
+
+ /// <summary>Calls Fail unless some segment of <paramref name="ir"/> has binary doc values for <paramref name="field"/>; not all segments must have them.</summary>
+ private void assertOrdinalsExist(string field, IndexReader ir)
+ {
+     foreach (var leaf in ir.Leaves)
+     {
+         if (leaf.AtomicReader.GetBinaryDocValues(field) != null)
+         {
+             return;
+         }
+     }
+     Fail("no ordinals found for " + field);
+ }
+
+ /// <summary>Routes "Band" to "$bands" and "Composer" to "$composers" and checks the counts and all three ordinal fields.</summary>
+ [Test]
+ public virtual void TestDifferentFieldsAndText()
+ {
+     Directory indexDirectory = NewDirectory();
+     Directory taxonomyDirectory = NewDirectory();
+
+     // The index writer is opened first, then the taxonomy writer.
+     RandomIndexWriter indexWriter = new RandomIndexWriter(
+         Random(),
+         indexDirectory,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
+     DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
+
+     var facetsConfig = Config;
+     facetsConfig.SetIndexFieldName("Band", "$bands");
+     facetsConfig.SetIndexFieldName("Composer", "$composers");
+     seedIndex(taxonomyWriter, indexWriter, facetsConfig);
+
+     // The reader is taken from the writer before the taxonomy commit.
+     IndexReader indexReader = indexWriter.Reader;
+     taxonomyWriter.Commit();
+
+     DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyDirectory);
+     var searcher = NewSearcher(indexReader);
+     var collector = performSearch(taxonomyReader, indexReader, searcher);
+
+     // Each redirected dimension gets its own Facets instance, computed from its own index field.
+     Facets bandCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector, "$bands");
+     Facets composerCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector, "$composers");
+     IDictionary<string, Facets> perDimension = new Dictionary<string, Facets> { { "Band", bandCounts }, { "Composer", composerCounts } };
+     Facets combined = new MultiFacets(perDimension, GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector));
+
+     AssertCorrectResults(combined);
+     assertOrdinalsExist("$facets", indexReader);
+     assertOrdinalsExist("$bands", indexReader);
+     assertOrdinalsExist("$composers", indexReader);
+
+     IOUtils.Close(taxonomyReader, indexReader, indexWriter, taxonomyWriter, indexDirectory, taxonomyDirectory);
+ }
+
+ /// <summary>Routes "Band" and "Composer" to "$music" and "Author" to "$literature", then checks the counts and both ordinal fields.</summary>
+ [Test]
+ public virtual void TestSomeSameSomeDifferent()
+ {
+     Directory indexDirectory = NewDirectory();
+     Directory taxonomyDirectory = NewDirectory();
+
+     // The index writer is opened first, then the taxonomy writer.
+     var indexWriter = new RandomIndexWriter(
+         Random(),
+         indexDirectory,
+         NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
+     TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
+
+     var facetsConfig = Config;
+     facetsConfig.SetIndexFieldName("Band", "$music");
+     facetsConfig.SetIndexFieldName("Composer", "$music");
+     facetsConfig.SetIndexFieldName("Author", "$literature");
+     seedIndex(taxonomyWriter, indexWriter, facetsConfig);
+
+     // The reader is taken from the writer before the taxonomy commit.
+     IndexReader indexReader = indexWriter.Reader;
+     taxonomyWriter.Commit();
+
+     DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyDirectory);
+     var searcher = NewSearcher(indexReader);
+     var collector = performSearch(taxonomyReader, indexReader, searcher);
+
+     // "Band" and "Composer" share the "$music" counts; "Author" has its own from "$literature".
+     Facets musicCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector, "$music");
+     Facets literatureCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector, "$literature");
+     IDictionary<string, Facets> perDimension = new Dictionary<string, Facets> { { "Band", musicCounts }, { "Composer", musicCounts }, { "Author", literatureCounts } };
+     Facets defaultCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
+     Facets combined = new MultiFacets(perDimension, defaultCounts);
+
+     AssertCorrectResults(combined);
+     assertOrdinalsExist("$music", indexReader);
+     assertOrdinalsExist("$literature", indexReader);
+
+     // One Close call for all six resources, in the same order as the other tests in this class, so the
+     // directories are still handed to IOUtils.Close when closing a reader or writer throws.
+     IOUtils.Close(taxonomyReader, indexReader, indexWriter, taxonomyWriter, indexDirectory, taxonomyDirectory);
+ }
+
+ // Asserts the "Band" value of 5 and the exact rendered top children of "Band", "Band"/"Rock & Pop" and "Author"; "Composer" is not checked.
+ private void AssertCorrectResults(Facets facets)
+ {
+ Assert.AreEqual(5, facets.GetSpecificValue("Band"));
+ Assert.AreEqual("dim=Band path=[] value=5 childCount=2\n Rock & Pop (4)\n Punk (1)\n", facets.GetTopChildren(10, "Band").ToString());
+ Assert.AreEqual("dim=Band path=[Rock & Pop] value=4 childCount=4\n The Beatles (1)\n U2 (1)\n REM (1)\n Dave Matthews Band (1)\n", facets.GetTopChildren(10, "Band", "Rock & Pop").ToString());
+ Assert.AreEqual("dim=Author path=[] value=3 childCount=3\n Mark Twain (1)\n Stephen King (1)\n Kurt Vonnegut (1)\n", facets.GetTopChildren(10, "Author").ToString());
+ }
+
+ /// <summary>Runs a MatchAllDocsQuery through FacetsCollector.Search (count argument 10) on <paramref name="searcher"/> and returns the collector; <paramref name="tr"/> and <paramref name="ir"/> are not used.</summary>
+ private static FacetsCollector performSearch(TaxonomyReader tr, IndexReader ir, IndexSearcher searcher)
+ {
+     var collector = new FacetsCollector();
+     FacetsCollector.Search(searcher, new MatchAllDocsQuery(), 10, collector);
+     return collector;
+ }
+
+ private static void seedIndex(TaxonomyWriter tw, RandomIndexWriter iw, FacetsConfig config)
+ {
+     foreach (var category in CATEGORIES) // one document per facet field
+     {
+         var document = new Document();
+         document.Add(category);
+         document.Add(new TextField("content", "alpha", Field.Store.YES)); // same stored text in every document
+         iw.AddDocument(config.Build(tw, document));
+     }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/TestRandomSamplingFacetsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Facet/TestRandomSamplingFacetsCollector.cs b/src/Lucene.Net.Tests/core/Facet/TestRandomSamplingFacetsCollector.cs
new file mode 100644
index 0000000..3ada2c5
--- /dev/null
+++ b/src/Lucene.Net.Tests/core/Facet/TestRandomSamplingFacetsCollector.cs
@@ -0,0 +1,154 @@
+using System;
+using System.Diagnostics;
+using Lucene.Net.Randomized.Generators;
+using NUnit.Framework;
+
+namespace Lucene.Net.Facet
+{
+
+ using Document = Lucene.Net.Documents.Document;
+ using Store = Lucene.Net.Documents.Field.Store;
+ using StringField = Lucene.Net.Documents.StringField;
+ using MatchingDocs = Lucene.Net.Facet.FacetsCollector.MatchingDocs;
+ using FastTaxonomyFacetCounts = Lucene.Net.Facet.Taxonomy.FastTaxonomyFacetCounts;
+ using TaxonomyReader = Lucene.Net.Facet.Taxonomy.TaxonomyReader;
+ using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader;
+ using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter;
+ using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
+ using Term = Lucene.Net.Index.Term;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using MultiCollector = Lucene.Net.Search.MultiCollector;
+ using TermQuery = Lucene.Net.Search.TermQuery;
+ using Directory = Lucene.Net.Store.Directory;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestRandomSamplingFacetsCollector : FacetTestCase
+ {
+
+ /// <summary>Checks that a sampling collector copes with a query that matches nothing, and that a 10% sample gives plausible amortized counts.</summary>
+ [Test]
+ public virtual void TestRandomSampling()
+ {
+     Directory dir = NewDirectory();
+     Directory taxoDir = NewDirectory();
+
+     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+     RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
+
+     FacetsConfig config = new FacetsConfig();
+
+     int numDocs = AtLeast(10000);
+     for (int i = 0; i < numDocs; i++)
+     {
+         Document doc = new Document();
+         doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
+         doc.Add(new FacetField("iMod10", Convert.ToString(i % 10)));
+         writer.AddDocument(config.Build(taxoWriter, doc));
+     }
+     Random random = Random();
+
+     // NRT open
+     IndexSearcher searcher = NewSearcher(writer.Reader);
+     var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+     IOUtils.Close(writer, taxoWriter);
+
+     // Test empty results
+     RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong());
+
+     // There should be no divisions by zero
+     searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);
+
+     // There should be no divisions by zero and no null result
+     Assert.NotNull(collectRandomZeroResults.GetMatchingDocs);
+
+     // There should be no results at all
+     foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs)
+     {
+         Assert.AreEqual(0, doc.totalHits);
+     }
+
+     // Now start searching and retrieve results:
+     // use a query to select half of the documents.
+     TermQuery query = new TermQuery(new Term("EvenOdd", "even"));
+
+     // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i %
+     // 10) are hits.
+     // there is a REAL small chance that one of the 5 values will be missed when
+     // sampling.
+     // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
+     // missing) ~ 10^-193
+     // so that is probably not going to happen.
+     int maxNumChildren = 5;
+
+     RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong()); // no sampling
+     RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits
+
+     FacetsCollector fc = new FacetsCollector();
+
+     searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));
+
+     FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
+     FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
+     FastTaxonomyFacetCounts exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc);
+
+     FacetResult random10Result = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
+     FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
+     FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10");
+
+     Assert.AreEqual(exactResult, random100Result, "a sample size equal to numDocs must reproduce the exact counts"); // expected first, then actual
+
+     // we should have five children, but there is a small chance we have less.
+     // (see above).
+     Assert.True(random10Result.childCount <= maxNumChildren, "too many children in the sampled result: " + random10Result.childCount);
+     // there should be one child at least.
+     Assert.True(random10Result.childCount >= 1, "expected at least one child in the sampled result");
+
+     // now calculate some statistics to determine if the sampled result is 'ok'.
+     // because random sampling is used, the results will vary each time.
+     int sum = 0;
+     foreach (LabelAndValue lav in random10Result.labelValues)
+     {
+         sum += (int)lav.value;
+     }
+     float mu = (float)sum / (float)maxNumChildren;
+
+     float variance = 0;
+     foreach (LabelAndValue lav in random10Result.labelValues)
+     {
+         variance += (float)Math.Pow((mu - (int)lav.value), 2);
+     }
+     variance = variance / maxNumChildren;
+     float sigma = (float)Math.Sqrt(variance);
+
+     // we query only half the documents and have 5 categories. The average
+     // number of docs in a category will thus be the total divided by 5*2
+     float targetMu = numDocs / (5.0f * 2.0f);
+
+     // the average should be in the range and the standard deviation should not
+     // be too great
+     Assert.True(sigma < 200, "sigma too large: " + sigma);
+     Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma, "mu=" + mu + " is not within 3 sigma (sigma=" + sigma + ") of targetMu=" + targetMu);
+
+     IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
+ }
+
+ }
+
+}
\ No newline at end of file