You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/25 19:52:08 UTC
[07/16] lucenenet git commit: Move facets into src folder
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/Lucene.Net.Facet/TopOrdAndIntQueue.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/TopOrdAndIntQueue.cs b/Lucene.Net.Facet/TopOrdAndIntQueue.cs
deleted file mode 100644
index 08bdffb..0000000
--- a/Lucene.Net.Facet/TopOrdAndIntQueue.cs
+++ /dev/null
@@ -1,76 +0,0 @@
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Facet
-{
-
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- using Lucene.Net.Util;
-
- /// <summary>
- /// Keeps highest results, first by largest int value,
- /// then tie break by smallest ord.
- /// </summary>
- public class TopOrdAndIntQueue : PriorityQueue<TopOrdAndIntQueue.OrdAndValue>
- {
-
- /// <summary>
- /// Holds a single entry. </summary>
- public sealed class OrdAndValue
- {
-
- /// <summary>
- /// Ordinal of the entry. </summary>
- public int Ord;
-
- /// <summary>
- /// Value associated with the ordinal. </summary>
- public int Value;
-
- /// <summary>
- /// Default constructor. </summary>
- public OrdAndValue()
- {
- }
- }
-
- /// <summary>
- /// Sole constructor. </summary>
- public TopOrdAndIntQueue(int topN)
- : base(topN, false)
- {
- }
-
- public override bool LessThan(OrdAndValue a, OrdAndValue b)
- {
- if (a.Value < b.Value)
- {
- return true;
- }
- else if (a.Value > b.Value)
- {
- return false;
- }
- else
- {
- return a.Ord > b.Ord;
- }
- }
- }
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index afc21f4..9fb7415 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -11,7 +11,7 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.TestFramework",
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Queries", "src\Lucene.Net.Queries\Lucene.Net.Queries.csproj", "{69D7956C-C2CC-4708-B399-A188FEC384C4}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Facet", "Lucene.Net.Facet\Lucene.Net.Facet.csproj", "{48F7884A-9454-4E88-8413-9D35992CB440}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Facet", "src\Lucene.Net.Facet\Lucene.Net.Facet.csproj", "{48F7884A-9454-4E88-8413-9D35992CB440}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/DrillDownQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/DrillDownQuery.cs b/src/Lucene.Net.Facet/DrillDownQuery.cs
new file mode 100644
index 0000000..8234282
--- /dev/null
+++ b/src/Lucene.Net.Facet/DrillDownQuery.cs
@@ -0,0 +1,388 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using Term = Lucene.Net.Index.Term;
+ using Occur = Lucene.Net.Search.BooleanClause.Occur;
+ using BooleanClause = Lucene.Net.Search.BooleanClause;
+ using BooleanQuery = Lucene.Net.Search.BooleanQuery;
+ using ConstantScoreQuery = Lucene.Net.Search.ConstantScoreQuery;
+ using Filter = Lucene.Net.Search.Filter;
+ using FilteredQuery = Lucene.Net.Search.FilteredQuery;
+ using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery;
+ using Query = Lucene.Net.Search.Query;
+ using TermQuery = Lucene.Net.Search.TermQuery;
+
+ /// <summary>
+ /// A <see cref="Query"/> for drill-down over facet categories. You
+ /// should call <c>Add(string, params string[])</c> for every group of categories you
+ /// want to drill-down over.
+ /// <para>
+ /// <b>NOTE:</b> if you choose to create your own <see cref="Query"/> by calling
+ /// <c>Term(string, string, params string[])</c>, it is recommended to wrap it with <see cref="ConstantScoreQuery"/>
+ /// and set the <c>ConstantScoreQuery.Boost</c> to <c>0.0f</c>,
+ /// so that it does not affect the scores of the documents.
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public sealed class DrillDownQuery : Query
+ {
+
+ /// <summary>
+ /// Creates a drill-down term: the term's field is <paramref name="field"/> and its
+ /// text is whatever <c>FacetsConfig.PathToString(dim, path)</c> returns.
+ /// </summary>
+ public static Term Term(string field, string dim, params string[] path)
+ {
+     var encodedPath = FacetsConfig.PathToString(dim, path);
+     return new Term(field, encodedPath);
+ }
+
+ private readonly FacetsConfig config;
+ private readonly BooleanQuery query;
+ private readonly IDictionary<string, int?> drillDownDims = new Dictionary<string, int?>();
+
+ /// <summary>
+ /// Used by <c>Clone()</c>: stores <paramref name="config"/>, keeps a clone of
+ /// <paramref name="query"/> and hands <paramref name="drillDownDims"/> to
+ /// <c>AddAll</c> on this instance's own dimension map.
+ /// </summary>
+ internal DrillDownQuery(FacetsConfig config, BooleanQuery query, IDictionary<string, int?> drillDownDims)
+ {
+     var clonedQuery = query.Clone();
+     this.query = (BooleanQuery)clonedQuery;
+     this.drillDownDims.AddAll(drillDownDims);
+     this.config = config;
+ }
+
+ /// <summary>
+ /// Used by DrillSideways: rebuilds <paramref name="other"/> with its first clause
+ /// wrapped in a <see cref="FilteredQuery"/> using <paramref name="filter"/>; all
+ /// remaining clauses are re-added unchanged as MUST clauses.
+ /// </summary>
+ /// <exception cref="ArgumentException">
+ /// if <paramref name="other"/> has exactly as many clauses as drill-down dimensions,
+ /// i.e. there is no extra leading clause to apply the filter to.
+ /// </exception>
+ internal DrillDownQuery(FacetsConfig config, Filter filter, DrillDownQuery other)
+ {
+     query = new BooleanQuery(true); // disable coord
+
+     BooleanClause[] otherClauses = other.query.Clauses;
+     int dimCount = other.drillDownDims.Count;
+     if (otherClauses.Length == dimCount)
+     {
+         throw new ArgumentException("cannot apply filter unless baseQuery isn't null; pass ConstantScoreQuery instead");
+     }
+     Debug.Assert(otherClauses.Length == 1 + dimCount, otherClauses.Length + " vs " + (1 + dimCount));
+
+     drillDownDims.AddAll(other.drillDownDims);
+
+     // Only the leading clause gets the filter applied.
+     query.Add(new FilteredQuery(otherClauses[0].Query, filter), Occur.MUST);
+     foreach (BooleanClause drillDownClause in otherClauses.Skip(1))
+     {
+         query.Add(drillDownClause.Query, Occur.MUST);
+     }
+
+     this.config = config;
+ }
+
+ /// <summary>
+ /// Used by DrillSideways: assembles the query from an optional
+ /// <paramref name="baseQuery"/> followed by every entry of
+ /// <paramref name="clauses"/>, each added as a MUST clause.
+ /// </summary>
+ internal DrillDownQuery(FacetsConfig config, Query baseQuery, IList<Query> clauses, IDictionary<string, int?> drillDownDims)
+ {
+     var combined = new BooleanQuery(true);
+     if (baseQuery != null)
+     {
+         combined.Add(baseQuery, Occur.MUST);
+     }
+     foreach (Query drillDownClause in clauses)
+     {
+         combined.Add(drillDownClause, Occur.MUST);
+     }
+     query = combined;
+
+     this.drillDownDims.AddAll(drillDownDims);
+     this.config = config;
+ }
+
+ /// <summary>
+ /// Creates a new <c>DrillDownQuery</c> without a base query,
+ /// to perform a pure browsing query (equivalent to using
+ /// <see cref="MatchAllDocsQuery"/> as base). Delegates to the
+ /// two-argument constructor with a <c>null</c> base query.
+ /// </summary>
+ public DrillDownQuery(FacetsConfig config)
+     : this(config, (Query)null)
+ {
+ }
+
+ /// <summary>
+ /// Creates a new <c>DrillDownQuery</c> over the given base query.
+ /// <paramref name="baseQuery"/> may be <c>null</c>, in which case no base clause
+ /// is added and the query filters on the added categories only.
+ /// </summary>
+ public DrillDownQuery(FacetsConfig config, Query baseQuery)
+ {
+     this.config = config;
+
+     var booleanQuery = new BooleanQuery(true); // disable coord
+     if (baseQuery != null)
+     {
+         booleanQuery.Add(baseQuery, Occur.MUST);
+     }
+     query = booleanQuery;
+ }
+
+ /// <summary>
+ /// Merges (ORs) a new path into the existing AND'd clause of
+ /// <paramref name="dim"/>.
+ /// </summary>
+ /// <param name="dim">Dimension to merge into; its clause index is looked up in the dimension map.</param>
+ /// <param name="path">Category path added as a further SHOULD term for that dimension.</param>
+ /// <exception cref="InvalidOperationException">
+ /// if the dimension's clause does not wrap a <see cref="BooleanQuery"/>, e.g. because it
+ /// was added through one of the custom Query/Filter overloads.
+ /// </exception>
+ private void Merge(string dim, string[] path)
+ {
+     int index = 0;
+     int? idx;
+     if (drillDownDims.TryGetValue(dim, out idx) && idx.HasValue)
+     {
+         index = idx.Value;
+     }
+
+     // One clause more than there are dimensions means clause 0 is the base
+     // query, so the dimension clauses are shifted by one.
+     if (query.Clauses.Length == drillDownDims.Count + 1)
+     {
+         index++;
+     }
+     ConstantScoreQuery q = (ConstantScoreQuery)query.Clauses[index].Query;
+     if ((q.Query is BooleanQuery) == false)
+     {
+         // App called .add(dim, customQuery) and then tried to
+         // merge a facet label in. Throw a specific exception type
+         // rather than the bare System.Exception.
+         throw new InvalidOperationException("cannot merge with custom Query");
+     }
+     string indexedField = config.GetDimConfig(dim).IndexFieldName;
+
+     BooleanQuery bq = (BooleanQuery)q.Query;
+     bq.Add(new TermQuery(Term(indexedField, dim, path)), Occur.SHOULD);
+ }
+
+ /// <summary>
+ /// Adds one dimension of drill downs; if you pass the same
+ /// dimension more than once it is OR'd with the previous
+ /// constraints on that dimension, and all dimensions are
+ /// AND'd against each other and the base query.
+ /// </summary>
+ public void Add(string dim, params string[] path)
+ {
+     if (!drillDownDims.ContainsKey(dim))
+     {
+         // First time this dimension is seen: start a fresh OR-group for it.
+         string indexedField = config.GetDimConfig(dim).IndexFieldName;
+
+         BooleanQuery bq = new BooleanQuery(true); // disable coord
+         var pathQuery = new TermQuery(Term(indexedField, dim, path));
+         bq.Add(pathQuery, Occur.SHOULD);
+
+         Add(dim, bq);
+     }
+     else
+     {
+         Merge(dim, path);
+     }
+ }
+
+ /// <summary>
+ /// Expert: add a custom drill-down subQuery. Use this
+ /// when you have a separate way to drill-down on the
+ /// dimension than the indexed facet ordinals.
+ /// </summary>
+ /// <exception cref="ArgumentException">if <paramref name="dim"/> already has a drill-down.</exception>
+ public void Add(string dim, Query subQuery)
+ {
+     if (drillDownDims.ContainsKey(dim))
+     {
+         throw new ArgumentException("dimension \"" + dim + "\" already has a drill-down");
+     }
+     // TODO: we should use FilteredQuery?
+
+     // Zero boost so the drill-down clause does not influence scores.
+     var drillDownQuery = new ConstantScoreQuery(subQuery) { Boost = 0.0f };
+     query.Add(drillDownQuery, Occur.MUST);
+
+     // The dimension's value is its position among the drill-down clauses.
+     int position = drillDownDims.Count;
+     drillDownDims[dim] = position;
+ }
+
+ /// <summary>
+ /// Expert: add a custom drill-down Filter, e.g. when
+ /// drilling down after range faceting.
+ /// </summary>
+ /// <exception cref="ArgumentException">if <paramref name="dim"/> already has a drill-down.</exception>
+ public void Add(string dim, Filter subFilter)
+ {
+     if (drillDownDims.ContainsKey(dim))
+     {
+         throw new ArgumentException("dimension \"" + dim + "\" already has a drill-down");
+     }
+
+     // TODO: we should use FilteredQuery?
+
+     // Zero boost so the drill-down clause does not influence scores.
+     var drillDownQuery = new ConstantScoreQuery(subFilter) { Boost = 0.0f };
+     query.Add(drillDownQuery, Occur.MUST);
+
+     // The dimension's value is its position among the drill-down clauses.
+     int position = drillDownDims.Count;
+     drillDownDims[dim] = position;
+ }
+
+ /// <summary>
+ /// Unwraps nested <see cref="ConstantScoreQuery"/> instances until one exposes a
+ /// non-null <c>Filter</c>, which is returned. Returns <c>null</c> as soon as the
+ /// query being inspected is not a <see cref="ConstantScoreQuery"/>.
+ /// </summary>
+ internal static Filter GetFilter(Query query)
+ {
+     Query current = query;
+     while (true)
+     {
+         var csq = current as ConstantScoreQuery;
+         if (csq == null)
+         {
+             return null;
+         }
+
+         Filter filter = csq.Filter;
+         if (filter != null)
+         {
+             return filter;
+         }
+
+         // No filter at this level: look at the wrapped query next.
+         current = csq.Query;
+     }
+ }
+
+ /// <summary>
+ /// Returns a new <c>DrillDownQuery</c> built through the internal copy
+ /// constructor from this instance's config, query and dimension map.
+ /// </summary>
+ public override object Clone()
+ {
+     var copy = new DrillDownQuery(config, query, drillDownDims);
+     return copy;
+ }
+
+ /// <summary>
+ /// Combines the base class hash code with the hash code of the wrapped
+ /// boolean query (base hash times 31, plus the query hash).
+ /// </summary>
+ public override int GetHashCode()
+ {
+     int baseHash = base.GetHashCode();
+     int queryHash = query.GetHashCode();
+     return 31 * baseHash + queryHash;
+ }
+
+ /// <summary>
+ /// Two instances are equal when <paramref name="obj"/> is a <c>DrillDownQuery</c>,
+ /// the wrapped boolean queries are equal and the base class also reports equality.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+     var other = obj as DrillDownQuery;
+     if (object.ReferenceEquals(other, null))
+     {
+         return false;
+     }
+
+     return query.Equals(other.query) && base.Equals(other);
+ }
+
+ /// <summary>
+ /// Rewrites this query. With no clauses a <see cref="MatchAllDocsQuery"/> is returned.
+ /// If none of the drill-down clauses yields a filter (see <c>GetFilter</c>) the wrapped
+ /// boolean query is returned as is; otherwise the non-filter clauses are combined with
+ /// the base query and each filter is applied on top through a <see cref="FilteredQuery"/>.
+ /// </summary>
+ public override Query Rewrite(IndexReader r)
+ {
+     IList<BooleanClause> clauses = query.Clauses;
+     if (clauses.Count == 0)
+     {
+         return new MatchAllDocsQuery();
+     }
+
+     // As many clauses as dimensions means there is no base query clause.
+     bool pureBrowse = drillDownDims.Count == clauses.Count;
+     Query baseQuery;
+     int startIndex;
+     if (pureBrowse)
+     {
+         baseQuery = new MatchAllDocsQuery();
+         startIndex = 0;
+     }
+     else
+     {
+         baseQuery = clauses[0].Query;
+         startIndex = 1;
+     }
+
+     // Split the drill-down clauses into filter-backed ones and plain queries.
+     var filters = new List<Filter>();
+     var queries = new List<Query>();
+     for (int i = startIndex; i < clauses.Count; i++)
+     {
+         Query queryClause = clauses[i].Query;
+         Filter clauseFilter = GetFilter(queryClause);
+         if (clauseFilter == null)
+         {
+             queries.Add(queryClause);
+         }
+         else
+         {
+             filters.Add(clauseFilter);
+         }
+     }
+
+     if (filters.Count == 0)
+     {
+         return query;
+     }
+
+     // Wrap all filters using FilteredQuery
+
+     // TODO: this is hackish; we need to do it because
+     // BooleanQuery can't be trusted to handle the
+     // "expensive filter" case. Really, each Filter should
+     // know its cost and we should take that more
+     // carefully into account when picking the right
+     // strategy/optimization:
+     Query wrapped = baseQuery;
+     if (queries.Count != 0)
+     {
+         // disable coord
+         BooleanQuery wrappedBQ = new BooleanQuery(true);
+         if ((baseQuery is MatchAllDocsQuery) == false)
+         {
+             wrappedBQ.Add(baseQuery, Occur.MUST);
+         }
+         foreach (Query q in queries)
+         {
+             wrappedBQ.Add(q, Occur.MUST);
+         }
+         wrapped = wrappedBQ;
+     }
+
+     foreach (Filter f in filters)
+     {
+         wrapped = new FilteredQuery(wrapped, f, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
+     }
+
+     return wrapped;
+ }
+
+ /// <summary>
+ /// Returns the string form of the wrapped boolean query for <paramref name="field"/>.
+ /// </summary>
+ public override string ToString(string field)
+ {
+     string rendered = query.ToString(field);
+     return rendered;
+ }
+
+ /// <summary>
+ /// The wrapped boolean query itself (not a copy).
+ /// </summary>
+ internal BooleanQuery BooleanQuery
+ {
+     get { return query; }
+ }
+
+ /// <summary>
+ /// The drill-down dimension map itself (not a copy).
+ /// </summary>
+ internal IDictionary<string, int?> Dims
+ {
+     get { return drillDownDims; }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/DrillSideways.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/DrillSideways.cs b/src/Lucene.Net.Facet/DrillSideways.cs
new file mode 100644
index 0000000..5ba8f91
--- /dev/null
+++ b/src/Lucene.Net.Facet/DrillSideways.cs
@@ -0,0 +1,295 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Lucene.Net.Search;
+using Lucene.Net.Facet;
+using Lucene.Net.Facet.SortedSet;
+using Lucene.Net.Facet.Taxonomy;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Computes drill down and sideways counts for the provided
+ /// <seealso cref="DrillDownQuery"/>. Drill sideways counts include
+ /// alternative values/aggregates for the drill-down
+ /// dimensions so that a dimension does not disappear after
+ /// the user drills down into it.
+ ///
+ /// <para> Use one of the static search
+ /// methods to do the search, and then get the hits and facet
+ /// results from the returned <seealso cref="DrillSidewaysResult"/>.
+ ///
+ /// </para>
+ /// <para><b>NOTE</b>: this allocates one <see cref="FacetsCollector"/>
+ /// for each drill-down, plus one. If your
+ /// index has a high number of facet labels then this will
+ /// multiply your memory usage.
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public class DrillSideways
+ {
+
+ /// <summary>
+ /// <seealso cref="IndexSearcher"/> passed to constructor. </summary>
+ protected internal readonly IndexSearcher searcher;
+
+ /// <summary>
+ /// <seealso cref="TaxonomyReader"/> passed to constructor. </summary>
+ protected internal readonly TaxonomyReader taxoReader;
+
+ /// <summary>
+ /// <seealso cref="SortedSetDocValuesReaderState"/> passed to
+ /// constructor; can be null.
+ /// </summary>
+ protected internal readonly SortedSetDocValuesReaderState state;
+
+ /// <summary>
+ /// <seealso cref="FacetsConfig"/> passed to constructor. </summary>
+ protected internal readonly FacetsConfig config;
+
+ /// <summary>
+ /// Create a new <c>DrillSideways</c> instance that uses a taxonomy reader only;
+ /// the sorted-set state is passed on as <c>null</c>.
+ /// </summary>
+ public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader)
+     : this(searcher, config, taxoReader, (SortedSetDocValuesReaderState)null)
+ {
+ }
+
+ /// <summary>
+ /// Create a new <c>DrillSideways</c> instance, assuming the categories were
+ /// indexed with <c>SortedSetDocValuesFacetField</c>; the taxonomy reader is
+ /// passed on as <c>null</c>.
+ /// </summary>
+ public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state)
+     : this(searcher, config, (TaxonomyReader)null, state)
+ {
+ }
+
+ /// <summary>
+ /// Create a new <c>DrillSideways</c> instance, where some
+ /// dimensions were indexed with <c>SortedSetDocValuesFacetField</c>
+ /// and others were indexed with <c>FacetField</c>. All four arguments
+ /// are stored as given.
+ /// </summary>
+ public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, SortedSetDocValuesReaderState state)
+ {
+     this.state = state;
+     this.taxoReader = taxoReader;
+     this.config = config;
+     this.searcher = searcher;
+ }
+
+
+ /// <summary>
+ /// Subclass can override to customize per-dim Facets impl.
+ /// The default builds taxonomy-based counts when a taxonomy reader is set and
+ /// sorted-set counts otherwise; when sideways collectors are supplied, the
+ /// per-dimension counts are combined with the drill-down counts in a
+ /// <c>MultiFacets</c>.
+ /// </summary>
+ protected virtual Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims)
+ {
+     bool useTaxonomy = taxoReader != null;
+
+     Facets drillDownFacets;
+     if (useTaxonomy)
+     {
+         drillDownFacets = new FastTaxonomyFacetCounts(taxoReader, config, drillDowns);
+     }
+     else
+     {
+         drillDownFacets = new SortedSetDocValuesFacetCounts(state, drillDowns);
+     }
+
+     var drillSidewaysFacets = new Dictionary<string, Facets>();
+     int sidewaysCount = drillSideways == null ? 0 : drillSideways.Length;
+     for (int i = 0; i < sidewaysCount; i++)
+     {
+         string dim = drillSidewaysDims[i];
+         Facets dimFacets;
+         if (useTaxonomy)
+         {
+             dimFacets = new FastTaxonomyFacetCounts(taxoReader, config, drillSideways[i]);
+         }
+         else
+         {
+             dimFacets = new SortedSetDocValuesFacetCounts(state, drillSideways[i]);
+         }
+         drillSidewaysFacets[dim] = dimFacets;
+     }
+
+     if (drillSidewaysFacets.Count == 0)
+     {
+         return drillDownFacets;
+     }
+     return new MultiFacets(drillSidewaysFacets, drillDownFacets);
+ }
+
+ /// <summary>
+ /// Search, collecting hits with a <see cref="Collector"/>, and
+ /// computing drill down and sideways counts.
+ /// </summary>
+ /// <param name="query">Drill-down query; its dimensions decide which sideways counts are computed.</param>
+ /// <param name="hitCollector">Collector that receives the hits.</param>
+ /// <returns>
+ /// A result whose facets come from <c>BuildFacetsResult</c>; its hits are
+ /// <c>null</c> because hits are delivered to <paramref name="hitCollector"/>.
+ /// </returns>
+ public virtual DrillSidewaysResult Search(DrillDownQuery query, Collector hitCollector)
+ {
+
+     IDictionary<string, int?> drillDownDims = query.Dims;
+
+     FacetsCollector drillDownCollector = new FacetsCollector();
+
+     if (drillDownDims.Count == 0)
+     {
+         // There are no drill-down dims, so there is no
+         // drill-sideways to compute:
+         searcher.Search(query, MultiCollector.Wrap(hitCollector, drillDownCollector));
+         return new DrillSidewaysResult(BuildFacetsResult(drillDownCollector, null, null), null);
+     }
+
+     BooleanQuery ddq = query.BooleanQuery;
+     BooleanClause[] clauses = ddq.Clauses;
+
+     Query baseQuery;
+     int startClause;
+     if (clauses.Length == drillDownDims.Count)
+     {
+         // TODO: we could optimize this pure-browse case by
+         // making a custom scorer instead:
+         baseQuery = new MatchAllDocsQuery();
+         startClause = 0;
+     }
+     else
+     {
+         Debug.Assert(clauses.Length == 1 + drillDownDims.Count);
+         baseQuery = clauses[0].Query;
+         startClause = 1;
+     }
+
+     FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.Count];
+     for (int i = 0; i < drillSidewaysCollectors.Length; i++)
+     {
+         drillSidewaysCollectors[i] = new FacetsCollector();
+     }
+
+     Query[] drillDownQueries = new Query[clauses.Length - startClause];
+     for (int i = startClause; i < clauses.Length; i++)
+     {
+         drillDownQueries[i - startClause] = clauses[i].Query;
+     }
+     DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, ScoreSubDocsAtOnce());
+     searcher.Search(dsq, hitCollector);
+
+     // Sideways collector i belongs to drill-down clause i, so the dimension
+     // names must be listed in clause order. The map's values hold each
+     // dimension's clause position (as recorded by DrillDownQuery.Add), and a
+     // plain dictionary gives no enumeration-order guarantee, so sort by that
+     // position instead of trusting the order of Keys.
+     string[] drillSidewaysDims = drillDownDims
+         .OrderBy(entry => entry.Value)
+         .Select(entry => entry.Key)
+         .ToArray();
+
+     return new DrillSidewaysResult(BuildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillSidewaysDims), null);
+ }
+
+ /// <summary>
+ /// Search, sorting by <see cref="Sort"/>, and computing
+ /// drill down and sideways counts. A non-null <paramref name="filter"/> is
+ /// folded into the query first; when <paramref name="sort"/> is <c>null</c>
+ /// the call is forwarded to the score-sorted overload.
+ /// </summary>
+ public virtual DrillSidewaysResult Search(DrillDownQuery query, Filter filter, FieldDoc after, int topN, Sort sort, bool doDocScores, bool doMaxScore)
+ {
+     if (filter != null)
+     {
+         query = new DrillDownQuery(config, filter, query);
+     }
+
+     if (sort == null)
+     {
+         return Search(after, query, topN);
+     }
+
+     // the collector does not allow numHits = 0
+     int maxDoc = searcher.IndexReader.MaxDoc;
+     int limit = maxDoc == 0 ? 1 : maxDoc;
+     topN = Math.Min(topN, limit);
+
+     TopFieldCollector hitCollector = TopFieldCollector.Create(sort, topN, after, true, doDocScores, doMaxScore, true);
+     Facets facets = Search(query, hitCollector).Facets;
+     return new DrillSidewaysResult(facets, hitCollector.TopDocs());
+ }
+
+ /// <summary>
+ /// Search, sorting by score, and computing
+ /// drill down and sideways counts. Forwards to the overload that takes an
+ /// "after" document, passing <c>null</c> for it.
+ /// </summary>
+ public virtual DrillSidewaysResult Search(DrillDownQuery query, int topN)
+ {
+     ScoreDoc noAfter = null;
+     return Search(noAfter, query, topN);
+ }
+
+ /// <summary>
+ /// Search, sorting by score, and computing
+ /// drill down and sideways counts. <paramref name="topN"/> is capped at the
+ /// reader's <c>MaxDoc</c> (or 1 when that is 0).
+ /// </summary>
+ public virtual DrillSidewaysResult Search(ScoreDoc after, DrillDownQuery query, int topN)
+ {
+     // the collector does not allow numHits = 0
+     int maxDoc = searcher.IndexReader.MaxDoc;
+     int limit = maxDoc == 0 ? 1 : maxDoc;
+     topN = Math.Min(topN, limit);
+
+     TopScoreDocCollector hitCollector = TopScoreDocCollector.Create(topN, after, true);
+     Facets facets = Search(query, hitCollector).Facets;
+     return new DrillSidewaysResult(facets, hitCollector.TopDocs());
+ }
+
+ /// <summary>
+ /// Override this and return true if your collector
+ /// (e.g., <c>ToParentBlockJoinCollector</c>) expects all
+ /// sub-scorers to be positioned on the document being
+ /// collected. This will cause some performance loss;
+ /// default is false. Note that if you return true from
+ /// this method (in a subclass) be sure your collector
+ /// also returns false from
+ /// <c>Collector.AcceptsDocsOutOfOrder</c>: this will trick
+ /// <c>BooleanQuery</c> into also scoring all subDocs at
+ /// once.
+ /// </summary>
+ /// <returns>Always <c>false</c> in this base implementation.</returns>
+ protected virtual bool ScoreSubDocsAtOnce()
+ {
+ return false;
+ }
+
+ /// <summary>
+ /// Result of a drill sideways search, including the
+ /// <see cref="Facets"/> and <see cref="TopDocs"/>.
+ /// </summary>
+ public class DrillSidewaysResult
+ {
+ /// <summary>
+ /// Combined drill down &amp; sideways results. </summary>
+ public readonly Facets Facets;
+
+ /// <summary>
+ /// Hits. </summary>
+ public readonly TopDocs Hits;
+
+ /// <summary>
+ /// Sole constructor; stores both arguments as given. </summary>
+ public DrillSidewaysResult(Facets facets, TopDocs hits)
+ {
+ this.Facets = facets;
+ this.Hits = hits;
+ }
+ }
+ }
+
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/DrillSidewaysQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/DrillSidewaysQuery.cs b/src/Lucene.Net.Facet/DrillSidewaysQuery.cs
new file mode 100644
index 0000000..c8e782e
--- /dev/null
+++ b/src/Lucene.Net.Facet/DrillSidewaysQuery.cs
@@ -0,0 +1,316 @@
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using Collector = Lucene.Net.Search.Collector;
+ using DocIdSet = Lucene.Net.Search.DocIdSet;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using Explanation = Lucene.Net.Search.Explanation;
+ using Filter = Lucene.Net.Search.Filter;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using Query = Lucene.Net.Search.Query;
+ using Scorer = Lucene.Net.Search.Scorer;
+ using BulkScorer = Lucene.Net.Search.BulkScorer;
+ using Weight = Lucene.Net.Search.Weight;
+ using Bits = Lucene.Net.Util.Bits;
+
+ /// <summary>
+ /// Only purpose is to punch through and return a
+ /// DrillSidewaysScorer
+ /// </summary>
+
+ internal class DrillSidewaysQuery : Query
+ {
+ internal readonly Query baseQuery;
+ internal readonly Collector drillDownCollector;
+ internal readonly Collector[] drillSidewaysCollectors;
+ internal readonly Query[] drillDownQueries;
+ internal readonly bool scoreSubDocsAtOnce;
+
+ /// <summary>
+ /// Stores the base query, the collectors and the drill-down queries exactly as
+ /// given; nothing is copied or validated here.
+ /// </summary>
+ internal DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Query[] drillDownQueries, bool scoreSubDocsAtOnce)
+ {
+     this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
+     this.drillDownQueries = drillDownQueries;
+     this.drillSidewaysCollectors = drillSidewaysCollectors;
+     this.drillDownCollector = drillDownCollector;
+     this.baseQuery = baseQuery;
+ }
+
+ /// <summary>
+ /// Returns a fixed label; <paramref name="field"/> is not used.
+ /// </summary>
+ public override string ToString(string field)
+ {
+     const string label = "DrillSidewaysQuery";
+     return label;
+ }
+
+ /// <summary>
+ /// Rewrites the base query repeatedly until a rewrite returns the same instance.
+ /// Returns <c>this</c> when the base query did not change, otherwise a new
+ /// <c>DrillSidewaysQuery</c> around the rewritten base query with all other
+ /// members carried over.
+ /// </summary>
+ public override Query Rewrite(IndexReader reader)
+ {
+     Query current = baseQuery;
+     Query next = current.Rewrite(reader);
+     while (next != current)
+     {
+         current = next;
+         next = current.Rewrite(reader);
+     }
+
+     if (current == baseQuery)
+     {
+         return this;
+     }
+     return new DrillSidewaysQuery(current, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce);
+ }
+
+ /// <summary>
+ /// Creates the weight for the base query and, per drill-down query, either the
+ /// filter found by <c>DrillDownQuery.GetFilter</c> or, when there is none, the
+ /// weight of the rewritten query. Both are handed to the helper weight.
+ /// </summary>
+ public override Weight CreateWeight(IndexSearcher searcher)
+ {
+     Weight baseWeight = baseQuery.CreateWeight(searcher);
+
+     int dimCount = drillDownQueries.Length;
+     object[] drillDowns = new object[dimCount];
+     for (int dim = 0; dim < dimCount; dim++)
+     {
+         Query drillDownQuery = drillDownQueries[dim];
+         Filter filter = DrillDownQuery.GetFilter(drillDownQuery);
+         if (filter == null)
+         {
+             // TODO: would be nice if we could say "we will do no
+             // scoring" here....
+             drillDowns[dim] = searcher.Rewrite(drillDownQuery).CreateWeight(searcher);
+         }
+         else
+         {
+             drillDowns[dim] = filter;
+         }
+     }
+
+     return new WeightAnonymousInnerClassHelper(this, baseWeight, drillDowns);
+ }
+
+ /// <summary>
+ /// Weight returned by <c>CreateWeight</c>. Scoring-related members delegate to the
+ /// base query's weight; the only supported way to score is <c>BulkScorer</c>,
+ /// which builds a <c>DrillSidewaysScorer</c>.
+ /// </summary>
+ private class WeightAnonymousInnerClassHelper : Weight
+ {
+ // Query that created this weight; gives access to its collectors and flags.
+ private readonly DrillSidewaysQuery outerInstance;
+
+ // Weight of the base query.
+ private Weight baseWeight;
+ // One entry per drill-down dimension: either a Filter or a Weight.
+ private object[] drillDowns;
+
+ /// <summary>
+ /// Stores the three arguments as given. </summary>
+ public WeightAnonymousInnerClassHelper(DrillSidewaysQuery outerInstance, Weight baseWeight, object[] drillDowns)
+ {
+ this.outerInstance = outerInstance;
+ this.baseWeight = baseWeight;
+ this.drillDowns = drillDowns;
+ }
+
+ /// <summary>
+ /// Delegates to the base weight. </summary>
+ public override Explanation Explain(AtomicReaderContext context, int doc)
+ {
+ return baseWeight.Explain(context, doc);
+ }
+
+ /// <summary>
+ /// The base query of the owning <c>DrillSidewaysQuery</c>. </summary>
+ public override Query Query
+ {
+ get
+ {
+ return outerInstance.baseQuery;
+ }
+ }
+
+ /// <summary>
+ /// Delegates to the base weight. </summary>
+ public override float ValueForNormalization
+ {
+ get
+ {
+ return baseWeight.ValueForNormalization;
+ }
+ }
+
+ /// <summary>
+ /// Delegates to the base weight. </summary>
+ public override void Normalize(float norm, float topLevelBoost)
+ {
+ baseWeight.Normalize(norm, topLevelBoost);
+ }
+
+ /// <summary>
+ /// Always reports in-order scoring. </summary>
+ public override bool ScoresDocsOutOfOrder()
+ {
+ // TODO: would be nice if AssertingIndexSearcher
+ // confirmed this for us
+ return false;
+ }
+
+ /// <summary>
+ /// Not supported; always throws <see cref="System.NotSupportedException"/>. </summary>
+ public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
+ {
+ // We can only run as a top scorer:
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary>
+ /// Builds the <c>DrillSidewaysScorer</c> for one segment. Each drill-down entry
+ /// becomes a <c>DocsAndCost</c> carrying its sideways collector plus either
+ /// random-access bits or an iterator. Returns <c>null</c> when more than one
+ /// dimension produced a null iterator/scorer, or when the base query has no
+ /// scorer for this segment.
+ /// </summary>
+ public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs)
+ {
+
+ // TODO: it could be better if we take acceptDocs
+ // into account instead of baseScorer?
+ Scorer baseScorer = baseWeight.Scorer(context, acceptDocs);
+
+ DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDowns.Length];
+ // Number of dimensions whose iterator/scorer came back null.
+ int nullCount = 0;
+ for (int dim = 0; dim < dims.Length; dim++)
+ {
+ dims[dim] = new DrillSidewaysScorer.DocsAndCost();
+ dims[dim].sidewaysCollector = outerInstance.drillSidewaysCollectors[dim];
+ if (drillDowns[dim] is Filter)
+ {
+ // Pass null for acceptDocs because we already
+ // passed it to baseScorer and baseScorer is
+ // MUST'd here
+ DocIdSet dis = ((Filter)drillDowns[dim]).GetDocIdSet(context, null);
+
+ // NOTE: a null DocIdSet skips the dim without incrementing nullCount,
+ // unlike the null-iterator cases below.
+ if (dis == null)
+ {
+ continue;
+ }
+
+ Bits bits = dis.GetBits();
+
+ if (bits != null)
+ {
+ // TODO: this logic is too naive: the
+ // existence of bits() in DIS today means
+ // either "I'm a cheap FixedBitSet so apply me down
+ // low as you decode the postings" or "I'm so
+ // horribly expensive so apply me after all
+ // other Query/Filter clauses pass"
+
+ // Filter supports random access; use that to
+ // prevent .advance() on costly filters:
+ dims[dim].bits = bits;
+
+ // TODO: Filter needs to express its expected
+ // cost somehow, before pulling the iterator;
+ // we should use that here to set the order to
+ // check the filters:
+
+ }
+ else
+ {
+ DocIdSetIterator disi = dis.GetIterator();
+ if (disi == null)
+ {
+ nullCount++;
+ continue;
+ }
+ dims[dim].disi = disi;
+ }
+ }
+ else
+ {
+ // Not a Filter, so the entry is the Weight created in CreateWeight.
+ DocIdSetIterator disi = ((Weight)drillDowns[dim]).Scorer(context, null);
+ if (disi == null)
+ {
+ nullCount++;
+ continue;
+ }
+ dims[dim].disi = disi;
+ }
+ }
+
+ // If more than one dim has no matches, then there
+ // are no hits nor drill-sideways counts. Or, if we
+ // have only one dim and that dim has no matches,
+ // same thing.
+ //if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
+ if (nullCount > 1)
+ {
+ return null;
+ }
+
+ // Sort drill-downs by most restrictive first:
+ Array.Sort(dims);
+
+ // No base scorer means the base query matches nothing in this segment.
+ if (baseScorer == null)
+ {
+ return null;
+ }
+
+ return new DrillSidewaysScorer(context, baseScorer, outerInstance.drillDownCollector, dims, outerInstance.scoreSubDocsAtOnce);
+ }
+ }
+
+ // TODO: these should do "deeper" equals/hash on the 2-D drillDownTerms array
+
+ /// <summary>
+ /// Folds the base hash, the base query, the drill-down collector and both
+ /// arrays into one hash code using a multiplier of 31; null members count as 0.
+ /// </summary>
+ public override int GetHashCode()
+ {
+     int hash = base.GetHashCode();
+     hash = 31 * hash + (baseQuery != null ? baseQuery.GetHashCode() : 0);
+     hash = 31 * hash + (drillDownCollector != null ? drillDownCollector.GetHashCode() : 0);
+     hash = 31 * hash + Arrays.GetHashCode(drillDownQueries);
+     hash = 31 * hash + Arrays.GetHashCode(drillSidewaysCollectors);
+     return hash;
+ }
+
+ /// <summary>
+ /// Equal when <paramref name="obj"/> is the same instance, or when the base class
+ /// reports equality, the runtime types match, and the base query, drill-down
+ /// collector, drill-down queries and sideways collectors all compare equal.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+     if (object.ReferenceEquals(this, obj))
+     {
+         return true;
+     }
+     if (!base.Equals(obj) || this.GetType() != obj.GetType())
+     {
+         return false;
+     }
+
+     DrillSidewaysQuery other = (DrillSidewaysQuery)obj;
+
+     // Null-safe comparison of the base queries.
+     if (baseQuery == null ? other.baseQuery != null : !baseQuery.Equals(other.baseQuery))
+     {
+         return false;
+     }
+
+     // Null-safe comparison of the drill-down collectors.
+     if (drillDownCollector == null ? other.drillDownCollector != null : !drillDownCollector.Equals(other.drillDownCollector))
+     {
+         return false;
+     }
+
+     return Arrays.Equals(drillDownQueries, other.drillDownQueries)
+         && Arrays.Equals(drillSidewaysCollectors, other.drillSidewaysCollectors);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/DrillSidewaysScorer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/DrillSidewaysScorer.cs b/src/Lucene.Net.Facet/DrillSidewaysScorer.cs
new file mode 100644
index 0000000..bbd00c1
--- /dev/null
+++ b/src/Lucene.Net.Facet/DrillSidewaysScorer.cs
@@ -0,0 +1,826 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using Scorer = Lucene.Net.Search.Scorer;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+ using DocsEnum = Lucene.Net.Index.DocsEnum;
+ using Collector = Lucene.Net.Search.Collector;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using Scorer = Lucene.Net.Search.Scorer;
+ using BulkScorer = Lucene.Net.Search.BulkScorer;
+ using Weight = Lucene.Net.Search.Weight;
+ using Bits = Lucene.Net.Util.Bits;
+ using FixedBitSet = Lucene.Net.Util.FixedBitSet;
+
+ internal class DrillSidewaysScorer : BulkScorer
+ {
+
+ //private static boolean DEBUG = false;
+
+ private readonly Collector drillDownCollector; // may be null; when set it receives every full hit (see CollectHit)
+
+ private readonly DocsAndCost[] dims; // one entry per drill-down dimension: iterator or bits, plus its sideways collector
+
+ // Scorer for the base query; it drives iteration and supplies the scores:
+ private readonly Scorer baseScorer;
+
+ private readonly AtomicReaderContext context; // handed to collectors via NextReader; also supplies MaxDoc
+
+ internal readonly bool scoreSubDocsAtOnce; // when true, Score() always picks DoQueryFirstScoring
+
+ private const int CHUNK = 2048; // window size (in doc IDs) and length of the per-chunk slot arrays
+ private static readonly int MASK = CHUNK - 1; // docID & MASK maps a doc ID to its slot in the current chunk
+
+ private int collectDocID = -1; // doc being collected right now; returned by FakeScorer.DocID()
+ private float collectScore; // score of that doc; returned by FakeScorer.Score()
+
+ /// <summary>
+ /// Stores the segment context, the base query scorer, the optional
+ /// drill-down collector, the per-dimension state and the
+ /// score-sub-docs-at-once flag. No validation or iteration happens here.
+ /// </summary>
+ internal DrillSidewaysScorer(AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector, DocsAndCost[] dims, bool scoreSubDocsAtOnce)
+ {
+     this.context = context;
+     this.baseScorer = baseScorer;
+     this.drillDownCollector = drillDownCollector;
+     this.dims = dims;
+     this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
+ }
+
+ /// <summary>
+ /// Scores the whole segment in one call: wires a <see cref="FakeScorer"/>
+ /// into every collector, positions the base scorer and all dimension
+ /// iterators on their first document, then delegates to one of the three
+ /// scoring strategies based on the relative costs. Always returns false.
+ /// </summary>
+ /// <exception cref="System.ArgumentException">if <paramref name="maxDoc"/> is not <c>int.MaxValue</c>.</exception>
+ public override bool Score(Collector collector, int maxDoc)
+ {
+     if (maxDoc != int.MaxValue)
+     {
+         throw new System.ArgumentException("maxDoc must be Integer.MAX_VALUE");
+     }
+
+     // All collectors share one stand-in scorer that reports the doc and
+     // score currently being collected.
+     FakeScorer fakeScorer = new FakeScorer(this);
+     collector.Scorer = fakeScorer;
+     if (drillDownCollector != null)
+     {
+         drillDownCollector.Scorer = fakeScorer;
+         drillDownCollector.NextReader = context;
+     }
+     for (int d = 0; d < dims.Length; d++)
+     {
+         dims[d].sidewaysCollector.Scorer = fakeScorer;
+         dims[d].sidewaysCollector.NextReader = context;
+     }
+
+     // TODO: if we ever allow null baseScorer ... it will
+     // mean we DO score docs out of order ... hmm, or if we
+     // change up the order of the conjunctions below
+     Debug.Assert(baseScorer != null);
+
+     // Position all scorers on their first matching doc, and count the
+     // dimensions that only offer random-access bits.
+     baseScorer.NextDoc();
+     int bitsDimCount = 0;
+     foreach (DocsAndCost entry in dims)
+     {
+         if (entry.disi != null)
+         {
+             entry.disi.NextDoc();
+         }
+         else if (entry.bits != null)
+         {
+             bitsDimCount++;
+         }
+     }
+
+     int dimCount = dims.Length;
+     int iteratorDimCount = dimCount - bitsDimCount;
+
+     Bits[] bitsFilters = new Bits[bitsDimCount];
+     Collector[] bitsSidewaysCollectors = new Collector[bitsDimCount];
+     DocIdSetIterator[] disis = new DocIdSetIterator[iteratorDimCount];
+     Collector[] sidewaysCollectors = new Collector[iteratorDimCount];
+
+     // Split the dimensions into the iterator-backed and the bits-backed
+     // group, summing the iterator costs along the way.
+     long drillDownCost = 0;
+     int nextIterator = 0;
+     int nextBits = 0;
+     foreach (DocsAndCost entry in dims)
+     {
+         DocIdSetIterator iterator = entry.disi;
+         if (entry.bits != null)
+         {
+             bitsFilters[nextBits] = entry.bits;
+             bitsSidewaysCollectors[nextBits] = entry.sidewaysCollector;
+             nextBits++;
+         }
+         else
+         {
+             disis[nextIterator] = iterator;
+             sidewaysCollectors[nextIterator] = entry.sidewaysCollector;
+             nextIterator++;
+             if (iterator != null)
+             {
+                 drillDownCost += iterator.Cost();
+             }
+         }
+     }
+
+     long baseQueryCost = baseScorer.Cost();
+
+     if (nextBits > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost / 10)
+     {
+         // Base query is the cheap side (or bits / at-once scoring is required).
+         DoQueryFirstScoring(collector, disis, sidewaysCollectors, bitsFilters, bitsSidewaysCollectors);
+     }
+     else if (dimCount > 1 && (dims[1].disi == null || dims[1].disi.Cost() < baseQueryCost / 10))
+     {
+         // Drill-downs are the cheap side.
+         DoDrillDownAdvanceScoring(collector, disis, sidewaysCollectors);
+     }
+     else
+     {
+         DoUnionScoring(collector, disis, sidewaysCollectors);
+     }
+
+     return false;
+ }
+
+ /// <summary>
+ /// Used when base query is highly constraining vs the
+ /// drilldowns, or when the docs must be scored at once
+ /// (i.e., like BooleanScorer2, not BooleanScorer). In
+ /// this case we just .next() on base and .advance() on
+ /// the dim filters.
+ /// <para>
+ /// For every document of the base scorer: a document that passes all
+ /// dimensions is collected as a hit, a document that fails exactly one
+ /// dimension is collected only by that dimension's sideways collector,
+ /// and a document that fails two or more dimensions is skipped.
+ /// </para>
+ /// </summary>
+ /// <param name="collector">Main collector that receives full hits.</param>
+ /// <param name="disis">Iterator-backed dimensions; an entry may be null.</param>
+ /// <param name="sidewaysCollectors">Sideways collectors parallel to <paramref name="disis"/>.</param>
+ /// <param name="bits">Random-access dimensions, checked after the iterators.</param>
+ /// <param name="bitsSidewaysCollectors">Sideways collectors parallel to <paramref name="bits"/>.</param>
+ private void DoQueryFirstScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors, Bits[] bits, Collector[] bitsSidewaysCollectors)
+ {
+     int docID = baseScorer.DocID();
+
+     while (docID != DocsEnum.NO_MORE_DOCS)
+     {
+         // Sideways collector of the single dimension that failed so far, if any.
+         Collector failedCollector = null;
+         // Set once a second dimension fails: the doc is neither a hit
+         // nor a near-miss and must not be collected at all.
+         bool rejected = false;
+
+         for (int i = 0; i < disis.Length && !rejected; i++)
+         {
+             // TODO: should we sort this 2nd dimension of
+             // docsEnums from most frequent to least?
+             DocIdSetIterator disi = disis[i];
+             if (disi != null && disi.DocID() < docID)
+             {
+                 disi.Advance(docID);
+             }
+             if (disi == null || disi.DocID() > docID)
+             {
+                 if (failedCollector != null)
+                 {
+                     rejected = true;
+                 }
+                 else
+                 {
+                     failedCollector = sidewaysCollectors[i];
+                 }
+             }
+         }
+
+         // TODO: for the "non-costly Bits" we really should
+         // have passed them down as acceptDocs, but
+         // unfortunately we cannot distinguish today between
+         // "bits() is so costly that you should apply it last"
+         // from "bits() is so cheap that you should apply it
+         // everywhere down low"
+
+         // Fold in Filter Bits last, since they may be costly:
+         for (int i = 0; i < bits.Length && !rejected; i++)
+         {
+             if (bits[i].Get(docID) == false)
+             {
+                 if (failedCollector != null)
+                 {
+                     rejected = true;
+                 }
+                 else
+                 {
+                     failedCollector = bitsSidewaysCollectors[i];
+                 }
+             }
+         }
+
+         if (!rejected)
+         {
+             collectDocID = docID;
+
+             // TODO: we could score on demand instead since we are
+             // daat here:
+             collectScore = baseScorer.Score();
+
+             if (failedCollector == null)
+             {
+                 // Hit passed all filters, so it's "real":
+                 CollectHit(collector, sidewaysCollectors, bitsSidewaysCollectors);
+             }
+             else
+             {
+                 // Hit missed exactly one filter:
+                 CollectNearMiss(failedCollector);
+             }
+         }
+
+         docID = baseScorer.NextDoc();
+     }
+ }
+
+ /// <summary>
+ /// Used when drill downs are highly constraining vs
+ /// baseQuery. Walks the doc ID space in windows of CHUNK docs: dims 0 and 1 seed candidate slots, baseScorer is advanced to each candidate, the remaining dims update the per-slot counts, and then hits and near-misses are collected.
+ /// </summary>
+ private void DoDrillDownAdvanceScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors)
+ {
+ int maxDoc = context.Reader.MaxDoc;
+ int numDims = dims.Length;
+
+ //if (DEBUG) {
+ // System.out.println(" doDrillDownAdvanceScoring");
+ //}
+
+ // TODO: maybe a class like BS, instead of parallel arrays
+ int[] filledSlots = new int[CHUNK]; // slots whose doc was confirmed by baseScorer in the current chunk
+ int[] docIDs = new int[CHUNK]; // doc ID currently stored in each slot (-1 once rejected by baseScorer)
+ float[] scores = new float[CHUNK]; // base query score recorded for each confirmed slot
+ int[] missingDims = new int[CHUNK]; // NOTE(review): appears to track the lowest dim index not yet matched for the slot - confirm
+ int[] counts = new int[CHUNK]; // running match count per slot; compared against numDims when collecting
+
+ docIDs[0] = -1; // slot 0 would otherwise start at 0, which equals doc ID 0 and would look already filled
+ int nextChunkStart = CHUNK; // exclusive upper doc ID bound of the current chunk
+
+ FixedBitSet seen = new FixedBitSet(CHUNK); // slots touched by dim 0 or dim 1 in the current chunk
+
+ while (true)
+ {
+ //if (DEBUG) {
+ // System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
+ //}
+
+ // First dim:
+ //if (DEBUG) {
+ // System.out.println(" dim0");
+ //}
+ DocIdSetIterator disi = disis[0];
+ if (disi != null)
+ {
+ int docID = disi.DocID();
+ while (docID < nextChunkStart)
+ {
+ int slot = docID & MASK;
+
+ if (docIDs[slot] != docID)
+ {
+ seen.Set(slot);
+ // Mark slot as valid:
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
+ //}
+ docIDs[slot] = docID;
+ missingDims[slot] = 1;
+ counts[slot] = 1;
+ }
+
+ docID = disi.NextDoc();
+ }
+ }
+
+ // Second dim:
+ //if (DEBUG) {
+ // System.out.println(" dim1");
+ //}
+ disi = disis[1];
+ if (disi != null)
+ {
+ int docID = disi.DocID();
+ while (docID < nextChunkStart)
+ {
+ int slot = docID & MASK;
+
+ if (docIDs[slot] != docID)
+ {
+ // Mark slot as valid:
+ seen.Set(slot);
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
+ //}
+ docIDs[slot] = docID; // doc was not produced by dim 0 in this chunk
+ missingDims[slot] = 0;
+ counts[slot] = 1;
+ }
+ else
+ {
+ // TODO: single-valued dims will always be true
+ // below; we could somehow specialize
+ if (missingDims[slot] >= 1)
+ {
+ missingDims[slot] = 2;
+ counts[slot] = 2;
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
+ //}
+ }
+ else
+ {
+ counts[slot] = 1;
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
+ //}
+ }
+ }
+
+ docID = disi.NextDoc();
+ }
+ }
+
+ // After this we can "upgrade" to conjunction, because
+ // any doc not seen by either dim 0 or dim 1 cannot be
+ // a hit or a near miss:
+
+ //if (DEBUG) {
+ // System.out.println(" baseScorer");
+ //}
+
+ // Fold in baseScorer, using advance:
+ int filledCount = 0;
+ int slot0 = 0;
+ while (slot0 < CHUNK && (slot0 = seen.NextSetBit(slot0)) != -1)
+ {
+ int ddDocID = docIDs[slot0];
+ Debug.Assert(ddDocID != -1);
+
+ int baseDocID = baseScorer.DocID();
+ if (baseDocID < ddDocID)
+ {
+ baseDocID = baseScorer.Advance(ddDocID);
+ }
+ if (baseDocID == ddDocID)
+ {
+ //if (DEBUG) {
+ // System.out.println(" keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
+ //}
+ scores[slot0] = baseScorer.Score();
+ filledSlots[filledCount++] = slot0;
+ counts[slot0]++; // the base query match counts as one more match
+ }
+ else
+ {
+ //if (DEBUG) {
+ // System.out.println(" no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
+ //}
+ docIDs[slot0] = -1; // base query does not match this doc: invalidate the slot
+
+ // TODO: we could jump slot0 forward to the
+ // baseDocID ... but we'd need to set docIDs for
+ // intervening slots to -1
+ }
+ slot0++;
+ }
+ seen.Clear(0, CHUNK); // reset for the next chunk
+
+ if (filledCount == 0)
+ {
+ if (nextChunkStart >= maxDoc)
+ {
+ break;
+ }
+ nextChunkStart += CHUNK;
+ continue;
+ }
+
+ // TODO: factor this out & share w/ union scorer,
+ // except we start from dim=2 instead:
+ for (int dim = 2; dim < numDims; dim++)
+ {
+ //if (DEBUG) {
+ // System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
+ //}
+ disi = disis[dim];
+ if (disi != null)
+ {
+ int docID = disi.DocID();
+ while (docID < nextChunkStart)
+ {
+ int slot = docID & MASK;
+ if (docIDs[slot] == docID && counts[slot] >= dim)
+ {
+ // TODO: single-valued dims will always be true
+ // below; we could somehow specialize
+ if (missingDims[slot] >= dim)
+ {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " count=" + (dim+2));
+ //}
+ missingDims[slot] = dim + 1;
+ counts[slot] = dim + 2;
+ }
+ else
+ {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
+ //}
+ counts[slot] = dim + 1;
+ }
+ }
+
+ // TODO: sometimes use advance?
+ docID = disi.NextDoc();
+ }
+ }
+ }
+
+ // Collect:
+ //if (DEBUG) {
+ // System.out.println(" now collect: " + filledCount + " hits");
+ //}
+ for (int i = 0; i < filledCount; i++)
+ {
+ int slot = filledSlots[i];
+ collectDocID = docIDs[slot];
+ collectScore = scores[slot];
+ //if (DEBUG) {
+ // System.out.println(" docID=" + docIDs[slot] + " count=" + counts[slot]);
+ //}
+ if (counts[slot] == 1 + numDims) // base query plus every dim matched: a real hit
+ {
+ CollectHit(collector, sidewaysCollectors);
+ }
+ else if (counts[slot] == numDims) // one short of a hit: near-miss, only the sideways collector at missingDims[slot] sees it
+ {
+ CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
+ }
+ }
+
+ if (nextChunkStart >= maxDoc)
+ {
+ break;
+ }
+
+ nextChunkStart += CHUNK;
+ }
+ }
+
+ private void DoUnionScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors)
+ {
+ //if (DEBUG) {
+ // System.out.println(" doUnionScoring");
+ //}
+ // Fallback strategy chosen by Score(): per chunk of CHUNK doc IDs, baseScorer fills the slots first, then each dim iterator updates the per-slot counts, then hits and near-misses are collected.
+ int maxDoc = context.Reader.MaxDoc;
+ int numDims = dims.Length;
+
+ // TODO: maybe a class like BS, instead of parallel arrays
+ int[] filledSlots = new int[CHUNK]; // slots filled by baseScorer in the current chunk, in iteration order
+ int[] docIDs = new int[CHUNK]; // doc ID currently stored in each slot
+ float[] scores = new float[CHUNK]; // base query score recorded for each slot
+ int[] missingDims = new int[CHUNK]; // NOTE(review): appears to track the lowest dim index not yet matched for the slot - confirm
+ int[] counts = new int[CHUNK]; // running match count per slot; compared against numDims when collecting
+
+ docIDs[0] = -1; // slot 0 would otherwise start at 0, which equals doc ID 0
+
+ // NOTE: this is basically a specialized version of
+ // BooleanScorer, to the minShouldMatch=N-1 case, but
+ // carefully tracking which dimension failed to match
+
+ int nextChunkStart = CHUNK; // exclusive upper doc ID bound of the current chunk
+
+ while (true)
+ {
+ //if (DEBUG) {
+ // System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
+ //}
+ int filledCount = 0;
+ int docID = baseScorer.DocID();
+ //if (DEBUG) {
+ // System.out.println(" base docID=" + docID);
+ //}
+ while (docID < nextChunkStart)
+ {
+ int slot = docID & MASK;
+ //if (DEBUG) {
+ // System.out.println(" docIDs[slot=" + slot + "]=" + docID + " id=" + context.reader().document(docID).get("id"));
+ //}
+
+ // Mark slot as valid:
+ Debug.Assert(docIDs[slot] != docID, "slot=" + slot + " docID=" + docID);
+ docIDs[slot] = docID;
+ scores[slot] = baseScorer.Score();
+ filledSlots[filledCount++] = slot;
+ missingDims[slot] = 0; // no dim has been checked yet
+ counts[slot] = 1; // the base query match itself
+
+ docID = baseScorer.NextDoc();
+ }
+
+ if (filledCount == 0)
+ {
+ if (nextChunkStart >= maxDoc)
+ {
+ break;
+ }
+ nextChunkStart += CHUNK;
+ continue;
+ }
+
+ // First drill-down dim, basically adds SHOULD onto
+ // the baseQuery:
+ //if (DEBUG) {
+ // System.out.println(" dim=0 [" + dims[0].dim + "]");
+ //}
+ DocIdSetIterator disi = disis[0];
+ if (disi != null)
+ {
+ docID = disi.DocID();
+ //if (DEBUG) {
+ // System.out.println(" start docID=" + docID);
+ //}
+ while (docID < nextChunkStart)
+ {
+ int slot = docID & MASK;
+ if (docIDs[slot] == docID) // only docs the base query produced in this chunk are updated
+ {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " count=2");
+ //}
+ missingDims[slot] = 1;
+ counts[slot] = 2;
+ }
+ docID = disi.NextDoc();
+ }
+ }
+
+ for (int dim = 1; dim < numDims; dim++)
+ {
+ //if (DEBUG) {
+ // System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
+ //}
+
+ disi = disis[dim];
+ if (disi != null)
+ {
+ docID = disi.DocID();
+ //if (DEBUG) {
+ // System.out.println(" start docID=" + docID);
+ //}
+ while (docID < nextChunkStart)
+ {
+ int slot = docID & MASK;
+ if (docIDs[slot] == docID && counts[slot] >= dim)
+ {
+ // This doc is still in the running...
+ // TODO: single-valued dims will always be true
+ // below; we could somehow specialize
+ if (missingDims[slot] >= dim)
+ {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " count=" + (dim+2));
+ //}
+ missingDims[slot] = dim + 1;
+ counts[slot] = dim + 2;
+ }
+ else
+ {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
+ //}
+ counts[slot] = dim + 1;
+ }
+ }
+ docID = disi.NextDoc();
+ }
+ }
+ }
+
+ // Collect:
+ //System.out.println(" now collect: " + filledCount + " hits");
+ for (int i = 0; i < filledCount; i++)
+ {
+ // NOTE: This is actually in-order collection,
+ // because we only accept docs originally returned by
+ // the baseScorer (ie that Scorer is AND'd)
+ int slot = filledSlots[i];
+ collectDocID = docIDs[slot];
+ collectScore = scores[slot];
+ //if (DEBUG) {
+ // System.out.println(" docID=" + docIDs[slot] + " count=" + counts[slot]);
+ //}
+ //System.out.println(" collect doc=" + collectDocID + " main.freq=" + (counts[slot]-1) + " main.doc=" + collectDocID + " exactCount=" + numDims);
+ if (counts[slot] == 1 + numDims) // base query plus every dim matched: a real hit
+ {
+ //System.out.println(" hit");
+ CollectHit(collector, sidewaysCollectors);
+ }
+ else if (counts[slot] == numDims) // one short of a hit: near-miss, only the sideways collector at missingDims[slot] sees it
+ {
+ //System.out.println(" sw");
+ CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
+ }
+ }
+
+ if (nextChunkStart >= maxDoc)
+ {
+ break;
+ }
+
+ nextChunkStart += CHUNK;
+ }
+ }
+
+ /// <summary>
+ /// Reports the current document (<c>collectDocID</c>) as a full hit: to
+ /// the main collector, to the drill-down collector when one is set, and
+ /// to every sideways collector in array order.
+ /// </summary>
+ private void CollectHit(Collector collector, Collector[] sidewaysCollectors)
+ {
+     collector.Collect(collectDocID);
+     if (drillDownCollector != null)
+     {
+         drillDownCollector.Collect(collectDocID);
+     }
+
+     // TODO: we could "fix" faceting of the sideways counts
+     // to do this "union" (of the drill down hits) in the
+     // end instead:
+
+     // A full hit counts towards every dimension's sideways tally.
+     foreach (Collector sideways in sidewaysCollectors)
+     {
+         sideways.Collect(collectDocID);
+     }
+ }
+
+ /// <summary>
+ /// Variant of the hit collection for two groups of sideways collectors:
+ /// the current document goes to the main collector, to the drill-down
+ /// collector when one is set, then to every collector of
+ /// <paramref name="sidewaysCollectors"/> followed by every collector of
+ /// <paramref name="sidewaysCollectors2"/>.
+ /// </summary>
+ private void CollectHit(Collector collector, Collector[] sidewaysCollectors, Collector[] sidewaysCollectors2)
+ {
+     collector.Collect(collectDocID);
+     if (drillDownCollector != null)
+     {
+         drillDownCollector.Collect(collectDocID);
+     }
+
+     // TODO: we could "fix" faceting of the sideways counts
+     // to do this "union" (of the drill down hits) in the
+     // end instead:
+
+     // A full hit counts towards every dimension's sideways tally,
+     // first group first.
+     foreach (Collector sideways in sidewaysCollectors)
+     {
+         sideways.Collect(collectDocID);
+     }
+     foreach (Collector sideways in sidewaysCollectors2)
+     {
+         sideways.Collect(collectDocID);
+     }
+ }
+
+ /// <summary>
+ /// Reports the current document (<c>collectDocID</c>) to the single
+ /// sideways collector passed in; no other collector is touched.
+ /// </summary>
+ private void CollectNearMiss(Collector sidewaysCollector)
+ {
+     int nearMissDoc = collectDocID;
+     sidewaysCollector.Collect(nearMissDoc);
+ }
+
+ /// <summary>
+ /// Stand-in scorer handed to the collectors. It cannot iterate; it only
+ /// reports the document and score that the enclosing
+ /// <see cref="DrillSidewaysScorer"/> is collecting at the moment.
+ /// </summary>
+ private sealed class FakeScorer : Scorer
+ {
+     private readonly DrillSidewaysScorer outerInstance;
+
+     public FakeScorer(DrillSidewaysScorer outerInstance)
+         : base(null)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>Not supported; always throws.</summary>
+     public override int Advance(int target)
+     {
+         throw new System.NotSupportedException("FakeScorer doesn't support advance(int)");
+     }
+
+     /// <summary>Doc ID currently being collected by the enclosing scorer.</summary>
+     public override int DocID()
+     {
+         return outerInstance.collectDocID;
+     }
+
+     /// <summary>Always one (the base query) plus the number of dimensions.</summary>
+     public override int Freq()
+     {
+         return 1 + outerInstance.dims.Length;
+     }
+
+     /// <summary>Not supported; always throws.</summary>
+     public override int NextDoc()
+     {
+         throw new System.NotSupportedException("FakeScorer doesn't support nextDoc()");
+     }
+
+     /// <summary>Score recorded for the document currently being collected.</summary>
+     public override float Score()
+     {
+         return outerInstance.collectScore;
+     }
+
+     /// <summary>Delegates to the cost of the base scorer.</summary>
+     public override long Cost()
+     {
+         return outerInstance.baseScorer.Cost();
+     }
+
+     /// <summary>Exposes the base scorer as the single "MUST" child.</summary>
+     public override ICollection<ChildScorer> Children
+     {
+         get
+         {
+             return new[] { new Scorer.ChildScorer(outerInstance.baseScorer, "MUST") };
+         }
+     }
+
+     /// <summary>Not supported; always throws.</summary>
+     public override Weight Weight
+     {
+         get
+         {
+             throw new System.NotSupportedException();
+         }
+     }
+ }
+
+ /// <summary>
+ /// Per-dimension state: either an iterator (<c>disi</c>) or random-access
+ /// bits (<c>bits</c>), plus the sideways collector of the dimension.
+ /// Instances order by ascending iterator cost; entries without an
+ /// iterator sort last.
+ /// </summary>
+ internal class DocsAndCost : IComparable<DocsAndCost>
+ {
+     // Iterator for docs matching this dim's filter, or ...
+     internal DocIdSetIterator disi;
+     // Random access bits:
+     internal Bits bits;
+     internal Collector sidewaysCollector;
+     internal string dim;
+
+     /// <summary>
+     /// Compares by iterator cost, cheapest first. An entry without an
+     /// iterator is greater than any entry with one; two entries without
+     /// an iterator are equal.
+     /// </summary>
+     /// <param name="other">Entry to compare against; a null reference sorts before every instance.</param>
+     /// <returns>-1, 0 or 1.</returns>
+     public virtual int CompareTo(DocsAndCost other)
+     {
+         // IComparable<T> convention: any instance is greater than null.
+         if (other == null)
+         {
+             return 1;
+         }
+         if (disi == null)
+         {
+             return other.disi == null ? 0 : 1;
+         }
+         if (other.disi == null)
+         {
+             return -1;
+         }
+
+         // Ask each iterator for its cost exactly once per comparison.
+         long thisCost = disi.Cost();
+         long otherCost = other.disi.Cost();
+         if (thisCost < otherCost)
+         {
+             return -1;
+         }
+         return thisCost > otherCost ? 1 : 0;
+     }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/FacetField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/FacetField.cs b/src/Lucene.Net.Facet/FacetField.cs
new file mode 100644
index 0000000..a11e5cf
--- /dev/null
+++ b/src/Lucene.Net.Facet/FacetField.cs
@@ -0,0 +1,91 @@
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Document = Lucene.Net.Documents.Document;
+ using Field = Lucene.Net.Documents.Field;
+ using FieldType = Lucene.Net.Documents.FieldType;
+
+ /// <summary>
+ /// Add an instance of this to your <see cref="Document"/> for every facet label.
+ ///
+ /// <para>
+ /// <b>NOTE:</b> you must run the document through <see cref="FacetsConfig"/>
+ /// (<c>build(Document)</c> in the original Lucene API) before
+ /// you add the document to IndexWriter.
+ /// </para>
+ /// </summary>
+ public class FacetField : Field
+ {
+     internal static readonly FieldType TYPE = new FieldType();
+     static FacetField()
+     {
+         TYPE.Indexed = true;
+         TYPE.Freeze();
+     }
+
+     /// <summary>
+     /// Dimension for this field. </summary>
+     public readonly string dim;
+
+     /// <summary>
+     /// Path for this field. </summary>
+     public readonly string[] path;
+
+     /// <summary>
+     /// Creates this field from <paramref name="dim"/> and
+     /// <paramref name="path"/>.
+     /// </summary>
+     /// <param name="dim">Dimension name; must not be null or empty.</param>
+     /// <param name="path">Path components; at least one, none null or empty.</param>
+     /// <exception cref="System.ArgumentNullException">if <paramref name="path"/> is null.</exception>
+     /// <exception cref="System.ArgumentException">if <paramref name="dim"/> or any path component is
+     /// null or empty, or if <paramref name="path"/> has no elements.</exception>
+     public FacetField(string dim, params string[] path)
+         : base("dummy", TYPE)
+     {
+         VerifyLabel(dim);
+         // Reject a null array explicitly instead of failing with a
+         // NullReferenceException while enumerating it.
+         if (path == null)
+         {
+             throw new System.ArgumentNullException("path");
+         }
+         if (path.Length == 0)
+         {
+             throw new System.ArgumentException("path must have at least one element");
+         }
+         foreach (string label in path)
+         {
+             VerifyLabel(label);
+         }
+         this.dim = dim;
+         this.path = path;
+     }
+
+     public override string ToString()
+     {
+         return "FacetField(dim=" + dim + " path=[" + Arrays.ToString(path) + "])";
+     }
+
+     /// <summary>
+     /// Verifies the label is not null or empty string.
+     ///
+     /// @lucene.internal
+     /// </summary>
+     /// <exception cref="System.ArgumentException">if <paramref name="label"/> is null or empty.</exception>
+     public static void VerifyLabel(string label)
+     {
+         if (string.IsNullOrEmpty(label))
+         {
+             throw new System.ArgumentException("empty or null components not allowed; got: " + label);
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/FacetResult.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/FacetResult.cs b/src/Lucene.Net.Facet/FacetResult.cs
new file mode 100644
index 0000000..be619b2
--- /dev/null
+++ b/src/Lucene.Net.Facet/FacetResult.cs
@@ -0,0 +1,104 @@
+using System.Text;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Immutable holder for the counts or aggregates computed for one
+ /// dimension (and path) of a facet request. </summary>
+ public sealed class FacetResult
+ {
+     /// <summary>
+     /// Dimension that was requested. </summary>
+     public readonly string Dim;
+
+     /// <summary>
+     /// Path whose children were requested. </summary>
+     public readonly string[] Path;
+
+     /// <summary>
+     /// Total value for this path (sum of all child counts, or
+     /// sum of all child values), even those not included in
+     /// the topN.
+     /// </summary>
+     public readonly float Value;
+
+     /// <summary>
+     /// How many child labels were encountered. </summary>
+     public readonly int ChildCount;
+
+     /// <summary>
+     /// Child counts. </summary>
+     public readonly LabelAndValue[] LabelValues;
+
+     /// <summary>
+     /// Sole constructor; stores the arguments as given. </summary>
+     public FacetResult(string dim, string[] path, float value, LabelAndValue[] labelValues, int childCount)
+     {
+         Dim = dim;
+         Path = path;
+         Value = value;
+         ChildCount = childCount;
+         LabelValues = labelValues;
+     }
+
+     /// <summary>
+     /// Renders a header line with dim, path, value and child count,
+     /// followed by one line per child label. </summary>
+     public override string ToString()
+     {
+         StringBuilder text = new StringBuilder();
+         text.Append("dim=").Append(Dim);
+         text.Append(" path=").Append("[").Append(Arrays.ToString(Path)).Append("]");
+         text.Append(" value=").Append(Value);
+         text.Append(" childCount=").Append(ChildCount);
+         text.Append('\n');
+         for (int i = 0; i < LabelValues.Length; i++)
+         {
+             text.Append(" ").Append(LabelValues[i]).Append("\n");
+         }
+         return text.ToString();
+     }
+
+     /// <summary>
+     /// Equal when the other object is a <see cref="FacetResult"/> with the
+     /// same value, child count and child labels. Dim and Path are not
+     /// compared. </summary>
+     public override bool Equals(object _other)
+     {
+         FacetResult that = _other as FacetResult;
+         if (that == null)
+         {
+             return false;
+         }
+         if (!Value.Equals(that.Value))
+         {
+             return false;
+         }
+         if (ChildCount != that.ChildCount)
+         {
+             return false;
+         }
+         return Arrays.Equals(LabelValues, that.LabelValues);
+     }
+
+     /// <summary>
+     /// Hash over the same members that <see cref="Equals(object)"/>
+     /// compares. </summary>
+     public override int GetHashCode()
+     {
+         int hash = Value.GetHashCode() + 31 * ChildCount;
+         for (int i = 0; i < LabelValues.Length; i++)
+         {
+             hash = LabelValues[i].GetHashCode() + 31 * hash;
+         }
+         return hash;
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Facets.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Facets.cs b/src/Lucene.Net.Facet/Facets.cs
new file mode 100644
index 0000000..f046127
--- /dev/null
+++ b/src/Lucene.Net.Facet/Facets.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Common base class for all facets implementations. It declares three abstract queries: top children of a path, the value of one specific path, and top children across all dimensions.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public abstract class Facets
+ {
+
+ /// <summary>
+ /// Default constructor; does nothing. </summary>
+ public Facets()
+ {
+ }
+
+ /// <summary>
+ /// Returns the topN child labels under the specified
+ /// path. Returns null if the specified path doesn't
+ /// exist or if this dimension was never seen.
+ /// </summary> <param name="topN">Maximum number of child labels to return.</param> <param name="dim">Dimension to look in.</param> <param name="path">Path under the dimension whose children are requested.</param>
+ public abstract FacetResult GetTopChildren(int topN, string dim, params string[] path);
+
+ /// <summary>
+ /// Return the count or value
+ /// for a specific path. Returns -1 if
+ /// this path doesn't exist, else the count.
+ /// </summary> <param name="dim">Dimension to look in.</param> <param name="path">Path under the dimension whose value is requested.</param>
+ public abstract float GetSpecificValue(string dim, params string[] path);
+
+ /// <summary>
+ /// Returns topN labels for any dimension that had hits,
+ /// sorted by the number of hits that dimension matched;
+ /// this is used for "sparse" faceting, where many
+ /// different dimensions were indexed, for example
+ /// depending on the type of document.
+ /// </summary> <param name="topN">Maximum number of labels to return per dimension.</param>
+ public abstract IList<FacetResult> GetAllDims(int topN);
+ }
+
+}
\ No newline at end of file