You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/14 12:59:32 UTC
[17/26] lucenenet git commit: first commit of facet porting,
failing tests will be fixed in next commits.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/FacetsCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/FacetsCollector.cs b/Lucene.Net.Facet/FacetsCollector.cs
new file mode 100644
index 0000000..4bbb76f
--- /dev/null
+++ b/Lucene.Net.Facet/FacetsCollector.cs
@@ -0,0 +1,372 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Collects hits for subsequent faceting. Once you've run
+ /// a search and collected hits into this, instantiate one of
+ /// the <seealso cref="Facets"/> subclasses to do the facet
+ /// counting. Use the <c>Search</c> utility methods to
+ /// perform an "ordinary" search but also collect into a
+ /// <seealso cref="Collector"/>.
+ /// </summary>
+ public class FacetsCollector : Collector
+ {
+
+ private AtomicReaderContext context;
+ private Scorer scorer;
+ private int totalHits;
+ private float[] scores;
+ private readonly bool keepScores;
+ private readonly IList<MatchingDocs> matchingDocs = new List<MatchingDocs>();
+ private Docs docs;
+
+ /// <summary>
+ /// Records the documents matched during collection and afterwards hands
+ /// them back as a <seealso cref="DocIdSet"/>.
+ /// </summary>
+ protected internal abstract class Docs
+ {
+     /// <summary>
+     /// Sole constructor. </summary>
+     public Docs()
+     {
+     }
+
+     /// <summary>
+     /// Gets the <seealso cref="DocIdSet"/> holding every document recorded so far. </summary>
+     public abstract DocIdSet DocIdSet { get; }
+
+     /// <summary>
+     /// Records <paramref name="docId"/> as a matching document. </summary>
+     public abstract void AddDoc(int docId);
+ }
+
+ /// <summary>
+ /// Result of collecting one <seealso cref="AtomicReaderContext"/>: the segment
+ /// itself plus the documents that matched in it. <c>scores</c> is non-null
+ /// only when scores were requested.
+ /// </summary>
+ public sealed class MatchingDocs
+ {
+     /// <summary>
+     /// Segment these hits belong to. </summary>
+     public readonly AtomicReaderContext context;
+
+     /// <summary>
+     /// Set of documents that matched. </summary>
+     public readonly DocIdSet bits;
+
+     /// <summary>
+     /// Non-sparse array of scores; null when scores were not kept. </summary>
+     public readonly float[] scores;
+
+     /// <summary>
+     /// Number of hits recorded for this segment. </summary>
+     public readonly int totalHits;
+
+     /// <summary>
+     /// Sole constructor; stores the arguments as given. </summary>
+     public MatchingDocs(AtomicReaderContext context, DocIdSet bits, int totalHits, float[] scores)
+     {
+         this.context = context;
+         this.bits = bits;
+         this.totalHits = totalHits;
+         this.scores = scores;
+     }
+ }
+
+ /// <summary>
+ /// Creates a collector that does not keep scores. </summary>
+ public FacetsCollector()
+     : this(keepScores: false)
+ {
+ }
+
+ /// <summary>
+ /// Creates a collector. When <paramref name="keepScores"/> is true a
+ /// <c>float[]</c> is allocated to hold the score of every hit.
+ /// </summary>
+ public FacetsCollector(bool keepScores)
+ {
+     this.keepScores = keepScores;
+ }
+
+ /// <summary>
+ /// Factory for the <seealso cref="Docs"/> instance that records the hits of one
+ /// segment. The default implementation is backed by a <seealso cref="FixedBitSet"/>;
+ /// override to record the documents in your own <seealso cref="DocIdSet"/>.
+ /// </summary>
+ /// <param name="maxDoc"> size passed to the default bit set </param>
+ protected virtual Docs CreateDocs(int maxDoc)
+ {
+     Docs recorder = new DocsAnonymousInnerClassHelper(this, maxDoc);
+     return recorder;
+ }
+
+ /// <summary>
+ /// Default <seealso cref="Docs"/> implementation: records hits by setting bits
+ /// in a <seealso cref="FixedBitSet"/>.
+ /// </summary>
+ private class DocsAnonymousInnerClassHelper : Docs
+ {
+     // One bit per document id; allocated once in the constructor.
+     private readonly FixedBitSet bits;
+
+     /// <summary>
+     /// Allocates a bit set with <paramref name="maxDoc"/> bits.
+     /// <paramref name="outerInstance"/> is accepted only to keep the
+     /// constructor signature unchanged; it is not stored, so this object
+     /// does not keep the collector alive.
+     /// </summary>
+     public DocsAnonymousInnerClassHelper(FacetsCollector outerInstance, int maxDoc)
+     {
+         bits = new FixedBitSet(maxDoc);
+     }
+
+     /// <summary>
+     /// Marks <paramref name="docId"/> as matched. </summary>
+     public override void AddDoc(int docId)
+     {
+         bits.Set(docId);
+     }
+
+     /// <summary>
+     /// Returns the underlying bit set. </summary>
+     public override DocIdSet DocIdSet
+     {
+         get
+         {
+             return bits;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Whether this collector was created with score keeping enabled. </summary>
+ public bool KeepScores
+ {
+     get { return this.keepScores; }
+ }
+
+ /// <summary>
+ /// Returns the documents matched by the query, one <seealso cref="MatchingDocs"/> per
+ /// visited segment. Reading this property first appends the segment that is
+ /// still being collected (if any) to the list and clears the per-segment state.
+ /// </summary>
+ public virtual IList<MatchingDocs> GetMatchingDocs
+ {
+     get
+     {
+         if (docs == null)
+         {
+             // Nothing pending: every visited segment is already in the list.
+             return matchingDocs;
+         }
+
+         MatchingDocs pending = new MatchingDocs(this.context, docs.DocIdSet, totalHits, scores);
+         matchingDocs.Add(pending);
+
+         // Reset so the same segment is not added twice on the next read.
+         docs = null;
+         scores = null;
+         context = null;
+
+         return matchingDocs;
+     }
+ }
+
+ /// <summary>
+ /// Out-of-order collection is accepted only when scores are not kept:
+ /// each score is appended to the <c>float[]</c> and is expected to line up,
+ /// in order, with the recorded hits.
+ /// </summary>
+ public override sealed bool AcceptsDocsOutOfOrder()
+ {
+     return !keepScores;
+ }
+
+ /// <summary>
+ /// Records <paramref name="doc"/> as a hit and, when scores are kept,
+ /// appends the current scorer's score, growing the score array on demand.
+ /// </summary>
+ public override sealed void Collect(int doc)
+ {
+     docs.AddDoc(doc);
+
+     if (keepScores)
+     {
+         int slot = totalHits;
+         if (slot >= scores.Length)
+         {
+             // Out of room: move the scores collected so far into a larger array.
+             float[] grown = new float[ArrayUtil.Oversize(slot + 1, 4)];
+             Array.Copy(scores, 0, grown, 0, slot);
+             scores = grown;
+         }
+         scores[slot] = scorer.Score();
+     }
+
+     totalHits++;
+ }
+
+ /// <summary>
+ /// Sets the scorer that <c>Collect</c> reads scores from. </summary>
+ public override sealed Scorer Scorer
+ {
+     set { scorer = value; }
+ }
+
+ /// <summary>
+ /// Switches collection to a new segment: the hits of the previous segment
+ /// (if any) are appended to the result list, then fresh per-segment state
+ /// is created for the new context.
+ /// </summary>
+ public override sealed AtomicReaderContext NextReader
+ {
+     set
+     {
+         bool hasPreviousSegment = docs != null;
+         if (hasPreviousSegment)
+         {
+             MatchingDocs finished = new MatchingDocs(this.context, docs.DocIdSet, totalHits, scores);
+             matchingDocs.Add(finished);
+         }
+
+         int segmentMaxDoc = value.Reader.MaxDoc;
+         docs = CreateDocs(segmentMaxDoc);
+         totalHits = 0;
+         if (keepScores)
+         {
+             // some initial size; Collect grows the array when needed
+             scores = new float[64];
+         }
+         this.context = value;
+     }
+ }
+
+
+ /// <summary>
+ /// Utility method: searches for the top <paramref name="n"/> hits and also
+ /// collects all hits into the provided <seealso cref="Collector"/>.
+ /// </summary>
+ public static TopDocs Search(IndexSearcher searcher, Query q, int n, Collector fc)
+ {
+     return Search(searcher, q, (Filter)null, n, fc);
+ }
+
+ /// <summary>
+ /// Utility method: like the unfiltered search, but restricts the query with
+ /// <paramref name="filter"/> (which may be null) while collecting all hits
+ /// into the provided <seealso cref="Collector"/>.
+ /// </summary>
+ public static TopDocs Search(IndexSearcher searcher, Query q, Filter filter, int n, Collector fc)
+ {
+     return DoSearch(searcher, null, q, filter, n, null, false, false, fc);
+ }
+
+ /// <summary>
+ /// Utility method: sorted search that also collects all hits into the
+ /// provided <seealso cref="Collector"/>. Document scores and max score are
+ /// not computed.
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if <paramref name="sort"/> is null </exception>
+ public static TopFieldDocs Search(IndexSearcher searcher, Query q, Filter filter, int n, Sort sort, Collector fc)
+ {
+     return Search(searcher, q, filter, n, sort, false, false, fc);
+ }
+
+ /// <summary>
+ /// Utility method: sorted search that also collects all hits into the
+ /// provided <seealso cref="Collector"/>, with explicit control over whether
+ /// document scores and the max score are computed.
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if <paramref name="sort"/> is null </exception>
+ public static TopFieldDocs Search(IndexSearcher searcher, Query q, Filter filter, int n, Sort sort, bool doDocScores, bool doMaxScore, Collector fc)
+ {
+     if (sort == null)
+     {
+         throw new System.ArgumentException("sort must not be null");
+     }
+     TopDocs hits = DoSearch(searcher, null, q, filter, n, sort, doDocScores, doMaxScore, fc);
+     return (TopFieldDocs)hits;
+ }
+
+ /// <summary>
+ /// Utility method: searches for the top <paramref name="n"/> hits after
+ /// <paramref name="after"/> and also collects all hits into the provided
+ /// <seealso cref="Collector"/>.
+ /// NOTE: unlike the other overloads this one is an instance (virtual)
+ /// method, although it uses no instance state.
+ /// </summary>
+ public virtual TopDocs SearchAfter(IndexSearcher searcher, ScoreDoc after, Query q, int n, Collector fc)
+ {
+     return SearchAfter(searcher, after, q, (Filter)null, n, fc);
+ }
+
+ /// <summary>
+ /// Utility method: filtered variant that searches after
+ /// <paramref name="after"/> and collects all hits into the provided
+ /// <seealso cref="Collector"/>.
+ /// </summary>
+ public static TopDocs SearchAfter(IndexSearcher searcher, ScoreDoc after, Query q, Filter filter, int n, Collector fc)
+ {
+     return DoSearch(searcher, after, q, filter, n, null, false, false, fc);
+ }
+
+ /// <summary>
+ /// Utility method: sorted variant that searches after
+ /// <paramref name="after"/> and collects all hits into the provided
+ /// <seealso cref="Collector"/>. Document scores and max score are not computed.
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if <paramref name="sort"/> is null </exception>
+ public static TopDocs SearchAfter(IndexSearcher searcher, ScoreDoc after, Query q, Filter filter, int n, Sort sort, Collector fc)
+ {
+     return SearchAfter(searcher, after, q, filter, n, sort, false, false, fc);
+ }
+
+ /// <summary>
+ /// Utility method: sorted variant that searches after
+ /// <paramref name="after"/> and collects all hits into the provided
+ /// <seealso cref="Collector"/>, with explicit control over whether document
+ /// scores and the max score are computed.
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if <paramref name="sort"/> is null </exception>
+ public static TopDocs SearchAfter(IndexSearcher searcher, ScoreDoc after, Query q, Filter filter, int n, Sort sort, bool doDocScores, bool doMaxScore, Collector fc)
+ {
+     if (sort == null)
+     {
+         throw new System.ArgumentException("sort must not be null");
+     }
+     TopDocs hits = DoSearch(searcher, after, q, filter, n, sort, doDocScores, doMaxScore, fc);
+     return hits;
+ }
+
+ /// <summary>
+ /// Shared implementation behind the <c>Search</c>/<c>SearchAfter</c> helpers:
+ /// applies the optional filter, clamps <paramref name="n"/> to the reader
+ /// size, validates <paramref name="after"/>, then runs the query with a
+ /// top-hits collector wrapped together with <paramref name="fc"/>.
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if <paramref name="after"/> lies
+ /// beyond the reader, or a sort is given and <paramref name="after"/> is not
+ /// a <seealso cref="FieldDoc"/> </exception>
+ private static TopDocs DoSearch(IndexSearcher searcher, ScoreDoc after, Query q, Filter filter, int n, Sort sort, bool doDocScores, bool doMaxScore, Collector fc)
+ {
+     Query effectiveQuery = filter == null ? q : new FilteredQuery(q, filter);
+
+     // An empty reader is treated as having a limit of 1.
+     int readerMaxDoc = searcher.IndexReader.MaxDoc;
+     int limit = readerMaxDoc == 0 ? 1 : readerMaxDoc;
+     int topN = Math.Min(n, limit);
+
+     if (after != null && after.Doc >= limit)
+     {
+         throw new System.ArgumentException("after.doc exceeds the number of documents in the reader: after.doc=" + after.Doc + " limit=" + limit);
+     }
+
+     if (sort == null)
+     {
+         // TODO: can we pass the right boolean for
+         // in-order instead of hardwired to false...? we'd
+         // need access to the protected IS.search methods
+         // taking Weight... could use reflection...
+         var scoreCollector = TopScoreDocCollector.Create(topN, after, false);
+         searcher.Search(effectiveQuery, MultiCollector.Wrap(scoreCollector, fc));
+         return scoreCollector.TopDocs();
+     }
+
+     if (after != null && !(after is FieldDoc))
+     {
+         // TODO: if we fix type safety of TopFieldDocs we can
+         // remove this
+         throw new System.ArgumentException("after must be a FieldDoc; got " + after);
+     }
+
+     const bool fillFields = true;
+     var fieldCollector = TopFieldCollector.Create(sort, topN, (FieldDoc)after, fillFields, doDocScores, doMaxScore, false);
+     searcher.Search(effectiveQuery, MultiCollector.Wrap(fieldCollector, fc));
+     return fieldCollector.TopDocs();
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/FacetsConfig.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/FacetsConfig.cs b/Lucene.Net.Facet/FacetsConfig.cs
new file mode 100644
index 0000000..b8f8140
--- /dev/null
+++ b/Lucene.Net.Facet/FacetsConfig.cs
@@ -0,0 +1,703 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Collections.Concurrent;
+using System.Linq;
+using System.Text;
+using System.Threading;
+using Lucene.Net.Facet.SortedSet;
+using Lucene.Net.Facet.Taxonomy;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BinaryDocValuesField = Lucene.Net.Documents.BinaryDocValuesField;
+ using Document = Lucene.Net.Documents.Document;
+ using Field = Lucene.Net.Documents.Field;
+ using SortedSetDocValuesField = Lucene.Net.Documents.SortedSetDocValuesField;
+ using StringField = Lucene.Net.Documents.StringField;
+ using SortedSetDocValuesFacetField = Lucene.Net.Facet.SortedSet.SortedSetDocValuesFacetField;
+ using AssociationFacetField = Lucene.Net.Facet.Taxonomy.AssociationFacetField;
+ using FacetLabel = Lucene.Net.Facet.Taxonomy.FacetLabel;
+ using FloatAssociationFacetField = Lucene.Net.Facet.Taxonomy.FloatAssociationFacetField;
+ using IntAssociationFacetField = Lucene.Net.Facet.Taxonomy.IntAssociationFacetField;
+ using TaxonomyWriter = Lucene.Net.Facet.Taxonomy.TaxonomyWriter;
+ using IndexableField = Lucene.Net.Index.IndexableField;
+ using IndexableFieldType = Lucene.Net.Index.IndexableFieldType;
+ using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using IntsRef = Lucene.Net.Util.IntsRef;
+
+ /// <summary>
+ /// Records per-dimension configuration. By default a
+ /// dimension is flat, single valued and does
+ /// not require count for the dimension; use
+ /// the setters in this class to change these settings for
+ /// each dim.
+ ///
+ /// <para><b>NOTE</b>: this configuration is not saved into the
+ /// index, but it's vital, and up to the application to
+ /// ensure, that at search time the provided
+ /// <see cref="FacetsConfig"/> matches what was used during indexing.
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public class FacetsConfig
+ {
+
+ /// <summary>
+ /// Which Lucene field holds the drill-downs and ords (as
+ /// doc values).
+ /// </summary>
+ public const string DEFAULT_INDEX_FIELD_NAME = "$facets";
+
+ private readonly IDictionary<string, DimConfig> fieldTypes = new ConcurrentDictionary<string, DimConfig>();
+
+ // Used only for best-effort detection of app mixing
+ // int/float/bytes in a single indexed field:
+ private readonly IDictionary<string, string> assocDimTypes = new ConcurrentDictionary<string, string>();
+
+ /// <summary>
+ /// Settings for a single dimension.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public sealed class DimConfig
+ {
+     /// <summary>
+     /// Whether the dimension is hierarchical. </summary>
+     public bool hierarchical;
+
+     /// <summary>
+     /// Whether the dimension is multi-valued. </summary>
+     public bool multiValued;
+
+     /// <summary>
+     /// Whether the count/aggregate for the whole dimension is required;
+     /// this is unusual (default is false).
+     /// </summary>
+     public bool requireDimCount;
+
+     /// <summary>
+     /// Name of the field under which this dimension's facet labels are
+     /// indexed; starts out as <c>DEFAULT_INDEX_FIELD_NAME</c>.
+     /// </summary>
+     public string indexFieldName = DEFAULT_INDEX_FIELD_NAME;
+
+     /// <summary>
+     /// Default constructor. </summary>
+     public DimConfig() { }
+ }
+
+ /// <summary>
+ /// Default per-dimension configuration. </summary>
+ public static readonly DimConfig DEFAULT_DIM_CONFIG = new DimConfig();
+
+ /// <summary>
+ /// Creates a configuration with no per-dimension settings. </summary>
+ public FacetsConfig() { }
+
+ /// <summary>
+ /// Configuration used for dimensions that have not been configured
+ /// explicitly. Override when dimensions are not known beforehand and need
+ /// different global defaults, such as <c>multiValued = true</c>.
+ /// </summary>
+ /// <returns> The default configuration to be used for dimensions that
+ /// are not yet set in the <seealso cref="FacetsConfig"/> </returns>
+ protected virtual DimConfig DefaultDimConfig
+ {
+     get { return DEFAULT_DIM_CONFIG; }
+ }
+
+ /// <summary>
+ /// Returns the configuration registered for <paramref name="dimName"/>, or
+ /// the default configuration when none has been set. </summary>
+ public virtual DimConfig GetDimConfig(string dimName)
+ {
+     lock (this)
+     {
+         DimConfig configured;
+         bool found = fieldTypes.TryGetValue(dimName, out configured);
+         return found ? configured : DefaultDimConfig;
+     }
+ }
+
+ /// <summary>
+ /// Pass <c>true</c> if this dimension is hierarchical
+ /// (has paths of depth &gt; 1). A configuration entry is created for the
+ /// dimension if none exists yet.
+ /// </summary>
+ public virtual void SetHierarchical(string dimName, bool v)
+ {
+     lock (this)
+     {
+         DimConfig config;
+         bool known = fieldTypes.TryGetValue(dimName, out config);
+         if (!known)
+         {
+             config = new DimConfig();
+             fieldTypes[dimName] = config;
+         }
+         config.hierarchical = v;
+     }
+ }
+
+ /// <summary>
+ /// Pass <c>true</c> if this dimension may have more than
+ /// one value per document. A configuration entry is created for the
+ /// dimension if none exists yet.
+ /// </summary>
+ public virtual void SetMultiValued(string dimName, bool v)
+ {
+     lock (this)
+     {
+         DimConfig config;
+         bool known = fieldTypes.TryGetValue(dimName, out config);
+         if (!known)
+         {
+             config = new DimConfig();
+             fieldTypes[dimName] = config;
+         }
+         config.multiValued = v;
+     }
+ }
+
+ /// <summary>
+ /// Pass <c>true</c> if at search time you require
+ /// accurate counts of the dimension, i.e. how many
+ /// hits have this dimension. A configuration entry is created for the
+ /// dimension if none exists yet.
+ /// </summary>
+ public virtual void SetRequireDimCount(string dimName, bool v)
+ {
+     lock (this)
+     {
+         DimConfig config;
+         bool known = fieldTypes.TryGetValue(dimName, out config);
+         if (!known)
+         {
+             config = new DimConfig();
+             fieldTypes[dimName] = config;
+         }
+         config.requireDimCount = v;
+     }
+ }
+
+ /// <summary>
+ /// Specify which index field name should hold the
+ /// ordinals for this dimension; this is only used by the
+ /// taxonomy based facet methods. A configuration entry is created for the
+ /// dimension if none exists yet.
+ /// </summary>
+ public virtual void SetIndexFieldName(string dimName, string indexFieldName)
+ {
+     lock (this)
+     {
+         DimConfig config;
+         bool known = fieldTypes.TryGetValue(dimName, out config);
+         if (!known)
+         {
+             config = new DimConfig();
+             fieldTypes[dimName] = config;
+         }
+         config.indexFieldName = indexFieldName;
+     }
+ }
+
+ /// <summary>
+ /// Gets the map from dimension name to its <seealso cref="DimConfig"/>; this is
+ /// the internal map itself, not a copy. </summary>
+ public virtual IDictionary<string, DimConfig> DimConfigs
+ {
+     get { return fieldTypes; }
+ }
+
+ /// <summary>
+ /// Registers <paramref name="dim"/> in <paramref name="seenDims"/>, failing
+ /// if it was already registered (a single-valued dimension must not appear
+ /// twice in one document).
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if the dimension was already seen </exception>
+ private static void CheckSeen(HashSet<string> seenDims, string dim)
+ {
+     // Add returns false when the element is already present.
+     bool firstOccurrence = seenDims.Add(dim);
+     if (!firstOccurrence)
+     {
+         throw new System.ArgumentException("dimension \"" + dim + "\" is not multiValued, but it appears more than once in this document");
+     }
+ }
+
+ /// <summary>
+ /// Translates any added <seealso cref="FacetField"/>s into normal fields for indexing
+ /// without a taxonomy writer (a null <seealso cref="TaxonomyWriter"/> is passed on);
+ /// only use this version if you did not add any taxonomy-based fields (
+ /// <seealso cref="FacetField"/> or <seealso cref="AssociationFacetField"/>).
+ ///
+ /// <para>
+ /// <b>NOTE:</b> you should add the returned document to IndexWriter, not the
+ /// input one!
+ /// </para>
+ /// </summary>
+ public virtual Document Build(Document doc)
+ {
+     TaxonomyWriter noTaxonomy = null;
+     return Build(noTaxonomy, doc);
+ }
+
+ /// <summary>
+ /// Translates any added <seealso cref="FacetField"/>s into normal fields for indexing.
+ /// Facet fields are grouped by target index field, validated against the
+ /// per-dimension configuration and written into a new document; every
+ /// non-facet field of the input is then copied over unchanged.
+ ///
+ /// <para>
+ /// <b>NOTE:</b> you should add the returned document to IndexWriter, not the
+ /// input one!
+ /// </para>
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if a single-valued dimension occurs more
+ /// than once, an association field's dimension is hierarchical or requires a
+ /// dimension count, or association types are mixed within one index field </exception>
+ public virtual Document Build(TaxonomyWriter taxoWriter, Document doc)
+ {
+     // One grouping per kind of facet field, keyed by the index field name:
+     var taxoByIndexField = new Dictionary<string, IList<FacetField>>();
+     var ssdvByIndexField = new Dictionary<string, IList<SortedSetDocValuesFacetField>>();
+     var assocByIndexField = new Dictionary<string, IList<AssociationFacetField>>();
+
+     // Dimensions already encountered, used to enforce single-valued dimensions:
+     var dimsSeen = new HashSet<string>();
+
+     foreach (IndexableField field in doc.Fields)
+     {
+         if (field.FieldType() == FacetField.TYPE)
+         {
+             var taxoField = (FacetField)field;
+             DimConfig config = GetDimConfig(taxoField.dim);
+             if (!config.multiValued)
+             {
+                 CheckSeen(dimsSeen, taxoField.dim);
+             }
+
+             string target = config.indexFieldName;
+             IList<FacetField> bucket;
+             if (!taxoByIndexField.TryGetValue(target, out bucket))
+             {
+                 bucket = new List<FacetField>();
+                 taxoByIndexField[target] = bucket;
+             }
+             bucket.Add(taxoField);
+         }
+
+         if (field.FieldType() == SortedSetDocValuesFacetField.TYPE)
+         {
+             var ssdvField = (SortedSetDocValuesFacetField)field;
+             DimConfig config = GetDimConfig(ssdvField.Dim);
+             if (!config.multiValued)
+             {
+                 CheckSeen(dimsSeen, ssdvField.Dim);
+             }
+
+             string target = config.indexFieldName;
+             IList<SortedSetDocValuesFacetField> bucket;
+             if (!ssdvByIndexField.TryGetValue(target, out bucket))
+             {
+                 bucket = new List<SortedSetDocValuesFacetField>();
+                 ssdvByIndexField[target] = bucket;
+             }
+             bucket.Add(ssdvField);
+         }
+
+         if (field.FieldType() == AssociationFacetField.TYPE)
+         {
+             var assocField = (AssociationFacetField)field;
+             DimConfig config = GetDimConfig(assocField.dim);
+             if (!config.multiValued)
+             {
+                 CheckSeen(dimsSeen, assocField.dim);
+             }
+             if (config.hierarchical)
+             {
+                 throw new System.ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + assocField.dim + "\")");
+             }
+             if (config.requireDimCount)
+             {
+                 throw new System.ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + assocField.dim + "\")");
+             }
+
+             string target = config.indexFieldName;
+             IList<AssociationFacetField> bucket;
+             if (!assocByIndexField.TryGetValue(target, out bucket))
+             {
+                 bucket = new List<AssociationFacetField>();
+                 assocByIndexField[target] = bucket;
+             }
+             bucket.Add(assocField);
+
+             // Best effort: detect mis-matched types in same
+             // indexed field:
+             string type = assocField is IntAssociationFacetField
+                 ? "int"
+                 : (assocField is FloatAssociationFacetField ? "float" : "bytes");
+
+             // NOTE: not thread safe, but this is just best effort:
+             string recordedType;
+             if (assocDimTypes.TryGetValue(target, out recordedType))
+             {
+                 if (!recordedType.Equals(type))
+                 {
+                     throw new System.ArgumentException("mixing incompatible types of AssocationFacetField (" + recordedType + " and " + type + ") in indexed field \"" + target + "\"; use FacetsConfig to change the indexFieldName for each dimension");
+                 }
+             }
+             else
+             {
+                 assocDimTypes[target] = type;
+             }
+         }
+     }
+
+     Document result = new Document();
+
+     ProcessFacetFields(taxoWriter, taxoByIndexField, result);
+     processSSDVFacetFields(ssdvByIndexField, result);
+     ProcessAssocFacetFields(taxoWriter, assocByIndexField, result);
+
+     // Copy every field that is not one of the three facet kinds:
+     foreach (IndexableField field in doc.Fields)
+     {
+         IndexableFieldType ft = field.FieldType();
+         bool isFacetField = ft == FacetField.TYPE || ft == SortedSetDocValuesFacetField.TYPE || ft == AssociationFacetField.TYPE;
+         if (!isFacetField)
+         {
+             result.Add(field);
+         }
+     }
+
+     return result;
+ }
+
+ /// <summary>
+ /// Writes the taxonomy-based facet fields into <paramref name="doc"/>: for
+ /// every index field, each category is added to the taxonomy, its ordinal
+ /// (plus parent ordinals where the configuration asks for them) is
+ /// collected, drill-down terms are added, and finally the ordinals are
+ /// stored as one binary doc values field.
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if a non-hierarchical dimension has a
+ /// path with more than one component </exception>
+ private void ProcessFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<FacetField>> byField, Document doc)
+ {
+     foreach (KeyValuePair<string, IList<FacetField>> entry in byField)
+     {
+         string indexFieldName = entry.Key;
+         var ordinals = new IntsRef(32);
+
+         foreach (FacetField facetField in entry.Value)
+         {
+             DimConfig dimConfig = GetDimConfig(facetField.dim);
+             bool tooDeep = facetField.path.Length > 1 && !dimConfig.hierarchical;
+             if (tooDeep)
+             {
+                 throw new System.ArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.Length + " components");
+             }
+
+             var label = new FacetLabel(facetField.dim, facetField.path);
+
+             checkTaxoWriter(taxoWriter);
+             int ordinal = taxoWriter.AddCategory(label);
+             if (ordinals.Length == ordinals.Ints.Length)
+             {
+                 ordinals.Grow(ordinals.Length + 1);
+             }
+             ordinals.Ints[ordinals.Length++] = ordinal;
+
+             bool addParents = dimConfig.multiValued && (dimConfig.hierarchical || dimConfig.requireDimCount);
+             if (addParents)
+             {
+                 // Add all parents too:
+                 for (int parent = taxoWriter.GetParent(ordinal); parent > 0; parent = taxoWriter.GetParent(parent))
+                 {
+                     if (ordinals.Ints.Length == ordinals.Length)
+                     {
+                         ordinals.Grow(ordinals.Length + 1);
+                     }
+                     ordinals.Ints[ordinals.Length++] = parent;
+                 }
+
+                 if (!dimConfig.requireDimCount)
+                 {
+                     // Remove last (dimension) ord:
+                     ordinals.Length--;
+                 }
+             }
+
+             // Drill down: one term per path prefix
+             for (int prefixLength = 1; prefixLength <= label.length; prefixLength++)
+             {
+                 string term = PathToString(label.components, prefixLength);
+                 doc.Add(new StringField(indexFieldName, term, Field.Store.NO));
+             }
+         }
+
+         // Facet counts:
+         // DocValues are considered stored fields:
+         BytesRef encoded = DedupAndEncode(ordinals);
+         doc.Add(new BinaryDocValuesField(indexFieldName, encoded));
+     }
+ }
+
+ /// <summary>
+ /// Writes the sorted-set doc values facet fields into <paramref name="doc"/>:
+ /// for each field the encoded dim/label path is added as a sorted-set doc
+ /// value (for counting) and as drill-down terms for both the full path and
+ /// the dimension alone.
+ /// </summary>
+ public void processSSDVFacetFields(IDictionary<string, IList<SortedSetDocValuesFacetField>> byField, Document doc)
+ {
+     foreach (var entry in byField)
+     {
+         string indexFieldName = entry.Key;
+         IList<SortedSetDocValuesFacetField> facetFields = entry.Value;
+
+         foreach (SortedSetDocValuesFacetField facetField in facetFields)
+         {
+             var label = new FacetLabel(facetField.Dim, facetField.Label);
+             string encodedPath = PathToString(label.components, label.length);
+
+             // For facet counts:
+             var countValue = new BytesRef(encodedPath);
+             doc.Add(new SortedSetDocValuesField(indexFieldName, countValue));
+
+             // For drill-down:
+             doc.Add(new StringField(indexFieldName, encodedPath, Field.Store.NO));
+             doc.Add(new StringField(indexFieldName, facetField.Dim, Field.Store.NO));
+         }
+     }
+ }
+
+ /// <summary>
+ /// Writes the association facet fields into <paramref name="doc"/>. For every
+ /// index field a single byte buffer is built that holds, per association,
+ /// the category ordinal as 4 big-endian bytes followed by the association's
+ /// own bytes; drill-down terms are added for every path prefix and the
+ /// buffer is stored as one binary doc values field.
+ /// </summary>
+ private void ProcessAssocFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<AssociationFacetField>> byField, Document doc)
+ {
+     foreach (KeyValuePair<string, IList<AssociationFacetField>> ent in byField)
+     {
+         sbyte[] bytes = new sbyte[16];
+         int upto = 0;
+         string indexFieldName = ent.Key;
+         foreach (AssociationFacetField field in ent.Value)
+         {
+             // NOTE: we don't add parents for associations
+             checkTaxoWriter(taxoWriter);
+             FacetLabel label = new FacetLabel(field.dim, field.path);
+             int ordinal = taxoWriter.AddCategory(label);
+             if (upto + 4 > bytes.Length)
+             {
+                 bytes = ArrayUtil.Grow(bytes, upto + 4);
+             }
+             // big-endian; the narrowing casts intentionally keep only the low
+             // 8 bits, so they must not trap when the assembly is compiled
+             // with overflow checking enabled:
+             unchecked
+             {
+                 bytes[upto++] = (sbyte)(ordinal >> 24);
+                 bytes[upto++] = (sbyte)(ordinal >> 16);
+                 bytes[upto++] = (sbyte)(ordinal >> 8);
+                 bytes[upto++] = (sbyte)ordinal;
+             }
+             if (upto + field.assoc.Length > bytes.Length)
+             {
+                 bytes = ArrayUtil.Grow(bytes, upto + field.assoc.Length);
+             }
+             Array.Copy(field.assoc.Bytes, field.assoc.Offset, bytes, upto, field.assoc.Length);
+             upto += field.assoc.Length;
+
+             // Drill down:
+             for (int i = 1; i <= label.length; i++)
+             {
+                 doc.Add(new StringField(indexFieldName, PathToString(label.components, i), Field.Store.NO));
+             }
+         }
+         doc.Add(new BinaryDocValuesField(indexFieldName, new BytesRef(bytes, 0, upto)));
+     }
+ }
+
+ /// <summary>
+ /// Encodes ordinals into a BytesRef; expert: subclass can
+ /// override this to change encoding.
+ /// The ordinals are sorted in place, duplicates are dropped, and each
+ /// remaining ordinal is written as the difference to the previous one
+ /// (the first one as-is) using 1 to 5 bytes of 7 payload bits each,
+ /// most significant group first, with the high bit set on every byte
+ /// except the last.
+ /// </summary>
+ protected internal virtual BytesRef DedupAndEncode(IntsRef ordinals)
+ {
+ // NOTE: sorts the caller's array in place.
+ Array.Sort(ordinals.Ints, ordinals.Offset, ordinals.Length);
+ // Worst case is 5 bytes per ordinal.
+ sbyte[] bytes = new sbyte[5 * ordinals.Length];
+ int lastOrd = -1;
+ int upto = 0;
+ for (int i = 0; i < ordinals.Length; i++)
+ {
+ int ord = ordinals.Ints[ordinals.Offset + i];
+ // ord could be == lastOrd, so we must dedup:
+ if (ord > lastOrd)
+ {
+ int delta;
+ if (lastOrd == -1)
+ {
+ // First ordinal: stored as its absolute value.
+ delta = ord;
+ }
+ else
+ {
+ delta = ord - lastOrd;
+ }
+ // Pick the shortest form that can hold delta:
+ if ((delta & ~0x7F) == 0)
+ {
+ // fits in 7 bits: 1 byte
+ bytes[upto] = (sbyte)delta;
+ upto++;
+ }
+ else if ((delta & ~0x3FFF) == 0)
+ {
+ // fits in 14 bits: 2 bytes
+ bytes[upto] = unchecked((sbyte)(0x80 | ((delta & 0x3F80) >> 7)));
+ bytes[upto + 1] = (sbyte)(delta & 0x7F);
+ upto += 2;
+ }
+ else if ((delta & ~0x1FFFFF) == 0)
+ {
+ // fits in 21 bits: 3 bytes
+ bytes[upto] = unchecked((sbyte)(0x80 | ((delta & 0x1FC000) >> 14)));
+ bytes[upto + 1] = unchecked((sbyte)(0x80 | ((delta & 0x3F80) >> 7)));
+ bytes[upto + 2] = (sbyte)(delta & 0x7F);
+ upto += 3;
+ }
+ else if ((delta & ~0xFFFFFFF) == 0)
+ {
+ // fits in 28 bits: 4 bytes
+ bytes[upto] = unchecked((sbyte)(0x80 | ((delta & 0xFE00000) >> 21)));
+ bytes[upto + 1] = unchecked((sbyte)(0x80 | ((delta & 0x1FC000) >> 14)));
+ bytes[upto + 2] = unchecked((sbyte)(0x80 | ((delta & 0x3F80) >> 7)));
+ bytes[upto + 3] = (sbyte)(delta & 0x7F);
+ upto += 4;
+ }
+ else
+ {
+ // anything larger: 5 bytes
+ bytes[upto] = unchecked((sbyte)(0x80 | ((delta & 0xF0000000) >> 28)));
+ bytes[upto + 1] = unchecked((sbyte)(0x80 | ((delta & 0xFE00000) >> 21)));
+ bytes[upto + 2] = unchecked((sbyte)(0x80 | ((delta & 0x1FC000) >> 14)));
+ bytes[upto + 3] = unchecked((sbyte)(0x80 | ((delta & 0x3F80) >> 7)));
+ bytes[upto + 4] = (sbyte)(delta & 0x7F);
+ upto += 5;
+ }
+ lastOrd = ord;
+ }
+ }
+ // Only the first upto bytes were written.
+ return new BytesRef(bytes, 0, upto);
+ }
+
+ /// <summary>
+ /// Ensures a taxonomy writer is available before taxonomy-based fields are
+ /// processed.
+ /// </summary>
+ /// <exception cref="System.InvalidOperationException"> if <paramref name="taxoWriter"/> is null </exception>
+ private void checkTaxoWriter(TaxonomyWriter taxoWriter)
+ {
+     if (taxoWriter == null)
+     {
+         // This is an invalid-state condition, not a thread-state problem,
+         // so ThreadStateException was the wrong type to throw here.
+         throw new System.InvalidOperationException("a non-null TaxonomyWriter must be provided when indexing FacetField or AssociationFacetField");
+     }
+ }
+
+ // Joins the path components together:
+ private const char DELIM_CHAR = '\u001F';
+
+ // Escapes any occurrence of the delimiter (or of the escape character itself) inside a path component:
+ private const char ESCAPE_CHAR = '\u001E';
+
+ /// <summary>
+ /// Encodes <paramref name="dim"/> followed by the components of
+ /// <paramref name="path"/> into a single string. </summary>
+ public static string PathToString(string dim, string[] path)
+ {
+     int componentCount = 1 + path.Length;
+     string[] components = new string[componentCount];
+     components[0] = dim;
+     Array.Copy(path, 0, components, 1, path.Length);
+     return PathToString(components, componentCount);
+ }
+
+ /// <summary>
+ /// Encodes all components of <paramref name="path"/> into a single string. </summary>
+ public static string PathToString(string[] path)
+ {
+     int componentCount = path.Length;
+     return PathToString(path, componentCount);
+ }
+
+ /// <summary>
+ /// Turns the first <paramref name="length"/> elements of
+ /// <paramref name="path"/> into an encoded string: components are joined
+ /// with the delimiter character, and any delimiter or escape character
+ /// inside a component is preceded by the escape character.
+ /// </summary>
+ /// <exception cref="System.ArgumentException"> if a component is the empty string </exception>
+ public static string PathToString(string[] path, int length)
+ {
+     if (length == 0)
+     {
+         return "";
+     }
+
+     var encoded = new StringBuilder();
+     for (int index = 0; index < length; index++)
+     {
+         string component = path[index];
+         if (component.Length == 0)
+         {
+             throw new System.ArgumentException("each path component must have length > 0 (got: \"\")");
+         }
+
+         // Separator goes between components, never after the last one.
+         if (index > 0)
+         {
+             encoded.Append(DELIM_CHAR);
+         }
+
+         foreach (char ch in component)
+         {
+             bool needsEscape = ch == DELIM_CHAR || ch == ESCAPE_CHAR;
+             if (needsEscape)
+             {
+                 encoded.Append(ESCAPE_CHAR);
+             }
+             encoded.Append(ch);
+         }
+     }
+
+     return encoded.ToString();
+ }
+
+ /// <summary>
+ /// Turns an encoded string (from a previous call to
+ /// <c>PathToString</c>) back into the original
+ /// <c>string[]</c>: unescaped delimiters split components and an
+ /// escape character makes the following character literal.
+ /// </summary>
+ public static string[] StringToPath(string s)
+ {
+     int length = s.Length;
+     if (length == 0)
+     {
+         return new string[0];
+     }
+
+     var parts = new List<string>();
+     // A component can never be longer than the whole input.
+     char[] buffer = new char[length];
+     int upto = 0;
+     bool lastEscape = false;
+
+     foreach (char ch in s)
+     {
+         if (!lastEscape && ch == ESCAPE_CHAR)
+         {
+             lastEscape = true;
+             continue;
+         }
+         if (!lastEscape && ch == DELIM_CHAR)
+         {
+             parts.Add(new string(buffer, 0, upto));
+             upto = 0;
+             continue;
+         }
+
+         // Ordinary character, or any character following an escape.
+         buffer[upto++] = ch;
+         lastEscape = false;
+     }
+
+     parts.Add(new string(buffer, 0, upto));
+     Debug.Assert(!lastEscape);
+     return parts.ToArray();
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/LabelAndValue.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/LabelAndValue.cs b/Lucene.Net.Facet/LabelAndValue.cs
new file mode 100644
index 0000000..0cbcddc
--- /dev/null
+++ b/Lucene.Net.Facet/LabelAndValue.cs
@@ -0,0 +1,66 @@
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Single label and its value, usually contained in a
+ /// <see cref="FacetResult"/>. Instances are immutable and compare by value:
+ /// two instances are equal when both their labels and their values are equal.
+ /// </summary>
+ public sealed class LabelAndValue
+ {
+     /// <summary>
+     /// Facet's label. </summary>
+     public readonly string label;
+
+     /// <summary>
+     /// Value associated with this label. </summary>
+     public readonly float value;
+
+     /// <summary>
+     /// Sole constructor. </summary>
+     /// <param name="label"> The facet's label. </param>
+     /// <param name="value"> The value associated with <paramref name="label"/>. </param>
+     public LabelAndValue(string label, float value)
+     {
+         this.label = label;
+         this.value = value;
+     }
+
+     /// <summary>
+     /// Returns the label followed by the value in parentheses. </summary>
+     public override string ToString()
+     {
+         return label + " (" + value + ")";
+     }
+
+     /// <summary>
+     /// Value equality on label and value. </summary>
+     /// <returns> <c>false</c> when <paramref name="_other"/> is null or not a <see cref="LabelAndValue"/>. </returns>
+     public override bool Equals(object _other)
+     {
+         LabelAndValue other = _other as LabelAndValue;
+         if (other == null)
+         {
+             return false;
+         }
+         // Static string.Equals tolerates a null label on either side.
+         return string.Equals(label, other.label) && value.Equals(other.value);
+     }
+
+     /// <summary>
+     /// Hash code consistent with <see cref="Equals(object)"/>. </summary>
+     public override int GetHashCode()
+     {
+         int labelHash = label == null ? 0 : label.GetHashCode();
+         // The multiplication is expected to wrap around; keep it from throwing
+         // when the assembly is compiled with checked arithmetic.
+         unchecked
+         {
+             return labelHash + 1439 * value.GetHashCode();
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Lucene.Net.Facet.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Lucene.Net.Facet.csproj b/Lucene.Net.Facet/Lucene.Net.Facet.csproj
new file mode 100644
index 0000000..f7a60f2
--- /dev/null
+++ b/Lucene.Net.Facet/Lucene.Net.Facet.csproj
@@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{48F7884A-9454-4E88-8413-9D35992CB440}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Facet</RootNamespace>
+ <AssemblyName>Lucene.Net.Facet</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="DrillDownQuery.cs" />
+ <Compile Include="DrillSideways.cs" />
+ <Compile Include="DrillSidewaysQuery.cs" />
+ <Compile Include="DrillSidewaysScorer.cs" />
+ <Compile Include="FacetField.cs" />
+ <Compile Include="FacetResult.cs" />
+ <Compile Include="Facets.cs" />
+ <Compile Include="FacetsCollector.cs" />
+ <Compile Include="FacetsConfig.cs" />
+ <Compile Include="LabelAndValue.cs" />
+ <Compile Include="MultiFacets.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="RandomSamplingFacetsCollector.cs" />
+ <Compile Include="Range\DoubleRange.cs" />
+ <Compile Include="Range\DoubleRangeFacetCounts.cs" />
+ <Compile Include="Range\LongRange.cs" />
+ <Compile Include="Range\LongRangeCounter.cs" />
+ <Compile Include="Range\LongRangeFacetCounts.cs" />
+ <Compile Include="Range\Range.cs" />
+ <Compile Include="Range\RangeFacetCounts.cs" />
+ <Compile Include="SortedSet\DefaultSortedSetDocValuesReaderState.cs" />
+ <Compile Include="SortedSet\SortedSetDocValuesFacetCounts.cs" />
+ <Compile Include="SortedSet\SortedSetDocValuesFacetField.cs" />
+ <Compile Include="SortedSet\SortedSetDocValuesReaderState.cs" />
+ <Compile Include="Taxonomy\AssociationFacetField.cs" />
+ <Compile Include="Taxonomy\CachedOrdinalsReader.cs" />
+ <Compile Include="Taxonomy\CategoryPath.cs" />
+ <Compile Include="Taxonomy\Directory\Consts.cs" />
+ <Compile Include="Taxonomy\Directory\DirectoryTaxonomyReader.cs" />
+ <Compile Include="Taxonomy\Directory\DirectoryTaxonomyWriter.cs" />
+ <Compile Include="Taxonomy\Directory\TaxonomyIndexArrays.cs" />
+ <Compile Include="Taxonomy\DocValuesOrdinalsReader.cs" />
+ <Compile Include="Taxonomy\FacetLabel.cs" />
+ <Compile Include="Taxonomy\FastTaxonomyFacetCounts.cs" />
+ <Compile Include="Taxonomy\FloatAssociationFacetField.cs" />
+ <Compile Include="Taxonomy\FloatTaxonomyFacets.cs" />
+ <Compile Include="Taxonomy\IntAssociationFacetField.cs" />
+ <Compile Include="Taxonomy\IntTaxonomyFacets.cs" />
+ <Compile Include="Taxonomy\LRUHashMap.cs" />
+ <Compile Include="Taxonomy\OrdinalsReader.cs" />
+ <Compile Include="Taxonomy\ParallelTaxonomyArrays.cs" />
+ <Compile Include="Taxonomy\PrintTaxonomyStats.cs" />
+ <Compile Include="Taxonomy\SearcherTaxonomyManager.cs" />
+ <Compile Include="Taxonomy\TaxonomyFacetCounts.cs" />
+ <Compile Include="Taxonomy\TaxonomyFacets.cs" />
+ <Compile Include="Taxonomy\TaxonomyFacetSumFloatAssociations.cs" />
+ <Compile Include="Taxonomy\TaxonomyFacetSumIntAssociations.cs" />
+ <Compile Include="Taxonomy\TaxonomyFacetSumValueSource.cs" />
+ <Compile Include="Taxonomy\TaxonomyReader.cs" />
+ <Compile Include="Taxonomy\TaxonomyWriter.cs" />
+ <Compile Include="Taxonomy\WriterCache\CategoryPathUtils.cs" />
+ <Compile Include="Taxonomy\WriterCache\CharBlockArray.cs" />
+ <Compile Include="Taxonomy\WriterCache\Cl2oTaxonomyWriterCache.cs" />
+ <Compile Include="Taxonomy\WriterCache\CollisionMap.cs" />
+ <Compile Include="Taxonomy\WriterCache\CompactLabelToOrdinal.cs" />
+ <Compile Include="Taxonomy\WriterCache\LabelToOrdinal.cs" />
+ <Compile Include="Taxonomy\WriterCache\LruTaxonomyWriterCache.cs" />
+ <Compile Include="Taxonomy\WriterCache\NameHashIntCacheLRU.cs" />
+ <Compile Include="Taxonomy\WriterCache\NameIntCacheLRU.cs" />
+ <Compile Include="Taxonomy\WriterCache\TaxonomyWriterCache.cs" />
+ <Compile Include="TopOrdAndFloatQueue.cs" />
+ <Compile Include="TopOrdAndIntQueue.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\src\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\src\Lucene.Net.Queries\Lucene.Net.Queries.csproj">
+ <Project>{69d7956c-c2cc-4708-b399-a188fec384c4}</Project>
+ <Name>Lucene.Net.Queries</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/MultiFacets.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/MultiFacets.cs b/Lucene.Net.Facet/MultiFacets.cs
new file mode 100644
index 0000000..b980a5d
--- /dev/null
+++ b/Lucene.Net.Facet/MultiFacets.cs
@@ -0,0 +1,103 @@
+using System.Collections.Generic;
+using Lucene.Net.Facet;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Maps specified dims to provided <see cref="Facets"/> impls; for any other
+ /// dim, uses the default <see cref="Facets"/> impl (if one was given).
+ /// </summary>
+ public class MultiFacets : Facets
+ {
+     private readonly IDictionary<string, Facets> dimToFacets;
+     private readonly Facets defaultFacets;
+
+     /// <summary>
+     /// Create this, with the specified default <see cref="Facets"/>
+     /// for fields not included in <paramref name="dimToFacets"/>.
+     /// </summary>
+     /// <param name="dimToFacets"> Per-dimension <see cref="Facets"/> implementations. </param>
+     /// <param name="defaultFacets"> Fallback used for dims missing from
+     /// <paramref name="dimToFacets"/>; may be <c>null</c>, in which case such dims are rejected. </param>
+     public MultiFacets(IDictionary<string, Facets> dimToFacets, Facets defaultFacets = null)
+     {
+         this.dimToFacets = dimToFacets;
+         this.defaultFacets = defaultFacets;
+     }
+
+     /// <summary>
+     /// Delegates to the <see cref="Facets"/> registered for <paramref name="dim"/>,
+     /// or to the default one when the dim is not registered.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> If the dim is not registered and there is no default. </exception>
+     public override FacetResult GetTopChildren(int topN, string dim, params string[] path)
+     {
+         return ResolveFacets(dim).GetTopChildren(topN, dim, path);
+     }
+
+     /// <summary>
+     /// Delegates to the <see cref="Facets"/> registered for <paramref name="dim"/>,
+     /// or to the default one when the dim is not registered.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> If the dim is not registered and there is no default. </exception>
+     public override float GetSpecificValue(string dim, params string[] path)
+     {
+         return ResolveFacets(dim).GetSpecificValue(dim, path);
+     }
+
+     /// <summary>
+     /// Returns the top children of every explicitly mapped dim, followed by the
+     /// default impl's results for dims that are not explicitly mapped.
+     /// </summary>
+     public override IList<FacetResult> GetAllDims(int topN)
+     {
+         IList<FacetResult> results = new List<FacetResult>();
+
+         // First add the specific dim's facets:
+         foreach (KeyValuePair<string, Facets> ent in dimToFacets)
+         {
+             results.Add(ent.Value.GetTopChildren(topN, ent.Key));
+         }
+
+         if (defaultFacets != null)
+         {
+             // Then add all default facets as long as we didn't
+             // already add that dim:
+             foreach (FacetResult result in defaultFacets.GetAllDims(topN))
+             {
+                 if (dimToFacets.ContainsKey(result.dim) == false)
+                 {
+                     results.Add(result);
+                 }
+             }
+         }
+
+         return results;
+     }
+
+     /// <summary>
+     /// Picks the <see cref="Facets"/> responsible for <paramref name="dim"/>.
+     /// </summary>
+     private Facets ResolveFacets(string dim)
+     {
+         Facets facets;
+         // TryGetValue rather than the indexer: the indexer throws
+         // KeyNotFoundException for a missing key, which would bypass the
+         // default-facets fallback. A null mapping is treated like a missing one.
+         if (dimToFacets.TryGetValue(dim, out facets) && facets != null)
+         {
+             return facets;
+         }
+         if (defaultFacets == null)
+         {
+             throw new System.ArgumentException("invalid dim \"" + dim + "\"");
+         }
+         return defaultFacets;
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Properties/AssemblyInfo.cs b/Lucene.Net.Facet/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..741693d
--- /dev/null
+++ b/Lucene.Net.Facet/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Facet")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Facet")]
+[assembly: AssemblyCopyright("Copyright © 2014")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("8dd11ab2-c5b3-4691-99da-2941f27e0e10")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/RandomSamplingFacetsCollector.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/RandomSamplingFacetsCollector.cs b/Lucene.Net.Facet/RandomSamplingFacetsCollector.cs
new file mode 100644
index 0000000..02a4d9e
--- /dev/null
+++ b/Lucene.Net.Facet/RandomSamplingFacetsCollector.cs
@@ -0,0 +1,323 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Facet;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Facet
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using DimConfig = FacetsConfig.DimConfig;
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using Term = Lucene.Net.Index.Term;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using FixedBitSet = Lucene.Net.Util.FixedBitSet;
+
+ /// <summary>
+ /// Collects hits for subsequent faceting, using sampling if needed. Once you've
+ /// run a search and collected hits into this, instantiate one of the
+ /// <see cref="Facets"/> subclasses to do the facet counting. Note that this collector
+ /// does not collect the scores of matching docs (i.e. the <c>scores</c> of each
+ /// <see cref="FacetsCollector.MatchingDocs"/> is <c>null</c>).
+ /// <para>
+ /// If you require the original set of hits, you can read
+ /// <see cref="OriginalMatchingDocs"/>. Also, since the counts of the top-facets
+ /// are based on the sampled set, you can amortize the counts by calling
+ /// <see cref="AmortizeFacetCounts"/>.
+ /// </para>
+ /// </summary>
+ public class RandomSamplingFacetsCollector : FacetsCollector
+ {
+
+     /// <summary>
+     /// Faster alternative for java.util.Random, inspired by
+     /// http://dmurphy747.wordpress.com/2011/03/23/xorshift-vs-random-
+     /// performance-in-java/
+     /// <para>
+     /// Has a period of 2^64-1
+     /// </para>
+     /// </summary>
+     private class XORShift64Random
+     {
+
+         internal long x;
+
+         /// <summary>
+         /// Creates a xorshift random generator using the provided seed </summary>
+         public XORShift64Random(long seed)
+         {
+             // A zero state would stay zero under the shifts/xors below, so a
+             // fixed non-zero constant is substituted for a zero seed.
+             x = seed == 0 ? 0xdeadbeef : seed;
+         }
+
+         /// <summary>
+         /// Get the next random long value </summary>
+         public virtual long RandomLong()
+         {
+             x ^= (x << 21);
+             // Cast through ulong to get a logical (unsigned) right shift.
+             x ^= ((long)((ulong)x >> 35));
+             x ^= (x << 4);
+             return x;
+         }
+
+         /// <summary>
+         /// Get the next random int, between 0 (inclusive) and n (exclusive) </summary>
+         public virtual int NextInt(int n)
+         {
+             int res = (int)(RandomLong() % n);
+             return (res < 0) ? -res : res;
+         }
+
+     }
+
+     // Sentinel for "not computed yet" / "nothing carried over".
+     private const int NOT_CALCULATED = -1;
+
+     private readonly int sampleSize;
+     private readonly XORShift64Random random;
+
+     private double samplingRate;
+     private IList<MatchingDocs> sampledDocs;
+     private int totalHits = NOT_CALCULATED;
+     // Number of documents still belonging to the bin that was left unfinished
+     // at the end of the previous segment.
+     private int leftoverBin = NOT_CALCULATED;
+     // Position, within that unfinished bin, of the document still to be sampled.
+     private int leftoverIndex = NOT_CALCULATED;
+
+     /// <summary>
+     /// Constructor with the given sample size and default seed.
+     /// </summary>
+     /// <param name="sampleSize"> The preferred sample size. </param>
+     public RandomSamplingFacetsCollector(int sampleSize)
+         : this(sampleSize, 0)
+     {
+     }
+
+     /// <summary>
+     /// Constructor with the given sample size and seed.
+     /// </summary>
+     /// <param name="sampleSize">
+     ///          The preferred sample size. If the number of hits is greater than
+     ///          the size, sampling will be done using a sample ratio of sampling
+     ///          size / totalN. For example: 1000 hits, sample size = 10 results in
+     ///          samplingRatio of 0.01. If the number of hits is lower, no sampling
+     ///          is done at all </param>
+     /// <param name="seed">
+     ///          The random seed. If <c>0</c> then a seed will be chosen for you. </param>
+     public RandomSamplingFacetsCollector(int sampleSize, long seed)
+         : base(false)
+     {
+         this.sampleSize = sampleSize;
+         this.random = new XORShift64Random(seed);
+         this.sampledDocs = null;
+     }
+
+     /// <summary>
+     /// Returns the sampled list of the matching documents. Note that a
+     /// <see cref="FacetsCollector.MatchingDocs"/> instance is returned per segment, even
+     /// if no hits from that segment are included in the sampled set.
+     /// <para>
+     /// Note: One or more of the MatchingDocs might be empty (not containing any
+     /// hits) as result of sampling.
+     /// </para>
+     /// <para>
+     /// Note: <c>MatchingDocs.totalHits</c> is copied from the original
+     /// MatchingDocs, scores is set to <c>null</c>
+     /// </para>
+     /// <para>
+     /// When the total number of hits does not exceed the sample size, the
+     /// original (unsampled) list is returned.
+     /// </para>
+     /// </summary>
+     public override IList<MatchingDocs> GetMatchingDocs
+     {
+         get
+         {
+             IList<MatchingDocs> matchingDocs = base.GetMatchingDocs;
+
+             if (totalHits == NOT_CALCULATED)
+             {
+                 totalHits = 0;
+                 foreach (MatchingDocs md in matchingDocs)
+                 {
+                     totalHits += md.totalHits;
+                 }
+             }
+
+             if (totalHits <= sampleSize)
+             {
+                 return matchingDocs;
+             }
+
+             // The sample is built once and cached for subsequent reads.
+             if (sampledDocs == null)
+             {
+                 samplingRate = (1.0 * sampleSize) / totalHits;
+                 sampledDocs = CreateSampledDocs(matchingDocs);
+             }
+             return sampledDocs;
+         }
+     }
+
+     /// <summary>
+     /// Returns the original matching documents. </summary>
+     public virtual IList<MatchingDocs> OriginalMatchingDocs
+     {
+         get
+         {
+             return base.GetMatchingDocs;
+         }
+     }
+
+     /// <summary>
+     /// Create a sampled copy of the matching documents list. </summary>
+     private IList<MatchingDocs> CreateSampledDocs(IList<MatchingDocs> matchingDocsList)
+     {
+         IList<MatchingDocs> sampledDocsList = new List<MatchingDocs>(matchingDocsList.Count);
+         foreach (MatchingDocs docs in matchingDocsList)
+         {
+             sampledDocsList.Add(CreateSample(docs));
+         }
+         return sampledDocsList;
+     }
+
+     /// <summary>
+     /// Create a sample of the given hits: the hits are walked in bins of
+     /// <c>1 / samplingRate</c> documents and one randomly chosen document per
+     /// bin is kept. An unfinished bin is carried over to the next call through
+     /// <c>leftoverBin</c> / <c>leftoverIndex</c>.
+     /// </summary>
+     /// <exception cref="Exception"> Wrapping any <see cref="IOException"/> raised while iterating the hits. </exception>
+     private MatchingDocs CreateSample(MatchingDocs docs)
+     {
+         int maxdoc = docs.context.Reader.MaxDoc;
+
+         // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
+         FixedBitSet sampleDocs = new FixedBitSet(maxdoc);
+
+         int binSize = (int)(1.0 / samplingRate);
+
+         try
+         {
+             int counter = 0;
+             int limit, randomIndex;
+             if (leftoverBin != NOT_CALCULATED)
+             {
+                 limit = leftoverBin;
+                 // either NOT_CALCULATED, which means we already sampled from that bin,
+                 // or the next document to sample
+                 randomIndex = leftoverIndex;
+             }
+             else
+             {
+                 limit = binSize;
+                 randomIndex = random.NextInt(binSize);
+             }
+             DocIdSetIterator it = docs.bits.GetIterator();
+             for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc())
+             {
+                 if (counter == randomIndex)
+                 {
+                     sampleDocs.Set(doc);
+                 }
+                 counter++;
+                 if (counter >= limit)
+                 {
+                     // Bin exhausted: start a fresh full-size bin.
+                     counter = 0;
+                     limit = binSize;
+                     randomIndex = random.NextInt(binSize);
+                 }
+             }
+
+             if (counter == 0)
+             {
+                 // we either exhausted the bin and the iterator at the same time, or
+                 // this segment had no results. in the latter case we might want to
+                 // carry leftover to the next segment as is, but that complicates the
+                 // code and doesn't seem so important.
+                 leftoverBin = leftoverIndex = NOT_CALCULATED;
+             }
+             else
+             {
+                 leftoverBin = limit - counter;
+                 if (randomIndex > counter)
+                 {
+                     // the document to sample is in the next bin
+                     leftoverIndex = randomIndex - counter;
+                 }
+                 else if (randomIndex < counter)
+                 {
+                     // we sampled a document from the bin, so just skip over remaining
+                     // documents in the bin in the next segment.
+                     leftoverIndex = NOT_CALCULATED;
+                 }
+             }
+
+             return new MatchingDocs(docs.context, sampleDocs, docs.totalHits, null);
+         }
+         catch (IOException e)
+         {
+             // Keep the original failure as message and inner exception instead
+             // of discarding it.
+             throw new Exception(e.ToString(), e);
+         }
+     }
+
+     /// <summary>
+     /// Note: if you use a counting <see cref="Facets"/> implementation, you can amortize the
+     /// sampled counts by calling this method. Uses the <see cref="FacetsConfig"/> and
+     /// the <see cref="IndexSearcher"/> to determine the upper bound for each facet value.
+     /// </summary>
+     /// <param name="res"> The sampled result to correct; may be <c>null</c>. </param>
+     /// <param name="config"> Supplies the dimension configuration (index field name) for <c>res.dim</c>. </param>
+     /// <param name="searcher"> Its reader supplies the document frequencies used as upper bounds. </param>
+     /// <returns> <paramref name="res"/> itself when it is <c>null</c> or when the total
+     /// hits did not exceed the sample size; otherwise a new result with corrected counts. </returns>
+     public virtual FacetResult AmortizeFacetCounts(FacetResult res, FacetsConfig config, IndexSearcher searcher)
+     {
+         if (res == null || totalHits <= sampleSize)
+         {
+             return res;
+         }
+
+         LabelAndValue[] fixedLabelValues = new LabelAndValue[res.labelValues.Length];
+         IndexReader reader = searcher.IndexReader;
+         DimConfig dimConfig = config.GetDimConfig(res.dim);
+
+         // +2 to prepend dimension, append child label
+         string[] childPath = new string[res.path.Length + 2];
+         childPath[0] = res.dim;
+
+         Array.Copy(res.path, 0, childPath, 1, res.path.Length); // reuse
+
+         for (int i = 0; i < res.labelValues.Length; i++)
+         {
+             childPath[res.path.Length + 1] = res.labelValues[i].label;
+             string fullPath = FacetsConfig.PathToString(childPath, childPath.Length);
+             // The scaled-up count can never exceed the number of documents
+             // actually containing the term.
+             int max = reader.DocFreq(new Term(dimConfig.indexFieldName, fullPath));
+             int correctedCount = (int)((double)res.labelValues[i].value / samplingRate);
+             correctedCount = Math.Min(max, correctedCount);
+             fixedLabelValues[i] = new LabelAndValue(res.labelValues[i].label, correctedCount);
+         }
+
+         // cap the total count on the total number of non-deleted documents in the reader
+         int correctedTotalCount = (int)res.value;
+         if (correctedTotalCount > 0)
+         {
+             correctedTotalCount = Math.Min(reader.NumDocs, (int)((double)res.value / samplingRate));
+         }
+
+         return new FacetResult(res.dim, res.path, correctedTotalCount, fixedLabelValues, res.childCount);
+     }
+
+     /// <summary>
+     /// Returns the sampling rate that was used. </summary>
+     public virtual double SamplingRate
+     {
+         get
+         {
+             return samplingRate;
+         }
+     }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Range/DoubleRange.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Range/DoubleRange.cs b/Lucene.Net.Facet/Range/DoubleRange.cs
new file mode 100644
index 0000000..6e7e5ae
--- /dev/null
+++ b/Lucene.Net.Facet/Range/DoubleRange.cs
@@ -0,0 +1,242 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Range
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+ using FunctionValues = Lucene.Net.Queries.Function.FunctionValues;
+ using ValueSource = Lucene.Net.Queries.Function.ValueSource;
+ using DocIdSet = Lucene.Net.Search.DocIdSet;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using Filter = Lucene.Net.Search.Filter;
+ using Bits = Lucene.Net.Util.Bits;
+ using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+ /// <summary>
+ /// Represents a range over double values.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public sealed class DoubleRange : Range
+ {
+     // Effective inclusive bounds: equal to min/max for inclusive endpoints,
+     // or the adjacent representable double for exclusive endpoints.
+     internal readonly double minIncl;
+     internal readonly double maxIncl;
+
+     /// <summary>
+     /// Minimum. </summary>
+     public readonly double min;
+
+     /// <summary>
+     /// Maximum. </summary>
+     public readonly double max;
+
+     /// <summary>
+     /// True if the minimum value is inclusive. </summary>
+     public readonly bool minInclusive;
+
+     /// <summary>
+     /// True if the maximum value is inclusive. </summary>
+     public readonly bool maxInclusive;
+
+     /// <summary>
+     /// Create a DoubleRange. </summary>
+     /// <param name="label"> Label passed to the base <see cref="Range"/>. </param>
+     /// <param name="minIn"> Lower bound. </param>
+     /// <param name="minInclusive"> Whether <paramref name="minIn"/> itself is accepted. </param>
+     /// <param name="maxIn"> Upper bound. </param>
+     /// <param name="maxInclusive"> Whether <paramref name="maxIn"/> itself is accepted. </param>
+     /// <exception cref="System.ArgumentException"> If either bound is NaN. </exception>
+     public DoubleRange(string label, double minIn, bool minInclusive, double maxIn, bool maxInclusive)
+         : base(label)
+     {
+         this.min = minIn;
+         this.max = maxIn;
+         this.minInclusive = minInclusive;
+         this.maxInclusive = maxInclusive;
+
+         // TODO: if DoubleDocValuesField used
+         // NumericUtils.doubleToSortableLong format (instead of
+         // Double.doubleToRawLongBits) we could do comparisons
+         // in long space
+
+         if (double.IsNaN(min))
+         {
+             throw new System.ArgumentException("min cannot be NaN");
+         }
+         if (!minInclusive)
+         {
+             // Adding double.Epsilon is a no-op for values of ordinary magnitude,
+             // so step to the next representable double instead.
+             minIn = NextUp(minIn);
+         }
+
+         if (double.IsNaN(max))
+         {
+             throw new System.ArgumentException("max cannot be NaN");
+         }
+         if (!maxInclusive)
+         {
+             maxIn = NextDown(maxIn);
+         }
+
+         if (minIn > maxIn)
+         {
+             FailNoMatch();
+         }
+
+         this.minIncl = minIn;
+         this.maxIncl = maxIn;
+     }
+
+     /// <summary>
+     /// Returns the smallest double strictly greater than <paramref name="d"/>;
+     /// NaN and positive infinity are returned unchanged.
+     /// </summary>
+     private static double NextUp(double d)
+     {
+         if (double.IsNaN(d) || double.IsPositiveInfinity(d))
+         {
+             return d;
+         }
+         if (d == 0.0)
+         {
+             // Covers both +0.0 and -0.0.
+             return double.Epsilon;
+         }
+         long bits = BitConverter.DoubleToInt64Bits(d);
+         // For positive values a larger bit pattern is a larger double; for
+         // negative values shrinking the magnitude moves towards +infinity.
+         bits += (d > 0.0) ? 1 : -1;
+         return BitConverter.Int64BitsToDouble(bits);
+     }
+
+     /// <summary>
+     /// Returns the largest double strictly less than <paramref name="d"/>;
+     /// NaN and negative infinity are returned unchanged.
+     /// </summary>
+     private static double NextDown(double d)
+     {
+         if (double.IsNaN(d) || double.IsNegativeInfinity(d))
+         {
+             return d;
+         }
+         if (d == 0.0)
+         {
+             // Covers both +0.0 and -0.0.
+             return -double.Epsilon;
+         }
+         long bits = BitConverter.DoubleToInt64Bits(d);
+         bits += (d > 0.0) ? -1 : 1;
+         return BitConverter.Int64BitsToDouble(bits);
+     }
+
+     /// <summary>
+     /// True if this range accepts the provided value. </summary>
+     public bool accept(double value)
+     {
+         return value >= minIncl && value <= maxIncl;
+     }
+
+     /// <summary>
+     /// Converts this range to an inclusive <see cref="LongRange"/> over the
+     /// sortable-long encoding of the effective bounds.
+     /// </summary>
+     internal LongRange ToLongRange()
+     {
+         return new LongRange(Label, NumericUtils.DoubleToSortableLong(minIncl), true, NumericUtils.DoubleToSortableLong(maxIncl), true);
+     }
+
+     public override string ToString()
+     {
+         return "DoubleRange(" + minIncl + " to " + maxIncl + ")";
+     }
+
+     /// <summary>
+     /// Returns a filter accepting documents whose value from
+     /// <paramref name="valueSource"/> falls in this range. The resulting doc id
+     /// set can only be consumed through its bits, not through an iterator.
+     /// </summary>
+     /// <param name="fastMatchFilter"> Optional pre-filter; when given, its doc id set must expose bits. </param>
+     /// <param name="valueSource"> Source of the per-document double values. </param>
+     public override Filter GetFilter(Filter fastMatchFilter, ValueSource valueSource)
+     {
+         return new FilterAnonymousInnerClassHelper(this, fastMatchFilter, valueSource);
+     }
+
+     private class FilterAnonymousInnerClassHelper : Filter
+     {
+         private readonly DoubleRange outerInstance;
+
+         private Filter fastMatchFilter;
+         private ValueSource valueSource;
+
+         public FilterAnonymousInnerClassHelper(DoubleRange outerInstance, Filter fastMatchFilter, ValueSource valueSource)
+         {
+             this.outerInstance = outerInstance;
+             this.fastMatchFilter = fastMatchFilter;
+             this.valueSource = valueSource;
+         }
+
+
+         public override string ToString()
+         {
+             return "Filter(" + outerInstance.ToString() + ")";
+         }
+
+         public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
+         {
+
+             // TODO: this is just like ValueSourceScorer,
+             // ValueSourceFilter (spatial),
+             // ValueSourceRangeFilter (solr); also,
+             // https://issues.apache.org/jira/browse/LUCENE-4251
+
+             var values = valueSource.GetValues(new Dictionary<string,Lucene.Net.Search.Scorer>(), context);
+
+             int maxDoc = context.Reader.MaxDoc;
+
+             Bits fastMatchBits;
+             if (fastMatchFilter != null)
+             {
+                 DocIdSet dis = fastMatchFilter.GetDocIdSet(context, null);
+                 if (dis == null)
+                 {
+                     // No documents match
+                     return null;
+                 }
+                 fastMatchBits = dis.GetBits();
+                 if (fastMatchBits == null)
+                 {
+                     throw new System.ArgumentException("fastMatchFilter does not implement DocIdSet.bits");
+                 }
+             }
+             else
+             {
+                 fastMatchBits = null;
+             }
+
+             return new DocIdSetAnonymousInnerClassHelper(this, acceptDocs, values, maxDoc, fastMatchBits);
+         }
+
+         private class DocIdSetAnonymousInnerClassHelper : DocIdSet
+         {
+             private readonly FilterAnonymousInnerClassHelper outerInstance;
+
+             private Bits acceptDocs;
+             private FunctionValues values;
+             private int maxDoc;
+             private Bits fastMatchBits;
+
+             public DocIdSetAnonymousInnerClassHelper(FilterAnonymousInnerClassHelper outerInstance, Bits acceptDocs, FunctionValues values, int maxDoc, Bits fastMatchBits)
+             {
+                 this.outerInstance = outerInstance;
+                 this.acceptDocs = acceptDocs;
+                 this.values = values;
+                 this.maxDoc = maxDoc;
+                 this.fastMatchBits = fastMatchBits;
+             }
+
+
+             public override Bits GetBits()
+             {
+                 return new BitsAnonymousInnerClassHelper(this);
+             }
+
+             private class BitsAnonymousInnerClassHelper : Bits
+             {
+                 private readonly DocIdSetAnonymousInnerClassHelper outerInstance;
+
+                 public BitsAnonymousInnerClassHelper(DocIdSetAnonymousInnerClassHelper outerInstance)
+                 {
+                     this.outerInstance = outerInstance;
+                 }
+
+                 public virtual bool Get(int docID)
+                 {
+                     // A document matches only if it passes acceptDocs, the
+                     // optional fast-match bits, and the range test on its value.
+                     if (outerInstance.acceptDocs != null && outerInstance.acceptDocs.Get(docID) == false)
+                     {
+                         return false;
+                     }
+                     if (outerInstance.fastMatchBits != null && outerInstance.fastMatchBits.Get(docID) == false)
+                     {
+                         return false;
+                     }
+                     return outerInstance.outerInstance.outerInstance.accept(outerInstance.values.DoubleVal(docID));
+                 }
+
+                 public virtual int Length()
+                 {
+                     return outerInstance.maxDoc;
+                 }
+             }
+
+             public override DocIdSetIterator GetIterator()
+             {
+                 throw new System.NotSupportedException("this filter can only be accessed via bits()");
+             }
+         }
+     }
+ }
+
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Range/DoubleRangeFacetCounts.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Range/DoubleRangeFacetCounts.cs b/Lucene.Net.Facet/Range/DoubleRangeFacetCounts.cs
new file mode 100644
index 0000000..f6908dc
--- /dev/null
+++ b/Lucene.Net.Facet/Range/DoubleRangeFacetCounts.cs
@@ -0,0 +1,160 @@
+using System.Collections.Generic;
+using Lucene.Net.Facet;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Range
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using DoubleDocValuesField = Lucene.Net.Documents.DoubleDocValuesField; // javadocs
+ using FloatDocValuesField = Lucene.Net.Documents.FloatDocValuesField; // javadocs
+ using MatchingDocs = FacetsCollector.MatchingDocs;
+ using FunctionValues = Lucene.Net.Queries.Function.FunctionValues;
+ using ValueSource = Lucene.Net.Queries.Function.ValueSource;
+ using DoubleFieldSource = Lucene.Net.Queries.Function.ValueSources.DoubleFieldSource;
+ using FloatFieldSource = Lucene.Net.Queries.Function.ValueSources.FloatFieldSource; // javadocs
+ using DocIdSet = Lucene.Net.Search.DocIdSet;
+ using Filter = Lucene.Net.Search.Filter;
+ using Bits = Lucene.Net.Util.Bits;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+ /// <summary>
+ /// <see cref="Facets"/> implementation that computes counts for
+ /// dynamic double ranges from a provided <see cref="ValueSource"/>,
+ /// using <c>FunctionValues.DoubleVal</c>. Use
+ /// this for dimensions that change in real-time (e.g. a
+ /// relative time based dimension like "Past day", "Past 2
+ /// days", etc.) or that change for each request (e.g.
+ /// distance from the user's location, "&lt; 1 km", "&lt; 2 km",
+ /// etc.).
+ ///
+ /// <para> If you had indexed your field using
+ /// <see cref="FloatDocValuesField"/> then pass <see cref="FloatFieldSource"/>
+ /// as the <see cref="ValueSource"/>; if you used
+ /// <see cref="DoubleDocValuesField"/> then pass
+ /// <see cref="DoubleFieldSource"/> (this is the default used when you
+ /// pass just the field name).
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public class DoubleRangeFacetCounts : RangeFacetCounts
+ {
+
+     /// <summary>
+     /// Create <see cref="DoubleRangeFacetCounts"/>, using a
+     /// <see cref="DoubleFieldSource"/> built from the specified field.
+     /// </summary>
+     /// <param name="field"> field name; also used to build the <see cref="DoubleFieldSource"/> </param>
+     /// <param name="hits"> collector whose matching docs are counted </param>
+     /// <param name="ranges"> ranges to count into </param>
+     public DoubleRangeFacetCounts(string field, FacetsCollector hits, params DoubleRange[] ranges)
+         : this(field, new DoubleFieldSource(field), hits, ranges)
+     {
+     }
+
+     /// <summary>
+     /// Create <see cref="DoubleRangeFacetCounts"/>, using the provided
+     /// <see cref="ValueSource"/> and no fast-match filter.
+     /// </summary>
+     /// <param name="field"> field name passed to the base class </param>
+     /// <param name="valueSource"> supplies the double value of each document </param>
+     /// <param name="hits"> collector whose matching docs are counted </param>
+     /// <param name="ranges"> ranges to count into </param>
+     public DoubleRangeFacetCounts(string field, ValueSource valueSource, FacetsCollector hits, params DoubleRange[] ranges)
+         : this(field, valueSource, hits, null, ranges)
+     {
+     }
+
+     /// <summary>
+     /// Create <see cref="DoubleRangeFacetCounts"/>, using the provided
+     /// <see cref="ValueSource"/>, and using the provided Filter as
+     /// a fastmatch: only documents passing the filter are
+     /// checked for the matching ranges. The filter must be
+     /// random access (implement <see cref="DocIdSet.GetBits()"/>).
+     /// </summary>
+     /// <param name="field"> field name passed to the base class </param>
+     /// <param name="valueSource"> supplies the double value of each document </param>
+     /// <param name="hits"> collector whose matching docs are counted </param>
+     /// <param name="fastMatchFilter"> optional pre-filter; may be <c>null</c> </param>
+     /// <param name="ranges"> ranges to count into </param>
+     /// <exception cref="System.ArgumentException"> if the filter produces a
+     /// <see cref="DocIdSet"/> whose <see cref="DocIdSet.GetBits()"/> returns <c>null</c> </exception>
+     public DoubleRangeFacetCounts(string field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, params DoubleRange[] ranges)
+         : base(field, ranges, fastMatchFilter)
+     {
+         Count(valueSource, hits.GetMatchingDocs);
+     }
+
+     /// <summary>
+     /// Walks every matching document, maps its double value to a sortable long and
+     /// feeds it to a <see cref="LongRangeCounter"/> built from the configured ranges,
+     /// then fills <c>counts</c> and adjusts <c>totCount</c>.
+     /// </summary>
+     /// <param name="valueSource"> supplies the per-segment <see cref="FunctionValues"/> </param>
+     /// <param name="matchingDocs"> per-segment hits to count </param>
+     private void Count(ValueSource valueSource, IEnumerable<MatchingDocs> matchingDocs)
+     {
+
+         DoubleRange[] ranges = (DoubleRange[])this.ranges;
+
+         // Re-express each double range as a long range over the sortable-long encoding,
+         // with both end points inclusive, so that LongRangeCounter can do the counting.
+         LongRange[] longRanges = new LongRange[ranges.Length];
+         for (int i = 0; i < ranges.Length; i++)
+         {
+             DoubleRange range = ranges[i];
+             longRanges[i] = new LongRange(range.Label, NumericUtils.DoubleToSortableLong(range.minIncl), true, NumericUtils.DoubleToSortableLong(range.maxIncl), true);
+         }
+
+         LongRangeCounter counter = new LongRangeCounter(longRanges);
+
+         int missingCount = 0;
+         foreach (MatchingDocs hits in matchingDocs)
+         {
+             FunctionValues fv = valueSource.GetValues(new Dictionary<string,object>(), hits.context);
+
+             totCount += hits.totalHits;
+             Bits fastMatchBits;
+             if (fastMatchFilter != null)
+             {
+                 DocIdSet dis = fastMatchFilter.GetDocIdSet(hits.context, null);
+                 if (dis == null)
+                 {
+                     // No documents match
+                     // NOTE(review): totCount was already increased by hits.totalHits for
+                     // this segment and is not reduced here -- confirm this is intended.
+                     continue;
+                 }
+                 fastMatchBits = dis.GetBits();
+                 if (fastMatchBits == null)
+                 {
+                     throw new System.ArgumentException("fastMatchFilter does not implement DocIdSet.bits");
+                 }
+             }
+             else
+             {
+                 fastMatchBits = null;
+             }
+
+             DocIdSetIterator docs = hits.bits.GetIterator();
+
+             int doc;
+             while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+             {
+                 // Skip documents rejected by the fast-match filter. The loop condition
+                 // advances the iterator, so nothing else needs to be done here.
+                 if (fastMatchBits != null && fastMatchBits.Get(doc) == false)
+                 {
+                     continue;
+                 }
+                 // Skip missing docs:
+                 if (fv.Exists(doc))
+                 {
+                     counter.add(NumericUtils.DoubleToSortableLong(fv.DoubleVal(doc)));
+                 }
+                 else
+                 {
+                     missingCount++;
+                 }
+             }
+         }
+
+         // fillCounts writes the per-range totals into counts; its return value is
+         // added to the missing tally (presumably values that fell in no range -- TODO confirm).
+         missingCount += counter.fillCounts(counts);
+         totCount -= missingCount;
+     }
+ }
+
+}
\ No newline at end of file