You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/14 12:59:29 UTC
[14/26] lucenenet git commit: first commit of facet porting,
failing tests will be fixed in next commits.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/FacetLabel.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/FacetLabel.cs b/Lucene.Net.Facet/Taxonomy/FacetLabel.cs
new file mode 100644
index 0000000..287a18c
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/FacetLabel.cs
@@ -0,0 +1,224 @@
+using System;
+using System.Diagnostics;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using LruTaxonomyWriterCache = Lucene.Net.Facet.Taxonomy.WriterCache.LruTaxonomyWriterCache;
+ using NameHashIntCacheLRU = Lucene.Net.Facet.Taxonomy.WriterCache.NameHashIntCacheLRU;
+
+ /// <summary>
+ /// Holds a sequence of string components, specifying the hierarchical name of a
+ /// category.
+ ///
+ /// @lucene.internal
+ /// </summary>
+ public class FacetLabel : IComparable<FacetLabel>
+ {
+     private static readonly int BYTE_BLOCK_SIZE = Lucene.Net.Util.ByteBlockPool.BYTE_BLOCK_SIZE;
+
+     /*
+      * copied from DocumentWriterPerThread -- if a FacetLabel is resolved to a
+      * drill-down term which is encoded to a larger term than that length, it is
+      * silently dropped! Therefore we limit the number of characters to MAX/4 to
+      * be on the safe side.
+      */
+     /// <summary>
+     /// The maximum number of characters a <see cref="FacetLabel"/> can have.
+     /// </summary>
+     public static readonly int MAX_CATEGORY_PATH_LENGTH = (BYTE_BLOCK_SIZE - 2) / 4;
+
+     /// <summary>
+     /// The components of this <see cref="FacetLabel"/>. Note that this array may be
+     /// shared with other <see cref="FacetLabel"/> instances, e.g. as a result of
+     /// <see cref="Subpath(int)"/>, therefore you should traverse the array only up to
+     /// <see cref="length"/> for this path's components.
+     /// </summary>
+     public readonly string[] components;
+
+     /// <summary>
+     /// The number of components of this <see cref="FacetLabel"/>. </summary>
+     public readonly int length;
+
+     /// <summary>
+     /// Creates a label that shares <paramref name="copyFrom"/>'s component array but
+     /// exposes only its first <paramref name="prefixLen"/> entries. Used by
+     /// <see cref="Subpath(int)"/>.
+     /// </summary>
+     private FacetLabel(FacetLabel copyFrom, int prefixLen)
+     {
+         // while the code which calls this method is safe, at some point a test
+         // tripped on AIOOBE in toString, but we failed to reproduce. adding the
+         // assert as a safety check.
+         Debug.Assert(prefixLen >= 0 && prefixLen <= copyFrom.components.Length, "prefixLen cannot be negative nor larger than the given components' length: prefixLen=" + prefixLen + " components.length=" + copyFrom.components.Length);
+         this.components = copyFrom.components;
+         length = prefixLen;
+     }
+
+     /// <summary>
+     /// Construct from the given path components. The array is stored as-is (not copied).
+     /// </summary>
+     /// <exception cref="System.ArgumentException">if a component is null or empty, or the
+     /// path is longer than <see cref="MAX_CATEGORY_PATH_LENGTH"/></exception>
+     public FacetLabel(params string[] components)
+     {
+         this.components = components;
+         length = components.Length;
+         CheckComponents();
+     }
+
+     /// <summary>
+     /// Construct from the dimension plus the given path components; the dimension
+     /// becomes the first component of the label.
+     /// </summary>
+     /// <exception cref="System.ArgumentException">if a component is null or empty, or the
+     /// path is longer than <see cref="MAX_CATEGORY_PATH_LENGTH"/></exception>
+     public FacetLabel(string dim, string[] path)
+     {
+         components = new string[1 + path.Length];
+         components[0] = dim;
+         Array.Copy(path, 0, components, 1, path.Length);
+         length = components.Length;
+         CheckComponents();
+     }
+
+     /// <summary>
+     /// Validates the component array: rejects null/empty components and paths whose
+     /// total character count (components plus one separator between each pair)
+     /// exceeds <see cref="MAX_CATEGORY_PATH_LENGTH"/>.
+     /// </summary>
+     private void CheckComponents()
+     {
+         long len = 0;
+         foreach (string comp in components)
+         {
+             if (string.IsNullOrEmpty(comp))
+             {
+                 throw new System.ArgumentException("empty or null components not allowed: " + Arrays.ToString(components));
+             }
+             len += comp.Length;
+         }
+         len += components.Length - 1; // add separators
+         if (len > MAX_CATEGORY_PATH_LENGTH)
+         {
+             throw new System.ArgumentException("category path exceeds maximum allowed path length: max=" + MAX_CATEGORY_PATH_LENGTH + " len=" + len + " path=" + Arrays.ToString(components).Substring(0, 30) + "...");
+         }
+     }
+
+     /// <summary>
+     /// Compares this path with another <see cref="FacetLabel"/> for lexicographic
+     /// order. Components are compared by their UTF-16 code units (ordinal), so the
+     /// result does not depend on the current culture and agrees with
+     /// <see cref="Equals(object)"/>.
+     /// </summary>
+     /// <returns>a negative value, zero, or a positive value if this label sorts
+     /// before, equal to, or after <paramref name="other"/></returns>
+     public virtual int CompareTo(FacetLabel other)
+     {
+         int len = length < other.length ? length : other.length;
+         for (int i = 0; i < len; i++)
+         {
+             // string.CompareTo is culture-sensitive in .NET; the ordinal comparison
+             // keeps the ordering stable across cultures.
+             int cmp = string.CompareOrdinal(components[i], other.components[i]);
+             if (cmp < 0)
+             {
+                 return -1; // this is 'before'
+             }
+             if (cmp > 0)
+             {
+                 return 1; // this is 'after'
+             }
+         }
+
+         // one is a prefix of the other
+         return length - other.length;
+     }
+
+     /// <summary>
+     /// Two labels are equal when they have the same <see cref="length"/> and their
+     /// first <see cref="length"/> components are equal strings.
+     /// </summary>
+     public override bool Equals(object obj)
+     {
+         if (!(obj is FacetLabel))
+         {
+             return false;
+         }
+
+         FacetLabel other = (FacetLabel)obj;
+         if (length != other.length)
+         {
+             return false; // not same length, cannot be equal
+         }
+
+         // CategoryPaths are more likely to differ at the last components, so start
+         // from last-first
+         for (int i = length - 1; i >= 0; i--)
+         {
+             if (!components[i].Equals(other.components[i]))
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+
+     /// <summary>
+     /// Hash over the first <see cref="length"/> components; 0 for an empty label.
+     /// </summary>
+     public override int GetHashCode()
+     {
+         if (length == 0)
+         {
+             return 0;
+         }
+
+         int hash = length;
+         // Wrap-around is intended here; 'unchecked' keeps it that way even if the
+         // assembly is compiled with overflow checking enabled.
+         unchecked
+         {
+             for (int i = 0; i < length; i++)
+             {
+                 hash = hash * 31 + components[i].GetHashCode();
+             }
+         }
+         return hash;
+     }
+
+     /// <summary>
+     /// Calculate a 64-bit hash function for this path. This
+     /// is necessary for <see cref="NameHashIntCacheLRU"/> (the
+     /// default cache impl for <see cref="LruTaxonomyWriterCache"/>)
+     /// to reduce the chance of "silent but deadly" collisions.
+     /// </summary>
+     public virtual long LongHashCode()
+     {
+         if (length == 0)
+         {
+             return 0;
+         }
+
+         long hash = length;
+         // Wrap-around is intended; see GetHashCode.
+         unchecked
+         {
+             for (int i = 0; i < length; i++)
+             {
+                 hash = hash * 65599 + components[i].GetHashCode();
+             }
+         }
+         return hash;
+     }
+
+     /// <summary>
+     /// Returns a sub-path of this path up to <paramref name="len"/> components.
+     /// If <paramref name="len"/> is negative, or not smaller than
+     /// <see cref="length"/>, this instance itself is returned. The returned
+     /// sub-path shares this label's component array.
+     /// </summary>
+     public virtual FacetLabel Subpath(int len)
+     {
+         if (len >= this.length || len < 0)
+         {
+             return this;
+         }
+         else
+         {
+             return new FacetLabel(this, len);
+         }
+     }
+
+     /// <summary>
+     /// Returns a string representation of the path.
+     /// </summary>
+     public override string ToString()
+     {
+         if (length == 0)
+         {
+             return "FacetLabel: []";
+         }
+         // Copy only this label's own prefix: the backing array may be longer
+         // when it is shared with a longer label.
+         string[] parts = new string[length];
+         Array.Copy(components, 0, parts, 0, length);
+         return "FacetLabel: [" + Arrays.ToString(parts) + "]";
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/FastTaxonomyFacetCounts.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/FastTaxonomyFacetCounts.cs b/Lucene.Net.Facet/Taxonomy/FastTaxonomyFacetCounts.cs
new file mode 100644
index 0000000..01545f0
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/FastTaxonomyFacetCounts.cs
@@ -0,0 +1,105 @@
+using System.Collections.Generic;
+using Lucene.Net.Facet;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MatchingDocs = FacetsCollector.MatchingDocs;
+ using BinaryDocValues = Lucene.Net.Index.BinaryDocValues;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+
+ /// <summary>
+ /// Computes facet counts by decoding the ordinals straight out of the binary
+ /// doc values of the index field, assuming the default encoding into
+ /// DocValues was used.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class FastTaxonomyFacetCounts : IntTaxonomyFacets
+ {
+
+     /// <summary>
+     /// Create <c>FastTaxonomyFacetCounts</c> over the default index field
+     /// (<c>FacetsConfig.DEFAULT_INDEX_FIELD_NAME</c>), counting all facet labels.
+     /// </summary>
+     public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
+         : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
+     {
+     }
+
+     /// <summary>
+     /// Create <c>FastTaxonomyFacetCounts</c>, reading ordinals from the
+     /// specified <paramref name="indexFieldName"/>. Use this when the index
+     /// field name was changed in the <see cref="FacetsConfig"/> for certain
+     /// dimensions. Counting happens immediately, inside the constructor.
+     /// </summary>
+     public FastTaxonomyFacetCounts(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
+         : base(indexFieldName, taxoReader, config)
+     {
+         Count(fc.GetMatchingDocs);
+     }
+
+     /// <summary>
+     /// Walks every matching document of every segment, decodes its ordinals and
+     /// increments the per-ordinal counter, then rolls the counts up.
+     /// </summary>
+     private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
+     {
+         foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
+         {
+             BinaryDocValues ordinalValues = segmentHits.context.AtomicReader.GetBinaryDocValues(IndexFieldName);
+             if (ordinalValues != null)
+             {
+                 DocIdSetIterator iterator = segmentHits.bits.GetIterator();
+                 BytesRef scratch = new BytesRef();
+
+                 for (int docId = iterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.NextDoc())
+                 {
+                     ordinalValues.Get(docId, scratch);
+                     sbyte[] buffer = scratch.Bytes;
+                     int limit = scratch.Offset + scratch.Length;
+
+                     // Each ordinal is a variable-length int: 7 payload bits per byte,
+                     // a negative byte (high bit set) means "more bytes follow". The
+                     // decoded number is a delta against the previously decoded ordinal.
+                     int pending = 0;
+                     int previous = 0;
+                     for (int pos = scratch.Offset; pos < limit; pos++)
+                     {
+                         sbyte current = buffer[pos];
+                         if (current < 0)
+                         {
+                             pending = (pending << 7) | (current & 0x7F);
+                             continue;
+                         }
+
+                         int ordinal = ((pending << 7) | current) + previous;
+                         previous = ordinal;
+                         ++values[ordinal];
+                         pending = 0;
+                     }
+                 }
+             }
+             // else: this reader does not have DocValues for the requested category list
+         }
+
+         Rollup();
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/FloatAssociationFacetField.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/FloatAssociationFacetField.cs b/Lucene.Net.Facet/Taxonomy/FloatAssociationFacetField.cs
new file mode 100644
index 0000000..cc90e61
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/FloatAssociationFacetField.cs
@@ -0,0 +1,65 @@
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Document = Lucene.Net.Documents.Document;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+
+ /// <summary>
+ /// A facet field that ties a float value to a facet label. Add an instance
+ /// to your <see cref="Document"/>; use <c>TaxonomyFacetSumFloatAssociations</c>
+ /// to aggregate the float values per facet label at search time.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class FloatAssociationFacetField : AssociationFacetField
+ {
+
+     /// <summary>
+     /// Creates this from <paramref name="dim"/> and <paramref name="path"/> and a
+     /// float association, which is stored in its encoded
+     /// (<see cref="floatToBytesRef"/>) form.
+     /// </summary>
+     public FloatAssociationFacetField(float assoc, string dim, params string[] path)
+         : base(floatToBytesRef(assoc), dim, path)
+     {
+     }
+
+     /// <summary>
+     /// Encodes a <c>float</c> as a <see cref="BytesRef"/> by taking its int bit
+     /// pattern and delegating to <c>IntAssociationFacetField.intToBytesRef</c>.
+     /// </summary>
+     public static BytesRef floatToBytesRef(float v)
+     {
+         var bits = Number.FloatToIntBits(v);
+         return IntAssociationFacetField.intToBytesRef(bits);
+     }
+
+     /// <summary>
+     /// Decodes a previously encoded <c>float</c>; the inverse of
+     /// <see cref="floatToBytesRef"/>.
+     /// </summary>
+     public static float bytesRefToFloat(BytesRef b)
+     {
+         var bits = IntAssociationFacetField.bytesRefToInt(b);
+         return Number.IntBitsToFloat(bits);
+     }
+
+     /// <summary>
+     /// Shows the dimension, the path and the decoded float value.
+     /// </summary>
+     public override string ToString()
+     {
+         var pathText = Arrays.ToString(path);
+         var decoded = bytesRefToFloat(assoc);
+         return "FloatAssociationFacetField(dim=" + dim + " path=" + pathText + " value=" + decoded + ")";
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/FloatTaxonomyFacets.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/FloatTaxonomyFacets.cs b/Lucene.Net.Facet/Taxonomy/FloatTaxonomyFacets.cs
new file mode 100644
index 0000000..9dbe71d
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/FloatTaxonomyFacets.cs
@@ -0,0 +1,183 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using DimConfig = Lucene.Net.Facet.FacetsConfig.DimConfig;
+
+ /// <summary>
+ /// Base class for all taxonomy-based facets that aggregate
+ /// to a per-ords float[].
+ /// </summary>
+ public abstract class FloatTaxonomyFacets : TaxonomyFacets
+ {
+
+     /// <summary>
+     /// Per-ordinal value; sized to the taxonomy reader's size at construction. </summary>
+     protected readonly float[] values;
+
+     /// <summary>
+     /// Sole constructor. </summary>
+     protected internal FloatTaxonomyFacets(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
+         : base(indexFieldName, taxoReader, config)
+     {
+         values = new float[taxoReader.Size];
+     }
+
+     /// <summary>
+     /// Rolls up any single-valued hierarchical dimensions: the summed values of a
+     /// dimension's descendants are added onto the dimension's root ordinal.
+     /// </summary>
+     protected virtual void Rollup()
+     {
+         // Rollup any necessary dims:
+         foreach (KeyValuePair<string, FacetsConfig.DimConfig> ent in Config.DimConfigs)
+         {
+             string dim = ent.Key;
+             FacetsConfig.DimConfig ft = ent.Value;
+             if (ft.hierarchical && ft.multiValued == false)
+             {
+                 int dimRootOrd = TaxoReader.GetOrdinal(new FacetLabel(dim));
+                 // It can be -1 if this field was declared in the
+                 // config but never indexed; skip it instead of indexing
+                 // values[] out of range (same guard as IntTaxonomyFacets):
+                 if (dimRootOrd > 0)
+                 {
+                     values[dimRootOrd] += Rollup(Children[dimRootOrd]);
+                 }
+             }
+         }
+     }
+
+     /// <summary>
+     /// Recursively rolls up <paramref name="ord"/> and all of its following
+     /// siblings: each ordinal's value becomes its own value plus the total of its
+     /// children. Returns the sum over the whole sibling chain.
+     /// </summary>
+     private float Rollup(int ord)
+     {
+         float sum = 0;
+         while (ord != TaxonomyReader.INVALID_ORDINAL)
+         {
+             float childValue = values[ord] + Rollup(Children[ord]);
+             values[ord] = childValue;
+             sum += childValue;
+             ord = Siblings[ord];
+         }
+         return sum;
+     }
+
+     /// <summary>
+     /// Returns the aggregated value of the label <paramref name="dim"/> +
+     /// <paramref name="path"/>, or -1 if the label has no ordinal.
+     /// </summary>
+     /// <exception cref="System.ArgumentException">if <paramref name="path"/> is empty and
+     /// the dimension is neither single-valued hierarchical nor multi-valued with
+     /// <c>requireDimCount</c></exception>
+     public override float GetSpecificValue(string dim, params string[] path)
+     {
+         FacetsConfig.DimConfig dimConfig = VerifyDim(dim);
+         if (path.Length == 0)
+         {
+             if (dimConfig.hierarchical && dimConfig.multiValued == false)
+             {
+                 // ok: rolled up at search time
+             }
+             else if (dimConfig.requireDimCount && dimConfig.multiValued)
+             {
+                 // ok: we indexed all ords at index time
+             }
+             else
+             {
+                 throw new System.ArgumentException("cannot return dimension-level value alone; use getTopChildren instead");
+             }
+         }
+         int ord = TaxoReader.GetOrdinal(new FacetLabel(dim, path));
+         if (ord < 0)
+         {
+             return -1;
+         }
+         return values[ord];
+     }
+
+     /// <summary>
+     /// Returns the up-to-<paramref name="topN"/> highest-valued direct children of
+     /// the label <paramref name="dim"/> + <paramref name="path"/>. Only children
+     /// with a value greater than zero are considered. Returns null if the label
+     /// has no ordinal or no child has a positive value.
+     /// </summary>
+     /// <exception cref="System.ArgumentException">if <paramref name="topN"/> is not positive</exception>
+     public override FacetResult GetTopChildren(int topN, string dim, params string[] path)
+     {
+         if (topN <= 0)
+         {
+             throw new System.ArgumentException("topN must be > 0 (got: " + topN + ")");
+         }
+         FacetsConfig.DimConfig dimConfig = VerifyDim(dim);
+         FacetLabel cp = new FacetLabel(dim, path);
+         int dimOrd = TaxoReader.GetOrdinal(cp);
+         if (dimOrd == -1)
+         {
+             return null;
+         }
+
+         TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.Min(TaxoReader.Size, topN));
+         // Smallest value currently kept in a full queue; anything not above it
+         // cannot make it into the top N.
+         float bottomValue = 0;
+
+         int ord = Children[dimOrd];
+         float sumValues = 0;
+         int childCount = 0;
+
+         // Entry object recycled between insertions to avoid per-child allocations.
+         TopOrdAndFloatQueue.OrdAndValue reuse = null;
+         while (ord != TaxonomyReader.INVALID_ORDINAL)
+         {
+             if (values[ord] > 0)
+             {
+                 sumValues += values[ord];
+                 childCount++;
+                 if (values[ord] > bottomValue)
+                 {
+                     if (reuse == null)
+                     {
+                         reuse = new TopOrdAndFloatQueue.OrdAndValue();
+                     }
+                     reuse.ord = ord;
+                     reuse.value = values[ord];
+                     reuse = q.InsertWithOverflow(reuse);
+                     if (q.Size() == topN)
+                     {
+                         bottomValue = q.Top().value;
+                     }
+                 }
+             }
+
+             ord = Siblings[ord];
+         }
+
+         if (sumValues == 0)
+         {
+             return null;
+         }
+
+         if (dimConfig.multiValued)
+         {
+             if (dimConfig.requireDimCount)
+             {
+                 sumValues = values[dimOrd];
+             }
+             else
+             {
+                 // Our sum'd count is not correct, in general:
+                 sumValues = -1;
+             }
+         }
+         else
+         {
+             // Our sum'd dim count is accurate, so we keep it
+         }
+
+         // Fill the result back to front, so it ends up in the reverse of the
+         // queue's pop order.
+         LabelAndValue[] labelValues = new LabelAndValue[q.Size()];
+         for (int i = labelValues.Length - 1; i >= 0; i--)
+         {
+             TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.Pop();
+             FacetLabel child = TaxoReader.GetPath(ordAndValue.ord);
+             // The child's own name is the component right after the parent's path.
+             labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
+         }
+
+         return new FacetResult(dim, path, sumValues, labelValues, childCount);
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/IntAssociationFacetField.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/IntAssociationFacetField.cs b/Lucene.Net.Facet/Taxonomy/IntAssociationFacetField.cs
new file mode 100644
index 0000000..b3f1ddd
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/IntAssociationFacetField.cs
@@ -0,0 +1,74 @@
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Document = Lucene.Net.Documents.Document;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+
+ /// <summary>
+ /// Add an instance of this to your <see cref="Document"/> to add
+ /// a facet label associated with an int. Use
+ /// <c>TaxonomyFacetSumIntAssociations</c> to aggregate int values
+ /// per facet label at search time.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class IntAssociationFacetField : AssociationFacetField
+ {
+
+     /// <summary>
+     /// Creates this from <paramref name="dim"/> and <paramref name="path"/> and an
+     /// int association, which is stored in its encoded
+     /// (<see cref="intToBytesRef"/>) form.
+     /// </summary>
+     public IntAssociationFacetField(int assoc, string dim, params string[] path)
+         : base(intToBytesRef(assoc), dim, path)
+     {
+     }
+
+     /// <summary>
+     /// Encodes an <c>int</c> as a 4-byte <see cref="BytesRef"/>,
+     /// big-endian.
+     /// </summary>
+     public static BytesRef intToBytesRef(int v)
+     {
+         sbyte[] bytes = new sbyte[4];
+         // The casts deliberately keep only the low 8 bits of each shifted value;
+         // 'unchecked' guarantees truncation (rather than an OverflowException)
+         // even if the assembly is compiled with overflow checking enabled.
+         unchecked
+         {
+             // big-endian:
+             bytes[0] = (sbyte)(v >> 24);
+             bytes[1] = (sbyte)(v >> 16);
+             bytes[2] = (sbyte)(v >> 8);
+             bytes[3] = (sbyte)v;
+         }
+         return new BytesRef(bytes);
+     }
+
+     /// <summary>
+     /// Decodes a previously encoded <c>int</c>: reads four bytes starting at
+     /// <c>b.Offset</c>, most significant byte first.
+     /// </summary>
+     public static int bytesRefToInt(BytesRef b)
+     {
+         // "& 0xFF" strips the sign extension of each sbyte before it is shifted into place.
+         return ((b.Bytes[b.Offset] & 0xFF) << 24) | ((b.Bytes[b.Offset + 1] & 0xFF) << 16) | ((b.Bytes[b.Offset + 2] & 0xFF) << 8) | (b.Bytes[b.Offset + 3] & 0xFF);
+     }
+
+     /// <summary>
+     /// Shows the dimension, the path and the decoded int value.
+     /// </summary>
+     public override string ToString()
+     {
+         return "IntAssociationFacetField(dim=" + dim + " path=" + Arrays.ToString(path) + " value=" + bytesRefToInt(assoc) + ")";
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/IntTaxonomyFacets.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/IntTaxonomyFacets.cs b/Lucene.Net.Facet/Taxonomy/IntTaxonomyFacets.cs
new file mode 100644
index 0000000..ad40137
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/IntTaxonomyFacets.cs
@@ -0,0 +1,189 @@
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using DimConfig = Lucene.Net.Facet.FacetsConfig.DimConfig;
+
+ /// <summary>
+ /// Common base for taxonomy-based facet implementations whose aggregation
+ /// result is one int per ordinal.
+ /// </summary>
+
+ public abstract class IntTaxonomyFacets : TaxonomyFacets
+ {
+
+     /// <summary>
+     /// Per-ordinal value; sized to the taxonomy reader's size at construction. </summary>
+     protected internal readonly int[] values;
+
+     /// <summary>
+     /// Sole constructor. </summary>
+     protected internal IntTaxonomyFacets(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
+         : base(indexFieldName, taxoReader, config)
+     {
+         values = new int[taxoReader.Size];
+     }
+
+     /// <summary>
+     /// Rolls up any single-valued hierarchical dimensions: the summed values of a
+     /// dimension's descendants are added onto the dimension's root ordinal.
+     /// </summary>
+     protected virtual void Rollup()
+     {
+         foreach (KeyValuePair<string, FacetsConfig.DimConfig> entry in Config.DimConfigs)
+         {
+             FacetsConfig.DimConfig config = entry.Value;
+             if (!config.hierarchical || config.multiValued)
+             {
+                 // only single-valued hierarchical dims are rolled up
+                 continue;
+             }
+
+             int rootOrd = TaxoReader.GetOrdinal(new FacetLabel(entry.Key));
+             if (rootOrd <= 0)
+             {
+                 // It can be -1 if this field was declared in the
+                 // config but never indexed
+                 continue;
+             }
+
+             values[rootOrd] += Rollup(Children[rootOrd]);
+         }
+     }
+
+     /// <summary>
+     /// Recursively rolls up <paramref name="ord"/> and all of its following
+     /// siblings: each ordinal's value becomes its own value plus the total of its
+     /// children. Returns the sum over the whole sibling chain.
+     /// </summary>
+     private int Rollup(int ord)
+     {
+         int total = 0;
+         for (int current = ord; current != TaxonomyReader.INVALID_ORDINAL; current = Siblings[current])
+         {
+             values[current] += Rollup(Children[current]);
+             total += values[current];
+         }
+         return total;
+     }
+
+     /// <summary>
+     /// Returns the aggregated value of the label <paramref name="dim"/> +
+     /// <paramref name="path"/>, or -1 if the label has no ordinal.
+     /// </summary>
+     /// <exception cref="System.ArgumentException">if <paramref name="path"/> is empty and
+     /// the dimension is neither single-valued hierarchical nor multi-valued with
+     /// <c>requireDimCount</c></exception>
+     public override float GetSpecificValue(string dim, params string[] path)
+     {
+         var dimConfig = VerifyDim(dim);
+         if (path.Length == 0)
+         {
+             // rolled up at search time
+             bool rolledUp = dimConfig.hierarchical && !dimConfig.multiValued;
+             // all ords were indexed at index time
+             bool dimCountIndexed = dimConfig.requireDimCount && dimConfig.multiValued;
+             if (!rolledUp && !dimCountIndexed)
+             {
+                 throw new System.ArgumentException("cannot return dimension-level value alone; use getTopChildren instead");
+             }
+         }
+
+         int ord = TaxoReader.GetOrdinal(new FacetLabel(dim, path));
+         return ord < 0 ? -1 : values[ord];
+     }
+
+     /// <summary>
+     /// Returns the up-to-<paramref name="topN"/> highest-valued direct children of
+     /// the label <paramref name="dim"/> + <paramref name="path"/>. Only children
+     /// with a value greater than zero are considered. Returns null if the label
+     /// has no ordinal or no child has a positive value.
+     /// </summary>
+     /// <exception cref="System.ArgumentException">if <paramref name="topN"/> is not positive</exception>
+     public override FacetResult GetTopChildren(int topN, string dim, params string[] path)
+     {
+         if (topN <= 0)
+         {
+             throw new System.ArgumentException("topN must be > 0 (got: " + topN + ")");
+         }
+
+         var dimConfig = VerifyDim(dim);
+         FacetLabel parent = new FacetLabel(dim, path);
+         int parentOrd = TaxoReader.GetOrdinal(parent);
+         if (parentOrd == -1)
+         {
+             return null;
+         }
+
+         TopOrdAndIntQueue queue = new TopOrdAndIntQueue(Math.Min(TaxoReader.Size, topN));
+
+         // Smallest value currently kept in a full queue; anything not above it
+         // cannot make it into the top N.
+         int threshold = 0;
+         int total = 0;
+         int childCount = 0;
+
+         // Entry object recycled between insertions.
+         TopOrdAndIntQueue.OrdAndValue spare = null;
+         for (int child = Children[parentOrd]; child != TaxonomyReader.INVALID_ORDINAL; child = Siblings[child])
+         {
+             int childValue = values[child];
+             if (childValue <= 0)
+             {
+                 continue;
+             }
+
+             total += childValue;
+             childCount++;
+             if (childValue <= threshold)
+             {
+                 continue;
+             }
+
+             if (spare == null)
+             {
+                 spare = new TopOrdAndIntQueue.OrdAndValue();
+             }
+             spare.ord = child;
+             spare.value = childValue;
+             spare = queue.InsertWithOverflow(spare);
+             if (queue.Size() == topN)
+             {
+                 threshold = queue.Top().value;
+             }
+         }
+
+         if (total == 0)
+         {
+             return null;
+         }
+
+         // For single-valued dims the summed value is accurate and kept as is.
+         if (dimConfig.multiValued)
+         {
+             // Our sum'd value is not correct, in general; use the indexed
+             // dimension count when available, otherwise report -1.
+             total = dimConfig.requireDimCount ? values[parentOrd] : -1;
+         }
+
+         // Fill the result back to front, so it ends up in the reverse of the
+         // queue's pop order.
+         LabelAndValue[] labelValues = new LabelAndValue[queue.Size()];
+         for (int slot = labelValues.Length - 1; slot >= 0; slot--)
+         {
+             TopOrdAndIntQueue.OrdAndValue popped = queue.Pop();
+             FacetLabel childLabel = TaxoReader.GetPath(popped.ord);
+             // The child's own name is the component right after the parent's path.
+             labelValues[slot] = new LabelAndValue(childLabel.components[parent.length], popped.value);
+         }
+
+         return new FacetResult(dim, path, total, labelValues, childCount);
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/LRUHashMap.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/LRUHashMap.cs b/Lucene.Net.Facet/Taxonomy/LRUHashMap.cs
new file mode 100644
index 0000000..d442992
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/LRUHashMap.cs
@@ -0,0 +1,154 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// A size-bounded cache that stands in for the Java original's LRUHashMap
+ /// (which was built on <c>java.util.LinkedHashMap</c>).
+ /// <para>
+ /// Entries live in a <see cref="ConcurrentDictionary{TKey,TValue}"/>. Each entry
+ /// records the time it was stored and a usage counter. When a write makes the
+ /// entry count exceed <see cref="MaxSize"/>, entries are ranked by usage count
+ /// (highest first) and then by timestamp (newest first); the first
+ /// <c>MaxSize - CleanSize</c> entries of that ranking are kept and the rest are
+ /// removed, except that the entry just written is never removed.
+ /// </para>
+ /// <para>
+ /// Entries older than the configured maximum duration are not returned by
+ /// <see cref="Get"/> (they are not removed by it either).
+ /// </para>
+ /// <para>
+ /// NOTE(review): the dictionary itself is safe for concurrent use, but the
+ /// eviction pass is a separate, non-atomic step; confirm whether callers still
+ /// need external synchronization.
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ /// <typeparam name="TV">type of the cache key</typeparam>
+ /// <typeparam name="TU">type of the cached value</typeparam>
+ public class LRUHashMap<TV, TU> where TU : class //this is implementation of LRU Cache
+ {
+
+     /// <summary>
+     /// Entry count above which a write triggers eviction.
+     /// </summary>
+     public int MaxSize { get; set; }
+
+     // Number of entries (a percentage of MaxSize, at least 1) cleared below
+     // MaxSize by an eviction pass; computed once in the constructor.
+     private int CleanSize;
+
+     // Entries older than this are treated as absent by Get.
+     private TimeSpan MaxDuration;
+
+
+     private readonly ConcurrentDictionary<TV, CacheDataObject<TU>> _cache = new ConcurrentDictionary<TV, CacheDataObject<TU>>();
+
+     /// <summary>
+     /// Creates the cache.
+     /// </summary>
+     /// <param name="maxSize">entry count above which eviction kicks in</param>
+     /// <param name="cleanPercentage">percentage of <paramref name="maxSize"/> to
+     /// free (in addition to the overflow) on each eviction pass</param>
+     /// <param name="maxDuration">maximum age of an entry that <see cref="Get"/>
+     /// will still return; when left at its default, one day is used</param>
+     public LRUHashMap(int maxSize = 50000, int cleanPercentage = 30, TimeSpan maxDuration = default(TimeSpan))
+     {
+         MaxSize = maxSize;
+         CleanSize = (int)Math.Max(MaxSize * (1.0 * cleanPercentage / 100), 1);
+         if (maxDuration == default(TimeSpan))
+         {
+             MaxDuration = TimeSpan.FromDays(1);
+         }
+         else
+         {
+             MaxDuration = maxDuration;
+         }
+     }
+
+
+     /// <summary>
+     /// Stores <paramref name="value"/> under <paramref name="cacheKey"/>; same as
+     /// <see cref="AddToCache"/>.
+     /// </summary>
+     public bool Put(TV cacheKey, TU value)
+     {
+         return AddToCache(cacheKey, value);
+     }
+
+     /// <summary>
+     /// Stores (or replaces) the entry for <paramref name="cacheKey"/> with a usage
+     /// count of 1 and the current UTC time, then evicts entries if the cache has
+     /// grown beyond <see cref="MaxSize"/>. Always returns true.
+     /// </summary>
+     public bool AddToCache(TV cacheKey, TU value)
+     {
+         var cachedResult = new CacheDataObject<TU>
+         {
+             Usage = 1, //value == null ? 1 : value.Usage + 1,
+             Value = value,
+             Timestamp = DateTime.UtcNow
+         };
+
+         _cache.AddOrUpdate(cacheKey, cachedResult, (_, __) => cachedResult);
+         if (_cache.Count > MaxSize)
+         {
+             // Rank the entries we want to KEEP first: most used, and among equally
+             // used ones the most recently stored. Everything after the retained
+             // prefix is evicted, so ties are broken against the OLDEST entries.
+             foreach (var source in _cache
+                 .OrderByDescending(x => x.Value.Usage)
+                 .ThenByDescending(x => x.Value.Timestamp)
+                 .Skip(MaxSize - CleanSize))
+             {
+                 if (EqualityComparer<TV>.Default.Equals(source.Key, cacheKey))
+                     continue; // we don't want to remove the one we just added
+                 CacheDataObject<TU> ignored;
+                 _cache.TryRemove(source.Key, out ignored);
+             }
+         }
+         return true;
+     }
+
+     /// <summary>
+     /// Returns the value stored under <paramref name="cacheKey"/>, or null if there
+     /// is none or the entry is older than the maximum duration.
+     /// </summary>
+     /// <param name="cacheKey">key to look up</param>
+     /// <param name="increment">when true, a successful lookup bumps the entry's
+     /// usage counter, which makes it less likely to be evicted</param>
+     public TU Get(TV cacheKey, bool increment = false)
+     {
+         CacheDataObject<TU> value;
+         if (_cache.TryGetValue(cacheKey, out value) && (DateTime.UtcNow - value.Timestamp) <= MaxDuration)
+         {
+             if (increment)
+             {
+                 Interlocked.Increment(ref value.Usage);
+             }
+             return value.Value;
+         }
+         return null;
+     }
+
+     /// <summary>
+     /// Returns true if an entry for <paramref name="cacheKey"/> is present; the
+     /// entry's age is not taken into account.
+     /// </summary>
+     public bool IsExistInCache(TV cacheKey)
+     {
+         return (_cache.ContainsKey(cacheKey));
+     }
+
+     /// <summary>
+     /// Returns the current number of entries.
+     /// </summary>
+     public int Size()
+     {
+         return _cache.Count;
+     }
+
+     #region Nested type: CacheDataObject
+
+     // One cache slot: the value plus the bookkeeping used for expiry and eviction.
+     private class CacheDataObject<T> where T : class
+     {
+         public DateTime Timestamp;
+         public int Usage;
+         public T Value;
+     }
+
+     #endregion
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/OrdinalsReader.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/OrdinalsReader.cs b/Lucene.Net.Facet/Taxonomy/OrdinalsReader.cs
new file mode 100644
index 0000000..544a1ef
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/OrdinalsReader.cs
@@ -0,0 +1,68 @@
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+ using IntsRef = Lucene.Net.Util.IntsRef;
+
+ /// <summary>
+ /// Provides per-document ordinals.
+ /// </summary>
+
+ public abstract class OrdinalsReader
+ {
+
+ /// <summary>
+ /// Returns ordinals for documents in one segment. </summary>
+ public abstract class OrdinalsSegmentReader
+ {
+ /// <summary>
+ /// Get the ordinals for this document. The <c>Offset</c> of
+ /// <paramref name="ordinals"/> must always be 0 on return.
+ /// </summary>
+ /// <param name="doc">Document id to read ordinals for.</param>
+ /// <param name="ordinals">Caller-supplied buffer that receives the ordinals.</param>
+ public abstract void Get(int doc, IntsRef ordinals);
+
+ /// <summary>
+ /// Default constructor.
+ /// </summary>
+ public OrdinalsSegmentReader()
+ {
+ }
+ }
+
+ /// <summary>
+ /// Default constructor.
+ /// </summary>
+ public OrdinalsReader()
+ {
+ }
+
+ /// <summary>
+ /// Returns an <see cref="OrdinalsSegmentReader"/> for the segment described by
+ /// <paramref name="context"/>.
+ /// </summary>
+ /// <param name="context">The atomic (per-segment) reader context.</param>
+ public abstract OrdinalsSegmentReader GetReader(AtomicReaderContext context);
+
+ /// <summary>
+ /// Returns the indexed field name this <see cref="OrdinalsReader"/>
+ /// is reading from.
+ /// </summary>
+ public abstract string IndexFieldName { get; }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/ParallelTaxonomyArrays.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/ParallelTaxonomyArrays.cs b/Lucene.Net.Facet/Taxonomy/ParallelTaxonomyArrays.cs
new file mode 100644
index 0000000..26f1d8a
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/ParallelTaxonomyArrays.cs
@@ -0,0 +1,74 @@
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Returns 3 arrays for traversing the taxonomy:
+ /// <ul>
+ /// <li>{@code parents}: {@code parents[i]} denotes the parent of category
+ /// ordinal {@code i}.</li>
+ /// <li>{@code children}: {@code children[i]} denotes a child of category ordinal
+ /// {@code i}.</li>
+ /// <li>{@code siblings}: {@code siblings[i]} denotes the sibling of category
+ /// ordinal {@code i}.</li>
+ /// </ul>
+ ///
+ /// To traverse the taxonomy tree, you typically start with {@code children[0]}
+ /// (ordinal 0 is reserved for ROOT), and then depends if you want to do DFS or
+ /// BFS, you call {@code children[children[0]]} or {@code siblings[children[0]]}
+ /// and so forth, respectively.
+ ///
+ /// <para>
+ /// <b>NOTE:</b> you are not expected to modify the values of the arrays, since
+ /// the arrays are shared with other threads.
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public abstract class ParallelTaxonomyArrays
+ {
+
+ /// <summary>
+ /// Sole constructor. </summary>
+ public ParallelTaxonomyArrays()
+ {
+ }
+
+ /// <summary>
+ /// Returns the parents array, where <c>parents[i]</c> denotes the parent of
+ /// category ordinal <c>i</c>.
+ /// </summary>
+ /// <returns>The parents array, indexed by category ordinal.</returns>
+ public abstract int[] Parents();
+
+ /// <summary>
+ /// Returns the children array, where <c>children[i]</c> denotes a child of
+ /// category ordinal <c>i</c>.
+ /// </summary>
+ /// <returns>The children array, indexed by category ordinal.</returns>
+ public abstract int[] Children();
+
+ /// <summary>
+ /// Returns the siblings array, where <c>siblings[i]</c> denotes the sibling
+ /// of category ordinal <c>i</c>.
+ /// </summary>
+ /// <returns>The siblings array, indexed by category ordinal.</returns>
+ public abstract int[] Siblings();
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/PrintTaxonomyStats.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/PrintTaxonomyStats.cs b/Lucene.Net.Facet/Taxonomy/PrintTaxonomyStats.cs
new file mode 100644
index 0000000..bfcbda7
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/PrintTaxonomyStats.cs
@@ -0,0 +1,121 @@
+using System;
+using System.IO;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using ChildrenIterator = Lucene.Net.Facet.Taxonomy.TaxonomyReader.ChildrenIterator;
+ using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader;
+ using Directory = Lucene.Net.Store.Directory;
+ using FSDirectory = Lucene.Net.Store.FSDirectory;
+
+    /// <summary>
+    /// Prints how many ords are under each dimension.
+    /// </summary>
+
+    // java -cp ../build/core/classes/java:../build/facet/classes/java org.apache.lucene.facet.util.PrintTaxonomyStats -printTree /s2/scratch/indices/wikibig.trunk.noparents.facets.Lucene41.nd1M/facets
+    public class PrintTaxonomyStats
+    {
+
+        /// <summary>
+        /// Sole constructor. </summary>
+        public PrintTaxonomyStats()
+        {
+        }
+
+        /// <summary>
+        /// Command-line tool. Accepts an optional <c>-printTree</c> switch and the
+        /// path of the taxonomy index; prints usage and exits with code 1 when the
+        /// argument count does not match.
+        /// </summary>
+        /// <param name="args">Command-line arguments.</param>
+        public static void Main(string[] args)
+        {
+            bool printTree = false;
+            string path = null;
+            foreach (string arg in args)
+            {
+                if (arg.Equals("-printTree"))
+                {
+                    printTree = true;
+                }
+                else
+                {
+                    path = arg;
+                }
+            }
+            if (args.Length != (printTree ? 2 : 1))
+            {
+                Console.WriteLine("\nUsage: java -classpath ... org.apache.lucene.facet.util.PrintTaxonomyStats [-printTree] /path/to/taxononmy/index\n");
+                Environment.Exit(1);
+            }
+
+            // Release the directory and the reader even if printing fails part-way.
+            Store.Directory dir = FSDirectory.Open(new DirectoryInfo(path));
+            try
+            {
+                var r = new DirectoryTaxonomyReader(dir);
+                try
+                {
+                    PrintStats(r, System.Console.Out, printTree);
+                }
+                finally
+                {
+                    r.Dispose();
+                }
+            }
+            finally
+            {
+                dir.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Writes the total category count, then one line per child of the root
+        /// with its number of immediate children and total descendants. When
+        /// <paramref name="printTree"/> is set, the full subtree of each such child
+        /// is printed as well.
+        /// </summary>
+        /// <param name="r">Taxonomy reader to inspect.</param>
+        /// <param name="out">Destination for the report.</param>
+        /// <param name="printTree">Whether to print every descendant.</param>
+        public static void PrintStats(TaxonomyReader r, TextWriter @out, bool printTree)
+        {
+            @out.WriteLine(r.Size + " total categories.");
+
+            ChildrenIterator it = r.GetChildren(TaxonomyReader.ROOT_ORDINAL);
+            int child;
+            while ((child = it.Next()) != TaxonomyReader.INVALID_ORDINAL)
+            {
+                ChildrenIterator childrenIt = r.GetChildren(child);
+                int numImmediateChildren = 0;
+                while (childrenIt.Next() != TaxonomyReader.INVALID_ORDINAL)
+                {
+                    numImmediateChildren++;
+                }
+                FacetLabel cp = r.GetPath(child);
+                @out.WriteLine("/" + cp.components[0] + ": " + numImmediateChildren + " immediate children; " + (1 + CountAllChildren(r, child)) + " total categories");
+                if (printTree)
+                {
+                    PrintAllChildren(@out, r, child, " ", 1);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Recursively counts all descendants of <paramref name="ord"/>
+        /// (the ordinal itself is not counted).
+        /// </summary>
+        private static int CountAllChildren(TaxonomyReader r, int ord)
+        {
+            int count = 0;
+            ChildrenIterator it = r.GetChildren(ord);
+            int child;
+            while ((child = it.Next()) != TaxonomyReader.INVALID_ORDINAL)
+            {
+                count += 1 + CountAllChildren(r, child);
+            }
+            return count;
+        }
+
+        /// <summary>
+        /// Recursively prints the path component at index <paramref name="depth"/>
+        /// for every descendant of <paramref name="ord"/>, growing the indent by
+        /// one space per level.
+        /// </summary>
+        private static void PrintAllChildren(TextWriter @out, TaxonomyReader r, int ord, string indent, int depth)
+        {
+            ChildrenIterator it = r.GetChildren(ord);
+            int child;
+            while ((child = it.Next()) != TaxonomyReader.INVALID_ORDINAL)
+            {
+                @out.WriteLine(indent + "/" + r.GetPath(child).components[depth]);
+                PrintAllChildren(@out, r, child, indent + " ", depth + 1);
+            }
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/SearcherTaxonomyManager.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/SearcherTaxonomyManager.cs b/Lucene.Net.Facet/Taxonomy/SearcherTaxonomyManager.cs
new file mode 100644
index 0000000..d543aad
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/SearcherTaxonomyManager.cs
@@ -0,0 +1,179 @@
+using System.Threading;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader;
+ using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter;
+ using DirectoryReader = Lucene.Net.Index.DirectoryReader;
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using IndexWriter = Lucene.Net.Index.IndexWriter;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using Lucene.Net.Search;
+ using SearcherFactory = Lucene.Net.Search.SearcherFactory;
+ using SearcherManager = Lucene.Net.Search.SearcherManager;
+ using Directory = Lucene.Net.Store.Directory;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+
+    /// <summary>
+    /// Manages near-real-time reopen of both an <see cref="IndexSearcher"/>
+    /// and a <see cref="TaxonomyReader"/>.
+    ///
+    /// <para><b>NOTE</b>: if the taxonomy is replaced on the
+    /// <see cref="DirectoryTaxonomyWriter"/> (replaceTaxonomy), a new
+    /// <see cref="SearcherTaxonomyManager"/> must be opened afterwards.
+    /// </para>
+    /// </summary>
+    public class SearcherTaxonomyManager : ReferenceManager<SearcherTaxonomyManager.SearcherAndTaxonomy>
+    {
+
+        /// <summary>
+        /// Holds a matched pair of <see cref="IndexSearcher"/> and
+        /// <see cref="TaxonomyReader"/>.
+        /// </summary>
+        public class SearcherAndTaxonomy
+        {
+            /// <summary>
+            /// Point-in-time <see cref="IndexSearcher"/>. </summary>
+            public readonly IndexSearcher searcher;
+
+            /// <summary>
+            /// Matching point-in-time <see cref="DirectoryTaxonomyReader"/>. </summary>
+            public readonly DirectoryTaxonomyReader taxonomyReader;
+
+            /// <summary>
+            /// Pairs the given searcher with the given taxonomy reader. </summary>
+            public SearcherAndTaxonomy(IndexSearcher searcher, DirectoryTaxonomyReader taxonomyReader)
+            {
+                this.searcher = searcher;
+                this.taxonomyReader = taxonomyReader;
+            }
+        }
+
+        private readonly SearcherFactory searcherFactory;
+        private readonly long taxoEpoch;
+        private readonly DirectoryTaxonomyWriter taxoWriter;
+
+        /// <summary>
+        /// Creates near-real-time searcher and taxonomy reader
+        /// from the corresponding writers. A default <see cref="SearcherFactory"/>
+        /// is used when <paramref name="searcherFactory"/> is <c>null</c>.
+        /// </summary>
+        public SearcherTaxonomyManager(IndexWriter writer, bool applyAllDeletes, SearcherFactory searcherFactory, DirectoryTaxonomyWriter taxoWriter)
+        {
+            this.searcherFactory = searcherFactory ?? new SearcherFactory();
+            this.taxoWriter = taxoWriter;
+
+            // The taxonomy reader is opened before the index reader.
+            var initialTaxoReader = new DirectoryTaxonomyReader(taxoWriter);
+            var initialSearcher = SearcherManager.GetSearcher(this.searcherFactory, DirectoryReader.Open(writer, applyAllDeletes));
+            Current = new SearcherAndTaxonomy(initialSearcher, initialTaxoReader);
+
+            // Remember the epoch so a later taxonomy replacement can be detected.
+            taxoEpoch = taxoWriter.TaxonomyEpoch;
+        }
+
+        /// <summary>
+        /// Creates search and taxonomy readers over the corresponding directories.
+        ///
+        /// <para>
+        /// <b>NOTE:</b> you should only use this constructor if you commit and
+        /// refresh in the same thread. Otherwise it could lead to an
+        /// unsync'd <see cref="IndexSearcher"/> and <see cref="TaxonomyReader"/> pair.
+        /// </para>
+        /// </summary>
+        public SearcherTaxonomyManager(Store.Directory indexDir, Store.Directory taxoDir, SearcherFactory searcherFactory)
+        {
+            this.searcherFactory = searcherFactory ?? new SearcherFactory();
+
+            var initialTaxoReader = new DirectoryTaxonomyReader(taxoDir);
+            var initialSearcher = SearcherManager.GetSearcher(this.searcherFactory, DirectoryReader.Open(indexDir));
+            Current = new SearcherAndTaxonomy(initialSearcher, initialTaxoReader);
+
+            // No writer: the epoch check in RefreshIfNeeded is disabled.
+            taxoWriter = null;
+            taxoEpoch = -1;
+        }
+
+        /// <summary>
+        /// Releases one reference on both halves of the pair, index reader first.
+        /// </summary>
+        protected override void DecRef(SearcherAndTaxonomy @ref)
+        {
+            @ref.searcher.IndexReader.DecRef();
+
+            // This decRef can fail, and then in theory we should
+            // tryIncRef the searcher to put back the ref count
+            // ... but 1) the below decRef should only fail because
+            // it decRef'd to 0 and closed and hit some IOException
+            // during close, in which case 2) very likely the
+            // searcher was also just closed by the above decRef and
+            // a tryIncRef would fail:
+            @ref.taxonomyReader.DecRef();
+        }
+
+        /// <summary>
+        /// Tries to acquire a reference on both halves of the pair; if the taxonomy
+        /// reader cannot be acquired, the index reader reference is given back.
+        /// </summary>
+        /// <returns><c>true</c> only when both references were acquired.</returns>
+        protected override bool TryIncRef(SearcherAndTaxonomy @ref)
+        {
+            if (!@ref.searcher.IndexReader.TryIncRef())
+            {
+                return false;
+            }
+            if (@ref.taxonomyReader.TryIncRef())
+            {
+                return true;
+            }
+
+            // Second half failed: undo the first so the counts stay balanced.
+            @ref.searcher.IndexReader.DecRef();
+            return false;
+        }
+
+        /// <summary>
+        /// Reopens the index reader and, if it changed, pairs it with a reopened
+        /// (or re-referenced) taxonomy reader.
+        /// </summary>
+        /// <returns>The new pair, or <c>null</c> when the index reader is unchanged.</returns>
+        protected override SearcherAndTaxonomy RefreshIfNeeded(SearcherAndTaxonomy @ref)
+        {
+            // Must re-open searcher first, otherwise we may get a
+            // new reader that references ords not yet known to the
+            // taxonomy reader:
+            IndexReader currentReader = @ref.searcher.IndexReader;
+            IndexReader reopenedReader = DirectoryReader.OpenIfChanged((DirectoryReader)currentReader);
+            if (reopenedReader == null)
+            {
+                return null;
+            }
+
+            var taxo = TaxonomyReader.OpenIfChanged(@ref.taxonomyReader);
+            if (taxo == null)
+            {
+                // Taxonomy unchanged: share the existing reader with the new pair.
+                @ref.taxonomyReader.IncRef();
+                taxo = @ref.taxonomyReader;
+            }
+            else if (taxoWriter != null && taxoWriter.TaxonomyEpoch != taxoEpoch)
+            {
+                IOUtils.Close(reopenedReader, taxo);
+                // NOTE(review): InvalidOperationException would be the idiomatic type here;
+                // kept as-is because callers may catch ThreadStateException.
+                throw new ThreadStateException("DirectoryTaxonomyWriter.replaceTaxonomy was called, which is not allowed when using SearcherTaxonomyManager");
+            }
+
+            return new SearcherAndTaxonomy(SearcherManager.GetSearcher(searcherFactory, reopenedReader), taxo);
+        }
+
+        /// <summary>
+        /// Returns the reference count of the pair's index reader.
+        /// </summary>
+        protected override int GetRefCount(SearcherAndTaxonomy reference)
+        {
+            int indexReaderRefs = reference.searcher.IndexReader.RefCount;
+            return indexReaderRefs;
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/TaxonomyFacetCounts.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/TaxonomyFacetCounts.cs b/Lucene.Net.Facet/Taxonomy/TaxonomyFacetCounts.cs
new file mode 100644
index 0000000..49be839
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/TaxonomyFacetCounts.cs
@@ -0,0 +1,77 @@
+using System.Collections.Generic;
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MatchingDocs = FacetsCollector.MatchingDocs;
+ using BinaryDocValues = Lucene.Net.Index.BinaryDocValues;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using IntsRef = Lucene.Net.Util.IntsRef;
+
+    /// <summary>
+    /// Counts facet ordinals read through any <see cref="OrdinalsReader"/>; use
+    /// <c>FastTaxonomyFacetCounts</c> if you are using the
+    /// default encoding from <see cref="BinaryDocValues"/>.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    public class TaxonomyFacetCounts : IntTaxonomyFacets
+    {
+        private readonly OrdinalsReader ordinalsReader;
+
+        /// <summary>
+        /// Create <c>TaxonomyFacetCounts</c>, which also
+        /// counts all facet labels. Use this for a non-default
+        /// <see cref="OrdinalsReader"/>; otherwise use
+        /// <c>FastTaxonomyFacetCounts</c>.
+        /// </summary>
+        public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
+            : base(ordinalsReader.IndexFieldName, taxoReader, config)
+        {
+            this.ordinalsReader = ordinalsReader;
+            Count(fc.GetMatchingDocs);
+        }
+
+        /// <summary>
+        /// Increments the counter of every ordinal attached to every matching
+        /// document, then rolls the counts up.
+        /// </summary>
+        private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
+        {
+            // One scratch buffer is reused for every document.
+            var ordinals = new IntsRef();
+            foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
+            {
+                OrdinalsReader.OrdinalsSegmentReader segmentOrds = ordinalsReader.GetReader(segmentHits.context);
+                DocIdSetIterator iterator = segmentHits.bits.GetIterator();
+
+                for (int docId = iterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.NextDoc())
+                {
+                    segmentOrds.Get(docId, ordinals);
+                    int upto = ordinals.Offset + ordinals.Length;
+                    for (int pos = ordinals.Offset; pos < upto; pos++)
+                    {
+                        values[ordinals.Ints[pos]]++;
+                    }
+                }
+            }
+
+            Rollup();
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumFloatAssociations.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumFloatAssociations.cs b/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumFloatAssociations.cs
new file mode 100644
index 0000000..bf0d417
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumFloatAssociations.cs
@@ -0,0 +1,98 @@
+using System.Collections.Generic;
+using Lucene.Net.Facet;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MatchingDocs = FacetsCollector.MatchingDocs;
+ using BinaryDocValues = Lucene.Net.Index.BinaryDocValues;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+
+    /// <summary>
+    /// Aggregates sum of float values previously indexed with
+    /// <see cref="FloatAssociationFacetField"/>, assuming the default
+    /// encoding.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    public class TaxonomyFacetSumFloatAssociations : FloatTaxonomyFacets
+    {
+
+        /// <summary>
+        /// Create <c>TaxonomyFacetSumFloatAssociations</c> against
+        /// the default index field.
+        /// </summary>
+        public TaxonomyFacetSumFloatAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
+            : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
+        {
+        }
+
+        /// <summary>
+        /// Create <c>TaxonomyFacetSumFloatAssociations</c> against
+        /// the specified index field.
+        /// </summary>
+        public TaxonomyFacetSumFloatAssociations(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
+            : base(indexFieldName, taxoReader, config)
+        {
+            SumValues(fc.GetMatchingDocs);
+        }
+
+        /// <summary>
+        /// Decodes the 4 bytes starting at <paramref name="offset"/> as a
+        /// big-endian 32-bit integer.
+        /// </summary>
+        private static int ReadInt32BigEndian(sbyte[] bytes, int offset)
+        {
+            return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16) | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
+        }
+
+        /// <summary>
+        /// For every matching document, reads the binary doc value of the index
+        /// field as a sequence of (ordinal, float bits) pairs of 4 bytes each and
+        /// adds each float to the running sum of its ordinal.
+        /// </summary>
+        private void SumValues(IList<FacetsCollector.MatchingDocs> matchingDocs)
+        {
+            // Reused for every document instead of allocating one per document.
+            BytesRef scratch = new BytesRef();
+
+            foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
+            {
+                BinaryDocValues dv = hits.context.AtomicReader.GetBinaryDocValues(IndexFieldName);
+                if (dv == null) // this reader does not have DocValues for the requested category list
+                {
+                    continue;
+                }
+
+                DocIdSetIterator docs = hits.bits.GetIterator();
+
+                int doc;
+                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    // TODO: use OrdinalsReader? we'd need to add a
+                    // BytesRef getAssociation()?
+                    dv.Get(doc, scratch);
+                    sbyte[] bytes = scratch.Bytes;
+                    int end = scratch.Offset + scratch.Length;
+                    int offset = scratch.Offset;
+                    while (offset < end)
+                    {
+                        int ord = ReadInt32BigEndian(bytes, offset);
+                        offset += 4;
+                        int value = ReadInt32BigEndian(bytes, offset);
+                        offset += 4;
+                        values[ord] += Number.IntBitsToFloat(value);
+                    }
+                }
+            }
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumIntAssociations.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumIntAssociations.cs b/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumIntAssociations.cs
new file mode 100644
index 0000000..849cb30
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumIntAssociations.cs
@@ -0,0 +1,96 @@
+using System.Collections.Generic;
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MatchingDocs = FacetsCollector.MatchingDocs;
+ using BinaryDocValues = Lucene.Net.Index.BinaryDocValues;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+
+    /// <summary>
+    /// Aggregates sum of int values previously indexed with
+    /// <see cref="IntAssociationFacetField"/>, assuming the default
+    /// encoding.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    public class TaxonomyFacetSumIntAssociations : IntTaxonomyFacets
+    {
+
+        /// <summary>
+        /// Create <c>TaxonomyFacetSumIntAssociations</c> against
+        /// the default index field.
+        /// </summary>
+        public TaxonomyFacetSumIntAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
+            : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
+        {
+        }
+
+        /// <summary>
+        /// Create <c>TaxonomyFacetSumIntAssociations</c> against
+        /// the specified index field.
+        /// </summary>
+        public TaxonomyFacetSumIntAssociations(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
+            : base(indexFieldName, taxoReader, config)
+        {
+            SumValues(fc.GetMatchingDocs);
+        }
+
+        /// <summary>
+        /// Decodes the 4 bytes starting at <paramref name="offset"/> as a
+        /// big-endian 32-bit integer.
+        /// </summary>
+        private static int ReadInt32BigEndian(sbyte[] bytes, int offset)
+        {
+            return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16) | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
+        }
+
+        /// <summary>
+        /// For every matching document, reads the binary doc value of the index
+        /// field as a sequence of (ordinal, value) pairs of 4 bytes each and adds
+        /// each value to the running sum of its ordinal.
+        /// </summary>
+        private void SumValues(IList<FacetsCollector.MatchingDocs> matchingDocs)
+        {
+            // Reused for every document instead of allocating one per document.
+            BytesRef scratch = new BytesRef();
+
+            foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
+            {
+                BinaryDocValues dv = hits.context.AtomicReader.GetBinaryDocValues(IndexFieldName);
+                if (dv == null) // this reader does not have DocValues for the requested category list
+                {
+                    continue;
+                }
+
+                DocIdSetIterator docs = hits.bits.GetIterator();
+
+                int doc;
+                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    // TODO: use OrdinalsReader? we'd need to add a
+                    // BytesRef getAssociation()?
+                    dv.Get(doc, scratch);
+                    sbyte[] bytes = scratch.Bytes;
+                    int end = scratch.Offset + scratch.Length;
+                    int offset = scratch.Offset;
+                    while (offset < end)
+                    {
+                        int ord = ReadInt32BigEndian(bytes, offset);
+                        offset += 4;
+                        int value = ReadInt32BigEndian(bytes, offset);
+                        offset += 4;
+                        values[ord] += value;
+                    }
+                }
+            }
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumValueSource.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumValueSource.cs b/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumValueSource.cs
new file mode 100644
index 0000000..4d4fc76
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumValueSource.cs
@@ -0,0 +1,247 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Threading;
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MatchingDocs = FacetsCollector.MatchingDocs;
+ using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+ using FunctionValues = Lucene.Net.Queries.Function.FunctionValues;
+ using ValueSource = Lucene.Net.Queries.Function.ValueSource;
+ using DoubleDocValues = Lucene.Net.Queries.Function.DocValues.DoubleDocValues;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using Scorer = Lucene.Net.Search.Scorer;
+ using Weight = Lucene.Net.Search.Weight;
+ using IntsRef = Lucene.Net.Util.IntsRef;
+
+    /// <summary>
+    /// Aggregates sum of values from <see cref="FunctionValues"/>
+    /// (its <c>DoubleVal</c>), for each facet label.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    public class TaxonomyFacetSumValueSource : FloatTaxonomyFacets
+    {
+        private readonly OrdinalsReader ordinalsReader;
+
+        /// <summary>
+        /// Aggregates float facet values from the provided
+        /// <see cref="ValueSource"/>, pulling ordinals using
+        /// <see cref="DocValuesOrdinalsReader"/> against the default indexed
+        /// facet field (<c>FacetsConfig.DEFAULT_INDEX_FIELD_NAME</c>).
+        /// </summary>
+        public TaxonomyFacetSumValueSource(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc, ValueSource valueSource)
+            : this(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME), taxoReader, config, fc, valueSource)
+        {
+        }
+
+        /// <summary>
+        /// Aggregates float facet values from the provided
+        /// <see cref="ValueSource"/>, and pulls ordinals from the
+        /// provided <see cref="OrdinalsReader"/>.
+        /// </summary>
+        public TaxonomyFacetSumValueSource(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc, ValueSource valueSource)
+            : base(ordinalsReader.IndexFieldName, taxoReader, config)
+        {
+            this.ordinalsReader = ordinalsReader;
+            SumValues(fc.GetMatchingDocs, fc.KeepScores, valueSource);
+        }
+
+        /// <summary>
+        /// Minimal <see cref="Scorer"/> that only reports a doc id and score that
+        /// are assigned from outside; all iteration members throw
+        /// <see cref="System.NotSupportedException"/>.
+        /// </summary>
+        private sealed class FakeScorer : Scorer
+        {
+            internal float score_Renamed;
+            internal int docID_Renamed;
+            internal FakeScorer()
+                : base(null)
+            {
+            }
+            public override float Score()
+            {
+                return score_Renamed;
+            }
+            public override int Freq()
+            {
+                throw new System.NotSupportedException();
+            }
+            public override int DocID()
+            {
+                return docID_Renamed;
+            }
+            public override int NextDoc()
+            {
+                throw new System.NotSupportedException();
+            }
+            public override int Advance(int target)
+            {
+                throw new System.NotSupportedException();
+            }
+            public override long Cost()
+            {
+                return 0;
+            }
+            public override Weight Weight
+            {
+                get
+                {
+                    throw new System.NotSupportedException();
+                }
+            }
+
+            public override ICollection<ChildScorer> Children
+            {
+                get
+                {
+                    throw new System.NotSupportedException();
+                }
+            }
+        }
+
+        /// <summary>
+        /// For every matching document, evaluates <paramref name="valueSource"/>
+        /// and adds the result to each ordinal attached to the document, then
+        /// rolls the sums up.
+        /// </summary>
+        /// <param name="matchingDocs">Per-segment hits to aggregate.</param>
+        /// <param name="keepScores">When <c>true</c>, a scorer exposing the hit's
+        /// doc id and score is published in the value-source context under the
+        /// key <c>"scorer"</c>.</param>
+        /// <param name="valueSource">Source of the per-document value.</param>
+        private void SumValues(IList<MatchingDocs> matchingDocs, bool keepScores, ValueSource valueSource)
+        {
+            FakeScorer scorer = new FakeScorer();
+            IDictionary context = new Dictionary<string, Scorer>();
+            if (keepScores)
+            {
+                context["scorer"] = scorer;
+            }
+            IntsRef scratch = new IntsRef();
+            foreach (MatchingDocs hits in matchingDocs)
+            {
+                OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.GetReader(hits.context);
+
+                int scoresIdx = 0;
+                float[] scores = hits.scores;
+
+                FunctionValues functionValues = valueSource.GetValues(context, hits.context);
+                DocIdSetIterator docs = hits.bits.GetIterator();
+
+                int doc;
+                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    ords.Get(doc, scratch);
+                    if (keepScores)
+                    {
+                        // Position the fake scorer on this hit before the value is computed.
+                        scorer.docID_Renamed = doc;
+                        scorer.score_Renamed = scores[scoresIdx++];
+                    }
+                    float value = (float)functionValues.DoubleVal(doc);
+                    for (int i = 0; i < scratch.Length; i++)
+                    {
+                        // Honour the buffer offset, as TaxonomyFacetCounts does.
+                        values[scratch.Ints[scratch.Offset + i]] += value;
+                    }
+                }
+            }
+
+            Rollup();
+        }
+
+        /// <summary>
+        /// <see cref="ValueSource"/> that returns the score for each
+        /// hit; use this to aggregate the sum of all hit scores
+        /// for each facet label.
+        /// </summary>
+        public class ScoreValueSource : ValueSource
+        {
+
+            /// <summary>
+            /// Sole constructor. </summary>
+            public ScoreValueSource()
+            {
+            }
+
+            /// <summary>
+            /// Returns values backed by the scorer stored in
+            /// <paramref name="context"/> under the key <c>"scorer"</c>.
+            /// </summary>
+            /// <exception cref="ThreadStateException">No scorer is present in the context.</exception>
+            public override FunctionValues GetValues(IDictionary context, AtomicReaderContext readerContext)
+            {
+                Scorer scorer = (Scorer)context["scorer"];
+                if (scorer == null)
+                {
+                    // NOTE(review): InvalidOperationException would be the idiomatic type here;
+                    // kept as-is because callers may catch ThreadStateException.
+                    throw new ThreadStateException("scores are missing; be sure to pass keepScores=true to FacetsCollector");
+                }
+                return new DoubleDocValuesAnonymousInnerClassHelper(this, scorer);
+            }
+
+            /// <summary>
+            /// Reports the current score of the wrapped scorer for any document.
+            /// </summary>
+            private class DoubleDocValuesAnonymousInnerClassHelper : DoubleDocValues
+            {
+                private readonly ScoreValueSource outerInstance;
+
+                private Scorer scorer;
+
+                public DoubleDocValuesAnonymousInnerClassHelper(ScoreValueSource outerInstance, Scorer scorer)
+                    : base(outerInstance)
+                {
+                    this.outerInstance = outerInstance;
+                    this.scorer = scorer;
+
+                }
+
+                /// <summary>
+                /// Returns the scorer's current score; <paramref name="document"/> is not consulted.
+                /// </summary>
+                public override double DoubleVal(int document)
+                {
+                    return scorer.Score();
+                }
+            }
+
+            /// <summary>
+            /// Identity equality: an instance equals only itself.
+            /// </summary>
+            public override bool Equals(object o)
+            {
+                if (ReferenceEquals(null, o)) return false;
+                if (ReferenceEquals(this, o)) return true;
+                if (o.GetType() != this.GetType()) return false;
+                return Equals((ScoreValueSource) o);
+            }
+
+            /// <summary>
+            /// Typed equality check. Uses reference equality; the previous
+            /// implementation called the static <c>object.Equals(this, other)</c>,
+            /// which dispatched back into <see cref="Equals(object)"/> and
+            /// recursed without end for two distinct instances.
+            /// </summary>
+            protected bool Equals(ScoreValueSource other)
+            {
+                return ReferenceEquals(this, other);
+            }
+
+            /// <summary>
+            /// Returns a hash code shared by all instances (consistent with
+            /// <see cref="Equals(object)"/>, since equal instances are the same instance).
+            /// </summary>
+            public override int GetHashCode()
+            {
+                return hcode;
+            }
+
+            private static readonly int hcode = typeof(DoubleDocValuesAnonymousInnerClassHelper).GetHashCode();
+
+            public override string Description
+            {
+                get
+                {
+                    return "score()";
+                }
+
+            }
+        }
+
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/Lucene.Net.Facet/Taxonomy/TaxonomyFacets.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Facet/Taxonomy/TaxonomyFacets.cs b/Lucene.Net.Facet/Taxonomy/TaxonomyFacets.cs
new file mode 100644
index 0000000..b23530d
--- /dev/null
+++ b/Lucene.Net.Facet/Taxonomy/TaxonomyFacets.cs
@@ -0,0 +1,137 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using DimConfig = Lucene.Net.Facet.FacetsConfig.DimConfig; // javadocs
+
+ /// <summary>
+ /// Base class for all taxonomy-based facets impls. </summary>
+ public abstract class TaxonomyFacets : Facets
+ {
+
+     /// <summary>
+     /// Orders results by descending value; ties are broken by ordinal
+     /// comparison of the dimension name.
+     /// </summary>
+     private static readonly IComparer<FacetResult> BY_VALUE_THEN_DIM = new ComparatorAnonymousInnerClassHelper();
+
+     private sealed class ComparatorAnonymousInnerClassHelper : IComparer<FacetResult>
+     {
+         public int Compare(FacetResult a, FacetResult b)
+         {
+             if ((double)a.value > (double)b.value)
+             {
+                 return -1;
+             }
+             else if ((double)b.value > (double)a.value)
+             {
+                 return 1;
+             }
+             else
+             {
+                 // Ordinal comparison keeps the ordering independent of the current
+                 // culture; string.CompareTo would be culture-sensitive.
+                 return string.CompareOrdinal(a.dim, b.dim);
+             }
+         }
+     }
+
+     /// <summary>
+     /// Index field name provided to the constructor. </summary>
+     protected internal readonly string IndexFieldName;
+
+     /// <summary>
+     /// <see cref="TaxonomyReader"/> provided to the constructor. </summary>
+     protected internal readonly TaxonomyReader TaxoReader;
+
+     /// <summary>
+     /// <see cref="FacetsConfig"/> provided to the constructor. </summary>
+     protected internal readonly FacetsConfig Config;
+
+     /// <summary>
+     /// Maps parent ordinal to its child, or -1 if the parent
+     /// is childless.
+     /// </summary>
+     protected internal readonly int[] Children;
+
+     /// <summary>
+     /// Maps an ordinal to its sibling, or -1 if there is no
+     /// sibling.
+     /// </summary>
+     protected internal readonly int[] Siblings;
+
+     /// <summary>
+     /// Sole constructor. Stores the arguments and reads the children and
+     /// siblings arrays from the reader's parallel taxonomy arrays.
+     /// </summary>
+     /// <param name="indexFieldName"> index field name this instance works on </param>
+     /// <param name="taxoReader"> taxonomy reader supplying the parallel arrays </param>
+     /// <param name="config"> facets configuration used to look up dimensions </param>
+     protected internal TaxonomyFacets(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
+     {
+         this.IndexFieldName = indexFieldName;
+         this.TaxoReader = taxoReader;
+         this.Config = config;
+         ParallelTaxonomyArrays pta = taxoReader.ParallelTaxonomyArrays;
+         Children = pta.Children();
+         Siblings = pta.Siblings();
+     }
+
+     /// <summary>
+     /// Throws <see cref="System.ArgumentException"/> if the dimension's
+     /// configured index field differs from <see cref="IndexFieldName"/>.
+     /// Otherwise, returns the <see cref="DimConfig"/> for this dimension.
+     /// </summary>
+     /// <param name="dim"> dimension name to check </param>
+     /// <returns> the configuration registered for <paramref name="dim"/> </returns>
+     /// <exception cref="System.ArgumentException"> if the dimension was indexed into another field </exception>
+     protected internal virtual DimConfig VerifyDim(string dim)
+     {
+         DimConfig dimConfig = Config.GetDimConfig(dim);
+         if (!dimConfig.indexFieldName.Equals(IndexFieldName))
+         {
+             throw new System.ArgumentException("dimension \"" + dim + "\" was not indexed into field \"" + IndexFieldName + "\"");
+         }
+         return dimConfig;
+     }
+
+     /// <summary>
+     /// Collects the top children of every dimension under the taxonomy root
+     /// whose configured index field equals <see cref="IndexFieldName"/>, then
+     /// sorts them by descending value with ties broken by dimension name.
+     /// </summary>
+     /// <param name="topN"> passed through to <c>GetTopChildren</c> for each dimension </param>
+     /// <returns> the sorted results; dimensions that yield <c>null</c> are skipped </returns>
+     public override IList<FacetResult> GetAllDims(int topN)
+     {
+         List<FacetResult> results = new List<FacetResult>();
+
+         // Walk the root's children by following the sibling chain.
+         int ord = Children[TaxonomyReader.ROOT_ORDINAL];
+         while (ord != TaxonomyReader.INVALID_ORDINAL)
+         {
+             string dim = TaxoReader.GetPath(ord).components[0];
+             DimConfig dimConfig = Config.GetDimConfig(dim);
+             if (dimConfig.indexFieldName.Equals(IndexFieldName))
+             {
+                 FacetResult result = GetTopChildren(topN, dim);
+                 if (result != null)
+                 {
+                     results.Add(result);
+                 }
+             }
+             ord = Siblings[ord];
+         }
+
+         // Sort by highest value, tie break by dim:
+         results.Sort(BY_VALUE_THEN_DIM);
+         return results;
+     }
+
+ }
+}
\ No newline at end of file