You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by pa...@apache.org on 2013/11/07 14:53:45 UTC
[30/53] [abbrv] git commit: Port Facet.Codecs
Port Facet.Codecs
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9e1b6df7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9e1b6df7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9e1b6df7
Branch: refs/heads/branch_4x
Commit: 9e1b6df70f570cfe529c9bcd9c9097b780aa3360
Parents: 2332015
Author: Paul Irwin <pa...@gmail.com>
Authored: Sun Nov 3 19:15:58 2013 -0500
Committer: Paul Irwin <pa...@gmail.com>
Committed: Sun Nov 3 19:15:58 2013 -0500
----------------------------------------------------------------------
.../Codecs/Facet42/Facet42BinaryDocValues.cs | 33 ++++++
.../Facet/Codecs/Facet42/Facet42Codec.cs | 48 ++++++++
.../Codecs/Facet42/Facet42DocValuesConsumer.cs | 113 +++++++++++++++++++
.../Codecs/Facet42/Facet42DocValuesFormat.cs | 32 ++++++
.../Codecs/Facet42/Facet42DocValuesProducer.cs | 71 ++++++++++++
src/contrib/Facet/Contrib.Facet.csproj | 5 +
src/core/Codecs/Lucene42/Lucene42Codec.cs | 4 +-
7 files changed, 304 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs b/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
new file mode 100644
index 0000000..fdaffd5
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42BinaryDocValues.cs
@@ -0,0 +1,33 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>Binary doc values backed by one in-memory byte buffer that is sliced per document through a packed address table.</summary>
+    internal class Facet42BinaryDocValues : BinaryDocValues
+    {
+        private readonly sbyte[] bytes; // shared buffer that Get() slices; never copied
+        private readonly PackedInts.IReader addresses; // entry docID is used as the slice start, entry docID + 1 as its end
+        /// <summary>Reads a VInt byte count, then that many bytes, then a packed-ints reader from <paramref name="in_renamed"/>.</summary>
+        internal Facet42BinaryDocValues(DataInput in_renamed)
+        {
+            this.bytes = new sbyte[in_renamed.ReadVInt()];
+            in_renamed.ReadBytes(this.bytes, 0, this.bytes.Length);
+            this.addresses = PackedInts.GetReader(in_renamed);
+        }
+        /// <summary>Points <paramref name="ret"/> at the shared buffer, from the address of <paramref name="docID"/> up to the address of the next document.</summary>
+        public override void Get(int docID, BytesRef ret)
+        {
+            int offset = (int)addresses.Get(docID);
+            ret.bytes = bytes;
+            ret.offset = offset;
+            ret.length = (int)(addresses.Get(docID + 1) - offset);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs b/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
new file mode 100644
index 0000000..58fa223
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42Codec.cs
@@ -0,0 +1,48 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Codecs.Lucene42;
+using Lucene.Net.Facet.Params;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>
+    /// A <see cref="Lucene42Codec"/> that resolves facet category-list fields to the doc-values format
+    /// looked up under the name "Facet42" and every other field to the one looked up under "Lucene42".
+    /// </summary>
+    public class Facet42Codec : Lucene42Codec
+    {
+        private readonly ISet<String> facetFields;
+        private readonly DocValuesFormat facetsDVFormat = DocValuesFormat.ForName(@"Facet42");
+        private readonly DocValuesFormat lucene42DVFormat = DocValuesFormat.ForName(@"Lucene42");
+        /// <summary>Creates the codec for <see cref="FacetIndexingParams.DEFAULT"/>.</summary>
+        public Facet42Codec()
+            : this(FacetIndexingParams.DEFAULT)
+        {
+        }
+        /// <summary>Creates the codec for the category-list fields exposed by <paramref name="fip"/>.</summary>
+        /// <exception cref="ArgumentException">The partition size of <paramref name="fip"/> is not <c>int.MaxValue</c>.</exception>
+        public Facet42Codec(FacetIndexingParams fip)
+        {
+            bool partitioned = fip.PartitionSize != int.MaxValue;
+            if (partitioned)
+            {
+                throw new ArgumentException("this Codec does not support partitions");
+            }
+            // Remember the field name of every category list so the per-field lookup is a set membership test.
+            ISet<String> names = new HashSet<String>();
+            foreach (CategoryListParams categoryList in fip.AllCategoryListParams)
+            {
+                names.Add(categoryList.field);
+            }
+            this.facetFields = names;
+        }
+        /// <summary>Returns the facet format when <paramref name="field"/> is one of the collected facet fields, otherwise the Lucene42 format.</summary>
+        public override DocValuesFormat GetDocValuesFormatForField(string field)
+        {
+            return facetFields.Contains(field) ? facetsDVFormat : lucene42DVFormat;
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
new file mode 100644
index 0000000..bf0b4d4
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesConsumer.cs
@@ -0,0 +1,113 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>
+    /// Writes the binary doc values of facet fields to one per-segment file named with the
+    /// <see cref="Facet42DocValuesFormat.EXTENSION"/> extension; all other doc-values types are rejected.
+    /// </summary>
+    public class Facet42DocValuesConsumer : DocValuesConsumer
+    {
+        private const string BinaryOnlyMessage = @"FacetsDocValues can only handle binary fields";
+        private readonly IndexOutput output;
+        private readonly int maxDoc;
+        private readonly float acceptableOverheadRatio;
+        /// <summary>Creates a consumer that uses <see cref="PackedInts.DEFAULT"/> as the acceptable overhead ratio.</summary>
+        public Facet42DocValuesConsumer(SegmentWriteState state)
+            : this(state, PackedInts.DEFAULT)
+        {
+        }
+        /// <summary>Creates the segment's output file and writes the codec header; if that fails, this instance is handed to <c>IOUtils.CloseWhileHandlingException</c>.</summary>
+        public Facet42DocValuesConsumer(SegmentWriteState state, float acceptableOverheadRatio)
+        {
+            this.acceptableOverheadRatio = acceptableOverheadRatio;
+            bool initialized = false;
+            try
+            {
+                string outputName = IndexFileNames.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
+                this.output = state.directory.CreateOutput(outputName, state.context);
+                CodecUtil.WriteHeader(this.output, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_CURRENT);
+                this.maxDoc = state.segmentInfo.DocCount;
+                initialized = true;
+            }
+            finally
+            {
+                if (!initialized)
+                {
+                    IOUtils.CloseWhileHandlingException((IDisposable)this);
+                }
+            }
+        }
+        /// <summary>Always throws <see cref="NotSupportedException"/>: only binary fields can be written.</summary>
+        public override void AddNumericField(FieldInfo field, IEnumerable<long> values)
+        {
+            throw new NotSupportedException(BinaryOnlyMessage);
+        }
+        /// <summary>Writes the field number, the total byte count, the bytes of every value, then one packed start address per value plus a final end address.</summary>
+        /// <exception cref="InvalidOperationException">The values add up to more than <c>int.MaxValue</c> bytes.</exception>
+        public override void AddBinaryField(FieldInfo field, IEnumerable<BytesRef> values)
+        {
+            output.WriteVInt(field.number);
+            long byteTotal = 0;
+            foreach (BytesRef entry in values)
+            {
+                byteTotal += entry.length;
+            }
+            if (byteTotal > int.MaxValue)
+            {
+                throw new InvalidOperationException(@"too many facets in one segment: Facet42DocValues cannot handle more than 2 GB facet data per segment");
+            }
+            output.WriteVInt((int)byteTotal);
+            foreach (BytesRef entry in values)
+            {
+                output.WriteBytes(entry.bytes, entry.offset, entry.length);
+            }
+            // Third pass over values: each address is the running sum of the lengths that precede it.
+            PackedInts.Writer addressWriter = PackedInts.GetWriter(output, maxDoc + 1, PackedInts.BitsRequired(byteTotal + 1), acceptableOverheadRatio);
+            int nextStart = 0;
+            foreach (BytesRef entry in values)
+            {
+                addressWriter.Add(nextStart);
+                nextStart += entry.length;
+            }
+            addressWriter.Add(nextStart);
+            addressWriter.Finish();
+        }
+        /// <summary>Always throws <see cref="NotSupportedException"/>: only binary fields can be written.</summary>
+        public override void AddSortedField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<int> docToOrd)
+        {
+            throw new NotSupportedException(BinaryOnlyMessage);
+        }
+        /// <summary>Always throws <see cref="NotSupportedException"/>: only binary fields can be written.</summary>
+        public override void AddSortedSetField(FieldInfo field, IEnumerable<BytesRef> values, IEnumerable<int> docToOrdCount, IEnumerable<long> ords)
+        {
+            throw new NotSupportedException(BinaryOnlyMessage);
+        }
+        /// <summary>Writes the VInt -1 that the producer reads as end-of-fields, then closes the output; the output is also closed when that write throws.</summary>
+        protected override void Dispose(bool disposing)
+        {
+            bool markerWritten = false;
+            try
+            {
+                output.WriteVInt(-1);
+                markerWritten = true;
+            }
+            finally
+            {
+                if (!markerWritten)
+                {
+                    IOUtils.CloseWhileHandlingException((IDisposable)output);
+                }
+            }
+            IOUtils.Close(output);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
new file mode 100644
index 0000000..4de3192
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesFormat.cs
@@ -0,0 +1,32 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>Doc-values format named "Facet42"; it creates the consumer and the producer for files with the <see cref="EXTENSION"/> extension.</summary>
+    public sealed class Facet42DocValuesFormat : DocValuesFormat
+    {
+        public const int VERSION_START = 0;
+        public const int VERSION_CURRENT = VERSION_START;
+        public const string CODEC = @"FacetsDocValues"; // codec name written to and checked in the file header
+        public const string EXTENSION = @"fdv"; // extension of the per-segment file
+        public Facet42DocValuesFormat()
+            : base(@"Facet42") // format name handed to the DocValuesFormat base constructor
+        {
+        }
+        /// <summary>Creates a <see cref="Facet42DocValuesConsumer"/> for <paramref name="state"/>.</summary>
+        public override DocValuesConsumer FieldsConsumer(SegmentWriteState state)
+        {
+            return new Facet42DocValuesConsumer(state);
+        }
+        /// <summary>Creates a <see cref="Facet42DocValuesProducer"/> for <paramref name="state"/>.</summary>
+        public override DocValuesProducer FieldsProducer(SegmentReadState state)
+        {
+            return new Facet42DocValuesProducer(state);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
new file mode 100644
index 0000000..6ec8239
--- /dev/null
+++ b/src/contrib/Facet/Codecs/Facet42/Facet42DocValuesProducer.cs
@@ -0,0 +1,71 @@
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Codecs.Facet42
+{
+    /// <summary>
+    /// Loads every field stored in a segment's facet doc-values file at construction time and serves the
+    /// resulting <see cref="Facet42BinaryDocValues"/> by field number; only binary doc values are supported.
+    /// </summary>
+    internal class Facet42DocValuesProducer : DocValuesProducer
+    {
+        private readonly IDictionary<int, Facet42BinaryDocValues> fields = new HashMap<int, Facet42BinaryDocValues>();
+        /// <summary>Opens the segment's file, checks the codec header, and reads one entry per field number until a field number of -1 is read.</summary>
+        internal Facet42DocValuesProducer(SegmentReadState state)
+        {
+            string inputName = IndexFileNames.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
+            IndexInput input = state.directory.OpenInput(inputName, state.context);
+            bool loaded = false;
+            try
+            {
+                CodecUtil.CheckHeader(input, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_START, Facet42DocValuesFormat.VERSION_START);
+                for (int number = input.ReadVInt(); number != -1; number = input.ReadVInt())
+                {
+                    fields[number] = new Facet42BinaryDocValues(input);
+                }
+                loaded = true;
+            }
+            finally
+            {
+                if (!loaded)
+                {
+                    IOUtils.CloseWhileHandlingException((IDisposable)input);
+                }
+            }
+            // Reached only after a complete load; the failure path above has already closed the input.
+            IOUtils.Close(input);
+        }
+        /// <summary>Always throws <see cref="NotSupportedException"/>: only binary doc values are implemented.</summary>
+        public override NumericDocValues GetNumeric(FieldInfo field)
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+        /// <summary>Returns the entry stored under <c>field.number</c>; the result for an unknown number is whatever the map's indexer yields.</summary>
+        public override BinaryDocValues GetBinary(FieldInfo field)
+        {
+            return fields[field.number];
+        }
+        /// <summary>Always throws <see cref="NotSupportedException"/>: only binary doc values are implemented.</summary>
+        public override SortedDocValues GetSorted(FieldInfo field)
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+        /// <summary>Always throws <see cref="NotSupportedException"/>: only binary doc values are implemented.</summary>
+        public override SortedSetDocValues GetSortedSet(FieldInfo field)
+        {
+            throw new NotSupportedException(@"FacetsDocValues only implements binary");
+        }
+        /// <summary>Does nothing.</summary>
+        protected override void Dispose(bool disposing)
+        {
+            // Intentionally empty: the constructor closes the input it opened before returning.
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/contrib/Facet/Contrib.Facet.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Contrib.Facet.csproj b/src/contrib/Facet/Contrib.Facet.csproj
index ff48e89..750ab35 100644
--- a/src/contrib/Facet/Contrib.Facet.csproj
+++ b/src/contrib/Facet/Contrib.Facet.csproj
@@ -39,6 +39,11 @@
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
+ <Compile Include="Codecs\Facet42\Facet42BinaryDocValues.cs" />
+ <Compile Include="Codecs\Facet42\Facet42Codec.cs" />
+ <Compile Include="Codecs\Facet42\Facet42DocValuesConsumer.cs" />
+ <Compile Include="Codecs\Facet42\Facet42DocValuesFormat.cs" />
+ <Compile Include="Codecs\Facet42\Facet42DocValuesProducer.cs" />
<Compile Include="Collections\LRUHashMap.cs" />
<Compile Include="Encoding\DGapVInt8IntDecoder.cs" />
<Compile Include="Encoding\DGapVInt8IntEncoder.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9e1b6df7/src/core/Codecs/Lucene42/Lucene42Codec.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Lucene42/Lucene42Codec.cs b/src/core/Codecs/Lucene42/Lucene42Codec.cs
index 2484e34..c8c766e 100644
--- a/src/core/Codecs/Lucene42/Lucene42Codec.cs
+++ b/src/core/Codecs/Lucene42/Lucene42Codec.cs
@@ -88,12 +88,12 @@ namespace Lucene.Net.Codecs.Lucene42
get { return liveDocsFormat; }
}
- public PostingsFormat GetPostingsFormatForField(String field)
+ public virtual PostingsFormat GetPostingsFormatForField(String field)
{
return defaultFormat;
}
- public DocValuesFormat GetDocValuesFormatForField(String field)
+ public virtual DocValuesFormat GetDocValuesFormatForField(String field)
{
return defaultDVFormat;
}